
Commit

problem about to try something dumb
LukeDefeo committed Aug 22, 2013
1 parent c373c14 commit 09a18a2
Showing 5 changed files with 63 additions and 70 deletions.
4 changes: 2 additions & 2 deletions NLP_Engine/sentiment_analyser/sentiment_analyser.py
@@ -10,9 +10,9 @@
path_to_wordset = "../../Data/Training/word_set-small.obj"
word_set = pickle.load(open(os.path.join(os.path.dirname(__file__), path_to_wordset)))
classifier = pickle.load(open(os.path.join(os.path.dirname(__file__), path_to_classifier)))
print 'Sentiment Analyser ready...'


def classify_tweet(tweet):
def classify_tweet(tweet,query_terms=[]):
    feature_set = extract_tweet_features(tweet)
    return classifier.classify(feature_set)

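Note that the new query_terms parameter is accepted but not yet used inside classify_tweet. A minimal sketch of how it might be wired in (my illustration, not part of this commit) would strip the search keywords out of the tweet before feature extraction so the classifier is not biased by the query itself, reusing the extract_tweet_features and classifier objects already defined in this module:

# Illustrative sketch only -- not part of the commit. Assumes the module-level
# extract_tweet_features and classifier shown above.
def classify_tweet(tweet, query_terms=()):
    query_words = set(term.lower() for term in query_terms)
    filtered = ' '.join(w for w in tweet.split() if w.lower() not in query_words)
    feature_set = extract_tweet_features(filtered)
    return classifier.classify(feature_set)
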
1 change: 1 addition & 0 deletions NLP_Engine/sentiment_detector/sentiment_detector.py
@@ -10,6 +10,7 @@
pos_tagger = POSTagger(os.path.join(os.path.dirname(__file__), 'stanford-model.tagger'),
                       os.path.join(os.path.dirname(__file__), 'stanford-postagger.jar'), encoding='utf8')

print 'Sentiment Detector ready...'

def extract_tags(tagged_sent):
    tags = [0] * len(tag_index)
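
For context, the pos_tagger constructed above is NLTK's old Stanford POSTagger wrapper; tagging a tokenized sentence yields a list of (token, tag) pairs, which is presumably what the truncated extract_tags folds into a count vector over tag_index. A small usage sketch (illustrative; the sentence is made up and the NLTK 2.x-era API is assumed):

# Illustrative usage of the pos_tagger configured above (NLTK 2.x-era API assumed).
tokens = 'I really love this phone'.split()
tagged_sent = pos_tagger.tag(tokens)   # e.g. [('I', 'PRP'), ('really', 'RB'), ('love', 'VBP'), ...]
print tagged_sent
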
4 changes: 4 additions & 0 deletions Test/threads.py
@@ -1,12 +1,16 @@
import threading
import time
import thread
from WebApp.tweetfetcher import Enum

__author__ = 'Luke'



Sentiments = Enum(['POSITIVE', 'NEGATIVE', 'OBJECTIVE', 'UNCLASSIFIED'])

a = Sentiments.POSITIVE
print a

def function():
    print 'inside'
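
The test script now pulls an Enum helper out of WebApp.tweetfetcher; Python 2 has no built-in enum type, so a tiny class like the following would be enough to make Sentiments.POSITIVE resolve. This is a sketch of a plausible implementation, since the actual definition is not shown in this diff:

# Hypothetical sketch of the imported Enum helper -- the real definition lives in
# WebApp.tweetfetcher and is not part of this diff.
class Enum(object):
    def __init__(self, names):
        for name in names:
            setattr(self, name, name)   # Sentiments.POSITIVE -> 'POSITIVE'
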
117 changes: 53 additions & 64 deletions WebApp/TweetFetcher.py
@@ -1,78 +1,56 @@
from NLP_Engine.sentiment_analyser.sentiment_analyser import classify_tweet
from NLP_Engine.sentiment_detector.sentiment_detector import tweet_contains_sentiment

__author__ = 'Luke'
from tweepy.streaming import StreamListener, json
from tweepy import OAuthHandler
from tweepy import Stream


def strip_tweet(tweet):
    pass


def tweet_contains_sentiment(tweet):
    return True
consumer_key = "ZYaUAuc8zNPM0BL5HgdSSg"
consumer_secret = "x0Xpf6d6P4nHN2GYl91XOK032ppjhCYOIQCQQT9wA"
access_token = "289934046-1sJjC4Oz1OGT3LYnKgoLGRUehicilfgzMR4TrS6v"
access_token_secret = "qlulCQHYvEKBQFyPOHcuvrsVUalSmWh2hCHbyhv4"


def process_sentiment(tweet):
    pass


# class TweetStore(object, query):
# def __init__(self):
# self._tweets = []
# self._tweet_fetcher = TweetFetcher()
#
# def get_tweets(self, start):
# return self._tweets[start:]
# class Tweet(object):
# def __init__(self, text, query_terms):
# self.text = text
# self.query_terms = query_terms
# self.contains_sentiment = None
# self.sentiment = None
#
# def add_tweet(self, tweet):
# if tweet_contains_sentiment(tweet):
# process_sentiment(tweet)
# strip_tweet(tweet)
# self._tweets.append(tweet)
# def classify(self):
# if self.contains_sentiment is None:
# self.contains_sentiment = tweet_contains_sentiment(self.text)
#
# def is_alive(self):
# return self._tweet_fetcher._alive
# if self.contains_sentiment is True:
# self.sentiment = classify_tweet(self.text, self.query_terms)


class TweetFetcher(StreamListener):
    consumer_key = "ZYaUAuc8zNPM0BL5HgdSSg"
    consumer_secret = "x0Xpf6d6P4nHN2GYl91XOK032ppjhCYOIQCQQT9wA"

    access_token = "289934046-1sJjC4Oz1OGT3LYnKgoLGRUehicilfgzMR4TrS6v"
    access_token_secret = "qlulCQHYvEKBQFyPOHcuvrsVUalSmWh2hCHbyhv4"

    def __init__(self, query, max_tweets=1000, ):
    def __init__(self, query, tweetstore, max_tweets=1000):
        super(TweetFetcher, self).__init__()
        self.tweet_store = tweetstore
        self._max_tweets = max_tweets
        self._query_terms = query.split()
        self._tweets = []
        self._count = 0
        self.alive = True
        auth = OAuthHandler(self.consumer_key, self.consumer_secret)
        auth.set_access_token(self.access_token, self.access_token_secret)
        self._alive = True
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)

        self._stream = Stream(auth, self)
        self._stream.filter(track=self._query_terms, async=True)

    def get_latest_tweets(self, amount=5):
        out = []
        for i in range(amount):
            if self._tweets:
                out.append(self._tweets.pop(-1))
        return out

    def get_tweets(self, start):
        return self._tweets[int(start):]

    def shutdown(self):
        self.alive = False

    def on_data(self, data):
        if len(self._tweets) < self._max_tweets and self.alive:
        if self._count < self._max_tweets and self._alive:
            tweet = json.loads(data)
            if tweet['lang'] == 'en':
                self._tweets.append({'guid': len(self._tweets), 'id': tweet['id'], 'text': tweet['text'], })
                self.tweet_store.add_tweet(
                    {'guid': self._count, 'id': tweet['id'], 'text': tweet['text'], 'query': self._query_terms})

            self._count += 1
            return True
        else:
            print 'Reached tweet limit ... shutdown'
@@ -83,18 +61,29 @@ def on_error(self, status):
        print status


# def test():
# fetcher = TweetFetcher('pokemon', 15)
# print '1'
# time.sleep(10)
# for data in fetcher.get_latest_tweets():
# print 'blah'
# print data
#
# print 'set 1'
#
# for data in fetcher.get_latest_tweets():
# print 'dattt'
# print data
#
# fetcher.shutdown()
class TweetStore(object):
    def __init__(self, query):
        self._classified_tweets = []
        self._objective_tweets = []
        self._tweet_fetcher = TweetFetcher(query, self)
        self._query = query

    def get_tweets(self, start):
        return self._classified_tweets[start:]

    def add_tweet(self, tweet):
        if tweet_contains_sentiment(tweet['text']):

            sentiment = classify_tweet(tweet['text'], tweet['query'])
            tweet['contains_sentiment'] = True
            tweet['sentiment'] = sentiment
            self._classified_tweets.append(tweet)
        else:
            tweet['contains_sentiment'] = False
            self._objective_tweets.append(tweet)

    def shutdown(self):
        self._tweet_fetcher._alive = False

    def is_alive(self):
        return self._tweet_fetcher._alive
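
With this change the TweetStore owns the streaming TweetFetcher and classifies tweets as they arrive, splitting them into classified and objective buckets. A rough usage sketch of the classes above (the query string and the sleep are illustrative only):

# Usage sketch for the classes above; 'pokemon' and the sleep are illustrative.
import time

store = TweetStore('pokemon')          # also starts a background TweetFetcher stream
time.sleep(10)                         # give the stream time to deliver and classify tweets
for tweet in store.get_tweets(0):
    print tweet['sentiment'], tweet['text']
store.shutdown()                       # clears the fetcher's _alive flag
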
7 changes: 3 additions & 4 deletions WebApp/views.py
@@ -2,7 +2,7 @@
import time
import json
import django
from tweetfetcher import TweetFetcher
from tweetfetcher import TweetFetcher, TweetStore
from django.http import HttpResponse, Http404, HttpResponseBadRequest
from django.shortcuts import render
from django.template import Context
@@ -25,22 +25,21 @@ def do_search(request):


def return_json(request):
    query = request.GET.get('q', '')
    query = request.GET .get('q', '')
    if query == '':
        return HttpResponseBadRequest()

    start = request.GET.get('start', 0)
    global __sessions

    if query not in __sessions:
        __sessions[query] = TweetFetcher(query)
        __sessions[query] = TweetStore(query)
        time.sleep(2)

    fetcher = __sessions[query]
    if not fetcher.is_alive():
        __sessions.remove(query)
        print "removing object"
        return return_json(request)

    data = fetcher.get_tweets(start)

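The rest of return_json is collapsed in this view of the diff. A typical ending would serialize the tweet dicts with the json module already imported at the top of views.py; the helper below is a guess at that shape, not the committed code:

# Hypothetical helper showing one plausible way the view could finish; not part
# of the commit.
import json
from django.http import HttpResponse

def tweets_as_json_response(data):
    return HttpResponse(json.dumps(data), content_type='application/json')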
