forked from Ccantey/TwitterSentimentAnalysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SentimentDB2csv.py
93 lines (70 loc) · 2.38 KB
/
SentimentDB2csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import MySQLdb
import re
import csv
# Load the negative and positive sentiment lexicons as raw lines.
# `with` guarantees each file is closed even if reading raises.
with open('negative-words.txt', 'r') as infile:
    # readlines() returns a list of lines, newline terminators included
    Nwords = infile.readlines()
with open('positive-words.txt', 'r') as inotherfile:
    Pwords = inotherfile.readlines()
# Set up database, admin@localhost for now.
# The connection and cursor are released in `finally` blocks so they do not
# stay open if the query fails (or for the remainder of the script).
db = MySQLdb.connect(host='127.0.0.1', user='yourUser', passwd='yourPasswd', db='yourDB')
try:
    db.set_character_set('utf8')
    curr = db.cursor()
    try:
        ####### Query the database for New York City political expressions #######
        # NOTE(review): the pattern contains 'conservtive' (likely a typo for
        # 'conservative'), and every alternative is padded with spaces, so the
        # spaces are part of what must match. Left unchanged here because
        # editing the pattern changes which rows are selected -- confirm intent.
        curr.execute("""SELECT * FROM yourTable WHERE BODY REGEXP 'obama | romney | bloomberg | m.t.a | mta | schumer | nypd | fdny | port | authority | fema | nyoem | mayor | senator | hud | Cuomo | police | transit | njt | nfip | conservtive | conservatives | liberal | liberals | republican | republicans | democrat | democrats'""")
        # Pull the whole result set into memory; later steps index into each row.
        rows = curr.fetchall()
    finally:
        curr.close()
finally:
    db.close()
# Parallel lists, one entry per row returned by the query.
listOfTweets = []  # tweet text
X = []             # longitude
Y = []             # latitude
TIME = []          # time of the tweet

# Sentiment vocabularies, filled from the lexicon files further down.
nkeywords = []     # negative words
# pkeywords was previously commented out, which made the later
# pkeywords.append(...) fail with a NameError.
pkeywords = []     # positive words

# Copy the needed columns out of each result row.
# The positional indices depend on the column order of the table behind
# `SELECT *` -- presumably 9=time, 10=longitude, 11=latitude, 12=body;
# verify against the schema.
for row in rows:
    listOfTweets.append(row[12])
    X.append(row[10])
    Y.append(row[11])
    TIME.append(row[9])
# Turn the raw lexicon lines into clean word lists.
# The first 35 lines of each file are skipped; per the original note they
# hold metadata rather than words.
# Both lists are bound here directly, so this step no longer relies on
# `pkeywords` having been created earlier (it used to raise NameError).
# Stripping '\r' as well as '\n' keeps words usable when the lexicon files
# have Windows (CRLF) line endings; a trailing '\r' would otherwise prevent
# every word from ever matching a token.
nkeywords = [line.rstrip('\r\n') for line in Nwords[35:]]  # negative words
pkeywords = [line.rstrip('\r\n') for line in Pwords[35:]]  # positive words
# Tokenizer: extracts runs of ASCII letters delimited by word boundaries.
RE_WORD = re.compile(r'\b[a-zA-Z]+\b')

# Sets give constant-time membership tests; checking every token against the
# keyword *lists* would rescan the whole lexicon for each word of each tweet.
negative_lookup = set(nkeywords)
positive_lookup = set(pkeywords)

count = 0           # number of tweets processed
Tweet = []          # lower-cased tweet text, one entry per input tweet
sentimentEval = []  # net score per tweet, aligned with Tweet

for raw_tweet in listOfTweets:
    # Preprocess the tweet to be all lowercase before tokenizing.
    k = raw_tweet.lower()
    count += 1
    ncount = 0
    # Each token found in the negative lexicon subtracts 1 and each token
    # found in the positive lexicon adds 1. The two checks are independent,
    # so a word present in both lexicons nets to 0.
    for word in RE_WORD.findall(k):
        if word in negative_lookup:
            ncount -= 1
        if word in positive_lookup:
            ncount += 1
    # Exactly one entry per tweet, so these lists stay aligned with X/Y/TIME.
    Tweet.append(k)
    sentimentEval.append(ncount)
# Combine the parallel lists into rows: (tweet, X, Y, TIME, sentiment score).
output = zip(Tweet, X, Y, TIME, sentimentEval)
# Write all rows in one call. The `with` block makes sure the file is flushed
# and closed; previously the handle was never closed explicitly.
# NOTE: 'wb' is the Python 2 convention for csv output. Under Python 3 the
# file would have to be opened with mode 'w' and newline='' instead.
with open('tweet_sentimentPrePost.csv', 'wb') as outfile:
    writer = csv.writer(outfile)
    writer.writerows(output)