Skip to content

Commit

Permalink
Prettify output
Browse files: browse the repository at this point in the history
  • Loading branch information
abdulfatir committed Oct 10, 2017
1 parent c6b331f commit 60ca304
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
9 changes: 7 additions & 2 deletions preprocess.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -54,7 +54,7 @@ def preprocess_tweet(tweet):
tweet = tweet.strip(' "\'')
# Replace emojis with either EMO_POS or EMO_NEG
tweet = handle_emojis(tweet)
- words = tweet.split(' ')
+ words = tweet.split()

for word in words:
word = preprocess_word(word)
Expand All @@ -71,8 +71,12 @@ def write_status(i, total):


if __name__ == '__main__':
+ if len(sys.argv) != 2:
+ print 'Usage: python preprocess.py <raw-CSV>'
+ exit()
csv_file_name = sys.argv[1]
- save_to_file = open(sys.argv[2], 'w')
+ processed_file_name = sys.argv[1][:-4] + '-processed.csv'
+ save_to_file = open(processed_file_name, 'w')

with open(csv_file_name, 'r') as csv:
lines = csv.readlines()
Expand All @@ -89,3 +93,4 @@ def write_status(i, total):
if i % 1000 == 0:
write_status(i + 1, total)
save_to_file.close()
+ print '\nSaved processed tweets to: %s' % processed_file_name
8 changes: 6 additions & 2 deletions stats.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -57,9 +57,13 @@ def analyze_tweet(tweet):
write_status(i + 1, num_tweets)
num_emojis = num_pos_emojis + num_neg_emojis
num_unique_words = len(set(all_words))
+ print '\nCalculating frequency distribution'
freq_dist = FreqDist(all_words)
- pickle.dump(freq_dist, open(sys.argv[1][:-4] + '-freqdist.pkl', 'wb'))
- print '\nTweets => Total: %d, Positive: %d, Negative: %d' % (num_tweets, num_pos_tweets, num_neg_tweets)
+ pkl_file_name = sys.argv[1][:-4] + '-freqdist.pkl'
+ pickle.dump(freq_dist, open(pkl_file_name, 'wb'))
+ print 'Saved frequency distribution to %s' % pkl_file_name
+ print '\n[Analysis Statistics]'
+ print 'Tweets => Total: %d, Positive: %d, Negative: %d' % (num_tweets, num_pos_tweets, num_neg_tweets)
print 'User Mentions => Total: %d, Avg: %.4f, Max: %d' % (num_mentions, num_mentions / float(num_tweets), max_mentions)
print 'URLs => Total: %d, Avg: %.4f, Max: %d' % (num_urls, num_urls / float(num_tweets), max_urls)
print 'Emojis => Total: %d, Positive: %d, Negative: %d, Avg: %.4f, Max: %d' % (num_emojis, num_pos_emojis, num_neg_emojis, num_emojis / float(num_tweets), max_emojis)
Expand Down

0 comments on commit 60ca304

Please sign in to comment.