forked from crista/exercises-in-programming-style
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tf-07.py
executable file
·48 lines (42 loc) · 1.49 KB
/
tf-07.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python
import re, sys, operator
# Mileage may vary. If this crashes, make it lower
RECURSION_LIMIT = 9500
# We add a few more, because, contrary to the name,
# this doesn't just rule recursion: it rules the depth of the call stack
sys.setrecursionlimit(RECURSION_LIMIT+10)
def count(word_list, word_freqs):
# What to do with an empty list
if word_list == []:
return
# The base case, what to do with 1 word
if len(word_list) == 1:
word = word_list[0]
if word not in stopwords:
if word in word_freqs:
word_freqs[word] += 1
else:
word_freqs[word] = 1
# The inductive case, what to do with a list of words
else:
# Process the head word
count([word_list[0]], word_freqs)
# Process the tail
count(word_list[1:], word_freqs)
def wf_print(word_freq):
if word_freq == []:
return
if len(word_freq) == 1:
(w, c) = word_freq[0]
print w, '-', c
else:
wf_print([word_freq[0]])
wf_print(word_freq[1:])
stopwords = set(open('../stop_words.txt').read().split(','))
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
word_freqs = {}
# Theoretically, we would just call count(words, word_freqs)
# Try doing that and see what happens.
for i in range(0, len(words), RECURSION_LIMIT):
count(words[i:i+RECURSION_LIMIT], word_freqs)
wf_print(sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25])