-
Notifications
You must be signed in to change notification settings - Fork 0
/
huffman.py
86 lines (71 loc) · 2.69 KB
/
huffman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#
import sys, string
codes = {}
def frequency (str) :
freqs = {}
for ch in str :
freqs[ch] = freqs.get(ch,0) + 1
return freqs
def sortFreq (freqs) :
letters = freqs.keys()
tuples = []
for let in letters :
tuples.append((freqs[let],let))
tuples.sort()
return tuples
def buildTree(tuples) :
while len(tuples) > 1 :
leastTwo = tuple(tuples[0:2]) # get the 2 to combine
theRest = tuples[2:] # all the others
combFreq = leastTwo[0][0] + leastTwo[1][0] # the branch points freq
tuples = theRest + [(combFreq,leastTwo)] # add branch point to the end
tuples.sort() # sort it into place
return tuples[0] # Return the single tree inside the list
def trimTree (tree) :
# Trim the freq counters off, leaving just the letters
p = tree[1] # ignore freq count in [0]
if type(p) == type("") : return p # if just a leaf, return it
else : return (trimTree(p[0]), trimTree(p[1])) # trim left then right and recombine
def assignCodes (node, pat='') :
global codes
if type(node) == type("") :
codes[node] = pat # A leaf. set its code
else : #
assignCodes(node[0], pat+"0") # Branch point. Do the left branch
assignCodes(node[1], pat+"1") # then do the right branch.
def encode (str) :
global codes
output = ""
for ch in str : output += codes[ch]
return output
def decode (tree, str) :
output = ""
p = tree
for bit in str :
if bit == '0' : p = p[0] # Head up the left branch
else : p = p[1] # or up the right branch
if type(p) == type("") :
output += p # found a character. Add to output
p = tree # and restart for next character
return output
def main () :
debug = None
str = sys.stdin.read()
freqs = frequency(str)
tuples = sortFreq(freqs)
tree = buildTree(tuples)
if debug : print "Built tree", tree
tree = trimTree(tree)
if debug : print "Trimmed tree", tree
assignCodes(tree)
if debug : showCodes()
small = encode(str)
original = decode (tree, small)
print "Original text length", len(str)
print "Requires %d bits. (%d bytes)" % (len(small), (len(small)+7)/8)
print "Restored matches original", str == original
print "Code for space is ", codes[' ']
print "Code for letter e ", codes['e']
print "Code for letter y ", codes['y']
print "Code for letter z ", codes['z']
if __name__ == "__main__" : main()