This repository has been archived by the owner on Jun 6, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
NumFSAUtils.py
131 lines (116 loc) · 5.06 KB
/
NumFSAUtils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
""" Module NumFSAUtils -- optional utility functions for the FSA
The FSA module uses these methods if the Numeric module is present"""
__author__ = "Oliver Steele", 'steele@osteele.com'
import Numeric
class TransitionSet:
def __init__(self):
self.capacity = capacity = 100
self.sources = Numeric.zeros(capacity)
self.sinks = Numeric.zeros(capacity)
self.labels = Numeric.zeros(capacity)
self.size = 0
self.stateMap = {}
self.reverseStateMap = []
self.nextStateIndex = 0
self.labelMap = {}
self.reverseLabelMap = []
self.nextLabelIndex = 0
def stateIndex(self, state):
index = self.stateMap.get(state)
if index is None:
index = self.nextStateIndex
self.nextStateIndex = index + 1
self.stateMap[state] = index
self.reverseStateMap.append(state)
return index
def labelIndex(self, label):
index = self.labelMap.get(label)
if index is None:
index = self.nextLabelIndex
self.nextLabelIndex = index + 1
self.labelMap[label] = index
self.reverseLabelMap.append(label)
return index
def append(self, transition):
source, sink, label = transition
source, sink, label = self.stateIndex(source), self.stateIndex(sink), self.labelIndex(label)
index = self.size
if index >= self.capacity:
capacity = self.capacity + 100
self.sources = Numeric.resize(self.sources, [capacity])
self.sinks = Numeric.resize(self.sinks, [capacity])
self.labels = Numeric.resize(self.labels, [capacity])
self.capacity = capacity
self.sources[index] = source
self.sinks[index] = sink
self.labels[index] = label
self.size = self.size + 1
def __len__(self):
return self.size
def toList(self, useStateIndices=0):
transitions = []
for i in range(self.size):
sourceIndex = self.sources[i]
sinkIndex = self.sinks[i]
if not useStateIndices:
sourceIndex = self.reverseStateMap[sourceIndex]
sinkIndex = self.reverseStateMap[sinkIndex]
labelIndex = self.reverseLabelMap[self.labels[i]]
transitions.append((sourceIndex, sinkIndex, labelIndex))
return transitions
def stateCodeFromSet(set):
code = 0
for state in set:
code = code + (1L << state)
return code
def stateCodeToSet(code):
set = []
index, bitmask = 0, 1L
while code:
if code & bitmask:
set.append(index)
code = code & ~bitmask
index, bitmask = index + 1, bitmask << 1
return set
DETERMINITION_PROGRESS_TRIGGER = None # Higher than this prints progress to stdout
DETERMINATION_CUTOFF = None # Higher than this returns (incorrectly, but useful for profiling)
def determinize(states0, alphabet, transitions0, initial0, finals0, epsilonClosure):
from FSA import constructLabelMap
progress = 0
transitions = []
stateCodes, index = [stateCodeFromSet(epsilonClosure(initial0))], 0
transitions = TransitionSet()
while index < len(stateCodes):
if DETERMINATION_CUTOFF and index > DETERMINATION_CUTOFF:
break
stateCode, index = stateCodes[index], index + 1
stateSet = stateCodeToSet(stateCode)
if DETERMINITION_PROGRESS_TRIGGER and len(stateCodes) > DETERMINITION_PROGRESS_TRIGGER:
progress = 1
print 'NumFSAUtils:', index, 'of', len(stateCodes)
localTransitions = filter(lambda (s0,s1,l), set=stateSet:l and s0 in set, transitions0)
if localTransitions:
localLabels = map(lambda(_,__,label):label, localTransitions)
labelMap = constructLabelMap(localLabels, alphabet)
labelTargets = {}
for _, s1, l1 in localTransitions:
for label, positives in labelMap:
if l1 in positives:
successorStates = labelTargets[label] = labelTargets.get(label) or []
for s2 in epsilonClosure(s1):
if s2 not in successorStates:
successorStates.append(s2)
for label, successorStates in labelTargets.items():
successorCode = stateCodeFromSet(successorStates)
transitions.append((stateCode, successorCode, label))
if successorCode not in stateCodes:
stateCodes.append(successorCode)
finalStates = []
for stateCode in stateCodes:
if filter(lambda s,finalStates=finals0:s in finalStates, stateCodeToSet(stateCode)):
finalStates.append(stateCode)
f = transitions.stateIndex
tuple = map(f, stateCodes), alphabet, transitions.toList(useStateIndices=1), f(stateCodes[0]), map(f, finalStates)
if progress:
print 'exiting'
return tuple