-
Notifications
You must be signed in to change notification settings - Fork 0
/
generator.lisp
200 lines (177 loc) · 6.9 KB
/
generator.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
(in-package #:cl-apertium-stream)
(define-condition generation-error (error)
((%nodes :initarg :nodes
:reader nodes)
(%expected-symbol :initarg :expected-symbol
:reader expected-symbol)))
(defun to-unparsed-substream (node)
(unless (eq (car node) :UNPARSED)
(error 'generation-error :nodes (list node) :expected-symbol :UNPARSED))
(cdr node))
(defun to-format-substream (node)
(unless (eq (car node) :FORMAT)
(error 'generation-error :nodes (list node) :expected-symbol :FORMAT))
(let ((format-text (cdr node)))
(if format-text
(format nil "[~A]" format-text)
"")))
(defun to-flag-substream (node)
(unless (eq (car node) :FLAG)
(error 'generation-error :nodes (list node) :expected-symbol :FLAG))
(or (cdr node) ""))
(defun to-ling-form-substream (node)
(unless (eq (car node) :LING-FORM)
(error 'generation-error :generation-error :nodes (list node)
:expected-symbol :LING-FORM))
(let ((text (cdr node)))
(if text
(escape-ling-form text)
"")))
(defun to-invariable-part-substream (node)
(unless (eq (car node) :INVARIABLE-PART)
(error 'generation-error :generation-error :nodes (list node)
:expected-symbol :INVARIABLE-PART))
(let ((text (cdr node)))
(if text
(format nil "#~A" (escape-ling-form text))
"")))
(defun to-tags-substream (node)
(unless (eq (car node) :TAGS)
(error 'generation-error :generation-error :nodes (list node)
:expected-symbol :TAGS))
(let ((texts (cdr node)))
(if texts
(format nil "~{~A~}"
(loop for text in texts collect (format nil "<~A>" text)))
"")))
(defun to-sub-lu-substream (node)
(format nil "~{~A~}"
(loop for child-node in node
collect
(let ((sym (car child-node)))
(case sym
(:FLAG (to-flag-substream child-node))
(:LING-FORM (to-ling-form-substream child-node))
(:TAGS (to-tags-substream child-node))
(:INVARIABLE-PART (to-invariable-part-substream child-node))
(otherwise ""))))))
(defun child-nodes-to-lexical-unit-substream (child-nodes)
(if child-nodes
(format nil "~{~A~^/~}"
(loop for child-node in child-nodes
collect
(to-sub-lu-substream child-node)))
""))
(defun to-lexical-unit-substream (node)
(unless (eq (car node) :LEXICAL-UNIT)
(error 'generation-error :generation-error :nodes (list node)
:expected-symbol :LEXICAL-UNIT))
(let ((inner-stream (let ((child-nodes (cdr node)))
(child-nodes-to-lexical-unit-substream child-nodes))))
(if (equal "" inner-stream)
""
(format nil "^~A$" inner-stream))))
(defun to-joined-lexical-unit-substream (node)
(unless (eq (car node) :JOINED-LEXICAL-UNIT)
(error 'generation-error :generation-error :nodes (list node)
:expected-symbol :JOINED-LEXICAL-UNIT))
(let ((lexical-unit-nodes (cdr node)))
(if lexical-unit-nodes
(format nil
"^~{~A~^+~}$"
(loop for sub-lexical-units in lexical-unit-nodes
collect
(child-nodes-to-lexical-unit-substream sub-lexical-units)))
"")))
(defun to-chunk-head-substream (node)
(unless (eq (car node) :HEAD)
(error 'generation-error :generation-error :nodes (list node)
:expected-symbol :HEAD))
(to-sub-lu-substream (cdr node)))
(defun to-chunk-child-substream (child)
(ecase (car child)
(:UNPARSED (to-unparsed-substream child))
(:LEXICAL-UNIT (to-lexical-unit-substream child))
(:JOINED-LEXICAL-UNIT (to-joined-lexical-unit-substream child))
(:FORMAT (to-format-substream child))))
(defun to-chunk-children-substream (node)
(unless (eq (car node) :CHILDREN)
(error 'generation-error :generation-error :nodes (list node)
:expected-symbol :CHILDREN))
(format nil
"~{~A~}"
(loop for child in (cdr node)
collect
(to-chunk-child-substream child))))
(defun to-chunk-substream (node)
(unless (eq (car node) :CHUNK)
(error 'generation-error :generation-error :nodes (list node)
:expected-symbol :CHUNK))
(let ((head-substream (to-chunk-head-substream (assoc :HEAD (cdr node))))
(children-substream (to-chunk-children-substream (assoc :CHILDREN (cdr node)))))
(format nil "^~A{~A}$" head-substream children-substream)))
(defun to-stream (nodes)
(format nil
"~{~A~}"
(loop for node in nodes
collect
(ecase (car node)
(:UNPARSED (to-unparsed-substream node))
(:LEXICAL-UNIT (to-lexical-unit-substream node))
(:JOINED-LEXICAL-UNIT (to-joined-lexical-unit-substream node))
(:FORMAT (to-format-substream node))
(:CHUNK (to-chunk-substream node))))))
(defun test-to-stream-1 ()
(to-stream
'((:JOINED-LEXICAL-UNIT
(((:FLAG) (:LING-FORM . "กาขา") (:INVARIABLE-PART) (:TAGS)
(:INVARIABLE-PART)))
(((:FLAG) (:LING-FORM . "กา") (:INVARIABLE-PART) (:TAGS "t1")
(:INVARIABLE-PART))
((:FLAG) (:LING-FORM . "ขา") (:INVARIABLE-PART) (:TAGS "ม")
(:INVARIABLE-PART))))
(:UNPARSED . " ")
(:JOINED-LEXICAL-UNIT
(((:FLAG) (:LING-FORM . "กา") (:INVARIABLE-PART) (:TAGS "t1")
(:INVARIABLE-PART))
((:FLAG) (:LING-FORM . "ขา") (:INVARIABLE-PART) (:TAGS "ม")
(:INVARIABLE-PART)))))))
(defun test-to-lexical-unit-stream-1 ()
(to-lexical-unit-substream
'(:LEXICAL-UNIT
((:FLAG) (:LING-FORM . "กา") (:INVARIABLE-PART) (:TAGS "t1")
(:INVARIABLE-PART))
((:FLAG) (:LING-FORM . "กา") (:INVARIABLE-PART) (:TAGS "t2")
(:INVARIABLE-PART))
((:FLAG) (:LING-FORM . "กา") (:INVARIABLE-PART) (:TAGS "t3")
(:INVARIABLE-PART)))))
(defun test-to-chunk-substream ()
(to-chunk-substream
'(:CHUNK
(:HEAD (:FLAG) (:LING-FORM . "N1") (:INVARIABLE-PART) (:TAGS "SN" "a")
(:INVARIABLE-PART))
(:CHILDREN
(:LEXICAL-UNIT
((:FLAG) (:LING-FORM . "i") (:INVARIABLE-PART) (:TAGS) (:INVARIABLE-PART)))
(:UNPARSED . " ") (:FORMAT . "<o>")
(:LEXICAL-UNIT
((:FLAG) (:LING-FORM . "j") (:INVARIABLE-PART) (:TAGS) (:INVARIABLE-PART)))
(:FORMAT . "</o>")
(:LEXICAL-UNIT
((:FLAG) (:LING-FORM . "k") (:INVARIABLE-PART) (:TAGS) (:INVARIABLE-PART)))))))
;; (to-joined-lexical-unit-substream
;; '(:JOINED-LEXICAL-UNIT
;; (((:FLAG) (:LING-FORM . "กาขา") (:INVARIABLE-PART) (:TAGS)
;; (:INVARIABLE-PART)))
;; (((:FLAG) (:LING-FORM . "กา") (:INVARIABLE-PART) (:TAGS "t1")
;; (:INVARIABLE-PART))
;; ((:FLAG) (:LING-FORM . "ขา") (:INVARIABLE-PART) (:TAGS "ม")
;; (:INVARIABLE-PART)))))
;; (to-sub-lu-substream '((:FLAG) (:LING-FORM . "หมาบ้าน") (:INVARIABLE-PART) (:TAGS "n" "sg") (:INVARIABLE-PART . " ตัว")))
;; (to-invariable-part-substream '(:INVARIABLE-PART . " ตัว"))
;; (to-tags-substream '(:TAGS . "n"))
;; (parse 'sub-lu "หมาบ้าน<n><sg># ตัว")
;; ((:FLAG) (:LING-FORM . "หมาบ้าน") (:INVARIABLE-PART) (:TAGS "n" "sg") (:INVARIABLE-PART " ตัว"))
;; (to-unparsed-substream-stream '(:UNPARSED . " "))
;; (to-format-substream '(:FORMAT . "HTML"))
;; (to-flag-substream '(:FLAG . "*"))