Skip to content

Commit

Permalink
Fixed some bugs
Browse files (browse the repository at this point in the history)
  • Loading branch information
SeanLee97 committed Sep 14, 2018
1 parent 2095e49 commit af7e85a
Showing 1 changed file with 16 additions and 8 deletions.
24 changes: 16 additions & 8 deletions xmnlp/postag/postag.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,22 +92,26 @@ def seg(self, sent):
continue
if R.zh.match(s):
for w in list(self.dag.seg(s)):
yield w
if len(w.strip()) > 0:
yield w
else:
tmp = R.skip.split(s)
for x in tmp:
if R.skip.match(x):
yield x
if len(x.strip()) > 0:
yield x
else:
x = x.replace(' ','')
endigts = R.endigt.findall(x)
parts = re.split(r'[0-9]+\.?[0-9]+|[0-9]+|[a-zA-Z]+', x)
if len(endigts) > 0:
for w, t in self.re_decode(parts, endigts, False):
yield w
if len(w.strip()) > 0:
yield w
else:
for xx in x:
yield xx
if len(xx.strip()) > 0:
yield xx
def tag(self, sent):
for s in R.zh.split(sent):
s = s.strip()
Expand All @@ -117,19 +121,23 @@ def tag(self, sent):
continue
if R.zh.match(s):
for w,t in self.dag.tag(s):
yield w, t
if len(w.strip()) > 0:
yield w, t
else:
tmp = R.skip.split(s)
for x in tmp:
if R.skip.match(x):
yield x
if len(x.strip()) > 0:
yield x
else:
x = x.replace(' ', '')
endigts = R.endigt.findall(x)
parts = re.split(r'[0-9]+\.?[0-9]+|[0-9]+|[a-zA-Z]+', x)
if len(endigts) > 0:
for w, t in self.re_decode(parts, endigts, True):
yield w, t
if len(w.strip()) > 0:
yield w, t
else:
for xx in x:
yield xx, 'un'
if len(xx.strip()) > 0:
yield xx, 'un'

0 comments on commit af7e85a

Please sign in to comment.