Skip to content

Commit

Permalink
remove 空白
Browse files Browse the repository at this point in the history
  • Loading branch information
mattn committed Mar 26, 2023
1 parent ece705f commit 6422a34
Showing 1 changed file with 26 additions and 11 deletions.
37 changes: 26 additions & 11 deletions haiku.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func isEnd(c []string) bool {
}

func isSpace(c []string) bool {
return c[1] == "空白"
return c[0] == "空白"
}

// isWord returns true when the kind of the word is possible to be leading of
Expand All @@ -36,6 +36,9 @@ func isWord(c []string) bool {
return true
}
}
if c[0] == "記号" && c[1] == "一般" {
return true
}
if c[0] == "助詞" && c[1] != "服助詞" {
return true
}
Expand Down Expand Up @@ -67,7 +70,7 @@ func MatchWithOpt(text string, rule []int, opt *Opt) bool {
if d == nil {
d = uni.Dict()
}
t, err := tokenizer.New(d, tokenizer.Nop())
t, err := tokenizer.New(d, tokenizer.OmitBosEos())
if err != nil {
return false
}
Expand All @@ -77,13 +80,19 @@ func MatchWithOpt(text string, rule []int, opt *Opt) bool {
r := make([]int, len(rule))
copy(r, rule)

var tmp []tokenizer.Token
for _, token := range tokens {
c := token.Features()
if len(c) > 0 && c[0] != "空白" {
tmp = append(tmp, token)
}
}
tokens = tmp

for i := 0; i < len(tokens); i++ {
tok := tokens[i]
c := tok.Features()
if len(c) == 0 || isSpace(c) {
continue
}
y := c[len(c)-1]
y := c[6]
if y == "*" {
y = tok.Surface
}
Expand Down Expand Up @@ -116,7 +125,7 @@ func FindWithOpt(text string, rule []int, opt *Opt) ([]string, error) {
if d == nil {
d = uni.Dict()
}
t, err := tokenizer.New(d, tokenizer.Nop())
t, err := tokenizer.New(d, tokenizer.OmitBosEos())
if err != nil {
return nil, err
}
Expand All @@ -129,6 +138,15 @@ func FindWithOpt(text string, rule []int, opt *Opt) ([]string, error) {
start := 0
ambigous := 0

var tmp []tokenizer.Token
for _, token := range tokens {
c := token.Features()
if len(c) > 0 && c[0] != "空白" {
tmp = append(tmp, token)
}
}
tokens = tmp

for i := 0; i < len(tokens); i++ {
if reKana.MatchString(tokens[i].Surface) {
surface := tokens[i].Surface
Expand All @@ -155,10 +173,7 @@ func FindWithOpt(text string, rule []int, opt *Opt) ([]string, error) {
for i := 0; i < len(tokens); i++ {
tok := tokens[i]
c := tok.Features()
if len(c) == 0 || isSpace(c) {
continue
}
y := c[len(c)-1]
y := c[6]
if y == "*" {
y = tok.Surface
}
Expand Down

0 comments on commit 6422a34

Please sign in to comment.