Skip to content

Commit

Permalink
[scripts] Apply encoding fix of kaldi-asr#2676 to make_lexicon_fst_silprobs.py (kaldi-asr#2680)
Browse files Browse the repository at this point in the history
  • Loading branch information
danpovey committed Sep 4, 2018
1 parent 76cb53a commit 1b9f792
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
2 changes: 1 addition & 1 deletion egs/wsj/s5/utils/lang/make_lexicon_fst.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ def read_lexiconp(filename):
found_large_pronprobs = False
# See the comment near the top of this file, RE why we use latin-1.
with open(filename, 'r', encoding='latin-1') as f:
whitespace = re.compile("[ \t]+")
for line in f:
whitespace = re.compile("[ \t]+")
a = whitespace.split(line.strip())
if len(a) < 2:
print("{0}: error: found bad line '{1}' in lexicon file {2} ".format(
Expand Down
4 changes: 3 additions & 1 deletion egs/wsj/s5/utils/lang/make_lexicon_fst_silprob.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import sys
import math
import re

# The use of latin-1 encoding does not preclude reading utf-8. latin-1
# encoding means "treat words as sequences of bytes", and it is compatible
Expand Down Expand Up @@ -79,8 +80,9 @@ def read_silprobs(filename):
nonsilendcorrection = -1
siloverallprob = -1
with open(filename, 'r', encoding='latin-1') as f:
whitespace = re.compile("[ \t]+")
for line in f:
a = line.split()
a = whitespace.split(line.strip())
if len(a) != 2:
print("{0}: error: found bad line '{1}' in silprobs file {1} ".format(
sys.argv[0], line.strip(), filename), file=sys.stderr)
Expand Down

0 comments on commit 1b9f792

Please sign in to comment.