Skip to content

Commit

Permalink
Add script for detecting bad characters.
Browse files Browse the repository at this point in the history
Co-authored-by: Shu Muto <shu.mutow@nec.com>
  • Loading branch information
s-kawamura-w664 and shu-mutou committed Mar 30, 2021
1 parent 15f1a9c commit b85da54
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions scripts/check-ctrlcode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env python3

import os
import sys
import re

def main():
    """Validate the command line and scan a directory tree.

    Reads exactly two arguments from ``sys.argv``: the directory to scan
    and the file extension to match. The extension is compared against the
    second element of ``os.path.splitext``, so it must include the leading
    dot (for example ``.md``).

    Exits with status 1, after writing a message to stderr, when the
    argument count is wrong or the directory does not exist.
    """
    args = sys.argv
    if len(args) != 3:
        # Diagnostics go to stderr so they never mix with the report that
        # the scan writes to stdout.
        print("Usage: ./check-ctrlcode.py <dir> <ext>", file=sys.stderr)
        sys.exit(1)

    dirpath, ext = args[1], args[2]

    fullpath = os.path.abspath(dirpath)
    if not os.path.isdir(fullpath):
        print("Directory not found.", file=sys.stderr)
        sys.exit(1)

    check_dir(fullpath, ext)

def check_dir(path, ext):
    """Recursively check every file under ``path`` whose extension is ``ext``.

    Entries whose name starts with ``.`` are skipped, which also keeps the
    walk out of hidden directories. Regular files are passed to
    ``check_ctrlcode`` when the extension reported by ``os.path.splitext``
    (including the leading dot) equals ``ext``; directories are descended
    into.

    Args:
        path: Directory to scan.
        ext: Extension to match, including the leading dot (e.g. ``.md``).
    """
    # Sorted so the report order does not depend on the file system.
    for name in sorted(os.listdir(path)):
        if name.startswith("."):
            continue
        fullpath = os.path.join(path, name)
        if os.path.isfile(fullpath):
            if os.path.splitext(name)[1] == ext:
                check_ctrlcode(fullpath)
        elif os.path.isdir(fullpath):
            check_dir(fullpath, ext)
        # Anything else (dangling symlink, FIFO, socket, ...) is ignored:
        # calling os.listdir on it would raise and abort the whole scan.

def check_ctrlcode(filepath):
    """Print every line of ``filepath`` that contains a control character.

    The file is read as UTF-8 text. For each line containing a character in
    0x00-0x1f other than HT (0x09), LF (0x0a) and CR (0x0d), one report is
    printed to stdout in the form::

        <filepath> <L<line>:<column>:<bytes>>: <line text>

    followed by a blank line. Only the first offending character on a line
    is reported; ``<column>`` is its 1-based position in the line and
    ``<bytes>`` is its UTF-8 encoding.

    Args:
        filepath: Path of the file to check.
    """
    # check 0x00-0x1f except 0x09(HT), 0x0a(LF), 0x0d(CR)
    # Compiled once per file rather than once per line.
    pattern = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f]')
    with open(filepath, encoding='utf-8') as f:
        for lineno, text in enumerate(f, start=1):
            m = pattern.search(text)
            if m is None:
                continue
            # The match is a single character, so end() is its 1-based column.
            pos = m.end()
            ctrl = m.group().encode("utf-8")
            print("{0} <L{1}:{2}:{3}>: {4}\n".format(
                filepath, lineno, pos, ctrl, text.replace('\n', '')))


# Run the scan only when executed as a script, so that importing this module
# (for example from a test) does not start a scan or call sys.exit().
if __name__ == "__main__":
    main()

0 comments on commit b85da54

Please sign in to comment.