Skip to content

Commit

Permalink
bpo-36143: Regenerate Lib/keyword.py from the Grammar and Tokens file…
Browse files Browse the repository at this point in the history
… using pgen (pythonGH-12456)

Now that the parser generator is written in Python (Parser/pgen) we can make use of it to regenerate the Lib/keyword file that contains the language keywords instead of parsing the autogenerated grammar files. This also allows checking in the CI that the autogenerated files are up to date.
  • Loading branch information
pablogsal authored Mar 25, 2019
1 parent 027b09c commit 91759d9
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 197 deletions.
129 changes: 43 additions & 86 deletions Lib/keyword.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,98 +1,55 @@
#! /usr/bin/env python3

"""Keywords (from "graminit.c")
"""Keywords (from "Grammar/Grammar")
This file is automatically generated; please don't muck it up!
To update the symbols in this file, 'cd' to the top directory of
the python source tree after building the interpreter and run:
the python source tree and run:
python3 -m Parser.pgen.keywordgen Grammar/Grammar \
Grammar/Tokens \
Lib/keyword.py
./python Lib/keyword.py
Alternatively, you can run 'make regen-keyword'.
"""

__all__ = ["iskeyword", "kwlist"]

kwlist = [
#--start keywords--
'False',
'None',
'True',
'and',
'as',
'assert',
'break',
'class',
'continue',
'def',
'del',
'elif',
'else',
'except',
'finally',
'for',
'from',
'global',
'if',
'import',
'in',
'is',
'lambda',
'nonlocal',
'not',
'or',
'pass',
'raise',
'return',
'try',
'while',
'with',
'yield',
#--end keywords--
]

kwlist.append('async')
kwlist.append('await')
kwlist.sort()
'False',
'None',
'True',
'and',
'as',
'assert',
'async',
'await',
'break',
'class',
'continue',
'def',
'del',
'elif',
'else',
'except',
'finally',
'for',
'from',
'global',
'if',
'import',
'in',
'is',
'lambda',
'nonlocal',
'not',
'or',
'pass',
'raise',
'return',
'try',
'while',
'with',
'yield'
]

iskeyword = frozenset(kwlist).__contains__

def main():
import sys, re

args = sys.argv[1:]
iptfile = args and args[0] or "Python/graminit.c"
if len(args) > 1: optfile = args[1]
else: optfile = "Lib/keyword.py"

# load the output skeleton from the target, taking care to preserve its
# newline convention.
with open(optfile, newline='') as fp:
format = fp.readlines()
nl = format[0][len(format[0].strip()):] if format else '\n'

# scan the source file for keywords
with open(iptfile) as fp:
strprog = re.compile('"([^"]+)"')
lines = []
for line in fp:
if '{1, "' in line:
match = strprog.search(line)
if match:
lines.append(" '" + match.group(1) + "'," + nl)
lines.sort()

# insert the lines of keywords into the skeleton
try:
start = format.index("#--start keywords--" + nl) + 1
end = format.index("#--end keywords--" + nl)
format[start:end] = lines
except ValueError:
sys.stderr.write("target does not contain format markers\n")
sys.exit(1)

# write the output file
with open(optfile, 'w', newline='') as fp:
fp.writelines(format)

if __name__ == "__main__":
main()
119 changes: 9 additions & 110 deletions Lib/test/test_keyword.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,5 @@
import keyword
import unittest
from test import support
import filecmp
import os
import sys
import subprocess
import shutil
import textwrap

KEYWORD_FILE = support.findfile('keyword.py')
GRAMMAR_FILE = os.path.join(os.path.split(__file__)[0],
'..', '..', 'Python', 'graminit.c')
TEST_PY_FILE = 'keyword_test.py'
GRAMMAR_TEST_FILE = 'graminit_test.c'
PY_FILE_WITHOUT_KEYWORDS = 'minimal_keyword.py'
NONEXISTENT_FILE = 'not_here.txt'


class Test_iskeyword(unittest.TestCase):
Expand All @@ -35,103 +20,17 @@ def test_changing_the_kwlist_does_not_affect_iskeyword(self):
keyword.kwlist = ['its', 'all', 'eggs', 'beans', 'and', 'a', 'slice']
self.assertFalse(keyword.iskeyword('eggs'))

def test_all_keywords_fail_to_be_used_as_names(self):
for key in keyword.kwlist:
with self.assertRaises(SyntaxError):
exec(f"{key} = 42")

class TestKeywordGeneration(unittest.TestCase):

def _copy_file_without_generated_keywords(self, source_file, dest_file):
with open(source_file, 'rb') as fp:
lines = fp.readlines()
nl = lines[0][len(lines[0].strip()):]
with open(dest_file, 'wb') as fp:
fp.writelines(lines[:lines.index(b"#--start keywords--" + nl) + 1])
fp.writelines(lines[lines.index(b"#--end keywords--" + nl):])

def _generate_keywords(self, grammar_file, target_keyword_py_file):
proc = subprocess.Popen([sys.executable,
KEYWORD_FILE,
grammar_file,
target_keyword_py_file], stderr=subprocess.PIPE)
stderr = proc.communicate()[1]
return proc.returncode, stderr

@unittest.skipIf(not os.path.exists(GRAMMAR_FILE),
'test only works from source build directory')
def test_real_grammar_and_keyword_file(self):
self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
self.addCleanup(support.unlink, TEST_PY_FILE)
self.assertFalse(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))
self.assertEqual((0, b''), self._generate_keywords(GRAMMAR_FILE,
TEST_PY_FILE))
self.assertTrue(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))

def test_grammar(self):
self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
self.addCleanup(support.unlink, TEST_PY_FILE)
with open(GRAMMAR_TEST_FILE, 'w') as fp:
# Some of these are probably implementation accidents.
fp.writelines(textwrap.dedent("""\
{2, 1},
{11, "encoding_decl", 0, 2, states_79,
"\000\000\040\000\000\000\000\000\000\000\000\000"
"\000\000\000\000\000\000\000\000\000"},
{1, "jello"},
{326, 0},
{1, "turnip"},
\t{1, "This one is tab indented"
{278, 0},
{1, "crazy but legal"
"also legal" {1, "
{1, "continue"},
{1, "lemon"},
{1, "tomato"},
{1, "wigii"},
{1, 'no good'}
{283, 0},
{1, "too many spaces"}"""))
self.addCleanup(support.unlink, GRAMMAR_TEST_FILE)
self._generate_keywords(GRAMMAR_TEST_FILE, TEST_PY_FILE)
expected = [
" 'This one is tab indented',",
" 'also legal',",
" 'continue',",
" 'crazy but legal',",
" 'jello',",
" 'lemon',",
" 'tomato',",
" 'turnip',",
" 'wigii',",
]
with open(TEST_PY_FILE) as fp:
lines = fp.read().splitlines()
start = lines.index("#--start keywords--") + 1
end = lines.index("#--end keywords--")
actual = lines[start:end]
self.assertEqual(actual, expected)

def test_empty_grammar_results_in_no_keywords(self):
self._copy_file_without_generated_keywords(KEYWORD_FILE,
PY_FILE_WITHOUT_KEYWORDS)
self.addCleanup(support.unlink, PY_FILE_WITHOUT_KEYWORDS)
shutil.copyfile(KEYWORD_FILE, TEST_PY_FILE)
self.addCleanup(support.unlink, TEST_PY_FILE)
self.assertEqual((0, b''), self._generate_keywords(os.devnull,
TEST_PY_FILE))
self.assertTrue(filecmp.cmp(TEST_PY_FILE, PY_FILE_WITHOUT_KEYWORDS))

def test_keywords_py_without_markers_produces_error(self):
rc, stderr = self._generate_keywords(os.devnull, os.devnull)
self.assertNotEqual(rc, 0)
self.assertRegex(stderr, b'does not contain format markers')

def test_missing_grammar_file_produces_error(self):
rc, stderr = self._generate_keywords(NONEXISTENT_FILE, KEYWORD_FILE)
self.assertNotEqual(rc, 0)
self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
def test_async_and_await_are_keywords(self):
self.assertIn("async", keyword.kwlist)
self.assertIn("await", keyword.kwlist)

def test_missing_keywords_py_file_produces_error(self):
rc, stderr = self._generate_keywords(os.devnull, NONEXISTENT_FILE)
self.assertNotEqual(rc, 0)
self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
def test_keywords_are_sorted(self):
self.assertListEqual(sorted(keyword.kwlist), keyword.kwlist)


if __name__ == "__main__":
Expand Down
11 changes: 10 additions & 1 deletion Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ regen-importlib: Programs/_freeze_importlib
# Regenerate all generated files

regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar \
regen-token regen-symbol regen-ast regen-importlib clinic
regen-token regen-keyword regen-symbol regen-ast regen-importlib clinic

############################################################################
# Special rules for object files
Expand Down Expand Up @@ -843,6 +843,15 @@ regen-token:
$(srcdir)/Grammar/Tokens \
$(srcdir)/Lib/token.py

.PHONY: regen-keyword
regen-keyword:
# Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens
# using Parser/pgen
$(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \
$(srcdir)/Grammar/Tokens \
$(srcdir)/Lib/keyword.py.new
$(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new

.PHONY: regen-symbol
regen-symbol: $(srcdir)/Include/graminit.h
# Regenerate Lib/symbol.py from Include/graminit.h
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Regenerate :mod:`keyword` from the Grammar and Tokens file using pgen. Patch
by Pablo Galindo.
60 changes: 60 additions & 0 deletions Parser/pgen/keywordgen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen"""

import argparse

from .pgen import ParserGenerator

TEMPLATE = r'''
"""Keywords (from "Grammar/Grammar")
This file is automatically generated; please don't muck it up!
To update the symbols in this file, 'cd' to the top directory of
the python source tree and run:
python3 -m Parser.pgen.keywordgen Grammar/Grammar \
Grammar/Tokens \
Lib/keyword.py
Alternatively, you can run 'make regen-keyword'.
"""
__all__ = ["iskeyword", "kwlist"]
kwlist = [
{keywords}
]
iskeyword = frozenset(kwlist).__contains__
'''.lstrip()

EXTRA_KEYWORDS = ["async", "await"]


def main():
parser = argparse.ArgumentParser(description="Generate the Lib/keywords.py "
"file from the grammar.")
parser.add_argument(
"grammar", type=str, help="The file with the grammar definition in EBNF format"
)
parser.add_argument(
"tokens", type=str, help="The file with the token definitions"
)
parser.add_argument(
"keyword_file",
type=argparse.FileType('w'),
help="The path to write the keyword definitions",
)
args = parser.parse_args()
p = ParserGenerator(args.grammar, args.tokens)
grammar = p.make_grammar()

with args.keyword_file as thefile:
all_keywords = sorted(list(grammar.keywords) + EXTRA_KEYWORDS)

keywords = ",\n ".join(map(repr, all_keywords))
thefile.write(TEMPLATE.format(keywords=keywords))


if __name__ == "__main__":
main()

0 comments on commit 91759d9

Please sign in to comment.