Skip to content

Commit

Permalink
Add an initial Bison parser
Browse files Browse the repository at this point in the history
  • Loading branch information
certik committed Apr 7, 2022
1 parent d074303 commit 7bb7ec1
Show file tree
Hide file tree
Showing 4 changed files with 223 additions and 108 deletions.
6 changes: 5 additions & 1 deletion build0.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,9 @@ python grammar/asdl_cpp.py grammar/Python.asdl src/lpython/python_ast.h
# Generate a Fortran ASR from ASR.asdl (C++)
python grammar/asdl_cpp.py grammar/ASR.asdl src/libasr/asr.h

# Generate the tokenizer
# Generate the tokenizer and parser
(cd src/lpython/parser && re2c -W -b tokenizer.re -o tokenizer.cpp)
(cd src/lpython/parser && bison -Wall -d -r all parser.yy)

grep -n "'" src/lpython/parser/parser.yy && echo "Single quote not allowed" && exit 1
echo "OK"
3 changes: 2 additions & 1 deletion ci/build.xsh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ python grammar/asdl_cpp.py grammar/Python.asdl src/lpython/python_ast.h
# Generate a Python AST from Python.asdl (Python)
python grammar/asdl_py.py

# Generate the tokenizer
# Generate the tokenizer and parser
pushd src/lpython/parser && re2c -W -b tokenizer.re -o tokenizer.cpp && popd
pushd src/lpython/parser && bison -Wall -d -r all parser.yy && popd

$lpython_version=$(cat version).strip()
$dest="lpython-" + $lpython_version
Expand Down
106 changes: 0 additions & 106 deletions src/lpython/parser/parser.tab.hh

This file was deleted.

216 changes: 216 additions & 0 deletions src/lpython/parser/parser.yy
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
%require "3.0"
%define api.pure
%define api.value.type {LFortran::YYSTYPE}
//%param {LFortran::Parser &p}
%locations
%expect 0 // shift/reduce conflicts

// Uncomment this to get verbose error messages
//%define parse.error verbose

/*
// Uncomment this to enable parser tracing. Then in the main code, set
// extern int yydebug;
// yydebug=1;
%define parse.trace
%printer { fprintf(yyo, "%s", $$.str().c_str()); } <string>
%printer { fprintf(yyo, "%d", $$); } <n>
%printer { std::cerr << "AST TYPE: " << $$->type; } <ast>
*/


%code requires // *.h
{
//#include <lpython/parser/parser.h>
}

%code // *.cpp
{

#include <lpython/parser/parser.h>
#include <lpython/parser/tokenizer.h>
#include <lpython/parser/semantics.h>

/*
int yylex(LFortran::YYSTYPE *yylval, YYLTYPE *yyloc, LFortran::Parser &p)
{
return p.m_tokenizer.lex(p.m_a, *yylval, *yyloc, p.diag);
} // ylex
void yyerror(YYLTYPE *yyloc, LFortran::Parser &p, const std::string &msg)
{
p.handle_yyerror(*yyloc, msg);
}
*/

#define YYLLOC_DEFAULT(Current, Rhs, N) \
do \
if (N) \
{ \
(Current).first = YYRHSLOC (Rhs, 1).first; \
(Current).last = YYRHSLOC (Rhs, N).last; \
} \
else \
{ \
(Current).first = (Current).last = \
YYRHSLOC (Rhs, 0).last; \
} \
while (0)

} // code


// -----------------------------------------------------------------------------
// List of tokens
// All tokens that we use (including "+" and such) are declared here first
// using the %token line. Each token will end up a member of the "enum
// yytokentype" in parser.tab.hh. Tokens can have a string equivalent (such as
// "+" for TK_PLUS) that is used later in the file to simplify reading it, but
// it is equivalent to TK_PLUS. Bison also allows so called "character token
// type" which are specified using single quotes (and that bypass the %token
// definitions), and those are not used here, and we test that the whole file
// does not contain any single quotes to ensure that.
//
// If this list is updated, update also token2text() in parser.cpp.

// Terminal tokens
%token END_OF_FILE 0
%token TK_NEWLINE
%token TK_INDENT
%token TK_DEDENT
%token <string> TK_NAME
%token <int_suffix> TK_INTEGER
%token <string> TK_REAL
%token <int_suffix> TK_IMAG_NUM
%token TK_PLUS "+"
%token TK_MINUS "-"
%token TK_STAR "*"
%token TK_SLASH "/"
%token TK_COLON ":"
%token TK_SEMICOLON ";"
%token TK_COMMA ","
%token TK_EQUAL "="
%token TK_LPAREN "("
%token TK_RPAREN ")"
%token TK_LBRACKET "["
%token TK_RBRACKET "]"
%token TK_LBRACE "{"
%token TK_RBRACE "}"
%token TK_PERCENT "%"
%token TK_VBAR "|"
%token TK_AMPERSAND "&"
%token TK_DOT "."
%token TK_TILDE "~"
%token TK_CARET "^"
%token TK_AT "@"
%token <string> TK_STRING
%token <string> TK_COMMENT
%token <string> TK_EOLCOMMENT
%token TK_POW "**"
%token TK_FLOOR_DIV "//"
%token TK_RIGHTSHIFT ">>"
%token TK_LEFTSHIFT "<<"
%token TK_PLUS_EQUAL "+="
%token TK_MIN_EQUAL "-="
%token TK_STAR_EQUAL "*="
%token TK_SLASH_EQUAL "/="
%token TK_PERCENT_EQUAL "%="
%token TK_AMPER_EQUAL "&="
%token TK_VBAR_EQUAL "|="
%token TK_CARET_EQUAL "^="
%token TK_ATEQUAL "@="
%token TK_RARROW "->"
%token TK_COLONEQUAL ":="
%token TK_ELLIPSIS "..."
%token TK_LEFTSHIFT_EQUAL "<<="
%token TK_RIGHTSHIFT_EQUAL ">>="
%token TK_POW_EQUAL "**="
%token TK_DOUBLESLASH_EQUAL "//="
%token TK_EQ "=="
%token TK_NE "!="
%token TK_LT "<"
%token TK_LE "<="
%token TK_GT ">"
%token TK_GE ">="
%token TK_NOT "not"
%token TK_AND "and"
%token TK_OR "or"
%token TK_TRUE "True"
%token TK_FALSE "False"


// Terminal tokens: Keywords
%token KW_AS
%token KW_ASSERT
%token KW_ASYNC
%token KW_AWAIT
%token KW_BREAK
%token KW_CLASS
%token KW_CONTINUE
%token KW_DEF
%token KW_DEL
%token KW_ELIF
%token KW_ELSE
%token KW_EXCEPT
%token KW_FINALLY
%token KW_FOR
%token KW_FROM
%token KW_GLOBAL
%token KW_IF
%token KW_IMPORT
%token KW_IN
%token KW_IS
%token KW_LAMBDA
%token KW_NONE
%token KW_NONLOCAL
%token KW_PASS
%token KW_RAISE
%token KW_RETURN
%token KW_TRY
%token KW_WHILE
%token KW_WITH
%token KW_YIELD

// Nonterminal tokens

%type <ast> expr

// Precedence

//%left "or"
//%left "and"
//%precedence "not"
//%left "==" "/=" "<" "<=" ">" ">="
%left "-" "+"
%left "*" "/"
//%precedence UMINUS
%right "**"

%start expr

%%

// The order of rules does not matter in Bison (unlike in ANTLR). The
// precedence is specified not by the order but by %left and %right directives
// as well as with %dprec.

// ----------------------------------------------------------------------------
// Top level rules to be used for parsing.

// Higher %dprec means higher precedence

expr
// ### primary
: TK_NAME { $$ = SYMBOL($1, @$); }
| TK_INTEGER { $$ = INTEGER($1, @$); }
| "(" expr ")" { $$ = $2; }


// ### level-2
| expr "+" expr { $$ = ADD($1, $3, @$); }
| expr "-" expr { $$ = SUB($1, $3, @$); }
| expr "*" expr { $$ = MUL($1, $3, @$); }
| expr "/" expr { $$ = DIV($1, $3, @$); }
| expr "**" expr { $$ = POW($1, $3, @$); }

;

0 comments on commit 7bb7ec1

Please sign in to comment.