forked from lcompilers/lpython
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ASR.asdl
294 lines (247 loc) · 13 KB
/
ASR.asdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
-- Abstract Semantic Representation (ASR) definition
-- The aim of ASR is to represent all semantics in a non-redundant way, with
-- all the semantic information available locally, so that the backend can
-- do a single pass over ASR and have all the information at hand to generate
-- code.
--
-- ASR is always semantically valid Fortran code. It is as far from the original
-- Fortran language code as possible (i.e. everything is explicitly figured out,
-- all semantic information gathered and readily available locally from each ASR
-- node), while ensuring no semantic information was lost (no lowering was
-- done), so one can still generate Fortran code from ASR that will be logically
-- equivalent to the original code.
--
-- ASR can be used to do Fortran level transformations (such as optimizations).
-- ASDL's builtin types are:
-- * identifier
-- * int (signed integers of infinite precision)
-- * string
-- We extend these by:
-- * bool (.true. / .false.)
-- * float (floating point number of infinite precision)
-- * symbol_table (scoped Symbol Table implementation)
-- * node (any ASR node)
--
-- Note: `symbol_table` contains `identifier` -> `symbol` mappings.
module ASR {
-- TranslationUnit holds a `global_scope` symbol table plus a list of `items`,
-- each of which may be any ASR node.
unit
    = TranslationUnit(
        symbol_table global_scope,
        node* items)
-- # Documentation for the symbol type
-- Each symbol has either `symtab` (local symbol table) or `parent_symtab`
-- (where this symbol is stored). One can get to parent_symtab via symtab, so
-- only one is present.
-- Each symbol has a `name` for easy lookup of the name of the symbol when only
-- having a pointer to it.
-- abi=Source means the symbol's implementation is included (full ASR),
-- otherwise it is external (interface ASR, such as procedure interface).
-- SubroutineCall/FunctionCall store the actual final resolved subroutine or
-- function (`name` member). They also store the original symbol
-- (`original_name`), which can be one of: null, GenericProcedure or
-- ExternalSymbol.
-- When a module is compiled, it is parsed into full ASR, an object file is
-- produced, and the full ASR (abi=Source, "body" is non-empty) is transformed
-- into interface ASR (abi=LFortranModule, "body" is empty). Both the interface
-- and the full ASR are saved into the mod file.
-- When a module is used, it is first looked up in the symbol table (as either
-- full or interface ASR) and used if it is present. Otherwise a mod file is
-- found on the disk, loaded (as either full or interface ASR for LFortran's
-- mod file, depending on LFortran's compiler options; or for GFortran's mod
-- file the corresponding interface ASR is constructed with
-- abi=GFortranModule) and used. After the ASR is loaded, the symbols that are
-- used are represented as ExternalSymbols in the current scope of the symbol
-- table.
-- ExternalSymbol represents symbols that cannot be looked up in the current
-- scoped symbol table. As an example, if a variable is defined in a module,
-- but used in a nested subroutine, that is not an external symbol
-- because it can be resolved in the current symbol table (nested subroutine)
-- by following the parents. However if a symbol is used from a different
-- module, then it is an external symbol, because usual symbol resolution by
-- going to the parents will not find the definition. The `module_name` member
-- is the name of the module the symbol is in, the `scope_names` is a list of
-- names if the symbol is in a nested symbol table. For example if it is a
-- local variable in a function `f` that is nested in function `g`, then
-- `scope_names=[g, f]`.
-- REPL: each cell is parsed into full ASR, compiled + executed, the full ASR
-- is transformed into interface ASR (abi=LFortran) and kept in the symbol
-- table. A new cell starts with an empty symbol table, whose parent symbol
-- table is the previous cell. That allows function / declaration shadowing.
-- Every constructor starts with exactly one of `symtab` (the symbol's own
-- local table) or `parent_symtab` (the table the symbol is stored in),
-- immediately followed by the symbol's `name`.
symbol
    = Program(symbol_table symtab, identifier name,
        identifier* dependencies, stmt* body)
    | Module(symbol_table symtab, identifier name,
        identifier* dependencies, bool loaded_from_mod, bool intrinsic)
    | Subroutine(symbol_table symtab, identifier name, expr* args,
        stmt* body, abi abi, access access, deftype deftype,
        string? bindc_name, bool pure, bool module)
    | Function(symbol_table symtab, identifier name, expr* args,
        stmt* body, expr return_var, abi abi, access access,
        deftype deftype, string? bindc_name)
    | GenericProcedure(symbol_table parent_symtab, identifier name,
        symbol* procs, access access)
    | CustomOperator(symbol_table parent_symtab, identifier name,
        symbol* procs, access access)
    | ExternalSymbol(symbol_table parent_symtab, identifier name,
        symbol external, identifier module_name,
        identifier* scope_names, identifier original_name,
        access access)
    | DerivedType(symbol_table symtab, identifier name,
        identifier* members, abi abi, access access, symbol? parent)
    | Variable(symbol_table parent_symtab, identifier name,
        intent intent, expr? symbolic_value, expr? value,
        storage_type storage, ttype type, abi abi, access access,
        presence presence, bool value_attr)
    | ClassType(symbol_table symtab, identifier name, abi abi,
        access access)
    | ClassProcedure(symbol_table parent_symtab, identifier name,
        identifier proc_name, symbol proc, abi abi)
-- Attribute enumerations referenced by the fields of the `symbol`
-- constructors (e.g. `Variable` uses storage_type, intent and presence;
-- `Subroutine` / `Function` use deftype; `access` is used by most symbols).
storage_type
    = Default | Save | Parameter | Allocatable
access
    = Public | Private
intent
    = Local | In | Out | InOut | ReturnVar | Unspecified | AssociateBlock
deftype
    = Implementation | Interface
presence
    = Required | Optional
-- # Documentation for the ABI type
-- External Yes: the symbol's implementation is not part of ASR, the
-- symbol is just an interface (e.g., subroutine/function interface, or variable
-- marked as external, not allocated by this ASR).
-- External No: the symbol's implementation is part of ASR (e.g.,
-- subroutine/function body is included, variables must be allocated).
-- abi=Source: The symbol's implementation is included in ASR, the backend is
-- free to use any ABI it wants (it might also decide to inline or eliminate
-- the code in optimizations).
-- abi=LFortranModule/GFortranModule/BindC: the symbol's implementation is
-- stored as machine code in some object file that needs to be linked in. It
-- uses the specified ABI (one of LFortran module, GFortran module or C ABI).
-- An interface that uses `iso_c_binding` and `bind(c)` is represented using
-- abi=BindC.
-- abi=Interactive: the symbol's implementation has been provided by the
-- previous REPL execution (e.g., if LLVM backend is used for the interactive
-- mode, the previous execution generated machine code for this symbol's
-- implementation that was loaded into memory). Note: this option might be
-- converted/eliminated to just use LFortran ABI in the future.
-- abi=Intrinsic: the symbol's implementation is implicitly provided by the
-- language itself as an intrinsic function. That means the backend is free to
-- implement it in any way it wants. The function does not have a body, it is
-- just an interface.
-- The trailing comments form a two-column table: `External` says whether the
-- symbol's implementation lives outside ASR, `ABI` names the ABI it uses.
abi                     -- External     ABI
    = Source            -- No           Unspecified
    | LFortranModule    -- Yes          LFortran
    | GFortranModule    -- Yes          GFortran
    | BindC             -- Yes          C
    | Interactive       -- Yes          Unspecified
    | Intrinsic         -- Yes          Unspecified
-- Statement nodes. In a field list `?` marks an optional field and `*` marks
-- a sequence.
stmt
    = Allocate(alloc_arg* args, expr? stat, expr? errmsg)
    | Assign(int label, identifier variable)
    | Assignment(expr target, expr value, stmt? overloaded)
    | Associate(expr target, expr value)
    | Cycle()
    -- ExplicitDeallocate: deallocates `vars` if allocated; otherwise a
    -- runtime error is thrown.
    | ExplicitDeallocate(symbol* vars)
    -- ImplicitDeallocate: deallocates `vars` if allocated; otherwise it does
    -- nothing.
    | ImplicitDeallocate(symbol* vars)
    | DoConcurrentLoop(do_loop_head head, stmt* body)
    | DoLoop(do_loop_head head, stmt* body)
    | ErrorStop(expr? code)
    | Exit()
    | ForAllSingle(do_loop_head head, stmt assign_stmt)
    -- GoTo refers to the GoToTarget, within the same procedure, that has the
    -- corresponding `target_id`. `int` IDs are currently used to link GoTo
    -- with GoToTarget in order to avoid issues with serialization.
    | GoTo(int target_id)
    -- GoToTarget is an empty statement that is the target of zero or more
    -- GoTo statements. Its `id` is unique only within a procedure.
    | GoToTarget(int id)
    | If(expr test, stmt* body, stmt* orelse)
    | IfArithmetic(expr test, int lt_label, int eq_label, int gt_label)
    | Print(expr? fmt, expr* values)
    | Open(int label, expr? newunit, expr? filename, expr? status)
    | Close(int label, expr? unit, expr? iostat, expr? iomsg, expr? err,
        expr? status)
    | Read(int label, expr? unit, expr? fmt, expr? iomsg, expr? iostat,
        expr? id, expr* values)
    | Rewind(int label, expr? unit, expr? iostat, expr? err)
    | Inquire(int label, expr? unit, expr? file, expr? iostat, expr? err,
        expr? exist, expr? opened, expr? number, expr? named, expr? name,
        expr? access, expr? sequential, expr? direct, expr? form,
        expr? formatted, expr? unformatted, expr? recl, expr? nextrec,
        expr? blank, expr? position, expr? action, expr? read,
        expr? write, expr? readwrite, expr? delim, expr? pad, expr? flen,
        expr? blocksize, expr? convert, expr? carriagecontrol,
        expr? iolength)
    | Write(int label, expr? unit, expr? fmt, expr? iomsg, expr? iostat,
        expr? id, expr* values)
    | Return()
    | Select(expr test, case_stmt* body, stmt* default)
    | Stop(expr? code)
    | Assert(expr test, expr? msg)
    | SubroutineCall(symbol name, symbol? original_name, call_arg* args,
        expr? dt)
    | Where(expr test, stmt* body, stmt* orelse)
    | WhileLoop(expr test, stmt* body)
    | Nullify(symbol* vars)
    | Flush(int label, expr unit, expr? err, expr? iomsg, expr? iostat)
-- Expression nodes. Most constructors carry a `ttype type` field; many also
-- have an optional `value` expression (presumably the compile-time value when
-- one is known -- TODO confirm).
expr
    = BoolOp(expr left, boolop op, expr right, ttype type, expr? value)
    | BinOp(expr left, binop op, expr right, ttype type, expr? value,
        expr? overloaded)
    | StrOp(expr left, strop op, expr right, ttype type, expr? value)
    | UnaryOp(unaryop op, expr operand, ttype type, expr? value)
    -- ComplexConstructor: e.g. (x, y+z) or (3.0, 2.0); generally not known
    -- at compile time.
    | ComplexConstructor(expr re, expr im, ttype type, expr? value)
    | NamedExpr(expr target, expr value, ttype type)
    | Compare(expr left, cmpop op, expr right, ttype type, expr? value,
        expr? overloaded)
    | IfExp(expr test, expr body, expr orelse, ttype type)
    | FunctionCall(symbol name, symbol? original_name, call_arg* args,
        ttype type, expr? value, expr? dt)
    | DerivedTypeConstructor(symbol dt_sym, expr* args, ttype type)
    | ConstantArray(expr* args, ttype type)
    | ImpliedDoLoop(expr* values, expr var, expr start, expr end,
        expr? increment, ttype type, expr? value)
    | ConstantInteger(int n, ttype type)
    | ConstantReal(float r, ttype type)
    | ConstantComplex(float re, float im, ttype type)
    | ConstantLogical(bool value, ttype type)
    | ConstantSet(expr* elements, ttype type)
    | ConstantTuple(expr* elements, ttype type)
    | ConstantString(string s, ttype type)
    | ConstantDictionary(expr* keys, expr* values, ttype type)
    | BOZ(int v, boz boz_type, ttype? type)
    | Var(symbol v)
    | ArrayRef(symbol v, array_index* args, ttype type, expr? value)
    | DerivedRef(expr v, symbol m, ttype type, expr? value)
    | ImplicitCast(expr arg, cast_kind kind, ttype type, expr? value)
    | ExplicitCast(expr arg, cast_kind kind, ttype type, expr? value)
-- Meaning of `len` in Character:
--   >= 0 : the length of the string, known at compile time
--   -1   : character(*), i.e., inferred at runtime
--   -2   : character(:), allocatable (possibly -1 might be used for that too)
--   -3   : character(n+3), i.e., a runtime expression stored in `len_expr`
ttype
    = Integer(int kind, dimension* dims)
    | Real(int kind, dimension* dims)
    | Complex(int kind, dimension* dims)
    | Character(int kind, int len, expr? len_expr, dimension* dims)
    | Logical(int kind, dimension* dims)
    | Set(ttype type)
    | Tuple(ttype* type)
    | Derived(symbol derived_type, dimension* dims)
    | Class(symbol class_type, dimension* dims)
    | Dict(ttype key_type, ttype value_type)
    | Pointer(ttype type)
-- Operator enumerations used as the `op` field of BoolOp, BinOp, UnaryOp,
-- StrOp and Compare; `boz` is the `boz_type` field of BOZ.
boolop
    = And | Or | Xor | NEqv | Eqv
binop
    = Add | Sub | Mul | Div | Pow
unaryop
    = Invert | Not | UAdd | USub
strop
    = Concat | Repeat
cmpop
    = Eq | NotEq | Lt | LtE | Gt | GtE
boz
    = Binary | Hex | Octal
-- Cast kinds, used as the `kind` field of ImplicitCast / ExplicitCast.
-- Constructor names follow a <From>To<To> pattern.
cast_kind
    = RealToInteger | IntegerToReal | RealToReal | IntegerToInteger
    | RealToComplex | IntegerToComplex | IntegerToLogical
    | ComplexToComplex | ComplexToReal
-- Helper types. Definitions written as a bare `(...)` are product types
-- (records without a constructor name); the others are sum types.
dimension
    = (expr? start, expr? end)
alloc_arg
    = (symbol a, dimension* dims)
attribute
    = Attribute(identifier name, attribute_arg* args)
attribute_arg
    = (identifier arg)
call_arg
    = (expr? value)
tbind
    = Bind(string lang, string name)
array_index
    = (expr? left, expr? right, expr? step)
do_loop_head
    = (expr? v, expr? start, expr? end, expr? increment)
-- A case either lists explicit `test` expressions or gives a
-- `start` / `end` range; both forms carry the `body` statements.
case_stmt
    = CaseStmt(expr* test, stmt* body)
    | CaseStmt_Range(expr? start, expr? end, stmt* body)
}