Skip to content

Commit

Permalink
fix how to treat math sub/superscripts
Browse files Browse the repository at this point in the history
  • Loading branch information
gfngfn committed Jun 16, 2021
1 parent 2d9e7b7 commit 0404981
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 32 deletions.
4 changes: 3 additions & 1 deletion src/frontend/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ let opsymbol = ( '+' | '-' | '*' | '/' | '^' | '&' | '|' | '!' | ':' | '=' | '<'
(* Any single character other than whitespace and the delimiter characters listed here. *)
let str = [^ ' ' '\t' '\n' '\r' '@' '`' '\\' '{' '}' '<' '>' '%' '|' '*' '$' '#' ';']
(* Symbol characters allowed as the first character of a symbol run in math mode
   (used as `mathsymboltop (mathsymbol* )` by the math-mode rule). *)
let mathsymboltop = ('+' | '-' | '*' | '/' | ':' | '=' | '<' | '>' | '~' | '.' | ',' | '`')
(* Symbol characters allowed after the first one: everything in mathsymboltop, plus '?'. *)
let mathsymbol = (mathsymboltop | '?')
(* Variant of mathstr that does not exclude ASCII letters and digits; the definition
   of the same name further down is the one that excludes them. *)
let mathstr = [^ '+' '-' '*' '/' ':' '=' '<' '>' '~' '.' ',' '`' '?' ' ' '\t' '\n' '\r' '\\' '{' '}' '%' '|' '$' '#' ';' '\'' '^' '_' '!']
(* Exactly one character from small, capital or digit.
   NOTE(review): those three classes are defined outside this excerpt; presumably they are
   a-z, A-Z and 0-9, matching the ranges excluded from mathstr below -- confirm. *)
let mathascii = (small | capital | digit)
(* Any single character outside the listed set. The ranges 'a'-'z', 'A'-'Z' and '0'-'9' are
   excluded, so a `mathstr+` run never contains ASCII letters or digits; the math-mode rule
   matches those one character at a time through mathascii instead. *)
let mathstr = [^ '+' '-' '*' '/' ':' '=' '<' '>' '~' '.' ',' '`' '?' ' ' '\t' '\n' '\r' '\\' '{' '}' '%' '|' '$' '#' ';' '\'' '^' '_' '!' 'a'-'z' 'A'-'Z' '0'-'9']

rule progexpr stack = parse
| "%" {
Expand Down Expand Up @@ -555,6 +556,7 @@ and mathexpr stack = parse
| "_" { SUBSCRIPT(get_pos lexbuf) }
| "'"+ { let n = String.length (Lexing.lexeme lexbuf) in PRIMES(get_pos lexbuf, n) }
| (mathsymboltop (mathsymbol*)) { MATHCHARS(get_pos lexbuf, Lexing.lexeme lexbuf) }
| mathascii { MATHCHARS(get_pos lexbuf, Lexing.lexeme lexbuf) }
| mathstr+ { MATHCHARS(get_pos lexbuf, Lexing.lexeme lexbuf) }
| ("#" (identifier as varnm)) {
VARINMATH(get_pos lexbuf, [], varnm)
Expand Down
4 changes: 4 additions & 0 deletions src/frontend/logging.ml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ let warn_cmyk_image file_name =
print_endline (" Please convert the image to a jpeg image with YCbCr (RGB) color model.")


(** [warn_math_script_without_brace rng] prints a warning on standard output
    saying that the math script located at [rng] is written without braces. *)
let warn_math_script_without_brace rng =
  (* [%!] flushes the formatter right away.  Without it the text stays queued
     in [Format.std_formatter] and can show up after later messages that this
     module emits through [print_endline]. *)
  Format.printf " [Warning] at %a: math script without brace.\n%!" Range.pp rng


(** [warn_noninjective_cmap uchpre uch gidorg] prints a warning on standard
    output saying that the code points [uchpre] and [uch] are both mapped to
    the glyph ID [gidorg]. *)
let warn_noninjective_cmap uchpre uch gidorg =
  (* [%!] flushes the formatter right away.  Without it the text stays queued
     in [Format.std_formatter] and can show up after later messages that this
     module emits through [print_endline]. *)
  Format.printf " [Warning] Multiple Unicode code points (U+%04X and U+%04X) are mapped to the same GID %d.\n%!" (Uchar.to_int uchpre) (Uchar.to_int uch) gidorg

Expand Down
7 changes: 7 additions & 0 deletions src/frontend/main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,13 @@ let error_log_environment suspended =
NormalLine("and thus it cannot be applied to arguments.");
]

| Typechecker.MultiCharacterMathScriptWithoutBrace(rng) ->
report_error Typechecker [
NormalLine("at " ^ (Range.to_string rng) ^ ":");
NormalLine("more than one character is used as a math sub/superscript without braces;");
NormalLine("use braces for making association explicit.");
]

| Typeenv.IllegalNumberOfTypeArguments(rng, tynm, lenexp, lenerr) ->
report_error Typechecker [
NormalLine("at " ^ (Range.to_string rng) ^ ":");
Expand Down
65 changes: 41 additions & 24 deletions src/frontend/parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -356,9 +356,8 @@
insert_last (resitmzlst @ [hditmz]) (UTItem(uta, tlitmzlst)) i depth utast


(* [primes n] builds a list of [n] copies of the code point U+2032 (PRIME) and
   passes it through [InternalText.of_uchar_list] and then
   [InternalText.to_utf8].
   NOTE(review): presumably the result is a UTF-8 string of [n] prime marks --
   confirm against InternalText, which is not visible here. *)
let primes n =
let uchlst = List.init n (fun _ -> Uchar.of_int 0x2032) in
InternalText.to_utf8 (InternalText.of_uchar_list uchlst)
(* [primes n] is the list made of [n] copies of the code point U+2032 (PRIME).
   A negative [n] is rejected by [List.init] with [Invalid_argument]. *)
let primes (n : int) : Uchar.t list =
  let prime_mark = Uchar.of_int 0x2032 in
  List.init n (fun _ -> prime_mark)
%}

%token <Range.t * Types.var_name> VAR
Expand Down Expand Up @@ -463,6 +462,8 @@
%type <Types.manual_type option * untyped_argument list> nonrecdecargpart
%type <Range.t * manual_type list * (module_name list * type_name)> txapp
%type <Range.t * module_name list * Types.type_name> txbot
%type <bool * Types.untyped_math> mathgroup
%type <Types.untyped_math> mathbot

%%

Expand Down Expand Up @@ -1076,34 +1077,44 @@ mathmain:
}
;
(* mathtop: a base math element (mathbot), optionally decorated with a
   subscript, a superscript, a run of primes, or a pair of these.

   Each script operand is parsed by mathgroup.  In the productions that bind it
   as mg2/mg3 it is a pair (has_brace, math), and the flag is stored as the
   middle component of UTMSuperScript / UTMSubScript.  A run of primes is turned
   into UTMChars(primes n); where the primes alone form the script, the flag is
   the constant true.

   NOTE(review): this listing is a diff hunk without +/- markers, so superseded
   lines (those binding utm2=mathgroup / utm3=mathgroup directly, building
   UTMChar, or using the two-argument script constructors) appear next to
   their replacements. *)
mathtop:
| utm1=mathbot; SUPERSCRIPT; utm2=mathgroup {
make_standard (Ranged utm1) (Ranged utm2) (UTMSuperScript(utm1, utm2))
| utm1=mathbot; SUPERSCRIPT; mg2=mathgroup {
let (has_brace2, utm2) = mg2 in
make_standard (Ranged utm1) (Ranged utm2) (UTMSuperScript(utm1, has_brace2, utm2))
}
| utm1=mathbot; prm=PRIMES {
let (rng, n) = prm in
let utm2 = (rng, UTMChar(primes n)) in
make_standard (Ranged utm1) (Tok rng) (UTMSuperScript(utm1, utm2))
let utm2 = (rng, UTMChars(primes n)) in
make_standard (Ranged utm1) (Tok rng) (UTMSuperScript(utm1, true, utm2))
}
| utm1=mathbot; SUBSCRIPT; utm2=mathgroup; SUPERSCRIPT; utm3=mathgroup {
let utm12 = make_standard (Ranged utm1) (Ranged utm2) (UTMSubScript(utm1, utm2)) in
make_standard (Ranged utm1) (Ranged utm3) (UTMSuperScript(utm12, utm3))
| utm1=mathbot; SUBSCRIPT; mg2=mathgroup; SUPERSCRIPT; mg3=mathgroup {
let (has_brace2, utm2) = mg2 in
let (has_brace3, utm3) = mg3 in
let utm12 = make_standard (Ranged utm1) (Ranged utm2) (UTMSubScript(utm1, has_brace2, utm2)) in
make_standard (Ranged utm1) (Ranged utm3) (UTMSuperScript(utm12, has_brace3, utm3))
}
| utm1=mathbot; prm=PRIMES; SUPERSCRIPT; utm3=mathgroup {
| utm1=mathbot; prm=PRIMES; SUPERSCRIPT; mg3=mathgroup {
let (rng, n) = prm in
let utm2 = (rng, UTMChar(primes n)) in
let utm12 = make_standard (Ranged utm1) (Tok rng) (UTMSubScript(utm1, utm2)) in
make_standard (Ranged utm1) (Ranged utm3) (UTMSuperScript(utm12, utm3))
let utm2 = (rng, UTMChars(primes n)) in
let (has_brace3, utm3) = mg3 in
(* The primes and the explicit superscript are merged into one UTMList that
   becomes the script of a single UTMSuperScript.
   NOTE(review): because that script is a UTMList and not a UTMChars, the
   check_brace function in typechecker.ml can only warn for it; an unbraced
   multi-character utm3 here never raises
   MultiCharacterMathScriptWithoutBrace -- confirm this is intended. *)
let utm23 = make_standard (Tok rng) (Ranged utm3) (UTMList([utm2; utm3])) in
make_standard (Ranged utm1) (Ranged utm3) (UTMSuperScript(utm1, has_brace3, utm23))
}
| utm1=mathbot; SUPERSCRIPT; utm2=mathgroup; SUBSCRIPT; utm3=mathgroup {
make_standard (Ranged utm1) (Ranged utm3) (UTMSuperScript((Range.dummy "mathtop", UTMSubScript(utm1, utm3)), utm2))
| utm1=mathbot; SUPERSCRIPT; mg2=mathgroup; SUBSCRIPT; mg3=mathgroup {
let (has_brace2, utm2) = mg2 in
let (has_brace3, utm3) = mg3 in
make_standard (Ranged utm1) (Ranged utm3)
(UTMSuperScript((Range.dummy "mathtop", UTMSubScript(utm1, has_brace3, utm3)), has_brace2, utm2))
}
| utm1=mathbot; prm=PRIMES; SUBSCRIPT; utm3=mathgroup {
| utm1=mathbot; prm=PRIMES; SUBSCRIPT; mg3=mathgroup {
let (rng, n) = prm in
let utm2 = (rng, UTMChar(primes n)) in
make_standard (Ranged utm1) (Ranged utm3) (UTMSuperScript((Range.dummy "mathtop", UTMSubScript(utm1, utm3)), utm2))
let (has_brace3, utm3) = mg3 in
let utm2 = (rng, UTMChars(primes n)) in
make_standard (Ranged utm1) (Ranged utm3)
(UTMSuperScript((Range.dummy "mathtop", UTMSubScript(utm1, has_brace3, utm3)), true, utm2))
}
| utm1=mathbot; SUBSCRIPT; utm2=mathgroup {
make_standard (Ranged utm1) (Ranged utm2) (UTMSubScript(utm1, utm2))
| utm1=mathbot; SUBSCRIPT; mg2=mathgroup {
let (has_brace2, utm2) = mg2 in
make_standard (Ranged utm1) (Ranged utm2) (UTMSubScript(utm1, has_brace2, utm2))
}
| utm=mathbot { utm }
;
Expand All @@ -1116,12 +1127,18 @@ mathsubopt:
;
*)
(* mathgroup: the operand of a sub/superscript.

   In the productions that return a pair, the first component records how the
   operand was written: true for a group delimited by BMATHGRP ... EMATHGRP,
   false for a bare mathbot.  The second component is the parsed math.

   NOTE(review): this listing is a diff hunk without +/- markers; the two
   one-line productions that return the math alone are the superseded form of
   the pair-returning productions that follow them. *)
mathgroup:
| opn=BMATHGRP; utm=mathmain; cls=EMATHGRP { make_standard (Tok opn) (Tok cls) (extract_main utm) }
| utm=mathbot { utm }
| opn=BMATHGRP; utm=mathmain; cls=EMATHGRP {
(true, make_standard (Tok opn) (Tok cls) (extract_main utm))
}
| utm=mathbot {
(false, utm)
}
;
mathbot:
| tok=MATHCHARS {
let (rng, s) = tok in (rng, UTMChar(s))
let (rng, s) = tok in
let uchs = InternalText.to_uchar_list (InternalText.of_utf8 s) in
(rng, UTMChars(uchs))
}
| mcmd=mcmd; arglst=list(matharg) {
let (rngcmd, mdlnmlst, csnm) = mcmd in
Expand Down
24 changes: 20 additions & 4 deletions src/frontend/typechecker.ml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ exception UndefinedVertMacro of Range.t * ctrlseq_name
exception InvalidNumberOfMacroArguments of Range.t * Typeenv.t * macro_parameter_type list
exception LateMacroArgumentExpected of Range.t * Typeenv.t * mono_type
exception EarlyMacroArgumentExpected of Range.t * Typeenv.t * mono_type
(* Raised while typechecking math when a sub/superscript written without braces
   is a character sequence of two or more characters; carries the range of
   that script. *)
exception MultiCharacterMathScriptWithoutBrace of Range.t

exception InternalInclusionError
exception InternalContradictionError of bool
Expand Down Expand Up @@ -1091,23 +1092,38 @@ and typecheck_command_arguments (ecmd : abstract_tree) (tycmd : mono_type) (rngc

and typecheck_math (pre : pre) tyenv ((rng, utmathmain) : untyped_math) : abstract_tree =
let iter = typecheck_math pre tyenv in
(* [check_brace has_braceS script] validates how a sub/superscript was written.
   A braced script is always accepted silently.  An unbraced script made of two
   or more characters raises [MultiCharacterMathScriptWithoutBrace]; any other
   unbraced script only triggers a warning. *)
let check_brace (has_braceS : bool) ((rngS, utmathmainS) : untyped_math) : unit =
  if not has_braceS then begin
    match utmathmainS with
    | UTMChars(_ :: _ :: _) ->
        raise (MultiCharacterMathScriptWithoutBrace(rngS))

    | _ ->
        Logging.warn_math_script_without_brace rngS
  end
in
let open HorzBox in
match utmathmain with
| UTMChar(s) ->
let uchs = InternalText.to_uchar_list (InternalText.of_utf8 s) in
| UTMChars(uchs) ->
let ms = uchs |> List.map (fun uch -> MathPure(MathVariantChar(uch))) in
ASTMath(ms)

| UTMList(utmathlst) ->
let astlst = utmathlst |> List.map iter in
BackendMathList(astlst)

| UTMSubScript(utmathB, utmathS) ->
| UTMSubScript(utmathB, has_braceS, utmathS) ->
check_brace has_braceS utmathS;
let astB = iter utmathB in
let astS = iter utmathS in
BackendMathSubscript(astB, astS)

| UTMSuperScript(utmathB, utmathS) ->
| UTMSuperScript(utmathB, has_braceS, utmathS) ->
check_brace has_braceS utmathS;
let astB = iter utmathB in
let astS = iter utmathS in
BackendMathSuperscript(astB, astS)
Expand Down
1 change: 1 addition & 0 deletions src/frontend/typechecker.mli
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ exception UndefinedVertMacro of Range.t * ctrlseq_name
exception InvalidNumberOfMacroArguments of Range.t * Typeenv.t * macro_parameter_type list
exception LateMacroArgumentExpected of Range.t * Typeenv.t * mono_type
exception EarlyMacroArgumentExpected of Range.t * Typeenv.t * mono_type
(** Raised when a math sub/superscript written without braces is a character
    sequence of two or more characters.  The payload is the range of that
    script. *)
exception MultiCharacterMathScriptWithoutBrace of Range.t

val main : stage -> Typeenv.t -> untyped_abstract_tree -> (mono_type * Typeenv.t * abstract_tree)

Expand Down
7 changes: 4 additions & 3 deletions src/frontend/types.cppo.ml
Original file line number Diff line number Diff line change
Expand Up @@ -581,9 +581,10 @@ and untyped_type_argument = Range.t * var_name * manual_kind
and untyped_math = Range.t * untyped_math_main

and untyped_math_main =
| UTMChar of string
| UTMSuperScript of untyped_math * untyped_math
| UTMSubScript of untyped_math * untyped_math
| UTMChars of Uchar.t list
[@printer (fun ppf uchs -> Format.fprintf ppf "UTMChars(_)")]
| UTMSuperScript of untyped_math * bool * untyped_math
| UTMSubScript of untyped_math * bool * untyped_math
| UTMCommand of untyped_abstract_tree * untyped_command_argument list
| UTMList of untyped_math list
| UTMEmbed of untyped_abstract_tree
Expand Down

0 comments on commit 0404981

Please sign in to comment.