diff options
Diffstat (limited to 'src/lexer.mll')
| -rw-r--r-- | src/lexer.mll | 272 |
1 files changed, 100 insertions, 172 deletions
diff --git a/src/lexer.mll b/src/lexer.mll index 35ab6627..fb33552b 100644 --- a/src/lexer.mll +++ b/src/lexer.mll @@ -50,7 +50,8 @@ { open Parser -open Big_int +module Big_int = Nat_big_num +open Parse_ast module M = Map.Make(String) exception LexError of string * Lexing.position @@ -58,81 +59,102 @@ let r = fun s -> s (* Ulib.Text.of_latin1 *) (* Available as Scanf.unescaped since OCaml 4.0 but 3.12 is still common *) let unescaped s = Scanf.sscanf ("\"" ^ s ^ "\"") "%S%!" (fun x -> x) +let mk_operator prec n op = + match prec, n with + | Infix, 0 -> Op0 op + | Infix, 1 -> Op1 op + | Infix, 2 -> Op2 op + | Infix, 3 -> Op3 op + | Infix, 4 -> Op4 op + | Infix, 5 -> Op5 op + | Infix, 6 -> Op6 op + | Infix, 7 -> Op7 op + | Infix, 8 -> Op8 op + | Infix, 9 -> Op9 op + | InfixL, 0 -> Op0l op + | InfixL, 1 -> Op1l op + | InfixL, 2 -> Op2l op + | InfixL, 3 -> Op3l op + | InfixL, 4 -> Op4l op + | InfixL, 5 -> Op5l op + | InfixL, 6 -> Op6l op + | InfixL, 7 -> Op7l op + | InfixL, 8 -> Op8l op + | InfixL, 9 -> Op9l op + | InfixR, 0 -> Op0r op + | InfixR, 1 -> Op1r op + | InfixR, 2 -> Op2r op + | InfixR, 3 -> Op3r op + | InfixR, 4 -> Op4r op + | InfixR, 5 -> Op5r op + | InfixR, 6 -> Op6r op + | InfixR, 7 -> Op7r op + | InfixR, 8 -> Op8r op + | InfixR, 9 -> Op9r op + | _, _ -> assert false + +let operators = ref M.empty + let kw_table = List.fold_left (fun r (x,y) -> M.add x y r) M.empty [ ("and", (fun _ -> And)); - ("alias", (fun _ -> Alias)); ("as", (fun _ -> As)); ("assert", (fun _ -> Assert)); ("bitzero", (fun _ -> Bitzero)); ("bitone", (fun _ -> Bitone)); - ("bits", (fun _ -> Bits)); ("by", (fun _ -> By)); - ("case", (fun _ -> Case)); + ("match", (fun _ -> Match)); ("clause", (fun _ -> Clause)); - ("const", (fun _ -> Const)); ("dec", (fun _ -> Dec)); - ("def", (fun _ -> Def)); + ("operator", (fun _ -> Op)); ("default", (fun _ -> Default)); - ("deinfix", (fun _ -> Deinfix)); ("effect", (fun _ -> Effect)); - ("Effect", (fun _ -> EFFECT)); ("end", (fun _ -> End)); - ("enumerate", (fun _ -> Enumerate)); + ("enum", (fun _ -> Enum)); ("else", (fun _ -> Else)); ("exit", (fun _ -> Exit)); - ("extern", (fun _ -> Extern)); ("cast", (fun _ -> Cast)); ("false", (fun _ -> False)); - ("try", (fun _ -> Try)); - ("catch", (fun _ -> Catch)); - ("throw", (fun _ -> Throw)); ("forall", (fun _ -> Forall)); - ("exist", (fun _ -> Exist)); ("foreach", (fun _ -> Foreach)); ("function", (fun x -> Function_)); ("overload", (fun _ -> Overload)); + ("throw", (fun _ -> Throw)); + ("try", (fun _ -> Try)); + ("catch", (fun _ -> Catch)); ("if", (fun x -> If_)); ("in", (fun x -> In)); ("inc", (fun _ -> Inc)); - ("IN", (fun x -> IN)); ("let", (fun x -> Let_)); - ("member", (fun x -> Member)); - ("Nat", (fun x -> Nat)); - ("Num", (fun x -> NatNum)); + ("var", (fun _ -> Var)); + ("ref", (fun _ -> Ref)); + ("Int", (fun x -> Int)); ("Order", (fun x -> Order)); ("pure", (fun x -> Pure)); - ("rec", (fun x -> Rec)); ("register", (fun x -> Register)); ("return", (fun x -> Return)); ("scattered", (fun x -> Scattered)); ("sizeof", (fun x -> Sizeof)); ("constraint", (fun x -> Constraint)); ("struct", (fun x -> Struct)); - ("switch", (fun x -> Switch)); ("then", (fun x -> Then)); ("true", (fun x -> True)); ("Type", (fun x -> TYPE)); - ("typedef", (fun x -> Typedef)); + ("type", (fun x -> Typedef)); ("undefined", (fun x -> Undefined)); ("union", (fun x -> Union)); + ("newtype", (fun x -> Newtype)); ("with", (fun x -> With)); - ("when", (fun x -> When)); - ("repeat", (fun x -> Repeat)); - ("until", (fun x -> Until)); - ("while", (fun x -> While)); - ("do", (fun x -> Do)); ("val", (fun x -> Val)); - - ("div", (fun x -> Div_)); - ("mod", (fun x -> Mod)); - ("mod_s", (fun x -> ModUnderS)); - ("quot", (fun x -> Quot)); - ("quot_s", (fun x -> QuotUnderS)); - ("rem", (fun x -> Rem)); + ("repeat", (fun _ -> Repeat)); + ("until", (fun _ -> Until)); + ("while", (fun _ -> While)); + ("do", (fun _ -> Do)); + ("mutual", (fun _ -> Mutual)); + ("bitfield", (fun _ -> Bitfield)); ("barr", (fun x -> Barr)); ("depend", (fun x -> Depend)); @@ -149,25 +171,23 @@ let kw_table = ("unspec", (fun x -> Unspec)); ("nondet", (fun x -> Nondet)); ("escape", (fun x -> Escape)); + ] -] -let default_type_names = ["bool";"unit";"vector";"range";"list";"bit";"nat"; "int"; "real"; - "uint8";"uint16";"uint32";"uint64";"atom";"implicit";"string";"option"] -let custom_type_names : string list ref = ref [] } let ws = [' ''\t']+ -let letter = ['a'-'z''A'-'Z'] +let letter = ['a'-'z''A'-'Z''?'] let digit = ['0'-'9'] -let binarydigit = ['0'-'1'] -let hexdigit = ['0'-'9''A'-'F''a'-'f'] +let binarydigit = ['0'-'1''_'] +let hexdigit = ['0'-'9''A'-'F''a'-'f''_'] let alphanum = letter|digit let startident = letter|'_' let ident = alphanum|['_''\'''#'] let tyvar_start = '\'' -let oper_char = ['!''$''%''&''*''+''-''.''/'':''<''=''>''?''@''^''|''~'] +let oper_char = ['!''$''%''&''*''+''-''.''/'':''<''=''>''@''^''|'] +let operator = (oper_char+ ('_' ident)?) let escape_sequence = ('\\' ['\\''\"''\'''n''t''b''r']) | ('\\' digit digit digit) | ('\\' 'x' hexdigit hexdigit) rule token = parse @@ -176,164 +196,72 @@ rule token = parse | "\n" { Lexing.new_line lexbuf; token lexbuf } - | "&" { (Amp(r"&")) } - | "@" { (At(r"@")) } - | "|" { Bar } - | "^" { (Carrot(r"^")) } + | "@" { (At "@") } + | "2" ws "^" { TwoCaret } + | "^" { (Caret(r"^")) } + | "::" { ColonColon(r "::") } | ":" { Colon(r ":") } | "," { Comma } + | ".." { DotDot } | "." { Dot } + | "==" as op + { try M.find op !operators + with Not_found -> raise (LexError ("Operator fixity undeclared " ^ op, Lexing.lexeme_start_p lexbuf)) } | "=" { (Eq(r"=")) } - | "!" { (Excl(r"!")) } | ">" { (Gt(r">")) } | "-" { Minus } | "<" { (Lt(r"<")) } | "+" { (Plus(r"+")) } | ";" { Semi } | "*" { (Star(r"*")) } - | "~" { (Tilde(r"~")) } | "_" { Under } + | "[|" { LsquareBar } + | "|]" { RsquareBar } + | "{|" { LcurlyBar } + | "|}" { RcurlyBar } + | "|" { Bar } | "{" { Lcurly } | "}" { Rcurly } + | "()" { Unit(r"()") } | "(" { Lparen } | ")" { Rparen } | "[" { Lsquare } | "]" { Rsquare } - | "&&" as i { (AmpAmp(r i)) } - | "||" { BarBar } - | "||]" { BarBarSquare } - | "|]" { BarSquare } - | "^^" { (CarrotCarrot(r"^^")) } - | "::" as i { (ColonColon(r i)) } - | ":=" { ColonEq } - | ":>" { ColonGt } - | ":]" { ColonSquare } - | ".." { DotDot } - | "==" { (EqEq(r"==")) } | "!=" { (ExclEq(r"!=")) } - | "!!" { (ExclExcl(r"!!")) } | ">=" { (GtEq(r">=")) } - | ">=+" { (GtEqPlus(r">=+")) } - | ">>" { (GtGt(r">>")) } - | ">>>" { (GtGtGt(r">>>")) } - | ">+" { (GtPlus(r">+")) } - | "#>>" { (HashGtGt(r"#>>")) } - | "#<<" { (HashLtLt(r"#<<")) } | "->" { MinusGt } - | "<:" { LtColon } + | "=>" { EqGt(r "=>") } | "<=" { (LtEq(r"<=")) } - | "<=+" { (LtEqPlus(r"<=+")) } - | "<>" { (LtGt(r"<>")) } - | "<<" { (LtLt(r"<<")) } - | "<<<" { (LtLtLt(r"<<<")) } - | "<+" { (LtPlus(r"<+")) } - | "**" { (StarStar(r"**")) } - | "[|" { SquareBar } - | "[||" { SquareBarBar } - | "[:" { SquareColon } - | "~^" { (TildeCarrot(r"~^")) } - - | "+_s" { (PlusUnderS(r"+_s")) } - | "-_s" { (MinusUnderS(r"-_s")) } - | ">=_s" { (GtEqUnderS(r">=_s")) } - | ">=_si" { (GtEqUnderSi(r">=_si")) } - | ">=_u" { (GtEqUnderU(r">=_u")) } - | ">=_ui" { (GtEqUnderUi(r">=_ui")) } - | ">>_u" { (GtGtUnderU(r">>_u")) } - | ">_s" { (GtUnderS(r">_s")) } - | ">_si" { (GtUnderSi(r">_si")) } - | ">_u" { (GtUnderU(r">_u")) } - | ">_ui" { (GtUnderUi(r">_ui")) } - | "<=_s" { (LtEqUnderS(r"<=_s")) } - | "<=_si" { (LtEqUnderSi(r"<=_si")) } - | "<=_u" { (LtEqUnderU(r"<=_u")) } - | "<=_ui" { (LtEqUnderUi(r"<=_ui")) } - | "<_s" { (LtUnderS(r"<_s")) } - | "<_si" { (LtUnderSi(r"<_si")) } - | "<_u" { (LtUnderU(r"<_u")) } - | "<_ui" { (LtUnderUi(r"<_ui")) } - | "*_s" { (StarUnderS(r"*_s")) } - | "**_s" { (StarStarUnderS(r"**_s")) } - | "**_si" { (StarStarUnderSi(r"**_si")) } - | "*_u" { (StarUnderU(r"*_u")) } - | "*_ui" { (StarUnderUi(r"*_ui")) } - | "2^" { (TwoCarrot(r"2^")) } - | "2**" { TwoStarStar } - - - | "(*" { comment (Lexing.lexeme_start_p lexbuf) 0 lexbuf; token lexbuf } - | "*)" { raise (LexError("Unbalanced comment", Lexing.lexeme_start_p lexbuf)) } - + | "/*" { comment (Lexing.lexeme_start_p lexbuf) 0 lexbuf; token lexbuf } + | "*/" { raise (LexError("Unbalanced comment", Lexing.lexeme_start_p lexbuf)) } + | "infix" ws (digit as p) ws (operator as op) + { operators := M.add op (mk_operator Infix (int_of_string (Char.escaped p)) op) !operators; + Fixity (Infix, Big_int.of_string (Char.escaped p), op) } + | "infixl" ws (digit as p) ws (operator as op) + { operators := M.add op (mk_operator InfixL (int_of_string (Char.escaped p)) op) !operators; + Fixity (InfixL, Big_int.of_string (Char.escaped p), op) } + | "infixr" ws (digit as p) ws (operator as op) + { operators := M.add op (mk_operator InfixR (int_of_string (Char.escaped p)) op) !operators; + Fixity (InfixR, Big_int.of_string (Char.escaped p), op) } + | operator as op + { try M.find op !operators + with Not_found -> raise (LexError ("Operator fixity undeclared " ^ op, Lexing.lexeme_start_p lexbuf)) } | tyvar_start startident ident* as i { TyVar(r i) } + | "~" { Id(r"~") } | startident ident* as i { if M.mem i kw_table then (M.find i kw_table) () - else if + (* else if List.mem i default_type_names || List.mem i !custom_type_names then - TyId(r i) - else Id(r i) } - | "&" oper_char+ as i { (AmpI(r i)) } - | "@" oper_char+ as i { (AtI(r i)) } - | "^" oper_char+ as i { (CarrotI(r i)) } - | "/" oper_char+ as i { (DivI(r i)) } - | "=" oper_char+ as i { (EqI(r i)) } - | "!" oper_char+ as i { (ExclI(r i)) } - | ">" oper_char+ as i { (GtI(r i)) } - | "<" oper_char+ as i { (LtI(r i)) } - | "+" oper_char+ as i { (PlusI(r i)) } - | "*" oper_char+ as i { (StarI(r i)) } - | "~" oper_char+ as i { (TildeI(r i)) } - | "&&" oper_char+ as i { (AmpAmpI(r i)) } - | "^^" oper_char+ as i { (CarrotCarrotI(r i)) } - | "::" oper_char+ as i { (ColonColonI(r i)) } - | "==" oper_char+ as i { (EqEqI(r i)) } - | "!=" oper_char+ as i { (ExclEqI(r i)) } - | "!!" oper_char+ as i { (ExclExclI(r i)) } - | ">=" oper_char+ as i { (GtEqI(r i)) } - | ">=+" oper_char+ as i { (GtEqPlusI(r i)) } - | ">>" oper_char+ as i { (GtGtI(r i)) } - | ">>>" oper_char+ as i { (GtGtGtI(r i)) } - | ">+" oper_char+ as i { (GtPlusI(r i)) } - | "#>>" oper_char+ as i { (HashGtGt(r i)) } - | "#<<" oper_char+ as i { (HashLtLt(r i)) } - | "<=" oper_char+ as i { (LtEqI(r i)) } - | "<=+" oper_char+ as i { (LtEqPlusI(r i)) } - | "<<" oper_char+ as i { (LtLtI(r i)) } - | "<<<" oper_char+ as i { (LtLtLtI(r i)) } - | "<+" oper_char+ as i { (LtPlusI(r i)) } - | "**" oper_char+ as i { (StarStarI(r i)) } - | "~^" oper_char+ as i { (TildeCarrot(r i)) } - - | ">=_s" oper_char+ as i { (GtEqUnderSI(r i)) } - | ">=_si" oper_char+ as i { (GtEqUnderSiI(r i)) } - | ">=_u" oper_char+ as i { (GtEqUnderUI(r i)) } - | ">=_ui" oper_char+ as i { (GtEqUnderUiI(r i)) } - | ">>_u" oper_char+ as i { (GtGtUnderUI(r i)) } - | ">_s" oper_char+ as i { (GtUnderSI(r i)) } - | ">_si" oper_char+ as i { (GtUnderSiI(r i)) } - | ">_u" oper_char+ as i { (GtUnderUI(r i)) } - | ">_ui" oper_char+ as i { (GtUnderUiI(r i)) } - | "<=_s" oper_char+ as i { (LtEqUnderSI(r i)) } - | "<=_si" oper_char+ as i { (LtEqUnderSiI(r i)) } - | "<=_u" oper_char+ as i { (LtEqUnderUI(r i)) } - | "<=_ui" oper_char+ as i { (LtEqUnderUiI(r i)) } - | "<_s" oper_char+ as i { (LtUnderSI(r i)) } - | "<_si" oper_char+ as i { (LtUnderSiI(r i)) } - | "<_u" oper_char+ as i { (LtUnderUI(r i)) } - | "<_ui" oper_char+ as i { (LtUnderUiI(r i)) } - | "**_s" oper_char+ as i { (StarStarUnderSI(r i)) } - | "**_si" oper_char+ as i { (StarStarUnderSiI(r i)) } - | "*_u" oper_char+ as i { (StarUnderUI(r i)) } - | "*_ui" oper_char+ as i { (StarUnderUiI(r i)) } - | "2^" oper_char+ as i { (TwoCarrotI(r i)) } - + TyId(r i) *) + else Id(r i) } | (digit+ as i1) "." (digit+ as i2) { (Real (i1 ^ "." ^ i2)) } | "-" (digit* as i1) "." (digit+ as i2) { (Real ("-" ^ i1 ^ "." ^ i2)) } - | digit+ as i { (Num(big_int_of_string i)) } - | "-" digit+ as i { (Num(big_int_of_string i)) } - | "0b" (binarydigit+ as i) { (Bin(i)) } - | "0x" (hexdigit+ as i) { (Hex(i)) } + | digit+ as i { (Num(Big_int.of_string i)) } + | "-" digit+ as i { (Num(Big_int.of_string i)) } + | "0b" (binarydigit+ as i) { (Bin(Util.string_of_list "" (fun s -> s) (Util.split_on_char '_' i))) } + | "0x" (hexdigit+ as i) { (Hex(Util.string_of_list "" (fun s -> s) (Util.split_on_char '_' i))) } | '"' { (String( string (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) lexbuf)) } | eof { Eof } @@ -343,8 +271,8 @@ rule token = parse and comment pos depth = parse - | "(*" { comment pos (depth+1) lexbuf } - | "*)" { if depth = 0 then () + | "/*" { comment pos (depth+1) lexbuf } + | "*/" { if depth = 0 then () else if depth > 0 then comment pos (depth-1) lexbuf else assert false } | "\n" { Lexing.new_line lexbuf; |
