summaryrefslogtreecommitdiff
path: root/src/lexer.mll
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer.mll')
-rw-r--r--src/lexer.mll272
1 files changed, 100 insertions, 172 deletions
diff --git a/src/lexer.mll b/src/lexer.mll
index 35ab6627..fb33552b 100644
--- a/src/lexer.mll
+++ b/src/lexer.mll
@@ -50,7 +50,8 @@
{
open Parser
-open Big_int
+module Big_int = Nat_big_num
+open Parse_ast
module M = Map.Make(String)
exception LexError of string * Lexing.position
@@ -58,81 +59,102 @@ let r = fun s -> s (* Ulib.Text.of_latin1 *)
(* Available as Scanf.unescaped since OCaml 4.0 but 3.12 is still common *)
let unescaped s = Scanf.sscanf ("\"" ^ s ^ "\"") "%S%!" (fun x -> x)
+let mk_operator prec n op =
+ match prec, n with
+ | Infix, 0 -> Op0 op
+ | Infix, 1 -> Op1 op
+ | Infix, 2 -> Op2 op
+ | Infix, 3 -> Op3 op
+ | Infix, 4 -> Op4 op
+ | Infix, 5 -> Op5 op
+ | Infix, 6 -> Op6 op
+ | Infix, 7 -> Op7 op
+ | Infix, 8 -> Op8 op
+ | Infix, 9 -> Op9 op
+ | InfixL, 0 -> Op0l op
+ | InfixL, 1 -> Op1l op
+ | InfixL, 2 -> Op2l op
+ | InfixL, 3 -> Op3l op
+ | InfixL, 4 -> Op4l op
+ | InfixL, 5 -> Op5l op
+ | InfixL, 6 -> Op6l op
+ | InfixL, 7 -> Op7l op
+ | InfixL, 8 -> Op8l op
+ | InfixL, 9 -> Op9l op
+ | InfixR, 0 -> Op0r op
+ | InfixR, 1 -> Op1r op
+ | InfixR, 2 -> Op2r op
+ | InfixR, 3 -> Op3r op
+ | InfixR, 4 -> Op4r op
+ | InfixR, 5 -> Op5r op
+ | InfixR, 6 -> Op6r op
+ | InfixR, 7 -> Op7r op
+ | InfixR, 8 -> Op8r op
+ | InfixR, 9 -> Op9r op
+ | _, _ -> assert false
+
+let operators = ref M.empty
+
let kw_table =
List.fold_left
(fun r (x,y) -> M.add x y r)
M.empty
[
("and", (fun _ -> And));
- ("alias", (fun _ -> Alias));
("as", (fun _ -> As));
("assert", (fun _ -> Assert));
("bitzero", (fun _ -> Bitzero));
("bitone", (fun _ -> Bitone));
- ("bits", (fun _ -> Bits));
("by", (fun _ -> By));
- ("case", (fun _ -> Case));
+ ("match", (fun _ -> Match));
("clause", (fun _ -> Clause));
- ("const", (fun _ -> Const));
("dec", (fun _ -> Dec));
- ("def", (fun _ -> Def));
+ ("operator", (fun _ -> Op));
("default", (fun _ -> Default));
- ("deinfix", (fun _ -> Deinfix));
("effect", (fun _ -> Effect));
- ("Effect", (fun _ -> EFFECT));
("end", (fun _ -> End));
- ("enumerate", (fun _ -> Enumerate));
+ ("enum", (fun _ -> Enum));
("else", (fun _ -> Else));
("exit", (fun _ -> Exit));
- ("extern", (fun _ -> Extern));
("cast", (fun _ -> Cast));
("false", (fun _ -> False));
- ("try", (fun _ -> Try));
- ("catch", (fun _ -> Catch));
- ("throw", (fun _ -> Throw));
("forall", (fun _ -> Forall));
- ("exist", (fun _ -> Exist));
("foreach", (fun _ -> Foreach));
("function", (fun x -> Function_));
("overload", (fun _ -> Overload));
+ ("throw", (fun _ -> Throw));
+ ("try", (fun _ -> Try));
+ ("catch", (fun _ -> Catch));
("if", (fun x -> If_));
("in", (fun x -> In));
("inc", (fun _ -> Inc));
- ("IN", (fun x -> IN));
("let", (fun x -> Let_));
- ("member", (fun x -> Member));
- ("Nat", (fun x -> Nat));
- ("Num", (fun x -> NatNum));
+ ("var", (fun _ -> Var));
+ ("ref", (fun _ -> Ref));
+ ("Int", (fun x -> Int));
("Order", (fun x -> Order));
("pure", (fun x -> Pure));
- ("rec", (fun x -> Rec));
("register", (fun x -> Register));
("return", (fun x -> Return));
("scattered", (fun x -> Scattered));
("sizeof", (fun x -> Sizeof));
("constraint", (fun x -> Constraint));
("struct", (fun x -> Struct));
- ("switch", (fun x -> Switch));
("then", (fun x -> Then));
("true", (fun x -> True));
("Type", (fun x -> TYPE));
- ("typedef", (fun x -> Typedef));
+ ("type", (fun x -> Typedef));
("undefined", (fun x -> Undefined));
("union", (fun x -> Union));
+ ("newtype", (fun x -> Newtype));
("with", (fun x -> With));
- ("when", (fun x -> When));
- ("repeat", (fun x -> Repeat));
- ("until", (fun x -> Until));
- ("while", (fun x -> While));
- ("do", (fun x -> Do));
("val", (fun x -> Val));
-
- ("div", (fun x -> Div_));
- ("mod", (fun x -> Mod));
- ("mod_s", (fun x -> ModUnderS));
- ("quot", (fun x -> Quot));
- ("quot_s", (fun x -> QuotUnderS));
- ("rem", (fun x -> Rem));
+ ("repeat", (fun _ -> Repeat));
+ ("until", (fun _ -> Until));
+ ("while", (fun _ -> While));
+ ("do", (fun _ -> Do));
+ ("mutual", (fun _ -> Mutual));
+ ("bitfield", (fun _ -> Bitfield));
("barr", (fun x -> Barr));
("depend", (fun x -> Depend));
@@ -149,25 +171,23 @@ let kw_table =
("unspec", (fun x -> Unspec));
("nondet", (fun x -> Nondet));
("escape", (fun x -> Escape));
+ ]
-]
-let default_type_names = ["bool";"unit";"vector";"range";"list";"bit";"nat"; "int"; "real";
- "uint8";"uint16";"uint32";"uint64";"atom";"implicit";"string";"option"]
-let custom_type_names : string list ref = ref []
}
let ws = [' ''\t']+
-let letter = ['a'-'z''A'-'Z']
+let letter = ['a'-'z''A'-'Z''?']
let digit = ['0'-'9']
-let binarydigit = ['0'-'1']
-let hexdigit = ['0'-'9''A'-'F''a'-'f']
+let binarydigit = ['0'-'1''_']
+let hexdigit = ['0'-'9''A'-'F''a'-'f''_']
let alphanum = letter|digit
let startident = letter|'_'
let ident = alphanum|['_''\'''#']
let tyvar_start = '\''
-let oper_char = ['!''$''%''&''*''+''-''.''/'':''<''=''>''?''@''^''|''~']
+let oper_char = ['!''$''%''&''*''+''-''.''/'':''<''=''>''@''^''|']
+let operator = (oper_char+ ('_' ident)?)
let escape_sequence = ('\\' ['\\''\"''\'''n''t''b''r']) | ('\\' digit digit digit) | ('\\' 'x' hexdigit hexdigit)
rule token = parse
@@ -176,164 +196,72 @@ rule token = parse
| "\n"
{ Lexing.new_line lexbuf;
token lexbuf }
-
| "&" { (Amp(r"&")) }
- | "@" { (At(r"@")) }
- | "|" { Bar }
- | "^" { (Carrot(r"^")) }
+ | "@" { (At "@") }
+ | "2" ws "^" { TwoCaret }
+ | "^" { (Caret(r"^")) }
+ | "::" { ColonColon(r "::") }
| ":" { Colon(r ":") }
| "," { Comma }
+ | ".." { DotDot }
| "." { Dot }
+ | "==" as op
+ { try M.find op !operators
+ with Not_found -> raise (LexError ("Operator fixity undeclared " ^ op, Lexing.lexeme_start_p lexbuf)) }
| "=" { (Eq(r"=")) }
- | "!" { (Excl(r"!")) }
| ">" { (Gt(r">")) }
| "-" { Minus }
| "<" { (Lt(r"<")) }
| "+" { (Plus(r"+")) }
| ";" { Semi }
| "*" { (Star(r"*")) }
- | "~" { (Tilde(r"~")) }
| "_" { Under }
+ | "[|" { LsquareBar }
+ | "|]" { RsquareBar }
+ | "{|" { LcurlyBar }
+ | "|}" { RcurlyBar }
+ | "|" { Bar }
| "{" { Lcurly }
| "}" { Rcurly }
+ | "()" { Unit(r"()") }
| "(" { Lparen }
| ")" { Rparen }
| "[" { Lsquare }
| "]" { Rsquare }
- | "&&" as i { (AmpAmp(r i)) }
- | "||" { BarBar }
- | "||]" { BarBarSquare }
- | "|]" { BarSquare }
- | "^^" { (CarrotCarrot(r"^^")) }
- | "::" as i { (ColonColon(r i)) }
- | ":=" { ColonEq }
- | ":>" { ColonGt }
- | ":]" { ColonSquare }
- | ".." { DotDot }
- | "==" { (EqEq(r"==")) }
| "!=" { (ExclEq(r"!=")) }
- | "!!" { (ExclExcl(r"!!")) }
| ">=" { (GtEq(r">=")) }
- | ">=+" { (GtEqPlus(r">=+")) }
- | ">>" { (GtGt(r">>")) }
- | ">>>" { (GtGtGt(r">>>")) }
- | ">+" { (GtPlus(r">+")) }
- | "#>>" { (HashGtGt(r"#>>")) }
- | "#<<" { (HashLtLt(r"#<<")) }
| "->" { MinusGt }
- | "<:" { LtColon }
+ | "=>" { EqGt(r "=>") }
| "<=" { (LtEq(r"<=")) }
- | "<=+" { (LtEqPlus(r"<=+")) }
- | "<>" { (LtGt(r"<>")) }
- | "<<" { (LtLt(r"<<")) }
- | "<<<" { (LtLtLt(r"<<<")) }
- | "<+" { (LtPlus(r"<+")) }
- | "**" { (StarStar(r"**")) }
- | "[|" { SquareBar }
- | "[||" { SquareBarBar }
- | "[:" { SquareColon }
- | "~^" { (TildeCarrot(r"~^")) }
-
- | "+_s" { (PlusUnderS(r"+_s")) }
- | "-_s" { (MinusUnderS(r"-_s")) }
- | ">=_s" { (GtEqUnderS(r">=_s")) }
- | ">=_si" { (GtEqUnderSi(r">=_si")) }
- | ">=_u" { (GtEqUnderU(r">=_u")) }
- | ">=_ui" { (GtEqUnderUi(r">=_ui")) }
- | ">>_u" { (GtGtUnderU(r">>_u")) }
- | ">_s" { (GtUnderS(r">_s")) }
- | ">_si" { (GtUnderSi(r">_si")) }
- | ">_u" { (GtUnderU(r">_u")) }
- | ">_ui" { (GtUnderUi(r">_ui")) }
- | "<=_s" { (LtEqUnderS(r"<=_s")) }
- | "<=_si" { (LtEqUnderSi(r"<=_si")) }
- | "<=_u" { (LtEqUnderU(r"<=_u")) }
- | "<=_ui" { (LtEqUnderUi(r"<=_ui")) }
- | "<_s" { (LtUnderS(r"<_s")) }
- | "<_si" { (LtUnderSi(r"<_si")) }
- | "<_u" { (LtUnderU(r"<_u")) }
- | "<_ui" { (LtUnderUi(r"<_ui")) }
- | "*_s" { (StarUnderS(r"*_s")) }
- | "**_s" { (StarStarUnderS(r"**_s")) }
- | "**_si" { (StarStarUnderSi(r"**_si")) }
- | "*_u" { (StarUnderU(r"*_u")) }
- | "*_ui" { (StarUnderUi(r"*_ui")) }
- | "2^" { (TwoCarrot(r"2^")) }
- | "2**" { TwoStarStar }
-
-
- | "(*" { comment (Lexing.lexeme_start_p lexbuf) 0 lexbuf; token lexbuf }
- | "*)" { raise (LexError("Unbalanced comment", Lexing.lexeme_start_p lexbuf)) }
-
+ | "/*" { comment (Lexing.lexeme_start_p lexbuf) 0 lexbuf; token lexbuf }
+ | "*/" { raise (LexError("Unbalanced comment", Lexing.lexeme_start_p lexbuf)) }
+ | "infix" ws (digit as p) ws (operator as op)
+ { operators := M.add op (mk_operator Infix (int_of_string (Char.escaped p)) op) !operators;
+ Fixity (Infix, Big_int.of_string (Char.escaped p), op) }
+ | "infixl" ws (digit as p) ws (operator as op)
+ { operators := M.add op (mk_operator InfixL (int_of_string (Char.escaped p)) op) !operators;
+ Fixity (InfixL, Big_int.of_string (Char.escaped p), op) }
+ | "infixr" ws (digit as p) ws (operator as op)
+ { operators := M.add op (mk_operator InfixR (int_of_string (Char.escaped p)) op) !operators;
+ Fixity (InfixR, Big_int.of_string (Char.escaped p), op) }
+ | operator as op
+ { try M.find op !operators
+ with Not_found -> raise (LexError ("Operator fixity undeclared " ^ op, Lexing.lexeme_start_p lexbuf)) }
| tyvar_start startident ident* as i { TyVar(r i) }
+ | "~" { Id(r"~") }
| startident ident* as i { if M.mem i kw_table then
(M.find i kw_table) ()
- else if
+ (* else if
List.mem i default_type_names ||
List.mem i !custom_type_names then
- TyId(r i)
- else Id(r i) }
- | "&" oper_char+ as i { (AmpI(r i)) }
- | "@" oper_char+ as i { (AtI(r i)) }
- | "^" oper_char+ as i { (CarrotI(r i)) }
- | "/" oper_char+ as i { (DivI(r i)) }
- | "=" oper_char+ as i { (EqI(r i)) }
- | "!" oper_char+ as i { (ExclI(r i)) }
- | ">" oper_char+ as i { (GtI(r i)) }
- | "<" oper_char+ as i { (LtI(r i)) }
- | "+" oper_char+ as i { (PlusI(r i)) }
- | "*" oper_char+ as i { (StarI(r i)) }
- | "~" oper_char+ as i { (TildeI(r i)) }
- | "&&" oper_char+ as i { (AmpAmpI(r i)) }
- | "^^" oper_char+ as i { (CarrotCarrotI(r i)) }
- | "::" oper_char+ as i { (ColonColonI(r i)) }
- | "==" oper_char+ as i { (EqEqI(r i)) }
- | "!=" oper_char+ as i { (ExclEqI(r i)) }
- | "!!" oper_char+ as i { (ExclExclI(r i)) }
- | ">=" oper_char+ as i { (GtEqI(r i)) }
- | ">=+" oper_char+ as i { (GtEqPlusI(r i)) }
- | ">>" oper_char+ as i { (GtGtI(r i)) }
- | ">>>" oper_char+ as i { (GtGtGtI(r i)) }
- | ">+" oper_char+ as i { (GtPlusI(r i)) }
- | "#>>" oper_char+ as i { (HashGtGt(r i)) }
- | "#<<" oper_char+ as i { (HashLtLt(r i)) }
- | "<=" oper_char+ as i { (LtEqI(r i)) }
- | "<=+" oper_char+ as i { (LtEqPlusI(r i)) }
- | "<<" oper_char+ as i { (LtLtI(r i)) }
- | "<<<" oper_char+ as i { (LtLtLtI(r i)) }
- | "<+" oper_char+ as i { (LtPlusI(r i)) }
- | "**" oper_char+ as i { (StarStarI(r i)) }
- | "~^" oper_char+ as i { (TildeCarrot(r i)) }
-
- | ">=_s" oper_char+ as i { (GtEqUnderSI(r i)) }
- | ">=_si" oper_char+ as i { (GtEqUnderSiI(r i)) }
- | ">=_u" oper_char+ as i { (GtEqUnderUI(r i)) }
- | ">=_ui" oper_char+ as i { (GtEqUnderUiI(r i)) }
- | ">>_u" oper_char+ as i { (GtGtUnderUI(r i)) }
- | ">_s" oper_char+ as i { (GtUnderSI(r i)) }
- | ">_si" oper_char+ as i { (GtUnderSiI(r i)) }
- | ">_u" oper_char+ as i { (GtUnderUI(r i)) }
- | ">_ui" oper_char+ as i { (GtUnderUiI(r i)) }
- | "<=_s" oper_char+ as i { (LtEqUnderSI(r i)) }
- | "<=_si" oper_char+ as i { (LtEqUnderSiI(r i)) }
- | "<=_u" oper_char+ as i { (LtEqUnderUI(r i)) }
- | "<=_ui" oper_char+ as i { (LtEqUnderUiI(r i)) }
- | "<_s" oper_char+ as i { (LtUnderSI(r i)) }
- | "<_si" oper_char+ as i { (LtUnderSiI(r i)) }
- | "<_u" oper_char+ as i { (LtUnderUI(r i)) }
- | "<_ui" oper_char+ as i { (LtUnderUiI(r i)) }
- | "**_s" oper_char+ as i { (StarStarUnderSI(r i)) }
- | "**_si" oper_char+ as i { (StarStarUnderSiI(r i)) }
- | "*_u" oper_char+ as i { (StarUnderUI(r i)) }
- | "*_ui" oper_char+ as i { (StarUnderUiI(r i)) }
- | "2^" oper_char+ as i { (TwoCarrotI(r i)) }
-
+ TyId(r i) *)
+ else Id(r i) }
| (digit+ as i1) "." (digit+ as i2) { (Real (i1 ^ "." ^ i2)) }
| "-" (digit* as i1) "." (digit+ as i2) { (Real ("-" ^ i1 ^ "." ^ i2)) }
- | digit+ as i { (Num(big_int_of_string i)) }
- | "-" digit+ as i { (Num(big_int_of_string i)) }
- | "0b" (binarydigit+ as i) { (Bin(i)) }
- | "0x" (hexdigit+ as i) { (Hex(i)) }
+ | digit+ as i { (Num(Big_int.of_string i)) }
+ | "-" digit+ as i { (Num(Big_int.of_string i)) }
+ | "0b" (binarydigit+ as i) { (Bin(Util.string_of_list "" (fun s -> s) (Util.split_on_char '_' i))) }
+ | "0x" (hexdigit+ as i) { (Hex(Util.string_of_list "" (fun s -> s) (Util.split_on_char '_' i))) }
| '"' { (String(
string (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) lexbuf)) }
| eof { Eof }
@@ -343,8 +271,8 @@ rule token = parse
and comment pos depth = parse
- | "(*" { comment pos (depth+1) lexbuf }
- | "*)" { if depth = 0 then ()
+ | "/*" { comment pos (depth+1) lexbuf }
+ | "*/" { if depth = 0 then ()
else if depth > 0 then comment pos (depth-1) lexbuf
else assert false }
| "\n" { Lexing.new_line lexbuf;