summaryrefslogtreecommitdiff
path: root/src/pre_lexer.mll
diff options
context:
space:
mode:
authorKathy Gray2013-11-22 15:11:46 +0000
committerKathy Gray2013-11-22 15:12:04 +0000
commit994df0138776a9151b52c1210d4b0e57aafb4ced (patch)
tree9e396429d8522bf4805a3b8725f30d6d5c2de7e7 /src/pre_lexer.mll
parent81ccd74f0cc1dbd7bacbeadd86250edf7d6b244a (diff)
Syntax changes per discussions on Thursday.
First pass parser to identify type names is in progress (current test files fail, will correct once pre-parser is in place)
Diffstat (limited to 'src/pre_lexer.mll')
-rw-r--r--src/pre_lexer.mll319
1 files changed, 319 insertions, 0 deletions
diff --git a/src/pre_lexer.mll b/src/pre_lexer.mll
new file mode 100644
index 00000000..6955c7c3
--- /dev/null
+++ b/src/pre_lexer.mll
@@ -0,0 +1,319 @@
+(**************************************************************************)
+(* Lem *)
+(* *)
+(* Dominic Mulligan, University of Cambridge *)
+(* Francesco Zappa Nardelli, INRIA Paris-Rocquencourt *)
+(* Gabriel Kerneis, University of Cambridge *)
+(* Kathy Gray, University of Cambridge *)
+(* Peter Boehm, University of Cambridge (while working on Lem) *)
+(* Peter Sewell, University of Cambridge *)
+(* Scott Owens, University of Kent *)
+(* Thomas Tuerk, University of Cambridge *)
+(* *)
+(* The Lem sources are copyright 2010-2013 *)
+(* by the UK authors above and Institut National de Recherche en *)
+(* Informatique et en Automatique (INRIA). *)
+(* *)
+(* All files except ocaml-lib/pmap.{ml,mli} and ocaml-libpset.{ml,mli} *)
+(* are distributed under the license below. The former are distributed *)
+(* under the LGPLv2, as in the LICENSE file. *)
+(* *)
+(* *)
+(* Redistribution and use in source and binary forms, with or without *)
+(* modification, are permitted provided that the following conditions *)
+(* are met: *)
+(* 1. Redistributions of source code must retain the above copyright *)
+(* notice, this list of conditions and the following disclaimer. *)
+(* 2. Redistributions in binary form must reproduce the above copyright *)
+(* notice, this list of conditions and the following disclaimer in the *)
+(* documentation and/or other materials provided with the distribution. *)
+(* 3. The names of the authors may not be used to endorse or promote *)
+(* products derived from this software without specific prior written *)
+(* permission. *)
+(* *)
+(* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS *)
+(* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *)
+(* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE *)
+(* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY *)
+(* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL *)
+(* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE *)
+(* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *)
+(* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER *)
+(* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR *)
+(* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *)
+(* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *)
+(**************************************************************************)
+
+{
+open Parser
+module M = Map.Make(String)
+exception LexError of string * Lexing.position
+
+let r = fun s -> s (* Ulib.Text.of_latin1 *)
+(* Available as Scanf.unescaped since OCaml 4.0 but 3.12 is still common *)
+let unescaped s = Scanf.sscanf ("\"" ^ s ^ "\"") "%S%!" (fun x -> x)
+
+let kw_table =
+ List.fold_left
+ (fun r (x,y) -> M.add x y r)
+ M.empty
+ [
+ ("and", (fun _ -> And));
+ ("as", (fun _ -> As));
+ ("bits", (fun _ -> Bits));
+ ("by", (fun _ -> By));
+ ("case", (fun _ -> Case));
+ ("clause", (fun _ -> Clause));
+ ("const", (fun _ -> Const));
+ ("dec", (fun _ -> Dec));
+ ("default", (fun _ -> Default));
+ ("deinfix", (fun _ -> Deinfix));
+ ("effect", (fun _ -> Effect));
+ ("Effects", (fun _ -> Effects));
+ ("end", (fun _ -> End));
+ ("enumerate", (fun _ -> Enumerate));
+ ("else", (fun _ -> Else));
+ ("extern", (fun _ -> Extern));
+ ("false", (fun _ -> False));
+ ("forall", (fun _ -> Forall));
+ ("foreach", (fun _ -> Foreach));
+ ("function", (fun x -> Function_));
+ ("if", (fun x -> If_));
+ ("in", (fun x -> In));
+ ("IN", (fun x -> IN));
+ ("Inc", (fun x -> Inc));
+ ("let", (fun x -> Let_));
+ ("member", (fun x -> Member));
+ ("Nat", (fun x -> Nat));
+ ("Order", (fun x -> Order));
+ ("pure", (fun x -> Pure));
+ ("rec", (fun x -> Rec));
+ ("register", (fun x -> Register));
+ ("scattered", (fun x -> Scattered));
+ ("struct", (fun x -> Struct));
+ ("switch", (fun x -> Switch));
+ ("then", (fun x -> Then));
+ ("true", (fun x -> True));
+ ("Type", (fun x -> TYPE));
+ ("typedef", (fun x -> Typedef));
+ ("undefined", (fun x -> Undefined));
+ ("union", (fun x -> Union));
+ ("with", (fun x -> With));
+ ("val", (fun x -> Val));
+
+ ("AND", (fun x -> AND));
+ ("div", (fun x -> Div_));
+ ("EOR", (fun x -> EOR));
+ ("mod", (fun x -> Mod));
+ ("OR", (fun x -> OR));
+ ("quot", (fun x -> Quot));
+ ("rem", (fun x -> Rem));
+]
+
+}
+
+let ws = [' ''\t']+
+let lowerletter = ['a'-'z']
+let upperletter = ['A'-'Z']
+let letter = ['a'-'z''A'-'Z']
+let digit = ['0'-'9']
+let binarydigit = ['0'-'1']
+let hexdigit = ['0'-'9''A'-'F''a'-'f']
+let alphanum = letter|digit
+let startident = lowerletter|'_'
+let ident = alphanum|['_''\'']
+let starttident = upperletter
+let oper_char = ['!''$''%''&''*''+''-''.''/'':''<''=''>''?''@''^''|''~']
+let escape_sequence = ('\\' ['\\''\"''\'''n''t''b''r']) | ('\\' digit digit digit) | ('\\' 'x' hexdigit hexdigit)
+
+rule token = parse
+ | ws
+ { token lexbuf }
+ | "\n"
+ { Lexing.new_line lexbuf;
+ token lexbuf }
+
+ | "2**" { TwoStarStar }
+ | "&" { (Amp(r"&")) }
+ | "@" { (At(r"@")) }
+ | "|" { Bar }
+ | "^" { (Carrot(r"^")) }
+ | ":" { Colon }
+ | "," { Comma }
+ | "." { Dot }
+ | "/" { (Div(r "/")) }
+ | "=" { (Eq(r"=")) }
+ | "!" { (Excl(r"!")) }
+ | ">" { (Gt(r">")) }
+ | "-" { Minus }
+ | "<" { (Lt(r"<")) }
+ | "+" { (Plus(r"+")) }
+ | ";" { Semi }
+ | "*" { (Star(r"*")) }
+ | "~" { (Tilde(r"~")) }
+ | "_" { Under }
+ | "{" { Lcurly }
+ | "}" { Rcurly }
+ | "(" { Lparen }
+ | ")" { Rparen }
+ | "[" { Lsquare }
+ | "]" { Rsquare }
+ | "&&" as i { (AmpAmp(r i)) }
+ | "||" { BarBar }
+ | "|]" { BarSquare }
+ | "||]" { BarBarSquare }
+ | "^^" { (CarrotCarrot(r"^^")) }
+ | "::" as i { (ColonColon(r i)) }
+ | ":=" { ColonEq }
+ | ".." { DotDot }
+ | "=/=" { (EqDivEq(r"=/=")) }
+ | "==" { (EqEq(r"==")) }
+ | "!=" { (ExclEq(r"!=")) }
+ | "!!" { (ExclExcl(r"!!")) }
+ | ">=" { (GtEq(r">=")) }
+ | ">=+" { (GtEqPlus(r">=+")) }
+ | ">>" { (GtGt(r">>")) }
+ | ">>>" { (GtGtGt(r">>>")) }
+ | ">+" { (GtPlus(r">+")) }
+ | "#>>" { (HashGtGt(r"#>>")) }
+ | "#<<" { (HashLtLt(r"#<<")) }
+ | "->" { MinusGt }
+ | "<=" { (LtEq(r"<=")) }
+ | "<=+" { (LtEqPlus(r"<=+")) }
+ | "<>" { (LtGt(r"<>")) }
+ | "<<" { (LtLt(r"<<")) }
+ | "<<<" { (LtLtLt(r"<<<")) }
+ | "<+" { (LtPlus(r"<+")) }
+ | "**" { (StarStar(r"**")) }
+ | "~^" { (TildeCarrot(r"~^")) }
+
+ | ">=_s" { (GtEqUnderS(r">=_s")) }
+ | ">=_si" { (GtEqUnderSi(r">=_si")) }
+ | ">=_u" { (GtEqUnderU(r">=_u")) }
+ | ">=_ui" { (GtEqUnderUi(r">=_ui")) }
+ | ">>_u" { (GtGtUnderU(r">>_u")) }
+ | ">_s" { (GtUnderS(r">_s")) }
+ | ">_si" { (GtUnderSi(r">_si")) }
+ | ">_u" { (GtUnderU(r">_u")) }
+ | ">_ui" { (GtUnderUi(r">_ui")) }
+ | "<=_s" { (LtEqUnderS(r"<=_s")) }
+ | "<=_si" { (LtEqUnderSi(r"<=_si")) }
+ | "<=_u" { (LtEqUnderU(r"<=_u")) }
+ | "<=_ui" { (LtEqUnderUi(r"<=_ui")) }
+ | "<_s" { (LtUnderS(r"<_s")) }
+ | "<_si" { (LtUnderSi(r"<_si")) }
+ | "<_u" { (LtUnderU(r"<_u")) }
+ | "<_ui" { (LtUnderUi(r"<_ui")) }
+ | "**_s" { (StarStarUnderS(r"**_s")) }
+ | "**_si" { (StarStarUnderSi(r"**_si")) }
+ | "*_u" { (StarUnderU(r"*_u")) }
+ | "*_ui" { (StarUnderUi(r"*_ui")) }
+ | "2^" { (TwoCarrot(r"2^")) }
+
+
+ | "(*" { comment (Lexing.lexeme_start_p lexbuf) 0 lexbuf; token lexbuf }
+ | "*)" { raise (LexError("Unbalanced comment", Lexing.lexeme_start_p lexbuf)) }
+
+ | startident ident* as i { if M.mem i kw_table then
+ (M.find i kw_table) ()
+ else
+ Id(r i) }
+ | starttident ident* as i { if M.mem i kw_table then
+ (M.find i kw_table) ()
+ else
+ TyId(r i) }
+ | "&" oper_char+ as i { (AmpI(r i)) }
+ | "@" oper_char+ as i { (AtI(r i)) }
+ | "^" oper_char+ as i { (CarrotI(r i)) }
+ | "/" oper_char+ as i { (DivI(r i)) }
+ | "=" oper_char+ as i { (EqI(r i)) }
+ | "!" oper_char+ as i { (ExclI(r i)) }
+ | ">" oper_char+ as i { (GtI(r i)) }
+ | "<" oper_char+ as i { (LtI(r i)) }
+ | "+" oper_char+ as i { (PlusI(r i)) }
+ | "*" oper_char+ as i { (StarI(r i)) }
+ | "~" oper_char+ as i { (TildeI(r i)) }
+ | "&&" oper_char+ as i { (AmpAmpI(r i)) }
+ | "^^" oper_char+ as i { (CarrotCarrotI(r i)) }
+ | "::" oper_char+ as i { (ColonColonI(r i)) }
+ | "=/=" oper_char+ as i { (EqDivEqI(r i)) }
+ | "==" oper_char+ as i { (EqEqI(r i)) }
+ | "!=" oper_char+ as i { (ExclEqI(r i)) }
+ | "!!" oper_char+ as i { (ExclExclI(r i)) }
+ | ">=" oper_char+ as i { (GtEqI(r i)) }
+ | ">=+" oper_char+ as i { (GtEqPlusI(r i)) }
+ | ">>" oper_char+ as i { (GtGtI(r i)) }
+ | ">>>" oper_char+ as i { (GtGtGtI(r i)) }
+ | ">+" oper_char+ as i { (GtPlusI(r i)) }
+ | "#>>" oper_char+ as i { (HashGtGt(r i)) }
+ | "#<<" oper_char+ as i { (HashLtLt(r i)) }
+ | "<=" oper_char+ as i { (LtEqI(r i)) }
+ | "<=+" oper_char+ as i { (LtEqPlusI(r i)) }
+ | "<<" oper_char+ as i { (LtLtI(r i)) }
+ | "<<<" oper_char+ as i { (LtLtLtI(r i)) }
+ | "<+" oper_char+ as i { (LtPlusI(r i)) }
+ | "**" oper_char+ as i { (StarStarI(r i)) }
+ | "~^" oper_char+ as i { (TildeCarrot(r i)) }
+
+ | ">=_s" oper_char+ as i { (GtEqUnderSI(r i)) }
+ | ">=_si" oper_char+ as i { (GtEqUnderSiI(r i)) }
+ | ">=_u" oper_char+ as i { (GtEqUnderUI(r i)) }
+ | ">=_ui" oper_char+ as i { (GtEqUnderUiI(r i)) }
+ | ">>_u" oper_char+ as i { (GtGtUnderUI(r i)) }
+ | ">_s" oper_char+ as i { (GtUnderSI(r i)) }
+ | ">_si" oper_char+ as i { (GtUnderSiI(r i)) }
+ | ">_u" oper_char+ as i { (GtUnderUI(r i)) }
+ | ">_ui" oper_char+ as i { (GtUnderUiI(r i)) }
+ | "<=_s" oper_char+ as i { (LtEqUnderSI(r i)) }
+ | "<=_si" oper_char+ as i { (LtEqUnderSiI(r i)) }
+ | "<=_u" oper_char+ as i { (LtEqUnderUI(r i)) }
+ | "<=_ui" oper_char+ as i { (LtEqUnderUiI(r i)) }
+ | "<_s" oper_char+ as i { (LtUnderSI(r i)) }
+ | "<_si" oper_char+ as i { (LtUnderSiI(r i)) }
+ | "<_u" oper_char+ as i { (LtUnderUI(r i)) }
+ | "<_ui" oper_char+ as i { (LtUnderUiI(r i)) }
+ | "**_s" oper_char+ as i { (StarStarUnderSI(r i)) }
+ | "**_si" oper_char+ as i { (StarStarUnderSiI(r i)) }
+ | "*_u" oper_char+ as i { (StarUnderUI(r i)) }
+ | "*_ui" oper_char+ as i { (StarUnderUiI(r i)) }
+ | "2^" oper_char+ as i { (TwoCarrotI(r i)) }
+
+ | digit+ as i { (Num(int_of_string i)) }
+ | "0b" (binarydigit+ as i) { (Bin(i)) }
+ | "0x" (hexdigit+ as i) { (Hex(i)) }
+ | '"' { (String(
+ string (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) lexbuf)) }
+ | eof { Eof }
+ | _ as c { raise (LexError(
+ Printf.sprintf "Unexpected character: %c" c,
+ Lexing.lexeme_start_p lexbuf)) }
+
+
+and comment pos depth = parse
+ | "(*" { comment pos (depth+1) lexbuf }
+ | "*)" { if depth = 0 then ()
+ else if depth > 0 then comment pos (depth-1) lexbuf
+ else assert false }
+ | "\n" { Lexing.new_line lexbuf;
+ comment pos depth lexbuf }
+ | '"' { ignore(string (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) lexbuf);
+ comment pos depth lexbuf }
+ | _ { comment pos depth lexbuf }
+ | eof { raise (LexError("Unbalanced comment", pos)) }
+
+and string pos b = parse
+ | ([^'"''\n''\\']*'\n' as i) { Lexing.new_line lexbuf;
+ Buffer.add_string b i;
+ string pos b lexbuf }
+ | ([^'"''\n''\\']* as i) { Buffer.add_string b i; string pos b lexbuf }
+ | escape_sequence as i { Buffer.add_string b i; string pos b lexbuf }
+ | '\\' '\n' ws { Lexing.new_line lexbuf; string pos b lexbuf }
+ | '\\' { assert false (*raise (Reporting_basic.Fatal_error (Reporting_basic.Err_syntax (pos,
+ "illegal backslash escape in string"*) }
+ | '"' { let s = unescaped(Buffer.contents b) in
+ (*try Ulib.UTF8.validate s; s
+ with Ulib.UTF8.Malformed_code ->
+ raise (Reporting_basic.Fatal_error (Reporting_basic.Err_syntax (pos,
+ "String literal is not valid utf8"))) *) s }
+ | eof { assert false (*raise (Reporting_basic.Fatal_error (Reporting_basic.Err_syntax (pos,
+ "String literal not terminated")))*) }