summaryrefslogtreecommitdiff
path: root/src/lexer2.mll
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer2.mll')
-rw-r--r--src/lexer2.mll294
1 files changed, 294 insertions, 0 deletions
diff --git a/src/lexer2.mll b/src/lexer2.mll
new file mode 100644
index 00000000..04ecdca3
--- /dev/null
+++ b/src/lexer2.mll
@@ -0,0 +1,294 @@
+(**************************************************************************)
+(* Sail *)
+(* *)
+(* Copyright (c) 2013-2017 *)
+(* Kathyrn Gray *)
+(* Shaked Flur *)
+(* Stephen Kell *)
+(* Gabriel Kerneis *)
+(* Robert Norton-Wright *)
+(* Christopher Pulte *)
+(* Peter Sewell *)
+(* Alasdair Armstrong *)
+(* Brian Campbell *)
+(* Thomas Bauereiss *)
+(* Anthony Fox *)
+(* Jon French *)
+(* Dominic Mulligan *)
+(* Stephen Kell *)
+(* Mark Wassell *)
+(* *)
+(* All rights reserved. *)
+(* *)
+(* This software was developed by the University of Cambridge Computer *)
+(* Laboratory as part of the Rigorous Engineering of Mainstream Systems *)
+(* (REMS) project, funded by EPSRC grant EP/K008528/1. *)
+(* *)
+(* Redistribution and use in source and binary forms, with or without *)
+(* modification, are permitted provided that the following conditions *)
+(* are met: *)
+(* 1. Redistributions of source code must retain the above copyright *)
+(* notice, this list of conditions and the following disclaimer. *)
+(* 2. Redistributions in binary form must reproduce the above copyright *)
+(* notice, this list of conditions and the following disclaimer in *)
+(* the documentation and/or other materials provided with the *)
+(* distribution. *)
+(* *)
+(* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' *)
+(* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *)
+(* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A *)
+(* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR *)
+(* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, *)
+(* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT *)
+(* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF *)
+(* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *)
+(* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, *)
+(* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT *)
+(* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF *)
+(* SUCH DAMAGE. *)
+(**************************************************************************)
+
+{
+open Parser2
+module Big_int = Nat_big_num
+open Parse_ast
+module M = Map.Make(String)
+exception LexError of string * Lexing.position
+
+let r = fun s -> s (* Ulib.Text.of_latin1 *)
+(* Available as Scanf.unescaped since OCaml 4.0 but 3.12 is still common *)
+let unescaped s = Scanf.sscanf ("\"" ^ s ^ "\"") "%S%!" (fun x -> x)
+
+let mk_operator prec n op =
+ match prec, n with
+ | Infix, 0 -> Op0 op
+ | Infix, 1 -> Op1 op
+ | Infix, 2 -> Op2 op
+ | Infix, 3 -> Op3 op
+ | Infix, 4 -> Op4 op
+ | Infix, 5 -> Op5 op
+ | Infix, 6 -> Op6 op
+ | Infix, 7 -> Op7 op
+ | Infix, 8 -> Op8 op
+ | Infix, 9 -> Op9 op
+ | InfixL, 0 -> Op0l op
+ | InfixL, 1 -> Op1l op
+ | InfixL, 2 -> Op2l op
+ | InfixL, 3 -> Op3l op
+ | InfixL, 4 -> Op4l op
+ | InfixL, 5 -> Op5l op
+ | InfixL, 6 -> Op6l op
+ | InfixL, 7 -> Op7l op
+ | InfixL, 8 -> Op8l op
+ | InfixL, 9 -> Op9l op
+ | InfixR, 0 -> Op0r op
+ | InfixR, 1 -> Op1r op
+ | InfixR, 2 -> Op2r op
+ | InfixR, 3 -> Op3r op
+ | InfixR, 4 -> Op4r op
+ | InfixR, 5 -> Op5r op
+ | InfixR, 6 -> Op6r op
+ | InfixR, 7 -> Op7r op
+ | InfixR, 8 -> Op8r op
+ | InfixR, 9 -> Op9r op
+ | _, _ -> assert false
+
+let operators = ref M.empty
+
+let kw_table =
+ List.fold_left
+ (fun r (x,y) -> M.add x y r)
+ M.empty
+ [
+ ("and", (fun _ -> And));
+ ("as", (fun _ -> As));
+ ("assert", (fun _ -> Assert));
+ ("bitzero", (fun _ -> Bitzero));
+ ("bitone", (fun _ -> Bitone));
+ ("by", (fun _ -> By));
+ ("match", (fun _ -> Match));
+ ("clause", (fun _ -> Clause));
+ ("dec", (fun _ -> Dec));
+ ("operator", (fun _ -> Op));
+ ("default", (fun _ -> Default));
+ ("effect", (fun _ -> Effect));
+ ("end", (fun _ -> End));
+ ("enum", (fun _ -> Enum));
+ ("else", (fun _ -> Else));
+ ("exit", (fun _ -> Exit));
+ ("cast", (fun _ -> Cast));
+ ("false", (fun _ -> False));
+ ("forall", (fun _ -> Forall));
+ ("foreach", (fun _ -> Foreach));
+ ("function", (fun x -> Function_));
+ ("overload", (fun _ -> Overload));
+ ("throw", (fun _ -> Throw));
+ ("try", (fun _ -> Try));
+ ("catch", (fun _ -> Catch));
+ ("if", (fun x -> If_));
+ ("in", (fun x -> In));
+ ("inc", (fun _ -> Inc));
+ ("let", (fun x -> Let_));
+ ("record", (fun _ -> Record));
+ ("Int", (fun x -> Int));
+ ("Order", (fun x -> Order));
+ ("pure", (fun x -> Pure));
+ ("register", (fun x -> Register));
+ ("return", (fun x -> Return));
+ ("scattered", (fun x -> Scattered));
+ ("sizeof", (fun x -> Sizeof));
+ ("constraint", (fun x -> Constraint));
+ ("struct", (fun x -> Struct));
+ ("then", (fun x -> Then));
+ ("true", (fun x -> True));
+ ("Type", (fun x -> TYPE));
+ ("type", (fun x -> Typedef));
+ ("undefined", (fun x -> Undefined));
+ ("union", (fun x -> Union));
+ ("with", (fun x -> With));
+ ("val", (fun x -> Val));
+ ("repeat", (fun _ -> Repeat));
+ ("until", (fun _ -> Until));
+ ("while", (fun _ -> While));
+ ("do", (fun _ -> Do));
+ ("mutual", (fun _ -> Mutual));
+
+ ("barr", (fun x -> Barr));
+ ("depend", (fun x -> Depend));
+ ("rreg", (fun x -> Rreg));
+ ("wreg", (fun x -> Wreg));
+ ("rmem", (fun x -> Rmem));
+ ("rmemt", (fun x -> Rmemt));
+ ("wmem", (fun x -> Wmem));
+ ("wmv", (fun x -> Wmv));
+ ("wmvt", (fun x -> Wmvt));
+ ("eamem", (fun x -> Eamem));
+ ("exmem", (fun x -> Exmem));
+ ("undef", (fun x -> Undef));
+ ("unspec", (fun x -> Unspec));
+ ("nondet", (fun x -> Nondet));
+ ("escape", (fun x -> Escape));
+ ]
+
+}
+
+let ws = [' ''\t']+
+let letter = ['a'-'z''A'-'Z''?']
+let digit = ['0'-'9']
+let binarydigit = ['0'-'1']
+let hexdigit = ['0'-'9''A'-'F''a'-'f']
+let alphanum = letter|digit
+let startident = letter|'_'
+let ident = alphanum|['_''\'''#']
+let tyvar_start = '\''
+let oper_char = ['!''$''%''&''*''+''-''.''/'':''<''=''>''@''^''|']
+let operator = (oper_char+ ('_' ident)?)
+let escape_sequence = ('\\' ['\\''\"''\'''n''t''b''r']) | ('\\' digit digit digit) | ('\\' 'x' hexdigit hexdigit)
+
+rule token = parse
+ | ws
+ { token lexbuf }
+ | "\n"
+ { Lexing.new_line lexbuf;
+ token lexbuf }
+ | "&" { (Amp(r"&")) }
+ | "@" { (At "@") }
+ | "2" ws "^" { TwoCaret }
+ | "^" { (Caret(r"^")) }
+ | ":" { Colon(r ":") }
+ | "," { Comma }
+ | ".." { DotDot }
+ | "." { Dot }
+ | "==" as op
+ { try M.find op !operators
+ with Not_found -> raise (LexError ("Operator fixity undeclared " ^ op, Lexing.lexeme_start_p lexbuf)) }
+ | "=" { (Eq(r"=")) }
+ | ">" { (Gt(r">")) }
+ | "-" { Minus }
+ | "<" { (Lt(r"<")) }
+ | "+" { (Plus(r"+")) }
+ | ";" { Semi }
+ | "*" { (Star(r"*")) }
+ | "_" { Under }
+ | "[|" { LsquareBar }
+ | "|]" { RsquareBar }
+ | "{|" { LcurlyBar }
+ | "|}" { RcurlyBar }
+ | "|" { Bar }
+ | "{" { Lcurly }
+ | "}" { Rcurly }
+ | "()" { Unit(r"()") }
+ | "(" { Lparen }
+ | ")" { Rparen }
+ | "[" { Lsquare }
+ | "]" { Rsquare }
+ | "!=" { (ExclEq(r"!=")) }
+ | ">=" { (GtEq(r">=")) }
+ | "->" { MinusGt }
+ | "=>" { EqGt(r "=>") }
+ | "<=" { (LtEq(r"<=")) }
+ | "infix" ws (digit as p) ws (operator as op)
+ { operators := M.add op (mk_operator Infix (int_of_string (Char.escaped p)) op) !operators;
+ Fixity (Infix, Big_int.of_string (Char.escaped p), op) }
+ | "infixl" ws (digit as p) ws (operator as op)
+ { operators := M.add op (mk_operator InfixL (int_of_string (Char.escaped p)) op) !operators;
+ Fixity (InfixL, Big_int.of_string (Char.escaped p), op) }
+ | "infixr" ws (digit as p) ws (operator as op)
+ { operators := M.add op (mk_operator InfixR (int_of_string (Char.escaped p)) op) !operators;
+ Fixity (InfixR, Big_int.of_string (Char.escaped p), op) }
+ | operator as op
+ { try M.find op !operators
+ with Not_found -> raise (LexError ("Operator fixity undeclared " ^ op, Lexing.lexeme_start_p lexbuf)) }
+ | "(*" { comment (Lexing.lexeme_start_p lexbuf) 0 lexbuf; token lexbuf }
+ | "*)" { raise (LexError("Unbalanced comment", Lexing.lexeme_start_p lexbuf)) }
+ | tyvar_start startident ident* as i { TyVar(r i) }
+ | "~" { Id(r"~") }
+ | startident ident* as i { if M.mem i kw_table then
+ (M.find i kw_table) ()
+ (* else if
+ List.mem i default_type_names ||
+ List.mem i !custom_type_names then
+ TyId(r i) *)
+ else Id(r i) }
+ | (digit+ as i1) "." (digit+ as i2) { (Real (i1 ^ "." ^ i2)) }
+ | "-" (digit* as i1) "." (digit+ as i2) { (Real ("-" ^ i1 ^ "." ^ i2)) }
+ | digit+ as i { (Num(Big_int.of_string i)) }
+ | "-" digit+ as i { (Num(Big_int.of_string i)) }
+ | "0b" (binarydigit+ as i) { (Bin(i)) }
+ | "0x" (hexdigit+ as i) { (Hex(i)) }
+ | '"' { (String(
+ string (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) lexbuf)) }
+ | eof { Eof }
+ | _ as c { raise (LexError(
+ Printf.sprintf "Unexpected character: %c" c,
+ Lexing.lexeme_start_p lexbuf)) }
+
+
+and comment pos depth = parse
+ | "(*" { comment pos (depth+1) lexbuf }
+ | "*)" { if depth = 0 then ()
+ else if depth > 0 then comment pos (depth-1) lexbuf
+ else assert false }
+ | "\n" { Lexing.new_line lexbuf;
+ comment pos depth lexbuf }
+ | '"' { ignore(string (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) lexbuf);
+ comment pos depth lexbuf }
+ | _ { comment pos depth lexbuf }
+ | eof { raise (LexError("Unbalanced comment", pos)) }
+
+and string pos b = parse
+ | ([^'"''\n''\\']*'\n' as i) { Lexing.new_line lexbuf;
+ Buffer.add_string b i;
+ string pos b lexbuf }
+ | ([^'"''\n''\\']* as i) { Buffer.add_string b i; string pos b lexbuf }
+ | escape_sequence as i { Buffer.add_string b i; string pos b lexbuf }
+ | '\\' '\n' ws { Lexing.new_line lexbuf; string pos b lexbuf }
+ | '\\' { assert false (*raise (Reporting_basic.Fatal_error (Reporting_basic.Err_syntax (pos,
+ "illegal backslash escape in string"*) }
+ | '"' { let s = unescaped(Buffer.contents b) in
+ (*try Ulib.UTF8.validate s; s
+ with Ulib.UTF8.Malformed_code ->
+ raise (Reporting_basic.Fatal_error (Reporting_basic.Err_syntax (pos,
+ "String literal is not valid utf8"))) *) s }
+ | eof { assert false (*raise (Reporting_basic.Fatal_error (Reporting_basic.Err_syntax (pos,
+ "String literal not terminated")))*) }