aboutsummaryrefslogtreecommitdiff
path: root/parsing/cLexer.ml
diff options
context:
space:
mode:
authorPierre Roux2019-02-17 10:10:22 +0100
committerPierre Roux2019-03-31 23:17:55 +0200
commiteadb00648127c9a535b533d51189dce41ef16db7 (patch)
tree1e5db53e73950ca4c7d7d9ae5e01a5d5c83ac32f /parsing/cLexer.ml
parent5dd3c18f4e50eef53ae4413b7b80951f17edad5f (diff)
Multiple payload types in tokens
Instead of just string (and empty strings for tokens without payload)
Diffstat (limited to 'parsing/cLexer.ml')
-rw-r--r--parsing/cLexer.ml63
1 files changed, 37 insertions, 26 deletions
diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml
index 33890545da..b81d89edf9 100644
--- a/parsing/cLexer.ml
+++ b/parsing/cLexer.ml
@@ -790,20 +790,25 @@ let loct_add loct i loc = Hashtbl.add loct i loc
we unfreeze the state of the lexer. This restores the behaviour of the
lexer. B.B. *)
-type te = Tok.t
-
(** Names of tokens, for this lexer, used in Grammar error messages *)
-let token_text = function
- | ("", Some t) -> "'" ^ t ^ "'"
- | ("IDENT", None) -> "identifier"
- | ("IDENT", Some t) -> "'" ^ t ^ "'"
- | ("INT", None) -> "integer"
- | ("INT", Some s) -> "'" ^ s ^ "'"
- | ("STRING", None) -> "string"
- | ("EOI", None) -> "end of input"
- | (con, None) -> con
- | (con, Some prm) -> con ^ " \"" ^ prm ^ "\""
+let token_text : type c. c Tok.p -> string = function
+ | PKEYWORD t -> "'" ^ t ^ "'"
+ | PIDENT None -> "identifier"
+ | PIDENT (Some t) -> "'" ^ t ^ "'"
+ | PINT None -> "integer"
+ | PINT (Some s) -> "'" ^ s ^ "'"
+ | PSTRING None -> "string"
+ | PSTRING (Some s) -> "STRING \"" ^ s ^ "\""
+ | PLEFTQMARK -> "LEFTQMARK"
+ | PEOI -> "end of input"
+ | PPATTERNIDENT None -> "PATTERNIDENT"
+ | PPATTERNIDENT (Some s) -> "PATTERNIDENT \"" ^ s ^ "\""
+ | PFIELD None -> "FIELD"
+ | PFIELD (Some s) -> "FIELD \"" ^ s ^ "\""
+ | PBULLET None -> "BULLET"
+ | PBULLET (Some s) -> "BULLET \"" ^ s ^ "\""
+ | PQUOTATION lbl -> "QUOTATION \"" ^ lbl ^ "\""
let func next_token ?loc cs =
let loct = loct_create () in
@@ -817,18 +822,24 @@ let func next_token ?loc cs =
in
(ts, loct_func loct)
-let make_lexer ~diff_mode = {
- Plexing.tok_func = func (next_token ~diff_mode);
- Plexing.tok_using =
- (fun pat -> match Tok.is_keyword pat with
- | Some (false,s) -> add_keyword ~quotation:NoQuotation s
- | Some (true,s) -> add_keyword ~quotation:Quotation s
- | None -> ());
- Plexing.tok_removing = (fun _ -> ());
- Plexing.tok_match = Tok.match_pattern;
- Plexing.tok_text = token_text }
+module MakeLexer (Diff : sig val mode : bool end) = struct
+ type te = Tok.t
+ type 'c pattern = 'c Tok.p
+ let tok_pattern_eq = Tok.equal_p
+ let tok_pattern_strings = Tok.pattern_strings
+ let tok_func = func (next_token ~diff_mode:Diff.mode)
+ let tok_using : type c. c pattern -> unit = function
+ | PKEYWORD s -> add_keyword ~quotation:NoQuotation s
+ | PQUOTATION s -> add_keyword ~quotation:Quotation s
+ | _ -> ()
+ let tok_removing = (fun _ -> ())
+ let tok_match = Tok.match_pattern
+ let tok_text = token_text
+end
+
+module Lexer = MakeLexer (struct let mode = false end)
-let lexer = make_lexer ~diff_mode:false
+module LexerDiff = MakeLexer (struct let mode = true end)
(** Terminal symbols interpretation *)
@@ -863,6 +874,6 @@ let strip s =
let terminal s =
let s = strip s in
let () = match s with "" -> failwith "empty token." | _ -> () in
- if is_ident_not_keyword s then "IDENT", Some s
- else if is_number s then "INT", Some s
- else "", Some s
+ if is_ident_not_keyword s then PIDENT (Some s)
+ else if is_number s then PINT (Some s)
+ else PKEYWORD s