aboutsummaryrefslogtreecommitdiff
path: root/parsing
diff options
context:
space:
mode:
authorEnrico Tassi2019-03-05 11:42:17 +0100
committerEnrico Tassi2019-03-31 14:36:28 +0200
commit912eaf40d4efd29b7e3489d51c55b8b79206df79 (patch)
treeb4793da97a5513d460c3d08721cb40692eeddd71 /parsing
parented996432fd079583afbb1797c92ad23f654b94eb (diff)
[parsing] Split Tok.t into Tok.t and Tok.pattern
Tokens were having a double role: - the output of the lexer - the items of grammar entries, especially terminals Now tokens are the output of the lexer, and this paves the way for using a richer data type, eg including Loc.t Patterns, as in Plexing.pattern, only represent patterns (for tokens) and now have a bit more structure (eg the wildcard is represented as None, not as "", while a regular pattern for "x" as Some "x")
Diffstat (limited to 'parsing')
-rw-r--r--parsing/cLexer.ml30
-rw-r--r--parsing/cLexer.mli2
-rw-r--r--parsing/extend.ml6
-rw-r--r--parsing/notation_gram.ml2
-rw-r--r--parsing/pcoq.ml19
-rw-r--r--parsing/tok.ml60
-rw-r--r--parsing/tok.mli15
7 files changed, 58 insertions, 76 deletions
diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml
index 503cfcdb4f..7345afb307 100644
--- a/parsing/cLexer.ml
+++ b/parsing/cLexer.ml
@@ -740,15 +740,15 @@ type te = Tok.t
(** Names of tokens, for this lexer, used in Grammar error messages *)
let token_text = function
- | ("", t) -> "'" ^ t ^ "'"
- | ("IDENT", "") -> "identifier"
- | ("IDENT", t) -> "'" ^ t ^ "'"
- | ("INT", "") -> "integer"
- | ("INT", s) -> "'" ^ s ^ "'"
- | ("STRING", "") -> "string"
- | ("EOI", "") -> "end of input"
- | (con, "") -> con
- | (con, prm) -> con ^ " \"" ^ prm ^ "\""
+ | ("", Some t) -> "'" ^ t ^ "'"
+ | ("IDENT", None) -> "identifier"
+ | ("IDENT", Some t) -> "'" ^ t ^ "'"
+ | ("INT", None) -> "integer"
+ | ("INT", Some s) -> "'" ^ s ^ "'"
+ | ("STRING", None) -> "string"
+ | ("EOI", None) -> "end of input"
+ | (con, None) -> con
+ | (con, Some prm) -> con ^ " \"" ^ prm ^ "\""
let func next_token ?loc cs =
let loct = loct_create () in
@@ -765,9 +765,9 @@ let func next_token ?loc cs =
let make_lexer ~diff_mode = {
Plexing.tok_func = func (next_token ~diff_mode);
Plexing.tok_using =
- (fun pat -> match Tok.of_pattern pat with
- | KEYWORD s -> add_keyword s
- | _ -> ());
+ (fun pat -> match Tok.is_keyword pat with
+ | Some s -> add_keyword s
+ | None -> ());
Plexing.tok_removing = (fun _ -> ());
Plexing.tok_match = Tok.match_pattern;
Plexing.tok_text = token_text }
@@ -807,6 +807,6 @@ let strip s =
let terminal s =
let s = strip s in
let () = match s with "" -> failwith "empty token." | _ -> () in
- if is_ident_not_keyword s then IDENT s
- else if is_number s then INT s
- else KEYWORD s
+ if is_ident_not_keyword s then "IDENT", Some s
+ else if is_number s then "INT", Some s
+ else "", Some s
diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli
index 807f37a1a4..662f6a477d 100644
--- a/parsing/cLexer.mli
+++ b/parsing/cLexer.mli
@@ -21,7 +21,7 @@ val get_keyword_state : unit -> keyword_state
val check_ident : string -> unit
val is_ident : string -> bool
val check_keyword : string -> unit
-val terminal : string -> Tok.t
+val terminal : string -> Tok.pattern
(** The lexer of Coq: *)
diff --git a/parsing/extend.ml b/parsing/extend.ml
index 9b5537d7f6..ff879dd1c2 100644
--- a/parsing/extend.ml
+++ b/parsing/extend.ml
@@ -44,7 +44,7 @@ type simple_constr_prod_entry_key =
(** Entries used in productions (in right-hand-side of grammar rules), to parse non-terminals *)
-type binder_entry_kind = ETBinderOpen | ETBinderClosed of Tok.t list
+type binder_entry_kind = ETBinderOpen | ETBinderClosed of Tok.pattern list
type binder_target = ForBinder | ForTerm
@@ -54,7 +54,7 @@ type constr_prod_entry_key =
| ETProdBigint (* Parsed as an (unbounded) integer *)
| ETProdConstr of Constrexpr.notation_entry * (production_level * production_position) (* Parsed as constr or pattern, or a subentry of those *)
| ETProdPattern of int (* Parsed as pattern as a binder (as subpart of a constr) *)
- | ETProdConstrList of (production_level * production_position) * Tok.t list (* Parsed as non-empty list of constr *)
+ | ETProdConstrList of (production_level * production_position) * Tok.pattern list (* Parsed as non-empty list of constr *)
| ETProdBinderList of binder_entry_kind (* Parsed as non-empty list of local binders *)
(** {5 AST for user-provided entries} *)
@@ -80,7 +80,7 @@ type ('a,'b,'c) ty_user_symbol =
(** {5 Type-safe grammar extension} *)
type ('self, 'a) symbol =
-| Atoken : Tok.t -> ('self, string) symbol
+| Atoken : Tok.pattern -> ('self, string) symbol
| Alist1 : ('self, 'a) symbol -> ('self, 'a list) symbol
| Alist1sep : ('self, 'a) symbol * ('self, _) symbol -> ('self, 'a list) symbol
| Alist0 : ('self, 'a) symbol -> ('self, 'a list) symbol
diff --git a/parsing/notation_gram.ml b/parsing/notation_gram.ml
index fc5feba58b..a50f8d69e3 100644
--- a/parsing/notation_gram.ml
+++ b/parsing/notation_gram.ml
@@ -21,7 +21,7 @@ type level = Constrexpr.notation_entry * precedence * tolerability list * constr
(* first argument is InCustomEntry s for custom entries *)
type grammar_constr_prod_item =
- | GramConstrTerminal of Tok.t
+ | GramConstrTerminal of Tok.pattern
| GramConstrNonTerminal of Extend.constr_prod_entry_key * Id.t option
| GramConstrListMark of int * bool * int
(* tells action rule to make a list of the n previous parsed items;
diff --git a/parsing/pcoq.ml b/parsing/pcoq.ml
index 9241205755..f2f5d17da3 100644
--- a/parsing/pcoq.ml
+++ b/parsing/pcoq.ml
@@ -119,25 +119,10 @@ struct
end
module Symbols : sig
- val stoken : Tok.t -> ('s, string) G.ty_symbol
val slist0sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol
val slist1sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol
end = struct
- let stoken tok =
- let pattern = match tok with
- | Tok.KEYWORD s -> "", s
- | Tok.IDENT s -> "IDENT", s
- | Tok.PATTERNIDENT s -> "PATTERNIDENT", s
- | Tok.FIELD s -> "FIELD", s
- | Tok.INT s -> "INT", s
- | Tok.STRING s -> "STRING", s
- | Tok.LEFTQMARK -> "LEFTQMARK", ""
- | Tok.BULLET s -> "BULLET", s
- | Tok.EOI -> "EOI", ""
- in
- G.s_token pattern
-
let slist0sep x y = G.s_list0sep x y false
let slist1sep x y = G.s_list1sep x y false
end
@@ -158,7 +143,7 @@ end
type ('s, 'a, 'r) casted_rule = Casted : ('s, 'b, 'r) G.ty_rule * ('a -> 'b) -> ('s, 'a, 'r) casted_rule
let rec symbol_of_prod_entry_key : type s a. (s, a) symbol -> (s, a) G.ty_symbol = function
-| Atoken t -> Symbols.stoken t
+| Atoken t -> G.s_token t
| Alist1 s -> G.s_list1 (symbol_of_prod_entry_key s)
| Alist1sep (s,sep) ->
Symbols.slist1sep (symbol_of_prod_entry_key s) (symbol_of_prod_entry_key sep)
@@ -303,7 +288,7 @@ let make_rule r = [None, None, r]
let eoi_entry en =
let e = Entry.create ((Gram.Entry.name en) ^ "_eoi") in
- let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (Symbols.stoken Tok.EOI) in
+ let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (G.s_token Tok.pattern_for_EOI) in
let act = fun _ x loc -> x in
let warning msg = Feedback.msg_warning Pp.(str msg) in
Gram.safe_extend ~warning:(Some warning) e None (make_rule [G.production (symbs, act)]);
diff --git a/parsing/tok.ml b/parsing/tok.ml
index 03825e350f..ba249bd977 100644
--- a/parsing/tok.ml
+++ b/parsing/tok.ml
@@ -82,41 +82,31 @@ let print ppf tok = Format.pp_print_string ppf (to_string tok)
(** For camlp5, conversion from/to [Plexing.pattern],
and a match function analoguous to [Plexing.default_match] *)
-let of_pattern = function
- | "", s -> KEYWORD s
- | "IDENT", s -> IDENT s
- | "PATTERNIDENT", s -> PATTERNIDENT s
- | "FIELD", s -> FIELD s
- | "INT", s -> INT s
- | "STRING", s -> STRING s
- | "LEFTQMARK", _ -> LEFTQMARK
- | "BULLET", s -> BULLET s
- | "EOI", _ -> EOI
- | _ -> failwith "Tok.of_pattern: not a constructor"
+type pattern = string * string option
-let to_pattern = function
- | KEYWORD s -> "", s
- | IDENT s -> "IDENT", s
- | PATTERNIDENT s -> "PATTERNIDENT", s
- | FIELD s -> "FIELD", s
- | INT s -> "INT", s
- | STRING s -> "STRING", s
- | LEFTQMARK -> "LEFTQMARK", ""
- | BULLET s -> "BULLET", s
- | EOI -> "EOI", ""
+let is_keyword = function
+ | "", Some s -> Some s
+ | _ -> None
-let match_pattern =
+let pattern_for_EOI = ("EOI",None)
+let pattern_for_KEYWORD s = ("",Some s)
+let pattern_for_IDENT s = ("IDENT",Some s)
+
+let match_pattern (key, value) =
let err () = raise Stream.Failure in
- function
- | "", "" -> (function KEYWORD s -> s | _ -> err ())
- | "IDENT", "" -> (function IDENT s -> s | _ -> err ())
- | "PATTERNIDENT", "" -> (function PATTERNIDENT s -> s | _ -> err ())
- | "FIELD", "" -> (function FIELD s -> s | _ -> err ())
- | "INT", "" -> (function INT s -> s | _ -> err ())
- | "STRING", "" -> (function STRING s -> s | _ -> err ())
- | "LEFTQMARK", "" -> (function LEFTQMARK -> "" | _ -> err ())
- | "BULLET", "" -> (function BULLET s -> s | _ -> err ())
- | "EOI", "" -> (function EOI -> "" | _ -> err ())
- | pat ->
- let tok = of_pattern pat in
- function tok' -> if equal tok tok' then snd pat else err ()
+ let cond x =
+ match value with
+ | None -> x
+ | Some value -> if string_equal value x then x else err () in
+ match key with
+ | "" -> (function { v = KEYWORD s } -> cond s | _ -> err ())
+ | "IDENT" when value <> None -> (function { v = (IDENT s | KEYWORD s) } -> cond s | _ -> err ())
+ | "IDENT" -> (function { v = IDENT s } -> cond s | _ -> err ())
+ | "PATTERNIDENT" -> (function { v = PATTERNIDENT s } -> cond s | _ -> err ())
+ | "FIELD" -> (function { v = FIELD s } -> cond s | _ -> err ())
+ | "INT" -> (function { v = INT s } -> cond s | _ -> err ())
+ | "STRING" -> (function { v = STRING s } -> cond s | _ -> err ())
+ | "LEFTQMARK" -> (function { v = LEFTQMARK } -> cond "" | _ -> err ())
+ | "BULLET" -> (function { v = BULLET s } -> cond s | _ -> err ())
+ | "EOI" -> (function { v = EOI } -> cond "" | _ -> err ())
+ | p -> CErrors.anomaly Pp.(str "Tok: unknown pattern " ++ str p)
diff --git a/parsing/tok.mli b/parsing/tok.mli
index 5750096a28..e32047e6ec 100644
--- a/parsing/tok.mli
+++ b/parsing/tok.mli
@@ -29,7 +29,14 @@ val to_string : t -> string
val print : Format.formatter -> t -> unit
val match_keyword : string -> t -> bool
-(** for camlp5 *)
-val of_pattern : string*string -> t
-val to_pattern : t -> string*string
-val match_pattern : string*string -> t -> string
+(** for camlp5,
+ eg GRAMMAR EXTEND ..... [ IDENT "x" -> .... END
+ is a pattern ("IDENT", Some "x")
+*)
+type pattern = string * string option (* = Plexing.pattern *)
+
+val is_keyword : pattern -> string option
+val pattern_for_EOI : pattern
+val pattern_for_KEYWORD : string -> pattern
+val pattern_for_IDENT : string -> pattern
+val match_pattern : pattern -> t -> string