diff options
| author | Enrico Tassi | 2019-03-05 11:42:17 +0100 |
|---|---|---|
| committer | Enrico Tassi | 2019-03-31 14:36:28 +0200 |
| commit | 912eaf40d4efd29b7e3489d51c55b8b79206df79 (patch) | |
| tree | b4793da97a5513d460c3d08721cb40692eeddd71 /parsing | |
| parent | ed996432fd079583afbb1797c92ad23f654b94eb (diff) | |
[parsing] Split Tok.t into Tok.t and Tok.pattern
Tokens were having a double role:
- the output of the lexer
- the items of grammar entries, especially terminals
Now tokens are the output of the lexer, and this paves the way for
using a richer data type, eg including Loc.t
Patterns, as in Plexing.pattern, only represent patterns (for tokens)
and now have a bit more structure (eg the wildcard is represented
as None, not as "", while a regular pattern for "x" as Some "x")
Diffstat (limited to 'parsing')
| -rw-r--r-- | parsing/cLexer.ml | 30 | ||||
| -rw-r--r-- | parsing/cLexer.mli | 2 | ||||
| -rw-r--r-- | parsing/extend.ml | 6 | ||||
| -rw-r--r-- | parsing/notation_gram.ml | 2 | ||||
| -rw-r--r-- | parsing/pcoq.ml | 19 | ||||
| -rw-r--r-- | parsing/tok.ml | 60 | ||||
| -rw-r--r-- | parsing/tok.mli | 15 |
7 files changed, 58 insertions, 76 deletions
diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml index 503cfcdb4f..7345afb307 100644 --- a/parsing/cLexer.ml +++ b/parsing/cLexer.ml @@ -740,15 +740,15 @@ type te = Tok.t (** Names of tokens, for this lexer, used in Grammar error messages *) let token_text = function - | ("", t) -> "'" ^ t ^ "'" - | ("IDENT", "") -> "identifier" - | ("IDENT", t) -> "'" ^ t ^ "'" - | ("INT", "") -> "integer" - | ("INT", s) -> "'" ^ s ^ "'" - | ("STRING", "") -> "string" - | ("EOI", "") -> "end of input" - | (con, "") -> con - | (con, prm) -> con ^ " \"" ^ prm ^ "\"" + | ("", Some t) -> "'" ^ t ^ "'" + | ("IDENT", None) -> "identifier" + | ("IDENT", Some t) -> "'" ^ t ^ "'" + | ("INT", None) -> "integer" + | ("INT", Some s) -> "'" ^ s ^ "'" + | ("STRING", None) -> "string" + | ("EOI", None) -> "end of input" + | (con, None) -> con + | (con, Some prm) -> con ^ " \"" ^ prm ^ "\"" let func next_token ?loc cs = let loct = loct_create () in @@ -765,9 +765,9 @@ let func next_token ?loc cs = let make_lexer ~diff_mode = { Plexing.tok_func = func (next_token ~diff_mode); Plexing.tok_using = - (fun pat -> match Tok.of_pattern pat with - | KEYWORD s -> add_keyword s - | _ -> ()); + (fun pat -> match Tok.is_keyword pat with + | Some s -> add_keyword s + | None -> ()); Plexing.tok_removing = (fun _ -> ()); Plexing.tok_match = Tok.match_pattern; Plexing.tok_text = token_text } @@ -807,6 +807,6 @@ let strip s = let terminal s = let s = strip s in let () = match s with "" -> failwith "empty token." | _ -> () in - if is_ident_not_keyword s then IDENT s - else if is_number s then INT s - else KEYWORD s + if is_ident_not_keyword s then "IDENT", Some s + else if is_number s then "INT", Some s + else "", Some s diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli index 807f37a1a4..662f6a477d 100644 --- a/parsing/cLexer.mli +++ b/parsing/cLexer.mli @@ -21,7 +21,7 @@ val get_keyword_state : unit -> keyword_state val check_ident : string -> unit val is_ident : string -> bool val check_keyword : string -> unit -val terminal : string -> Tok.t +val terminal : string -> Tok.pattern (** The lexer of Coq: *) diff --git a/parsing/extend.ml b/parsing/extend.ml index 9b5537d7f6..ff879dd1c2 100644 --- a/parsing/extend.ml +++ b/parsing/extend.ml @@ -44,7 +44,7 @@ type simple_constr_prod_entry_key = (** Entries used in productions (in right-hand-side of grammar rules), to parse non-terminals *) -type binder_entry_kind = ETBinderOpen | ETBinderClosed of Tok.t list +type binder_entry_kind = ETBinderOpen | ETBinderClosed of Tok.pattern list type binder_target = ForBinder | ForTerm @@ -54,7 +54,7 @@ type constr_prod_entry_key = | ETProdBigint (* Parsed as an (unbounded) integer *) | ETProdConstr of Constrexpr.notation_entry * (production_level * production_position) (* Parsed as constr or pattern, or a subentry of those *) | ETProdPattern of int (* Parsed as pattern as a binder (as subpart of a constr) *) - | ETProdConstrList of (production_level * production_position) * Tok.t list (* Parsed as non-empty list of constr *) + | ETProdConstrList of (production_level * production_position) * Tok.pattern list (* Parsed as non-empty list of constr *) | ETProdBinderList of binder_entry_kind (* Parsed as non-empty list of local binders *) (** {5 AST for user-provided entries} *) @@ -80,7 +80,7 @@ type ('a,'b,'c) ty_user_symbol = (** {5 Type-safe grammar extension} *) type ('self, 'a) symbol = -| Atoken : Tok.t -> ('self, string) symbol +| Atoken : Tok.pattern -> ('self, string) symbol | Alist1 : ('self, 'a) symbol -> ('self, 'a list) symbol | Alist1sep : ('self, 'a) symbol * ('self, _) symbol -> ('self, 'a list) symbol | Alist0 : ('self, 'a) symbol -> ('self, 'a list) symbol diff --git a/parsing/notation_gram.ml b/parsing/notation_gram.ml index fc5feba58b..a50f8d69e3 100644 --- a/parsing/notation_gram.ml +++ b/parsing/notation_gram.ml @@ -21,7 +21,7 @@ type level = Constrexpr.notation_entry * precedence * tolerability list * constr (* first argument is InCustomEntry s for custom entries *) type grammar_constr_prod_item = - | GramConstrTerminal of Tok.t + | GramConstrTerminal of Tok.pattern | GramConstrNonTerminal of Extend.constr_prod_entry_key * Id.t option | GramConstrListMark of int * bool * int (* tells action rule to make a list of the n previous parsed items; diff --git a/parsing/pcoq.ml b/parsing/pcoq.ml index 9241205755..f2f5d17da3 100644 --- a/parsing/pcoq.ml +++ b/parsing/pcoq.ml @@ -119,25 +119,10 @@ struct end module Symbols : sig - val stoken : Tok.t -> ('s, string) G.ty_symbol val slist0sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol val slist1sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol end = struct - let stoken tok = - let pattern = match tok with - | Tok.KEYWORD s -> "", s - | Tok.IDENT s -> "IDENT", s - | Tok.PATTERNIDENT s -> "PATTERNIDENT", s - | Tok.FIELD s -> "FIELD", s - | Tok.INT s -> "INT", s - | Tok.STRING s -> "STRING", s - | Tok.LEFTQMARK -> "LEFTQMARK", "" - | Tok.BULLET s -> "BULLET", s - | Tok.EOI -> "EOI", "" - in - G.s_token pattern - let slist0sep x y = G.s_list0sep x y false let slist1sep x y = G.s_list1sep x y false end @@ -158,7 +143,7 @@ end type ('s, 'a, 'r) casted_rule = Casted : ('s, 'b, 'r) G.ty_rule * ('a -> 'b) -> ('s, 'a, 'r) casted_rule let rec symbol_of_prod_entry_key : type s a. (s, a) symbol -> (s, a) G.ty_symbol = function -| Atoken t -> Symbols.stoken t +| Atoken t -> G.s_token t | Alist1 s -> G.s_list1 (symbol_of_prod_entry_key s) | Alist1sep (s,sep) -> Symbols.slist1sep (symbol_of_prod_entry_key s) (symbol_of_prod_entry_key sep) @@ -303,7 +288,7 @@ let make_rule r = [None, None, r] let eoi_entry en = let e = Entry.create ((Gram.Entry.name en) ^ "_eoi") in - let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (Symbols.stoken Tok.EOI) in + let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (G.s_token Tok.pattern_for_EOI) in let act = fun _ x loc -> x in let warning msg = Feedback.msg_warning Pp.(str msg) in Gram.safe_extend ~warning:(Some warning) e None (make_rule [G.production (symbs, act)]); diff --git a/parsing/tok.ml b/parsing/tok.ml index 03825e350f..ba249bd977 100644 --- a/parsing/tok.ml +++ b/parsing/tok.ml @@ -82,41 +82,31 @@ let print ppf tok = Format.pp_print_string ppf (to_string tok) (** For camlp5, conversion from/to [Plexing.pattern], and a match function analoguous to [Plexing.default_match] *) -let of_pattern = function - | "", s -> KEYWORD s - | "IDENT", s -> IDENT s - | "PATTERNIDENT", s -> PATTERNIDENT s - | "FIELD", s -> FIELD s - | "INT", s -> INT s - | "STRING", s -> STRING s - | "LEFTQMARK", _ -> LEFTQMARK - | "BULLET", s -> BULLET s - | "EOI", _ -> EOI - | _ -> failwith "Tok.of_pattern: not a constructor" +type pattern = string * string option -let to_pattern = function - | KEYWORD s -> "", s - | IDENT s -> "IDENT", s - | PATTERNIDENT s -> "PATTERNIDENT", s - | FIELD s -> "FIELD", s - | INT s -> "INT", s - | STRING s -> "STRING", s - | LEFTQMARK -> "LEFTQMARK", "" - | BULLET s -> "BULLET", s - | EOI -> "EOI", "" +let is_keyword = function + | "", Some s -> Some s + | _ -> None -let match_pattern = +let pattern_for_EOI = ("EOI",None) +let pattern_for_KEYWORD s = ("",Some s) +let pattern_for_IDENT s = ("IDENT",Some s) + +let match_pattern (key, value) = let err () = raise Stream.Failure in - function - | "", "" -> (function KEYWORD s -> s | _ -> err ()) - | "IDENT", "" -> (function IDENT s -> s | _ -> err ()) - | "PATTERNIDENT", "" -> (function PATTERNIDENT s -> s | _ -> err ()) - | "FIELD", "" -> (function FIELD s -> s | _ -> err ()) - | "INT", "" -> (function INT s -> s | _ -> err ()) - | "STRING", "" -> (function STRING s -> s | _ -> err ()) - | "LEFTQMARK", "" -> (function LEFTQMARK -> "" | _ -> err ()) - | "BULLET", "" -> (function BULLET s -> s | _ -> err ()) - | "EOI", "" -> (function EOI -> "" | _ -> err ()) - | pat -> - let tok = of_pattern pat in - function tok' -> if equal tok tok' then snd pat else err () + let cond x = + match value with + | None -> x + | Some value -> if string_equal value x then x else err () in + match key with + | "" -> (function { v = KEYWORD s } -> cond s | _ -> err ()) + | "IDENT" when value <> None -> (function { v = (IDENT s | KEYWORD s) } -> cond s | _ -> err ()) + | "IDENT" -> (function { v = IDENT s } -> cond s | _ -> err ()) + | "PATTERNIDENT" -> (function { v = PATTERNIDENT s } -> cond s | _ -> err ()) + | "FIELD" -> (function { v = FIELD s } -> cond s | _ -> err ()) + | "INT" -> (function { v = INT s } -> cond s | _ -> err ()) + | "STRING" -> (function { v = STRING s } -> cond s | _ -> err ()) + | "LEFTQMARK" -> (function { v = LEFTQMARK } -> cond "" | _ -> err ()) + | "BULLET" -> (function { v = BULLET s } -> cond s | _ -> err ()) + | "EOI" -> (function { v = EOI } -> cond "" | _ -> err ()) + | p -> CErrors.anomaly Pp.(str "Tok: unknown pattern " ++ str p) diff --git a/parsing/tok.mli b/parsing/tok.mli index 5750096a28..e32047e6ec 100644 --- a/parsing/tok.mli +++ b/parsing/tok.mli @@ -29,7 +29,14 @@ val to_string : t -> string val print : Format.formatter -> t -> unit val match_keyword : string -> t -> bool -(** for camlp5 *) -val of_pattern : string*string -> t -val to_pattern : t -> string*string -val match_pattern : string*string -> t -> string +(** for camlp5, + eg GRAMMAR EXTEND ..... [ IDENT "x" -> .... END + is a pattern ("IDENT", Some "x") +*) +type pattern = string * string option (* = Plexing.pattern *) + +val is_keyword : pattern -> string option +val pattern_for_EOI : pattern +val pattern_for_KEYWORD : string -> pattern +val pattern_for_IDENT : string -> pattern +val match_pattern : pattern -> t -> string |
