| author | Pierre-Marie Pédrot | 2019-03-31 23:07:48 +0200 |
|---|---|---|
| committer | Pierre-Marie Pédrot | 2019-03-31 23:07:48 +0200 |
| commit | 5dd3c18f4e50eef53ae4413b7b80951f17edad5f (patch) | |
| tree | 0a104d6afc109b7b89c8997d1afb3bc9f1e89dc3 | |
| parent | cb502e44aac8328ffd6c37429e050a01f72b2c53 (diff) | |
| parent | f832476404a29d7791c1a6d7970988d3b2e3ad9e (diff) | |
Merge PR #9733: [lexer] keyword protected quotation token for arbitrary text
Ack-by: Zimmi48
Ack-by: ejgallego
Ack-by: gares
Reviewed-by: ppedrot
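The feature in a nutshell: a keyword such as `ltac:` can now be registered as the start of a quotation; when the lexer recognises it followed by an identifier or by text wrapped in one or more copies of the same bracket, it emits a single `QUOTATION` token whose payload is the raw text (for bracketed quotations the delimiters are kept in the payload and can be stripped afterwards with the new `Tok.trim_quotation`). Below is a small standalone OCaml sketch of that stripping step; it mirrors the `trim_quotation` added to `parsing/tok.ml` in this patch but is illustrative code, not the patched module itself.

```ocaml
(* Standalone sketch modelled on the trim_quotation added to parsing/tok.ml
   by this patch (not Coq's code). The input is a raw QUOTATION payload:
   either a bare identifier or text wrapped in n copies of the same bracket. *)
let trim_quotation txt =
  let len = String.length txt in
  if len = 0 then (None, txt)
  else
    let c = txt.[0] in
    if c = '(' || c = '[' || c = '{' then
      (* count how many copies of the opening bracket start the text *)
      let rec count n = if n < len && txt.[n] = c then count (n + 1) else n in
      let n = count 0 in
      (Some c, String.sub txt n (len - 2 * n))
    else (None, txt)

let () =
  (* "{{ text }}" -> delimiter '{' and payload " text " *)
  assert (trim_quotation "{{ text }}" = (Some '{', " text "));
  (* a bare identifier comes back unchanged *)
  assert (trim_quotation "x" = (None, "x"))
```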
| -rw-r--r-- | coqpp/coqpp_main.ml | 17 |
| -rw-r--r-- | dev/ci/user-overlays/09733-gares-quotations.sh | 6 |
| -rw-r--r-- | dev/doc/build-system.dune.md | 2 |
| -rw-r--r-- | gramlib/grammar.ml | 10 |
| -rw-r--r-- | gramlib/grammar.mli | 2 |
| -rw-r--r-- | gramlib/plexing.ml | 2 |
| -rw-r--r-- | gramlib/plexing.mli | 4 |
| -rw-r--r-- | parsing/cLexer.ml | 104 |
| -rw-r--r-- | parsing/cLexer.mli | 28 |
| -rw-r--r-- | parsing/extend.ml | 6 |
| -rw-r--r-- | parsing/notation_gram.ml | 2 |
| -rw-r--r-- | parsing/pcoq.ml | 19 |
| -rw-r--r-- | parsing/tok.ml | 85 |
| -rw-r--r-- | parsing/tok.mli | 21 |
| -rw-r--r-- | vernac/egramcoq.ml | 8 |
| -rw-r--r-- | vernac/metasyntax.ml | 9 |
| -rw-r--r-- | vernac/pvernac.ml | 2 |
17 files changed, 209 insertions, 118 deletions
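Most of that line churn comes from one representation change: grammar terminals are no longer described by a concrete `Tok.t` value but by a camlp5-style pattern, `Tok.pattern = string * string option` (constructor name plus optional payload, `None` acting as a wildcard), and `Tok.t` itself gains a `QUOTATION of string * string` constructor. As a reading aid before the patch body below, here is a self-contained sketch of how such a pattern is matched against a token; it is modelled on the new `Tok.match_pattern` but simplified, with a cut-down token type and a local exception instead of `Stream.Failure`, so it is not the patched code.

```ocaml
(* Standalone sketch, not Coq's Tok module: a cut-down token type and a
   matcher in the style of the new Tok.match_pattern. A pattern is a
   constructor name plus an optional expected payload; None is a wildcard. *)
type tok =
  | KEYWORD of string
  | IDENT of string
  | QUOTATION of string * string   (* quotation keyword, raw quoted text *)

type pattern = string * string option

exception No_match

let match_pattern (key, value) tok =
  let cond x =
    match value with
    | None -> x
    | Some v -> if String.equal v x then x else raise No_match
  in
  match key, tok with
  | "", KEYWORD s -> cond s
  | "IDENT", IDENT s -> cond s
  | "QUOTATION", QUOTATION (lbl, body) ->
      (* a quotation pattern must name its keyword; the parser gets the body *)
      (match value with
       | Some lbl' when String.equal lbl' lbl -> body
       | _ -> raise No_match)
  | _ -> raise No_match

let () =
  assert (match_pattern ("IDENT", None) (IDENT "x") = "x");
  assert (match_pattern ("QUOTATION", Some "ltac:")
            (QUOTATION ("ltac:", "(idtac)")) = "(idtac)")
```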
diff --git a/coqpp/coqpp_main.ml b/coqpp/coqpp_main.ml
index 90158c3aa3..f4c819eeb6 100644
--- a/coqpp/coqpp_main.ml
+++ b/coqpp/coqpp_main.ml
@@ -163,6 +163,8 @@ let is_token s = match string_split s with
 
 let rec parse_tokens = function
 | [GSymbString s] -> SymbToken ("", Some s)
+| [GSymbQualid ("QUOTATION", None); GSymbString s] ->
+  SymbToken ("QUOTATION", Some s)
 | [GSymbQualid ("SELF", None)] -> SymbSelf
 | [GSymbQualid ("NEXT", None)] -> SymbNext
 | [GSymbQualid ("LIST0", None); tkn] ->
@@ -212,17 +214,9 @@ let print_fun fmt (vars, body) =
 (** Meta-program instead of calling Tok.of_pattern here because otherwise
     violates value restriction *)
 
-let print_tok fmt = function
-| "", s -> fprintf fmt "Tok.KEYWORD %a" print_string s
-| "IDENT", s -> fprintf fmt "Tok.IDENT %a" print_string s
-| "PATTERNIDENT", s -> fprintf fmt "Tok.PATTERNIDENT %a" print_string s
-| "FIELD", s -> fprintf fmt "Tok.FIELD %a" print_string s
-| "INT", s -> fprintf fmt "Tok.INT %a" print_string s
-| "STRING", s -> fprintf fmt "Tok.STRING %a" print_string s
-| "LEFTQMARK", _ -> fprintf fmt "Tok.LEFTQMARK"
-| "BULLET", s -> fprintf fmt "Tok.BULLET %a" print_string s
-| "EOI", _ -> fprintf fmt "Tok.EOI"
-| _ -> failwith "Tok.of_pattern: not a constructor"
+let print_tok fmt (k,o) =
+  let print_pat fmt = print_opt fmt print_string in
+  fprintf fmt "(%a,%a)" print_string k print_pat o
 
 let rec print_prod fmt p =
   let (vars, tkns) = List.split p.gprod_symbs in
@@ -240,7 +234,6 @@ and print_symbols fmt = function
 and print_symbol fmt tkn = match tkn with
 | SymbToken (t, s) ->
-  let s = match s with None -> "" | Some s -> s in
   fprintf fmt "(Extend.Atoken (%a))" print_tok (t, s)
 | SymbEntry (e, None) ->
   fprintf fmt "(Extend.Aentry %s)" e
diff --git a/dev/ci/user-overlays/09733-gares-quotations.sh b/dev/ci/user-overlays/09733-gares-quotations.sh
new file mode 100644
index 0000000000..b17454fc4c
--- /dev/null
+++ b/dev/ci/user-overlays/09733-gares-quotations.sh
@@ -0,0 +1,6 @@
+if [ "$CI_PULL_REQUEST" = "9733" ] || [ "$CI_BRANCH" = "quotations" ]; then
+
+    ltac2_CI_REF=quotations
+    ltac2_CI_GITURL=https://github.com/gares/ltac2
+
+fi
diff --git a/dev/doc/build-system.dune.md b/dev/doc/build-system.dune.md
index b1bfac8cc9..49251d61a1 100644
--- a/dev/doc/build-system.dune.md
+++ b/dev/doc/build-system.dune.md
@@ -22,7 +22,7 @@
 If you want to build the standard libraries and plugins you should
 call `make -f Makefile.dune voboot`. It is usually enough to do that
 once per-session.
 
-More helper targets are availabe in `Makefile.dune`, `make -f
+More helper targets are available in `Makefile.dune`, `make -f
 Makefile.dune` will display some help.
 
 Dune places build artifacts in a separate directory `_build`; it will
diff --git a/gramlib/grammar.ml b/gramlib/grammar.ml
index 74350c4f15..9eebe7a1e2 100644
--- a/gramlib/grammar.ml
+++ b/gramlib/grammar.ml
@@ -16,7 +16,7 @@ module type S =
     type te
     type parsable
     val parsable : ?loc:Loc.t -> char Stream.t -> parsable
-    val tokens : string -> (string * int) list
+    val tokens : string -> (string option * int) list
     module Entry :
       sig
         type 'a e
@@ -195,7 +195,7 @@ let is_before : type s1 s2 a1 a2. (s1, a1) ty_symbol -> (s2, a2) ty_symbol -> bo
   match s1, s2 with
     Stoken ("ANY", _), _ -> false
   | _, Stoken ("ANY", _) -> true
-  | Stoken (_, s), Stoken (_, "") when s <> "" -> true
+  | Stoken (_, Some _), Stoken (_, None) -> true
   | Stoken _, Stoken _ -> false
   | Stoken _, _ -> true
   | _ -> false
@@ -683,7 +683,7 @@ let rec print_symbol : type s r. formatter -> (s, r) ty_symbol -> unit =
       fprintf ppf "LIST1 %a SEP %a%s" print_symbol1 s print_symbol1 t
         (if osep then " OPT_SEP" else "")
   | Sopt s -> fprintf ppf "OPT %a" print_symbol1 s
-  | Stoken (con, prm) when con <> "" && prm <> "" ->
+  | Stoken (con, Some prm) when con <> "" ->
      fprintf ppf "%s@ %a" con print_str prm
   | Snterml (e, l) ->
      fprintf ppf "%s%s@ LEVEL@ %a" e.ename ""
@@ -695,8 +695,8 @@ and print_symbol1 : type s r. formatter -> (s, r) ty_symbol -> unit =
   | Snterm e -> fprintf ppf "%s%s" e.ename ""
   | Sself -> pp_print_string ppf "SELF"
   | Snext -> pp_print_string ppf "NEXT"
-  | Stoken ("", s) -> print_str ppf s
-  | Stoken (con, "") -> pp_print_string ppf con
+  | Stoken ("", Some s) -> print_str ppf s
+  | Stoken (con, None) -> pp_print_string ppf con
   | Stree t -> print_level ppf pp_print_space (flatten_tree t)
   | s -> fprintf ppf "(%a)" print_symbol s
diff --git a/gramlib/grammar.mli b/gramlib/grammar.mli
index 7cb7a92f85..453ec85187 100644
--- a/gramlib/grammar.mli
+++ b/gramlib/grammar.mli
@@ -24,7 +24,7 @@ module type S =
     type te
     type parsable
     val parsable : ?loc:Loc.t -> char Stream.t -> parsable
-    val tokens : string -> (string * int) list
+    val tokens : string -> (string option * int) list
     module Entry :
       sig
         type 'a e
diff --git a/gramlib/plexing.ml b/gramlib/plexing.ml
index c294923a85..6da06f147f 100644
--- a/gramlib/plexing.ml
+++ b/gramlib/plexing.ml
@@ -2,7 +2,7 @@
 (* plexing.ml,v *)
 (* Copyright (c) INRIA 2007-2017 *)
 
-type pattern = string * string
+type pattern = string * string option
 
 type location_function = int -> Loc.t
 type 'te lexer_func = ?loc:Loc.t -> char Stream.t -> 'te Stream.t * location_function
diff --git a/gramlib/plexing.mli b/gramlib/plexing.mli
index f6e4d96b80..26443df026 100644
--- a/gramlib/plexing.mli
+++ b/gramlib/plexing.mli
@@ -8,13 +8,13 @@
    grammars (see module [Grammar]). It also provides some useful functions
    to create lexers. *)
 
-type pattern = string * string
+type pattern = string * string option
 (* Type for values used by the generated code of the EXTEND
    statement to represent terminals in entry rules.
 - The first string is the constructor name (must start with
   an uppercase character). When it is empty, the second string
   is supposed to be a keyword.
-- The second string is the constructor parameter. Empty if it
+- The second component is the constructor parameter. None if it
   has no parameter (corresponding to the 'wildcard' pattern).
 - The way tokens patterns are interpreted to parse tokens is
   done by the lexer, function [tok_match] below. *)
diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml
index 503cfcdb4f..33890545da 100644
--- a/parsing/cLexer.ml
+++ b/parsing/cLexer.ml
@@ -19,16 +19,19 @@ open Gramlib
 module CharOrd = struct type t = char let compare : char -> char -> int = compare end
 module CharMap = Map.Make (CharOrd)
 
+type starts_quotation = NoQuotation | Quotation
+
 type ttree = {
-  node : string option;
-  branch : ttree CharMap.t }
+  node : (string * starts_quotation) option;
+  branch : ttree CharMap.t;
+}
 
 let empty_ttree = { node = None; branch = CharMap.empty }
 
-let ttree_add ttree str =
+let ttree_add ttree (str,quot) =
   let rec insert tt i =
     if i == String.length str then
-      {node = Some str; branch = tt.branch}
+      {node = Some (str,quot); branch = tt.branch}
     else
       let c = str.[i] in
       let br =
@@ -75,7 +78,7 @@ let ttree_elements ttree =
   let rec elts tt accu =
     let accu = match tt.node with
       | None -> accu
-      | Some s -> CString.Set.add s accu
+      | Some (s,_) -> CString.Set.add s accu
     in
     CharMap.fold (fun _ tt accu -> elts tt accu) tt.branch accu
  in
@@ -259,11 +262,11 @@ let is_keyword s =
   try match (ttree_find !token_tree s).node with None -> false | Some _ -> true
   with Not_found -> false
 
-let add_keyword str =
+let add_keyword ?(quotation=NoQuotation) str =
   if not (is_keyword str) then
     begin
       check_keyword str;
-      token_tree := ttree_add !token_tree str
+      token_tree := ttree_add !token_tree (str,quotation)
     end
 
 let remove_keyword str =
@@ -529,11 +532,62 @@ and progress_utf8 loc last nj n c tt cs =
 and progress_from_byte loc last nj tt cs c =
   progress_utf8 loc last nj (utf8_char_size loc cs c) c tt cs
 
+type marker = Delimited of int * char list * char list | ImmediateAsciiIdent
+
+let peek_marker_len b e s =
+  let rec peek n =
+    match stream_nth n s with
+    | c -> if c = b then peek (n+1) else n, List.make n b, List.make n e
+    | exception Stream.Failure -> n, List.make n b, List.make n e
+  in
+  let len, start, stop = peek 0 in
+  if len = 0 then raise Stream.Failure
+  else Delimited (len, start, stop)
+
+let peek_marker s =
+  match stream_nth 0 s with
+  | '(' -> peek_marker_len '(' ')' s
+  | '[' -> peek_marker_len '[' ']' s
+  | '{' -> peek_marker_len '{' '}' s
+  | ('a'..'z' | 'A'..'Z' | '_') -> ImmediateAsciiIdent
+  | _ -> raise Stream.Failure
+
+let parse_quotation loc s =
+  match peek_marker s with
+  | ImmediateAsciiIdent ->
+      let c = Stream.next s in
+      let len =
+        try ident_tail loc (store 0 c) s with
+          Stream.Failure -> raise (Stream.Error "")
+      in
+      get_buff len
+  | Delimited (lenmarker, bmarker, emarker) ->
+      let b = Buffer.create 80 in
+      let commit1 c = Buffer.add_char b c; Stream.junk s in
+      let commit l = List.iter commit1 l in
+      let rec quotation depth =
+        match Stream.npeek lenmarker s with
+        | l when l = bmarker ->
+              commit l;
+              quotation (depth + 1)
+        | l when l = emarker ->
+              commit l;
+              if depth > 1 then quotation (depth - 1)
+        | c :: cs ->
+              commit1 c;
+              quotation depth
+        | [] -> raise Stream.Failure
+      in
+      quotation 0;
+      Buffer.contents b
+
+
 let find_keyword loc id s =
   let tt = ttree_find !token_tree id in
   match progress_further loc tt.node 0 tt s with
   | None -> raise Not_found
-  | Some c -> KEYWORD c
+  | Some (c,NoQuotation) -> KEYWORD c
+  | Some (c,Quotation) -> QUOTATION(c, parse_quotation loc s)
 
 let process_sequence loc bp c cs =
   let rec aux n cs =
@@ -548,7 +602,8 @@ let process_chars ~diff_mode loc bp c cs =
   let t = progress_from_byte loc None (-1) !token_tree cs c in
   let ep = Stream.count cs in
   match t with
-  | Some t -> (KEYWORD t, set_loc_pos loc bp ep)
+  | Some (t,NoQuotation) -> (KEYWORD t, set_loc_pos loc bp ep)
+  | Some (c,Quotation) -> (QUOTATION(c, parse_quotation loc cs), set_loc_pos loc bp ep)
   | None ->
     let ep' = bp + utf8_char_size loc cs c in
     if diff_mode then begin
@@ -740,15 +795,15 @@ type te = Tok.t
 (** Names of tokens, for this lexer, used in Grammar error messages *)
 
 let token_text = function
-  | ("", t) -> "'" ^ t ^ "'"
-  | ("IDENT", "") -> "identifier"
-  | ("IDENT", t) -> "'" ^ t ^ "'"
-  | ("INT", "") -> "integer"
-  | ("INT", s) -> "'" ^ s ^ "'"
-  | ("STRING", "") -> "string"
-  | ("EOI", "") -> "end of input"
-  | (con, "") -> con
-  | (con, prm) -> con ^ " \"" ^ prm ^ "\""
+  | ("", Some t) -> "'" ^ t ^ "'"
+  | ("IDENT", None) -> "identifier"
+  | ("IDENT", Some t) -> "'" ^ t ^ "'"
+  | ("INT", None) -> "integer"
+  | ("INT", Some s) -> "'" ^ s ^ "'"
+  | ("STRING", None) -> "string"
+  | ("EOI", None) -> "end of input"
+  | (con, None) -> con
+  | (con, Some prm) -> con ^ " \"" ^ prm ^ "\""
 
 let func next_token ?loc cs =
   let loct = loct_create () in
@@ -765,9 +820,10 @@ let func next_token ?loc cs =
 let make_lexer ~diff_mode = {
   Plexing.tok_func = func (next_token ~diff_mode);
   Plexing.tok_using =
-    (fun pat -> match Tok.of_pattern pat with
-      | KEYWORD s -> add_keyword s
-      | _ -> ());
+    (fun pat -> match Tok.is_keyword pat with
+      | Some (false,s) -> add_keyword ~quotation:NoQuotation s
+      | Some (true,s) -> add_keyword ~quotation:Quotation s
+      | None -> ());
   Plexing.tok_removing = (fun _ -> ());
   Plexing.tok_match = Tok.match_pattern;
   Plexing.tok_text = token_text }
@@ -807,6 +863,6 @@ let strip s =
 let terminal s =
   let s = strip s in
   let () = match s with "" -> failwith "empty token." | _ -> () in
-  if is_ident_not_keyword s then IDENT s
-  else if is_number s then INT s
-  else KEYWORD s
+  if is_ident_not_keyword s then "IDENT", Some s
+  else if is_number s then "INT", Some s
+  else "", Some s
diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli
index 807f37a1a4..6d5a621877 100644
--- a/parsing/cLexer.mli
+++ b/parsing/cLexer.mli
@@ -8,8 +8,32 @@
 (* * (see LICENSE file for the text of the license) *)
 (************************************************************************)
 
+(** When one registers a keyword she can declare it starts a quotation.
+    In particular using QUOTATION("name:") in a grammar rule
+    declares "name:" as a keyword and the token QUOTATION is
+    matched whenever the keyword is followed by an identifier or a
+    parenthesized text. Eg
+
+      constr:x
+      string:[....]
+      ltac:(....)
+      ltac:{....}
+
+    The delimiter is made of 1 or more occurrences of the same parenthesis,
+    eg ((.....)) or [[[[....]]]]. The idea being that if the text happens to
+    contain the closing delimiter, one can make the delimiter longer and avoid
+    confusion (no escaping). Eg
+
+      string:[[ .. ']' .. ]]
+
+
+    Nesting the delimiter is allowed, eg ((..((...))..)) is OK.
+
+    Keywords don't need to end in ':' *)
+type starts_quotation = NoQuotation | Quotation
+
 (** This should be functional but it is not due to the interface *)
-val add_keyword : string -> unit
+val add_keyword : ?quotation:starts_quotation -> string -> unit
 val remove_keyword : string -> unit
 val is_keyword : string -> bool
 val keywords : unit -> CString.Set.t
@@ -21,7 +45,7 @@ val get_keyword_state : unit -> keyword_state
 val check_ident : string -> unit
 val is_ident : string -> bool
 val check_keyword : string -> unit
 
-val terminal : string -> Tok.t
+val terminal : string -> Tok.pattern
 
 (** The lexer of Coq: *)
diff --git a/parsing/extend.ml b/parsing/extend.ml
index 9b5537d7f6..ff879dd1c2 100644
--- a/parsing/extend.ml
+++ b/parsing/extend.ml
@@ -44,7 +44,7 @@ type simple_constr_prod_entry_key =
 (** Entries used in productions (in right-hand-side of grammar rules),
     to parse non-terminals *)
 
-type binder_entry_kind = ETBinderOpen | ETBinderClosed of Tok.t list
+type binder_entry_kind = ETBinderOpen | ETBinderClosed of Tok.pattern list
 
 type binder_target = ForBinder | ForTerm
 
@@ -54,7 +54,7 @@ type constr_prod_entry_key =
   | ETProdBigint (* Parsed as an (unbounded) integer *)
   | ETProdConstr of Constrexpr.notation_entry * (production_level * production_position) (* Parsed as constr or pattern, or a subentry of those *)
   | ETProdPattern of int (* Parsed as pattern as a binder (as subpart of a constr) *)
-  | ETProdConstrList of (production_level * production_position) * Tok.t list (* Parsed as non-empty list of constr *)
+  | ETProdConstrList of (production_level * production_position) * Tok.pattern list (* Parsed as non-empty list of constr *)
   | ETProdBinderList of binder_entry_kind (* Parsed as non-empty list of local binders *)
 
 (** {5 AST for user-provided entries} *)
@@ -80,7 +80,7 @@ type ('a,'b,'c) ty_user_symbol =
 (** {5 Type-safe grammar extension} *)
 
 type ('self, 'a) symbol =
-| Atoken : Tok.t -> ('self, string) symbol
+| Atoken : Tok.pattern -> ('self, string) symbol
 | Alist1 : ('self, 'a) symbol -> ('self, 'a list) symbol
 | Alist1sep : ('self, 'a) symbol * ('self, _) symbol -> ('self, 'a list) symbol
 | Alist0 : ('self, 'a) symbol -> ('self, 'a list) symbol
diff --git a/parsing/notation_gram.ml b/parsing/notation_gram.ml
index fc5feba58b..a50f8d69e3 100644
--- a/parsing/notation_gram.ml
+++ b/parsing/notation_gram.ml
@@ -21,7 +21,7 @@ type level = Constrexpr.notation_entry * precedence * tolerability list * constr
   (* first argument is InCustomEntry s for custom entries *)
 
 type grammar_constr_prod_item =
-  | GramConstrTerminal of Tok.t
+  | GramConstrTerminal of Tok.pattern
   | GramConstrNonTerminal of Extend.constr_prod_entry_key * Id.t option
   | GramConstrListMark of int * bool * int
   (* tells action rule to make a list of the n previous parsed items;
diff --git a/parsing/pcoq.ml b/parsing/pcoq.ml
index 9241205755..f2f5d17da3 100644
--- a/parsing/pcoq.ml
+++ b/parsing/pcoq.ml
@@ -119,25 +119,10 @@ struct
 end
 
 module Symbols : sig
-  val stoken : Tok.t -> ('s, string) G.ty_symbol
   val slist0sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol
   val slist1sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol
 end = struct
-  let stoken tok =
-    let pattern = match tok with
-    | Tok.KEYWORD s -> "", s
-    | Tok.IDENT s -> "IDENT", s
-    | Tok.PATTERNIDENT s -> "PATTERNIDENT", s
-    | Tok.FIELD s -> "FIELD", s
-    | Tok.INT s -> "INT", s
-    | Tok.STRING s -> "STRING", s
-    | Tok.LEFTQMARK -> "LEFTQMARK", ""
-    | Tok.BULLET s -> "BULLET", s
-    | Tok.EOI -> "EOI", ""
-    in
-    G.s_token pattern
-
   let slist0sep x y = G.s_list0sep x y false
   let slist1sep x y = G.s_list1sep x y false
 end
@@ -158,7 +143,7 @@ end
 type ('s, 'a, 'r) casted_rule =
   Casted : ('s, 'b, 'r) G.ty_rule * ('a -> 'b) -> ('s, 'a, 'r) casted_rule
 
 let rec symbol_of_prod_entry_key : type s a. (s, a) symbol -> (s, a) G.ty_symbol = function
-| Atoken t -> Symbols.stoken t
+| Atoken t -> G.s_token t
 | Alist1 s -> G.s_list1 (symbol_of_prod_entry_key s)
 | Alist1sep (s,sep) ->
   Symbols.slist1sep (symbol_of_prod_entry_key s) (symbol_of_prod_entry_key sep)
@@ -303,7 +288,7 @@ let make_rule r = [None, None, r]
 
 let eoi_entry en =
   let e = Entry.create ((Gram.Entry.name en) ^ "_eoi") in
-  let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (Symbols.stoken Tok.EOI) in
+  let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (G.s_token Tok.pattern_for_EOI) in
   let act = fun _ x loc -> x in
   let warning msg = Feedback.msg_warning Pp.(str msg) in
   Gram.safe_extend ~warning:(Some warning) e None (make_rule [G.production (symbs, act)]);
diff --git a/parsing/tok.ml b/parsing/tok.ml
index 03825e350f..f0846dcf90 100644
--- a/parsing/tok.ml
+++ b/parsing/tok.ml
@@ -21,6 +21,7 @@ type t =
   | STRING of string
   | LEFTQMARK
   | BULLET of string
+  | QUOTATION of string * string
   | EOI
 
 let equal t1 t2 = match t1, t2 with
@@ -34,6 +35,7 @@ let equal t1 t2 = match t1, t2 with
 | LEFTQMARK, LEFTQMARK -> true
 | BULLET s1, BULLET s2 -> string_equal s1 s2
 | EOI, EOI -> true
+| QUOTATION(s1,t1), QUOTATION(s2,t2) -> string_equal s1 s2 && string_equal t1 t2
 | _ -> false
 
 let extract_string diff_mode = function
@@ -58,6 +60,7 @@ let extract_string diff_mode = function
   | INT s -> s
   | LEFTQMARK -> "?"
   | BULLET s -> s
+  | QUOTATION(_,s) -> s
   | EOI -> ""
 
 let to_string = function
@@ -69,12 +72,27 @@ let to_string = function
   | STRING s -> Format.sprintf "STRING %S" s
   | LEFTQMARK -> "LEFTQMARK"
   | BULLET s -> Format.sprintf "BULLET %S" s
+  | QUOTATION(lbl,s) -> lbl ^ s
   | EOI -> "EOI"
 
 let match_keyword kwd = function
   | KEYWORD kwd' when kwd = kwd' -> true
   | _ -> false
 
+(* Invariant, txt is "ident" or a well parenthesized "{{....}}" *)
+let trim_quotation txt =
+  let len = String.length txt in
+  if len = 0 then None, txt
+  else
+    let c = txt.[0] in
+    if c = '(' || c = '[' || c = '{' then
+      let rec aux n =
+        if n < len && txt.[n] = c then aux (n+1)
+        else Some c, String.sub txt n (len - (2*n))
+      in
+      aux 0
+    else None, txt
+
 (* Needed to fix Camlp5 signature.
    Cannot use Pp because of silly Tox -> Compat -> Pp dependency *)
 let print ppf tok = Format.pp_print_string ppf (to_string tok)
@@ -82,41 +100,38 @@ let print ppf tok = Format.pp_print_string ppf (to_string tok)
 (** For camlp5, conversion from/to [Plexing.pattern],
     and a match function analoguous to [Plexing.default_match] *)
 
-let of_pattern = function
-  | "", s -> KEYWORD s
-  | "IDENT", s -> IDENT s
-  | "PATTERNIDENT", s -> PATTERNIDENT s
-  | "FIELD", s -> FIELD s
-  | "INT", s -> INT s
-  | "STRING", s -> STRING s
-  | "LEFTQMARK", _ -> LEFTQMARK
-  | "BULLET", s -> BULLET s
-  | "EOI", _ -> EOI
-  | _ -> failwith "Tok.of_pattern: not a constructor"
+type pattern = string * string option
+
+let is_keyword = function
+  | "", Some s -> Some (false,s)
+  | "QUOTATION", Some s -> Some (true,s)
+  | _ -> None
 
-let to_pattern = function
-  | KEYWORD s -> "", s
-  | IDENT s -> "IDENT", s
-  | PATTERNIDENT s -> "PATTERNIDENT", s
-  | FIELD s -> "FIELD", s
-  | INT s -> "INT", s
-  | STRING s -> "STRING", s
-  | LEFTQMARK -> "LEFTQMARK", ""
-  | BULLET s -> "BULLET", s
-  | EOI -> "EOI", ""
+let pattern_for_EOI = ("EOI",None)
+let pattern_for_KEYWORD s = ("",Some s)
+let pattern_for_IDENT s = ("IDENT",Some s)
 
-let match_pattern =
+let match_pattern (key, value) =
   let err () = raise Stream.Failure in
-  function
-  | "", "" -> (function KEYWORD s -> s | _ -> err ())
-  | "IDENT", "" -> (function IDENT s -> s | _ -> err ())
-  | "PATTERNIDENT", "" -> (function PATTERNIDENT s -> s | _ -> err ())
-  | "FIELD", "" -> (function FIELD s -> s | _ -> err ())
-  | "INT", "" -> (function INT s -> s | _ -> err ())
-  | "STRING", "" -> (function STRING s -> s | _ -> err ())
-  | "LEFTQMARK", "" -> (function LEFTQMARK -> "" | _ -> err ())
-  | "BULLET", "" -> (function BULLET s -> s | _ -> err ())
-  | "EOI", "" -> (function EOI -> "" | _ -> err ())
-  | pat ->
-    let tok = of_pattern pat in
-    function tok' -> if equal tok tok' then snd pat else err ()
+  let cond x =
+    match value with
+    | None -> x
+    | Some value -> if string_equal value x then x else err () in
+  match key with
+  | "" -> (function KEYWORD s -> cond s | _ -> err ())
+  | "IDENT" when value <> None -> (function (IDENT s | KEYWORD s) -> cond s | _ -> err ())
+  | "IDENT" -> (function IDENT s -> cond s | _ -> err ())
+  | "PATTERNIDENT" -> (function PATTERNIDENT s -> cond s | _ -> err ())
+  | "FIELD" -> (function FIELD s -> cond s | _ -> err ())
+  | "INT" -> (function INT s -> cond s | _ -> err ())
+  | "STRING" -> (function STRING s -> cond s | _ -> err ())
+  | "LEFTQMARK" -> (function LEFTQMARK -> cond "" | _ -> err ())
+  | "BULLET" -> (function BULLET s -> cond s | _ -> err ())
+  | "QUOTATION" -> (function
+      | QUOTATION(lbl,s) ->
+          begin match value with
+          | None -> assert false
+          | Some lbl1 -> if string_equal lbl1 lbl then s else err () end
+      | _ -> err ())
+  | "EOI" -> (function EOI -> cond "" | _ -> err ())
+  | p -> CErrors.anomaly Pp.(str "Tok: unknown pattern " ++ str p)
diff --git a/parsing/tok.mli b/parsing/tok.mli
index 5750096a28..8a255be06f 100644
--- a/parsing/tok.mli
+++ b/parsing/tok.mli
@@ -19,6 +19,7 @@ type t =
   | STRING of string
   | LEFTQMARK
   | BULLET of string
+  | QUOTATION of string * string
   | EOI
 
 val equal : t -> t -> bool
@@ -29,7 +30,19 @@ val to_string : t -> string
 val print : Format.formatter -> t -> unit
 val match_keyword : string -> t -> bool
 
-(** for camlp5 *)
-val of_pattern : string*string -> t
-val to_pattern : t -> string*string
-val match_pattern : string*string -> t -> string
+(** Utility function for the test returned by a QUOTATION token:
+    It returns the delimiter parenthesis, if any, and the text
+    without delimiters. Eg `{{{ text }}}` -> Some '{', ` text ` *)
+val trim_quotation : string -> char option * string
+
+(** for camlp5,
+    eg GRAMMAR EXTEND ..... [ IDENT "x" -> .... END
+    is a pattern ("IDENT", Some "x")
+*)
+type pattern = string * string option (* = Plexing.pattern *)
+
+val is_keyword : pattern -> (bool * string) option
+val pattern_for_EOI : pattern
+val pattern_for_KEYWORD : string -> pattern
+val pattern_for_IDENT : string -> pattern
+val match_pattern : pattern -> t -> string
diff --git a/vernac/egramcoq.ml b/vernac/egramcoq.ml
index 1a07d74a0e..f96b567f1b 100644
--- a/vernac/egramcoq.ml
+++ b/vernac/egramcoq.ml
@@ -247,10 +247,10 @@ type (_, _) entry =
 | TTReference : ('self, qualid) entry
 | TTBigint : ('self, Constrexpr.raw_natural_number) entry
 | TTConstr : notation_entry * prod_info * 'r target -> ('r, 'r) entry
-| TTConstrList : prod_info * Tok.t list * 'r target -> ('r, 'r list) entry
+| TTConstrList : prod_info * Tok.pattern list * 'r target -> ('r, 'r list) entry
 | TTPattern : int -> ('self, cases_pattern_expr) entry
 | TTOpenBinderList : ('self, local_binder_expr list) entry
-| TTClosedBinderList : Tok.t list -> ('self, local_binder_expr list list) entry
+| TTClosedBinderList : Tok.pattern list -> ('self, local_binder_expr list list) entry
 
 type _ any_entry = TTAny : ('s, 'r) entry -> 's any_entry
 
@@ -319,7 +319,7 @@ let is_binder_level from e = match e with
 let make_sep_rules = function
   | [tk] -> Atoken tk
   | tkl ->
-  let rec mkrule : Tok.t list -> string rules = function
+  let rec mkrule : Tok.pattern list -> string rules = function
   | [] -> Rules ({ norec_rule = Stop }, fun _ -> (* dropped anyway: *) "")
   | tkn :: rem ->
     let Rules ({ norec_rule = r }, f) = mkrule rem in
@@ -406,7 +406,7 @@ match e with
 | TTConstrList _ -> { subst with constrlists = v :: subst.constrlists }
 
 type (_, _) ty_symbol =
-| TyTerm : Tok.t -> ('s, string) ty_symbol
+| TyTerm : Tok.pattern -> ('s, string) ty_symbol
 | TyNonTerm : 's target * ('s, 'a) entry * ('s, 'a) symbol * bool -> ('s, 'a) ty_symbol
 
 type ('self, _, 'r) ty_rule =
diff --git a/vernac/metasyntax.ml b/vernac/metasyntax.ml
index 3da12e7714..781fd404f8 100644
--- a/vernac/metasyntax.ml
+++ b/vernac/metasyntax.ml
@@ -23,7 +23,6 @@ open Libobject
 open Constrintern
 open Vernacexpr
 open Libnames
-open Tok
 open Notation
 open Nameops
 
@@ -575,20 +574,20 @@ let is_not_small_constr = function
   | _ -> false
 
 let rec define_keywords_aux = function
-  | GramConstrNonTerminal(e,Some _) as n1 :: GramConstrTerminal(IDENT k) :: l
+  | GramConstrNonTerminal(e,Some _) as n1 :: GramConstrTerminal("IDENT",Some k) :: l
      when is_not_small_constr e ->
      Flags.if_verbose Feedback.msg_info (str "Identifier '" ++ str k ++ str "' now a keyword");
      CLexer.add_keyword k;
-     n1 :: GramConstrTerminal(KEYWORD k) :: define_keywords_aux l
+     n1 :: GramConstrTerminal(Tok.pattern_for_KEYWORD k) :: define_keywords_aux l
   | n :: l -> n :: define_keywords_aux l
   | [] -> []
 
 (* Ensure that IDENT articulation terminal symbols are keywords *)
 let define_keywords = function
-  | GramConstrTerminal(IDENT k)::l ->
+  | GramConstrTerminal("IDENT", Some k)::l ->
      Flags.if_verbose Feedback.msg_info (str "Identifier '" ++ str k ++ str "' now a keyword");
      CLexer.add_keyword k;
-     GramConstrTerminal(KEYWORD k) :: define_keywords_aux l
+     GramConstrTerminal(Tok.pattern_for_KEYWORD k) :: define_keywords_aux l
   | l -> define_keywords_aux l
 
 let distribute a ll = List.map (fun l -> a @ l) ll
diff --git a/vernac/pvernac.ml b/vernac/pvernac.ml
index 994fad85f0..9438b9749f 100644
--- a/vernac/pvernac.ml
+++ b/vernac/pvernac.ml
@@ -55,7 +55,7 @@ module Vernac_ =
       let act_vernac v loc = Some CAst.(make ~loc v) in
       let act_eoi _ loc = None in
       let rule = [
-        Rule (Next (Stop, Atoken Tok.EOI), act_eoi);
+        Rule (Next (Stop, Atoken Tok.pattern_for_EOI), act_eoi);
         Rule (Next (Stop, Aentry vernac_control), act_vernac);
       ] in
       Pcoq.grammar_extend main_entry None (None, [None, None, rule])
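On the lexer side, `find_keyword` and `process_chars` now check whether the keyword they just recognised was registered with `~quotation:Quotation`; if so, the new `CLexer.parse_quotation` reads the payload: either a plain identifier, or everything up to a closing marker made of as many closing brackets as there were opening ones, with nesting of the full-length marker allowed and no escaping. The following is a self-contained sketch of that scan written over a string and an index rather than Coq's `char Stream.t`; names such as `read_delimited` are mine, not the patch's.

```ocaml
(* Standalone sketch of the quotation scan performed by the new
   CLexer.parse_quotation, rewritten over a string and an index instead of a
   char Stream (illustrative code, not the patched lexer). The opening marker
   is n copies of one bracket; the body runs until the matching n copies of
   the closing bracket, nested full-length markers are allowed, and the
   delimiters are kept in the result, as in the patch. *)
let read_delimited ~pos text =
  let opening = text.[pos] in
  let closing =
    match opening with
    | '(' -> ')' | '[' -> ']' | '{' -> '}'
    | _ -> invalid_arg "read_delimited: not a quotation opener"
  in
  (* how many copies of the opening bracket form the marker *)
  let rec marker_len n =
    if pos + n < String.length text && text.[pos + n] = opening
    then marker_len (n + 1) else n
  in
  let n = marker_len 0 in
  let bmarker = String.make n opening and emarker = String.make n closing in
  let buf = Buffer.create 80 in
  let looking_at i m =
    i + String.length m <= String.length text
    && String.sub text i (String.length m) = m
  in
  let rec scan i depth =
    if i >= String.length text then failwith "unterminated quotation"
    else if looking_at i bmarker then begin
      Buffer.add_string buf bmarker;
      scan (i + n) (depth + 1)
    end else if looking_at i emarker then begin
      Buffer.add_string buf emarker;
      if depth > 1 then scan (i + n) (depth - 1) else i + n
    end else begin
      Buffer.add_char buf text.[i];
      scan (i + 1) depth
    end
  in
  let next = scan pos 0 in
  (Buffer.contents buf, next)

let () =
  (* after a quotation keyword such as "ltac:", the lexer hands over at '(' *)
  let body, _next = read_delimited ~pos:0 "((idtac; () ))rest" in
  assert (body = "((idtac; () ))")
```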
