aboutsummaryrefslogtreecommitdiff
path: root/parsing
diff options
context:
space:
mode:
Diffstat (limited to 'parsing')
-rw-r--r--parsing/cLexer.ml135
-rw-r--r--parsing/cLexer.mli51
-rw-r--r--parsing/extend.ml50
-rw-r--r--parsing/notation_gram.ml2
-rw-r--r--parsing/pcoq.ml119
-rw-r--r--parsing/pcoq.mli2
-rw-r--r--parsing/tok.ml134
-rw-r--r--parsing/tok.mli37
8 files changed, 330 insertions, 200 deletions
diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml
index 503cfcdb4f..b81d89edf9 100644
--- a/parsing/cLexer.ml
+++ b/parsing/cLexer.ml
@@ -19,16 +19,19 @@ open Gramlib
module CharOrd = struct type t = char let compare : char -> char -> int = compare end
module CharMap = Map.Make (CharOrd)
+type starts_quotation = NoQuotation | Quotation
+
type ttree = {
- node : string option;
- branch : ttree CharMap.t }
+ node : (string * starts_quotation) option;
+ branch : ttree CharMap.t;
+}
let empty_ttree = { node = None; branch = CharMap.empty }
-let ttree_add ttree str =
+let ttree_add ttree (str,quot) =
let rec insert tt i =
if i == String.length str then
- {node = Some str; branch = tt.branch}
+ {node = Some (str,quot); branch = tt.branch}
else
let c = str.[i] in
let br =
@@ -75,7 +78,7 @@ let ttree_elements ttree =
let rec elts tt accu =
let accu = match tt.node with
| None -> accu
- | Some s -> CString.Set.add s accu
+ | Some (s,_) -> CString.Set.add s accu
in
CharMap.fold (fun _ tt accu -> elts tt accu) tt.branch accu
in
@@ -259,11 +262,11 @@ let is_keyword s =
try match (ttree_find !token_tree s).node with None -> false | Some _ -> true
with Not_found -> false
-let add_keyword str =
+let add_keyword ?(quotation=NoQuotation) str =
if not (is_keyword str) then
begin
check_keyword str;
- token_tree := ttree_add !token_tree str
+ token_tree := ttree_add !token_tree (str,quotation)
end
let remove_keyword str =
@@ -529,11 +532,62 @@ and progress_utf8 loc last nj n c tt cs =
and progress_from_byte loc last nj tt cs c =
progress_utf8 loc last nj (utf8_char_size loc cs c) c tt cs
+type marker = Delimited of int * char list * char list | ImmediateAsciiIdent
+
+let peek_marker_len b e s =
+ let rec peek n =
+ match stream_nth n s with
+ | c -> if c = b then peek (n+1) else n, List.make n b, List.make n e
+ | exception Stream.Failure -> n, List.make n b, List.make n e
+ in
+ let len, start, stop = peek 0 in
+ if len = 0 then raise Stream.Failure
+ else Delimited (len, start, stop)
+
+let peek_marker s =
+ match stream_nth 0 s with
+ | '(' -> peek_marker_len '(' ')' s
+ | '[' -> peek_marker_len '[' ']' s
+ | '{' -> peek_marker_len '{' '}' s
+ | ('a'..'z' | 'A'..'Z' | '_') -> ImmediateAsciiIdent
+ | _ -> raise Stream.Failure
+
+let parse_quotation loc s =
+ match peek_marker s with
+ | ImmediateAsciiIdent ->
+ let c = Stream.next s in
+ let len =
+ try ident_tail loc (store 0 c) s with
+ Stream.Failure -> raise (Stream.Error "")
+ in
+ get_buff len
+ | Delimited (lenmarker, bmarker, emarker) ->
+ let b = Buffer.create 80 in
+ let commit1 c = Buffer.add_char b c; Stream.junk s in
+ let commit l = List.iter commit1 l in
+ let rec quotation depth =
+ match Stream.npeek lenmarker s with
+ | l when l = bmarker ->
+ commit l;
+ quotation (depth + 1)
+ | l when l = emarker ->
+ commit l;
+ if depth > 1 then quotation (depth - 1)
+ | c :: cs ->
+ commit1 c;
+ quotation depth
+ | [] -> raise Stream.Failure
+ in
+ quotation 0;
+ Buffer.contents b
+
+
let find_keyword loc id s =
let tt = ttree_find !token_tree id in
match progress_further loc tt.node 0 tt s with
| None -> raise Not_found
- | Some c -> KEYWORD c
+ | Some (c,NoQuotation) -> KEYWORD c
+ | Some (c,Quotation) -> QUOTATION(c, parse_quotation loc s)
let process_sequence loc bp c cs =
let rec aux n cs =
@@ -548,7 +602,8 @@ let process_chars ~diff_mode loc bp c cs =
let t = progress_from_byte loc None (-1) !token_tree cs c in
let ep = Stream.count cs in
match t with
- | Some t -> (KEYWORD t, set_loc_pos loc bp ep)
+ | Some (t,NoQuotation) -> (KEYWORD t, set_loc_pos loc bp ep)
+ | Some (c,Quotation) -> (QUOTATION(c, parse_quotation loc cs), set_loc_pos loc bp ep)
| None ->
let ep' = bp + utf8_char_size loc cs c in
if diff_mode then begin
@@ -735,20 +790,25 @@ let loct_add loct i loc = Hashtbl.add loct i loc
we unfreeze the state of the lexer. This restores the behaviour of the
lexer. B.B. *)
-type te = Tok.t
-
(** Names of tokens, for this lexer, used in Grammar error messages *)
-let token_text = function
- | ("", t) -> "'" ^ t ^ "'"
- | ("IDENT", "") -> "identifier"
- | ("IDENT", t) -> "'" ^ t ^ "'"
- | ("INT", "") -> "integer"
- | ("INT", s) -> "'" ^ s ^ "'"
- | ("STRING", "") -> "string"
- | ("EOI", "") -> "end of input"
- | (con, "") -> con
- | (con, prm) -> con ^ " \"" ^ prm ^ "\""
+let token_text : type c. c Tok.p -> string = function
+ | PKEYWORD t -> "'" ^ t ^ "'"
+ | PIDENT None -> "identifier"
+ | PIDENT (Some t) -> "'" ^ t ^ "'"
+ | PINT None -> "integer"
+ | PINT (Some s) -> "'" ^ s ^ "'"
+ | PSTRING None -> "string"
+ | PSTRING (Some s) -> "STRING \"" ^ s ^ "\""
+ | PLEFTQMARK -> "LEFTQMARK"
+ | PEOI -> "end of input"
+ | PPATTERNIDENT None -> "PATTERNIDENT"
+ | PPATTERNIDENT (Some s) -> "PATTERNIDENT \"" ^ s ^ "\""
+ | PFIELD None -> "FIELD"
+ | PFIELD (Some s) -> "FIELD \"" ^ s ^ "\""
+ | PBULLET None -> "BULLET"
+ | PBULLET (Some s) -> "BULLET \"" ^ s ^ "\""
+ | PQUOTATION lbl -> "QUOTATION \"" ^ lbl ^ "\""
let func next_token ?loc cs =
let loct = loct_create () in
@@ -762,17 +822,24 @@ let func next_token ?loc cs =
in
(ts, loct_func loct)
-let make_lexer ~diff_mode = {
- Plexing.tok_func = func (next_token ~diff_mode);
- Plexing.tok_using =
- (fun pat -> match Tok.of_pattern pat with
- | KEYWORD s -> add_keyword s
- | _ -> ());
- Plexing.tok_removing = (fun _ -> ());
- Plexing.tok_match = Tok.match_pattern;
- Plexing.tok_text = token_text }
+module MakeLexer (Diff : sig val mode : bool end) = struct
+ type te = Tok.t
+ type 'c pattern = 'c Tok.p
+ let tok_pattern_eq = Tok.equal_p
+ let tok_pattern_strings = Tok.pattern_strings
+ let tok_func = func (next_token ~diff_mode:Diff.mode)
+ let tok_using : type c. c pattern -> unit = function
+ | PKEYWORD s -> add_keyword ~quotation:NoQuotation s
+ | PQUOTATION s -> add_keyword ~quotation:Quotation s
+ | _ -> ()
+ let tok_removing = (fun _ -> ())
+ let tok_match = Tok.match_pattern
+ let tok_text = token_text
+end
+
+module Lexer = MakeLexer (struct let mode = false end)
-let lexer = make_lexer ~diff_mode:false
+module LexerDiff = MakeLexer (struct let mode = true end)
(** Terminal symbols interpretation *)
@@ -807,6 +874,6 @@ let strip s =
let terminal s =
let s = strip s in
let () = match s with "" -> failwith "empty token." | _ -> () in
- if is_ident_not_keyword s then IDENT s
- else if is_number s then INT s
- else KEYWORD s
+ if is_ident_not_keyword s then PIDENT (Some s)
+ else if is_number s then PINT (Some s)
+ else PKEYWORD s
diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli
index 807f37a1a4..9df3e45f49 100644
--- a/parsing/cLexer.mli
+++ b/parsing/cLexer.mli
@@ -8,8 +8,32 @@
(* * (see LICENSE file for the text of the license) *)
(************************************************************************)
+(** When one registers a keyword she can declare it starts a quotation.
+ In particular using QUOTATION("name:") in a grammar rule
+ declares "name:" as a keyword and the token QUOTATION is
+ matched whenever the keyword is followed by an identifier or a
+ parenthesized text. Eg
+
+ constr:x
+ string:[....]
+ ltac:(....)
+ ltac:{....}
+
+ The delimiter is made of 1 or more occurrences of the same parenthesis,
+ eg ((.....)) or [[[[....]]]]. The idea being that if the text happens to
+ contain the closing delimiter, one can make the delimiter longer and avoid
+ confusion (no escaping). Eg
+
+ string:[[ .. ']' .. ]]
+
+
+ Nesting the delimiter is allowed, eg ((..((...))..)) is OK.
+
+ Keywords don't need to end in ':' *)
+type starts_quotation = NoQuotation | Quotation
+
(** This should be functional but it is not due to the interface *)
-val add_keyword : string -> unit
+val add_keyword : ?quotation:starts_quotation -> string -> unit
val remove_keyword : string -> unit
val is_keyword : string -> bool
val keywords : unit -> CString.Set.t
@@ -21,26 +45,14 @@ val get_keyword_state : unit -> keyword_state
val check_ident : string -> unit
val is_ident : string -> bool
val check_keyword : string -> unit
-val terminal : string -> Tok.t
-(** The lexer of Coq: *)
+(** When string is neither an ident nor an int, returns a keyword. *)
+val terminal : string -> string Tok.p
-(* modtype Grammar.GLexerType: sig
- type te val
- lexer : te Plexing.lexer
- end
-
-where
+(** The lexer of Coq: *)
- type lexer 'te =
- { tok_func : lexer_func 'te;
- tok_using : pattern -> unit;
- tok_removing : pattern -> unit;
- tok_match : pattern -> 'te -> string;
- tok_text : pattern -> string;
- tok_comm : mutable option (list location) }
- *)
-include Gramlib.Grammar.GLexerType with type te = Tok.t
+module Lexer :
+ Gramlib.Grammar.GLexerType with type te = Tok.t and type 'c pattern = 'c Tok.p
module Error : sig
type t
@@ -66,4 +78,5 @@ as if it was unquoted, possibly becoming multiple tokens
it was not in a comment, possibly becoming multiple tokens
- return any unrecognized Ascii or UTF-8 character as a string
*)
-val make_lexer : diff_mode:bool -> Tok.t Gramlib.Plexing.lexer
+module LexerDiff :
+ Gramlib.Grammar.GLexerType with type te = Tok.t and type 'c pattern = 'c Tok.p
diff --git a/parsing/extend.ml b/parsing/extend.ml
index 9b5537d7f6..dd7c301dfb 100644
--- a/parsing/extend.ml
+++ b/parsing/extend.ml
@@ -10,7 +10,7 @@
(** Entry keys for constr notations *)
-type 'a entry = 'a Gramlib.Grammar.GMake(CLexer).Entry.e
+type 'a entry = 'a Gramlib.Grammar.GMake(CLexer.Lexer).Entry.e
type side = Left | Right
@@ -44,7 +44,7 @@ type simple_constr_prod_entry_key =
(** Entries used in productions (in right-hand-side of grammar rules), to parse non-terminals *)
-type binder_entry_kind = ETBinderOpen | ETBinderClosed of Tok.t list
+type binder_entry_kind = ETBinderOpen | ETBinderClosed of string Tok.p list
type binder_target = ForBinder | ForTerm
@@ -54,7 +54,7 @@ type constr_prod_entry_key =
| ETProdBigint (* Parsed as an (unbounded) integer *)
| ETProdConstr of Constrexpr.notation_entry * (production_level * production_position) (* Parsed as constr or pattern, or a subentry of those *)
| ETProdPattern of int (* Parsed as pattern as a binder (as subpart of a constr) *)
- | ETProdConstrList of (production_level * production_position) * Tok.t list (* Parsed as non-empty list of constr *)
+ | ETProdConstrList of (production_level * production_position) * string Tok.p list (* Parsed as non-empty list of constr *)
| ETProdBinderList of binder_entry_kind (* Parsed as non-empty list of local binders *)
(** {5 AST for user-provided entries} *)
@@ -79,30 +79,34 @@ type ('a,'b,'c) ty_user_symbol =
(** {5 Type-safe grammar extension} *)
-type ('self, 'a) symbol =
-| Atoken : Tok.t -> ('self, string) symbol
-| Alist1 : ('self, 'a) symbol -> ('self, 'a list) symbol
-| Alist1sep : ('self, 'a) symbol * ('self, _) symbol -> ('self, 'a list) symbol
-| Alist0 : ('self, 'a) symbol -> ('self, 'a list) symbol
-| Alist0sep : ('self, 'a) symbol * ('self, _) symbol -> ('self, 'a list) symbol
-| Aopt : ('self, 'a) symbol -> ('self, 'a option) symbol
-| Aself : ('self, 'self) symbol
-| Anext : ('self, 'self) symbol
-| Aentry : 'a entry -> ('self, 'a) symbol
-| Aentryl : 'a entry * string -> ('self, 'a) symbol
-| Arules : 'a rules list -> ('self, 'a) symbol
-
-and ('self, _, 'r) rule =
-| Stop : ('self, 'r, 'r) rule
-| Next : ('self, 'a, 'r) rule * ('self, 'b) symbol -> ('self, 'b -> 'a, 'r) rule
-
-and ('a, 'r) norec_rule = { norec_rule : 's. ('s, 'a, 'r) rule }
+type norec = NoRec (* just two *)
+type mayrec = MayRec (* incompatible types *)
+
+type ('self, 'trec, 'a) symbol =
+| Atoken : 'c Tok.p -> ('self, norec, 'c) symbol
+| Alist1 : ('self, 'trec, 'a) symbol -> ('self, 'trec, 'a list) symbol
+| Alist1sep : ('self, 'trec, 'a) symbol * ('self, norec, _) symbol
+ -> ('self, 'trec, 'a list) symbol
+| Alist0 : ('self, 'trec, 'a) symbol -> ('self, 'trec, 'a list) symbol
+| Alist0sep : ('self, 'trec, 'a) symbol * ('self, norec, _) symbol
+ -> ('self, 'trec, 'a list) symbol
+| Aopt : ('self, 'trec, 'a) symbol -> ('self, 'trec, 'a option) symbol
+| Aself : ('self, mayrec, 'self) symbol
+| Anext : ('self, mayrec, 'self) symbol
+| Aentry : 'a entry -> ('self, norec, 'a) symbol
+| Aentryl : 'a entry * string -> ('self, norec, 'a) symbol
+| Arules : 'a rules list -> ('self, norec, 'a) symbol
+
+and ('self, 'trec, _, 'r) rule =
+| Stop : ('self, norec, 'r, 'r) rule
+| Next : ('self, _, 'a, 'r) rule * ('self, _, 'b) symbol -> ('self, mayrec, 'b -> 'a, 'r) rule
+| NextNoRec : ('self, norec, 'a, 'r) rule * ('self, norec, 'b) symbol -> ('self, norec, 'b -> 'a, 'r) rule
and 'a rules =
-| Rules : ('act, Loc.t -> 'a) norec_rule * 'act -> 'a rules
+| Rules : (_, norec, 'act, Loc.t -> 'a) rule * 'act -> 'a rules
type 'a production_rule =
-| Rule : ('a, 'act, Loc.t -> 'a) rule * 'act -> 'a production_rule
+| Rule : ('a, _, 'act, Loc.t -> 'a) rule * 'act -> 'a production_rule
type 'a single_extend_statement =
string option *
diff --git a/parsing/notation_gram.ml b/parsing/notation_gram.ml
index fc5feba58b..6df0d6f21a 100644
--- a/parsing/notation_gram.ml
+++ b/parsing/notation_gram.ml
@@ -21,7 +21,7 @@ type level = Constrexpr.notation_entry * precedence * tolerability list * constr
(* first argument is InCustomEntry s for custom entries *)
type grammar_constr_prod_item =
- | GramConstrTerminal of Tok.t
+ | GramConstrTerminal of string Tok.p
| GramConstrNonTerminal of Extend.constr_prod_entry_key * Id.t option
| GramConstrListMark of int * bool * int
(* tells action rule to make a list of the n previous parsed items;
diff --git a/parsing/pcoq.ml b/parsing/pcoq.ml
index 9241205755..8f38e437b4 100644
--- a/parsing/pcoq.ml
+++ b/parsing/pcoq.ml
@@ -17,7 +17,7 @@ open Gramlib
(** The parser of Coq *)
module G : sig
- include Grammar.S with type te = Tok.t
+ include Grammar.S with type te = Tok.t and type 'c pattern = 'c Tok.p
(* where Grammar.S
@@ -67,7 +67,7 @@ module type S =
end with type 'a Entry.e = 'a Extend.entry = struct
- include Grammar.GMake(CLexer)
+ include Grammar.GMake(CLexer.Lexer)
type coq_parsable = parsable * CLexer.lexer_state ref
@@ -107,7 +107,7 @@ end
module Entry =
struct
- type 'a t = 'a Grammar.GMake(CLexer).Entry.e
+ type 'a t = 'a Grammar.GMake(CLexer.Lexer).Entry.e
let create = G.Entry.create
let parse = G.entry_parse
@@ -118,30 +118,6 @@ struct
end
-module Symbols : sig
- val stoken : Tok.t -> ('s, string) G.ty_symbol
- val slist0sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol
- val slist1sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol
-end = struct
-
- let stoken tok =
- let pattern = match tok with
- | Tok.KEYWORD s -> "", s
- | Tok.IDENT s -> "IDENT", s
- | Tok.PATTERNIDENT s -> "PATTERNIDENT", s
- | Tok.FIELD s -> "FIELD", s
- | Tok.INT s -> "INT", s
- | Tok.STRING s -> "STRING", s
- | Tok.LEFTQMARK -> "LEFTQMARK", ""
- | Tok.BULLET s -> "BULLET", s
- | Tok.EOI -> "EOI", ""
- in
- G.s_token pattern
-
- let slist0sep x y = G.s_list0sep x y false
- let slist1sep x y = G.s_list1sep x y false
-end
-
(** Grammar extensions *)
(** NB: [extend_statement =
@@ -155,43 +131,73 @@ end
(** Binding general entry keys to symbol *)
-type ('s, 'a, 'r) casted_rule = Casted : ('s, 'b, 'r) G.ty_rule * ('a -> 'b) -> ('s, 'a, 'r) casted_rule
-
-let rec symbol_of_prod_entry_key : type s a. (s, a) symbol -> (s, a) G.ty_symbol = function
-| Atoken t -> Symbols.stoken t
-| Alist1 s -> G.s_list1 (symbol_of_prod_entry_key s)
+type ('s, 'trec, 'a, 'r) casted_rule =
+| CastedRNo : ('s, G.ty_norec, 'b, 'r) G.ty_rule * ('a -> 'b) -> ('s, norec, 'a, 'r) casted_rule
+| CastedRMay : ('s, G.ty_mayrec, 'b, 'r) G.ty_rule * ('a -> 'b) -> ('s, mayrec, 'a, 'r) casted_rule
+
+type ('s, 'trec, 'a) casted_symbol =
+| CastedSNo : ('s, G.ty_norec, 'a) G.ty_symbol -> ('s, norec, 'a) casted_symbol
+| CastedSMay : ('s, G.ty_mayrec, 'a) G.ty_symbol -> ('s, mayrec, 'a) casted_symbol
+
+let rec symbol_of_prod_entry_key : type s tr a. (s, tr, a) symbol -> (s, tr, a) casted_symbol =
+function
+| Atoken t -> CastedSNo (G.s_token t)
+| Alist1 s ->
+ begin match symbol_of_prod_entry_key s with
+ | CastedSNo s -> CastedSNo (G.s_list1 s)
+ | CastedSMay s -> CastedSMay (G.s_list1 s) end
| Alist1sep (s,sep) ->
- Symbols.slist1sep (symbol_of_prod_entry_key s) (symbol_of_prod_entry_key sep)
-| Alist0 s -> G.s_list0 (symbol_of_prod_entry_key s)
+ let CastedSNo sep = symbol_of_prod_entry_key sep in
+ begin match symbol_of_prod_entry_key s with
+ | CastedSNo s -> CastedSNo (G.s_list1sep s sep false)
+ | CastedSMay s -> CastedSMay (G.s_list1sep s sep false) end
+| Alist0 s ->
+ begin match symbol_of_prod_entry_key s with
+ | CastedSNo s -> CastedSNo (G.s_list0 s)
+ | CastedSMay s -> CastedSMay (G.s_list0 s) end
| Alist0sep (s,sep) ->
- Symbols.slist0sep (symbol_of_prod_entry_key s) (symbol_of_prod_entry_key sep)
-| Aopt s -> G.s_opt (symbol_of_prod_entry_key s)
-| Aself -> G.s_self
-| Anext -> G.s_next
-| Aentry e -> G.s_nterm e
-| Aentryl (e, n) -> G.s_nterml e n
+ let CastedSNo sep = symbol_of_prod_entry_key sep in
+ begin match symbol_of_prod_entry_key s with
+ | CastedSNo s -> CastedSNo (G.s_list0sep s sep false)
+ | CastedSMay s -> CastedSMay (G.s_list0sep s sep false) end
+| Aopt s ->
+ begin match symbol_of_prod_entry_key s with
+ | CastedSNo s -> CastedSNo (G.s_opt s)
+ | CastedSMay s -> CastedSMay (G.s_opt s) end
+| Aself -> CastedSMay G.s_self
+| Anext -> CastedSMay G.s_next
+| Aentry e -> CastedSNo (G.s_nterm e)
+| Aentryl (e, n) -> CastedSNo (G.s_nterml e n)
| Arules rs ->
let warning msg = Feedback.msg_warning Pp.(str msg) in
- G.s_rules ~warning:(Some warning) (List.map symbol_of_rules rs)
+ CastedSNo (G.s_rules ~warning:(Some warning) (List.map symbol_of_rules rs))
-and symbol_of_rule : type s a r. (s, a, Loc.t -> r) Extend.rule -> (s, a, Loc.t -> r) casted_rule = function
-| Stop -> Casted (G.r_stop, fun act loc -> act loc)
+and symbol_of_rule : type s tr a r. (s, tr, a, Loc.t -> r) Extend.rule -> (s, tr, a, Loc.t -> r) casted_rule = function
+| Stop -> CastedRNo (G.r_stop, fun act loc -> act loc)
| Next (r, s) ->
- let Casted (r, cast) = symbol_of_rule r in
- Casted (G.r_next r (symbol_of_prod_entry_key s), (fun act x -> cast (act x)))
-
-and symbol_of_rules : type a. a Extend.rules -> a G.ty_production = function
+ begin match symbol_of_rule r, symbol_of_prod_entry_key s with
+ | CastedRNo (r, cast), CastedSNo s -> CastedRMay (G.r_next r s, (fun act x -> cast (act x)))
+ | CastedRNo (r, cast), CastedSMay s -> CastedRMay (G.r_next r s, (fun act x -> cast (act x)))
+ | CastedRMay (r, cast), CastedSNo s -> CastedRMay (G.r_next r s, (fun act x -> cast (act x)))
+ | CastedRMay (r, cast), CastedSMay s -> CastedRMay (G.r_next r s, (fun act x -> cast (act x))) end
+| NextNoRec (r, s) ->
+ let CastedRNo (r, cast) = symbol_of_rule r in
+ let CastedSNo s = symbol_of_prod_entry_key s in
+ CastedRNo (G.r_next_norec r s, (fun act x -> cast (act x)))
+
+and symbol_of_rules : type a. a Extend.rules -> a G.ty_rules = function
| Rules (r, act) ->
- let Casted (symb, cast) = symbol_of_rule r.norec_rule in
- G.production (symb, cast act)
+ let CastedRNo (symb, cast) = symbol_of_rule r in
+ G.rules (symb, cast act)
(** FIXME: This is a hack around a deficient camlp5 API *)
-type 'a any_production = AnyProduction : ('a, 'f, Loc.t -> 'a) G.ty_rule * 'f -> 'a any_production
+type 'a any_production = AnyProduction : ('a, 'tr, 'f, Loc.t -> 'a) G.ty_rule * 'f -> 'a any_production
let of_coq_production_rule : type a. a Extend.production_rule -> a any_production = function
| Rule (toks, act) ->
- let Casted (symb, cast) = symbol_of_rule toks in
- AnyProduction (symb, cast act)
+ match symbol_of_rule toks with
+ | CastedRNo (symb, cast) -> AnyProduction (symb, cast act)
+ | CastedRMay (symb, cast) -> AnyProduction (symb, cast act)
let of_coq_single_extend_statement (lvl, assoc, rule) =
(lvl, assoc, List.map of_coq_production_rule rule)
@@ -303,7 +309,7 @@ let make_rule r = [None, None, r]
let eoi_entry en =
let e = Entry.create ((Gram.Entry.name en) ^ "_eoi") in
- let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (Symbols.stoken Tok.EOI) in
+ let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (G.s_token Tok.PEOI) in
let act = fun _ x loc -> x in
let warning msg = Feedback.msg_warning Pp.(str msg) in
Gram.safe_extend ~warning:(Some warning) e None (make_rule [G.production (symbs, act)]);
@@ -440,8 +446,11 @@ module Module =
let module_expr = Entry.create "module_expr"
let module_type = Entry.create "module_type"
end
-let epsilon_value f e =
- let r = G.production (G.r_next G.r_stop (symbol_of_prod_entry_key e), (fun x _ -> f x)) in
+let epsilon_value (type s tr a) f (e : (s, tr, a) symbol) =
+ let r =
+ match symbol_of_prod_entry_key e with
+ | CastedSNo s -> G.production (G.r_next G.r_stop s, (fun x _ -> f x))
+ | CastedSMay s -> G.production (G.r_next G.r_stop s, (fun x _ -> f x)) in
let ext = [None, None, [r]] in
let entry = Gram.entry_create "epsilon" in
let warning msg = Feedback.msg_warning Pp.(str msg) in
diff --git a/parsing/pcoq.mli b/parsing/pcoq.mli
index 0418249e42..e361f0d00f 100644
--- a/parsing/pcoq.mli
+++ b/parsing/pcoq.mli
@@ -203,7 +203,7 @@ module Module :
val module_type : module_ast Entry.t
end
-val epsilon_value : ('a -> 'self) -> ('self, 'a) Extend.symbol -> 'self option
+val epsilon_value : ('a -> 'self) -> ('self, _, 'a) Extend.symbol -> 'self option
(** {5 Extending the parser without synchronization} *)
diff --git a/parsing/tok.ml b/parsing/tok.ml
index 03825e350f..186d0502fc 100644
--- a/parsing/tok.ml
+++ b/parsing/tok.ml
@@ -12,6 +12,31 @@
let string_equal (s1 : string) s2 = s1 = s2
+type 'c p =
+ | PKEYWORD : string -> string p
+ | PPATTERNIDENT : string option -> string p
+ | PIDENT : string option -> string p
+ | PFIELD : string option -> string p
+ | PINT : string option -> string p
+ | PSTRING : string option -> string p
+ | PLEFTQMARK : unit p
+ | PBULLET : string option -> string p
+ | PQUOTATION : string -> string p
+ | PEOI : unit p
+
+let pattern_strings : type c. c p -> string * string option =
+ function
+ | PKEYWORD s -> "", Some s
+ | PPATTERNIDENT s -> "PATTERNIDENT", s
+ | PIDENT s -> "IDENT", s
+ | PFIELD s -> "FIELD", s
+ | PINT s -> "INT", s
+ | PSTRING s -> "STRING", s
+ | PLEFTQMARK -> "LEFTQMARK", None
+ | PBULLET s -> "BULLET", s
+ | PQUOTATION lbl -> "QUOTATION", Some lbl
+ | PEOI -> "EOI", None
+
type t =
| KEYWORD of string
| PATTERNIDENT of string
@@ -21,8 +46,25 @@ type t =
| STRING of string
| LEFTQMARK
| BULLET of string
+ | QUOTATION of string * string
| EOI
+let equal_p (type a b) (t1 : a p) (t2 : b p) : (a, b) Util.eq option =
+ let streq s1 s2 = match s1, s2 with None, None -> true
+ | Some s1, Some s2 -> string_equal s1 s2 | _ -> false in
+ match t1, t2 with
+ | PKEYWORD s1, PKEYWORD s2 when string_equal s1 s2 -> Some Util.Refl
+ | PPATTERNIDENT s1, PPATTERNIDENT s2 when streq s1 s2 -> Some Util.Refl
+ | PIDENT s1, PIDENT s2 when streq s1 s2 -> Some Util.Refl
+ | PFIELD s1, PFIELD s2 when streq s1 s2 -> Some Util.Refl
+ | PINT s1, PINT s2 when streq s1 s2 -> Some Util.Refl
+ | PSTRING s1, PSTRING s2 when streq s1 s2 -> Some Util.Refl
+ | PLEFTQMARK, PLEFTQMARK -> Some Util.Refl
+ | PBULLET s1, PBULLET s2 when streq s1 s2 -> Some Util.Refl
+ | PQUOTATION s1, PQUOTATION s2 when string_equal s1 s2 -> Some Util.Refl
+ | PEOI, PEOI -> Some Util.Refl
+ | _ -> None
+
let equal t1 t2 = match t1, t2 with
| IDENT s1, KEYWORD s2 -> string_equal s1 s2
| KEYWORD s1, KEYWORD s2 -> string_equal s1 s2
@@ -34,6 +76,7 @@ let equal t1 t2 = match t1, t2 with
| LEFTQMARK, LEFTQMARK -> true
| BULLET s1, BULLET s2 -> string_equal s1 s2
| EOI, EOI -> true
+| QUOTATION(s1,t1), QUOTATION(s2,t2) -> string_equal s1 s2 && string_equal t1 t2
| _ -> false
let extract_string diff_mode = function
@@ -58,65 +101,40 @@ let extract_string diff_mode = function
| INT s -> s
| LEFTQMARK -> "?"
| BULLET s -> s
+ | QUOTATION(_,s) -> s
| EOI -> ""
-let to_string = function
- | KEYWORD s -> Format.sprintf "%S" s
- | IDENT s -> Format.sprintf "IDENT %S" s
- | PATTERNIDENT s -> Format.sprintf "PATTERNIDENT %S" s
- | FIELD s -> Format.sprintf "FIELD %S" s
- | INT s -> Format.sprintf "INT %s" s
- | STRING s -> Format.sprintf "STRING %S" s
- | LEFTQMARK -> "LEFTQMARK"
- | BULLET s -> Format.sprintf "BULLET %S" s
- | EOI -> "EOI"
-
-let match_keyword kwd = function
- | KEYWORD kwd' when kwd = kwd' -> true
- | _ -> false
-
-(* Needed to fix Camlp5 signature.
- Cannot use Pp because of silly Tox -> Compat -> Pp dependency *)
-let print ppf tok = Format.pp_print_string ppf (to_string tok)
-
-(** For camlp5, conversion from/to [Plexing.pattern],
- and a match function analoguous to [Plexing.default_match] *)
-
-let of_pattern = function
- | "", s -> KEYWORD s
- | "IDENT", s -> IDENT s
- | "PATTERNIDENT", s -> PATTERNIDENT s
- | "FIELD", s -> FIELD s
- | "INT", s -> INT s
- | "STRING", s -> STRING s
- | "LEFTQMARK", _ -> LEFTQMARK
- | "BULLET", s -> BULLET s
- | "EOI", _ -> EOI
- | _ -> failwith "Tok.of_pattern: not a constructor"
-
-let to_pattern = function
- | KEYWORD s -> "", s
- | IDENT s -> "IDENT", s
- | PATTERNIDENT s -> "PATTERNIDENT", s
- | FIELD s -> "FIELD", s
- | INT s -> "INT", s
- | STRING s -> "STRING", s
- | LEFTQMARK -> "LEFTQMARK", ""
- | BULLET s -> "BULLET", s
- | EOI -> "EOI", ""
+(* Invariant, txt is "ident" or a well parenthesized "{{....}}" *)
+let trim_quotation txt =
+ let len = String.length txt in
+ if len = 0 then None, txt
+ else
+ let c = txt.[0] in
+ if c = '(' || c = '[' || c = '{' then
+ let rec aux n =
+ if n < len && txt.[n] = c then aux (n+1)
+ else Some c, String.sub txt n (len - (2*n))
+ in
+ aux 0
+ else None, txt
-let match_pattern =
+let match_pattern (type c) (p : c p) : t -> c =
let err () = raise Stream.Failure in
- function
- | "", "" -> (function KEYWORD s -> s | _ -> err ())
- | "IDENT", "" -> (function IDENT s -> s | _ -> err ())
- | "PATTERNIDENT", "" -> (function PATTERNIDENT s -> s | _ -> err ())
- | "FIELD", "" -> (function FIELD s -> s | _ -> err ())
- | "INT", "" -> (function INT s -> s | _ -> err ())
- | "STRING", "" -> (function STRING s -> s | _ -> err ())
- | "LEFTQMARK", "" -> (function LEFTQMARK -> "" | _ -> err ())
- | "BULLET", "" -> (function BULLET s -> s | _ -> err ())
- | "EOI", "" -> (function EOI -> "" | _ -> err ())
- | pat ->
- let tok = of_pattern pat in
- function tok' -> if equal tok tok' then snd pat else err ()
+ let seq = string_equal in
+ match p with
+ | PKEYWORD s -> (function KEYWORD s' when seq s s' -> s' | _ -> err ())
+ | PIDENT None -> (function IDENT s' -> s' | _ -> err ())
+ | PIDENT (Some s) -> (function (IDENT s' | KEYWORD s') when seq s s' -> s' | _ -> err ())
+ | PPATTERNIDENT None -> (function PATTERNIDENT s -> s | _ -> err ())
+ | PPATTERNIDENT (Some s) -> (function PATTERNIDENT s' when seq s s' -> s' | _ -> err ())
+ | PFIELD None -> (function FIELD s -> s | _ -> err ())
+ | PFIELD (Some s) -> (function FIELD s' when seq s s' -> s' | _ -> err ())
+ | PINT None -> (function INT s -> s | _ -> err ())
+ | PINT (Some s) -> (function INT s' when seq s s' -> s' | _ -> err ())
+ | PSTRING None -> (function STRING s -> s | _ -> err ())
+ | PSTRING (Some s) -> (function STRING s' when seq s s' -> s' | _ -> err ())
+ | PLEFTQMARK -> (function LEFTQMARK -> () | _ -> err ())
+ | PBULLET None -> (function BULLET s -> s | _ -> err ())
+ | PBULLET (Some s) -> (function BULLET s' when seq s s' -> s' | _ -> err ())
+ | PQUOTATION lbl -> (function QUOTATION(lbl',s') when string_equal lbl lbl' -> s' | _ -> err ())
+ | PEOI -> (function EOI -> () | _ -> err ())
diff --git a/parsing/tok.mli b/parsing/tok.mli
index 5750096a28..678877720d 100644
--- a/parsing/tok.mli
+++ b/parsing/tok.mli
@@ -10,6 +10,20 @@
(** The type of token for the Coq lexer and parser *)
+type 'c p =
+ | PKEYWORD : string -> string p
+ | PPATTERNIDENT : string option -> string p
+ | PIDENT : string option -> string p
+ | PFIELD : string option -> string p
+ | PINT : string option -> string p
+ | PSTRING : string option -> string p
+ | PLEFTQMARK : unit p
+ | PBULLET : string option -> string p
+ | PQUOTATION : string -> string p
+ | PEOI : unit p
+
+val pattern_strings : 'c p -> string * string option
+
type t =
| KEYWORD of string
| PATTERNIDENT of string
@@ -19,17 +33,22 @@ type t =
| STRING of string
| LEFTQMARK
| BULLET of string
+ | QUOTATION of string * string
| EOI
+val equal_p : 'a p -> 'b p -> ('a, 'b) Util.eq option
+
val equal : t -> t -> bool
(* pass true for diff_mode *)
val extract_string : bool -> t -> string
-val to_string : t -> string
-(* Needed to fit Camlp5 signature *)
-val print : Format.formatter -> t -> unit
-val match_keyword : string -> t -> bool
-
-(** for camlp5 *)
-val of_pattern : string*string -> t
-val to_pattern : t -> string*string
-val match_pattern : string*string -> t -> string
+
+(** Utility function for the test returned by a QUOTATION token:
+ It returns the delimiter parenthesis, if any, and the text
+ without delimiters. Eg `{{{ text }}}` -> Some '{', ` text ` *)
+val trim_quotation : string -> char option * string
+
+(** for camlp5,
+ eg GRAMMAR EXTEND ..... [ IDENT "x" -> .... END
+ is a pattern (PIDENT (Some "x"))
+*)
+val match_pattern : 'c p -> t -> 'c