diff options
Diffstat (limited to 'parsing')
| -rw-r--r-- | parsing/cLexer.ml | 135 | ||||
| -rw-r--r-- | parsing/cLexer.mli | 51 | ||||
| -rw-r--r-- | parsing/extend.ml | 50 | ||||
| -rw-r--r-- | parsing/notation_gram.ml | 2 | ||||
| -rw-r--r-- | parsing/pcoq.ml | 119 | ||||
| -rw-r--r-- | parsing/pcoq.mli | 2 | ||||
| -rw-r--r-- | parsing/tok.ml | 134 | ||||
| -rw-r--r-- | parsing/tok.mli | 37 |
8 files changed, 330 insertions, 200 deletions
diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml index 503cfcdb4f..b81d89edf9 100644 --- a/parsing/cLexer.ml +++ b/parsing/cLexer.ml @@ -19,16 +19,19 @@ open Gramlib module CharOrd = struct type t = char let compare : char -> char -> int = compare end module CharMap = Map.Make (CharOrd) +type starts_quotation = NoQuotation | Quotation + type ttree = { - node : string option; - branch : ttree CharMap.t } + node : (string * starts_quotation) option; + branch : ttree CharMap.t; +} let empty_ttree = { node = None; branch = CharMap.empty } -let ttree_add ttree str = +let ttree_add ttree (str,quot) = let rec insert tt i = if i == String.length str then - {node = Some str; branch = tt.branch} + {node = Some (str,quot); branch = tt.branch} else let c = str.[i] in let br = @@ -75,7 +78,7 @@ let ttree_elements ttree = let rec elts tt accu = let accu = match tt.node with | None -> accu - | Some s -> CString.Set.add s accu + | Some (s,_) -> CString.Set.add s accu in CharMap.fold (fun _ tt accu -> elts tt accu) tt.branch accu in @@ -259,11 +262,11 @@ let is_keyword s = try match (ttree_find !token_tree s).node with None -> false | Some _ -> true with Not_found -> false -let add_keyword str = +let add_keyword ?(quotation=NoQuotation) str = if not (is_keyword str) then begin check_keyword str; - token_tree := ttree_add !token_tree str + token_tree := ttree_add !token_tree (str,quotation) end let remove_keyword str = @@ -529,11 +532,62 @@ and progress_utf8 loc last nj n c tt cs = and progress_from_byte loc last nj tt cs c = progress_utf8 loc last nj (utf8_char_size loc cs c) c tt cs +type marker = Delimited of int * char list * char list | ImmediateAsciiIdent + +let peek_marker_len b e s = + let rec peek n = + match stream_nth n s with + | c -> if c = b then peek (n+1) else n, List.make n b, List.make n e + | exception Stream.Failure -> n, List.make n b, List.make n e + in + let len, start, stop = peek 0 in + if len = 0 then raise Stream.Failure + else Delimited (len, start, stop) + +let peek_marker s = + match stream_nth 0 s with + | '(' -> peek_marker_len '(' ')' s + | '[' -> peek_marker_len '[' ']' s + | '{' -> peek_marker_len '{' '}' s + | ('a'..'z' | 'A'..'Z' | '_') -> ImmediateAsciiIdent + | _ -> raise Stream.Failure + +let parse_quotation loc s = + match peek_marker s with + | ImmediateAsciiIdent -> + let c = Stream.next s in + let len = + try ident_tail loc (store 0 c) s with + Stream.Failure -> raise (Stream.Error "") + in + get_buff len + | Delimited (lenmarker, bmarker, emarker) -> + let b = Buffer.create 80 in + let commit1 c = Buffer.add_char b c; Stream.junk s in + let commit l = List.iter commit1 l in + let rec quotation depth = + match Stream.npeek lenmarker s with + | l when l = bmarker -> + commit l; + quotation (depth + 1) + | l when l = emarker -> + commit l; + if depth > 1 then quotation (depth - 1) + | c :: cs -> + commit1 c; + quotation depth + | [] -> raise Stream.Failure + in + quotation 0; + Buffer.contents b + + let find_keyword loc id s = let tt = ttree_find !token_tree id in match progress_further loc tt.node 0 tt s with | None -> raise Not_found - | Some c -> KEYWORD c + | Some (c,NoQuotation) -> KEYWORD c + | Some (c,Quotation) -> QUOTATION(c, parse_quotation loc s) let process_sequence loc bp c cs = let rec aux n cs = @@ -548,7 +602,8 @@ let process_chars ~diff_mode loc bp c cs = let t = progress_from_byte loc None (-1) !token_tree cs c in let ep = Stream.count cs in match t with - | Some t -> (KEYWORD t, set_loc_pos loc bp ep) + | Some (t,NoQuotation) -> (KEYWORD t, set_loc_pos loc bp ep) + | Some (c,Quotation) -> (QUOTATION(c, parse_quotation loc cs), set_loc_pos loc bp ep) | None -> let ep' = bp + utf8_char_size loc cs c in if diff_mode then begin @@ -735,20 +790,25 @@ let loct_add loct i loc = Hashtbl.add loct i loc we unfreeze the state of the lexer. This restores the behaviour of the lexer. B.B. *) -type te = Tok.t - (** Names of tokens, for this lexer, used in Grammar error messages *) -let token_text = function - | ("", t) -> "'" ^ t ^ "'" - | ("IDENT", "") -> "identifier" - | ("IDENT", t) -> "'" ^ t ^ "'" - | ("INT", "") -> "integer" - | ("INT", s) -> "'" ^ s ^ "'" - | ("STRING", "") -> "string" - | ("EOI", "") -> "end of input" - | (con, "") -> con - | (con, prm) -> con ^ " \"" ^ prm ^ "\"" +let token_text : type c. c Tok.p -> string = function + | PKEYWORD t -> "'" ^ t ^ "'" + | PIDENT None -> "identifier" + | PIDENT (Some t) -> "'" ^ t ^ "'" + | PINT None -> "integer" + | PINT (Some s) -> "'" ^ s ^ "'" + | PSTRING None -> "string" + | PSTRING (Some s) -> "STRING \"" ^ s ^ "\"" + | PLEFTQMARK -> "LEFTQMARK" + | PEOI -> "end of input" + | PPATTERNIDENT None -> "PATTERNIDENT" + | PPATTERNIDENT (Some s) -> "PATTERNIDENT \"" ^ s ^ "\"" + | PFIELD None -> "FIELD" + | PFIELD (Some s) -> "FIELD \"" ^ s ^ "\"" + | PBULLET None -> "BULLET" + | PBULLET (Some s) -> "BULLET \"" ^ s ^ "\"" + | PQUOTATION lbl -> "QUOTATION \"" ^ lbl ^ "\"" let func next_token ?loc cs = let loct = loct_create () in @@ -762,17 +822,24 @@ let func next_token ?loc cs = in (ts, loct_func loct) -let make_lexer ~diff_mode = { - Plexing.tok_func = func (next_token ~diff_mode); - Plexing.tok_using = - (fun pat -> match Tok.of_pattern pat with - | KEYWORD s -> add_keyword s - | _ -> ()); - Plexing.tok_removing = (fun _ -> ()); - Plexing.tok_match = Tok.match_pattern; - Plexing.tok_text = token_text } +module MakeLexer (Diff : sig val mode : bool end) = struct + type te = Tok.t + type 'c pattern = 'c Tok.p + let tok_pattern_eq = Tok.equal_p + let tok_pattern_strings = Tok.pattern_strings + let tok_func = func (next_token ~diff_mode:Diff.mode) + let tok_using : type c. c pattern -> unit = function + | PKEYWORD s -> add_keyword ~quotation:NoQuotation s + | PQUOTATION s -> add_keyword ~quotation:Quotation s + | _ -> () + let tok_removing = (fun _ -> ()) + let tok_match = Tok.match_pattern + let tok_text = token_text +end + +module Lexer = MakeLexer (struct let mode = false end) -let lexer = make_lexer ~diff_mode:false +module LexerDiff = MakeLexer (struct let mode = true end) (** Terminal symbols interpretation *) @@ -807,6 +874,6 @@ let strip s = let terminal s = let s = strip s in let () = match s with "" -> failwith "empty token." | _ -> () in - if is_ident_not_keyword s then IDENT s - else if is_number s then INT s - else KEYWORD s + if is_ident_not_keyword s then PIDENT (Some s) + else if is_number s then PINT (Some s) + else PKEYWORD s diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli index 807f37a1a4..9df3e45f49 100644 --- a/parsing/cLexer.mli +++ b/parsing/cLexer.mli @@ -8,8 +8,32 @@ (* * (see LICENSE file for the text of the license) *) (************************************************************************) +(** When one registers a keyword she can declare it starts a quotation. + In particular using QUOTATION("name:") in a grammar rule + declares "name:" as a keyword and the token QUOTATION is + matched whenever the keyword is followed by an identifier or a + parenthesized text. Eg + + constr:x + string:[....] + ltac:(....) + ltac:{....} + + The delimiter is made of 1 or more occurrences of the same parenthesis, + eg ((.....)) or [[[[....]]]]. The idea being that if the text happens to + contain the closing delimiter, one can make the delimiter longer and avoid + confusion (no escaping). Eg + + string:[[ .. ']' .. ]] + + + Nesting the delimiter is allowed, eg ((..((...))..)) is OK. + + Keywords don't need to end in ':' *) +type starts_quotation = NoQuotation | Quotation + (** This should be functional but it is not due to the interface *) -val add_keyword : string -> unit +val add_keyword : ?quotation:starts_quotation -> string -> unit val remove_keyword : string -> unit val is_keyword : string -> bool val keywords : unit -> CString.Set.t @@ -21,26 +45,14 @@ val get_keyword_state : unit -> keyword_state val check_ident : string -> unit val is_ident : string -> bool val check_keyword : string -> unit -val terminal : string -> Tok.t -(** The lexer of Coq: *) +(** When string is neither an ident nor an int, returns a keyword. *) +val terminal : string -> string Tok.p -(* modtype Grammar.GLexerType: sig - type te val - lexer : te Plexing.lexer - end - -where +(** The lexer of Coq: *) - type lexer 'te = - { tok_func : lexer_func 'te; - tok_using : pattern -> unit; - tok_removing : pattern -> unit; - tok_match : pattern -> 'te -> string; - tok_text : pattern -> string; - tok_comm : mutable option (list location) } - *) -include Gramlib.Grammar.GLexerType with type te = Tok.t +module Lexer : + Gramlib.Grammar.GLexerType with type te = Tok.t and type 'c pattern = 'c Tok.p module Error : sig type t @@ -66,4 +78,5 @@ as if it was unquoted, possibly becoming multiple tokens it was not in a comment, possibly becoming multiple tokens - return any unrecognized Ascii or UTF-8 character as a string *) -val make_lexer : diff_mode:bool -> Tok.t Gramlib.Plexing.lexer +module LexerDiff : + Gramlib.Grammar.GLexerType with type te = Tok.t and type 'c pattern = 'c Tok.p diff --git a/parsing/extend.ml b/parsing/extend.ml index 9b5537d7f6..dd7c301dfb 100644 --- a/parsing/extend.ml +++ b/parsing/extend.ml @@ -10,7 +10,7 @@ (** Entry keys for constr notations *) -type 'a entry = 'a Gramlib.Grammar.GMake(CLexer).Entry.e +type 'a entry = 'a Gramlib.Grammar.GMake(CLexer.Lexer).Entry.e type side = Left | Right @@ -44,7 +44,7 @@ type simple_constr_prod_entry_key = (** Entries used in productions (in right-hand-side of grammar rules), to parse non-terminals *) -type binder_entry_kind = ETBinderOpen | ETBinderClosed of Tok.t list +type binder_entry_kind = ETBinderOpen | ETBinderClosed of string Tok.p list type binder_target = ForBinder | ForTerm @@ -54,7 +54,7 @@ type constr_prod_entry_key = | ETProdBigint (* Parsed as an (unbounded) integer *) | ETProdConstr of Constrexpr.notation_entry * (production_level * production_position) (* Parsed as constr or pattern, or a subentry of those *) | ETProdPattern of int (* Parsed as pattern as a binder (as subpart of a constr) *) - | ETProdConstrList of (production_level * production_position) * Tok.t list (* Parsed as non-empty list of constr *) + | ETProdConstrList of (production_level * production_position) * string Tok.p list (* Parsed as non-empty list of constr *) | ETProdBinderList of binder_entry_kind (* Parsed as non-empty list of local binders *) (** {5 AST for user-provided entries} *) @@ -79,30 +79,34 @@ type ('a,'b,'c) ty_user_symbol = (** {5 Type-safe grammar extension} *) -type ('self, 'a) symbol = -| Atoken : Tok.t -> ('self, string) symbol -| Alist1 : ('self, 'a) symbol -> ('self, 'a list) symbol -| Alist1sep : ('self, 'a) symbol * ('self, _) symbol -> ('self, 'a list) symbol -| Alist0 : ('self, 'a) symbol -> ('self, 'a list) symbol -| Alist0sep : ('self, 'a) symbol * ('self, _) symbol -> ('self, 'a list) symbol -| Aopt : ('self, 'a) symbol -> ('self, 'a option) symbol -| Aself : ('self, 'self) symbol -| Anext : ('self, 'self) symbol -| Aentry : 'a entry -> ('self, 'a) symbol -| Aentryl : 'a entry * string -> ('self, 'a) symbol -| Arules : 'a rules list -> ('self, 'a) symbol - -and ('self, _, 'r) rule = -| Stop : ('self, 'r, 'r) rule -| Next : ('self, 'a, 'r) rule * ('self, 'b) symbol -> ('self, 'b -> 'a, 'r) rule - -and ('a, 'r) norec_rule = { norec_rule : 's. ('s, 'a, 'r) rule } +type norec = NoRec (* just two *) +type mayrec = MayRec (* incompatible types *) + +type ('self, 'trec, 'a) symbol = +| Atoken : 'c Tok.p -> ('self, norec, 'c) symbol +| Alist1 : ('self, 'trec, 'a) symbol -> ('self, 'trec, 'a list) symbol +| Alist1sep : ('self, 'trec, 'a) symbol * ('self, norec, _) symbol + -> ('self, 'trec, 'a list) symbol +| Alist0 : ('self, 'trec, 'a) symbol -> ('self, 'trec, 'a list) symbol +| Alist0sep : ('self, 'trec, 'a) symbol * ('self, norec, _) symbol + -> ('self, 'trec, 'a list) symbol +| Aopt : ('self, 'trec, 'a) symbol -> ('self, 'trec, 'a option) symbol +| Aself : ('self, mayrec, 'self) symbol +| Anext : ('self, mayrec, 'self) symbol +| Aentry : 'a entry -> ('self, norec, 'a) symbol +| Aentryl : 'a entry * string -> ('self, norec, 'a) symbol +| Arules : 'a rules list -> ('self, norec, 'a) symbol + +and ('self, 'trec, _, 'r) rule = +| Stop : ('self, norec, 'r, 'r) rule +| Next : ('self, _, 'a, 'r) rule * ('self, _, 'b) symbol -> ('self, mayrec, 'b -> 'a, 'r) rule +| NextNoRec : ('self, norec, 'a, 'r) rule * ('self, norec, 'b) symbol -> ('self, norec, 'b -> 'a, 'r) rule and 'a rules = -| Rules : ('act, Loc.t -> 'a) norec_rule * 'act -> 'a rules +| Rules : (_, norec, 'act, Loc.t -> 'a) rule * 'act -> 'a rules type 'a production_rule = -| Rule : ('a, 'act, Loc.t -> 'a) rule * 'act -> 'a production_rule +| Rule : ('a, _, 'act, Loc.t -> 'a) rule * 'act -> 'a production_rule type 'a single_extend_statement = string option * diff --git a/parsing/notation_gram.ml b/parsing/notation_gram.ml index fc5feba58b..6df0d6f21a 100644 --- a/parsing/notation_gram.ml +++ b/parsing/notation_gram.ml @@ -21,7 +21,7 @@ type level = Constrexpr.notation_entry * precedence * tolerability list * constr (* first argument is InCustomEntry s for custom entries *) type grammar_constr_prod_item = - | GramConstrTerminal of Tok.t + | GramConstrTerminal of string Tok.p | GramConstrNonTerminal of Extend.constr_prod_entry_key * Id.t option | GramConstrListMark of int * bool * int (* tells action rule to make a list of the n previous parsed items; diff --git a/parsing/pcoq.ml b/parsing/pcoq.ml index 9241205755..8f38e437b4 100644 --- a/parsing/pcoq.ml +++ b/parsing/pcoq.ml @@ -17,7 +17,7 @@ open Gramlib (** The parser of Coq *) module G : sig - include Grammar.S with type te = Tok.t + include Grammar.S with type te = Tok.t and type 'c pattern = 'c Tok.p (* where Grammar.S @@ -67,7 +67,7 @@ module type S = end with type 'a Entry.e = 'a Extend.entry = struct - include Grammar.GMake(CLexer) + include Grammar.GMake(CLexer.Lexer) type coq_parsable = parsable * CLexer.lexer_state ref @@ -107,7 +107,7 @@ end module Entry = struct - type 'a t = 'a Grammar.GMake(CLexer).Entry.e + type 'a t = 'a Grammar.GMake(CLexer.Lexer).Entry.e let create = G.Entry.create let parse = G.entry_parse @@ -118,30 +118,6 @@ struct end -module Symbols : sig - val stoken : Tok.t -> ('s, string) G.ty_symbol - val slist0sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol - val slist1sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol -end = struct - - let stoken tok = - let pattern = match tok with - | Tok.KEYWORD s -> "", s - | Tok.IDENT s -> "IDENT", s - | Tok.PATTERNIDENT s -> "PATTERNIDENT", s - | Tok.FIELD s -> "FIELD", s - | Tok.INT s -> "INT", s - | Tok.STRING s -> "STRING", s - | Tok.LEFTQMARK -> "LEFTQMARK", "" - | Tok.BULLET s -> "BULLET", s - | Tok.EOI -> "EOI", "" - in - G.s_token pattern - - let slist0sep x y = G.s_list0sep x y false - let slist1sep x y = G.s_list1sep x y false -end - (** Grammar extensions *) (** NB: [extend_statement = @@ -155,43 +131,73 @@ end (** Binding general entry keys to symbol *) -type ('s, 'a, 'r) casted_rule = Casted : ('s, 'b, 'r) G.ty_rule * ('a -> 'b) -> ('s, 'a, 'r) casted_rule - -let rec symbol_of_prod_entry_key : type s a. (s, a) symbol -> (s, a) G.ty_symbol = function -| Atoken t -> Symbols.stoken t -| Alist1 s -> G.s_list1 (symbol_of_prod_entry_key s) +type ('s, 'trec, 'a, 'r) casted_rule = +| CastedRNo : ('s, G.ty_norec, 'b, 'r) G.ty_rule * ('a -> 'b) -> ('s, norec, 'a, 'r) casted_rule +| CastedRMay : ('s, G.ty_mayrec, 'b, 'r) G.ty_rule * ('a -> 'b) -> ('s, mayrec, 'a, 'r) casted_rule + +type ('s, 'trec, 'a) casted_symbol = +| CastedSNo : ('s, G.ty_norec, 'a) G.ty_symbol -> ('s, norec, 'a) casted_symbol +| CastedSMay : ('s, G.ty_mayrec, 'a) G.ty_symbol -> ('s, mayrec, 'a) casted_symbol + +let rec symbol_of_prod_entry_key : type s tr a. (s, tr, a) symbol -> (s, tr, a) casted_symbol = +function +| Atoken t -> CastedSNo (G.s_token t) +| Alist1 s -> + begin match symbol_of_prod_entry_key s with + | CastedSNo s -> CastedSNo (G.s_list1 s) + | CastedSMay s -> CastedSMay (G.s_list1 s) end | Alist1sep (s,sep) -> - Symbols.slist1sep (symbol_of_prod_entry_key s) (symbol_of_prod_entry_key sep) -| Alist0 s -> G.s_list0 (symbol_of_prod_entry_key s) + let CastedSNo sep = symbol_of_prod_entry_key sep in + begin match symbol_of_prod_entry_key s with + | CastedSNo s -> CastedSNo (G.s_list1sep s sep false) + | CastedSMay s -> CastedSMay (G.s_list1sep s sep false) end +| Alist0 s -> + begin match symbol_of_prod_entry_key s with + | CastedSNo s -> CastedSNo (G.s_list0 s) + | CastedSMay s -> CastedSMay (G.s_list0 s) end | Alist0sep (s,sep) -> - Symbols.slist0sep (symbol_of_prod_entry_key s) (symbol_of_prod_entry_key sep) -| Aopt s -> G.s_opt (symbol_of_prod_entry_key s) -| Aself -> G.s_self -| Anext -> G.s_next -| Aentry e -> G.s_nterm e -| Aentryl (e, n) -> G.s_nterml e n + let CastedSNo sep = symbol_of_prod_entry_key sep in + begin match symbol_of_prod_entry_key s with + | CastedSNo s -> CastedSNo (G.s_list0sep s sep false) + | CastedSMay s -> CastedSMay (G.s_list0sep s sep false) end +| Aopt s -> + begin match symbol_of_prod_entry_key s with + | CastedSNo s -> CastedSNo (G.s_opt s) + | CastedSMay s -> CastedSMay (G.s_opt s) end +| Aself -> CastedSMay G.s_self +| Anext -> CastedSMay G.s_next +| Aentry e -> CastedSNo (G.s_nterm e) +| Aentryl (e, n) -> CastedSNo (G.s_nterml e n) | Arules rs -> let warning msg = Feedback.msg_warning Pp.(str msg) in - G.s_rules ~warning:(Some warning) (List.map symbol_of_rules rs) + CastedSNo (G.s_rules ~warning:(Some warning) (List.map symbol_of_rules rs)) -and symbol_of_rule : type s a r. (s, a, Loc.t -> r) Extend.rule -> (s, a, Loc.t -> r) casted_rule = function -| Stop -> Casted (G.r_stop, fun act loc -> act loc) +and symbol_of_rule : type s tr a r. (s, tr, a, Loc.t -> r) Extend.rule -> (s, tr, a, Loc.t -> r) casted_rule = function +| Stop -> CastedRNo (G.r_stop, fun act loc -> act loc) | Next (r, s) -> - let Casted (r, cast) = symbol_of_rule r in - Casted (G.r_next r (symbol_of_prod_entry_key s), (fun act x -> cast (act x))) - -and symbol_of_rules : type a. a Extend.rules -> a G.ty_production = function + begin match symbol_of_rule r, symbol_of_prod_entry_key s with + | CastedRNo (r, cast), CastedSNo s -> CastedRMay (G.r_next r s, (fun act x -> cast (act x))) + | CastedRNo (r, cast), CastedSMay s -> CastedRMay (G.r_next r s, (fun act x -> cast (act x))) + | CastedRMay (r, cast), CastedSNo s -> CastedRMay (G.r_next r s, (fun act x -> cast (act x))) + | CastedRMay (r, cast), CastedSMay s -> CastedRMay (G.r_next r s, (fun act x -> cast (act x))) end +| NextNoRec (r, s) -> + let CastedRNo (r, cast) = symbol_of_rule r in + let CastedSNo s = symbol_of_prod_entry_key s in + CastedRNo (G.r_next_norec r s, (fun act x -> cast (act x))) + +and symbol_of_rules : type a. a Extend.rules -> a G.ty_rules = function | Rules (r, act) -> - let Casted (symb, cast) = symbol_of_rule r.norec_rule in - G.production (symb, cast act) + let CastedRNo (symb, cast) = symbol_of_rule r in + G.rules (symb, cast act) (** FIXME: This is a hack around a deficient camlp5 API *) -type 'a any_production = AnyProduction : ('a, 'f, Loc.t -> 'a) G.ty_rule * 'f -> 'a any_production +type 'a any_production = AnyProduction : ('a, 'tr, 'f, Loc.t -> 'a) G.ty_rule * 'f -> 'a any_production let of_coq_production_rule : type a. a Extend.production_rule -> a any_production = function | Rule (toks, act) -> - let Casted (symb, cast) = symbol_of_rule toks in - AnyProduction (symb, cast act) + match symbol_of_rule toks with + | CastedRNo (symb, cast) -> AnyProduction (symb, cast act) + | CastedRMay (symb, cast) -> AnyProduction (symb, cast act) let of_coq_single_extend_statement (lvl, assoc, rule) = (lvl, assoc, List.map of_coq_production_rule rule) @@ -303,7 +309,7 @@ let make_rule r = [None, None, r] let eoi_entry en = let e = Entry.create ((Gram.Entry.name en) ^ "_eoi") in - let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (Symbols.stoken Tok.EOI) in + let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (G.s_token Tok.PEOI) in let act = fun _ x loc -> x in let warning msg = Feedback.msg_warning Pp.(str msg) in Gram.safe_extend ~warning:(Some warning) e None (make_rule [G.production (symbs, act)]); @@ -440,8 +446,11 @@ module Module = let module_expr = Entry.create "module_expr" let module_type = Entry.create "module_type" end -let epsilon_value f e = - let r = G.production (G.r_next G.r_stop (symbol_of_prod_entry_key e), (fun x _ -> f x)) in +let epsilon_value (type s tr a) f (e : (s, tr, a) symbol) = + let r = + match symbol_of_prod_entry_key e with + | CastedSNo s -> G.production (G.r_next G.r_stop s, (fun x _ -> f x)) + | CastedSMay s -> G.production (G.r_next G.r_stop s, (fun x _ -> f x)) in let ext = [None, None, [r]] in let entry = Gram.entry_create "epsilon" in let warning msg = Feedback.msg_warning Pp.(str msg) in diff --git a/parsing/pcoq.mli b/parsing/pcoq.mli index 0418249e42..e361f0d00f 100644 --- a/parsing/pcoq.mli +++ b/parsing/pcoq.mli @@ -203,7 +203,7 @@ module Module : val module_type : module_ast Entry.t end -val epsilon_value : ('a -> 'self) -> ('self, 'a) Extend.symbol -> 'self option +val epsilon_value : ('a -> 'self) -> ('self, _, 'a) Extend.symbol -> 'self option (** {5 Extending the parser without synchronization} *) diff --git a/parsing/tok.ml b/parsing/tok.ml index 03825e350f..186d0502fc 100644 --- a/parsing/tok.ml +++ b/parsing/tok.ml @@ -12,6 +12,31 @@ let string_equal (s1 : string) s2 = s1 = s2 +type 'c p = + | PKEYWORD : string -> string p + | PPATTERNIDENT : string option -> string p + | PIDENT : string option -> string p + | PFIELD : string option -> string p + | PINT : string option -> string p + | PSTRING : string option -> string p + | PLEFTQMARK : unit p + | PBULLET : string option -> string p + | PQUOTATION : string -> string p + | PEOI : unit p + +let pattern_strings : type c. c p -> string * string option = + function + | PKEYWORD s -> "", Some s + | PPATTERNIDENT s -> "PATTERNIDENT", s + | PIDENT s -> "IDENT", s + | PFIELD s -> "FIELD", s + | PINT s -> "INT", s + | PSTRING s -> "STRING", s + | PLEFTQMARK -> "LEFTQMARK", None + | PBULLET s -> "BULLET", s + | PQUOTATION lbl -> "QUOTATION", Some lbl + | PEOI -> "EOI", None + type t = | KEYWORD of string | PATTERNIDENT of string @@ -21,8 +46,25 @@ type t = | STRING of string | LEFTQMARK | BULLET of string + | QUOTATION of string * string | EOI +let equal_p (type a b) (t1 : a p) (t2 : b p) : (a, b) Util.eq option = + let streq s1 s2 = match s1, s2 with None, None -> true + | Some s1, Some s2 -> string_equal s1 s2 | _ -> false in + match t1, t2 with + | PKEYWORD s1, PKEYWORD s2 when string_equal s1 s2 -> Some Util.Refl + | PPATTERNIDENT s1, PPATTERNIDENT s2 when streq s1 s2 -> Some Util.Refl + | PIDENT s1, PIDENT s2 when streq s1 s2 -> Some Util.Refl + | PFIELD s1, PFIELD s2 when streq s1 s2 -> Some Util.Refl + | PINT s1, PINT s2 when streq s1 s2 -> Some Util.Refl + | PSTRING s1, PSTRING s2 when streq s1 s2 -> Some Util.Refl + | PLEFTQMARK, PLEFTQMARK -> Some Util.Refl + | PBULLET s1, PBULLET s2 when streq s1 s2 -> Some Util.Refl + | PQUOTATION s1, PQUOTATION s2 when string_equal s1 s2 -> Some Util.Refl + | PEOI, PEOI -> Some Util.Refl + | _ -> None + let equal t1 t2 = match t1, t2 with | IDENT s1, KEYWORD s2 -> string_equal s1 s2 | KEYWORD s1, KEYWORD s2 -> string_equal s1 s2 @@ -34,6 +76,7 @@ let equal t1 t2 = match t1, t2 with | LEFTQMARK, LEFTQMARK -> true | BULLET s1, BULLET s2 -> string_equal s1 s2 | EOI, EOI -> true +| QUOTATION(s1,t1), QUOTATION(s2,t2) -> string_equal s1 s2 && string_equal t1 t2 | _ -> false let extract_string diff_mode = function @@ -58,65 +101,40 @@ let extract_string diff_mode = function | INT s -> s | LEFTQMARK -> "?" | BULLET s -> s + | QUOTATION(_,s) -> s | EOI -> "" -let to_string = function - | KEYWORD s -> Format.sprintf "%S" s - | IDENT s -> Format.sprintf "IDENT %S" s - | PATTERNIDENT s -> Format.sprintf "PATTERNIDENT %S" s - | FIELD s -> Format.sprintf "FIELD %S" s - | INT s -> Format.sprintf "INT %s" s - | STRING s -> Format.sprintf "STRING %S" s - | LEFTQMARK -> "LEFTQMARK" - | BULLET s -> Format.sprintf "BULLET %S" s - | EOI -> "EOI" - -let match_keyword kwd = function - | KEYWORD kwd' when kwd = kwd' -> true - | _ -> false - -(* Needed to fix Camlp5 signature. - Cannot use Pp because of silly Tox -> Compat -> Pp dependency *) -let print ppf tok = Format.pp_print_string ppf (to_string tok) - -(** For camlp5, conversion from/to [Plexing.pattern], - and a match function analoguous to [Plexing.default_match] *) - -let of_pattern = function - | "", s -> KEYWORD s - | "IDENT", s -> IDENT s - | "PATTERNIDENT", s -> PATTERNIDENT s - | "FIELD", s -> FIELD s - | "INT", s -> INT s - | "STRING", s -> STRING s - | "LEFTQMARK", _ -> LEFTQMARK - | "BULLET", s -> BULLET s - | "EOI", _ -> EOI - | _ -> failwith "Tok.of_pattern: not a constructor" - -let to_pattern = function - | KEYWORD s -> "", s - | IDENT s -> "IDENT", s - | PATTERNIDENT s -> "PATTERNIDENT", s - | FIELD s -> "FIELD", s - | INT s -> "INT", s - | STRING s -> "STRING", s - | LEFTQMARK -> "LEFTQMARK", "" - | BULLET s -> "BULLET", s - | EOI -> "EOI", "" +(* Invariant, txt is "ident" or a well parenthesized "{{....}}" *) +let trim_quotation txt = + let len = String.length txt in + if len = 0 then None, txt + else + let c = txt.[0] in + if c = '(' || c = '[' || c = '{' then + let rec aux n = + if n < len && txt.[n] = c then aux (n+1) + else Some c, String.sub txt n (len - (2*n)) + in + aux 0 + else None, txt -let match_pattern = +let match_pattern (type c) (p : c p) : t -> c = let err () = raise Stream.Failure in - function - | "", "" -> (function KEYWORD s -> s | _ -> err ()) - | "IDENT", "" -> (function IDENT s -> s | _ -> err ()) - | "PATTERNIDENT", "" -> (function PATTERNIDENT s -> s | _ -> err ()) - | "FIELD", "" -> (function FIELD s -> s | _ -> err ()) - | "INT", "" -> (function INT s -> s | _ -> err ()) - | "STRING", "" -> (function STRING s -> s | _ -> err ()) - | "LEFTQMARK", "" -> (function LEFTQMARK -> "" | _ -> err ()) - | "BULLET", "" -> (function BULLET s -> s | _ -> err ()) - | "EOI", "" -> (function EOI -> "" | _ -> err ()) - | pat -> - let tok = of_pattern pat in - function tok' -> if equal tok tok' then snd pat else err () + let seq = string_equal in + match p with + | PKEYWORD s -> (function KEYWORD s' when seq s s' -> s' | _ -> err ()) + | PIDENT None -> (function IDENT s' -> s' | _ -> err ()) + | PIDENT (Some s) -> (function (IDENT s' | KEYWORD s') when seq s s' -> s' | _ -> err ()) + | PPATTERNIDENT None -> (function PATTERNIDENT s -> s | _ -> err ()) + | PPATTERNIDENT (Some s) -> (function PATTERNIDENT s' when seq s s' -> s' | _ -> err ()) + | PFIELD None -> (function FIELD s -> s | _ -> err ()) + | PFIELD (Some s) -> (function FIELD s' when seq s s' -> s' | _ -> err ()) + | PINT None -> (function INT s -> s | _ -> err ()) + | PINT (Some s) -> (function INT s' when seq s s' -> s' | _ -> err ()) + | PSTRING None -> (function STRING s -> s | _ -> err ()) + | PSTRING (Some s) -> (function STRING s' when seq s s' -> s' | _ -> err ()) + | PLEFTQMARK -> (function LEFTQMARK -> () | _ -> err ()) + | PBULLET None -> (function BULLET s -> s | _ -> err ()) + | PBULLET (Some s) -> (function BULLET s' when seq s s' -> s' | _ -> err ()) + | PQUOTATION lbl -> (function QUOTATION(lbl',s') when string_equal lbl lbl' -> s' | _ -> err ()) + | PEOI -> (function EOI -> () | _ -> err ()) diff --git a/parsing/tok.mli b/parsing/tok.mli index 5750096a28..678877720d 100644 --- a/parsing/tok.mli +++ b/parsing/tok.mli @@ -10,6 +10,20 @@ (** The type of token for the Coq lexer and parser *) +type 'c p = + | PKEYWORD : string -> string p + | PPATTERNIDENT : string option -> string p + | PIDENT : string option -> string p + | PFIELD : string option -> string p + | PINT : string option -> string p + | PSTRING : string option -> string p + | PLEFTQMARK : unit p + | PBULLET : string option -> string p + | PQUOTATION : string -> string p + | PEOI : unit p + +val pattern_strings : 'c p -> string * string option + type t = | KEYWORD of string | PATTERNIDENT of string @@ -19,17 +33,22 @@ type t = | STRING of string | LEFTQMARK | BULLET of string + | QUOTATION of string * string | EOI +val equal_p : 'a p -> 'b p -> ('a, 'b) Util.eq option + val equal : t -> t -> bool (* pass true for diff_mode *) val extract_string : bool -> t -> string -val to_string : t -> string -(* Needed to fit Camlp5 signature *) -val print : Format.formatter -> t -> unit -val match_keyword : string -> t -> bool - -(** for camlp5 *) -val of_pattern : string*string -> t -val to_pattern : t -> string*string -val match_pattern : string*string -> t -> string + +(** Utility function for the test returned by a QUOTATION token: + It returns the delimiter parenthesis, if any, and the text + without delimiters. Eg `{{{ text }}}` -> Some '{', ` text ` *) +val trim_quotation : string -> char option * string + +(** for camlp5, + eg GRAMMAR EXTEND ..... [ IDENT "x" -> .... END + is a pattern (PIDENT (Some "x")) +*) +val match_pattern : 'c p -> t -> 'c |
