diff options
| author | Enrico Tassi | 2019-03-08 16:16:34 +0100 |
|---|---|---|
| committer | Enrico Tassi | 2019-03-31 14:36:28 +0200 |
| commit | 46b9e086093d66ff7a916e0475549a9cfb0b056d (patch) | |
| tree | e164a8738f3372d1c46a318e39f27abf0f26f9eb | |
| parent | 912eaf40d4efd29b7e3489d51c55b8b79206df79 (diff) | |
[parsing] add keyword-protected generic quotation
One can now register a quotation using a grammar rule with
QUOTATION("name:"). "name:" becomes a keyword and the token is
generated for name: followed by a an identifier or a parenthesized
text. Eg
constr:x
string:[....]
ltac:(....)
ltac:{....}
The delimiter is made of 1 or more occurrences of the same parenthesis,
eg ((.....)) or [[[[....]]]]. The idea being that if the text happens to
contain the closing delimiter, one can make the delimiter longer and avoid
confusion (no escaping). Eg
string:[[ .. ']' .. ]]
Nesting the delimiter is allowed, eg ((..((...))..)) is OK.
The text inside the quotation is returned as a string (including the
parentheses), so that a third party parser can take care of it.
Keywords don't need to end in ':'.
| -rw-r--r-- | coqpp/coqpp_main.ml | 2 | ||||
| -rw-r--r-- | parsing/cLexer.ml | 76 | ||||
| -rw-r--r-- | parsing/cLexer.mli | 4 | ||||
| -rw-r--r-- | parsing/tok.ml | 47 | ||||
| -rw-r--r-- | parsing/tok.mli | 8 |
5 files changed, 114 insertions, 23 deletions
diff --git a/coqpp/coqpp_main.ml b/coqpp/coqpp_main.ml index c8a87e6cb6..f4c819eeb6 100644 --- a/coqpp/coqpp_main.ml +++ b/coqpp/coqpp_main.ml @@ -163,6 +163,8 @@ let is_token s = match string_split s with let rec parse_tokens = function | [GSymbString s] -> SymbToken ("", Some s) +| [GSymbQualid ("QUOTATION", None); GSymbString s] -> + SymbToken ("QUOTATION", Some s) | [GSymbQualid ("SELF", None)] -> SymbSelf | [GSymbQualid ("NEXT", None)] -> SymbNext | [GSymbQualid ("LIST0", None); tkn] -> diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml index 7345afb307..33890545da 100644 --- a/parsing/cLexer.ml +++ b/parsing/cLexer.ml @@ -19,16 +19,19 @@ open Gramlib module CharOrd = struct type t = char let compare : char -> char -> int = compare end module CharMap = Map.Make (CharOrd) +type starts_quotation = NoQuotation | Quotation + type ttree = { - node : string option; - branch : ttree CharMap.t } + node : (string * starts_quotation) option; + branch : ttree CharMap.t; +} let empty_ttree = { node = None; branch = CharMap.empty } -let ttree_add ttree str = +let ttree_add ttree (str,quot) = let rec insert tt i = if i == String.length str then - {node = Some str; branch = tt.branch} + {node = Some (str,quot); branch = tt.branch} else let c = str.[i] in let br = @@ -75,7 +78,7 @@ let ttree_elements ttree = let rec elts tt accu = let accu = match tt.node with | None -> accu - | Some s -> CString.Set.add s accu + | Some (s,_) -> CString.Set.add s accu in CharMap.fold (fun _ tt accu -> elts tt accu) tt.branch accu in @@ -259,11 +262,11 @@ let is_keyword s = try match (ttree_find !token_tree s).node with None -> false | Some _ -> true with Not_found -> false -let add_keyword str = +let add_keyword ?(quotation=NoQuotation) str = if not (is_keyword str) then begin check_keyword str; - token_tree := ttree_add !token_tree str + token_tree := ttree_add !token_tree (str,quotation) end let remove_keyword str = @@ -529,11 +532,62 @@ and progress_utf8 loc last nj n c tt cs = and progress_from_byte loc last nj tt cs c = progress_utf8 loc last nj (utf8_char_size loc cs c) c tt cs +type marker = Delimited of int * char list * char list | ImmediateAsciiIdent + +let peek_marker_len b e s = + let rec peek n = + match stream_nth n s with + | c -> if c = b then peek (n+1) else n, List.make n b, List.make n e + | exception Stream.Failure -> n, List.make n b, List.make n e + in + let len, start, stop = peek 0 in + if len = 0 then raise Stream.Failure + else Delimited (len, start, stop) + +let peek_marker s = + match stream_nth 0 s with + | '(' -> peek_marker_len '(' ')' s + | '[' -> peek_marker_len '[' ']' s + | '{' -> peek_marker_len '{' '}' s + | ('a'..'z' | 'A'..'Z' | '_') -> ImmediateAsciiIdent + | _ -> raise Stream.Failure + +let parse_quotation loc s = + match peek_marker s with + | ImmediateAsciiIdent -> + let c = Stream.next s in + let len = + try ident_tail loc (store 0 c) s with + Stream.Failure -> raise (Stream.Error "") + in + get_buff len + | Delimited (lenmarker, bmarker, emarker) -> + let b = Buffer.create 80 in + let commit1 c = Buffer.add_char b c; Stream.junk s in + let commit l = List.iter commit1 l in + let rec quotation depth = + match Stream.npeek lenmarker s with + | l when l = bmarker -> + commit l; + quotation (depth + 1) + | l when l = emarker -> + commit l; + if depth > 1 then quotation (depth - 1) + | c :: cs -> + commit1 c; + quotation depth + | [] -> raise Stream.Failure + in + quotation 0; + Buffer.contents b + + let find_keyword loc id s = let tt = ttree_find !token_tree id in match progress_further loc tt.node 0 tt s with | None -> raise Not_found - | Some c -> KEYWORD c + | Some (c,NoQuotation) -> KEYWORD c + | Some (c,Quotation) -> QUOTATION(c, parse_quotation loc s) let process_sequence loc bp c cs = let rec aux n cs = @@ -548,7 +602,8 @@ let process_chars ~diff_mode loc bp c cs = let t = progress_from_byte loc None (-1) !token_tree cs c in let ep = Stream.count cs in match t with - | Some t -> (KEYWORD t, set_loc_pos loc bp ep) + | Some (t,NoQuotation) -> (KEYWORD t, set_loc_pos loc bp ep) + | Some (c,Quotation) -> (QUOTATION(c, parse_quotation loc cs), set_loc_pos loc bp ep) | None -> let ep' = bp + utf8_char_size loc cs c in if diff_mode then begin @@ -766,7 +821,8 @@ let make_lexer ~diff_mode = { Plexing.tok_func = func (next_token ~diff_mode); Plexing.tok_using = (fun pat -> match Tok.is_keyword pat with - | Some s -> add_keyword s + | Some (false,s) -> add_keyword ~quotation:NoQuotation s + | Some (true,s) -> add_keyword ~quotation:Quotation s | None -> ()); Plexing.tok_removing = (fun _ -> ()); Plexing.tok_match = Tok.match_pattern; diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli index 662f6a477d..b9f89f5052 100644 --- a/parsing/cLexer.mli +++ b/parsing/cLexer.mli @@ -9,7 +9,9 @@ (************************************************************************) (** This should be functional but it is not due to the interface *) -val add_keyword : string -> unit +type starts_quotation = NoQuotation | Quotation + +val add_keyword : ?quotation:starts_quotation -> string -> unit val remove_keyword : string -> unit val is_keyword : string -> bool val keywords : unit -> CString.Set.t diff --git a/parsing/tok.ml b/parsing/tok.ml index ba249bd977..f0846dcf90 100644 --- a/parsing/tok.ml +++ b/parsing/tok.ml @@ -21,6 +21,7 @@ type t = | STRING of string | LEFTQMARK | BULLET of string + | QUOTATION of string * string | EOI let equal t1 t2 = match t1, t2 with @@ -34,6 +35,7 @@ let equal t1 t2 = match t1, t2 with | LEFTQMARK, LEFTQMARK -> true | BULLET s1, BULLET s2 -> string_equal s1 s2 | EOI, EOI -> true +| QUOTATION(s1,t1), QUOTATION(s2,t2) -> string_equal s1 s2 && string_equal t1 t2 | _ -> false let extract_string diff_mode = function @@ -58,6 +60,7 @@ let extract_string diff_mode = function | INT s -> s | LEFTQMARK -> "?" | BULLET s -> s + | QUOTATION(_,s) -> s | EOI -> "" let to_string = function @@ -69,12 +72,27 @@ let to_string = function | STRING s -> Format.sprintf "STRING %S" s | LEFTQMARK -> "LEFTQMARK" | BULLET s -> Format.sprintf "BULLET %S" s + | QUOTATION(lbl,s) -> lbl ^ s | EOI -> "EOI" let match_keyword kwd = function | KEYWORD kwd' when kwd = kwd' -> true | _ -> false +(* Invariant, txt is "ident" or a well parenthesized "{{....}}" *) +let trim_quotation txt = + let len = String.length txt in + if len = 0 then None, txt + else + let c = txt.[0] in + if c = '(' || c = '[' || c = '{' then + let rec aux n = + if n < len && txt.[n] = c then aux (n+1) + else Some c, String.sub txt n (len - (2*n)) + in + aux 0 + else None, txt + (* Needed to fix Camlp5 signature. Cannot use Pp because of silly Tox -> Compat -> Pp dependency *) let print ppf tok = Format.pp_print_string ppf (to_string tok) @@ -85,7 +103,8 @@ let print ppf tok = Format.pp_print_string ppf (to_string tok) type pattern = string * string option let is_keyword = function - | "", Some s -> Some s + | "", Some s -> Some (false,s) + | "QUOTATION", Some s -> Some (true,s) | _ -> None let pattern_for_EOI = ("EOI",None) @@ -99,14 +118,20 @@ let match_pattern (key, value) = | None -> x | Some value -> if string_equal value x then x else err () in match key with - | "" -> (function { v = KEYWORD s } -> cond s | _ -> err ()) - | "IDENT" when value <> None -> (function { v = (IDENT s | KEYWORD s) } -> cond s | _ -> err ()) - | "IDENT" -> (function { v = IDENT s } -> cond s | _ -> err ()) - | "PATTERNIDENT" -> (function { v = PATTERNIDENT s } -> cond s | _ -> err ()) - | "FIELD" -> (function { v = FIELD s } -> cond s | _ -> err ()) - | "INT" -> (function { v = INT s } -> cond s | _ -> err ()) - | "STRING" -> (function { v = STRING s } -> cond s | _ -> err ()) - | "LEFTQMARK" -> (function { v = LEFTQMARK } -> cond "" | _ -> err ()) - | "BULLET" -> (function { v = BULLET s } -> cond s | _ -> err ()) - | "EOI" -> (function { v = EOI } -> cond "" | _ -> err ()) + | "" -> (function KEYWORD s -> cond s | _ -> err ()) + | "IDENT" when value <> None -> (function (IDENT s | KEYWORD s) -> cond s | _ -> err ()) + | "IDENT" -> (function IDENT s -> cond s | _ -> err ()) + | "PATTERNIDENT" -> (function PATTERNIDENT s -> cond s | _ -> err ()) + | "FIELD" -> (function FIELD s -> cond s | _ -> err ()) + | "INT" -> (function INT s -> cond s | _ -> err ()) + | "STRING" -> (function STRING s -> cond s | _ -> err ()) + | "LEFTQMARK" -> (function LEFTQMARK -> cond "" | _ -> err ()) + | "BULLET" -> (function BULLET s -> cond s | _ -> err ()) + | "QUOTATION" -> (function + | QUOTATION(lbl,s) -> + begin match value with + | None -> assert false + | Some lbl1 -> if string_equal lbl1 lbl then s else err () end + | _ -> err ()) + | "EOI" -> (function EOI -> cond "" | _ -> err ()) | p -> CErrors.anomaly Pp.(str "Tok: unknown pattern " ++ str p) diff --git a/parsing/tok.mli b/parsing/tok.mli index e32047e6ec..8a255be06f 100644 --- a/parsing/tok.mli +++ b/parsing/tok.mli @@ -19,6 +19,7 @@ type t = | STRING of string | LEFTQMARK | BULLET of string + | QUOTATION of string * string | EOI val equal : t -> t -> bool @@ -29,13 +30,18 @@ val to_string : t -> string val print : Format.formatter -> t -> unit val match_keyword : string -> t -> bool +(** Utility function for the test returned by a QUOTATION token: + It returns the delimiter parenthesis, if any, and the text + without delimiters. Eg `{{{ text }}}` -> Some '{', ` text ` *) +val trim_quotation : string -> char option * string + (** for camlp5, eg GRAMMAR EXTEND ..... [ IDENT "x" -> .... END is a pattern ("IDENT", Some "x") *) type pattern = string * string option (* = Plexing.pattern *) -val is_keyword : pattern -> string option +val is_keyword : pattern -> (bool * string) option val pattern_for_EOI : pattern val pattern_for_KEYWORD : string -> pattern val pattern_for_IDENT : string -> pattern |
