aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEnrico Tassi2019-03-08 16:16:34 +0100
committerEnrico Tassi2019-03-31 14:36:28 +0200
commit46b9e086093d66ff7a916e0475549a9cfb0b056d (patch)
treee164a8738f3372d1c46a318e39f27abf0f26f9eb
parent912eaf40d4efd29b7e3489d51c55b8b79206df79 (diff)
[parsing] add keyword-protected generic quotation
One can now register a quotation using a grammar rule with QUOTATION("name:"). "name:" becomes a keyword and the token is generated for name: followed by a an identifier or a parenthesized text. Eg constr:x string:[....] ltac:(....) ltac:{....} The delimiter is made of 1 or more occurrences of the same parenthesis, eg ((.....)) or [[[[....]]]]. The idea being that if the text happens to contain the closing delimiter, one can make the delimiter longer and avoid confusion (no escaping). Eg string:[[ .. ']' .. ]] Nesting the delimiter is allowed, eg ((..((...))..)) is OK. The text inside the quotation is returned as a string (including the parentheses), so that a third party parser can take care of it. Keywords don't need to end in ':'.
-rw-r--r--coqpp/coqpp_main.ml2
-rw-r--r--parsing/cLexer.ml76
-rw-r--r--parsing/cLexer.mli4
-rw-r--r--parsing/tok.ml47
-rw-r--r--parsing/tok.mli8
5 files changed, 114 insertions, 23 deletions
diff --git a/coqpp/coqpp_main.ml b/coqpp/coqpp_main.ml
index c8a87e6cb6..f4c819eeb6 100644
--- a/coqpp/coqpp_main.ml
+++ b/coqpp/coqpp_main.ml
@@ -163,6 +163,8 @@ let is_token s = match string_split s with
let rec parse_tokens = function
| [GSymbString s] -> SymbToken ("", Some s)
+| [GSymbQualid ("QUOTATION", None); GSymbString s] ->
+ SymbToken ("QUOTATION", Some s)
| [GSymbQualid ("SELF", None)] -> SymbSelf
| [GSymbQualid ("NEXT", None)] -> SymbNext
| [GSymbQualid ("LIST0", None); tkn] ->
diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml
index 7345afb307..33890545da 100644
--- a/parsing/cLexer.ml
+++ b/parsing/cLexer.ml
@@ -19,16 +19,19 @@ open Gramlib
module CharOrd = struct type t = char let compare : char -> char -> int = compare end
module CharMap = Map.Make (CharOrd)
+type starts_quotation = NoQuotation | Quotation
+
type ttree = {
- node : string option;
- branch : ttree CharMap.t }
+ node : (string * starts_quotation) option;
+ branch : ttree CharMap.t;
+}
let empty_ttree = { node = None; branch = CharMap.empty }
-let ttree_add ttree str =
+let ttree_add ttree (str,quot) =
let rec insert tt i =
if i == String.length str then
- {node = Some str; branch = tt.branch}
+ {node = Some (str,quot); branch = tt.branch}
else
let c = str.[i] in
let br =
@@ -75,7 +78,7 @@ let ttree_elements ttree =
let rec elts tt accu =
let accu = match tt.node with
| None -> accu
- | Some s -> CString.Set.add s accu
+ | Some (s,_) -> CString.Set.add s accu
in
CharMap.fold (fun _ tt accu -> elts tt accu) tt.branch accu
in
@@ -259,11 +262,11 @@ let is_keyword s =
try match (ttree_find !token_tree s).node with None -> false | Some _ -> true
with Not_found -> false
-let add_keyword str =
+let add_keyword ?(quotation=NoQuotation) str =
if not (is_keyword str) then
begin
check_keyword str;
- token_tree := ttree_add !token_tree str
+ token_tree := ttree_add !token_tree (str,quotation)
end
let remove_keyword str =
@@ -529,11 +532,62 @@ and progress_utf8 loc last nj n c tt cs =
and progress_from_byte loc last nj tt cs c =
progress_utf8 loc last nj (utf8_char_size loc cs c) c tt cs
+type marker = Delimited of int * char list * char list | ImmediateAsciiIdent
+
+let peek_marker_len b e s =
+ let rec peek n =
+ match stream_nth n s with
+ | c -> if c = b then peek (n+1) else n, List.make n b, List.make n e
+ | exception Stream.Failure -> n, List.make n b, List.make n e
+ in
+ let len, start, stop = peek 0 in
+ if len = 0 then raise Stream.Failure
+ else Delimited (len, start, stop)
+
+let peek_marker s =
+ match stream_nth 0 s with
+ | '(' -> peek_marker_len '(' ')' s
+ | '[' -> peek_marker_len '[' ']' s
+ | '{' -> peek_marker_len '{' '}' s
+ | ('a'..'z' | 'A'..'Z' | '_') -> ImmediateAsciiIdent
+ | _ -> raise Stream.Failure
+
+let parse_quotation loc s =
+ match peek_marker s with
+ | ImmediateAsciiIdent ->
+ let c = Stream.next s in
+ let len =
+ try ident_tail loc (store 0 c) s with
+ Stream.Failure -> raise (Stream.Error "")
+ in
+ get_buff len
+ | Delimited (lenmarker, bmarker, emarker) ->
+ let b = Buffer.create 80 in
+ let commit1 c = Buffer.add_char b c; Stream.junk s in
+ let commit l = List.iter commit1 l in
+ let rec quotation depth =
+ match Stream.npeek lenmarker s with
+ | l when l = bmarker ->
+ commit l;
+ quotation (depth + 1)
+ | l when l = emarker ->
+ commit l;
+ if depth > 1 then quotation (depth - 1)
+ | c :: cs ->
+ commit1 c;
+ quotation depth
+ | [] -> raise Stream.Failure
+ in
+ quotation 0;
+ Buffer.contents b
+
+
let find_keyword loc id s =
let tt = ttree_find !token_tree id in
match progress_further loc tt.node 0 tt s with
| None -> raise Not_found
- | Some c -> KEYWORD c
+ | Some (c,NoQuotation) -> KEYWORD c
+ | Some (c,Quotation) -> QUOTATION(c, parse_quotation loc s)
let process_sequence loc bp c cs =
let rec aux n cs =
@@ -548,7 +602,8 @@ let process_chars ~diff_mode loc bp c cs =
let t = progress_from_byte loc None (-1) !token_tree cs c in
let ep = Stream.count cs in
match t with
- | Some t -> (KEYWORD t, set_loc_pos loc bp ep)
+ | Some (t,NoQuotation) -> (KEYWORD t, set_loc_pos loc bp ep)
+ | Some (c,Quotation) -> (QUOTATION(c, parse_quotation loc cs), set_loc_pos loc bp ep)
| None ->
let ep' = bp + utf8_char_size loc cs c in
if diff_mode then begin
@@ -766,7 +821,8 @@ let make_lexer ~diff_mode = {
Plexing.tok_func = func (next_token ~diff_mode);
Plexing.tok_using =
(fun pat -> match Tok.is_keyword pat with
- | Some s -> add_keyword s
+ | Some (false,s) -> add_keyword ~quotation:NoQuotation s
+ | Some (true,s) -> add_keyword ~quotation:Quotation s
| None -> ());
Plexing.tok_removing = (fun _ -> ());
Plexing.tok_match = Tok.match_pattern;
diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli
index 662f6a477d..b9f89f5052 100644
--- a/parsing/cLexer.mli
+++ b/parsing/cLexer.mli
@@ -9,7 +9,9 @@
(************************************************************************)
(** This should be functional but it is not due to the interface *)
-val add_keyword : string -> unit
+type starts_quotation = NoQuotation | Quotation
+
+val add_keyword : ?quotation:starts_quotation -> string -> unit
val remove_keyword : string -> unit
val is_keyword : string -> bool
val keywords : unit -> CString.Set.t
diff --git a/parsing/tok.ml b/parsing/tok.ml
index ba249bd977..f0846dcf90 100644
--- a/parsing/tok.ml
+++ b/parsing/tok.ml
@@ -21,6 +21,7 @@ type t =
| STRING of string
| LEFTQMARK
| BULLET of string
+ | QUOTATION of string * string
| EOI
let equal t1 t2 = match t1, t2 with
@@ -34,6 +35,7 @@ let equal t1 t2 = match t1, t2 with
| LEFTQMARK, LEFTQMARK -> true
| BULLET s1, BULLET s2 -> string_equal s1 s2
| EOI, EOI -> true
+| QUOTATION(s1,t1), QUOTATION(s2,t2) -> string_equal s1 s2 && string_equal t1 t2
| _ -> false
let extract_string diff_mode = function
@@ -58,6 +60,7 @@ let extract_string diff_mode = function
| INT s -> s
| LEFTQMARK -> "?"
| BULLET s -> s
+ | QUOTATION(_,s) -> s
| EOI -> ""
let to_string = function
@@ -69,12 +72,27 @@ let to_string = function
| STRING s -> Format.sprintf "STRING %S" s
| LEFTQMARK -> "LEFTQMARK"
| BULLET s -> Format.sprintf "BULLET %S" s
+ | QUOTATION(lbl,s) -> lbl ^ s
| EOI -> "EOI"
let match_keyword kwd = function
| KEYWORD kwd' when kwd = kwd' -> true
| _ -> false
+(* Invariant, txt is "ident" or a well parenthesized "{{....}}" *)
+let trim_quotation txt =
+ let len = String.length txt in
+ if len = 0 then None, txt
+ else
+ let c = txt.[0] in
+ if c = '(' || c = '[' || c = '{' then
+ let rec aux n =
+ if n < len && txt.[n] = c then aux (n+1)
+ else Some c, String.sub txt n (len - (2*n))
+ in
+ aux 0
+ else None, txt
+
(* Needed to fix Camlp5 signature.
Cannot use Pp because of silly Tox -> Compat -> Pp dependency *)
let print ppf tok = Format.pp_print_string ppf (to_string tok)
@@ -85,7 +103,8 @@ let print ppf tok = Format.pp_print_string ppf (to_string tok)
type pattern = string * string option
let is_keyword = function
- | "", Some s -> Some s
+ | "", Some s -> Some (false,s)
+ | "QUOTATION", Some s -> Some (true,s)
| _ -> None
let pattern_for_EOI = ("EOI",None)
@@ -99,14 +118,20 @@ let match_pattern (key, value) =
| None -> x
| Some value -> if string_equal value x then x else err () in
match key with
- | "" -> (function { v = KEYWORD s } -> cond s | _ -> err ())
- | "IDENT" when value <> None -> (function { v = (IDENT s | KEYWORD s) } -> cond s | _ -> err ())
- | "IDENT" -> (function { v = IDENT s } -> cond s | _ -> err ())
- | "PATTERNIDENT" -> (function { v = PATTERNIDENT s } -> cond s | _ -> err ())
- | "FIELD" -> (function { v = FIELD s } -> cond s | _ -> err ())
- | "INT" -> (function { v = INT s } -> cond s | _ -> err ())
- | "STRING" -> (function { v = STRING s } -> cond s | _ -> err ())
- | "LEFTQMARK" -> (function { v = LEFTQMARK } -> cond "" | _ -> err ())
- | "BULLET" -> (function { v = BULLET s } -> cond s | _ -> err ())
- | "EOI" -> (function { v = EOI } -> cond "" | _ -> err ())
+ | "" -> (function KEYWORD s -> cond s | _ -> err ())
+ | "IDENT" when value <> None -> (function (IDENT s | KEYWORD s) -> cond s | _ -> err ())
+ | "IDENT" -> (function IDENT s -> cond s | _ -> err ())
+ | "PATTERNIDENT" -> (function PATTERNIDENT s -> cond s | _ -> err ())
+ | "FIELD" -> (function FIELD s -> cond s | _ -> err ())
+ | "INT" -> (function INT s -> cond s | _ -> err ())
+ | "STRING" -> (function STRING s -> cond s | _ -> err ())
+ | "LEFTQMARK" -> (function LEFTQMARK -> cond "" | _ -> err ())
+ | "BULLET" -> (function BULLET s -> cond s | _ -> err ())
+ | "QUOTATION" -> (function
+ | QUOTATION(lbl,s) ->
+ begin match value with
+ | None -> assert false
+ | Some lbl1 -> if string_equal lbl1 lbl then s else err () end
+ | _ -> err ())
+ | "EOI" -> (function EOI -> cond "" | _ -> err ())
| p -> CErrors.anomaly Pp.(str "Tok: unknown pattern " ++ str p)
diff --git a/parsing/tok.mli b/parsing/tok.mli
index e32047e6ec..8a255be06f 100644
--- a/parsing/tok.mli
+++ b/parsing/tok.mli
@@ -19,6 +19,7 @@ type t =
| STRING of string
| LEFTQMARK
| BULLET of string
+ | QUOTATION of string * string
| EOI
val equal : t -> t -> bool
@@ -29,13 +30,18 @@ val to_string : t -> string
val print : Format.formatter -> t -> unit
val match_keyword : string -> t -> bool
+(** Utility function for the test returned by a QUOTATION token:
+ It returns the delimiter parenthesis, if any, and the text
+ without delimiters. Eg `{{{ text }}}` -> Some '{', ` text ` *)
+val trim_quotation : string -> char option * string
+
(** for camlp5,
eg GRAMMAR EXTEND ..... [ IDENT "x" -> .... END
is a pattern ("IDENT", Some "x")
*)
type pattern = string * string option (* = Plexing.pattern *)
-val is_keyword : pattern -> string option
+val is_keyword : pattern -> (bool * string) option
val pattern_for_EOI : pattern
val pattern_for_KEYWORD : string -> pattern
val pattern_for_IDENT : string -> pattern