From cb1a23c2fed25e395d4a6c17214cb5a005ae063a Mon Sep 17 00:00:00 2001 From: Guillaume Melquiond Date: Sun, 27 Sep 2020 10:25:04 +0200 Subject: Make "xxx:{{" always start a quotation, whether registered or not. This commit also prevents quotations using "(" and "[" from gobbling sentences. As a consequence, dynamically-registered quotations can no longer modify where Coq sentences stop. --- parsing/cLexer.ml | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml index a98cf3b7de..688a76c49c 100644 --- a/parsing/cLexer.ml +++ b/parsing/cLexer.ml @@ -512,6 +512,12 @@ and progress_utf8 loc last nj n c tt cs = and progress_from_byte loc last nj tt cs c = progress_utf8 loc last nj (utf8_char_size loc cs c) c tt cs +let blank_or_eof cs = + match Stream.peek cs with + | None -> true + | Some (' ' | '\t' | '\n' |'\r') -> true + | _ -> false + type marker = Delimited of int * char list * char list | ImmediateAsciiIdent let peek_marker_len b e s = @@ -542,6 +548,11 @@ let parse_quotation loc bp s = in get_buff len, set_loc_pos loc bp (Stream.count s) | Delimited (lenmarker, bmarker, emarker) -> + let dot_gobbling = + (* only quotations starting with two curly braces can gobble sentences *) + match bmarker with + | '{' :: '{' :: _ -> true + | _ -> false in let b = Buffer.create 80 in let commit1 c = Buffer.add_char b c; Stream.junk s in let commit l = List.iter commit1 l in @@ -557,6 +568,10 @@ let parse_quotation loc bp s = commit1 '\n'; let loc = bump_loc_line_last loc (Stream.count s) in quotation loc depth + | '.' :: _ -> + commit1 '.'; + if not dot_gobbling && blank_or_eof s then raise Stream.Failure; + quotation loc depth | c :: cs -> commit1 c; quotation loc depth @@ -567,6 +582,13 @@ let parse_quotation loc bp s = let find_keyword loc id bp s = + match Stream.npeek 3 s with + | [':'; '{'; '{'] -> + (* "xxx:{{" always starts a sentence-gobbling quotation, whether registered or not *) + Stream.junk s; + let txt, loc = parse_quotation loc bp s in + QUOTATION (id ^ ":", txt), loc + | _ -> let tt = ttree_find !token_tree id in match progress_further loc tt.node 0 tt s with | None -> raise Not_found @@ -645,12 +667,6 @@ let parse_after_qmark ~diff_mode loc bp s = | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars ~diff_mode loc bp '?' s) -let blank_or_eof cs = - match Stream.peek cs with - | None -> true - | Some (' ' | '\t' | '\n' |'\r') -> true - | _ -> false - (* Parse a token in a char stream *) let rec next_token ~diff_mode loc s = -- cgit v1.2.3 From 27601f57d849757871af430b14efa379c7b84952 Mon Sep 17 00:00:00 2001 From: Guillaume Melquiond Date: Sun, 27 Sep 2020 10:25:48 +0200 Subject: Recognize only ":{{" as a sentence-gobbling quotation. --- ide/coqide/coq_lex.mll | 74 ++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 47 deletions(-) diff --git a/ide/coqide/coq_lex.mll b/ide/coqide/coq_lex.mll index a65954d566..5d5e5f0e14 100644 --- a/ide/coqide/coq_lex.mll +++ b/ide/coqide/coq_lex.mll @@ -50,52 +50,40 @@ and comment = parse | utf8_extra_byte { incr utf8_adjust; comment lexbuf } | _ { comment lexbuf } -and quotation o c n l = parse +and quotation n l = parse | eof { raise Unterminated } -| utf8_extra_byte { incr utf8_adjust; quotation o c n l lexbuf } -| _ { - let x = Lexing.lexeme lexbuf in - if x = o then quotation_nesting o c n l 1 lexbuf - else if x = c then - if n = 1 && l = 1 then () - else quotation_closing o c n l 1 lexbuf - else quotation o c n l lexbuf -} +| utf8_extra_byte { incr utf8_adjust; quotation n l lexbuf } +| "{" { quotation_nesting n l 1 lexbuf } +| "}" { quotation_closing n l 1 lexbuf } +| _ { quotation n l lexbuf } -and quotation_nesting o c n l v = parse +and quotation_nesting n l v = parse | eof { raise Unterminated } -| utf8_extra_byte { incr utf8_adjust; quotation o c n l lexbuf } -| _ { - let x = Lexing.lexeme lexbuf in - if x = o then - if n = v+1 then quotation o c n (l+1) lexbuf - else quotation_nesting o c n l (v+1) lexbuf - else if x = c then quotation_closing o c n l 1 lexbuf - else quotation o c n l lexbuf +| utf8_extra_byte { incr utf8_adjust; quotation n l lexbuf } +| "{" { + if n = v+1 then quotation n (l+1) lexbuf + else quotation_nesting n l (v+1) lexbuf } +| "}" { quotation_closing n l 1 lexbuf } +| _ { quotation n l lexbuf } -and quotation_closing o c n l v = parse +and quotation_closing n l v = parse | eof { raise Unterminated } -| utf8_extra_byte { incr utf8_adjust; quotation o c n l lexbuf } -| _ { - let x = Lexing.lexeme lexbuf in - if x = c then - if n = v+1 then - if l = 1 then () - else quotation o c n (l-1) lexbuf - else quotation_closing o c n l (v+1) lexbuf - else if x = o then quotation_nesting o c n l 1 lexbuf - else quotation o c n l lexbuf +| utf8_extra_byte { incr utf8_adjust; quotation n l lexbuf } +| "}" { + if n = v+1 then + if l = 1 then () + else quotation n (l-1) lexbuf + else quotation_closing n l (v+1) lexbuf } +| "{" { quotation_nesting n l 1 lexbuf } +| _ { quotation n l lexbuf } -and quotation_start o c n = parse +and quotation_start n = parse | eof { raise Unterminated } -| utf8_extra_byte { incr utf8_adjust; quotation o c n 1 lexbuf } -| _ { - let x = Lexing.lexeme lexbuf in - if x = o then quotation_start o c (n+1) lexbuf - else quotation o c n 1 lexbuf -} +| utf8_extra_byte { incr utf8_adjust; quotation n 1 lexbuf } +| "{" { quotation_start (n+1) lexbuf } +| _ { quotation n 1 lexbuf } (** NB : [mkiter] should be called on increasing offsets *) @@ -130,16 +118,8 @@ and sentence initial stamp = parse if initial then stamp (utf8_lexeme_start lexbuf + String.length (Lexing.lexeme lexbuf) - 1) Tags.Script.sentence; sentence initial stamp lexbuf } - | ['a'-'z' 'A'-'Z'] ":{" { - quotation_start "{" "}" 1 lexbuf; - sentence false stamp lexbuf - } - | ['a'-'z' 'A'-'Z'] ":[" { - quotation_start "[" "]" 1 lexbuf; - sentence false stamp lexbuf - } - | ['a'-'z' 'A'-'Z'] ":(" { - quotation_start "(" ")" 1 lexbuf; + | ['a'-'z' 'A'-'Z'] ":{{" { + quotation_start 2 lexbuf; sentence false stamp lexbuf } | space+ { -- cgit v1.2.3 From a5abb5d9c2d8c61be2444b5f5922cf124f98bae0 Mon Sep 17 00:00:00 2001 From: Guillaume Melquiond Date: Sun, 27 Sep 2020 14:40:22 +0200 Subject: Avoid lookup up too many characters. This would cause issues in noninteractive mode. For example, when using Drop, the first character of the OCaml code would be read by Coq's REPL instead of OCaml's REPL. The peek_string function is quite inefficient, since the Stream module does not provide any good function to lookup arbitrary characters (or to push back characters). --- parsing/cLexer.ml | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml index 688a76c49c..f485970eec 100644 --- a/parsing/cLexer.ml +++ b/parsing/cLexer.ml @@ -580,15 +580,26 @@ let parse_quotation loc bp s = let loc = quotation loc 0 in Buffer.contents b, set_loc_pos loc bp (Stream.count s) +let peek_string v s = + let l = String.length v in + let rec aux i = + if Int.equal i l then true + else + let l' = Stream.npeek (i + 1) s in + match List.nth l' i with + | c -> Char.equal c v.[i] && aux (i + 1) + | exception _ -> false (* EOF *) in + aux 0 let find_keyword loc id bp s = - match Stream.npeek 3 s with - | [':'; '{'; '{'] -> + if peek_string ":{{" s then + begin (* "xxx:{{" always starts a sentence-gobbling quotation, whether registered or not *) Stream.junk s; let txt, loc = parse_quotation loc bp s in QUOTATION (id ^ ":", txt), loc - | _ -> + end + else let tt = ttree_find !token_tree id in match progress_further loc tt.node 0 tt s with | None -> raise Not_found -- cgit v1.2.3