aboutsummaryrefslogtreecommitdiff
path: root/ide/coqide/coq_lex.mll
diff options
context:
space:
mode:
Diffstat (limited to 'ide/coqide/coq_lex.mll')
-rw-r--r--ide/coqide/coq_lex.mll156
1 files changed, 156 insertions, 0 deletions
diff --git a/ide/coqide/coq_lex.mll b/ide/coqide/coq_lex.mll
new file mode 100644
index 0000000000..fe9f108a94
--- /dev/null
+++ b/ide/coqide/coq_lex.mll
@@ -0,0 +1,156 @@
+(************************************************************************)
+(* * The Coq Proof Assistant / The Coq Development Team *)
+(* v * Copyright INRIA, CNRS and contributors *)
+(* <O___,, * (see version control and CREDITS file for authors & dates) *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(* * (see LICENSE file for the text of the license) *)
+(************************************************************************)
+
+{
+ exception Unterminated
+
+ let utf8_adjust = ref 0
+
+ let utf8_lexeme_start lexbuf =
+ Lexing.lexeme_start lexbuf - !utf8_adjust
+}
+
+let space = [' ' '\n' '\r' '\t' '\012'] (* '\012' is form-feed *)
+
+let number = [ '0'-'9' ]+
+
+let string = "\"" _+ "\""
+
+let alpha = ['a'-'z' 'A'-'Z']
+
+let ident = alpha (alpha | number | '_' | "'")*
+
+let undotted_sep = ((number | '[' ident ']') space* ':' space*)? '{' | '}' | '-'+ | '+'+ | '*'+
+
+let vernac_control = "Fail" | "Time" | "Redirect" space+ string | "Timeout" space+ number
+
+let dot_sep = '.' (space | eof)
+
+let utf8_extra_byte = [ '\x80' - '\xBF' ]
+
+rule coq_string = parse
+ | "\"\"" { coq_string lexbuf }
+ | "\"" { () }
+ | eof { () }
+ | utf8_extra_byte { incr utf8_adjust; coq_string lexbuf }
+ | _ { coq_string lexbuf }
+
+and comment = parse
+ | "(*" { let _ = comment lexbuf in comment lexbuf }
+ | "\"" { let () = coq_string lexbuf in comment lexbuf }
+ | "*)" { Some (utf8_lexeme_start lexbuf) }
+ | eof { None }
+ | utf8_extra_byte { incr utf8_adjust; comment lexbuf }
+ | _ { comment lexbuf }
+
+and quotation o c n l = parse | eof { raise Unterminated } | _ {
+ let x = Lexing.lexeme lexbuf in
+ if x = o then quotation_nesting o c n l 1 lexbuf
+ else if x = c then
+ if n = 1 && l = 1 then ()
+ else quotation_closing o c n l 1 lexbuf
+ else quotation o c n l lexbuf
+}
+
+and quotation_nesting o c n l v = parse | eof { raise Unterminated } | _ {
+ let x = Lexing.lexeme lexbuf in
+ if x = o then
+ if n = v+1 then quotation o c n (l+1) lexbuf
+ else quotation_nesting o c n l (v+1) lexbuf
+ else if x = c then quotation_closing o c n l 1 lexbuf
+ else quotation o c n l lexbuf
+}
+
+and quotation_closing o c n l v = parse | eof { raise Unterminated } | _ {
+ let x = Lexing.lexeme lexbuf in
+ if x = c then
+ if n = v+1 then
+ if l = 1 then ()
+ else quotation o c n (l-1) lexbuf
+ else quotation_closing o c n l (v+1) lexbuf
+ else if x = o then quotation_nesting o c n l 1 lexbuf
+ else quotation o c n l lexbuf
+}
+
+and quotation_start o c n = parse | eof { raise Unterminated } | _ {
+ let x = Lexing.lexeme lexbuf in
+ if x = o then quotation_start o c (n+1) lexbuf
+ else quotation o c n 1 lexbuf
+}
+
+(** NB : [mkiter] should be called on increasing offsets *)
+
+and sentence initial stamp = parse
+ | "(*" {
+ match comment lexbuf with
+ | None -> raise Unterminated
+ | Some comm_last ->
+ stamp comm_last Tags.Script.comment;
+ sentence initial stamp lexbuf
+ }
+ | "\"" {
+ let () = coq_string lexbuf in
+ sentence false stamp lexbuf
+ }
+ | ".." {
+ (* We must have a particular rule for parsing "..", where no dot
+ is a terminator, even if we have a blank afterwards
+ (cf. for instance the syntax for recursive notation).
+ This rule and the following one also allow to treat the "..."
+ special case, where the third dot is a terminator. *)
+ sentence false stamp lexbuf
+ }
+ | dot_sep {
+ (* The usual "." terminator *)
+ stamp (utf8_lexeme_start lexbuf) Tags.Script.sentence;
+ sentence true stamp lexbuf
+ }
+ | (vernac_control space+)* undotted_sep {
+ (* Separators like { or } and bullets * - + are only active
+ at the start of a sentence *)
+ if initial then stamp (utf8_lexeme_start lexbuf + String.length (Lexing.lexeme lexbuf) - 1) Tags.Script.sentence;
+ sentence initial stamp lexbuf
+ }
+ | ['a'-'z' 'A'-'Z'] ":{" {
+ quotation_start "{" "}" 1 lexbuf;
+ sentence false stamp lexbuf
+ }
+ | ['a'-'z' 'A'-'Z'] ":[" {
+ quotation_start "[" "]" 1 lexbuf;
+ sentence false stamp lexbuf
+ }
+ | ['a'-'z' 'A'-'Z'] ":(" {
+ quotation_start "(" ")" 1 lexbuf;
+ sentence false stamp lexbuf
+ }
+ | space+ {
+ (* Parsing spaces is the only situation preserving initiality *)
+ sentence initial stamp lexbuf
+ }
+ | utf8_extra_byte { incr utf8_adjust; sentence false stamp lexbuf }
+ | eof { if initial then () else raise Unterminated }
+ | _ {
+ (* Any other characters *)
+ sentence false stamp lexbuf
+ }
+
+{
+
+ (** Parse sentences in string [slice], tagging last characters
+ of sentences with the [stamp] function.
+ It will raise [Unterminated] if [slice] ends with an unfinished
+ sentence.
+ *)
+
+ let delimit_sentences stamp slice =
+ utf8_adjust := 0;
+ sentence true stamp (Lexing.from_string slice)
+
+}