diff options
Diffstat (limited to 'ide/coqide/coq_lex.mll')
| -rw-r--r-- | ide/coqide/coq_lex.mll | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/ide/coqide/coq_lex.mll b/ide/coqide/coq_lex.mll new file mode 100644 index 0000000000..fe9f108a94 --- /dev/null +++ b/ide/coqide/coq_lex.mll @@ -0,0 +1,156 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * Copyright INRIA, CNRS and contributors *) +(* <O___,, * (see version control and CREDITS file for authors & dates) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +{ + exception Unterminated + + let utf8_adjust = ref 0 + + let utf8_lexeme_start lexbuf = + Lexing.lexeme_start lexbuf - !utf8_adjust +} + +let space = [' ' '\n' '\r' '\t' '\012'] (* '\012' is form-feed *) + +let number = [ '0'-'9' ]+ + +let string = "\"" _+ "\"" + +let alpha = ['a'-'z' 'A'-'Z'] + +let ident = alpha (alpha | number | '_' | "'")* + +let undotted_sep = ((number | '[' ident ']') space* ':' space*)? '{' | '}' | '-'+ | '+'+ | '*'+ + +let vernac_control = "Fail" | "Time" | "Redirect" space+ string | "Timeout" space+ number + +let dot_sep = '.' (space | eof) + +let utf8_extra_byte = [ '\x80' - '\xBF' ] + +rule coq_string = parse + | "\"\"" { coq_string lexbuf } + | "\"" { () } + | eof { () } + | utf8_extra_byte { incr utf8_adjust; coq_string lexbuf } + | _ { coq_string lexbuf } + +and comment = parse + | "(*" { let _ = comment lexbuf in comment lexbuf } + | "\"" { let () = coq_string lexbuf in comment lexbuf } + | "*)" { Some (utf8_lexeme_start lexbuf) } + | eof { None } + | utf8_extra_byte { incr utf8_adjust; comment lexbuf } + | _ { comment lexbuf } + +and quotation o c n l = parse | eof { raise Unterminated } | _ { + let x = Lexing.lexeme lexbuf in + if x = o then quotation_nesting o c n l 1 lexbuf + else if x = c then + if n = 1 && l = 1 then () + else quotation_closing o c n l 1 lexbuf + else quotation o c n l lexbuf +} + +and quotation_nesting o c n l v = parse | eof { raise Unterminated } | _ { + let x = Lexing.lexeme lexbuf in + if x = o then + if n = v+1 then quotation o c n (l+1) lexbuf + else quotation_nesting o c n l (v+1) lexbuf + else if x = c then quotation_closing o c n l 1 lexbuf + else quotation o c n l lexbuf +} + +and quotation_closing o c n l v = parse | eof { raise Unterminated } | _ { + let x = Lexing.lexeme lexbuf in + if x = c then + if n = v+1 then + if l = 1 then () + else quotation o c n (l-1) lexbuf + else quotation_closing o c n l (v+1) lexbuf + else if x = o then quotation_nesting o c n l 1 lexbuf + else quotation o c n l lexbuf +} + +and quotation_start o c n = parse | eof { raise Unterminated } | _ { + let x = Lexing.lexeme lexbuf in + if x = o then quotation_start o c (n+1) lexbuf + else quotation o c n 1 lexbuf +} + +(** NB : [mkiter] should be called on increasing offsets *) + +and sentence initial stamp = parse + | "(*" { + match comment lexbuf with + | None -> raise Unterminated + | Some comm_last -> + stamp comm_last Tags.Script.comment; + sentence initial stamp lexbuf + } + | "\"" { + let () = coq_string lexbuf in + sentence false stamp lexbuf + } + | ".." { + (* We must have a particular rule for parsing "..", where no dot + is a terminator, even if we have a blank afterwards + (cf. for instance the syntax for recursive notation). + This rule and the following one also allow to treat the "..." + special case, where the third dot is a terminator. *) + sentence false stamp lexbuf + } + | dot_sep { + (* The usual "." terminator *) + stamp (utf8_lexeme_start lexbuf) Tags.Script.sentence; + sentence true stamp lexbuf + } + | (vernac_control space+)* undotted_sep { + (* Separators like { or } and bullets * - + are only active + at the start of a sentence *) + if initial then stamp (utf8_lexeme_start lexbuf + String.length (Lexing.lexeme lexbuf) - 1) Tags.Script.sentence; + sentence initial stamp lexbuf + } + | ['a'-'z' 'A'-'Z'] ":{" { + quotation_start "{" "}" 1 lexbuf; + sentence false stamp lexbuf + } + | ['a'-'z' 'A'-'Z'] ":[" { + quotation_start "[" "]" 1 lexbuf; + sentence false stamp lexbuf + } + | ['a'-'z' 'A'-'Z'] ":(" { + quotation_start "(" ")" 1 lexbuf; + sentence false stamp lexbuf + } + | space+ { + (* Parsing spaces is the only situation preserving initiality *) + sentence initial stamp lexbuf + } + | utf8_extra_byte { incr utf8_adjust; sentence false stamp lexbuf } + | eof { if initial then () else raise Unterminated } + | _ { + (* Any other characters *) + sentence false stamp lexbuf + } + +{ + + (** Parse sentences in string [slice], tagging last characters + of sentences with the [stamp] function. + It will raise [Unterminated] if [slice] ends with an unfinished + sentence. + *) + + let delimit_sentences stamp slice = + utf8_adjust := 0; + sentence true stamp (Lexing.from_string slice) + +} |
