diff options
Diffstat (limited to 'parsing')
| -rw-r--r-- | parsing/cLexer.ml | 816 | ||||
| -rw-r--r-- | parsing/cLexer.mli | 69 | ||||
| -rw-r--r-- | parsing/dune | 15 | ||||
| -rw-r--r-- | parsing/extend.ml | 117 | ||||
| -rw-r--r-- | parsing/g_constr.mlg | 526 | ||||
| -rw-r--r-- | parsing/g_prim.mlg | 122 | ||||
| -rw-r--r-- | parsing/notation_gram.ml | 43 | ||||
| -rw-r--r-- | parsing/notgram_ops.ml | 71 | ||||
| -rw-r--r-- | parsing/notgram_ops.mli | 20 | ||||
| -rw-r--r-- | parsing/parsing.mllib | 9 | ||||
| -rw-r--r-- | parsing/pcoq.ml | 608 | ||||
| -rw-r--r-- | parsing/pcoq.mli | 271 | ||||
| -rw-r--r-- | parsing/ppextend.ml | 77 | ||||
| -rw-r--r-- | parsing/ppextend.mli | 51 | ||||
| -rw-r--r-- | parsing/tok.ml | 122 | ||||
| -rw-r--r-- | parsing/tok.mli | 35 |
16 files changed, 2972 insertions, 0 deletions
diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml new file mode 100644 index 0000000000..49d6cf01d9 --- /dev/null +++ b/parsing/cLexer.ml @@ -0,0 +1,816 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +open Pp +open Util +open Tok +open Gramlib + +(* Dictionaries: trees annotated with string options, each node being a map + from chars to dictionaries (the subtrees). A trie, in other words. *) + +module CharOrd = struct type t = char let compare : char -> char -> int = compare end +module CharMap = Map.Make (CharOrd) + +type ttree = { + node : string option; + branch : ttree CharMap.t } + +let empty_ttree = { node = None; branch = CharMap.empty } + +let ttree_add ttree str = + let rec insert tt i = + if i == String.length str then + {node = Some str; branch = tt.branch} + else + let c = str.[i] in + let br = + match try Some (CharMap.find c tt.branch) with Not_found -> None with + | Some tt' -> + CharMap.add c (insert tt' (i + 1)) (CharMap.remove c tt.branch) + | None -> + let tt' = {node = None; branch = CharMap.empty} in + CharMap.add c (insert tt' (i + 1)) tt.branch + in + { node = tt.node; branch = br } + in + insert ttree 0 + +(* Search a string in a dictionary: raises [Not_found] + if the word is not present. 
*) + +let ttree_find ttree str = + let rec proc_rec tt i = + if i == String.length str then tt + else proc_rec (CharMap.find str.[i] tt.branch) (i+1) + in + proc_rec ttree 0 + +(* Removes a string from a dictionary: returns an equal dictionary + if the word not present. *) +let ttree_remove ttree str = + let rec remove tt i = + if i == String.length str then + {node = None; branch = tt.branch} + else + let c = str.[i] in + let br = + match try Some (CharMap.find c tt.branch) with Not_found -> None with + | Some tt' -> + CharMap.add c (remove tt' (i + 1)) (CharMap.remove c tt.branch) + | None -> tt.branch + in + { node = tt.node; branch = br } + in + remove ttree 0 + +let ttree_elements ttree = + let rec elts tt accu = + let accu = match tt.node with + | None -> accu + | Some s -> CString.Set.add s accu + in + CharMap.fold (fun _ tt accu -> elts tt accu) tt.branch accu + in + elts ttree CString.Set.empty + +(* Errors occurring while lexing (explained as "Lexer error: ...") *) + +module Error = struct + + type t = + | Illegal_character + | Unterminated_comment + | Unterminated_string + | Undefined_token + | Bad_token of string + + exception E of t + + let to_string x = + "Syntax Error: Lexer: " ^ + (match x with + | Illegal_character -> "Illegal character" + | Unterminated_comment -> "Unterminated comment" + | Unterminated_string -> "Unterminated string" + | Undefined_token -> "Undefined token" + | Bad_token tok -> Format.sprintf "Bad token %S" tok) + +end +open Error + +let err loc str = Loc.raise ~loc (Error.E str) + +let bad_token str = raise (Error.E (Bad_token str)) + +(* Update a loc without allocating an intermediate pair *) +let set_loc_pos loc bp ep = + Ploc.sub loc (bp - loc.Loc.bp) (ep - bp) + +(* Increase line number by 1 and update position of beginning of line *) +let bump_loc_line loc bol_pos = + Loc.{ loc with + line_nb = loc.line_nb + 1; + line_nb_last = loc.line_nb + 1; + bol_pos; + bol_pos_last = bol_pos; + } + +(* Same as [bump_loc_line], but for 
the last line in location *) +(* For an obscure reason, camlp5 does not give an easy way to set line_nb_stop, + so we have to resort to a hack merging two locations. *) +(* Warning: [bump_loc_line_last] changes the end position. You may need to call + [set_loc_pos] to fix it. *) +let bump_loc_line_last loc bol_pos = + let open Loc in + let loc' = { loc with + line_nb = loc.line_nb_last + 1; + line_nb_last = loc.line_nb_last + 1; + bol_pos; + bol_pos_last = bol_pos; + bp = loc.bp + 1; + ep = loc.ep + 1; + } in + Loc.merge loc loc' + +(* For some reason, the [Ploc.after] function of Camlp5 does not update line + numbers, so we define our own function that does it. *) +let after loc = + Loc.{ loc with + line_nb = loc.line_nb_last; + bol_pos = loc.bol_pos_last; + bp = loc.ep; + } + +(** Lexer conventions on tokens *) + +type token_kind = + | Utf8Token of (Unicode.status * int) + | AsciiChar + | EmptyStream + +let error_utf8 loc cs = + let bp = Stream.count cs in + Stream.junk cs; (* consume the char to avoid read it and fail again *) + let loc = set_loc_pos loc bp (bp+1) in + err loc Illegal_character + +let utf8_char_size loc cs = function + (* Utf8 leading byte *) + | '\x00'..'\x7F' -> 1 + | '\xC0'..'\xDF' -> 2 + | '\xE0'..'\xEF' -> 3 + | '\xF0'..'\xF7' -> 4 + | _ (* '\x80'..\xBF'|'\xF8'..'\xFF' *) -> error_utf8 loc cs + +let njunk n = Util.repeat n Stream.junk + +let check_utf8_trailing_byte loc cs c = + if not (Int.equal (Char.code c land 0xC0) 0x80) then error_utf8 loc cs + +(* Recognize utf8 blocks (of length less than 4 bytes) *) +(* but don't certify full utf8 compliance (e.g. 
no emptyness check) *) +let lookup_utf8_tail loc c cs = + let c1 = Char.code c in + if Int.equal (c1 land 0x40) 0 || Int.equal (c1 land 0x38) 0x38 then error_utf8 loc cs + else + let n, unicode = + if Int.equal (c1 land 0x20) 0 then + match Stream.npeek 2 cs with + | [_;c2] -> + check_utf8_trailing_byte loc cs c2; + 2, (c1 land 0x1F) lsl 6 + (Char.code c2 land 0x3F) + | _ -> error_utf8 loc cs + else if Int.equal (c1 land 0x10) 0 then + match Stream.npeek 3 cs with + | [_;c2;c3] -> + check_utf8_trailing_byte loc cs c2; + check_utf8_trailing_byte loc cs c3; + 3, (c1 land 0x0F) lsl 12 + (Char.code c2 land 0x3F) lsl 6 + + (Char.code c3 land 0x3F) + | _ -> error_utf8 loc cs + else match Stream.npeek 4 cs with + | [_;c2;c3;c4] -> + check_utf8_trailing_byte loc cs c2; + check_utf8_trailing_byte loc cs c3; + check_utf8_trailing_byte loc cs c4; + 4, (c1 land 0x07) lsl 18 + (Char.code c2 land 0x3F) lsl 12 + + (Char.code c3 land 0x3F) lsl 6 + (Char.code c4 land 0x3F) + | _ -> error_utf8 loc cs + in + Utf8Token (Unicode.classify unicode, n) + +let lookup_utf8 loc cs = + match Stream.peek cs with + | Some ('\x00'..'\x7F') -> AsciiChar + | Some ('\x80'..'\xFF' as c) -> lookup_utf8_tail loc c cs + | None -> EmptyStream + +let unlocated f x = f x + (** FIXME: should we still unloc the exception? 
*) +(* try f x with Loc.Exc_located (_, exc) -> raise exc *) + +let check_keyword str = + let rec loop_symb s = match Stream.peek s with + | Some (' ' | '\n' | '\r' | '\t' | '"') -> + Stream.junk s; + bad_token str + | _ -> + match unlocated lookup_utf8 Ploc.dummy s with + | Utf8Token (_,n) -> njunk n s; loop_symb s + | AsciiChar -> Stream.junk s; loop_symb s + | EmptyStream -> () + in + loop_symb (Stream.of_string str) + +let check_ident str = + let rec loop_id intail s = match Stream.peek s with + | Some ('a'..'z' | 'A'..'Z' | '_') -> + Stream.junk s; + loop_id true s + | Some ('0'..'9' | '\'') when intail -> + Stream.junk s; + loop_id true s + | _ -> + match unlocated lookup_utf8 Ploc.dummy s with + | Utf8Token (st, n) when not intail && Unicode.is_valid_ident_initial st -> njunk n s; loop_id true s + | Utf8Token (st, n) when intail && Unicode.is_valid_ident_trailing st -> + njunk n s; + loop_id true s + | EmptyStream -> () + | Utf8Token _ | AsciiChar -> bad_token str + in + loop_id false (Stream.of_string str) + +let is_ident str = + try let _ = check_ident str in true with Error.E _ -> false + +(* Keyword and symbol dictionary *) +let token_tree = ref empty_ttree + +let is_keyword s = + try match (ttree_find !token_tree s).node with None -> false | Some _ -> true + with Not_found -> false + +let add_keyword str = + if not (is_keyword str) then + begin + check_keyword str; + token_tree := ttree_add !token_tree str + end + +let remove_keyword str = + token_tree := ttree_remove !token_tree str + +let keywords () = ttree_elements !token_tree + +(* Freeze and unfreeze the state of the lexer *) +type keyword_state = ttree + +let get_keyword_state () = !token_tree +let set_keyword_state tt = (token_tree := tt) + +(* The string buffering machinery *) + +let buff = ref (Bytes.create 80) + +let store len x = + let open Bytes in + if len >= length !buff then + buff := cat !buff (create (length !buff)); + set !buff len x; + succ len + +let rec nstore n len cs = + if n>0 
then nstore (n-1) (store len (Stream.next cs)) cs else len + +let get_buff len = Bytes.sub_string !buff 0 len + +(* The classical lexer: idents, numbers, quoted strings, comments *) + +let warn_unrecognized_unicode = + CWarnings.create ~name:"unrecognized-unicode" ~category:"parsing" + (fun (u,id) -> + strbrk (Printf.sprintf "Not considering unicode character \"%s\" of unknown \ + lexical status as part of identifier \"%s\"." u id)) + +let rec ident_tail loc len s = match Stream.peek s with + | Some ('a'..'z' | 'A'..'Z' | '0'..'9' | '\'' | '_' as c) -> + Stream.junk s; + ident_tail loc (store len c) s + | _ -> + match lookup_utf8 loc s with + | Utf8Token (st, n) when Unicode.is_valid_ident_trailing st -> + ident_tail loc (nstore n len s) s + | Utf8Token (st, n) when Unicode.is_unknown st -> + let id = get_buff len in + let u = String.concat "" (List.map (String.make 1) (Stream.npeek n s)) in + warn_unrecognized_unicode ~loc (u,id); len + | _ -> len + +let rec number len s = match Stream.peek s with + | Some ('0'..'9' as c) -> Stream.junk s; number (store len c) s + | _ -> len + +let warn_comment_terminator_in_string = + CWarnings.create ~name:"comment-terminator-in-string" ~category:"parsing" + (fun () -> + (strbrk + "Not interpreting \"*)\" as the end of current \ + non-terminated comment because it occurs in a \ + non-terminated string of the comment.")) + +(* If the string being lexed is in a comment, [comm_level] is Some i with i the + current level of comments nesting. Otherwise, [comm_level] is None. 
*) +let rec string loc ~comm_level bp len s = match Stream.peek s with + | Some '"' -> + Stream.junk s; + let esc = + match Stream.peek s with + Some '"' -> Stream.junk s; true + | _ -> false + in + if esc then string loc ~comm_level bp (store len '"') s else (loc, len) + | Some '(' -> + Stream.junk s; + (fun s -> match Stream.peek s with + | Some '*' -> + Stream.junk s; + let comm_level = Option.map succ comm_level in + string loc ~comm_level + bp (store (store len '(') '*') + s + | _ -> + string loc ~comm_level bp (store len '(') s) s + | Some '*' -> + Stream.junk s; + (fun s -> match Stream.peek s with + | Some ')' -> + Stream.junk s; + let () = match comm_level with + | Some 0 -> + warn_comment_terminator_in_string ~loc () + | _ -> () + in + let comm_level = Option.map pred comm_level in + string loc ~comm_level bp (store (store len '*') ')') s + | _ -> + string loc ~comm_level bp (store len '*') s) s + | Some ('\n' as c) -> + Stream.junk s; + let ep = Stream.count s in + (* If we are parsing a comment, the string if not part of a token so we + update the first line of the location. Otherwise, we update the last + line. 
*) + let loc = + if Option.has_some comm_level then bump_loc_line loc ep + else bump_loc_line_last loc ep + in + string loc ~comm_level bp (store len c) s + | Some c -> + Stream.junk s; + string loc ~comm_level bp (store len c) s + | _ -> + let _ = Stream.empty s in + let ep = Stream.count s in + let loc = set_loc_pos loc bp ep in + err loc Unterminated_string + +(* To associate locations to a file name *) +let current_file = ref Loc.ToplevelInput + +(* Utilities for comments in beautify *) +let comment_begin = ref None +let comm_loc bp = match !comment_begin with +| None -> comment_begin := Some bp +| _ -> () + +let comments = ref [] +let current_comment = Buffer.create 8192 +let between_commands = ref true + +(* The state of the lexer visible from outside *) +type lexer_state = int option * string * bool * ((int * int) * string) list * Loc.source + +let init_lexer_state f = (None,"",true,[],f) +let set_lexer_state (o,s,b,c,f) = + comment_begin := o; + Buffer.clear current_comment; Buffer.add_string current_comment s; + between_commands := b; + comments := c; + current_file := f +let get_lexer_state () = + (!comment_begin, Buffer.contents current_comment, !between_commands, !comments, !current_file) +let drop_lexer_state () = + set_lexer_state (init_lexer_state Loc.ToplevelInput) + +let get_comment_state (_,_,_,c,_) = c + +let real_push_char c = Buffer.add_char current_comment c + +(* Add a char if it is between two commands, if it is a newline or + if the last char is not a space itself. 
*) +let push_char c = + if + !between_commands || List.mem c ['\n';'\r'] || + (List.mem c [' ';'\t']&& + (Int.equal (Buffer.length current_comment) 0 || + not (let s = Buffer.contents current_comment in + List.mem s.[String.length s - 1] [' ';'\t';'\n';'\r']))) + then + real_push_char c + +let push_string s = Buffer.add_string current_comment s + +let null_comment s = + let rec null i = + i<0 || (List.mem s.[i] [' ';'\t';'\n';'\r'] && null (i-1)) in + null (String.length s - 1) + +let comment_stop ep = + let current_s = Buffer.contents current_comment in + (if !Flags.beautify && Buffer.length current_comment > 0 && + (!between_commands || not(null_comment current_s)) then + let bp = match !comment_begin with + Some bp -> bp + | None -> + Feedback.msg_notice + (str "No begin location for comment '" + ++ str current_s ++str"' ending at " + ++ int ep); + ep-1 in + comments := ((bp,ep),current_s) :: !comments); + Buffer.clear current_comment; + comment_begin := None; + between_commands := false + +let rec comment loc bp s = + let bp2 = Stream.count s in + match Stream.peek s with + Some '(' -> + Stream.junk s; + let loc = + try + match Stream.peek s with + Some '*' -> + Stream.junk s; + push_string "(*"; comment loc bp s + | _ -> push_string "("; loc + with Stream.Failure -> raise (Stream.Error "") + in + comment loc bp s + | Some '*' -> + Stream.junk s; + begin try + match Stream.peek s with + Some ')' -> Stream.junk s; push_string "*)"; loc + | _ -> real_push_char '*'; comment loc bp s + with Stream.Failure -> raise (Stream.Error "") + end + | Some '"' -> + Stream.junk s; + let loc, len = string loc ~comm_level:(Some 0) bp2 0 s in + push_string "\""; push_string (get_buff len); push_string "\""; + comment loc bp s + | _ -> + match try Some (Stream.empty s) with Stream.Failure -> None with + | Some _ -> + let ep = Stream.count s in + let loc = set_loc_pos loc bp ep in + err loc Unterminated_comment + | _ -> + match Stream.peek s with + Some ('\n' as z) -> + 
Stream.junk s; + let ep = Stream.count s in + real_push_char z; comment (bump_loc_line loc ep) bp s + | Some z -> + Stream.junk s; + real_push_char z; comment loc bp s + | _ -> raise Stream.Failure + +(* Parse a special token, using the [token_tree] *) + +(* Peek as much utf-8 lexemes as possible *) +(* and retain the longest valid special token obtained *) +let rec progress_further loc last nj tt cs = + try progress_from_byte loc last nj tt cs (List.nth (Stream.npeek (nj+1) cs) nj) + with Failure _ -> last + +and update_longest_valid_token loc last nj tt cs = + match tt.node with + | Some _ as last' -> + stream_njunk nj cs; + progress_further loc last' 0 tt cs + | None -> + progress_further loc last nj tt cs + +(* nj is the number of char peeked since last valid token *) +(* n the number of char in utf8 block *) +and progress_utf8 loc last nj n c tt cs = + try + let tt = CharMap.find c tt.branch in + if Int.equal n 1 then + update_longest_valid_token loc last (nj+n) tt cs + else + match Util.List.skipn (nj+1) (Stream.npeek (nj+n) cs) with + | l when Int.equal (List.length l) (n - 1) -> + List.iter (check_utf8_trailing_byte loc cs) l; + let tt = List.fold_left (fun tt c -> CharMap.find c tt.branch) tt l in + update_longest_valid_token loc last (nj+n) tt cs + | _ -> + error_utf8 loc cs + with Not_found -> + last + +and progress_from_byte loc last nj tt cs c = + progress_utf8 loc last nj (utf8_char_size loc cs c) c tt cs + +let find_keyword loc id s = + let tt = ttree_find !token_tree id in + match progress_further loc tt.node 0 tt s with + | None -> raise Not_found + | Some c -> KEYWORD c + +let process_sequence loc bp c cs = + let rec aux n cs = + match Stream.peek cs with + | Some c' when c == c' -> Stream.junk cs; aux (n+1) cs + | _ -> BULLET (String.make n c), set_loc_pos loc bp (Stream.count cs) + in + aux 1 cs + +(* Must be a special token *) +let process_chars ~diff_mode loc bp c cs = + let t = progress_from_byte loc None (-1) !token_tree cs c in + let ep = 
Stream.count cs in + match t with + | Some t -> (KEYWORD t, set_loc_pos loc bp ep) + | None -> + let ep' = bp + utf8_char_size loc cs c in + if diff_mode then begin + let len = ep' - bp in + ignore (store 0 c); + ignore (nstore (len - 1) 1 cs); + IDENT (get_buff len), set_loc_pos loc bp ep + end else begin + njunk (ep' - ep) cs; + let loc = set_loc_pos loc bp ep' in + err loc Undefined_token + end + +(* Parse what follows a dot *) + +let parse_after_dot ~diff_mode loc c bp s = match Stream.peek s with + | Some ('a'..'z' | 'A'..'Z' | '_' as d) -> + Stream.junk s; + let len = + try ident_tail loc (store 0 d) s with + Stream.Failure -> raise (Stream.Error "") + in + let field = get_buff len in + (try find_keyword loc ("."^field) s with Not_found -> FIELD field) + | _ -> + match lookup_utf8 loc s with + | Utf8Token (st, n) when Unicode.is_valid_ident_initial st -> + let len = ident_tail loc (nstore n 0 s) s in + let field = get_buff len in + (try find_keyword loc ("."^field) s with Not_found -> FIELD field) + | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars ~diff_mode loc bp c s) + +(* Parse what follows a question mark *) + +let parse_after_qmark ~diff_mode loc bp s = + match Stream.peek s with + | Some ('a'..'z' | 'A'..'Z' | '_') -> LEFTQMARK + | None -> KEYWORD "?" + | _ -> + match lookup_utf8 loc s with + | Utf8Token (st, _) when Unicode.is_valid_ident_initial st -> LEFTQMARK + | AsciiChar | Utf8Token _ | EmptyStream -> + fst (process_chars ~diff_mode loc bp '?' 
s) + +let blank_or_eof cs = + match Stream.peek cs with + | None -> true + | Some (' ' | '\t' | '\n' |'\r') -> true + | _ -> false + +(* Parse a token in a char stream *) + +let rec next_token ~diff_mode loc s = + let bp = Stream.count s in + match Stream.peek s with + | Some ('\n' as c) -> + Stream.junk s; + let ep = Stream.count s in + comm_loc bp; push_char c; next_token ~diff_mode (bump_loc_line loc ep) s + | Some (' ' | '\t' | '\r' as c) -> + Stream.junk s; + comm_loc bp; push_char c; next_token ~diff_mode loc s + | Some ('.' as c) -> + Stream.junk s; + let t = + try parse_after_dot ~diff_mode loc c bp s with + Stream.Failure -> raise (Stream.Error "") + in + let ep = Stream.count s in + comment_stop bp; + (* We enforce that "." should either be part of a larger keyword, + for instance ".(", or followed by a blank or eof. *) + let () = match t with + | KEYWORD ("." | "...") -> + if not (blank_or_eof s) then + err (set_loc_pos loc bp (ep+1)) Undefined_token; + between_commands := true; + | _ -> () + in + t, set_loc_pos loc bp ep + | Some ('-' | '+' | '*' as c) -> + Stream.junk s; + let t,new_between_commands = + if !between_commands then process_sequence loc bp c s, true + else process_chars ~diff_mode loc bp c s,false + in + comment_stop bp; between_commands := new_between_commands; t + | Some '?' 
-> + Stream.junk s; + let ep = Stream.count s in + let t = parse_after_qmark ~diff_mode loc bp s in + comment_stop bp; (t, set_loc_pos loc bp ep) + | Some ('a'..'z' | 'A'..'Z' | '_' as c) -> + Stream.junk s; + let len = + try ident_tail loc (store 0 c) s with + Stream.Failure -> raise (Stream.Error "") + in + let ep = Stream.count s in + let id = get_buff len in + comment_stop bp; + (try find_keyword loc id s with Not_found -> IDENT id), set_loc_pos loc bp ep + | Some ('0'..'9' as c) -> + Stream.junk s; + let len = + try number (store 0 c) s with + Stream.Failure -> raise (Stream.Error "") + in + let ep = Stream.count s in + comment_stop bp; + (INT (get_buff len), set_loc_pos loc bp ep) + | Some '\"' -> + Stream.junk s; + let (loc, len) = + try string loc ~comm_level:None bp 0 s with + Stream.Failure -> raise (Stream.Error "") + in + let ep = Stream.count s in + comment_stop bp; + (STRING (get_buff len), set_loc_pos loc bp ep) + | Some ('(' as c) -> + Stream.junk s; + begin try + match Stream.peek s with + | Some '*' when diff_mode -> + Stream.junk s; + let ep = Stream.count s in + (IDENT "(*", set_loc_pos loc bp ep) + | Some '*' -> + Stream.junk s; + comm_loc bp; + push_string "(*"; + let loc = comment loc bp s in next_token ~diff_mode loc s + | _ -> let t = process_chars ~diff_mode loc bp c s in comment_stop bp; t + with Stream.Failure -> raise (Stream.Error "") + end + | Some ('{' | '}' as c) -> + Stream.junk s; + let ep = Stream.count s in + let t,new_between_commands = + if !between_commands then (KEYWORD (String.make 1 c), set_loc_pos loc bp ep), true + else process_chars ~diff_mode loc bp c s, false + in + comment_stop bp; between_commands := new_between_commands; t + | _ -> + match lookup_utf8 loc s with + | Utf8Token (st, n) when Unicode.is_valid_ident_initial st -> + let len = ident_tail loc (nstore n 0 s) s in + let id = get_buff len in + let ep = Stream.count s in + comment_stop bp; + (try find_keyword loc id s with Not_found -> IDENT id), set_loc_pos 
loc bp ep + | AsciiChar | Utf8Token _ -> + let t = process_chars ~diff_mode loc bp (Stream.next s) s in + comment_stop bp; t + | EmptyStream -> + comment_stop bp; (EOI, set_loc_pos loc bp (bp+1)) + +(* (* Debug: uncomment this for tracing tokens seen by coq...*) +let next_token ~diff_mode loc s = + let (t,loc as r) = next_token ~diff_mode loc s in + Printf.eprintf "(line %i, %i-%i)[%s]\n%!" (Ploc.line_nb loc) (Ploc.first_pos loc) (Ploc.last_pos loc) (Tok.to_string t); + r *) + +(* Location table system for creating tables associating a token count + to its location in a char stream (the source) *) + +let locerr () = invalid_arg "Lexer: location function" + +let loct_create () = Hashtbl.create 207 + +let loct_func loct i = + try Hashtbl.find loct i with Not_found -> locerr () + +let loct_add loct i loc = Hashtbl.add loct i loc + +(** {6 The lexer of Coq} *) + +(** Note: removing a token. + We do nothing because [remove_token] is called only when removing a grammar + rule with [Grammar.delete_rule]. The latter command is called only when + unfreezing the state of the grammar entries (see GRAMMAR summary, file + env/metasyntax.ml). Therefore, instead of removing tokens one by one, + we unfreeze the state of the lexer. This restores the behaviour of the + lexer. B.B. 
*) + +type te = Tok.t + +(** Names of tokens, for this lexer, used in Grammar error messages *) + +let token_text = function + | ("", t) -> "'" ^ t ^ "'" + | ("IDENT", "") -> "identifier" + | ("IDENT", t) -> "'" ^ t ^ "'" + | ("INT", "") -> "integer" + | ("INT", s) -> "'" ^ s ^ "'" + | ("STRING", "") -> "string" + | ("EOI", "") -> "end of input" + | (con, "") -> con + | (con, prm) -> con ^ " \"" ^ prm ^ "\"" + +let func next_token cs = + let loct = loct_create () in + let cur_loc = ref (Loc.create !current_file 1 0 0 0) in + let ts = + Stream.from + (fun i -> + let (tok, loc) = next_token !cur_loc cs in + cur_loc := after loc; + loct_add loct i loc; Some tok) + in + (ts, loct_func loct) + +let make_lexer ~diff_mode = { + Plexing.tok_func = func (next_token ~diff_mode); + Plexing.tok_using = + (fun pat -> match Tok.of_pattern pat with + | KEYWORD s -> add_keyword s + | _ -> ()); + Plexing.tok_removing = (fun _ -> ()); + Plexing.tok_match = Tok.match_pattern; + Plexing.tok_text = token_text } + +let lexer = make_lexer ~diff_mode:false + +(** Terminal symbols interpretation *) + +let is_ident_not_keyword s = + is_ident s && not (is_keyword s) + +let is_number s = + let rec aux i = + Int.equal (String.length s) i || + match s.[i] with '0'..'9' -> aux (i+1) | _ -> false + in aux 0 + +let strip s = + let len = + let rec loop i len = + if Int.equal i (String.length s) then len + else if s.[i] == ' ' then loop (i + 1) len + else loop (i + 1) (len + 1) + in + loop 0 0 + in + if len == String.length s then s + else + let s' = Bytes.create len in + let rec loop i i' = + if i == String.length s then s' + else if s.[i] == ' ' then loop (i + 1) i' + else begin Bytes.set s' i' s.[i]; loop (i + 1) (i' + 1) end + in + Bytes.to_string (loop 0 0) + +let terminal s = + let s = strip s in + let () = match s with "" -> failwith "empty token." 
| _ -> () in + if is_ident_not_keyword s then IDENT s + else if is_number s then INT s + else KEYWORD s diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli new file mode 100644 index 0000000000..af3fd7f318 --- /dev/null +++ b/parsing/cLexer.mli @@ -0,0 +1,69 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +(** This should be functional but it is not due to the interface *) +val add_keyword : string -> unit +val remove_keyword : string -> unit +val is_keyword : string -> bool +val keywords : unit -> CString.Set.t + +type keyword_state +val set_keyword_state : keyword_state -> unit +val get_keyword_state : unit -> keyword_state + +val check_ident : string -> unit +val is_ident : string -> bool +val check_keyword : string -> unit +val terminal : string -> Tok.t + +(** The lexer of Coq: *) + +(* modtype Grammar.GLexerType: sig + type te val + lexer : te Plexing.lexer + end + +where + + type lexer 'te = + { tok_func : lexer_func 'te; + tok_using : pattern -> unit; + tok_removing : pattern -> unit; + tok_match : pattern -> 'te -> string; + tok_text : pattern -> string; + tok_comm : mutable option (list location) } + *) +include Gramlib.Grammar.GLexerType with type te = Tok.t + +module Error : sig + type t + exception E of t + val to_string : t -> string +end + +(* Mainly for comments state, etc... 
*) +type lexer_state + +val init_lexer_state : Loc.source -> lexer_state +val set_lexer_state : lexer_state -> unit +val get_lexer_state : unit -> lexer_state +val drop_lexer_state : unit -> unit +val get_comment_state : lexer_state -> ((int * int) * string) list + +(** Create a lexer. true enables alternate handling for computing diffs. +It ensures that, ignoring white space, the concatenated tokens equal the input +string. Specifically: +- for strings, return the enclosing quotes as tokens and treat the quoted value +as if it was unquoted, possibly becoming multiple tokens +- for comments, return the "(*" as a token and treat the contents of the comment as if +it was not in a comment, possibly becoming multiple tokens +- return any unrecognized Ascii or UTF-8 character as a string +*) +val make_lexer : diff_mode:bool -> Tok.t Gramlib.Plexing.lexer diff --git a/parsing/dune b/parsing/dune new file mode 100644 index 0000000000..e91740650f --- /dev/null +++ b/parsing/dune @@ -0,0 +1,15 @@ +(library + (name parsing) + (public_name coq.parsing) + (wrapped false) + (libraries coq.gramlib proofs)) + +(rule + (targets g_prim.ml) + (deps (:mlg-file g_prim.mlg)) + (action (run coqpp %{mlg-file}))) + +(rule + (targets g_constr.ml) + (deps (:mlg-file g_constr.mlg)) + (action (run coqpp %{mlg-file}))) diff --git a/parsing/extend.ml b/parsing/extend.ml new file mode 100644 index 0000000000..9b5537d7f6 --- /dev/null +++ b/parsing/extend.ml @@ -0,0 +1,117 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) 
+(************************************************************************) + +(** Entry keys for constr notations *) + +type 'a entry = 'a Gramlib.Grammar.GMake(CLexer).Entry.e + +type side = Left | Right + +type production_position = + | BorderProd of side * Gramlib.Gramext.g_assoc option + | InternalProd + +type production_level = + | NextLevel + | NumLevel of int + +(** User-level types used to tell how to parse or interpret of the non-terminal *) + +type 'a constr_entry_key_gen = + | ETIdent + | ETGlobal + | ETBigint + | ETBinder of bool (* open list of binders if true, closed list of binders otherwise *) + | ETConstr of Constrexpr.notation_entry * Notation_term.constr_as_binder_kind option * 'a + | ETPattern of bool * int option (* true = strict pattern, i.e. not a single variable *) + +(** Entries level (left-hand side of grammar rules) *) + +type constr_entry_key = + (production_level * production_position) constr_entry_key_gen + +(** Entries used in productions, vernac side (e.g. 
"x bigint" or "x ident") *) + +type simple_constr_prod_entry_key = + production_level option constr_entry_key_gen + +(** Entries used in productions (in right-hand-side of grammar rules), to parse non-terminals *) + +type binder_entry_kind = ETBinderOpen | ETBinderClosed of Tok.t list + +type binder_target = ForBinder | ForTerm + +type constr_prod_entry_key = + | ETProdName (* Parsed as a name (ident or _) *) + | ETProdReference (* Parsed as a global reference *) + | ETProdBigint (* Parsed as an (unbounded) integer *) + | ETProdConstr of Constrexpr.notation_entry * (production_level * production_position) (* Parsed as constr or pattern, or a subentry of those *) + | ETProdPattern of int (* Parsed as pattern as a binder (as subpart of a constr) *) + | ETProdConstrList of (production_level * production_position) * Tok.t list (* Parsed as non-empty list of constr *) + | ETProdBinderList of binder_entry_kind (* Parsed as non-empty list of local binders *) + +(** {5 AST for user-provided entries} *) + +type 'a user_symbol = +| Ulist1 of 'a user_symbol +| Ulist1sep of 'a user_symbol * string +| Ulist0 of 'a user_symbol +| Ulist0sep of 'a user_symbol * string +| Uopt of 'a user_symbol +| Uentry of 'a +| Uentryl of 'a * int + +type ('a,'b,'c) ty_user_symbol = +| TUlist1 : ('a,'b,'c) ty_user_symbol -> ('a list,'b list,'c list) ty_user_symbol +| TUlist1sep : ('a,'b,'c) ty_user_symbol * string -> ('a list,'b list,'c list) ty_user_symbol +| TUlist0 : ('a,'b,'c) ty_user_symbol -> ('a list,'b list,'c list) ty_user_symbol +| TUlist0sep : ('a,'b,'c) ty_user_symbol * string -> ('a list,'b list,'c list) ty_user_symbol +| TUopt : ('a,'b,'c) ty_user_symbol -> ('a option, 'b option, 'c option) ty_user_symbol +| TUentry : ('a, 'b, 'c) Genarg.ArgT.tag -> ('a,'b,'c) ty_user_symbol +| TUentryl : ('a, 'b, 'c) Genarg.ArgT.tag * int -> ('a,'b,'c) ty_user_symbol + +(** {5 Type-safe grammar extension} *) + +type ('self, 'a) symbol = +| Atoken : Tok.t -> ('self, string) symbol +| Alist1 : 
('self, 'a) symbol -> ('self, 'a list) symbol +| Alist1sep : ('self, 'a) symbol * ('self, _) symbol -> ('self, 'a list) symbol +| Alist0 : ('self, 'a) symbol -> ('self, 'a list) symbol +| Alist0sep : ('self, 'a) symbol * ('self, _) symbol -> ('self, 'a list) symbol +| Aopt : ('self, 'a) symbol -> ('self, 'a option) symbol +| Aself : ('self, 'self) symbol +| Anext : ('self, 'self) symbol +| Aentry : 'a entry -> ('self, 'a) symbol +| Aentryl : 'a entry * string -> ('self, 'a) symbol +| Arules : 'a rules list -> ('self, 'a) symbol + +and ('self, _, 'r) rule = +| Stop : ('self, 'r, 'r) rule +| Next : ('self, 'a, 'r) rule * ('self, 'b) symbol -> ('self, 'b -> 'a, 'r) rule + +and ('a, 'r) norec_rule = { norec_rule : 's. ('s, 'a, 'r) rule } + +and 'a rules = +| Rules : ('act, Loc.t -> 'a) norec_rule * 'act -> 'a rules + +type 'a production_rule = +| Rule : ('a, 'act, Loc.t -> 'a) rule * 'act -> 'a production_rule + +type 'a single_extend_statement = + string option * + (* Level *) + Gramlib.Gramext.g_assoc option * + (* Associativity *) + 'a production_rule list + (* Symbol list with the interpretation function *) + +type 'a extend_statement = + Gramlib.Gramext.position option * + 'a single_extend_statement list diff --git a/parsing/g_constr.mlg b/parsing/g_constr.mlg new file mode 100644 index 0000000000..b3ae24e941 --- /dev/null +++ b/parsing/g_constr.mlg @@ -0,0 +1,526 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +{ + +open Names +open 
Constr +open Libnames +open Glob_term +open Constrexpr +open Constrexpr_ops +open Util +open Tok +open Namegen +open Decl_kinds + +open Pcoq +open Pcoq.Prim +open Pcoq.Constr + +(* TODO: avoid this redefinition without an extra dep to Notation_ops *) +let ldots_var = Id.of_string ".." + +let constr_kw = + [ "forall"; "fun"; "match"; "fix"; "cofix"; "with"; "in"; "for"; + "end"; "as"; "let"; "if"; "then"; "else"; "return"; + "Prop"; "Set"; "Type"; ".("; "_"; ".."; + "`{"; "`("; "{|"; "|}" ] + +let _ = List.iter CLexer.add_keyword constr_kw + +let mk_cast = function + (c,(_,None)) -> c + | (c,(_,Some ty)) -> + let loc = Loc.merge_opt (constr_loc c) (constr_loc ty) + in CAst.make ?loc @@ CCast(c, CastConv ty) + +let binder_of_name expl { CAst.loc = loc; v = na } = + CLocalAssum ([CAst.make ?loc na], Default expl, + CAst.make ?loc @@ CHole (Some (Evar_kinds.BinderType na), IntroAnonymous, None)) + +let binders_of_names l = + List.map (binder_of_name Explicit) l + +let mk_fixb (id,bl,ann,body,(loc,tyc)) : fix_expr = + let ty = match tyc with + Some ty -> ty + | None -> CAst.make @@ CHole (None, IntroAnonymous, None) in + (id,ann,bl,ty,body) + +let mk_cofixb (id,bl,ann,body,(loc,tyc)) : cofix_expr = + let _ = Option.map (fun { CAst.loc = aloc } -> + CErrors.user_err ?loc:aloc + ~hdr:"Constr:mk_cofixb" + (Pp.str"Annotation forbidden in cofix expression.")) (fst ann) in + let ty = match tyc with + Some ty -> ty + | None -> CAst.make @@ CHole (None, IntroAnonymous, None) in + (id,bl,ty,body) + +let mk_fix(loc,kw,id,dcls) = + if kw then + let fb : fix_expr list = List.map mk_fixb dcls in + CAst.make ~loc @@ CFix(id,fb) + else + let fb : cofix_expr list = List.map mk_cofixb dcls in + CAst.make ~loc @@ CCoFix(id,fb) + +let mk_single_fix (loc,kw,dcl) = + let (id,_,_,_,_) = dcl in mk_fix(loc,kw,id,[dcl]) + +let err () = raise Stream.Failure + +(* Hack to parse "(x:=t)" as an explicit argument without conflicts with the *) +(* admissible notation "(x t)" *) +let lpar_id_coloneq = 
+ Pcoq.Entry.of_parser "test_lpar_id_coloneq" + (fun strm -> + match stream_nth 0 strm with + | KEYWORD "(" -> + (match stream_nth 1 strm with + | IDENT s -> + (match stream_nth 2 strm with + | KEYWORD ":=" -> + stream_njunk 3 strm; + Names.Id.of_string s + | _ -> err ()) + | _ -> err ()) + | _ -> err ()) + +let impl_ident_head = + Pcoq.Entry.of_parser "impl_ident_head" + (fun strm -> + match stream_nth 0 strm with + | KEYWORD "{" -> + (match stream_nth 1 strm with + | IDENT ("wf"|"struct"|"measure") -> err () + | IDENT s -> + stream_njunk 2 strm; + Names.Id.of_string s + | _ -> err ()) + | _ -> err ()) + +let name_colon = + Pcoq.Entry.of_parser "name_colon" + (fun strm -> + match stream_nth 0 strm with + | IDENT s -> + (match stream_nth 1 strm with + | KEYWORD ":" -> + stream_njunk 2 strm; + Name (Names.Id.of_string s) + | _ -> err ()) + | KEYWORD "_" -> + (match stream_nth 1 strm with + | KEYWORD ":" -> + stream_njunk 2 strm; + Anonymous + | _ -> err ()) + | _ -> err ()) + +let aliasvar = function { CAst.v = CPatAlias (_, na) } -> Some na | _ -> None + +} + +GRAMMAR EXTEND Gram + GLOBAL: binder_constr lconstr constr operconstr universe_level sort sort_family + global constr_pattern lconstr_pattern Constr.ident + closed_binder open_binders binder binders binders_fixannot + record_declaration typeclass_constraint pattern appl_arg; + Constr.ident: + [ [ id = Prim.ident -> { id } ] ] + ; + Prim.name: + [ [ "_" -> { CAst.make ~loc Anonymous } ] ] + ; + global: + [ [ r = Prim.reference -> { r } ] ] + ; + constr_pattern: + [ [ c = constr -> { c } ] ] + ; + lconstr_pattern: + [ [ c = lconstr -> { c } ] ] + ; + sort: + [ [ "Set" -> { GSet } + | "Prop" -> { GProp } + | "Type" -> { GType [] } + | "Type"; "@{"; u = universe; "}" -> { GType u } + ] ] + ; + sort_family: + [ [ "Set" -> { Sorts.InSet } + | "Prop" -> { Sorts.InProp } + | "Type" -> { Sorts.InType } + ] ] + ; + universe_expr: + [ [ id = global; "+"; n = natural -> { Some (id,n) } + | id = global -> { Some (id,0) } 
+ | "_" -> { None } + ] ] + ; + universe: + [ [ IDENT "max"; "("; ids = LIST1 universe_expr SEP ","; ")" -> { ids } + | u = universe_expr -> { [u] } + ] ] + ; + lconstr: + [ [ c = operconstr LEVEL "200" -> { c } ] ] + ; + constr: + [ [ c = operconstr LEVEL "8" -> { c } + | "@"; f=global; i = instance -> { CAst.make ~loc @@ CAppExpl((None,f,i),[]) } ] ] + ; + operconstr: + [ "200" RIGHTA + [ c = binder_constr -> { c } ] + | "100" RIGHTA + [ c1 = operconstr; "<:"; c2 = binder_constr -> + { CAst.make ~loc @@ CCast(c1, CastVM c2) } + | c1 = operconstr; "<:"; c2 = SELF -> + { CAst.make ~loc @@ CCast(c1, CastVM c2) } + | c1 = operconstr; "<<:"; c2 = binder_constr -> + { CAst.make ~loc @@ CCast(c1, CastNative c2) } + | c1 = operconstr; "<<:"; c2 = SELF -> + { CAst.make ~loc @@ CCast(c1, CastNative c2) } + | c1 = operconstr; ":";c2 = binder_constr -> + { CAst.make ~loc @@ CCast(c1, CastConv c2) } + | c1 = operconstr; ":"; c2 = SELF -> + { CAst.make ~loc @@ CCast(c1, CastConv c2) } + | c1 = operconstr; ":>" -> + { CAst.make ~loc @@ CCast(c1, CastCoerce) } ] + | "99" RIGHTA [ ] + | "90" RIGHTA [ ] + | "10" LEFTA + [ f=operconstr; args=LIST1 appl_arg -> { CAst.make ~loc @@ CApp((None,f),args) } + | "@"; f=global; i = instance; args=LIST0 NEXT -> { CAst.make ~loc @@ CAppExpl((None,f,i),args) } + | "@"; lid = pattern_identref; args=LIST1 identref -> + { let { CAst.loc = locid; v = id } = lid in + let args = List.map (fun x -> CAst.make @@ CRef (qualid_of_ident ?loc:x.CAst.loc x.CAst.v, None), None) args in + CAst.make ~loc @@ CApp((None, CAst.make ?loc:locid @@ CPatVar id),args) } ] + | "9" + [ ".."; c = operconstr LEVEL "0"; ".." 
-> + { CAst.make ~loc @@ CAppExpl ((None, (qualid_of_ident ~loc ldots_var), None),[c]) } ] + | "8" [ ] + | "1" LEFTA + [ c=operconstr; ".("; f=global; args=LIST0 appl_arg; ")" -> + { CAst.make ~loc @@ CApp((Some (List.length args+1), CAst.make @@ CRef (f,None)),args@[c,None]) } + | c=operconstr; ".("; "@"; f=global; + args=LIST0 (operconstr LEVEL "9"); ")" -> + { CAst.make ~loc @@ CAppExpl((Some (List.length args+1),f,None),args@[c]) } + | c=operconstr; "%"; key=IDENT -> { CAst.make ~loc @@ CDelimiters (key,c) } ] + | "0" + [ c=atomic_constr -> { c } + | c=match_constr -> { c } + | "("; c = operconstr LEVEL "200"; ")" -> + { (match c.CAst.v with + | CPrim (Numeral (n,true)) -> + CAst.make ~loc @@ CNotation((InConstrEntrySomeLevel,"( _ )"),([c],[],[],[])) + | _ -> c) } + | "{|"; c = record_declaration; "|}" -> { c } + | "{"; c = binder_constr ; "}" -> + { CAst.make ~loc @@ CNotation((InConstrEntrySomeLevel,"{ _ }"),([c],[],[],[])) } + | "`{"; c = operconstr LEVEL "200"; "}" -> + { CAst.make ~loc @@ CGeneralization (Implicit, None, c) } + | "`("; c = operconstr LEVEL "200"; ")" -> + { CAst.make ~loc @@ CGeneralization (Explicit, None, c) } + ] ] + ; + record_declaration: + [ [ fs = record_fields -> { CAst.make ~loc @@ CRecord fs } ] ] + ; + + record_fields: + [ [ f = record_field_declaration; ";"; fs = record_fields -> { f :: fs } + | f = record_field_declaration -> { [f] } + | -> { [] } + ] ] + ; + + record_field_declaration: + [ [ id = global; bl = binders; ":="; c = lconstr -> + { (id, mkLambdaCN ~loc bl c) } ] ] + ; + binder_constr: + [ [ "forall"; bl = open_binders; ","; c = operconstr LEVEL "200" -> + { mkProdCN ~loc bl c } + | "fun"; bl = open_binders; "=>"; c = operconstr LEVEL "200" -> + { mkLambdaCN ~loc bl c } + | "let"; id=name; bl = binders; ty = type_cstr; ":="; + c1 = operconstr LEVEL "200"; "in"; c2 = operconstr LEVEL "200" -> + { let ty,c1 = match ty, c1 with + | (_,None), { CAst.v = CCast(c, CastConv t) } -> (Loc.tag ?loc:(constr_loc t) @@ Some t), 
c (* Tolerance, see G_vernac.def_body *) + | _, _ -> ty, c1 in + CAst.make ~loc @@ CLetIn(id,mkLambdaCN ?loc:(constr_loc c1) bl c1, + Option.map (mkProdCN ?loc:(fst ty) bl) (snd ty), c2) } + | "let"; fx = single_fix; "in"; c = operconstr LEVEL "200" -> + { let fixp = mk_single_fix fx in + let { CAst.loc = li; v = id } = match fixp.CAst.v with + CFix(id,_) -> id + | CCoFix(id,_) -> id + | _ -> assert false in + CAst.make ~loc @@ CLetIn( CAst.make ?loc:li @@ Name id,fixp,None,c) } + | "let"; lb = ["("; l=LIST0 name SEP ","; ")" -> { l } | "()" -> { [] } ]; + po = return_type; + ":="; c1 = operconstr LEVEL "200"; "in"; + c2 = operconstr LEVEL "200" -> + { CAst.make ~loc @@ CLetTuple (lb,po,c1,c2) } + | "let"; "'"; p=pattern; ":="; c1 = operconstr LEVEL "200"; + "in"; c2 = operconstr LEVEL "200" -> + { CAst.make ~loc @@ + CCases (LetPatternStyle, None, [c1, None, None], [CAst.make ~loc ([[p]], c2)]) } + | "let"; "'"; p=pattern; ":="; c1 = operconstr LEVEL "200"; + rt = case_type; "in"; c2 = operconstr LEVEL "200" -> + { CAst.make ~loc @@ + CCases (LetPatternStyle, Some rt, [c1, aliasvar p, None], [CAst.make ~loc ([[p]], c2)]) } + + | "let"; "'"; p=pattern; "in"; t = pattern LEVEL "200"; + ":="; c1 = operconstr LEVEL "200"; rt = case_type; + "in"; c2 = operconstr LEVEL "200" -> + { CAst.make ~loc @@ + CCases (LetPatternStyle, Some rt, [c1, aliasvar p, Some t], [CAst.make ~loc ([[p]], c2)]) } + | "if"; c=operconstr LEVEL "200"; po = return_type; + "then"; b1=operconstr LEVEL "200"; + "else"; b2=operconstr LEVEL "200" -> + { CAst.make ~loc @@ CIf (c, po, b1, b2) } + | c=fix_constr -> { c } ] ] + ; + appl_arg: + [ [ id = lpar_id_coloneq; c=lconstr; ")" -> + { (c,Some (CAst.make ~loc @@ ExplByName id)) } + | c=operconstr LEVEL "9" -> { (c,None) } ] ] + ; + atomic_constr: + [ [ g=global; i=instance -> { CAst.make ~loc @@ CRef (g,i) } + | s=sort -> { CAst.make ~loc @@ CSort s } + | n=INT -> { CAst.make ~loc @@ CPrim (Numeral (n,true)) } + | s=string -> { CAst.make ~loc @@ 
CPrim (String s) } + | "_" -> { CAst.make ~loc @@ CHole (None, IntroAnonymous, None) } + | "?"; "["; id=ident; "]" -> { CAst.make ~loc @@ CHole (None, IntroIdentifier id, None) } + | "?"; "["; id=pattern_ident; "]" -> { CAst.make ~loc @@ CHole (None, IntroFresh id, None) } + | id=pattern_ident; inst = evar_instance -> { CAst.make ~loc @@ CEvar(id,inst) } ] ] + ; + inst: + [ [ id = ident; ":="; c = lconstr -> { (id,c) } ] ] + ; + evar_instance: + [ [ "@{"; l = LIST1 inst SEP ";"; "}" -> { l } + | -> { [] } ] ] + ; + instance: + [ [ "@{"; l = LIST0 universe_level; "}" -> { Some l } + | -> { None } ] ] + ; + universe_level: + [ [ "Set" -> { GSet } + | "Prop" -> { GProp } + | "Type" -> { GType UUnknown } + | "_" -> { GType UAnonymous } + | id = global -> { GType (UNamed id) } + ] ] + ; + fix_constr: + [ [ fx1=single_fix -> { mk_single_fix fx1 } + | f=single_fix; "with"; dcls=LIST1 fix_decl SEP "with"; + "for"; id=identref -> + { let (_,kw,dcl1) = f in + mk_fix(loc,kw,id,dcl1::dcls) } + ] ] + ; + single_fix: + [ [ kw=fix_kw; dcl=fix_decl -> { (loc,kw,dcl) } ] ] + ; + fix_kw: + [ [ "fix" -> { true } + | "cofix" -> { false } ] ] + ; + fix_decl: + [ [ id=identref; bl=binders_fixannot; ty=type_cstr; ":="; + c=operconstr LEVEL "200" -> + { (id,fst bl,snd bl,c,ty) } ] ] + ; + match_constr: + [ [ "match"; ci=LIST1 case_item SEP ","; ty=OPT case_type; "with"; + br=branches; "end" -> { CAst.make ~loc @@ CCases(RegularStyle,ty,ci,br) } ] ] + ; + case_item: + [ [ c=operconstr LEVEL "100"; + ona = OPT ["as"; id=name -> { id } ]; + ty = OPT ["in"; t=pattern -> { t } ] -> + { (c,ona,ty) } ] ] + ; + case_type: + [ [ "return"; ty = operconstr LEVEL "100" -> { ty } ] ] + ; + return_type: + [ [ a = OPT [ na = OPT["as"; na=name -> { na } ]; + ty = case_type -> { (na,ty) } ] -> + { match a with + | None -> None, None + | Some (na,t) -> (na, Some t) } + ] ] + ; + branches: + [ [ OPT"|"; br=LIST0 eqn SEP "|" -> { br } ] ] + ; + mult_pattern: + [ [ pl = LIST1 pattern LEVEL "99" SEP "," -> { 
pl } ] ] + ; + eqn: + [ [ pll = LIST1 mult_pattern SEP "|"; + "=>"; rhs = lconstr -> { (CAst.make ~loc (pll,rhs)) } ] ] + ; + record_pattern: + [ [ id = global; ":="; pat = pattern -> { (id, pat) } ] ] + ; + record_patterns: + [ [ p = record_pattern; ";"; ps = record_patterns -> { p :: ps } + | p = record_pattern; ";" -> { [p] } + | p = record_pattern-> { [p] } + | -> { [] } + ] ] + ; + pattern: + [ "200" RIGHTA [ ] + | "100" RIGHTA + [ p = pattern; "|"; pl = LIST1 pattern SEP "|" -> { CAst.make ~loc @@ CPatOr (p::pl) } ] + | "99" RIGHTA [ ] + | "90" RIGHTA [ ] + | "10" LEFTA + [ p = pattern; "as"; na = name -> + { CAst.make ~loc @@ CPatAlias (p, na) } + | p = pattern; lp = LIST1 NEXT -> { mkAppPattern ~loc p lp } + | "@"; r = Prim.reference; lp = LIST0 NEXT -> + { CAst.make ~loc @@ CPatCstr (r, Some lp, []) } ] + | "1" LEFTA + [ c = pattern; "%"; key=IDENT -> { CAst.make ~loc @@ CPatDelimiters (key,c) } ] + | "0" + [ r = Prim.reference -> { CAst.make ~loc @@ CPatAtom (Some r) } + | "{|"; pat = record_patterns; "|}" -> { CAst.make ~loc @@ CPatRecord pat } + | "_" -> { CAst.make ~loc @@ CPatAtom None } + | "("; p = pattern LEVEL "200"; ")" -> + { (match p.CAst.v with + | CPatPrim (Numeral (n,true)) -> + CAst.make ~loc @@ CPatNotation((InConstrEntrySomeLevel,"( _ )"),([p],[]),[]) + | _ -> p) } + | "("; p = pattern LEVEL "200"; ":"; ty = lconstr; ")" -> + { let p = + match p with + | { CAst.v = CPatPrim (Numeral (n,true)) } -> + CAst.make ~loc @@ CPatNotation((InConstrEntrySomeLevel,"( _ )"),([p],[]),[]) + | _ -> p + in + CAst.make ~loc @@ CPatCast (p, ty) } + | n = INT -> { CAst.make ~loc @@ CPatPrim (Numeral (n,true)) } + | s = string -> { CAst.make ~loc @@ CPatPrim (String s) } ] ] + ; + impl_ident_tail: + [ [ "}" -> { binder_of_name Implicit } + | nal=LIST1 name; ":"; c=lconstr; "}" -> + { (fun na -> CLocalAssum (na::nal,Default Implicit,c)) } + | nal=LIST1 name; "}" -> + { (fun na -> CLocalAssum (na::nal,Default Implicit, + CAst.make ?loc:(Loc.merge_opt 
na.CAst.loc (Some loc)) @@ + CHole (Some (Evar_kinds.BinderType na.CAst.v), IntroAnonymous, None))) } + | ":"; c=lconstr; "}" -> + { (fun na -> CLocalAssum ([na],Default Implicit,c)) } + ] ] + ; + fixannot: + [ [ "{"; IDENT "struct"; id=identref; "}" -> { (Some id, CStructRec) } + | "{"; IDENT "wf"; rel=constr; id=OPT identref; "}" -> { (id, CWfRec rel) } + | "{"; IDENT "measure"; m=constr; id=OPT identref; + rel=OPT constr; "}" -> { (id, CMeasureRec (m,rel)) } + ] ] + ; + impl_name_head: + [ [ id = impl_ident_head -> { CAst.make ~loc @@ Name id } ] ] + ; + binders_fixannot: + [ [ na = impl_name_head; assum = impl_ident_tail; bl = binders_fixannot -> + { (assum na :: fst bl), snd bl } + | f = fixannot -> { [], f } + | b = binder; bl = binders_fixannot -> { b @ fst bl, snd bl } + | -> { [], (None, CStructRec) } + ] ] + ; + open_binders: + (* Same as binders but parentheses around a closed binder are optional if + the latter is unique *) + [ [ + id = name; idl = LIST0 name; ":"; c = lconstr -> + { [CLocalAssum (id::idl,Default Explicit,c)] + (* binders factorized with open binder *) } + | id = name; idl = LIST0 name; bl = binders -> + { binders_of_names (id::idl) @ bl } + | id1 = name; ".."; id2 = name -> + { [CLocalAssum ([id1;(CAst.make ~loc (Name ldots_var));id2], + Default Explicit, CAst.make ~loc @@ CHole (None, IntroAnonymous, None))] } + | bl = closed_binder; bl' = binders -> + { bl@bl' } + ] ] + ; + binders: + [ [ l = LIST0 binder -> { List.flatten l } ] ] + ; + binder: + [ [ id = name -> { [CLocalAssum ([id],Default Explicit, CAst.make ~loc @@ CHole (None, IntroAnonymous, None))] } + | bl = closed_binder -> { bl } ] ] + ; + closed_binder: + [ [ "("; id=name; idl=LIST1 name; ":"; c=lconstr; ")" -> + { [CLocalAssum (id::idl,Default Explicit,c)] } + | "("; id=name; ":"; c=lconstr; ")" -> + { [CLocalAssum ([id],Default Explicit,c)] } + | "("; id=name; ":="; c=lconstr; ")" -> + { (match c.CAst.v with + | CCast(c, CastConv t) -> [CLocalDef (id,c,Some t)] + | _ -> 
[CLocalDef (id,c,None)]) } + | "("; id=name; ":"; t=lconstr; ":="; c=lconstr; ")" -> + { [CLocalDef (id,c,Some t)] } + | "{"; id=name; "}" -> + { [CLocalAssum ([id],Default Implicit, CAst.make ~loc @@ CHole (None, IntroAnonymous, None))] } + | "{"; id=name; idl=LIST1 name; ":"; c=lconstr; "}" -> + { [CLocalAssum (id::idl,Default Implicit,c)] } + | "{"; id=name; ":"; c=lconstr; "}" -> + { [CLocalAssum ([id],Default Implicit,c)] } + | "{"; id=name; idl=LIST1 name; "}" -> + { List.map (fun id -> CLocalAssum ([id],Default Implicit, CAst.make ~loc @@ CHole (None, IntroAnonymous, None))) (id::idl) } + | "`("; tc = LIST1 typeclass_constraint SEP "," ; ")" -> + { List.map (fun (n, b, t) -> CLocalAssum ([n], Generalized (Implicit, Explicit, b), t)) tc } + | "`{"; tc = LIST1 typeclass_constraint SEP "," ; "}" -> + { List.map (fun (n, b, t) -> CLocalAssum ([n], Generalized (Implicit, Implicit, b), t)) tc } + | "'"; p = pattern LEVEL "0" -> + { let (p, ty) = + match p.CAst.v with + | CPatCast (p, ty) -> (p, Some ty) + | _ -> (p, None) + in + [CLocalPattern (CAst.make ~loc (p, ty))] } + ] ] + ; + typeclass_constraint: + [ [ "!" ; c = operconstr LEVEL "200" -> { (CAst.make ~loc Anonymous), true, c } + | "{"; id = name; "}"; ":" ; expl = [ "!" -> { true } | -> { false } ] ; c = operconstr LEVEL "200" -> + { id, expl, c } + | iid=name_colon ; expl = [ "!" 
-> { true } | -> { false } ] ; c = operconstr LEVEL "200" -> + { (CAst.make ~loc iid), expl, c } + | c = operconstr LEVEL "200" -> + { (CAst.make ~loc Anonymous), false, c } + ] ] + ; + + type_cstr: + [ [ c=OPT [":"; c=lconstr -> { c } ] -> { Loc.tag ~loc c } ] ] + ; + END diff --git a/parsing/g_prim.mlg b/parsing/g_prim.mlg new file mode 100644 index 0000000000..6247a12640 --- /dev/null +++ b/parsing/g_prim.mlg @@ -0,0 +1,122 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +{ + +open Names +open Libnames + +open Pcoq.Prim + +let prim_kw = ["{"; "}"; "["; "]"; "("; ")"; "'"] +let _ = List.iter CLexer.add_keyword prim_kw + + +let local_make_qualid loc l id = make_qualid ~loc (DirPath.make l) id + +let my_int_of_string loc s = + try + int_of_string s + with Failure _ -> + CErrors.user_err ~loc (Pp.str "This number is too large.") + +} + +GRAMMAR EXTEND Gram + GLOBAL: + bigint natural integer identref name ident var preident + fullyqualid qualid reference dirpath ne_lstring + ne_string string lstring pattern_ident pattern_identref by_notation smart_global; + preident: + [ [ s = IDENT -> { s } ] ] + ; + ident: + [ [ s = IDENT -> { Id.of_string s } ] ] + ; + pattern_ident: + [ [ LEFTQMARK; id = ident -> { id } ] ] + ; + pattern_identref: + [ [ id = pattern_ident -> { CAst.make ~loc id } ] ] + ; + var: (* as identref, but interpret as a term identifier in ltac *) + [ [ id = ident -> { CAst.make ~loc id } ] ] + ; + identref: + [ [ id = ident -> { 
CAst.make ~loc id } ] ] + ; + field: + [ [ s = FIELD -> { Id.of_string s } ] ] + ; + fields: + [ [ id = field; f = fields -> { let (l,id') = f in (l@[id],id') } + | id = field -> { ([],id) } + ] ] + ; + fullyqualid: + [ [ id = ident; f=fields -> { let (l,id') = f in CAst.make ~loc @@ id::List.rev (id'::l) } + | id = ident -> { CAst.make ~loc [id] } + ] ] + ; + basequalid: + [ [ id = ident; f=fields -> { let (l,id') = f in local_make_qualid loc (l@[id]) id' } + | id = ident -> { qualid_of_ident ~loc id } + ] ] + ; + name: + [ [ IDENT "_" -> { CAst.make ~loc Anonymous } + | id = ident -> { CAst.make ~loc @@ Name id } ] ] + ; + reference: + [ [ id = ident; f = fields -> { + let (l,id') = f in + local_make_qualid loc (l@[id]) id' } + | id = ident -> { local_make_qualid loc [] id } + ] ] + ; + by_notation: + [ [ s = ne_string; sc = OPT ["%"; key = IDENT -> { key } ] -> { (s, sc) } ] ] + ; + smart_global: + [ [ c = reference -> { CAst.make ~loc @@ Constrexpr.AN c } + | ntn = by_notation -> { CAst.make ~loc @@ Constrexpr.ByNotation ntn } ] ] + ; + qualid: + [ [ qid = basequalid -> { qid } ] ] + ; + ne_string: + [ [ s = STRING -> + { if s="" then CErrors.user_err ~loc (Pp.str"Empty string."); s } + ] ] + ; + ne_lstring: + [ [ s = ne_string -> { CAst.make ~loc s } ] ] + ; + dirpath: + [ [ id = ident; l = LIST0 field -> + { DirPath.make (List.rev (id::l)) } ] ] + ; + string: + [ [ s = STRING -> { s } ] ] + ; + lstring: + [ [ s = string -> { CAst.make ~loc s } ] ] + ; + integer: + [ [ i = INT -> { my_int_of_string loc i } + | "-"; i = INT -> { - my_int_of_string loc i } ] ] + ; + natural: + [ [ i = INT -> { my_int_of_string loc i } ] ] + ; + bigint: (* Negative numbers are dealt with elsewhere *) + [ [ i = INT -> { i } ] ] + ; +END diff --git a/parsing/notation_gram.ml b/parsing/notation_gram.ml new file mode 100644 index 0000000000..fc5feba58b --- /dev/null +++ b/parsing/notation_gram.ml @@ -0,0 +1,43 @@ 
+(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +open Names +open Extend + +(** Dealing with precedences *) + +type precedence = int +type parenRelation = L | E | Any | Prec of precedence +type tolerability = precedence * parenRelation + +type level = Constrexpr.notation_entry * precedence * tolerability list * constr_entry_key list + (* first argument is InCustomEntry s for custom entries *) + +type grammar_constr_prod_item = + | GramConstrTerminal of Tok.t + | GramConstrNonTerminal of Extend.constr_prod_entry_key * Id.t option + | GramConstrListMark of int * bool * int + (* tells action rule to make a list of the n previous parsed items; + concat with last parsed list when true; additionally release + the p last items as if they were parsed autonomously *) + +(** Grammar rules for a notation *) + +type one_notation_grammar = { + notgram_level : level; + notgram_assoc : Gramlib.Gramext.g_assoc option; + notgram_notation : Constrexpr.notation; + notgram_prods : grammar_constr_prod_item list list; +} + +type notation_grammar = { + notgram_onlyprinting : bool; + notgram_rules : one_notation_grammar list +} diff --git a/parsing/notgram_ops.ml b/parsing/notgram_ops.ml new file mode 100644 index 0000000000..5cc1292c92 --- /dev/null +++ b/parsing/notgram_ops.ml @@ -0,0 +1,71 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - 
Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +open Pp +open CErrors +open Util +open Notation +open Notation_gram + +(* Uninterpreted notation levels *) + +let notation_level_map = Summary.ref ~name:"notation_level_map" NotationMap.empty + +let declare_notation_level ?(onlyprint=false) ntn level = + try + let (level,onlyprint) = NotationMap.find ntn !notation_level_map in + if not onlyprint then anomaly (str "Notation " ++ pr_notation ntn ++ str " is already assigned a level.") + with Not_found -> + notation_level_map := NotationMap.add ntn (level,onlyprint) !notation_level_map + +let level_of_notation ?(onlyprint=false) ntn = + let (level,onlyprint') = NotationMap.find ntn !notation_level_map in + if onlyprint' && not onlyprint then raise Not_found; + level + +(**********************************************************************) +(* Equality *) + +open Extend + +let parenRelation_eq t1 t2 = match t1, t2 with +| L, L | E, E | Any, Any -> true +| Prec l1, Prec l2 -> Int.equal l1 l2 +| _ -> false + +let production_position_eq pp1 pp2 = match (pp1,pp2) with +| BorderProd (side1,assoc1), BorderProd (side2,assoc2) -> side1 = side2 && assoc1 = assoc2 +| InternalProd, InternalProd -> true +| (BorderProd _ | InternalProd), _ -> false + +let production_level_eq l1 l2 = match (l1,l2) with +| NextLevel, NextLevel -> true +| NumLevel n1, NumLevel n2 -> Int.equal n1 n2 +| (NextLevel | NumLevel _), _ -> false + +let constr_entry_key_eq eq v1 v2 = match v1, v2 with +| ETIdent, ETIdent -> true +| ETGlobal, ETGlobal -> true +| ETBigint, ETBigint -> true +| ETBinder b1, ETBinder b2 -> b1 == b2 +| ETConstr (s1,bko1,lev1), 
ETConstr (s2,bko2,lev2) -> + notation_entry_eq s1 s2 && eq lev1 lev2 && Option.equal (=) bko1 bko2 +| ETPattern (b1,n1), ETPattern (b2,n2) -> b1 = b2 && Option.equal Int.equal n1 n2 +| (ETIdent | ETGlobal | ETBigint | ETBinder _ | ETConstr _ | ETPattern _), _ -> false + +let level_eq_gen strict (s1, l1, t1, u1) (s2, l2, t2, u2) = + let tolerability_eq (i1, r1) (i2, r2) = Int.equal i1 i2 && parenRelation_eq r1 r2 in + let prod_eq (l1,pp1) (l2,pp2) = + not strict || + (production_level_eq l1 l2 && production_position_eq pp1 pp2) in + notation_entry_eq s1 s2 && Int.equal l1 l2 && List.equal tolerability_eq t1 t2 + && List.equal (constr_entry_key_eq prod_eq) u1 u2 + +let level_eq = level_eq_gen false diff --git a/parsing/notgram_ops.mli b/parsing/notgram_ops.mli new file mode 100644 index 0000000000..f427a607b7 --- /dev/null +++ b/parsing/notgram_ops.mli @@ -0,0 +1,20 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +(* Merge with metasyntax? 
*) +open Constrexpr +open Notation_gram + +val level_eq : level -> level -> bool + +(** {6 Declare and test the level of a (possibly uninterpreted) notation } *) + +val declare_notation_level : ?onlyprint:bool -> notation -> level -> unit +val level_of_notation : ?onlyprint:bool -> notation -> level (** raise [Not_found] if no level or not respecting onlyprint *) diff --git a/parsing/parsing.mllib b/parsing/parsing.mllib new file mode 100644 index 0000000000..2154f2f881 --- /dev/null +++ b/parsing/parsing.mllib @@ -0,0 +1,9 @@ +Tok +CLexer +Extend +Notation_gram +Ppextend +Notgram_ops +Pcoq +G_constr +G_prim diff --git a/parsing/pcoq.ml b/parsing/pcoq.ml new file mode 100644 index 0000000000..19ae97da77 --- /dev/null +++ b/parsing/pcoq.ml @@ -0,0 +1,608 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +open CErrors +open Util +open Extend +open Genarg +open Gramlib + +(** The parser of Coq *) +module G : sig + + include Grammar.S with type te = Tok.t + +(* where Grammar.S + +module type S = + sig + type te = 'x; + type parsable = 'x; + value parsable : Stream.t char -> parsable; + value tokens : string -> list (string * int); + value glexer : Plexing.lexer te; + value set_algorithm : parse_algorithm -> unit; + module Entry : + sig + type e 'a = 'y; + value create : string -> e 'a; + value parse : e 'a -> parsable -> 'a; + value parse_token_stream : e 'a -> Stream.t te -> 'a; + value name : e 'a -> string; + value of_parser : string -> 
(Stream.t te -> 'a) -> e 'a; + value print : Format.formatter -> e 'a -> unit; + external obj : e 'a -> Gramext.g_entry te = "%identity"; + end + ; + module Unsafe : + sig + value gram_reinit : Plexing.lexer te -> unit; + value clear_entry : Entry.e 'a -> unit; + end + ; + value extend : + Entry.e 'a -> option Gramext.position -> + list + (option string * option Gramext.g_assoc * + list (list (Gramext.g_symbol te) * Gramext.g_action)) -> + unit; + value delete_rule : Entry.e 'a -> list (Gramext.g_symbol te) -> unit; + end + *) + + type coq_parsable + + val coq_parsable : ?file:Loc.source -> char Stream.t -> coq_parsable + val entry_create : string -> 'a entry + val entry_parse : 'a entry -> coq_parsable -> 'a + + val comment_state : coq_parsable -> ((int * int) * string) list + +end with type 'a Entry.e = 'a Extend.entry = struct + + include Grammar.GMake(CLexer) + + type coq_parsable = parsable * CLexer.lexer_state ref + + let coq_parsable ?(file=Loc.ToplevelInput) c = + let state = ref (CLexer.init_lexer_state file) in + CLexer.set_lexer_state !state; + let a = parsable c in + state := CLexer.get_lexer_state (); + (a,state) + + let entry_create = Entry.create + + let entry_parse e (p,state) = + CLexer.set_lexer_state !state; + try + let c = Entry.parse e p in + state := CLexer.get_lexer_state (); + c + with Ploc.Exc (loc,e) -> + CLexer.drop_lexer_state (); + let loc' = Loc.get_loc (Exninfo.info e) in + let loc = match loc' with None -> loc | Some loc -> loc in + Loc.raise ~loc e + + let comment_state (p,state) = + CLexer.get_comment_state !state + +end + +module Parsable = +struct + type t = G.coq_parsable + let make = G.coq_parsable + let comment_state = G.comment_state +end + +module Entry = +struct + + type 'a t = 'a Grammar.GMake(CLexer).Entry.e + + let create = G.Entry.create + let parse = G.entry_parse + let print = G.Entry.print + let of_parser = G.Entry.of_parser + let name = G.Entry.name + let parse_token_stream = G.Entry.parse_token_stream + +end + 
+module Symbols : sig + val stoken : Tok.t -> ('s, string) G.ty_symbol + val slist0sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol + val slist1sep : ('s, 'a) G.ty_symbol -> ('s, 'b) G.ty_symbol -> ('s, 'a list) G.ty_symbol +end = struct + + let stoken tok = + let pattern = match tok with + | Tok.KEYWORD s -> "", s + | Tok.IDENT s -> "IDENT", s + | Tok.PATTERNIDENT s -> "PATTERNIDENT", s + | Tok.FIELD s -> "FIELD", s + | Tok.INT s -> "INT", s + | Tok.STRING s -> "STRING", s + | Tok.LEFTQMARK -> "LEFTQMARK", "" + | Tok.BULLET s -> "BULLET", s + | Tok.EOI -> "EOI", "" + in + G.s_token pattern + + let slist0sep x y = G.s_list0sep x y false + let slist1sep x y = G.s_list1sep x y false +end + +(** Grammar extensions *) + +(** NB: [extend_statement = + gram_position option * single_extend_statement list] + and [single_extend_statement = + string option * gram_assoc option * production_rule list] + and [production_rule = symbol list * action] + + In [single_extend_statement], first two parameters are name and + assoc iff a level is created *) + +(** Binding general entry keys to symbol *) + +type ('s, 'a, 'r) casted_rule = Casted : ('s, 'b, 'r) G.ty_rule * ('a -> 'b) -> ('s, 'a, 'r) casted_rule + +let rec symbol_of_prod_entry_key : type s a. 
(s, a) symbol -> (s, a) G.ty_symbol = function +| Atoken t -> Symbols.stoken t +| Alist1 s -> G.s_list1 (symbol_of_prod_entry_key s) +| Alist1sep (s,sep) -> + Symbols.slist1sep (symbol_of_prod_entry_key s) (symbol_of_prod_entry_key sep) +| Alist0 s -> G.s_list0 (symbol_of_prod_entry_key s) +| Alist0sep (s,sep) -> + Symbols.slist0sep (symbol_of_prod_entry_key s) (symbol_of_prod_entry_key sep) +| Aopt s -> G.s_opt (symbol_of_prod_entry_key s) +| Aself -> G.s_self +| Anext -> G.s_next +| Aentry e -> G.s_nterm e +| Aentryl (e, n) -> G.s_nterml e n +| Arules rs -> + let warning msg = Feedback.msg_warning Pp.(str msg) in + G.s_rules ~warning:(Some warning) (List.map symbol_of_rules rs) + +and symbol_of_rule : type s a r. (s, a, Loc.t -> r) Extend.rule -> (s, a, Loc.t -> r) casted_rule = function +| Stop -> Casted (G.r_stop, fun act loc -> act loc) +| Next (r, s) -> + let Casted (r, cast) = symbol_of_rule r in + Casted (G.r_next r (symbol_of_prod_entry_key s), (fun act x -> cast (act x))) + +and symbol_of_rules : type a. a Extend.rules -> a G.ty_production = function +| Rules (r, act) -> + let Casted (symb, cast) = symbol_of_rule r.norec_rule in + G.production (symb, cast act) + +(** FIXME: This is a hack around a deficient camlp5 API *) +type 'a any_production = AnyProduction : ('a, 'f, Loc.t -> 'a) G.ty_rule * 'f -> 'a any_production + +let of_coq_production_rule : type a. 
a Extend.production_rule -> a any_production = function +| Rule (toks, act) -> + let Casted (symb, cast) = symbol_of_rule toks in + AnyProduction (symb, cast act) + +let of_coq_single_extend_statement (lvl, assoc, rule) = + (lvl, assoc, List.map of_coq_production_rule rule) + +let of_coq_extend_statement (pos, st) = + (pos, List.map of_coq_single_extend_statement st) + +let fix_extend_statement (pos, st) = + let fix_single_extend_statement (lvl, assoc, rules) = + let fix_production_rule (AnyProduction (s, act)) = G.production (s, act) in + (lvl, assoc, List.map fix_production_rule rules) + in + (pos, List.map fix_single_extend_statement st) + +(** Type of reinitialization data *) +type gram_reinit = Gramlib.Gramext.g_assoc * Gramlib.Gramext.position + +type extend_rule = +| ExtendRule : 'a G.Entry.e * gram_reinit option * 'a extend_statement -> extend_rule + +module EntryCommand = Dyn.Make () +module EntryData = struct type _ t = Ex : 'b G.Entry.e String.Map.t -> ('a * 'b) t end +module EntryDataMap = EntryCommand.Map(EntryData) + +type ext_kind = + | ByGrammar of extend_rule + | ByEXTEND of (unit -> unit) * (unit -> unit) + | ByEntry : ('a * 'b) EntryCommand.tag * string * 'b G.Entry.e -> ext_kind + +(** The list of extensions *) + +let camlp5_state = ref [] + +let camlp5_entries = ref EntryDataMap.empty + +(** Deletion *) + +let grammar_delete e reinit (pos,rls) = + List.iter + (fun (n,ass,lev) -> + List.iter (fun (AnyProduction (pil,_)) -> G.safe_delete_rule e pil) (List.rev lev)) + (List.rev rls); + match reinit with + | Some (a,ext) -> + let lev = match pos with + | Some (Gramext.Level n) -> n + | _ -> assert false + in + let warning msg = Feedback.msg_warning Pp.(str msg) in + (G.safe_extend ~warning:(Some warning) e) (Some ext) [Some lev,Some a,[]] + | None -> () + +(** Extension *) + +let grammar_extend e reinit ext = + let ext = of_coq_extend_statement ext in + let undo () = grammar_delete e reinit ext in + let pos, ext = fix_extend_statement ext in + let 
redo () = G.safe_extend ~warning:None e pos ext in + camlp5_state := ByEXTEND (undo, redo) :: !camlp5_state; + redo () + +let grammar_extend_sync e reinit ext = + camlp5_state := ByGrammar (ExtendRule (e, reinit, ext)) :: !camlp5_state; + let pos, ext = fix_extend_statement (of_coq_extend_statement ext) in + G.safe_extend ~warning:None e pos ext + +(** The apparent parser of Coq; encapsulate G to keep track + of the extensions. *) + +module Gram = + struct + include G + end + +(** Remove extensions + + [n] is the number of extended entries (not the number of Grammar commands!) + to remove. *) + +let rec remove_grammars n = + if n>0 then + match !camlp5_state with + | [] -> anomaly ~label:"Pcoq.remove_grammars" (Pp.str "too many rules to remove.") + | ByGrammar (ExtendRule (g, reinit, ext)) :: t -> + grammar_delete g reinit (of_coq_extend_statement ext); + camlp5_state := t; + remove_grammars (n-1) + | ByEXTEND (undo,redo)::t -> + undo(); + camlp5_state := t; + remove_grammars n; + redo(); + camlp5_state := ByEXTEND (undo,redo) :: !camlp5_state + | ByEntry (tag, name, e) :: t -> + G.Unsafe.clear_entry e; + camlp5_state := t; + let EntryData.Ex entries = + try EntryDataMap.find tag !camlp5_entries + with Not_found -> EntryData.Ex String.Map.empty + in + let entries = String.Map.remove name entries in + camlp5_entries := EntryDataMap.add tag (EntryData.Ex entries) !camlp5_entries; + remove_grammars (n - 1) + +let make_rule r = [None, None, r] + +(** An entry that checks we reached the end of the input. 
*) + +let eoi_entry en = + let e = Entry.create ((Gram.Entry.name en) ^ "_eoi") in + let symbs = G.r_next (G.r_next G.r_stop (G.s_nterm en)) (Symbols.stoken Tok.EOI) in + let act = fun _ x loc -> x in + let warning msg = Feedback.msg_warning Pp.(str msg) in + Gram.safe_extend ~warning:(Some warning) e None (make_rule [G.production (symbs, act)]); + e + +let map_entry f en = + let e = Entry.create ((Gram.Entry.name en) ^ "_map") in + let symbs = G.r_next G.r_stop (G.s_nterm en) in + let act = fun x loc -> f x in + let warning msg = Feedback.msg_warning Pp.(str msg) in + Gram.safe_extend ~warning:(Some warning) e None (make_rule [G.production (symbs, act)]); + e + +(* Parse a string, does NOT check if the entire string was read + (use eoi_entry) *) + +let parse_string f x = + let strm = Stream.of_string x in Gram.entry_parse f (Gram.coq_parsable strm) + +type gram_universe = string + +let utables : (string, unit) Hashtbl.t = + Hashtbl.create 97 + +let create_universe u = + let () = Hashtbl.add utables u () in + u + +let uprim = create_universe "prim" +let uconstr = create_universe "constr" +let utactic = create_universe "tactic" + +let get_univ u = + if Hashtbl.mem utables u then u + else raise Not_found + +let new_entry u s = + let ename = u ^ ":" ^ s in + let e = Entry.create ename in + e + +let make_gen_entry u s = new_entry u s + +module GrammarObj = +struct + type ('r, _, _) obj = 'r Entry.t + let name = "grammar" + let default _ = None +end + +module Grammar = Register(GrammarObj) + +let register_grammar = Grammar.register0 +let genarg_grammar = Grammar.obj + +let create_generic_entry (type a) u s (etyp : a raw_abstract_argument_type) : a Entry.t = + let e = new_entry u s in + let Rawwit t = etyp in + let () = Grammar.register0 t e in + e + +(* Initial grammar entries *) + +module Prim = + struct + let gec_gen n = make_gen_entry uprim n + + (* Entries that can be referred via the string -> Entry.t table *) + (* Typically for tactic or vernac extensions *) + let 
preident = gec_gen "preident" + let ident = gec_gen "ident" + let natural = gec_gen "natural" + let integer = gec_gen "integer" + let bigint = Entry.create "Prim.bigint" + let string = gec_gen "string" + let lstring = Entry.create "Prim.lstring" + let reference = make_gen_entry uprim "reference" + let by_notation = Entry.create "by_notation" + let smart_global = Entry.create "smart_global" + + (* parsed like ident but interpreted as a term *) + let var = gec_gen "var" + + let name = Entry.create "Prim.name" + let identref = Entry.create "Prim.identref" + let univ_decl = Entry.create "Prim.univ_decl" + let ident_decl = Entry.create "Prim.ident_decl" + let pattern_ident = Entry.create "pattern_ident" + let pattern_identref = Entry.create "pattern_identref" + + (* A synonym of ident - maybe ident will be located one day *) + let base_ident = Entry.create "Prim.base_ident" + + let qualid = Entry.create "Prim.qualid" + let fullyqualid = Entry.create "Prim.fullyqualid" + let dirpath = Entry.create "Prim.dirpath" + + let ne_string = Entry.create "Prim.ne_string" + let ne_lstring = Entry.create "Prim.ne_lstring" + + end + +module Constr = + struct + let gec_constr = make_gen_entry uconstr + + (* Entries that can be referred via the string -> Entry.t table *) + let constr = gec_constr "constr" + let operconstr = gec_constr "operconstr" + let constr_eoi = eoi_entry constr + let lconstr = gec_constr "lconstr" + let binder_constr = gec_constr "binder_constr" + let ident = make_gen_entry uconstr "ident" + let global = make_gen_entry uconstr "global" + let universe_level = make_gen_entry uconstr "universe_level" + let sort = make_gen_entry uconstr "sort" + let sort_family = make_gen_entry uconstr "sort_family" + let pattern = Entry.create "constr:pattern" + let constr_pattern = gec_constr "constr_pattern" + let lconstr_pattern = gec_constr "lconstr_pattern" + let closed_binder = Entry.create "constr:closed_binder" + let binder = Entry.create "constr:binder" + let binders = 
Entry.create "constr:binders" + let open_binders = Entry.create "constr:open_binders" + let binders_fixannot = Entry.create "constr:binders_fixannot" + let typeclass_constraint = Entry.create "constr:typeclass_constraint" + let record_declaration = Entry.create "constr:record_declaration" + let appl_arg = Entry.create "constr:appl_arg" + end + +module Module = + struct + let module_expr = Entry.create "module_expr" + let module_type = Entry.create "module_type" + end + +let epsilon_value f e = + let r = G.production (G.r_next G.r_stop (symbol_of_prod_entry_key e), (fun x _ -> f x)) in + let ext = [None, None, [r]] in + let entry = Gram.entry_create "epsilon" in + let warning msg = Feedback.msg_warning Pp.(str msg) in + let () = G.safe_extend ~warning:(Some warning) entry None ext in + try Some (parse_string entry "") with _ -> None + +(** Synchronized grammar extensions *) + +module GramState = Store.Make () + +type 'a grammar_extension = 'a -> GramState.t -> extend_rule list * GramState.t + +module GrammarCommand = Dyn.Make () +module GrammarInterp = struct type 'a t = 'a grammar_extension end +module GrammarInterpMap = GrammarCommand.Map(GrammarInterp) + +let grammar_interp = ref GrammarInterpMap.empty + +type ('a, 'b) entry_extension = 'a -> GramState.t -> string list * GramState.t + +module EntryInterp = struct type _ t = Ex : ('a, 'b) entry_extension -> ('a * 'b) t end +module EntryInterpMap = EntryCommand.Map(EntryInterp) + +let entry_interp = ref EntryInterpMap.empty + +type grammar_entry = +| GramExt of int * GrammarCommand.t +| EntryExt : int * ('a * 'b) EntryCommand.tag * 'a -> grammar_entry + +let (grammar_stack : (grammar_entry * GramState.t) list ref) = ref [] + +type 'a grammar_command = 'a GrammarCommand.tag +type ('a, 'b) entry_command = ('a * 'b) EntryCommand.tag + +let create_grammar_command name interp : _ grammar_command = + let obj = GrammarCommand.create name in + let () = grammar_interp := GrammarInterpMap.add obj interp !grammar_interp in + 
obj + +let create_entry_command name (interp : ('a, 'b) entry_extension) : ('a, 'b) entry_command = + let obj = EntryCommand.create name in + let () = entry_interp := EntryInterpMap.add obj (EntryInterp.Ex interp) !entry_interp in + obj + +let extend_grammar_command tag g = + let modify = GrammarInterpMap.find tag !grammar_interp in + let grammar_state = match !grammar_stack with + | [] -> GramState.empty + | (_, st) :: _ -> st + in + let (rules, st) = modify g grammar_state in + let iter (ExtendRule (e, reinit, ext)) = grammar_extend_sync e reinit ext in + let () = List.iter iter rules in + let nb = List.length rules in + grammar_stack := (GramExt (nb, GrammarCommand.Dyn (tag, g)), st) :: !grammar_stack + +let extend_entry_command (type a) (type b) (tag : (a, b) entry_command) (g : a) : b Gram.Entry.e list = + let EntryInterp.Ex modify = EntryInterpMap.find tag !entry_interp in + let grammar_state = match !grammar_stack with + | [] -> GramState.empty + | (_, st) :: _ -> st + in + let (names, st) = modify g grammar_state in + let entries = List.map (fun name -> Gram.entry_create name) names in + let iter name e = + camlp5_state := ByEntry (tag, name, e) :: !camlp5_state; + let EntryData.Ex old = + try EntryDataMap.find tag !camlp5_entries + with Not_found -> EntryData.Ex String.Map.empty + in + let entries = String.Map.add name e old in + camlp5_entries := EntryDataMap.add tag (EntryData.Ex entries) !camlp5_entries + in + let () = List.iter2 iter names entries in + let nb = List.length entries in + let () = grammar_stack := (EntryExt (nb, tag, g), st) :: !grammar_stack in + entries + +let find_custom_entry tag name = + let EntryData.Ex map = EntryDataMap.find tag !camlp5_entries in + String.Map.find name map + +let extend_dyn_grammar (e, _) = match e with +| GramExt (_, (GrammarCommand.Dyn (tag, g))) -> extend_grammar_command tag g +| EntryExt (_, tag, g) -> ignore (extend_entry_command tag g) + +(** Registering extra grammar *) + +type any_entry = AnyEntry : 'a 
the lexer state should not be reset, since it contains
with reraise -> + let reraise = CErrors.push reraise in + let () = unfreeze fs in + iraise reraise + +(** Registering grammar of generic arguments *) + +let () = + let open Stdarg in + Grammar.register0 wit_int (Prim.integer); + Grammar.register0 wit_string (Prim.string); + Grammar.register0 wit_pre_ident (Prim.preident); + Grammar.register0 wit_ident (Prim.ident); + Grammar.register0 wit_var (Prim.var); + Grammar.register0 wit_ref (Prim.reference); + Grammar.register0 wit_sort_family (Constr.sort_family); + Grammar.register0 wit_constr (Constr.constr); + () diff --git a/parsing/pcoq.mli b/parsing/pcoq.mli new file mode 100644 index 0000000000..352857d4cd --- /dev/null +++ b/parsing/pcoq.mli @@ -0,0 +1,271 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +open Names +open Extend +open Genarg +open Constrexpr +open Libnames + +(** The parser of Coq *) + +module Parsable : +sig + type t + val make : ?file:Loc.source -> char Stream.t -> t + (* Get comment parsing information from the Lexer *) + val comment_state : t -> ((int * int) * string) list +end + +module Entry : sig + type 'a t = 'a Extend.entry + val create : string -> 'a t + val parse : 'a t -> Parsable.t -> 'a + val print : Format.formatter -> 'a t -> unit + val of_parser : string -> (Tok.t Stream.t -> 'a) -> 'a t + val parse_token_stream : 'a t -> Tok.t Stream.t -> 'a + val name : 'a t -> string +end + +(** The parser of Coq is built from three kinds of rule declarations: + + - 
dynamic rules declared at the evaluation of Coq files (using + e.g. Notation, Infix, or Tactic Notation) + - static rules explicitly defined in files g_*.ml4 + - static rules macro-generated by ARGUMENT EXTEND, TACTIC EXTEND and + VERNAC EXTEND (see e.g. file extratactics.ml4) +*) + +(** Dynamic extension of rules + + For constr notations, dynamic addition of new rules is done in + several steps: + + - "x + y" (user gives a notation string of type Topconstr.notation) + | (together with a constr entry level, e.g. 50, and indications of) + | (subentries, e.g. x in constr next level and y constr same level) + | + | splitting into tokens by Metasyntax.split_notation_string + V + [String "x"; String "+"; String "y"] : symbol_token list + | + | interpreted as a mixed parsing/printing production + | by Metasyntax.analyse_notation_tokens + V + [NonTerminal "x"; Terminal "+"; NonTerminal "y"] : symbol list + | + | translated to a parsing production by Metasyntax.make_production + V + [GramConstrNonTerminal (ETConstr (NextLevel,(BorderProd Left,LeftA)), + Some "x"); + GramConstrTerminal ("","+"); + GramConstrNonTerminal (ETConstr (NextLevel,(BorderProd Right,LeftA)), + Some "y")] + : grammar_constr_prod_item list + | + | Egrammar.make_constr_prod_item + V + Gramext.g_symbol list which is sent to camlp5 + + For user level tactic notations, dynamic addition of new rules is + also done in several steps: + + - "f" constr(x) (user gives a Tactic Notation command) + | + | parsing + V + [TacTerm "f"; TacNonTerm ("constr", Some "x")] + : grammar_tactic_prod_item_expr list + | + | Metasyntax.interp_prod_item + V + [GramTerminal "f"; + GramNonTerminal (ConstrArgType, Aentry ("constr","constr"), Some "x")] + : grammar_prod_item list + | + | Egrammar.make_prod_item + V + Gramext.g_symbol list + + For TACTIC/VERNAC/ARGUMENT EXTEND, addition of new rules is done as follows: + + - "f" constr(x) (developer gives an EXTEND rule) + | + | macro-generation in 
tacextend.ml4/vernacextend.ml4/argextend.ml4 + V + [GramTerminal "f"; + GramNonTerminal (ConstrArgType, Aentry ("constr","constr"), Some "x")] + | + | Egrammar.make_prod_item + V + Gramext.g_symbol list + +*) + +(** Parse a string *) + +val parse_string : 'a Entry.t -> string -> 'a +val eoi_entry : 'a Entry.t -> 'a Entry.t +val map_entry : ('a -> 'b) -> 'a Entry.t -> 'b Entry.t + +type gram_universe + +val get_univ : string -> gram_universe +val create_universe : string -> gram_universe + +val new_entry : gram_universe -> string -> 'a Entry.t + +val uprim : gram_universe +val uconstr : gram_universe +val utactic : gram_universe + + +val register_grammar : ('raw, 'glb, 'top) genarg_type -> 'raw Entry.t -> unit +val genarg_grammar : ('raw, 'glb, 'top) genarg_type -> 'raw Entry.t + +val create_generic_entry : gram_universe -> string -> + ('a, rlevel) abstract_argument_type -> 'a Entry.t + +module Prim : + sig + open Names + open Libnames + val preident : string Entry.t + val ident : Id.t Entry.t + val name : lname Entry.t + val identref : lident Entry.t + val univ_decl : universe_decl_expr Entry.t + val ident_decl : ident_decl Entry.t + val pattern_ident : Id.t Entry.t + val pattern_identref : lident Entry.t + val base_ident : Id.t Entry.t + val natural : int Entry.t + val bigint : Constrexpr.raw_natural_number Entry.t + val integer : int Entry.t + val string : string Entry.t + val lstring : lstring Entry.t + val reference : qualid Entry.t + val qualid : qualid Entry.t + val fullyqualid : Id.t list CAst.t Entry.t + val by_notation : (string * string option) Entry.t + val smart_global : qualid or_by_notation Entry.t + val dirpath : DirPath.t Entry.t + val ne_string : string Entry.t + val ne_lstring : lstring Entry.t + val var : lident Entry.t + end + +module Constr : + sig + val constr : constr_expr Entry.t + val constr_eoi : constr_expr Entry.t + val lconstr : constr_expr Entry.t + val binder_constr : constr_expr Entry.t + val operconstr : constr_expr Entry.t + val 
ident : Id.t Entry.t + val global : qualid Entry.t + val universe_level : Glob_term.glob_level Entry.t + val sort : Glob_term.glob_sort Entry.t + val sort_family : Sorts.family Entry.t + val pattern : cases_pattern_expr Entry.t + val constr_pattern : constr_expr Entry.t + val lconstr_pattern : constr_expr Entry.t + val closed_binder : local_binder_expr list Entry.t + val binder : local_binder_expr list Entry.t (* closed_binder or variable *) + val binders : local_binder_expr list Entry.t (* list of binder *) + val open_binders : local_binder_expr list Entry.t + val binders_fixannot : (local_binder_expr list * (lident option * recursion_order_expr)) Entry.t + val typeclass_constraint : (lname * bool * constr_expr) Entry.t + val record_declaration : constr_expr Entry.t + val appl_arg : (constr_expr * explicitation CAst.t option) Entry.t + end + +module Module : + sig + val module_expr : module_ast Entry.t + val module_type : module_ast Entry.t + end + +val epsilon_value : ('a -> 'self) -> ('self, 'a) Extend.symbol -> 'self option + +(** {5 Extending the parser without synchronization} *) + +type gram_reinit = Gramlib.Gramext.g_assoc * Gramlib.Gramext.position +(** Type of reinitialization data *) + +val grammar_extend : 'a Entry.t -> gram_reinit option -> + 'a Extend.extend_statement -> unit +(** Extend the grammar of Coq, without synchronizing it with the backtracking + mechanism. This means that grammar extensions defined this way will survive + an undo. *) + +(** {5 Extending the parser with summary-synchronized commands} *) + +module GramState : Store.S +(** Auxiliary state of the grammar. Any added data must be marshallable. *) + +(** {6 Extension with parsing rules} *) + +type 'a grammar_command +(** Type of synchronized parsing extensions. The ['a] type should be + marshallable. 
*) + +type extend_rule = +| ExtendRule : 'a Entry.t * gram_reinit option * 'a extend_statement -> extend_rule + +type 'a grammar_extension = 'a -> GramState.t -> extend_rule list * GramState.t +(** Grammar extension entry point. Given some ['a] and a current grammar state, + such a function must produce the list of grammar extensions that will be + applied in the same order and kept synchronized w.r.t. the summary, together + with a new state. It should be pure. *) + +val create_grammar_command : string -> 'a grammar_extension -> 'a grammar_command +(** Create a new grammar-modifying command with the given name. The extension + function is called to generate the rules for a given data. *) + +val extend_grammar_command : 'a grammar_command -> 'a -> unit +(** Extend the grammar of Coq with the given data. *) + +(** {6 Extension with parsing entries} *) + +type ('a, 'b) entry_command +(** Type of synchronized entry creation. The ['a] type should be + marshallable. *) + +type ('a, 'b) entry_extension = 'a -> GramState.t -> string list * GramState.t +(** Entry extension entry point. Given some ['a] and a current grammar state, + such a function must produce the list of entry extensions that will be + created and kept synchronized w.r.t. the summary, together + with a new state. It should be pure. *) + +val create_entry_command : string -> ('a, 'b) entry_extension -> ('a, 'b) entry_command +(** Create a new entry-creating command with the given name. The extension + function is called to generate the new entries for a given data. *) + +val extend_entry_command : ('a, 'b) entry_command -> 'a -> 'b Entry.t list +(** Create new synchronized entries using the provided data. *) + +val find_custom_entry : ('a, 'b) entry_command -> string -> 'b Entry.t +(** Find an entry generated by the synchronized system in the current state. + @raise Not_found if non-existent. *) + +(** {6 Protection w.r.t. 
backtrack} *) + +val with_grammar_rule_protection : ('a -> 'b) -> 'a -> 'b + +type frozen_t +val parser_summary_tag : frozen_t Summary.Dyn.tag + +(** Registering grammars by name *) + +type any_entry = AnyEntry : 'a Entry.t -> any_entry + +val register_grammars_by_name : string -> any_entry list -> unit +val find_grammars_by_name : string -> any_entry list diff --git a/parsing/ppextend.ml b/parsing/ppextend.ml new file mode 100644 index 0000000000..e1f5e20117 --- /dev/null +++ b/parsing/ppextend.ml @@ -0,0 +1,77 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +open Util +open Pp +open CErrors +open Notation +open Notation_gram + +(*s Pretty-print. 
*) + +type ppbox = + | PpHB of int + | PpHOVB of int + | PpHVB of int + | PpVB of int + +type ppcut = + | PpBrk of int * int + | PpFnl + +let ppcmd_of_box = function + | PpHB n -> h n + | PpHOVB n -> hov n + | PpHVB n -> hv n + | PpVB n -> v n + +let ppcmd_of_cut = function + | PpFnl -> fnl () + | PpBrk(n1,n2) -> brk(n1,n2) + +type unparsing = + | UnpMetaVar of int * parenRelation + | UnpBinderMetaVar of int * parenRelation + | UnpListMetaVar of int * parenRelation * unparsing list + | UnpBinderListMetaVar of int * bool * unparsing list + | UnpTerminal of string + | UnpBox of ppbox * unparsing Loc.located list + | UnpCut of ppcut + +type unparsing_rule = unparsing list * precedence +type extra_unparsing_rules = (string * string) list +(* Concrete syntax for symbolic-extension table *) +let notation_rules = + Summary.ref ~name:"notation-rules" (NotationMap.empty : (unparsing_rule * extra_unparsing_rules * notation_grammar) NotationMap.t) + +let declare_notation_rule ntn ~extra unpl gram = + notation_rules := NotationMap.add ntn (unpl,extra,gram) !notation_rules + +let find_notation_printing_rule ntn = + try pi1 (NotationMap.find ntn !notation_rules) + with Not_found -> anomaly (str "No printing rule found for " ++ pr_notation ntn ++ str ".") +let find_notation_extra_printing_rules ntn = + try pi2 (NotationMap.find ntn !notation_rules) + with Not_found -> [] +let find_notation_parsing_rules ntn = + try pi3 (NotationMap.find ntn !notation_rules) + with Not_found -> anomaly (str "No parsing rule found for " ++ pr_notation ntn ++ str ".") + +let get_defined_notations () = + NotationSet.elements @@ NotationMap.domain !notation_rules + +let add_notation_extra_printing_rule ntn k v = + try + notation_rules := + let p, pp, gr = NotationMap.find ntn !notation_rules in + NotationMap.add ntn (p, (k,v) :: pp, gr) !notation_rules + with Not_found -> + user_err ~hdr:"add_notation_extra_printing_rule" + (str "No such Notation.") diff --git a/parsing/ppextend.mli 
b/parsing/ppextend.mli new file mode 100644 index 0000000000..7eb5967a3e --- /dev/null +++ b/parsing/ppextend.mli @@ -0,0 +1,51 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* <O___,, * (see CREDITS file for the list of authors) *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(* * (see LICENSE file for the text of the license) *) +(************************************************************************) + +open Constrexpr +open Notation_gram + +(** {6 Pretty-print. } *) + +type ppbox = + | PpHB of int + | PpHOVB of int + | PpHVB of int + | PpVB of int + +type ppcut = + | PpBrk of int * int + | PpFnl + +val ppcmd_of_box : ppbox -> Pp.t -> Pp.t + +val ppcmd_of_cut : ppcut -> Pp.t + +(** {6 Printing rules for notations} *) + +(** Declare and look for the printing rule for symbolic notations *) +type unparsing = + | UnpMetaVar of int * parenRelation + | UnpBinderMetaVar of int * parenRelation + | UnpListMetaVar of int * parenRelation * unparsing list + | UnpBinderListMetaVar of int * bool * unparsing list + | UnpTerminal of string + | UnpBox of ppbox * unparsing Loc.located list + | UnpCut of ppcut + +type unparsing_rule = unparsing list * precedence +type extra_unparsing_rules = (string * string) list +val declare_notation_rule : notation -> extra:extra_unparsing_rules -> unparsing_rule -> notation_grammar -> unit +val find_notation_printing_rule : notation -> unparsing_rule +val find_notation_extra_printing_rules : notation -> extra_unparsing_rules +val find_notation_parsing_rules : notation -> notation_grammar +val add_notation_extra_printing_rule : notation -> string -> string -> unit + +(** Returns notations with defined parsing/printing rules *) +val 
(* Reconstructed from a collapsed cgit diff of parsing/tok.ml and
   parsing/tok.mli (both new files in the patch; they shared these
   physical lines).  The trailing fragment of parsing/notgram_ops.mli
   that also shared this span read:
     get_defined_notations : unit -> notation list *)

(************************************************************************)
(*         *   The Coq Proof Assistant / The Coq Development Team       *)
(*  v      *   INRIA, CNRS and contributors - Copyright 1999-2018       *)
(* <O___,, *       (see CREDITS file for the list of authors)           *)
(*   \VV/  **************************************************************)
(*    //   *    This file is distributed under the terms of the         *)
(*         *     GNU Lesser General Public License Version 2.1          *)
(*         *     (see LICENSE file for the text of the license)         *)
(************************************************************************)

(** The type of token for the Coq lexer and parser *)

(* Monomorphic string equality; avoids the polymorphic [=] on every
   token comparison below. *)
let string_equal (s1 : string) s2 = s1 = s2

type t =
  | KEYWORD of string
  | PATTERNIDENT of string
  | IDENT of string
  | FIELD of string
  | INT of string
  | STRING of string
  | LEFTQMARK
  | BULLET of string
  | EOI

(* Token equality.  Deliberately asymmetric: an [IDENT] on the left
   also matches a [KEYWORD] on the right carrying the same string
   (an identifier pattern is satisfied by a declared keyword), but a
   [KEYWORD] on the left never matches an [IDENT]. *)
let equal t1 t2 = match t1, t2 with
| IDENT s1, KEYWORD s2 -> string_equal s1 s2
| KEYWORD s1, KEYWORD s2 -> string_equal s1 s2
| PATTERNIDENT s1, PATTERNIDENT s2 -> string_equal s1 s2
| IDENT s1, IDENT s2 -> string_equal s1 s2
| FIELD s1, FIELD s2 -> string_equal s1 s2
| INT s1, INT s2 -> string_equal s1 s2
| STRING s1, STRING s2 -> string_equal s1 s2
| LEFTQMARK, LEFTQMARK -> true
| BULLET s1, BULLET s2 -> string_equal s1 s2
| EOI, EOI -> true
| _ -> false

(* [extract_string diff_mode tok] returns the string payload of [tok].
   When [diff_mode] is [true] the result is re-printable surface
   syntax: a STRING is wrapped in double quotes with embedded quotes
   doubled, and a FIELD gets its leading dot back; otherwise the raw
   payload is returned.  LEFTQMARK prints as "?", EOI as "". *)
let extract_string diff_mode = function
  | KEYWORD s -> s
  | IDENT s -> s
  | STRING s ->
    if diff_mode then
      (* Double every embedded '"' so the quoted form re-lexes to the
         same string. *)
      let escape_quotes s =
        let buf = Buffer.create (String.length s) in
        String.iter
          (fun ch ->
            Buffer.add_char buf ch;
            if ch = '"' then Buffer.add_char buf '"')
          s;
        Buffer.contents buf
      in
      "\"" ^ escape_quotes s ^ "\""
    else s
  | PATTERNIDENT s -> s
  | FIELD s -> if diff_mode then "." ^ s else s
  | INT s -> s
  | LEFTQMARK -> "?"
  | BULLET s -> s
  | EOI -> ""

(* Debug printing: constructor name plus quoted payload. *)
let to_string = function
  | KEYWORD s -> Format.sprintf "%S" s
  | IDENT s -> Format.sprintf "IDENT %S" s
  | PATTERNIDENT s -> Format.sprintf "PATTERNIDENT %S" s
  | FIELD s -> Format.sprintf "FIELD %S" s
  | INT s -> Format.sprintf "INT %s" s
  | STRING s -> Format.sprintf "STRING %S" s
  | LEFTQMARK -> "LEFTQMARK"
  | BULLET s -> Format.sprintf "BULLET %S" s
  | EOI -> "EOI"

(* [match_keyword kwd tok] holds iff [tok] is exactly [KEYWORD kwd].
   Unlike [equal], an IDENT never matches here. *)
let match_keyword kwd = function
  | KEYWORD kwd' -> string_equal kwd kwd'
  | _ -> false

(* Needed to fit the Camlp5 signature.
   Cannot use Pp because of the silly Tok -> Compat -> Pp dependency. *)
let print ppf tok = Format.pp_print_string ppf (to_string tok)

(** For camlp5, conversion from/to [Plexing.pattern],
    and a match function analogous to [Plexing.default_match]. *)

(* A [Plexing.pattern] is a (constructor-name, payload) pair; the
   empty constructor name denotes KEYWORD. *)
let of_pattern = function
  | "", s -> KEYWORD s
  | "IDENT", s -> IDENT s
  | "PATTERNIDENT", s -> PATTERNIDENT s
  | "FIELD", s -> FIELD s
  | "INT", s -> INT s
  | "STRING", s -> STRING s
  | "LEFTQMARK", _ -> LEFTQMARK
  | "BULLET", s -> BULLET s
  | "EOI", _ -> EOI
  | _ -> failwith "Tok.of_pattern: not a constructor"

let to_pattern = function
  | KEYWORD s -> "", s
  | IDENT s -> "IDENT", s
  | PATTERNIDENT s -> "PATTERNIDENT", s
  | FIELD s -> "FIELD", s
  | INT s -> "INT", s
  | STRING s -> "STRING", s
  | LEFTQMARK -> "LEFTQMARK", ""
  | BULLET s -> "BULLET", s
  | EOI -> "EOI", ""

(* [match_pattern pat] returns the matcher camlp5 expects: given a
   token satisfying [pat] it yields the token's string payload, and
   raises [Stream.Failure] otherwise.  A pattern with an empty payload
   matches any token of that constructor; a pattern with an explicit
   payload matches via [equal] (so a KEYWORD token can satisfy an
   IDENT pattern with the same string). *)
let match_pattern =
  let err () = raise Stream.Failure in
  function
  | "", "" -> (function KEYWORD s -> s | _ -> err ())
  | "IDENT", "" -> (function IDENT s -> s | _ -> err ())
  | "PATTERNIDENT", "" -> (function PATTERNIDENT s -> s | _ -> err ())
  | "FIELD", "" -> (function FIELD s -> s | _ -> err ())
  | "INT", "" -> (function INT s -> s | _ -> err ())
  | "STRING", "" -> (function STRING s -> s | _ -> err ())
  | "LEFTQMARK", "" -> (function LEFTQMARK -> "" | _ -> err ())
  | "BULLET", "" -> (function BULLET s -> s | _ -> err ())
  | "EOI", "" -> (function EOI -> "" | _ -> err ())
  | pat ->
    (* Explicit payload: build the expected token once, then compare. *)
    let tok = of_pattern pat in
    fun tok' -> if equal tok tok' then snd pat else err ()

(* Interface of the module (parsing/tok.mli in the patch), kept here
   as a module type because both files shared these physical lines. *)
module type S = sig

  (** The type of token for the Coq lexer and parser *)

  type t =
    | KEYWORD of string
    | PATTERNIDENT of string
    | IDENT of string
    | FIELD of string
    | INT of string
    | STRING of string
    | LEFTQMARK
    | BULLET of string
    | EOI

  val equal : t -> t -> bool

  (** [extract_string diff_mode tok] returns [tok]'s payload;
      pass [true] for diff mode (re-printable surface syntax). *)
  val extract_string : bool -> t -> string

  val to_string : t -> string

  (** Needed to fit the Camlp5 signature. *)
  val print : Format.formatter -> t -> unit

  val match_keyword : string -> t -> bool

  (** For camlp5: conversion from/to [Plexing.pattern] and matching. *)
  val of_pattern : string * string -> t
  val to_pattern : t -> string * string
  val match_pattern : string * string -> t -> string
end
