From e07efb3798c7c6ec54aac9093ab50fddfc6c6a5b Mon Sep 17 00:00:00 2001 From: Hugo Herbelin Date: Mon, 5 Apr 2021 16:40:10 +0200 Subject: Relying on the abstract notion of streams with location for parsing. We also get rid of ploc.ml, now useless, relying a priori on more robust code in lStream.ml for location reporting (see e.g. parse_parsable in grammar.ml). --- parsing/cLexer.ml | 43 +++++++++++++------------------------------ parsing/pcoq.ml | 44 ++++++++++++++++++++++---------------------- parsing/pcoq.mli | 2 +- 3 files changed, 36 insertions(+), 53 deletions(-) (limited to 'parsing') diff --git a/parsing/cLexer.ml b/parsing/cLexer.ml index d8d2f2a2ef..bb1797ee02 100644 --- a/parsing/cLexer.ml +++ b/parsing/cLexer.ml @@ -11,7 +11,6 @@ open Pp open Util open Tok -open Gramlib (* Dictionaries: trees annotated with string options, each node being a map from chars to dictionaries (the subtrees). A trie, in other words. *) @@ -115,7 +114,7 @@ let bad_token str = raise (Error.E (Bad_token str)) (* Update a loc without allocating an intermediate pair *) let set_loc_pos loc bp ep = - Ploc.sub loc (bp - loc.Loc.bp) (ep - bp) + Loc.sub loc (bp - loc.Loc.bp) (ep - bp) (* Increase line number by 1 and update position of beginning of line *) let bump_loc_line loc bol_pos = @@ -216,7 +215,9 @@ let lookup_utf8 loc cs = | Some ('\x80'..'\xFF' as c) -> lookup_utf8_tail loc c cs | None -> EmptyStream -let unlocated f x = f x +let unlocated f x = + let dummy_loc = Loc.(initial ToplevelInput) in + f dummy_loc x (** FIXME: should we still unloc the exception? *) (* try f x with Loc.Exc_located (_, exc) -> raise exc *) @@ -226,7 +227,7 @@ let check_keyword str = Stream.junk s; bad_token str | _ -> - match unlocated lookup_utf8 Ploc.dummy s with + match unlocated lookup_utf8 s with | Utf8Token (_,n) -> njunk n s; loop_symb s | AsciiChar -> Stream.junk s; loop_symb s | EmptyStream -> () @@ -242,7 +243,7 @@ let check_ident str = Stream.junk s; loop_id true s | _ -> - match unlocated lookup_utf8 Ploc.dummy s with + match unlocated lookup_utf8 s with | Utf8Token (st, n) when not intail && Unicode.is_valid_ident_initial st -> njunk n s; loop_id true s | Utf8Token (st, n) when intail && Unicode.is_valid_ident_trailing st -> njunk n s; @@ -793,20 +794,6 @@ let next_token ~diff_mode loc s = Printf.eprintf "(line %i, %i-%i)[%s]\n%!" (Ploc.line_nb loc) (Ploc.first_pos loc) (Ploc.last_pos loc) (Tok.to_string t); r *) -(* Location table system for creating tables associating a token count - to its location in a char stream (the source) *) - -let locerr i = - let m = "Lexer: location function called on token "^string_of_int i in - invalid_arg m - -let loct_create () = Hashtbl.create 207 - -let loct_func loct i = - try Hashtbl.find loct i with Not_found -> locerr i - -let loct_add loct i loc = Hashtbl.add loct i loc - (** {6 The lexer of Coq} *) (** Note: removing a token. @@ -837,17 +824,13 @@ let token_text : type c. c Tok.p -> string = function | PBULLET (Some s) -> "BULLET \"" ^ s ^ "\"" | PQUOTATION lbl -> "QUOTATION \"" ^ lbl ^ "\"" -let func next_token ?loc cs = - let loct = loct_create () in - let cur_loc = ref (Option.default Loc.(initial ToplevelInput) loc) in - let ts = - Stream.from - (fun i -> - let (tok, loc) = next_token !cur_loc cs in - cur_loc := after loc; - loct_add loct i loc; Some tok) - in - (ts, loct_func loct) +let func next_token ?(source=Loc.ToplevelInput) cs = + let cur_loc = ref (Loc.initial source) in + LStream.from ~source + (fun i -> + let (tok, loc) = next_token !cur_loc cs in + cur_loc := after loc; + Some (tok,loc)) module MakeLexer (Diff : sig val mode : bool end) = struct type te = Tok.t diff --git a/parsing/pcoq.ml b/parsing/pcoq.ml index cc9e1bb31d..3393bdab7b 100644 --- a/parsing/pcoq.ml +++ b/parsing/pcoq.ml @@ -21,49 +21,49 @@ struct let err () = raise Stream.Failure - type t = Gramlib.Plexing.location_function -> int -> Tok.t Stream.t -> int option + type t = int -> Tok.t LStream.t -> int option - let rec contiguous tok n m = + let rec contiguous n m strm = n == m || - let (_, ep) = Loc.unloc (tok n) in - let (bp, _) = Loc.unloc (tok (n + 1)) in - Int.equal ep bp && contiguous tok (succ n) m + let (_, ep) = Loc.unloc (LStream.get_loc n strm) in + let (bp, _) = Loc.unloc (LStream.get_loc (n + 1) strm) in + Int.equal ep bp && contiguous (succ n) m strm - let check_no_space tok m strm = - let n = Stream.count strm in - if contiguous tok n (n+m-1) then Some m else None + let check_no_space m strm = + let n = LStream.count strm in + if contiguous n (n+m-1) strm then Some m else None let to_entry s (lk : t) = - let run tok strm = match lk tok 0 strm with None -> err () | Some _ -> () in - Entry.of_parser s run + let run strm = match lk 0 strm with None -> err () | Some _ -> () in + Entry.(of_parser s { parser_fun = run }) - let (>>) (lk1 : t) lk2 tok n strm = match lk1 tok n strm with + let (>>) (lk1 : t) lk2 n strm = match lk1 n strm with | None -> None - | Some n -> lk2 tok n strm + | Some n -> lk2 n strm - let (<+>) (lk1 : t) lk2 tok n strm = match lk1 tok n strm with - | None -> lk2 tok n strm + let (<+>) (lk1 : t) lk2 n strm = match lk1 n strm with + | None -> lk2 n strm | Some n -> Some n - let lk_empty tok n strm = Some n + let lk_empty n strm = Some n - let lk_kw kw tok n strm = match stream_nth n strm with + let lk_kw kw n strm = match LStream.peek_nth n strm with | Tok.KEYWORD kw' | Tok.IDENT kw' -> if String.equal kw kw' then Some (n + 1) else None | _ -> None - let lk_kws kws tok n strm = match stream_nth n strm with + let lk_kws kws n strm = match LStream.peek_nth n strm with | Tok.KEYWORD kw | Tok.IDENT kw -> if List.mem_f String.equal kw kws then Some (n + 1) else None | _ -> None - let lk_ident tok n strm = match stream_nth n strm with + let lk_ident n strm = match LStream.peek_nth n strm with | Tok.IDENT _ -> Some (n + 1) | _ -> None - let lk_ident_except idents tok n strm = match stream_nth n strm with + let lk_ident_except idents n strm = match LStream.peek_nth n strm with | Tok.IDENT ident when not (List.mem_f String.equal ident idents) -> Some (n + 1) | _ -> None - let lk_nat tok n strm = match stream_nth n strm with + let lk_nat n strm = match LStream.peek_nth n strm with | Tok.NUMBER p when NumTok.Unsigned.is_nat p -> Some (n + 1) | _ -> None @@ -191,9 +191,9 @@ let eoi_entry en = (* Parse a string, does NOT check if the entire string was read (use eoi_entry) *) -let parse_string f ?loc x = +let parse_string f ?source x = let strm = Stream.of_string x in - Entry.parse f (Parsable.make ?loc strm) + Entry.parse f (Parsable.make ?source strm) (* universes not used by Coq build but still used by some plugins *) type gram_universe = string diff --git a/parsing/pcoq.mli b/parsing/pcoq.mli index 06d05a4797..d9165ff5a6 100644 --- a/parsing/pcoq.mli +++ b/parsing/pcoq.mli @@ -119,7 +119,7 @@ end (** Parse a string *) -val parse_string : 'a Entry.t -> ?loc:Loc.t -> string -> 'a +val parse_string : 'a Entry.t -> ?source:Loc.source -> string -> 'a val eoi_entry : 'a Entry.t -> 'a Entry.t type gram_universe [@@deprecated "Deprecated in 8.13"] -- cgit v1.2.3