aboutsummaryrefslogtreecommitdiff
path: root/gramlib
diff options
context:
space:
mode:
authorPierre-Marie Pédrot2020-03-31 00:48:42 +0200
committerPierre-Marie Pédrot2020-03-31 00:48:42 +0200
commite2f0814688511be93659c2258b91248698f18d4a (patch)
tree06c1860a6e5b45ee154e45bfbddfff228ac22cdd /gramlib
parent8c85a8651605dd82ce2223a28ca38f31359a88bd (diff)
parent5c9f318f5f1b6e85b03bba9450ac059377be54fc (diff)
Merge PR #11647: [rfc] Consolidation of parsing interfaces
Ack-by: SkySkimmer Reviewed-by: ppedrot
Diffstat (limited to 'gramlib')
-rw-r--r--gramlib/grammar.ml226
-rw-r--r--gramlib/grammar.mli46
-rw-r--r--gramlib/plexing.ml13
-rw-r--r--gramlib/plexing.mli13
4 files changed, 221 insertions, 77 deletions
diff --git a/gramlib/grammar.ml b/gramlib/grammar.ml
index 0024d70466..d6951fff6d 100644
--- a/gramlib/grammar.ml
+++ b/gramlib/grammar.ml
@@ -8,8 +8,6 @@ open Util
(* Functorial interface *)
-module type GLexerType = Plexing.Lexer
-
type norec
type mayrec
@@ -20,6 +18,7 @@ module type S = sig
module Parsable : sig
type t
val make : ?loc:Loc.t -> char Stream.t -> t
+ val comments : t -> ((int * int) * string) list
end
val tokens : string -> (string option * int) list
@@ -27,6 +26,7 @@ module type S = sig
module Entry : sig
type 'a t
val make : string -> 'a t
+ val create : string -> 'a t
val parse : 'a t -> Parsable.t -> 'a
val name : 'a t -> string
val of_parser : string -> (Plexing.location_function -> te Stream.t -> 'a) -> 'a t
@@ -51,7 +51,7 @@ module type S = sig
val self : ('self, mayrec, 'self) t
val next : ('self, mayrec, 'self) t
val token : 'c pattern -> ('self, norec, 'c) t
- val rules : warning:(string -> unit) option -> 'a Rules.t list -> ('self, norec, 'a) t
+ val rules : 'a Rules.t list -> ('self, norec, 'a) t
end and Rule : sig
@@ -77,21 +77,39 @@ module type S = sig
val make : ('a, _, 'f, Loc.t -> 'a) Rule.t -> 'f -> 'a t
end
- module Unsafe :
- sig
+ type 'a single_extend_statement =
+ string option * Gramext.g_assoc option * 'a Production.t list
+
+ type 'a extend_statement =
+ { pos : Gramext.position option
+ ; data : 'a single_extend_statement list
+ }
+
+ val generalize_symbol : ('a, 'tr, 'c) Symbol.t -> ('a, norec, 'c) Symbol.t option
+
+ val mk_rule : 'a pattern list -> string Rules.t
+
+ (* Used in custom entries, should tweak? *)
+ val level_of_nonterm : ('a, norec, 'c) Symbol.t -> string option
+
+end
+
+module type ExtS = sig
+
+ include S
+
+ val safe_extend : 'a Entry.t -> 'a extend_statement -> unit
+ val safe_delete_rule : 'a Entry.t -> 'a Production.t -> unit
+
+ module Unsafe : sig
val clear_entry : 'a Entry.t -> unit
end
- val safe_extend : warning:(string -> unit) option ->
- 'a Entry.t -> Gramext.position option ->
- (string option * Gramext.g_assoc option * 'a Production.t list)
- list ->
- unit
- val safe_delete_rule : 'a Entry.t -> ('a, _, 'f, 'r) Rule.t -> unit
+
end
(* Implementation *)
-module GMake (L : GLexerType) = struct
+module GMake (L : Plexing.S) = struct
type te = L.te
type 'c pattern = 'c L.pattern
@@ -324,7 +342,7 @@ let and_and_tree (type s tr' trt tr trn trs trb f) (ar : (tr', trt, tr) ty_and_r
| MayRec2, _, MayRec -> MayRec2 | MayRec2, _, NoRec -> MayRec2
| NoRec2, NoRec3, NoRec -> NoRec2
-let insert_tree (type s trs trt tr p k a) ~warning entry_name (ar : (trs, trt, tr) ty_and_ex) (gsymbols : (s, trs, p) ty_symbols) (pf : (p, k, a) rel_prod) (action : k) (tree : (s, trt, a) ty_tree) : (s, tr, a) ty_tree =
+let insert_tree (type s trs trt tr p k a) entry_name (ar : (trs, trt, tr) ty_and_ex) (gsymbols : (s, trs, p) ty_symbols) (pf : (p, k, a) rel_prod) (action : k) (tree : (s, trt, a) ty_tree) : (s, tr, a) ty_tree =
let rec insert : type trs trt tr p f k. (trs, trt, tr) ty_and_ex -> (s, trs, p) ty_symbols -> (p, k, f) rel_prod -> (s, trt, f) ty_tree -> k -> (s, tr, f) ty_tree =
fun ar symbols pf tree action ->
match symbols, pf with
@@ -338,15 +356,15 @@ let insert_tree (type s trs trt tr p k a) ~warning entry_name (ar : (trs, trt, t
| NR10, Node (_, n) -> Node (MayRec3, node n)
| NR11, Node (NoRec3, n) -> Node (NoRec3, node n)
| NR11, LocAct (old_action, action_list) ->
- begin match warning with
- | None -> ()
- | Some warn_fn ->
+ (* What to do about this warning? For now it is disabled *)
+ if false then
+ begin
let msg =
"<W> Grammar extension: " ^
(if entry_name = "" then "" else "in ["^entry_name^"%s], ") ^
"some rule has been masked" in
- warn_fn msg
- end;
+ Feedback.msg_warning (Pp.str msg)
+ end;
LocAct (action, old_action :: action_list)
| NR11, DeadEnd -> LocAct (action, [])
and insert_in_tree : type trs trs' trs'' trt tr a p f k. (trs'', trt, tr) ty_and_ex -> (trs, trs', trs'') ty_and_rec -> (s, trs, a) ty_symbol -> (s, trs', p) ty_symbols -> (p, k, a -> f) rel_prod -> (s, trt, f) ty_tree -> k -> (s, tr, f) ty_tree =
@@ -405,14 +423,14 @@ let insert_tree (type s trs trt tr p k a) ~warning entry_name (ar : (trs, trt, t
in
insert ar gsymbols pf tree action
-let insert_tree_norec (type s p k a) ~warning entry_name (gsymbols : (s, norec, p) ty_symbols) (pf : (p, k, a) rel_prod) (action : k) (tree : (s, norec, a) ty_tree) : (s, norec, a) ty_tree =
- insert_tree ~warning entry_name NR11 gsymbols pf action tree
+let insert_tree_norec (type s p k a) entry_name (gsymbols : (s, norec, p) ty_symbols) (pf : (p, k, a) rel_prod) (action : k) (tree : (s, norec, a) ty_tree) : (s, norec, a) ty_tree =
+ insert_tree entry_name NR11 gsymbols pf action tree
-let insert_tree (type s trs trt p k a) ~warning entry_name (gsymbols : (s, trs, p) ty_symbols) (pf : (p, k, a) rel_prod) (action : k) (tree : (s, trt, a) ty_tree) : (s, a) ty_mayrec_tree =
+let insert_tree (type s trs trt p k a) entry_name (gsymbols : (s, trs, p) ty_symbols) (pf : (p, k, a) rel_prod) (action : k) (tree : (s, trt, a) ty_tree) : (s, a) ty_mayrec_tree =
let MayRecNR ar = and_symbols_tree gsymbols tree in
- MayRecTree (insert_tree ~warning entry_name ar gsymbols pf action tree)
+ MayRecTree (insert_tree entry_name ar gsymbols pf action tree)
-let srules (type self a) ~warning (rl : a ty_rules list) : (self, norec, a) ty_symbol =
+let srules (type self a) (rl : a ty_rules list) : (self, norec, a) ty_symbol =
let rec retype_tree : type s a. (s, norec, a) ty_tree -> (self, norec, a) ty_tree =
function
| Node (NoRec3, {node = s; son = son; brother = bro}) ->
@@ -439,7 +457,7 @@ let srules (type self a) ~warning (rl : a ty_rules list) : (self, norec, a) ty_s
(fun tree (TRules (symbols, action)) ->
let symbols = retype_rule symbols in
let AnyS (symbols, pf) = get_symbols symbols in
- insert_tree_norec ~warning "" symbols pf action tree)
+ insert_tree_norec "" symbols pf action tree)
DeadEnd rl
in
Stree t
@@ -449,19 +467,19 @@ let is_level_labelled n (Level lev) =
Some n1 -> n = n1
| None -> false
-let insert_level (type s tr p k) ~warning entry_name (symbols : (s, tr, p) ty_symbols) (pf : (p, k, Loc.t -> s) rel_prod) (action : k) (slev : s ty_level) : s ty_level =
+let insert_level (type s tr p k) entry_name (symbols : (s, tr, p) ty_symbols) (pf : (p, k, Loc.t -> s) rel_prod) (action : k) (slev : s ty_level) : s ty_level =
match symbols with
| TCns (_, Sself, symbols) ->
let Level slev = slev in
let RelS pf = pf in
- let MayRecTree lsuffix = insert_tree ~warning entry_name symbols pf action slev.lsuffix in
+ let MayRecTree lsuffix = insert_tree entry_name symbols pf action slev.lsuffix in
Level
{assoc = slev.assoc; lname = slev.lname;
lsuffix = lsuffix;
lprefix = slev.lprefix}
| _ ->
let Level slev = slev in
- let MayRecTree lprefix = insert_tree ~warning entry_name symbols pf action slev.lprefix in
+ let MayRecTree lprefix = insert_tree entry_name symbols pf action slev.lprefix in
Level
{assoc = slev.assoc; lname = slev.lname; lsuffix = slev.lsuffix;
lprefix = lprefix}
@@ -475,34 +493,27 @@ let empty_lev lname assoc =
Level
{assoc = assoc; lname = lname; lsuffix = DeadEnd; lprefix = DeadEnd}
-let change_lev ~warning (Level lev) n lname assoc =
+let change_lev (Level lev) n lname assoc =
let a =
match assoc with
None -> lev.assoc
| Some a ->
if a <> lev.assoc then
- begin
- match warning with
- | None -> ()
- | Some warn_fn ->
- warn_fn ("<W> Changing associativity of level \""^n^"\"")
- end;
- a
+ Feedback.msg_warning (Pp.str ("<W> Changing associativity of level \""^n^"\""));
+ a
in
- begin match lname with
- Some n ->
- if lname <> lev.lname then
- begin match warning with
- | None -> ()
- | Some warn_fn ->
- warn_fn ("<W> Level label \""^n^"\" ignored")
- end;
- | None -> ()
+ begin
+ match lname with
+ | Some n ->
+ (* warning disabled; it was in the past *)
+ if false && lname <> lev.lname then
+ Feedback.msg_warning (Pp.str ("<W> Level label \""^n^"\" ignored"))
+ | None -> ()
end;
Level
{assoc = a; lname = lev.lname; lsuffix = lev.lsuffix; lprefix = lev.lprefix}
-let get_level ~warning entry position levs =
+let get_level entry position levs =
match position with
Some First -> [], empty_lev, levs
| Some Last -> levs, empty_lev, []
@@ -515,7 +526,7 @@ let get_level ~warning entry position levs =
flush stderr;
failwith "Grammar.extend"
| lev :: levs ->
- if is_level_labelled n lev then [], change_lev ~warning lev n, levs
+ if is_level_labelled n lev then [], change_lev lev n, levs
else
let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2
in
@@ -550,7 +561,7 @@ let get_level ~warning entry position levs =
get levs
| None ->
match levs with
- lev :: levs -> [], change_lev ~warning lev "<top>", levs
+ lev :: levs -> [], change_lev lev "<top>", levs
| [] -> [], empty_lev, []
let change_to_self0 (type s) (type trec) (type a) (entry : s ty_entry) : (s, trec, a) ty_symbol -> (s, a) ty_mayrec_symbol =
@@ -600,7 +611,7 @@ let insert_tokens gram symbols =
in
linsert symbols
-let levels_of_rules ~warning entry position rules =
+let levels_of_rules entry position rules =
let elev =
match entry.edesc with
Dlevels elev -> elev
@@ -612,7 +623,7 @@ let levels_of_rules ~warning entry position rules =
match rules with
| [] -> elev
| _ ->
- let (levs1, make_lev, levs2) = get_level ~warning entry position elev in
+ let (levs1, make_lev, levs2) = get_level entry position elev in
let (levs, _) =
List.fold_left
(fun (levs, make_lev) (lname, assoc, level) ->
@@ -623,7 +634,7 @@ let levels_of_rules ~warning entry position rules =
let MayRecRule symbols = change_to_self entry symbols in
let AnyS (symbols, pf) = get_symbols symbols in
insert_tokens egram symbols;
- insert_level ~warning entry.ename symbols pf action lev)
+ insert_level entry.ename symbols pf action lev)
lev level
in
lev :: levs, empty_lev)
@@ -1479,8 +1490,8 @@ let init_entry_functions entry =
let f = continue_parser_of_entry entry in
entry.econtinue <- f; f lev bp a strm)
-let extend_entry ~warning entry position rules =
- let elev = levels_of_rules ~warning entry position rules in
+let extend_entry entry position rules =
+ let elev = levels_of_rules entry position rules in
entry.edesc <- Dlevels elev; init_entry_functions entry
(* Deleting a rule *)
@@ -1508,7 +1519,7 @@ module Parsable = struct
{ pa_chr_strm : char Stream.t
; pa_tok_strm : L.te Stream.t
; pa_loc_func : Plexing.location_function
- }
+ ; lexer_state : L.State.t ref }
let parse_parsable entry p =
let efun = entry.estart 0 in
@@ -1544,9 +1555,26 @@ module Parsable = struct
let loc = Stream.count cs, Stream.count cs + 1 in
restore (); Ploc.raise (Ploc.make_unlined loc) exc
+ let parse_parsable e p =
+ L.State.set !(p.lexer_state);
+ try
+ let c = parse_parsable e p in
+ p.lexer_state := L.State.get ();
+ c
+ with Ploc.Exc (loc,e) ->
+ L.State.drop ();
+ let loc' = Loc.get_loc (Exninfo.info e) in
+ let loc = match loc' with None -> loc | Some loc -> loc in
+ Loc.raise ~loc e
+
let make ?loc cs =
+ let lexer_state = ref (L.State.init ()) in
+ L.State.set !lexer_state;
let (ts, lf) = L.tok_func ?loc cs in
- {pa_chr_strm = cs; pa_tok_strm = ts; pa_loc_func = lf}
+ lexer_state := L.State.get ();
+ {pa_chr_strm = cs; pa_tok_strm = ts; pa_loc_func = lf; lexer_state}
+
+ let comments p = L.State.get_comments !(p.lexer_state)
end
@@ -1557,6 +1585,7 @@ module Entry = struct
econtinue =
(fun _ _ _ (strm__ : _ Stream.t) -> raise Stream.Failure);
edesc = Dlevels []}
+ let create = make
let parse (e : 'a t) p : 'a =
Parsable.parse_parsable e p
let parse_token_stream (e : 'a t) ts : 'a =
@@ -1589,7 +1618,7 @@ module rec Symbol : sig
val self : ('self, mayrec, 'self) t
val next : ('self, mayrec, 'self) t
val token : 'c pattern -> ('self, norec, 'c) t
- val rules : warning:(string -> unit) option -> 'a Rules.t list -> ('self, norec, 'a) t
+ val rules : 'a Rules.t list -> ('self, norec, 'a) t
end = struct
@@ -1604,7 +1633,7 @@ end = struct
let self = Sself
let next = Snext
let token tok = Stoken tok
- let rules ~warning (t : 'a Rules.t list) = srules ~warning t
+ let rules (t : 'a Rules.t list) = srules t
end and Rule : sig
@@ -1656,14 +1685,87 @@ module Unsafe = struct
end
-let safe_extend ~warning (e : 'a Entry.t) pos
- (r :
- (string option * Gramext.g_assoc option * 'a ty_production list)
- list) =
- extend_entry ~warning e pos r
+type 'a single_extend_statement =
+ string option * Gramext.g_assoc option * 'a ty_production list
+
+type 'a extend_statement =
+ { pos : Gramext.position option
+ ; data : 'a single_extend_statement list
+ }
-let safe_delete_rule e r =
+let safe_extend (e : 'a Entry.t) { pos; data } =
+ extend_entry e pos data
+
+let safe_delete_rule e (TProd (r,_act)) =
let AnyS (symbols, _) = get_symbols r in
delete_rule e symbols
+let level_of_nonterm sym = match sym with
+ | Snterml (_,l) -> Some l
+ | _ -> None
+
+exception SelfSymbol
+
+let rec generalize_symbol :
+ type a tr s. (s, tr, a) Symbol.t -> (s, norec, a) ty_symbol =
+ function
+ | Stoken tok ->
+ Stoken tok
+ | Slist1 e ->
+ Slist1 (generalize_symbol e)
+ | Slist1sep (e, sep, b) ->
+ let e = generalize_symbol e in
+ let sep = generalize_symbol sep in
+ Slist1sep (e, sep, b)
+ | Slist0 e ->
+ Slist0 (generalize_symbol e)
+ | Slist0sep (e, sep, b) ->
+ let e = generalize_symbol e in
+ let sep = generalize_symbol sep in
+ Slist0sep (e, sep, b)
+ | Sopt e ->
+ Sopt (generalize_symbol e)
+ | Sself ->
+ raise SelfSymbol
+ | Snext ->
+ raise SelfSymbol
+ | Snterm e ->
+ Snterm e
+ | Snterml (e, l) ->
+ Snterml (e, l)
+ | Stree r ->
+ Stree (generalize_tree r)
+and generalize_tree : type a tr s .
+ (s, tr, a) ty_tree -> (s, norec, a) ty_tree = fun r ->
+ match r with
+ | Node (fi, n) ->
+ let fi = match fi with
+ | NoRec3 -> NoRec3
+ | MayRec3 -> raise SelfSymbol
+ in
+ let n = match n with
+ | { node; son; brother } ->
+ let node = generalize_symbol node in
+ let son = generalize_tree son in
+ let brother = generalize_tree brother in
+ { node; son; brother }
+ in
+ Node (fi, n)
+ | LocAct _ as r -> r
+ | DeadEnd as r -> r
+
+let generalize_symbol s =
+ try Some (generalize_symbol s)
+ with SelfSymbol -> None
+
+let rec mk_rule tok =
+ match tok with
+ | [] ->
+ let stop_e = Rule.stop in
+ TRules (stop_e, fun _ -> (* dropped anyway: *) "")
+ | tkn :: rem ->
+ let TRules (r, f) = mk_rule rem in
+ let r = Rule.next_norec r (Symbol.token tkn) in
+ TRules (r, fun _ -> f)
+
end
diff --git a/gramlib/grammar.mli b/gramlib/grammar.mli
index f0423a92af..33006f6f65 100644
--- a/gramlib/grammar.mli
+++ b/gramlib/grammar.mli
@@ -15,8 +15,7 @@
rule "an entry cannot call an entry of another grammar" by
normal OCaml typing. *)
-module type GLexerType = Plexing.Lexer
- (** The input signature for the functor [Grammar.GMake]: [te] is the
+(** The input signature for the functor [Grammar.GMake]: [te] is the
type of the tokens. *)
type norec
@@ -29,6 +28,7 @@ module type S = sig
module Parsable : sig
type t
val make : ?loc:Loc.t -> char Stream.t -> t
+ val comments : t -> ((int * int) * string) list
end
val tokens : string -> (string option * int) list
@@ -36,6 +36,7 @@ module type S = sig
module Entry : sig
type 'a t
val make : string -> 'a t
+ val create : string -> 'a t (* compat *)
val parse : 'a t -> Parsable.t -> 'a
val name : 'a t -> string
val of_parser : string -> (Plexing.location_function -> te Stream.t -> 'a) -> 'a t
@@ -60,7 +61,7 @@ module type S = sig
val self : ('self, mayrec, 'self) t
val next : ('self, mayrec, 'self) t
val token : 'c pattern -> ('self, norec, 'c) t
- val rules : warning:(string -> unit) option -> 'a Rules.t list -> ('self, norec, 'a) t
+ val rules : 'a Rules.t list -> ('self, norec, 'a) t
end and Rule : sig
@@ -86,17 +87,37 @@ module type S = sig
val make : ('a, _, 'f, Loc.t -> 'a) Rule.t -> 'f -> 'a t
end
- module Unsafe :
- sig
+ type 'a single_extend_statement =
+ string option * Gramext.g_assoc option * 'a Production.t list
+
+ type 'a extend_statement =
+ { pos : Gramext.position option
+ ; data : 'a single_extend_statement list
+ }
+
+ val generalize_symbol : ('a, 'tr, 'c) Symbol.t -> ('a, norec, 'c) Symbol.t option
+
+ val mk_rule : 'a pattern list -> string Rules.t
+
+ (* Used in custom entries, should tweak? *)
+ val level_of_nonterm : ('a, norec, 'c) Symbol.t -> string option
+
+end
+
+(* Interface private to clients *)
+module type ExtS = sig
+
+ include S
+
+ val safe_extend : 'a Entry.t -> 'a extend_statement -> unit
+ val safe_delete_rule : 'a Entry.t -> 'a Production.t -> unit
+
+ module Unsafe : sig
val clear_entry : 'a Entry.t -> unit
end
- val safe_extend : warning:(string -> unit) option ->
- 'a Entry.t -> Gramext.position option ->
- (string option * Gramext.g_assoc option * 'a Production.t list)
- list ->
- unit
- val safe_delete_rule : 'a Entry.t -> ('a, _, 'f, 'r) Rule.t -> unit
+
end
+
(** Signature type of the functor [Grammar.GMake]. The types and
functions are almost the same than in generic interface, but:
- Grammars are not values. Functions holding a grammar as parameter
@@ -107,5 +128,4 @@ end
type (instead of (string * string)); the module parameter
must specify a way to show them as (string * string) *)
-module GMake (L : GLexerType) :
- S with type te = L.te and type 'c pattern = 'c L.pattern
+module GMake (L : Plexing.S) : ExtS with type te = L.te and type 'c pattern = 'c L.pattern
diff --git a/gramlib/plexing.ml b/gramlib/plexing.ml
index e881ab3350..ce3e38ff08 100644
--- a/gramlib/plexing.ml
+++ b/gramlib/plexing.ml
@@ -5,7 +5,7 @@
type location_function = int -> Loc.t
type 'te lexer_func = ?loc:Loc.t -> char Stream.t -> 'te Stream.t * location_function
-module type Lexer = sig
+module type S = sig
type te
type 'c pattern
val tok_pattern_eq : 'a pattern -> 'b pattern -> ('a, 'b) Util.eq option
@@ -15,4 +15,15 @@ module type Lexer = sig
val tok_removing : 'c pattern -> unit
val tok_match : 'c pattern -> te -> 'c
val tok_text : 'c pattern -> string
+
+ (* State for the comments, at some point we should make it functional *)
+ module State : sig
+ type t
+ val init : unit -> t
+ val set : t -> unit
+ val get : unit -> t
+ val drop : unit -> unit
+ val get_comments : t -> ((int * int) * string) list
+ end
+
end
diff --git a/gramlib/plexing.mli b/gramlib/plexing.mli
index 521eba7446..0c190af635 100644
--- a/gramlib/plexing.mli
+++ b/gramlib/plexing.mli
@@ -15,7 +15,7 @@ and location_function = int -> Loc.t
(** The type of a function giving the location of a token in the
source from the token number in the stream (starting from zero). *)
-module type Lexer = sig
+module type S = sig
type te
type 'c pattern
val tok_pattern_eq : 'a pattern -> 'b pattern -> ('a, 'b) Util.eq option
@@ -25,4 +25,15 @@ module type Lexer = sig
val tok_removing : 'c pattern -> unit
val tok_match : 'c pattern -> te -> 'c
val tok_text : 'c pattern -> string
+
+ (* State for the comments, at some point we should make it functional *)
+ module State : sig
+ type t
+ val init : unit -> t
+ val set : t -> unit
+ val get : unit -> t
+ val drop : unit -> unit
+ val get_comments : t -> ((int * int) * string) list
+ end
+
end