diff options
| author | Emilio Jesus Gallego Arias | 2019-02-12 18:18:29 +0100 |
|---|---|---|
| committer | Emilio Jesus Gallego Arias | 2019-02-12 18:18:29 +0100 |
| commit | ddc17851aa2f73eda9ddc2f8f0f2749f58b51520 (patch) | |
| tree | ace4c2d2255d2f8c7b0038c2456e757fcef7f9f3 | |
| parent | fbb30d0ec4f5f5af8dd3daa2ca4f47405a4d0d15 (diff) | |
| parent | 9da757b47984837ed5d6b88e72325b45d2122c17 (diff) | |
Merge PR #9548: Almost fully type-safe gramlib implementation
Reviewed-by: ejgallego
| -rw-r--r-- | gramlib/gramext.ml | 451 | ||||
| -rw-r--r-- | gramlib/gramext.mli | 55 | ||||
| -rw-r--r-- | gramlib/grammar.ml | 980 |
3 files changed, 787 insertions, 699 deletions
diff --git a/gramlib/gramext.ml b/gramlib/gramext.ml index 46c2688f05..c396bbab34 100644 --- a/gramlib/gramext.ml +++ b/gramlib/gramext.ml @@ -2,51 +2,6 @@ (* gramext.ml,v *) (* Copyright (c) INRIA 2007-2017 *) -open Printf - -type 'a parser_t = 'a Stream.t -> Obj.t - -type 'te grammar = - { gtokens : (Plexing.pattern, int ref) Hashtbl.t; - glexer : 'te Plexing.lexer } - -type 'te g_entry = - { egram : 'te grammar; - ename : string; - elocal : bool; - mutable estart : int -> 'te parser_t; - mutable econtinue : int -> int -> Obj.t -> 'te parser_t; - mutable edesc : 'te g_desc } -and 'te g_desc = - Dlevels of 'te g_level list - | Dparser of 'te parser_t -and 'te g_level = - { assoc : g_assoc; - lname : string option; - lsuffix : 'te g_tree; - lprefix : 'te g_tree } -and g_assoc = NonA | RightA | LeftA -and 'te g_symbol = - | Snterm of 'te g_entry - | Snterml of 'te g_entry * string - | Slist0 of 'te g_symbol - | Slist0sep of 'te g_symbol * 'te g_symbol * bool - | Slist1 of 'te g_symbol - | Slist1sep of 'te g_symbol * 'te g_symbol * bool - | Sopt of 'te g_symbol - | Sself - | Snext - | Stoken of Plexing.pattern - | Stree of 'te g_tree -and g_action = Obj.t -and 'te g_tree = - Node of 'te g_node - | LocAct of g_action * g_action list - | DeadEnd -and 'te g_node = - { node : 'te g_symbol; son : 'te g_tree; brother : 'te g_tree } -and err_fun = unit -> string - type position = First | Last @@ -54,408 +9,4 @@ type position = | After of string | Level of string -let rec derive_eps = - function - Slist0 _ -> true - | Slist0sep (_, _, _) -> true - | Sopt _ -> true - | Stree t -> tree_derive_eps t - | Slist1 _ | Slist1sep (_, _, _) | Snterm _ | - Snterml (_, _) | Snext | Sself | Stoken _ -> - false -and tree_derive_eps = - function - LocAct (_, _) -> true - | Node {node = s; brother = bro; son = son} -> - derive_eps s && tree_derive_eps son || tree_derive_eps bro - | DeadEnd -> false - -let rec eq_symbol s1 s2 = - match s1, s2 with - Snterm e1, Snterm e2 -> e1 == e2 - | Snterml (e1, l1), Snterml (e2, l2) -> e1 == e2 && l1 = l2 - | Slist0 s1, Slist0 s2 -> eq_symbol s1 s2 - | Slist0sep (s1, sep1, b1), Slist0sep (s2, sep2, b2) -> - eq_symbol s1 s2 && eq_symbol sep1 sep2 && b1 = b2 - | Slist1 s1, Slist1 s2 -> eq_symbol s1 s2 - | Slist1sep (s1, sep1, b1), Slist1sep (s2, sep2, b2) -> - eq_symbol s1 s2 && eq_symbol sep1 sep2 && b1 = b2 - | Sopt s1, Sopt s2 -> eq_symbol s1 s2 - | Stree _, Stree _ -> false - | _ -> s1 = s2 - -let is_before s1 s2 = - match s1, s2 with - Stoken ("ANY", _), _ -> false - | _, Stoken ("ANY", _) -> true - | Stoken (_, s), Stoken (_, "") when s <> "" -> true - | Stoken _, Stoken _ -> false - | Stoken _, _ -> true - | _ -> false - -let insert_tree ~warning entry_name gsymbols action tree = - let rec insert symbols tree = - match symbols with - s :: sl -> insert_in_tree s sl tree - | [] -> - match tree with - Node {node = s; son = son; brother = bro} -> - Node {node = s; son = son; brother = insert [] bro} - | LocAct (old_action, action_list) -> - begin match warning with - | None -> () - | Some warn_fn -> - let msg = - "<W> Grammar extension: " ^ - (if entry_name <> "" then "" else "in ["^entry_name^"%s], ") ^ - "some rule has been masked" in - warn_fn msg - end; - LocAct (action, old_action :: action_list) - | DeadEnd -> LocAct (action, []) - and insert_in_tree s sl tree = - match try_insert s sl tree with - Some t -> t - | None -> Node {node = s; son = insert sl DeadEnd; brother = tree} - and try_insert s sl tree = - match tree with - Node {node = s1; son = son; brother = bro} -> - if eq_symbol s s1 then - let t = Node {node = s1; son = insert sl son; brother = bro} in - Some t - else if is_before s1 s || derive_eps s && not (derive_eps s1) then - let bro = - match try_insert s sl bro with - Some bro -> bro - | None -> Node {node = s; son = insert sl DeadEnd; brother = bro} - in - let t = Node {node = s1; son = son; brother = bro} in Some t - else - begin match try_insert s sl bro with - Some bro -> - let t = Node {node = s1; son = son; brother = bro} in Some t - | None -> None - end - | LocAct (_, _) | DeadEnd -> None - in - insert gsymbols tree - -let srules ~warning rl = - let t = - List.fold_left - (fun tree (symbols, action) -> insert_tree ~warning "" symbols action tree) - DeadEnd rl - in - Stree t - -let is_level_labelled n lev = - match lev.lname with - Some n1 -> n = n1 - | None -> false - -let insert_level ~warning entry_name e1 symbols action slev = - match e1 with - true -> - {assoc = slev.assoc; lname = slev.lname; - lsuffix = insert_tree ~warning entry_name symbols action slev.lsuffix; - lprefix = slev.lprefix} - | false -> - {assoc = slev.assoc; lname = slev.lname; lsuffix = slev.lsuffix; - lprefix = insert_tree ~warning entry_name symbols action slev.lprefix} - -let empty_lev lname assoc = - let assoc = - match assoc with - Some a -> a - | None -> LeftA - in - {assoc = assoc; lname = lname; lsuffix = DeadEnd; lprefix = DeadEnd} - -let change_lev ~warning lev n lname assoc = - let a = - match assoc with - None -> lev.assoc - | Some a -> - if a <> lev.assoc then - begin - match warning with - | None -> () - | Some warn_fn -> - warn_fn ("<W> Changing associativity of level \""^n^"\"") - end; - a - in - begin match lname with - Some n -> - if lname <> lev.lname then - begin match warning with - | None -> () - | Some warn_fn -> - warn_fn ("<W> Level label \""^n^"\" ignored") - end; - | None -> () - end; - {assoc = a; lname = lev.lname; lsuffix = lev.lsuffix; lprefix = lev.lprefix} - -let get_level ~warning entry position levs = - match position with - Some First -> [], empty_lev, levs - | Some Last -> levs, empty_lev, [] - | Some (Level n) -> - let rec get = - function - [] -> - eprintf "No level labelled \"%s\" in entry \"%s\"\n" n - entry.ename; - flush stderr; - failwith "Grammar.extend" - | lev :: levs -> - if is_level_labelled n lev then [], change_lev ~warning lev n, levs - else - let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2 - in - get levs - | Some (Before n) -> - let rec get = - function - [] -> - eprintf "No level labelled \"%s\" in entry \"%s\"\n" n - entry.ename; - flush stderr; - failwith "Grammar.extend" - | lev :: levs -> - if is_level_labelled n lev then [], empty_lev, lev :: levs - else - let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2 - in - get levs - | Some (After n) -> - let rec get = - function - [] -> - eprintf "No level labelled \"%s\" in entry \"%s\"\n" n - entry.ename; - flush stderr; - failwith "Grammar.extend" - | lev :: levs -> - if is_level_labelled n lev then [lev], empty_lev, levs - else - let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2 - in - get levs - | None -> - match levs with - lev :: levs -> [], change_lev ~warning lev "<top>", levs - | [] -> [], empty_lev, [] - -let change_to_self entry = - function - Snterm e when e == entry -> Sself - | x -> x - -let get_initial entry = - function - Sself :: symbols -> true, symbols - | symbols -> false, symbols - -let insert_tokens gram symbols = - let rec insert = - function - | Slist0 s -> insert s - | Slist1 s -> insert s - | Slist0sep (s, t, _) -> insert s; insert t - | Slist1sep (s, t, _) -> insert s; insert t - | Sopt s -> insert s - | Stree t -> tinsert t - | Stoken ("ANY", _) -> () - | Stoken tok -> - gram.glexer.Plexing.tok_using tok; - let r = - try Hashtbl.find gram.gtokens tok with - Not_found -> let r = ref 0 in Hashtbl.add gram.gtokens tok r; r - in - incr r - | Snterm _ | Snterml (_, _) | Snext | Sself -> () - and tinsert = - function - Node {node = s; brother = bro; son = son} -> - insert s; tinsert bro; tinsert son - | LocAct (_, _) | DeadEnd -> () - in - List.iter insert symbols - -let levels_of_rules ~warning entry position rules = - let elev = - match entry.edesc with - Dlevels elev -> elev - | Dparser _ -> - eprintf "Error: entry not extensible: \"%s\"\n" entry.ename; - flush stderr; - failwith "Grammar.extend" - in - if rules = [] then elev - else - let (levs1, make_lev, levs2) = get_level ~warning entry position elev in - let (levs, _) = - List.fold_left - (fun (levs, make_lev) (lname, assoc, level) -> - let lev = make_lev lname assoc in - let lev = - List.fold_left - (fun lev (symbols, action) -> - let symbols = List.map (change_to_self entry) symbols in - let (e1, symbols) = get_initial entry symbols in - insert_tokens entry.egram symbols; - insert_level ~warning entry.ename e1 symbols action lev) - lev level - in - lev :: levs, empty_lev) - ([], make_lev) rules - in - levs1 @ List.rev levs @ levs2 - -let logically_eq_symbols entry = - let rec eq_symbols s1 s2 = - match s1, s2 with - Snterm e1, Snterm e2 -> e1.ename = e2.ename - | Snterm e1, Sself -> e1.ename = entry.ename - | Sself, Snterm e2 -> entry.ename = e2.ename - | Snterml (e1, l1), Snterml (e2, l2) -> e1.ename = e2.ename && l1 = l2 - | Slist0 s1, Slist0 s2 -> eq_symbols s1 s2 - | Slist0sep (s1, sep1, b1), Slist0sep (s2, sep2, b2) -> - eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2 - | Slist1 s1, Slist1 s2 -> eq_symbols s1 s2 - | Slist1sep (s1, sep1, b1), Slist1sep (s2, sep2, b2) -> - eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2 - | Sopt s1, Sopt s2 -> eq_symbols s1 s2 - | Stree t1, Stree t2 -> eq_trees t1 t2 - | _ -> s1 = s2 - and eq_trees t1 t2 = - match t1, t2 with - Node n1, Node n2 -> - eq_symbols n1.node n2.node && eq_trees n1.son n2.son && - eq_trees n1.brother n2.brother - | (LocAct (_, _) | DeadEnd), (LocAct (_, _) | DeadEnd) -> true - | _ -> false - in - eq_symbols - -(* [delete_rule_in_tree] returns - [Some (dsl, t)] if success - [dsl] = - Some (list of deleted nodes) if branch deleted - None if action replaced by previous version of action - [t] = remaining tree - [None] if failure *) - -let delete_rule_in_tree entry = - let rec delete_in_tree symbols tree = - match symbols, tree with - s :: sl, Node n -> - if logically_eq_symbols entry s n.node then delete_son sl n - else - begin match delete_in_tree symbols n.brother with - Some (dsl, t) -> - Some (dsl, Node {node = n.node; son = n.son; brother = t}) - | None -> None - end - | s :: sl, _ -> None - | [], Node n -> - begin match delete_in_tree [] n.brother with - Some (dsl, t) -> - Some (dsl, Node {node = n.node; son = n.son; brother = t}) - | None -> None - end - | [], DeadEnd -> None - | [], LocAct (_, []) -> Some (Some [], DeadEnd) - | [], LocAct (_, action :: list) -> Some (None, LocAct (action, list)) - and delete_son sl n = - match delete_in_tree sl n.son with - Some (Some dsl, DeadEnd) -> Some (Some (n.node :: dsl), n.brother) - | Some (Some dsl, t) -> - let t = Node {node = n.node; son = t; brother = n.brother} in - Some (Some (n.node :: dsl), t) - | Some (None, t) -> - let t = Node {node = n.node; son = t; brother = n.brother} in - Some (None, t) - | None -> None - in - delete_in_tree - -let rec decr_keyw_use gram = - function - Stoken tok -> - let r = Hashtbl.find gram.gtokens tok in - decr r; - if !r == 0 then - begin - Hashtbl.remove gram.gtokens tok; - gram.glexer.Plexing.tok_removing tok - end - | Slist0 s -> decr_keyw_use gram s - | Slist1 s -> decr_keyw_use gram s - | Slist0sep (s1, s2, _) -> decr_keyw_use gram s1; decr_keyw_use gram s2 - | Slist1sep (s1, s2, _) -> decr_keyw_use gram s1; decr_keyw_use gram s2 - | Sopt s -> decr_keyw_use gram s - | Stree t -> decr_keyw_use_in_tree gram t - | Sself | Snext | Snterm _ | Snterml (_, _) -> () -and decr_keyw_use_in_tree gram = - function - DeadEnd | LocAct (_, _) -> () - | Node n -> - decr_keyw_use gram n.node; - decr_keyw_use_in_tree gram n.son; - decr_keyw_use_in_tree gram n.brother - -let rec delete_rule_in_suffix entry symbols = - function - lev :: levs -> - begin match delete_rule_in_tree entry symbols lev.lsuffix with - Some (dsl, t) -> - begin match dsl with - Some dsl -> List.iter (decr_keyw_use entry.egram) dsl - | None -> () - end; - begin match t with - DeadEnd when lev.lprefix == DeadEnd -> levs - | _ -> - let lev = - {assoc = lev.assoc; lname = lev.lname; lsuffix = t; - lprefix = lev.lprefix} - in - lev :: levs - end - | None -> - let levs = delete_rule_in_suffix entry symbols levs in lev :: levs - end - | [] -> raise Not_found - -let rec delete_rule_in_prefix entry symbols = - function - lev :: levs -> - begin match delete_rule_in_tree entry symbols lev.lprefix with - Some (dsl, t) -> - begin match dsl with - Some dsl -> List.iter (decr_keyw_use entry.egram) dsl - | None -> () - end; - begin match t with - DeadEnd when lev.lsuffix == DeadEnd -> levs - | _ -> - let lev = - {assoc = lev.assoc; lname = lev.lname; lsuffix = lev.lsuffix; - lprefix = t} - in - lev :: levs - end - | None -> - let levs = delete_rule_in_prefix entry symbols levs in lev :: levs - end - | [] -> raise Not_found - -let delete_rule_in_level_list entry symbols levs = - match symbols with - Sself :: symbols -> delete_rule_in_suffix entry symbols levs - | Snterm e :: symbols when e == entry -> - delete_rule_in_suffix entry symbols levs - | _ -> delete_rule_in_prefix entry symbols levs +type g_assoc = NonA | RightA | LeftA diff --git a/gramlib/gramext.mli b/gramlib/gramext.mli index f1e294fb4c..f9daf5bf10 100644 --- a/gramlib/gramext.mli +++ b/gramlib/gramext.mli @@ -2,49 +2,6 @@ (* gramext.mli,v *) (* Copyright (c) INRIA 2007-2017 *) -type 'a parser_t = 'a Stream.t -> Obj.t - -type 'te grammar = - { gtokens : (Plexing.pattern, int ref) Hashtbl.t; - glexer : 'te Plexing.lexer } - -type 'te g_entry = - { egram : 'te grammar; - ename : string; - elocal : bool; - mutable estart : int -> 'te parser_t; - mutable econtinue : int -> int -> Obj.t -> 'te parser_t; - mutable edesc : 'te g_desc } -and 'te g_desc = - Dlevels of 'te g_level list - | Dparser of 'te parser_t -and 'te g_level = - { assoc : g_assoc; - lname : string option; - lsuffix : 'te g_tree; - lprefix : 'te g_tree } -and g_assoc = NonA | RightA | LeftA -and 'te g_symbol = - | Snterm of 'te g_entry - | Snterml of 'te g_entry * string - | Slist0 of 'te g_symbol - | Slist0sep of 'te g_symbol * 'te g_symbol * bool - | Slist1 of 'te g_symbol - | Slist1sep of 'te g_symbol * 'te g_symbol * bool - | Sopt of 'te g_symbol - | Sself - | Snext - | Stoken of Plexing.pattern - | Stree of 'te g_tree -and g_action = Obj.t -and 'te g_tree = - Node of 'te g_node - | LocAct of g_action * g_action list - | DeadEnd -and 'te g_node = - { node : 'te g_symbol; son : 'te g_tree; brother : 'te g_tree } -and err_fun = unit -> string - type position = First | Last @@ -52,14 +9,4 @@ type position = | After of string | Level of string -val levels_of_rules : warning:(string -> unit) option -> - 'te g_entry -> position option -> - (string option * g_assoc option * ('te g_symbol list * g_action) list) - list -> - 'te g_level list - -val srules : warning:(string -> unit) option -> ('te g_symbol list * g_action) list -> 'te g_symbol -val eq_symbol : 'a g_symbol -> 'a g_symbol -> bool - -val delete_rule_in_level_list : - 'te g_entry -> 'te g_symbol list -> 'te g_level list -> 'te g_level list +type g_assoc = NonA | RightA | LeftA diff --git a/gramlib/grammar.ml b/gramlib/grammar.ml index e959e9b9e6..e313f2e648 100644 --- a/gramlib/grammar.ml +++ b/gramlib/grammar.ml @@ -5,14 +5,644 @@ open Gramext open Format -external gramext_action : 'a -> g_action = "%identity" +type ('a, 'b) eq = Refl : ('a, 'a) eq -let rec flatten_tree = +(* Functorial interface *) + +module type GLexerType = sig type te val lexer : te Plexing.lexer end + +module type S = + sig + type te + type parsable + val parsable : char Stream.t -> parsable + val tokens : string -> (string * int) list + module Entry : + sig + type 'a e + val create : string -> 'a e + val parse : 'a e -> parsable -> 'a + val name : 'a e -> string + val of_parser : string -> (te Stream.t -> 'a) -> 'a e + val parse_token_stream : 'a e -> te Stream.t -> 'a + val print : Format.formatter -> 'a e -> unit + end + type ('self, 'a) ty_symbol + type ('self, 'f, 'r) ty_rule + type 'a ty_production + val s_nterm : 'a Entry.e -> ('self, 'a) ty_symbol + val s_nterml : 'a Entry.e -> string -> ('self, 'a) ty_symbol + val s_list0 : ('self, 'a) ty_symbol -> ('self, 'a list) ty_symbol + val s_list0sep : + ('self, 'a) ty_symbol -> ('self, 'b) ty_symbol -> bool -> + ('self, 'a list) ty_symbol + val s_list1 : ('self, 'a) ty_symbol -> ('self, 'a list) ty_symbol + val s_list1sep : + ('self, 'a) ty_symbol -> ('self, 'b) ty_symbol -> bool -> + ('self, 'a list) ty_symbol + val s_opt : ('self, 'a) ty_symbol -> ('self, 'a option) ty_symbol + val s_self : ('self, 'self) ty_symbol + val s_next : ('self, 'self) ty_symbol + val s_token : Plexing.pattern -> ('self, string) ty_symbol + val s_rules : warning:(string -> unit) option -> 'a ty_production list -> ('self, 'a) ty_symbol + val r_stop : ('self, 'r, 'r) ty_rule + val r_next : + ('self, 'a, 'r) ty_rule -> ('self, 'b) ty_symbol -> + ('self, 'b -> 'a, 'r) ty_rule + val production : ('a, 'f, Loc.t -> 'a) ty_rule * 'f -> 'a ty_production + module Unsafe : + sig + val clear_entry : 'a Entry.e -> unit + end + val safe_extend : warning:(string -> unit) option -> + 'a Entry.e -> Gramext.position option -> + (string option * Gramext.g_assoc option * 'a ty_production list) + list -> + unit + val safe_delete_rule : 'a Entry.e -> ('a, 'r, 'f) ty_rule -> unit + end + +(* Implementation *) + +module GMake (L : GLexerType) = +struct + +type te = L.te + +type 'a parser_t = L.te Stream.t -> 'a + +type grammar = + { gtokens : (Plexing.pattern, int ref) Hashtbl.t; + glexer : L.te Plexing.lexer } + +let egram = + {gtokens = Hashtbl.create 301; glexer = L.lexer } + +let tokens con = + let list = ref [] in + Hashtbl.iter + (fun (p_con, p_prm) c -> if p_con = con then list := (p_prm, !c) :: !list) + egram.gtokens; + !list + +type 'a ty_entry = { + ename : string; + mutable estart : int -> 'a parser_t; + mutable econtinue : int -> int -> 'a -> 'a parser_t; + mutable edesc : 'a ty_desc; +} + +and 'a ty_desc = +| Dlevels of 'a ty_level list +| Dparser of 'a parser_t + +and 'a ty_level = { + assoc : g_assoc; + lname : string option; + lsuffix : ('a, 'a -> Loc.t -> 'a) ty_tree; + lprefix : ('a, Loc.t -> 'a) ty_tree; +} + +and ('self, 'a) ty_symbol = +| Stoken : Plexing.pattern -> ('self, string) ty_symbol +| Slist1 : ('self, 'a) ty_symbol -> ('self, 'a list) ty_symbol +| Slist1sep : ('self, 'a) ty_symbol * ('self, _) ty_symbol * bool -> ('self, 'a list) ty_symbol +| Slist0 : ('self, 'a) ty_symbol -> ('self, 'a list) ty_symbol +| Slist0sep : ('self, 'a) ty_symbol * ('self, _) ty_symbol * bool -> ('self, 'a list) ty_symbol +| Sopt : ('self, 'a) ty_symbol -> ('self, 'a option) ty_symbol +| Sself : ('self, 'self) ty_symbol +| Snext : ('self, 'self) ty_symbol +| Snterm : 'a ty_entry -> ('self, 'a) ty_symbol +| Snterml : 'a ty_entry * string -> ('self, 'a) ty_symbol +| Stree : ('self, Loc.t -> 'a) ty_tree -> ('self, 'a) ty_symbol + +and ('self, _, 'r) ty_rule = +| TStop : ('self, 'r, 'r) ty_rule +| TNext : ('self, 'a, 'r) ty_rule * ('self, 'b) ty_symbol -> ('self, 'b -> 'a, 'r) ty_rule + +and ('self, 'a) ty_tree = +| Node : ('self, 'b, 'a) ty_node -> ('self, 'a) ty_tree +| LocAct : 'k * 'k list -> ('self, 'k) ty_tree +| DeadEnd : ('self, 'k) ty_tree + +and ('self, 'a, 'r) ty_node = { + node : ('self, 'a) ty_symbol; + son : ('self, 'a -> 'r) ty_tree; + brother : ('self, 'r) ty_tree; +} + +type 'a ty_production = +| TProd : ('a, 'act, Loc.t -> 'a) ty_rule * 'act -> 'a ty_production + +let rec derive_eps : type s a. (s, a) ty_symbol -> bool = + function + Slist0 _ -> true + | Slist0sep (_, _, _) -> true + | Sopt _ -> true + | Stree t -> tree_derive_eps t + | Slist1 _ -> false + | Slist1sep (_, _, _) -> false + | Snterm _ | Snterml (_, _) -> false + | Snext -> false + | Sself -> false + | Stoken _ -> false +and tree_derive_eps : type s a. (s, a) ty_tree -> bool = + function + LocAct (_, _) -> true + | Node {node = s; brother = bro; son = son} -> + derive_eps s && tree_derive_eps son || tree_derive_eps bro + | DeadEnd -> false + +(** FIXME: find a way to do that type-safely *) +let eq_entry : type a1 a2. a1 ty_entry -> a2 ty_entry -> (a1, a2) eq option = fun e1 e2 -> + if (Obj.magic e1) == (Obj.magic e2) then Some (Obj.magic Refl) + else None + +let rec eq_symbol : type s a1 a2. (s, a1) ty_symbol -> (s, a2) ty_symbol -> (a1, a2) eq option = fun s1 s2 -> + match s1, s2 with + Snterm e1, Snterm e2 -> eq_entry e1 e2 + | Snterml (e1, l1), Snterml (e2, l2) -> + if String.equal l1 l2 then eq_entry e1 e2 else None + | Slist0 s1, Slist0 s2 -> + begin match eq_symbol s1 s2 with None -> None | Some Refl -> Some Refl end + | Slist0sep (s1, sep1, b1), Slist0sep (s2, sep2, b2) -> + if b1 = b2 then match eq_symbol s1 s2 with + | None -> None + | Some Refl -> + match eq_symbol sep1 sep2 with + | None -> None + | Some Refl -> Some Refl + else None + | Slist1 s1, Slist1 s2 -> + begin match eq_symbol s1 s2 with None -> None | Some Refl -> Some Refl end + | Slist1sep (s1, sep1, b1), Slist1sep (s2, sep2, b2) -> + if b1 = b2 then match eq_symbol s1 s2 with + | None -> None + | Some Refl -> + match eq_symbol sep1 sep2 with + | None -> None + | Some Refl -> Some Refl + else None + | Sopt s1, Sopt s2 -> + begin match eq_symbol s1 s2 with None -> None | Some Refl -> Some Refl end + | Stree _, Stree _ -> None + | Sself, Sself -> Some Refl + | Snext, Snext -> Some Refl + | Stoken p1, Stoken p2 -> if p1 = p2 then Some Refl else None + | _ -> None + +let is_before : type s1 s2 a1 a2. (s1, a1) ty_symbol -> (s2, a2) ty_symbol -> bool = fun s1 s2 -> + match s1, s2 with + Stoken ("ANY", _), _ -> false + | _, Stoken ("ANY", _) -> true + | Stoken (_, s), Stoken (_, "") when s <> "" -> true + | Stoken _, Stoken _ -> false + | Stoken _, _ -> true + | _ -> false + +(** Ancilliary datatypes *) + +type ('self, _) ty_symbols = +| TNil : ('self, unit) ty_symbols +| TCns : ('self, 'a) ty_symbol * ('self, 'b) ty_symbols -> ('self, 'a * 'b) ty_symbols + +(** ('i, 'p, 'f, 'r) rel_prod0 ~ + ∃ α₁ ... αₙ. + p ≡ αₙ * ... α₁ * 'i ∧ + f ≡ α₁ -> ... -> αₙ -> 'r +*) +type ('i, _, 'f, _) rel_prod0 = +| Rel0 : ('i, 'i, 'f, 'f) rel_prod0 +| RelS : ('i, 'p, 'f, 'a -> 'r) rel_prod0 -> ('i, 'a * 'p, 'f, 'r) rel_prod0 + +type ('p, 'k, 'r) rel_prod = (unit, 'p, 'k, 'r) rel_prod0 + +type ('s, 'i, 'k, 'r) any_symbols = +| AnyS : ('s, 'p) ty_symbols * ('i, 'p, 'k, 'r) rel_prod0 -> ('s, 'i, 'k, 'r) any_symbols + +(** FIXME *) +let rec symbols : type s p k r. (s, p) ty_symbols -> (s, k, r) ty_rule -> (s, unit, k, r) any_symbols = + fun accu r -> match r with + | TStop -> AnyS (Obj.magic accu, Rel0) + | TNext (r, s) -> + let AnyS (r, pf) = symbols (TCns (s, accu)) r in + AnyS (Obj.magic r, RelS (Obj.magic pf)) + +let get_symbols : type s k r. (s, k, r) ty_rule -> (s, unit, k, r) any_symbols = + fun r -> symbols TNil r + +let insert_tree (type s p k a) ~warning entry_name (gsymbols : (s, p) ty_symbols) (pf : (p, k, a) rel_prod) (action : k) (tree : (s, a) ty_tree) = + let rec insert : type p f k. (s, p) ty_symbols -> (p, k, f) rel_prod -> (s, f) ty_tree -> k -> (s, f) ty_tree = + fun symbols pf tree action -> + match symbols, pf with + TCns (s, sl), RelS pf -> insert_in_tree s sl pf tree action + | TNil, Rel0 -> + match tree with + Node {node = s; son = son; brother = bro} -> + Node {node = s; son = son; brother = insert TNil Rel0 bro action} + | LocAct (old_action, action_list) -> + begin match warning with + | None -> () + | Some warn_fn -> + let msg = + "<W> Grammar extension: " ^ + (if entry_name <> "" then "" else "in ["^entry_name^"%s], ") ^ + "some rule has been masked" in + warn_fn msg + end; + LocAct (action, old_action :: action_list) + | DeadEnd -> LocAct (action, []) + and insert_in_tree : type a p f k. (s, a) ty_symbol -> (s, p) ty_symbols -> (p, k, a -> f) rel_prod -> (s, f) ty_tree -> k -> (s, f) ty_tree = + fun s sl pf tree action -> + match try_insert s sl pf tree action with + Some t -> t + | None -> Node {node = s; son = insert sl pf DeadEnd action; brother = tree} + and try_insert : type a p f k. (s, a) ty_symbol -> (s, p) ty_symbols -> (p, k, a -> f) rel_prod -> (s, f) ty_tree -> k -> (s, f) ty_tree option = + fun s sl pf tree action -> + match tree with + Node {node = s1; son = son; brother = bro} -> + begin match eq_symbol s s1 with + | Some Refl -> + let t = Node {node = s1; son = insert sl pf son action; brother = bro} in + Some t + | None -> + if is_before s1 s || derive_eps s && not (derive_eps s1) then + let bro = + match try_insert s sl pf bro action with + Some bro -> bro + | None -> Node {node = s; son = insert sl pf DeadEnd action; brother = bro} + in + let t = Node {node = s1; son = son; brother = bro} in Some t + else + begin match try_insert s sl pf bro action with + Some bro -> + let t = Node {node = s1; son = son; brother = bro} in Some t + | None -> None + end + end + | LocAct (_, _) | DeadEnd -> None + in + insert gsymbols pf tree action + +let srules (type self a) ~warning (rl : a ty_production list) = + let t = + List.fold_left + (fun tree (TProd (symbols, action)) -> + let AnyS (symbols, pf) = get_symbols symbols in + insert_tree ~warning "" symbols pf action tree) + DeadEnd rl + in + (* FIXME: use an universal self type to ensure well-typedness *) + (Obj.magic (Stree t) : (self, a) ty_symbol) + +let is_level_labelled n lev = + match lev.lname with + Some n1 -> n = n1 + | None -> false + +let insert_level (type s p k) ~warning entry_name (symbols : (s, p) ty_symbols) (pf : (p, k, Loc.t -> s) rel_prod) (action : k) (slev : s ty_level) : s ty_level = + match symbols with + | TCns (Sself, symbols) -> + let RelS pf = pf in + {assoc = slev.assoc; lname = slev.lname; + lsuffix = insert_tree ~warning entry_name symbols pf action slev.lsuffix; + lprefix = slev.lprefix} + | _ -> + {assoc = slev.assoc; lname = slev.lname; lsuffix = slev.lsuffix; + lprefix = insert_tree ~warning entry_name symbols pf action slev.lprefix} + +let empty_lev lname assoc = + let assoc = + match assoc with + Some a -> a + | None -> LeftA + in + {assoc = assoc; lname = lname; lsuffix = DeadEnd; lprefix = DeadEnd} + +let change_lev ~warning lev n lname assoc = + let a = + match assoc with + None -> lev.assoc + | Some a -> + if a <> lev.assoc then + begin + match warning with + | None -> () + | Some warn_fn -> + warn_fn ("<W> Changing associativity of level \""^n^"\"") + end; + a + in + begin match lname with + Some n -> + if lname <> lev.lname then + begin match warning with + | None -> () + | Some warn_fn -> + warn_fn ("<W> Level label \""^n^"\" ignored") + end; + | None -> () + end; + {assoc = a; lname = lev.lname; lsuffix = lev.lsuffix; lprefix = lev.lprefix} + +let get_level ~warning entry position levs = + match position with + Some First -> [], empty_lev, levs + | Some Last -> levs, empty_lev, [] + | Some (Level n) -> + let rec get = + function + [] -> + eprintf "No level labelled \"%s\" in entry \"%s\"\n" n + entry.ename; + flush stderr; + failwith "Grammar.extend" + | lev :: levs -> + if is_level_labelled n lev then [], change_lev ~warning lev n, levs + else + let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2 + in + get levs + | Some (Before n) -> + let rec get = + function + [] -> + eprintf "No level labelled \"%s\" in entry \"%s\"\n" n + entry.ename; + flush stderr; + failwith "Grammar.extend" + | lev :: levs -> + if is_level_labelled n lev then [], empty_lev, lev :: levs + else + let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2 + in + get levs + | Some (After n) -> + let rec get = + function + [] -> + eprintf "No level labelled \"%s\" in entry \"%s\"\n" n + entry.ename; + flush stderr; + failwith "Grammar.extend" + | lev :: levs -> + if is_level_labelled n lev then [lev], empty_lev, levs + else + let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2 + in + get levs + | None -> + match levs with + lev :: levs -> [], change_lev ~warning lev "<top>", levs + | [] -> [], empty_lev, [] + +let change_to_self0 (type s) (type a) (entry : s ty_entry) : (s, a) ty_symbol -> (s, a) ty_symbol = + function + | Snterm e -> + begin match eq_entry e entry with + | None -> Snterm e + | Some Refl -> Sself + end + | x -> x + +let rec change_to_self : type s a r. s ty_entry -> (s, a, r) ty_rule -> (s, a, r) ty_rule = fun e r -> match r with +| TStop -> TStop +| TNext (r, t) -> TNext (change_to_self e r, change_to_self0 e t) + +let insert_tokens gram symbols = + let rec insert : type s a. (s, a) ty_symbol -> unit = + function + | Slist0 s -> insert s + | Slist1 s -> insert s + | Slist0sep (s, t, _) -> insert s; insert t + | Slist1sep (s, t, _) -> insert s; insert t + | Sopt s -> insert s + | Stree t -> tinsert t + | Stoken ("ANY", _) -> () + | Stoken tok -> + gram.glexer.Plexing.tok_using tok; + let r = + try Hashtbl.find gram.gtokens tok with + Not_found -> let r = ref 0 in Hashtbl.add gram.gtokens tok r; r + in + incr r + | Snterm _ | Snterml (_, _) -> () + | Snext -> () + | Sself -> () + and tinsert : type s a. (s, a) ty_tree -> unit = + function + Node {node = s; brother = bro; son = son} -> + insert s; tinsert bro; tinsert son + | LocAct (_, _) | DeadEnd -> () + and linsert : type s p. (s, p) ty_symbols -> unit = function + | TNil -> () + | TCns (s, r) -> insert s; linsert r + in + linsert symbols + +let levels_of_rules ~warning entry position rules = + let elev = + match entry.edesc with + Dlevels elev -> elev + | Dparser _ -> + eprintf "Error: entry not extensible: \"%s\"\n" entry.ename; + flush stderr; + failwith "Grammar.extend" + in + match rules with + | [] -> elev + | _ -> + let (levs1, make_lev, levs2) = get_level ~warning entry position elev in + let (levs, _) = + List.fold_left + (fun (levs, make_lev) (lname, assoc, level) -> + let lev = make_lev lname assoc in + let lev = + List.fold_left + (fun lev (TProd (symbols, action)) -> + let symbols = change_to_self entry symbols in + let AnyS (symbols, pf) = get_symbols symbols in + insert_tokens egram symbols; + insert_level ~warning entry.ename symbols pf action lev) + lev level + in + lev :: levs, empty_lev) + ([], make_lev) rules + in + levs1 @ List.rev levs @ levs2 + +let logically_eq_symbols entry = + let rec eq_symbols : type s1 s2 a1 a2. (s1, a1) ty_symbol -> (s2, a2) ty_symbol -> bool = fun s1 s2 -> + match s1, s2 with + Snterm e1, Snterm e2 -> e1.ename = e2.ename + | Snterm e1, Sself -> e1.ename = entry.ename + | Sself, Snterm e2 -> entry.ename = e2.ename + | Snterml (e1, l1), Snterml (e2, l2) -> e1.ename = e2.ename && l1 = l2 + | Slist0 s1, Slist0 s2 -> eq_symbols s1 s2 + | Slist0sep (s1, sep1, b1), Slist0sep (s2, sep2, b2) -> + eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2 + | Slist1 s1, Slist1 s2 -> eq_symbols s1 s2 + | Slist1sep (s1, sep1, b1), Slist1sep (s2, sep2, b2) -> + eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2 + | Sopt s1, Sopt s2 -> eq_symbols s1 s2 + | Stree t1, Stree t2 -> eq_trees t1 t2 + | Stoken p1, Stoken p2 -> p1 = p2 + | Sself, Sself -> true + | Snext, Snext -> true + | _ -> false + and eq_trees : type s1 s2 a1 a2. (s1, a1) ty_tree -> (s2, a2) ty_tree -> bool = fun t1 t2 -> + match t1, t2 with + Node n1, Node n2 -> + eq_symbols n1.node n2.node && eq_trees n1.son n2.son && + eq_trees n1.brother n2.brother + | (LocAct (_, _) | DeadEnd), (LocAct (_, _) | DeadEnd) -> true + | _ -> false + in + eq_symbols + +(* [delete_rule_in_tree] returns + [Some (dsl, t)] if success + [dsl] = + Some (list of deleted nodes) if branch deleted + None if action replaced by previous version of action + [t] = remaining tree + [None] if failure *) + +type 's ex_symbols = +| ExS : ('s, 'p) ty_symbols -> 's ex_symbols + +let delete_rule_in_tree entry = + let rec delete_in_tree : + type s p r. (s, p) ty_symbols -> (s, r) ty_tree -> (s ex_symbols option * (s, r) ty_tree) option = + fun symbols tree -> + match symbols, tree with + | TCns (s, sl), Node n -> + if logically_eq_symbols entry s n.node then delete_son sl n + else + begin match delete_in_tree symbols n.brother with + Some (dsl, t) -> + Some (dsl, Node {node = n.node; son = n.son; brother = t}) + | None -> None + end + | TCns (s, sl), _ -> None + | TNil, Node n -> + begin match delete_in_tree TNil n.brother with + Some (dsl, t) -> + Some (dsl, Node {node = n.node; son = n.son; brother = t}) + | None -> None + end + | TNil, DeadEnd -> None + | TNil, LocAct (_, []) -> Some (Some (ExS TNil), DeadEnd) + | TNil, LocAct (_, action :: list) -> Some (None, LocAct (action, list)) + and delete_son : + type s p a r. (s, p) ty_symbols -> (s, a, r) ty_node -> (s ex_symbols option * (s, r) ty_tree) option = + fun sl n -> + match delete_in_tree sl n.son with + Some (Some (ExS dsl), DeadEnd) -> Some (Some (ExS (TCns (n.node, dsl))), n.brother) + | Some (Some (ExS dsl), t) -> + let t = Node {node = n.node; son = t; brother = n.brother} in + Some (Some (ExS (TCns (n.node, dsl))), t) + | Some (None, t) -> + let t = Node {node = n.node; son = t; brother = n.brother} in + Some (None, t) + | None -> None + in + delete_in_tree + +let rec decr_keyw_use : type s a. _ -> (s, a) ty_symbol -> unit = fun gram -> + function + Stoken tok -> + let r = Hashtbl.find gram.gtokens tok in + decr r; + if !r == 0 then + begin + Hashtbl.remove gram.gtokens tok; + gram.glexer.Plexing.tok_removing tok + end + | Slist0 s -> decr_keyw_use gram s + | Slist1 s -> decr_keyw_use gram s + | Slist0sep (s1, s2, _) -> decr_keyw_use gram s1; decr_keyw_use gram s2 + | Slist1sep (s1, s2, _) -> decr_keyw_use gram s1; decr_keyw_use gram s2 + | Sopt s -> decr_keyw_use gram s + | Stree t -> decr_keyw_use_in_tree gram t + | Sself -> () + | Snext -> () + | Snterm _ | Snterml (_, _) -> () +and decr_keyw_use_in_tree : type s a. _ -> (s, a) ty_tree -> unit = fun gram -> + function + DeadEnd | LocAct (_, _) -> () + | Node n -> + decr_keyw_use gram n.node; + decr_keyw_use_in_tree gram n.son; + decr_keyw_use_in_tree gram n.brother +and decr_keyw_use_in_list : type s p. _ -> (s, p) ty_symbols -> unit = fun gram -> + function + | TNil -> () + | TCns (s, l) -> decr_keyw_use gram s; decr_keyw_use_in_list gram l + +let rec delete_rule_in_suffix entry symbols = + function + lev :: levs -> + begin match delete_rule_in_tree entry symbols lev.lsuffix with + Some (dsl, t) -> + begin match dsl with + Some (ExS dsl) -> decr_keyw_use_in_list egram dsl + | None -> () + end; + begin match t with + DeadEnd when lev.lprefix == DeadEnd -> levs + | _ -> + let lev = + {assoc = lev.assoc; lname = lev.lname; lsuffix = t; + lprefix = lev.lprefix} + in + lev :: levs + end + | None -> + let levs = delete_rule_in_suffix entry symbols levs in lev :: levs + end + | [] -> raise Not_found + +let rec delete_rule_in_prefix entry symbols = + function + lev :: levs -> + begin match delete_rule_in_tree entry symbols lev.lprefix with + Some (dsl, t) -> + begin match dsl with + Some (ExS dsl) -> decr_keyw_use_in_list egram dsl + | None -> () + end; + begin match t with + DeadEnd when lev.lsuffix == DeadEnd -> levs + | _ -> + let lev = + {assoc = lev.assoc; lname = lev.lname; lsuffix = lev.lsuffix; + lprefix = t} + in + lev :: levs + end + | None -> + let levs = delete_rule_in_prefix entry symbols levs in lev :: levs + end + | [] -> raise Not_found + +let delete_rule_in_level_list (type s p) (entry : s ty_entry) (symbols : (s, p) ty_symbols) levs = + match symbols with + TCns (Sself, symbols) -> delete_rule_in_suffix entry symbols levs + | TCns (Snterm e, symbols') -> + begin match eq_entry e entry with + | None -> delete_rule_in_prefix entry symbols levs + | Some Refl -> + delete_rule_in_suffix entry symbols' levs + end + | _ -> delete_rule_in_prefix entry symbols levs + +let rec flatten_tree : type s a. (s, a) ty_tree -> s ex_symbols list = function DeadEnd -> [] - | LocAct (_, _) -> [[]] + | LocAct (_, _) -> [ExS TNil] | Node {node = n; brother = b; son = s} -> - List.map (fun l -> n :: l) (flatten_tree s) @ flatten_tree b + List.map (fun (ExS l) -> ExS (TCns (n, l))) (flatten_tree s) @ flatten_tree b let utf8_print = ref true @@ -41,7 +671,8 @@ let string_escaped s = let print_str ppf s = fprintf ppf "\"%s\"" (string_escaped s) -let rec print_symbol ppf = +let rec print_symbol : type s r. formatter -> (s, r) ty_symbol -> unit = + fun ppf -> function | Slist0 s -> fprintf ppf "LIST0 %a" print_symbol1 s | Slist0sep (s, t, osep) -> @@ -55,36 +686,38 @@ let rec print_symbol ppf = | Stoken (con, prm) when con <> "" && prm <> "" -> fprintf ppf "%s@ %a" con print_str prm | Snterml (e, l) -> - fprintf ppf "%s%s@ LEVEL@ %a" e.ename (if e.elocal then "*" else "") + fprintf ppf "%s%s@ LEVEL@ %a" e.ename "" print_str l - | Snterm _ | Snext | Sself | Stoken _ | Stree _ as s -> - print_symbol1 ppf s -and print_symbol1 ppf = + | s -> print_symbol1 ppf s +and print_symbol1 : type s r. formatter -> (s, r) ty_symbol -> unit = + fun ppf -> function - | Snterm e -> fprintf ppf "%s%s" e.ename (if e.elocal then "*" else "") + | Snterm e -> fprintf ppf "%s%s" e.ename "" | Sself -> pp_print_string ppf "SELF" | Snext -> pp_print_string ppf "NEXT" | Stoken ("", s) -> print_str ppf s | Stoken (con, "") -> pp_print_string ppf con | Stree t -> print_level ppf pp_print_space (flatten_tree t) - | Snterml (_, _) | Slist0 _ | Slist0sep (_, _, _) | - Slist1 _ | Slist1sep (_, _, _) | Sopt _ | Stoken _ as s -> + | s -> fprintf ppf "(%a)" print_symbol s -and print_rule ppf symbols = +and print_rule : type s p. formatter -> (s, p) ty_symbols -> unit = + fun ppf symbols -> fprintf ppf "@[<hov 0>"; - let _ = - List.fold_left - (fun sep symbol -> - fprintf ppf "%t%a" sep print_symbol symbol; - fun ppf -> fprintf ppf ";@ ") - (fun ppf -> ()) symbols + let rec fold : type s p. _ -> (s, p) ty_symbols -> unit = + fun sep symbols -> match symbols with + | TNil -> () + | TCns (symbol, symbols) -> + fprintf ppf "%t%a" sep print_symbol symbol; + fold (fun ppf -> fprintf ppf ";@ ") symbols in + let () = fold (fun ppf -> ()) symbols in fprintf ppf "@]" -and print_level ppf pp_print_space rules = +and print_level : type s. _ -> _ -> s ex_symbols list -> _ = + fun ppf pp_print_space rules -> fprintf ppf "@[<hov 0>[ "; let _ = List.fold_left - (fun sep rule -> + (fun sep (ExS rule) -> fprintf ppf "%t%a" sep print_rule rule; fun ppf -> fprintf ppf "%a| " pp_print_space ()) (fun ppf -> ()) rules @@ -96,7 +729,7 @@ let print_levels ppf elev = List.fold_left (fun sep lev -> let rules = - List.map (fun t -> Sself :: t) (flatten_tree lev.lsuffix) @ + List.map (fun (ExS t) -> ExS (TCns (Sself, t))) (flatten_tree lev.lsuffix) @ flatten_tree lev.lprefix in fprintf ppf "%t@[<hov 2>" sep; @@ -132,21 +765,32 @@ let loc_of_token_interval bp ep = else let loc1 = !floc bp in let loc2 = !floc (pred ep) in Loc.merge loc1 loc2 -let name_of_symbol entry = +let name_of_symbol : type s a. s ty_entry -> (s, a) ty_symbol -> string = + fun entry -> function Snterm e -> "[" ^ e.ename ^ "]" | Snterml (e, l) -> "[" ^ e.ename ^ " level " ^ l ^ "]" - | Sself | Snext -> "[" ^ entry.ename ^ "]" - | Stoken tok -> entry.egram.glexer.Plexing.tok_text tok + | Sself -> "[" ^ entry.ename ^ "]" + | Snext -> "[" ^ entry.ename ^ "]" + | Stoken tok -> egram.glexer.Plexing.tok_text tok | _ -> "???" -let rec get_token_list entry rev_tokl last_tok tree = +type ('r, 'f) tok_list = +| TokNil : ('f, 'f) tok_list +| TokCns : ('r, 'f) tok_list -> (string -> 'r, 'f) tok_list + +type ('s, 'f) tok_tree = TokTree : ('s, string -> 'r) ty_tree * ('r, 'f) tok_list -> ('s, 'f) tok_tree + +let rec get_token_list : type s r f. + s ty_entry -> _ -> _ -> _ -> (r, f) tok_list -> (s, string -> r) ty_tree -> (_ * _ * _ * (s, f) tok_tree) option = + fun entry first_tok rev_tokl last_tok pf tree -> match tree with Node {node = Stoken tok; son = son; brother = DeadEnd} -> - get_token_list entry (last_tok :: rev_tokl) tok son - | _ -> if rev_tokl = [] then None else Some (rev_tokl, last_tok, tree) + get_token_list entry first_tok (last_tok :: rev_tokl) tok (TokCns pf) son + | _ -> if rev_tokl = [] then None else Some (first_tok, rev_tokl, last_tok, TokTree (tree, pf)) -let rec name_of_symbol_failed entry = +let rec name_of_symbol_failed : type s a. s ty_entry -> (s, a) ty_symbol -> _ = + fun entry -> function | Slist0 s -> name_of_symbol_failed entry s | Slist0sep (s, _, _) -> name_of_symbol_failed entry s @@ -155,12 +799,13 @@ let rec name_of_symbol_failed entry = | Sopt s -> name_of_symbol_failed entry s | Stree t -> name_of_tree_failed entry t | s -> name_of_symbol entry s -and name_of_tree_failed entry = +and name_of_tree_failed : type s a. s ty_entry -> (s, a) ty_tree -> _ = + fun entry -> function Node {node = s; brother = bro; son = son} -> let tokl = match s with - Stoken tok -> get_token_list entry [] tok son + Stoken tok -> get_token_list entry tok [] tok TokNil son | _ -> None in begin match tokl with @@ -177,16 +822,16 @@ and name_of_tree_failed entry = | Node _ -> txt ^ " or " ^ name_of_tree_failed entry bro in txt - | Some (rev_tokl, last_tok, son) -> + | Some (_, rev_tokl, last_tok, _) -> List.fold_left (fun s tok -> (if s = "" then "" else s ^ " ") ^ - entry.egram.glexer.Plexing.tok_text tok) + egram.glexer.Plexing.tok_text tok) "" (List.rev (last_tok :: rev_tokl)) end | DeadEnd | LocAct (_, _) -> "???" -let tree_failed entry prev_symb_result prev_symb tree = +let tree_failed (type s a) (entry : s ty_entry) (prev_symb_result : a) (prev_symb : (s, a) ty_symbol) tree = let txt = name_of_tree_failed entry tree in let txt = match prev_symb with @@ -197,7 +842,7 @@ let tree_failed entry prev_symb_result prev_symb tree = let txt1 = name_of_symbol_failed entry s in txt1 ^ " or " ^ txt ^ " expected" | Slist0sep (s, sep, _) -> - begin match Obj.magic prev_symb_result with + begin match prev_symb_result with [] -> let txt1 = name_of_symbol_failed entry s in txt1 ^ " or " ^ txt ^ " expected" @@ -206,7 +851,7 @@ let tree_failed entry prev_symb_result prev_symb tree = txt1 ^ " or " ^ txt ^ " expected" end | Slist1sep (s, sep, _) -> - begin match Obj.magic prev_symb_result with + begin match prev_symb_result with [] -> let txt1 = name_of_symbol_failed entry s in txt1 ^ " or " ^ txt ^ " expected" @@ -214,7 +859,8 @@ let tree_failed entry prev_symb_result prev_symb tree = let txt1 = name_of_symbol_failed entry sep in txt1 ^ " or " ^ txt ^ " expected" end - | Sopt _ | Stree _ -> txt ^ " expected" + | Sopt _ -> txt ^ " expected" + | Stree _ -> txt ^ " expected" | _ -> txt ^ " expected after " ^ name_of_symbol_failed entry prev_symb in txt ^ " (in [" ^ entry.ename ^ "])" @@ -223,8 +869,6 @@ let symb_failed entry prev_symb_result prev_symb symb = let tree = Node {node = symb; brother = DeadEnd; son = DeadEnd} in tree_failed entry prev_symb_result prev_symb tree -external app : Obj.t -> 'a = "%identity" - let is_level_labelled n lev = match lev.lname with Some n1 -> n = n1 @@ -241,28 +885,33 @@ let level_number entry lab = Dlevels elev -> lookup 0 elev | Dparser _ -> raise Not_found -let rec top_symb entry = +let rec top_symb : type s a. s ty_entry -> (s, a) ty_symbol -> (s, a) ty_symbol = + fun entry -> function - Sself | Snext -> Snterm entry + Sself -> Snterm entry + | Snext -> Snterm entry | Snterml (e, _) -> Snterm e | Slist1sep (s, sep, b) -> Slist1sep (top_symb entry s, sep, b) | _ -> raise Stream.Failure -let entry_of_symb entry = +let entry_of_symb : type s a. s ty_entry -> (s, a) ty_symbol -> a ty_entry = + fun entry -> function - Sself | Snext -> entry + Sself -> entry + | Snext -> entry | Snterm e -> e | Snterml (e, _) -> e | _ -> raise Stream.Failure -let top_tree entry = +let top_tree : type s a. s ty_entry -> (s, a) ty_tree -> (s, a) ty_tree = + fun entry -> function Node {node = s; brother = bro; son = son} -> Node {node = top_symb entry s; brother = bro; son = son} | LocAct (_, _) | DeadEnd -> raise Stream.Failure let skip_if_empty bp p strm = - if Stream.count strm == bp then gramext_action (fun a -> p strm) + if Stream.count strm == bp then fun a -> p strm else raise Stream.Failure let continue entry bp a s son p1 (strm__ : _ Stream.t) = @@ -271,7 +920,7 @@ let continue entry bp a s son p1 (strm__ : _ Stream.t) = try p1 strm__ with Stream.Failure -> raise (Stream.Error (tree_failed entry a s son)) in - gramext_action (fun _ -> app act a) + fun _ -> act a let do_recover parser_of_tree entry nlevn alevn bp a s son (strm__ : _ Stream.t) = @@ -309,27 +958,28 @@ let call_and_push ps al strm = let token_ematch gram tok = let tematch = gram.glexer.Plexing.tok_match tok in - fun tok -> Obj.repr (tematch tok : string) + fun tok -> tematch tok -let rec parser_of_tree entry nlevn alevn = +let rec parser_of_tree : type s r. s ty_entry -> int -> int -> (s, r) ty_tree -> r parser_t = + fun entry nlevn alevn -> function DeadEnd -> (fun (strm__ : _ Stream.t) -> raise Stream.Failure) | LocAct (act, _) -> (fun (strm__ : _ Stream.t) -> act) | Node {node = Sself; son = LocAct (act, _); brother = DeadEnd} -> (fun (strm__ : _ Stream.t) -> - let a = entry.estart alevn strm__ in app act a) + let a = entry.estart alevn strm__ in act a) | Node {node = Sself; son = LocAct (act, _); brother = bro} -> let p2 = parser_of_tree entry nlevn alevn bro in (fun (strm__ : _ Stream.t) -> match try Some (entry.estart alevn strm__) with Stream.Failure -> None with - Some a -> app act a + Some a -> act a | _ -> p2 strm__) | Node {node = s; son = son; brother = DeadEnd} -> let tokl = match s with - Stoken tok -> get_token_list entry [] tok son + Stoken tok -> get_token_list entry tok [] tok TokNil son | _ -> None in begin match tokl with @@ -345,19 +995,20 @@ let rec parser_of_tree entry nlevn alevn = Stream.Failure -> raise (Stream.Error (tree_failed entry a s son)) in - app act a) - | Some (rev_tokl, last_tok, son) -> + act a) + | Some (first_tok, rev_tokl, last_tok, TokTree (son, pf)) -> + let s = Stoken first_tok in let lt = Stoken last_tok in let p1 = parser_of_tree entry nlevn alevn son in let p1 = parser_cont p1 entry nlevn alevn lt son in - parser_of_token_list entry s son p1 + parser_of_token_list entry s son pf p1 (fun (strm__ : _ Stream.t) -> raise Stream.Failure) rev_tokl last_tok end | Node {node = s; son = son; brother = bro} -> let tokl = match s with - Stoken tok -> get_token_list entry [] tok son + Stoken tok -> get_token_list entry tok [] tok TokNil son | _ -> None in match tokl with @@ -373,71 +1024,81 @@ let rec parser_of_tree entry nlevn alevn = begin match (try Some (p1 bp a strm) with Stream.Failure -> None) with - Some act -> app act a + Some act -> act a | None -> raise (Stream.Error (tree_failed entry a s son)) end | None -> p2 strm) - | Some (rev_tokl, last_tok, son) -> + | Some (first_tok, rev_tokl, last_tok, TokTree (son, pf)) -> let lt = Stoken last_tok in let p2 = parser_of_tree entry nlevn alevn bro in let p1 = parser_of_tree entry nlevn alevn son in let p1 = parser_cont p1 entry nlevn alevn lt son in let p1 = - parser_of_token_list entry lt son p1 p2 rev_tokl last_tok + parser_of_token_list entry lt son pf p1 p2 rev_tokl last_tok in fun (strm__ : _ Stream.t) -> try p1 strm__ with Stream.Failure -> p2 strm__ -and parser_cont p1 entry nlevn alevn s son bp a (strm__ : _ Stream.t) = +and parser_cont : type s a r. + (a -> r) parser_t -> s ty_entry -> int -> int -> (s, a) ty_symbol -> (s, a -> r) ty_tree -> int -> a -> (a -> r) parser_t = + fun p1 entry nlevn alevn s son bp a (strm__ : _ Stream.t) -> try p1 strm__ with Stream.Failure -> recover parser_of_tree entry nlevn alevn bp a s son strm__ -and parser_of_token_list entry s son p1 p2 rev_tokl last_tok = - let plast = +and parser_of_token_list : type s r f. + s ty_entry -> (s, string) ty_symbol -> (s, string -> r) ty_tree -> + (r, f) tok_list -> (int -> string -> (string -> r) parser_t) -> f parser_t -> _ -> _ -> f parser_t = + fun entry s son pf p1 p2 rev_tokl last_tok -> + let plast : r parser_t = let n = List.length rev_tokl + 1 in - let tematch = token_ematch entry.egram last_tok in + let tematch = token_ematch egram last_tok in let ps strm = match peek_nth n strm with Some tok -> let r = tematch tok in - for _i = 1 to n do Stream.junk strm done; Obj.repr r + for _i = 1 to n do Stream.junk strm done; r | None -> raise Stream.Failure in fun (strm : _ Stream.t) -> let bp = Stream.count strm in let a = ps strm in match try Some (p1 bp a strm) with Stream.Failure -> None with - Some act -> app act a + Some act -> act a | None -> raise (Stream.Error (tree_failed entry a s son)) in - match List.rev rev_tokl with - [] -> (fun (strm__ : _ Stream.t) -> plast strm__) - | tok :: tokl -> - let tematch = token_ematch entry.egram tok in + match List.rev rev_tokl, pf with + [], TokNil -> (fun (strm__ : _ Stream.t) -> plast strm__) + | tok :: tokl, TokCns pf -> + let tematch = token_ematch egram tok in let ps strm = match peek_nth 1 strm with Some tok -> tematch tok | None -> raise Stream.Failure in let p1 = - let rec loop n = - function - [] -> plast - | tok :: tokl -> - let tematch = token_ematch entry.egram tok in + let rec loop : type s f. _ -> _ -> (s, f) tok_list -> (string -> s) parser_t -> (string -> f) parser_t = + fun n tokl pf plast -> + match tokl, pf with + [], TokNil -> plast + | tok :: tokl, TokCns pf -> + let tematch = token_ematch egram tok in let ps strm = match peek_nth n strm with Some tok -> tematch tok | None -> raise Stream.Failure in - let p1 = loop (n + 1) tokl in + let p1 = loop (n + 1) tokl pf (Obj.magic plast) in (* FIXME *) fun (strm__ : _ Stream.t) -> - let a = ps strm__ in let act = p1 strm__ in app act a + let a = ps strm__ in let act = p1 strm__ in (Obj.magic act a) (* FIXME *) + | _ -> assert false in - loop 2 tokl + loop 2 tokl pf plast in fun (strm__ : _ Stream.t) -> - let a = ps strm__ in let act = p1 strm__ in app act a -and parser_of_symbol entry nlevn = + let a = ps strm__ in let act = p1 strm__ in act a + | _ -> assert false +and parser_of_symbol : type s a. + s ty_entry -> int -> (s, a) ty_symbol -> a parser_t = + fun entry nlevn -> function | Slist0 s -> let ps = call_and_push (parser_of_symbol entry nlevn s) in @@ -447,7 +1108,7 @@ and parser_of_symbol entry nlevn = | _ -> al in (fun (strm__ : _ Stream.t) -> - let a = loop [] strm__ in Obj.repr (List.rev a)) + let a = loop [] strm__ in List.rev a) | Slist0sep (symb, sep, false) -> let ps = call_and_push (parser_of_symbol entry nlevn symb) in let pt = parser_of_symbol entry nlevn sep in @@ -464,8 +1125,8 @@ and parser_of_symbol entry nlevn = in (fun (strm__ : _ Stream.t) -> match try Some (ps [] strm__) with Stream.Failure -> None with - Some al -> let a = kont al strm__ in Obj.repr (List.rev a) - | _ -> Obj.repr []) + Some al -> let a = kont al strm__ in List.rev a + | _ -> []) | Slist0sep (symb, sep, true) -> let ps = call_and_push (parser_of_symbol entry nlevn symb) in let pt = parser_of_symbol entry nlevn sep in @@ -482,8 +1143,8 @@ and parser_of_symbol entry nlevn = in (fun (strm__ : _ Stream.t) -> match try Some (ps [] strm__) with Stream.Failure -> None with - Some al -> let a = kont al strm__ in Obj.repr (List.rev a) - | _ -> Obj.repr []) + Some al -> let a = kont al strm__ in List.rev a + | _ -> []) | Slist1 s -> let ps = call_and_push (parser_of_symbol entry nlevn s) in let rec loop al (strm__ : _ Stream.t) = @@ -493,7 +1154,7 @@ and parser_of_symbol entry nlevn = in (fun (strm__ : _ Stream.t) -> let al = ps [] strm__ in - let a = loop al strm__ in Obj.repr (List.rev a)) + let a = loop al strm__ in List.rev a) | Slist1sep (symb, sep, false) -> let ps = call_and_push (parser_of_symbol entry nlevn symb) in let pt = parser_of_symbol entry nlevn sep in @@ -515,7 +1176,7 @@ and parser_of_symbol entry nlevn = in (fun (strm__ : _ Stream.t) -> let al = ps [] strm__ in - let a = kont al strm__ in Obj.repr (List.rev a)) + let a = kont al strm__ in List.rev a) | Slist1sep (symb, sep, true) -> let ps = call_and_push (parser_of_symbol entry nlevn symb) in let pt = parser_of_symbol entry nlevn sep in @@ -538,33 +1199,37 @@ and parser_of_symbol entry nlevn = in (fun (strm__ : _ Stream.t) -> let al = ps [] strm__ in - let a = kont al strm__ in Obj.repr (List.rev a)) + let a = kont al strm__ in List.rev a) | Sopt s -> let ps = parser_of_symbol entry nlevn s in (fun (strm__ : _ Stream.t) -> match try Some (ps strm__) with Stream.Failure -> None with - Some a -> Obj.repr (Some a) - | _ -> Obj.repr None) + Some a -> Some a + | _ -> None) | Stree t -> let pt = parser_of_tree entry 1 0 t in (fun (strm__ : _ Stream.t) -> let bp = Stream.count strm__ in let a = pt strm__ in let ep = Stream.count strm__ in - let loc = loc_of_token_interval bp ep in app a loc) + let loc = loc_of_token_interval bp ep in a loc) | Snterm e -> (fun (strm__ : _ Stream.t) -> e.estart 0 strm__) | Snterml (e, l) -> (fun (strm__ : _ Stream.t) -> e.estart (level_number e l) strm__) | Sself -> (fun (strm__ : _ Stream.t) -> entry.estart 0 strm__) | Snext -> (fun (strm__ : _ Stream.t) -> entry.estart nlevn strm__) | Stoken tok -> parser_of_token entry tok -and parser_of_token entry tok = - let f = entry.egram.glexer.Plexing.tok_match tok in +and parser_of_token : type s. + s ty_entry -> Plexing.pattern -> string parser_t = + fun entry tok -> + let f = egram.glexer.Plexing.tok_match tok in fun strm -> match Stream.peek strm with - Some tok -> let r = f tok in Stream.junk strm; Obj.repr r + Some tok -> let r = f tok in Stream.junk strm; r | None -> raise Stream.Failure -and parse_top_symb entry symb = parser_of_symbol entry 0 (top_symb entry symb) +and parse_top_symb : type s a. s ty_entry -> (s, a) ty_symbol -> a parser_t = + fun entry symb -> + parser_of_symbol entry 0 (top_symb entry symb) let rec start_parser_of_levels entry clevn = function @@ -594,7 +1259,7 @@ let rec start_parser_of_levels entry clevn = let bp = Stream.count strm__ in let act = p2 strm__ in let ep = Stream.count strm__ in - let a = app act (loc_of_token_interval bp ep) in + let a = act (loc_of_token_interval bp ep) in entry.econtinue levn bp a strm) | _ -> fun levn strm -> @@ -605,7 +1270,7 @@ let rec start_parser_of_levels entry clevn = match try Some (p2 strm__) with Stream.Failure -> None with Some act -> let ep = Stream.count strm__ in - let a = app act (loc_of_token_interval bp ep) in + let a = act (loc_of_token_interval bp ep) in entry.econtinue levn bp a strm | _ -> p1 levn strm__ @@ -631,7 +1296,7 @@ let rec continue_parser_of_levels entry clevn = Stream.Failure -> let act = p2 strm__ in let ep = Stream.count strm__ in - let a = app act a (loc_of_token_interval bp ep) in + let a = act a (loc_of_token_interval bp ep) in entry.econtinue levn bp a strm let continue_parser_of_entry entry = @@ -663,7 +1328,7 @@ let init_entry_functions entry = entry.econtinue <- f; f lev bp a strm) let extend_entry ~warning entry position rules = - let elev = Gramext.levels_of_rules ~warning entry position rules in + let elev = levels_of_rules ~warning entry position rules in entry.edesc <- Dlevels elev; init_entry_functions entry (* Deleting a rule *) @@ -671,7 +1336,7 @@ let extend_entry ~warning entry position rules = let delete_rule entry sl = match entry.edesc with Dlevels levs -> - let levs = Gramext.delete_rule_in_level_list entry sl levs in + let levs = delete_rule_in_level_list entry sl levs in entry.edesc <- Dlevels levs; entry.estart <- (fun lev strm -> @@ -685,20 +1350,9 @@ let delete_rule entry sl = (* Normal interface *) -let create_toktab () = Hashtbl.create 301 -let gcreate glexer = - {gtokens = create_toktab (); glexer = glexer } - -let tokens g con = - let list = ref [] in - Hashtbl.iter - (fun (p_con, p_prm) c -> if p_con = con then list := (p_prm, !c) :: !list) - g.gtokens; - !list - -type 'te gen_parsable = +type parsable = { pa_chr_strm : char Stream.t; - pa_tok_strm : 'te Stream.t; + pa_tok_strm : L.te Stream.t; pa_loc_func : Plexing.location_function } let parse_parsable entry p = @@ -741,95 +1395,30 @@ let clear_entry e = Dlevels _ -> e.edesc <- Dlevels [] | Dparser _ -> () -(* Functorial interface *) - -module type GLexerType = sig type te val lexer : te Plexing.lexer end - -module type S = - sig - type te - type parsable - val parsable : char Stream.t -> parsable - val tokens : string -> (string * int) list - module Entry : - sig - type 'a e - val create : string -> 'a e - val parse : 'a e -> parsable -> 'a - val name : 'a e -> string - val of_parser : string -> (te Stream.t -> 'a) -> 'a e - val parse_token_stream : 'a e -> te Stream.t -> 'a - val print : Format.formatter -> 'a e -> unit - end - type ('self, 'a) ty_symbol - type ('self, 'f, 'r) ty_rule - type 'a ty_production - val s_nterm : 'a Entry.e -> ('self, 'a) ty_symbol - val s_nterml : 'a Entry.e -> string -> ('self, 'a) ty_symbol - val s_list0 : ('self, 'a) ty_symbol -> ('self, 'a list) ty_symbol - val s_list0sep : - ('self, 'a) ty_symbol -> ('self, 'b) ty_symbol -> bool -> - ('self, 'a list) ty_symbol - val s_list1 : ('self, 'a) ty_symbol -> ('self, 'a list) ty_symbol - val s_list1sep : - ('self, 'a) ty_symbol -> ('self, 'b) ty_symbol -> bool -> - ('self, 'a list) ty_symbol - val s_opt : ('self, 'a) ty_symbol -> ('self, 'a option) ty_symbol - val s_self : ('self, 'self) ty_symbol - val s_next : ('self, 'self) ty_symbol - val s_token : Plexing.pattern -> ('self, string) ty_symbol - val s_rules : warning:(string -> unit) option -> 'a ty_production list -> ('self, 'a) ty_symbol - val r_stop : ('self, 'r, 'r) ty_rule - val r_next : - ('self, 'a, 'r) ty_rule -> ('self, 'b) ty_symbol -> - ('self, 'b -> 'a, 'r) ty_rule - val production : ('a, 'f, Loc.t -> 'a) ty_rule * 'f -> 'a ty_production - module Unsafe : - sig - val clear_entry : 'a Entry.e -> unit - end - val safe_extend : warning:(string -> unit) option -> - 'a Entry.e -> Gramext.position option -> - (string option * Gramext.g_assoc option * 'a ty_production list) - list -> - unit - val safe_delete_rule : 'a Entry.e -> ('a, 'r, 'f) ty_rule -> unit - end - -module GMake (L : GLexerType) = - struct - type te = L.te - type parsable = te gen_parsable - let gram = gcreate L.lexer let parsable cs = let (ts, lf) = L.lexer.Plexing.tok_func cs in {pa_chr_strm = cs; pa_tok_strm = ts; pa_loc_func = lf} - let tokens = tokens gram module Entry = struct - type 'a e = te g_entry + type 'a e = 'a ty_entry let create n = - {egram = gram; ename = n; elocal = false; estart = empty_entry n; + { ename = n; estart = empty_entry n; econtinue = (fun _ _ _ (strm__ : _ Stream.t) -> raise Stream.Failure); edesc = Dlevels []} - external obj : 'a e -> te Gramext.g_entry = "%identity" let parse (e : 'a e) p : 'a = - Obj.magic (parse_parsable e p : Obj.t) + parse_parsable e p let parse_token_stream (e : 'a e) ts : 'a = - Obj.magic (e.estart 0 ts : Obj.t) + e.estart 0 ts let name e = e.ename let of_parser n (p : te Stream.t -> 'a) : 'a e = - {egram = gram; ename = n; elocal = false; - estart = (fun _ -> (Obj.magic p : te Stream.t -> Obj.t)); + { ename = n; + estart = (fun _ -> p); econtinue = (fun _ _ _ (strm__ : _ Stream.t) -> raise Stream.Failure); - edesc = Dparser (Obj.magic p : te Stream.t -> Obj.t)} - let print ppf e = fprintf ppf "%a@." print_entry (obj e) + edesc = Dparser p} + let print ppf e = fprintf ppf "%a@." print_entry e end - type ('self, 'a) ty_symbol = te Gramext.g_symbol - type ('self, 'f, 'r) ty_rule = ('self, Obj.t) ty_symbol list - type 'a ty_production = ('a, Obj.t, Obj.t) ty_rule * Gramext.g_action let s_nterm e = Snterm e let s_nterml e l = Snterml (e, l) let s_list0 s = Slist0 s @@ -840,20 +1429,21 @@ module GMake (L : GLexerType) = let s_self = Sself let s_next = Snext let s_token tok = Stoken tok - let s_rules ~warning (t : Obj.t ty_production list) = Gramext.srules ~warning (Obj.magic t) - let r_stop = [] - let r_next r s = r @ [s] - let production - (p : ('a, 'f, Loc.t -> 'a) ty_rule * 'f) : 'a ty_production = - Obj.magic p + let s_rules ~warning (t : 'a ty_production list) = srules ~warning t + let r_stop = TStop + let r_next r s = TNext (r, s) + let production (p, act) = TProd (p, act) module Unsafe = struct let clear_entry = clear_entry end - let safe_extend ~warning e pos + let safe_extend ~warning (e : 'a Entry.e) pos (r : - (string option * Gramext.g_assoc option * Obj.t ty_production list) + (string option * Gramext.g_assoc option * 'a ty_production list) list) = - extend_entry ~warning e pos (Obj.magic r) - let safe_delete_rule e r = delete_rule (Entry.obj e) r - end + extend_entry ~warning e pos r + let safe_delete_rule e r = + let AnyS (symbols, _) = get_symbols r in + delete_rule e symbols + +end |
