diff options
| author | Pierre-Marie Pédrot | 2018-10-29 13:38:29 +0100 |
|---|---|---|
| committer | Pierre-Marie Pédrot | 2018-10-29 13:38:29 +0100 |
| commit | 35f715a005063a2c6195c574d019ba202bcdc27b (patch) | |
| tree | 5dc3edf54bc0d3db9a6cbd95f6225b16f36ef3ae /gramlib | |
| parent | d838a67707812cb496f0f0f83f1ebe5a65280c86 (diff) | |
| parent | cd8b8974e2d62a3c3c4d7572921b5a83cbb8642c (diff) | |
Merge PR #8667: [RFC] Vendoring of Camlp5
Diffstat (limited to 'gramlib')
| -rw-r--r-- | gramlib/LICENSE | 29 | ||||
| -rw-r--r-- | gramlib/dune | 3 | ||||
| -rw-r--r-- | gramlib/gramext.ml | 580 | ||||
| -rw-r--r-- | gramlib/gramext.mli | 73 | ||||
| -rw-r--r-- | gramlib/grammar.ml | 1223 | ||||
| -rw-r--r-- | gramlib/grammar.mli | 170 | ||||
| -rw-r--r-- | gramlib/plexing.ml | 217 | ||||
| -rw-r--r-- | gramlib/plexing.mli | 108 | ||||
| -rw-r--r-- | gramlib/ploc.ml | 176 | ||||
| -rw-r--r-- | gramlib/ploc.mli | 122 | ||||
| -rw-r--r-- | gramlib/token.ml | 37 | ||||
| -rw-r--r-- | gramlib/token.mli | 56 |
12 files changed, 2794 insertions, 0 deletions
diff --git a/gramlib/LICENSE b/gramlib/LICENSE new file mode 100644 index 0000000000..b696affde7 --- /dev/null +++ b/gramlib/LICENSE @@ -0,0 +1,29 @@ +gramlib was derived from Daniel de Rauglaudre's camlp5 +(https://github.com/camlp5/camlp5) whose licence follows: + +* Copyright (c) 2007-2017, INRIA (Institut National de Recherches en +* Informatique et Automatique). All rights reserved. +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* * Neither the name of INRIA, nor the names of its contributors may be +* used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY INRIA AND CONTRIBUTORS ``AS IS'' AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INRIA AND +* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGE. 
diff --git a/gramlib/dune b/gramlib/dune new file mode 100644 index 0000000000..6a9e622b4c --- /dev/null +++ b/gramlib/dune @@ -0,0 +1,3 @@ +(library + (name gramlib) + (public_name coq.gramlib)) diff --git a/gramlib/gramext.ml b/gramlib/gramext.ml new file mode 100644 index 0000000000..8960d4f257 --- /dev/null +++ b/gramlib/gramext.ml @@ -0,0 +1,580 @@ +(* camlp5r *) +(* gramext.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +open Printf + +type 'a parser_t = 'a Stream.t -> Obj.t + +type 'te grammar = + { gtokens : (Plexing.pattern, int ref) Hashtbl.t; + mutable glexer : 'te Plexing.lexer } + +type 'te g_entry = + { egram : 'te grammar; + ename : string; + elocal : bool; + mutable estart : int -> 'te parser_t; + mutable econtinue : int -> int -> Obj.t -> 'te parser_t; + mutable edesc : 'te g_desc } +and 'te g_desc = + Dlevels of 'te g_level list + | Dparser of 'te parser_t +and 'te g_level = + { assoc : g_assoc; + lname : string option; + lsuffix : 'te g_tree; + lprefix : 'te g_tree } +and g_assoc = NonA | RightA | LeftA +and 'te g_symbol = + Sfacto of 'te g_symbol + | Smeta of string * 'te g_symbol list * Obj.t + | Snterm of 'te g_entry + | Snterml of 'te g_entry * string + | Slist0 of 'te g_symbol + | Slist0sep of 'te g_symbol * 'te g_symbol * bool + | Slist1 of 'te g_symbol + | Slist1sep of 'te g_symbol * 'te g_symbol * bool + | Sopt of 'te g_symbol + | Sflag of 'te g_symbol + | Sself + | Snext + | Scut + | Stoken of Plexing.pattern + | Stree of 'te g_tree + | Svala of string list * 'te g_symbol +and g_action = Obj.t +and 'te g_tree = + Node of 'te g_node + | LocAct of g_action * g_action list + | DeadEnd +and 'te g_node = + { node : 'te g_symbol; son : 'te g_tree; brother : 'te g_tree } +and err_fun = unit -> string + +type position = + First + | Last + | Before of string + | After of string + | Like of string + | Level of string + +let warning_verbose = ref true + +let rec derive_eps = + function + Slist0 _ -> true + | Slist0sep (_, _, _) -> true + | Sopt _ | 
Sflag _ -> true + | Sfacto s -> derive_eps s + | Stree t -> tree_derive_eps t + | Svala (_, s) -> derive_eps s + | Smeta (_, _, _) | Slist1 _ | Slist1sep (_, _, _) | Snterm _ | + Snterml (_, _) | Snext | Sself | Scut | Stoken _ -> + false +and tree_derive_eps = + function + LocAct (_, _) -> true + | Node {node = s; brother = bro; son = son} -> + derive_eps s && tree_derive_eps son || tree_derive_eps bro + | DeadEnd -> false + +let rec eq_symbol s1 s2 = + match s1, s2 with + Snterm e1, Snterm e2 -> e1 == e2 + | Snterml (e1, l1), Snterml (e2, l2) -> e1 == e2 && l1 = l2 + | Slist0 s1, Slist0 s2 -> eq_symbol s1 s2 + | Slist0sep (s1, sep1, b1), Slist0sep (s2, sep2, b2) -> + eq_symbol s1 s2 && eq_symbol sep1 sep2 && b1 = b2 + | Slist1 s1, Slist1 s2 -> eq_symbol s1 s2 + | Slist1sep (s1, sep1, b1), Slist1sep (s2, sep2, b2) -> + eq_symbol s1 s2 && eq_symbol sep1 sep2 && b1 = b2 + | Sflag s1, Sflag s2 -> eq_symbol s1 s2 + | Sopt s1, Sopt s2 -> eq_symbol s1 s2 + | Svala (ls1, s1), Svala (ls2, s2) -> ls1 = ls2 && eq_symbol s1 s2 + | Stree _, Stree _ -> false + | Sfacto (Stree t1), Sfacto (Stree t2) -> + (* The only goal of the node 'Sfacto' is to allow tree comparison + (therefore factorization) without looking at the semantic + actions; allow factorization of rules like "SV foo" which are + actually expanded into a tree. 
*) + let rec eq_tree t1 t2 = + match t1, t2 with + Node n1, Node n2 -> + eq_symbol n1.node n2.node && eq_tree n1.son n2.son && + eq_tree n1.brother n2.brother + | LocAct (_, _), LocAct (_, _) -> true + | DeadEnd, DeadEnd -> true + | _ -> false + in + eq_tree t1 t2 + | _ -> s1 = s2 + +let is_before s1 s2 = + let s1 = + match s1 with + Svala (_, s) -> s + | _ -> s1 + in + let s2 = + match s2 with + Svala (_, s) -> s + | _ -> s2 + in + match s1, s2 with + Stoken ("ANY", _), _ -> false + | _, Stoken ("ANY", _) -> true + | Stoken (_, s), Stoken (_, "") when s <> "" -> true + | Stoken _, Stoken _ -> false + | Stoken _, _ -> true + | _ -> false + +let insert_tree entry_name gsymbols action tree = + let rec insert symbols tree = + match symbols with + s :: sl -> insert_in_tree s sl tree + | [] -> + match tree with + Node {node = s; son = son; brother = bro} -> + Node {node = s; son = son; brother = insert [] bro} + | LocAct (old_action, action_list) -> + if !warning_verbose then + begin + eprintf "<W> Grammar extension: "; + if entry_name <> "" then eprintf "in [%s], " entry_name; + eprintf "some rule has been masked\n"; + flush stderr + end; + LocAct (action, old_action :: action_list) + | DeadEnd -> LocAct (action, []) + and insert_in_tree s sl tree = + match try_insert s sl tree with + Some t -> t + | None -> Node {node = s; son = insert sl DeadEnd; brother = tree} + and try_insert s sl tree = + match tree with + Node {node = s1; son = son; brother = bro} -> + if eq_symbol s s1 then + let t = Node {node = s1; son = insert sl son; brother = bro} in + Some t + else if s = Scut then + try_insert s sl (Node {node = s; son = tree; brother = DeadEnd}) + else if s1 = Scut then try_insert s1 (s :: sl) tree + else if is_before s1 s || derive_eps s && not (derive_eps s1) then + let bro = + match try_insert s sl bro with + Some bro -> bro + | None -> Node {node = s; son = insert sl DeadEnd; brother = bro} + in + let t = Node {node = s1; son = son; brother = bro} in Some t + else + 
begin match try_insert s sl bro with + Some bro -> + let t = Node {node = s1; son = son; brother = bro} in Some t + | None -> None + end + | LocAct (_, _) | DeadEnd -> None + in + insert gsymbols tree + +let srules rl = + let t = + List.fold_left + (fun tree (symbols, action) -> insert_tree "" symbols action tree) + DeadEnd rl + in + Stree t + +external action : 'a -> g_action = "%identity" + +let is_level_labelled n lev = + match lev.lname with + Some n1 -> n = n1 + | None -> false + +let rec token_exists_in_level f lev = + token_exists_in_tree f lev.lprefix || token_exists_in_tree f lev.lsuffix +and token_exists_in_tree f = + function + Node n -> + token_exists_in_symbol f n.node || token_exists_in_tree f n.brother || + token_exists_in_tree f n.son + | LocAct (_, _) | DeadEnd -> false +and token_exists_in_symbol f = + function + Sfacto sy -> token_exists_in_symbol f sy + | Smeta (_, syl, _) -> List.exists (token_exists_in_symbol f) syl + | Slist0 sy -> token_exists_in_symbol f sy + | Slist0sep (sy, sep, _) -> + token_exists_in_symbol f sy || token_exists_in_symbol f sep + | Slist1 sy -> token_exists_in_symbol f sy + | Slist1sep (sy, sep, _) -> + token_exists_in_symbol f sy || token_exists_in_symbol f sep + | Sopt sy -> token_exists_in_symbol f sy + | Sflag sy -> token_exists_in_symbol f sy + | Stoken tok -> f tok + | Stree t -> token_exists_in_tree f t + | Svala (_, sy) -> token_exists_in_symbol f sy + | Snterm _ | Snterml (_, _) | Snext | Sself | Scut -> false + +let insert_level entry_name e1 symbols action slev = + match e1 with + true -> + {assoc = slev.assoc; lname = slev.lname; + lsuffix = insert_tree entry_name symbols action slev.lsuffix; + lprefix = slev.lprefix} + | false -> + {assoc = slev.assoc; lname = slev.lname; lsuffix = slev.lsuffix; + lprefix = insert_tree entry_name symbols action slev.lprefix} + +let empty_lev lname assoc = + let assoc = + match assoc with + Some a -> a + | None -> LeftA + in + {assoc = assoc; lname = lname; lsuffix = DeadEnd; 
lprefix = DeadEnd} + +let change_lev lev n lname assoc = + let a = + match assoc with + None -> lev.assoc + | Some a -> + if a <> lev.assoc && !warning_verbose then + begin + eprintf "<W> Changing associativity of level \"%s\"\n" n; + flush stderr + end; + a + in + begin match lname with + Some n -> + if lname <> lev.lname && !warning_verbose then + begin eprintf "<W> Level label \"%s\" ignored\n" n; flush stderr end + | None -> () + end; + {assoc = a; lname = lev.lname; lsuffix = lev.lsuffix; lprefix = lev.lprefix} + +let get_level entry position levs = + match position with + Some First -> [], empty_lev, levs + | Some Last -> levs, empty_lev, [] + | Some (Level n) -> + let rec get = + function + [] -> + eprintf "No level labelled \"%s\" in entry \"%s\"\n" n + entry.ename; + flush stderr; + failwith "Grammar.extend" + | lev :: levs -> + if is_level_labelled n lev then [], change_lev lev n, levs + else + let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2 + in + get levs + | Some (Before n) -> + let rec get = + function + [] -> + eprintf "No level labelled \"%s\" in entry \"%s\"\n" n + entry.ename; + flush stderr; + failwith "Grammar.extend" + | lev :: levs -> + if is_level_labelled n lev then [], empty_lev, lev :: levs + else + let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2 + in + get levs + | Some (After n) -> + let rec get = + function + [] -> + eprintf "No level labelled \"%s\" in entry \"%s\"\n" n + entry.ename; + flush stderr; + failwith "Grammar.extend" + | lev :: levs -> + if is_level_labelled n lev then [lev], empty_lev, levs + else + let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2 + in + get levs + | Some (Like n) -> + let f (tok, prm) = n = tok || n = prm in + let rec get = + function + [] -> + eprintf "No level with \"%s\" in entry \"%s\"\n" n entry.ename; + flush stderr; + failwith "Grammar.extend" + | lev :: levs -> + if token_exists_in_level f lev then [], change_lev lev n, levs + else + let (levs1, 
rlev, levs2) = get levs in lev :: levs1, rlev, levs2 + in + get levs + | None -> + match levs with + lev :: levs -> [], change_lev lev "<top>", levs + | [] -> [], empty_lev, [] + +let rec check_gram entry = + function + Snterm e -> + if e.egram != entry.egram then + begin + eprintf "\ +Error: entries \"%s\" and \"%s\" do not belong to the same grammar.\n" + entry.ename e.ename; + flush stderr; + failwith "Grammar.extend error" + end + | Snterml (e, _) -> + if e.egram != entry.egram then + begin + eprintf "\ +Error: entries \"%s\" and \"%s\" do not belong to the same grammar.\n" + entry.ename e.ename; + flush stderr; + failwith "Grammar.extend error" + end + | Sfacto s -> check_gram entry s + | Smeta (_, sl, _) -> List.iter (check_gram entry) sl + | Slist0sep (s, t, _) -> check_gram entry t; check_gram entry s + | Slist1sep (s, t, _) -> check_gram entry t; check_gram entry s + | Slist0 s -> check_gram entry s + | Slist1 s -> check_gram entry s + | Sopt s -> check_gram entry s + | Sflag s -> check_gram entry s + | Stree t -> tree_check_gram entry t + | Svala (_, s) -> check_gram entry s + | Snext | Sself | Scut | Stoken _ -> () +and tree_check_gram entry = + function + Node {node = n; brother = bro; son = son} -> + check_gram entry n; tree_check_gram entry bro; tree_check_gram entry son + | LocAct (_, _) | DeadEnd -> () + +let change_to_self entry = + function + Snterm e when e == entry -> Sself + | x -> x + +let get_initial entry = + function + Sself :: symbols -> true, symbols + | symbols -> false, symbols + +let insert_tokens gram symbols = + let rec insert = + function + Sfacto s -> insert s + | Smeta (_, sl, _) -> List.iter insert sl + | Slist0 s -> insert s + | Slist1 s -> insert s + | Slist0sep (s, t, _) -> insert s; insert t + | Slist1sep (s, t, _) -> insert s; insert t + | Sopt s -> insert s + | Sflag s -> insert s + | Stree t -> tinsert t + | Svala (_, s) -> insert s + | Stoken ("ANY", _) -> () + | Stoken tok -> + gram.glexer.Plexing.tok_using tok; + let r 
= + try Hashtbl.find gram.gtokens tok with + Not_found -> let r = ref 0 in Hashtbl.add gram.gtokens tok r; r + in + incr r + | Snterm _ | Snterml (_, _) | Snext | Sself | Scut -> () + and tinsert = + function + Node {node = s; brother = bro; son = son} -> + insert s; tinsert bro; tinsert son + | LocAct (_, _) | DeadEnd -> () + in + List.iter insert symbols + +let levels_of_rules entry position rules = + let elev = + match entry.edesc with + Dlevels elev -> elev + | Dparser _ -> + eprintf "Error: entry not extensible: \"%s\"\n" entry.ename; + flush stderr; + failwith "Grammar.extend" + in + if rules = [] then elev + else + let (levs1, make_lev, levs2) = get_level entry position elev in + let (levs, _) = + List.fold_left + (fun (levs, make_lev) (lname, assoc, level) -> + let lev = make_lev lname assoc in + let lev = + List.fold_left + (fun lev (symbols, action) -> + let symbols = List.map (change_to_self entry) symbols in + List.iter (check_gram entry) symbols; + let (e1, symbols) = get_initial entry symbols in + insert_tokens entry.egram symbols; + insert_level entry.ename e1 symbols action lev) + lev level + in + lev :: levs, empty_lev) + ([], make_lev) rules + in + levs1 @ List.rev levs @ levs2 + +let logically_eq_symbols entry = + let rec eq_symbols s1 s2 = + match s1, s2 with + Snterm e1, Snterm e2 -> e1.ename = e2.ename + | Snterm e1, Sself -> e1.ename = entry.ename + | Sself, Snterm e2 -> entry.ename = e2.ename + | Snterml (e1, l1), Snterml (e2, l2) -> e1.ename = e2.ename && l1 = l2 + | Slist0 s1, Slist0 s2 -> eq_symbols s1 s2 + | Slist0sep (s1, sep1, b1), Slist0sep (s2, sep2, b2) -> + eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2 + | Slist1 s1, Slist1 s2 -> eq_symbols s1 s2 + | Slist1sep (s1, sep1, b1), Slist1sep (s2, sep2, b2) -> + eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2 + | Sopt s1, Sopt s2 -> eq_symbols s1 s2 + | Stree t1, Stree t2 -> eq_trees t1 t2 + | _ -> s1 = s2 + and eq_trees t1 t2 = + match t1, t2 with + Node n1, Node n2 -> + 
eq_symbols n1.node n2.node && eq_trees n1.son n2.son && + eq_trees n1.brother n2.brother + | (LocAct (_, _) | DeadEnd), (LocAct (_, _) | DeadEnd) -> true + | _ -> false + in + eq_symbols + +(* [delete_rule_in_tree] returns + [Some (dsl, t)] if success + [dsl] = + Some (list of deleted nodes) if branch deleted + None if action replaced by previous version of action + [t] = remaining tree + [None] if failure *) + +let delete_rule_in_tree entry = + let rec delete_in_tree symbols tree = + match symbols, tree with + s :: sl, Node n -> + if logically_eq_symbols entry s n.node then delete_son sl n + else + begin match delete_in_tree symbols n.brother with + Some (dsl, t) -> + Some (dsl, Node {node = n.node; son = n.son; brother = t}) + | None -> None + end + | s :: sl, _ -> None + | [], Node n -> + begin match delete_in_tree [] n.brother with + Some (dsl, t) -> + Some (dsl, Node {node = n.node; son = n.son; brother = t}) + | None -> None + end + | [], DeadEnd -> None + | [], LocAct (_, []) -> Some (Some [], DeadEnd) + | [], LocAct (_, action :: list) -> Some (None, LocAct (action, list)) + and delete_son sl n = + match delete_in_tree sl n.son with + Some (Some dsl, DeadEnd) -> Some (Some (n.node :: dsl), n.brother) + | Some (Some dsl, t) -> + let t = Node {node = n.node; son = t; brother = n.brother} in + Some (Some (n.node :: dsl), t) + | Some (None, t) -> + let t = Node {node = n.node; son = t; brother = n.brother} in + Some (None, t) + | None -> None + in + delete_in_tree + +let rec decr_keyw_use gram = + function + Stoken tok -> + let r = Hashtbl.find gram.gtokens tok in + decr r; + if !r == 0 then + begin + Hashtbl.remove gram.gtokens tok; + gram.glexer.Plexing.tok_removing tok + end + | Sfacto s -> decr_keyw_use gram s + | Smeta (_, sl, _) -> List.iter (decr_keyw_use gram) sl + | Slist0 s -> decr_keyw_use gram s + | Slist1 s -> decr_keyw_use gram s + | Slist0sep (s1, s2, _) -> decr_keyw_use gram s1; decr_keyw_use gram s2 + | Slist1sep (s1, s2, _) -> decr_keyw_use 
gram s1; decr_keyw_use gram s2 + | Sopt s -> decr_keyw_use gram s + | Sflag s -> decr_keyw_use gram s + | Stree t -> decr_keyw_use_in_tree gram t + | Svala (_, s) -> decr_keyw_use gram s + | Sself | Snext | Scut | Snterm _ | Snterml (_, _) -> () +and decr_keyw_use_in_tree gram = + function + DeadEnd | LocAct (_, _) -> () + | Node n -> + decr_keyw_use gram n.node; + decr_keyw_use_in_tree gram n.son; + decr_keyw_use_in_tree gram n.brother + +let rec delete_rule_in_suffix entry symbols = + function + lev :: levs -> + begin match delete_rule_in_tree entry symbols lev.lsuffix with + Some (dsl, t) -> + begin match dsl with + Some dsl -> List.iter (decr_keyw_use entry.egram) dsl + | None -> () + end; + begin match t with + DeadEnd when lev.lprefix == DeadEnd -> levs + | _ -> + let lev = + {assoc = lev.assoc; lname = lev.lname; lsuffix = t; + lprefix = lev.lprefix} + in + lev :: levs + end + | None -> + let levs = delete_rule_in_suffix entry symbols levs in lev :: levs + end + | [] -> raise Not_found + +let rec delete_rule_in_prefix entry symbols = + function + lev :: levs -> + begin match delete_rule_in_tree entry symbols lev.lprefix with + Some (dsl, t) -> + begin match dsl with + Some dsl -> List.iter (decr_keyw_use entry.egram) dsl + | None -> () + end; + begin match t with + DeadEnd when lev.lsuffix == DeadEnd -> levs + | _ -> + let lev = + {assoc = lev.assoc; lname = lev.lname; lsuffix = lev.lsuffix; + lprefix = t} + in + lev :: levs + end + | None -> + let levs = delete_rule_in_prefix entry symbols levs in lev :: levs + end + | [] -> raise Not_found + +let delete_rule_in_level_list entry symbols levs = + match symbols with + Sself :: symbols -> delete_rule_in_suffix entry symbols levs + | Snterm e :: symbols when e == entry -> + delete_rule_in_suffix entry symbols levs + | _ -> delete_rule_in_prefix entry symbols levs diff --git a/gramlib/gramext.mli b/gramlib/gramext.mli new file mode 100644 index 0000000000..a76b7da9a2 --- /dev/null +++ b/gramlib/gramext.mli @@ 
-0,0 +1,73 @@ +(* camlp5r *) +(* gramext.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +type 'a parser_t = 'a Stream.t -> Obj.t + +type 'te grammar = + { gtokens : (Plexing.pattern, int ref) Hashtbl.t; + mutable glexer : 'te Plexing.lexer } + +type 'te g_entry = + { egram : 'te grammar; + ename : string; + elocal : bool; + mutable estart : int -> 'te parser_t; + mutable econtinue : int -> int -> Obj.t -> 'te parser_t; + mutable edesc : 'te g_desc } +and 'te g_desc = + Dlevels of 'te g_level list + | Dparser of 'te parser_t +and 'te g_level = + { assoc : g_assoc; + lname : string option; + lsuffix : 'te g_tree; + lprefix : 'te g_tree } +and g_assoc = NonA | RightA | LeftA +and 'te g_symbol = + Sfacto of 'te g_symbol + | Smeta of string * 'te g_symbol list * Obj.t + | Snterm of 'te g_entry + | Snterml of 'te g_entry * string + | Slist0 of 'te g_symbol + | Slist0sep of 'te g_symbol * 'te g_symbol * bool + | Slist1 of 'te g_symbol + | Slist1sep of 'te g_symbol * 'te g_symbol * bool + | Sopt of 'te g_symbol + | Sflag of 'te g_symbol + | Sself + | Snext + | Scut + | Stoken of Plexing.pattern + | Stree of 'te g_tree + | Svala of string list * 'te g_symbol +and g_action = Obj.t +and 'te g_tree = + Node of 'te g_node + | LocAct of g_action * g_action list + | DeadEnd +and 'te g_node = + { node : 'te g_symbol; son : 'te g_tree; brother : 'te g_tree } +and err_fun = unit -> string + +type position = + First + | Last + | Before of string + | After of string + | Like of string + | Level of string + +val levels_of_rules : + 'te g_entry -> position option -> + (string option * g_assoc option * ('te g_symbol list * g_action) list) + list -> + 'te g_level list +val srules : ('te g_symbol list * g_action) list -> 'te g_symbol +external action : 'a -> g_action = "%identity" +val eq_symbol : 'a g_symbol -> 'a g_symbol -> bool + +val delete_rule_in_level_list : + 'te g_entry -> 'te g_symbol list -> 'te g_level list -> 'te g_level list + +val warning_verbose : bool ref diff --git 
a/gramlib/grammar.ml b/gramlib/grammar.ml new file mode 100644 index 0000000000..04ec1049ed --- /dev/null +++ b/gramlib/grammar.ml @@ -0,0 +1,1223 @@ +(* camlp5r *) +(* grammar.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +open Gramext +open Format + +let rec flatten_tree = + function + DeadEnd -> [] + | LocAct (_, _) -> [[]] + | Node {node = n; brother = b; son = s} -> + List.map (fun l -> n :: l) (flatten_tree s) @ flatten_tree b + +let utf8_print = ref true + +let utf8_string_escaped s = + let b = Buffer.create (String.length s) in + let rec loop i = + if i = String.length s then Buffer.contents b + else + begin + begin match s.[i] with + '"' -> Buffer.add_string b "\\\"" + | '\\' -> Buffer.add_string b "\\\\" + | '\n' -> Buffer.add_string b "\\n" + | '\t' -> Buffer.add_string b "\\t" + | '\r' -> Buffer.add_string b "\\r" + | '\b' -> Buffer.add_string b "\\b" + | c -> Buffer.add_char b c + end; + loop (i + 1) + end + in + loop 0 + +let string_escaped s = + if !utf8_print then utf8_string_escaped s else String.escaped s + +let print_str ppf s = fprintf ppf "\"%s\"" (string_escaped s) + +let rec print_symbol ppf = + function + Sfacto s -> print_symbol ppf s + | Smeta (n, sl, _) -> print_meta ppf n sl + | Slist0 s -> fprintf ppf "LIST0 %a" print_symbol1 s + | Slist0sep (s, t, osep) -> + fprintf ppf "LIST0 %a SEP %a%s" print_symbol1 s print_symbol1 t + (if osep then " OPT_SEP" else "") + | Slist1 s -> fprintf ppf "LIST1 %a" print_symbol1 s + | Slist1sep (s, t, osep) -> + fprintf ppf "LIST1 %a SEP %a%s" print_symbol1 s print_symbol1 t + (if osep then " OPT_SEP" else "") + | Sopt s -> fprintf ppf "OPT %a" print_symbol1 s + | Sflag s -> fprintf ppf "FLAG %a" print_symbol1 s + | Stoken (con, prm) when con <> "" && prm <> "" -> + fprintf ppf "%s@ %a" con print_str prm + | Svala (_, s) -> fprintf ppf "V %a" print_symbol s + | Snterml (e, l) -> + fprintf ppf "%s%s@ LEVEL@ %a" e.ename (if e.elocal then "*" else "") + print_str l + | Snterm _ | Snext | Sself | Scut | 
Stoken _ | Stree _ as s -> + print_symbol1 ppf s +and print_meta ppf n sl = + let rec loop i = + function + [] -> () + | s :: sl -> + let j = + try String.index_from n i ' ' with Not_found -> String.length n + in + fprintf ppf "%s %a" (String.sub n i (j - i)) print_symbol1 s; + if sl = [] then () + else + begin fprintf ppf " "; loop (min (j + 1) (String.length n)) sl end + in + loop 0 sl +and print_symbol1 ppf = + function + Sfacto s -> print_symbol1 ppf s + | Snterm e -> fprintf ppf "%s%s" e.ename (if e.elocal then "*" else "") + | Sself -> pp_print_string ppf "SELF" + | Snext -> pp_print_string ppf "NEXT" + | Scut -> pp_print_string ppf "/" + | Stoken ("", s) -> print_str ppf s + | Stoken (con, "") -> pp_print_string ppf con + | Stree t -> print_level ppf pp_print_space (flatten_tree t) + | Smeta (_, _, _) | Snterml (_, _) | Slist0 _ | Slist0sep (_, _, _) | + Slist1 _ | Slist1sep (_, _, _) | Sopt _ | Sflag _ | Stoken _ | + Svala (_, _) as s -> + fprintf ppf "(%a)" print_symbol s +and print_rule ppf symbols = + fprintf ppf "@[<hov 0>"; + let _ = + List.fold_left + (fun sep symbol -> + fprintf ppf "%t%a" sep print_symbol symbol; + fun ppf -> fprintf ppf ";@ ") + (fun ppf -> ()) symbols + in + fprintf ppf "@]" +and print_level ppf pp_print_space rules = + fprintf ppf "@[<hov 0>[ "; + let _ = + List.fold_left + (fun sep rule -> + fprintf ppf "%t%a" sep print_rule rule; + fun ppf -> fprintf ppf "%a| " pp_print_space ()) + (fun ppf -> ()) rules + in + fprintf ppf " ]@]" + +let print_levels ppf elev = + let _ = + List.fold_left + (fun sep lev -> + let rules = + List.map (fun t -> Sself :: t) (flatten_tree lev.lsuffix) @ + flatten_tree lev.lprefix + in + fprintf ppf "%t@[<hov 2>" sep; + begin match lev.lname with + Some n -> fprintf ppf "%a@;<1 2>" print_str n + | None -> () + end; + begin match lev.assoc with + LeftA -> fprintf ppf "LEFTA" + | RightA -> fprintf ppf "RIGHTA" + | NonA -> fprintf ppf "NONA" + end; + fprintf ppf "@]@;<1 2>"; + print_level ppf 
pp_force_newline rules; + fun ppf -> fprintf ppf "@,| ") + (fun ppf -> ()) elev + in + () + +let print_entry ppf e = + fprintf ppf "@[<v 0>[ "; + begin match e.edesc with + Dlevels elev -> print_levels ppf elev + | Dparser _ -> fprintf ppf "<parser>" + end; + fprintf ppf " ]@]" + +let floc = ref (fun _ -> failwith "internal error when computing location") + +let loc_of_token_interval bp ep = + if bp == ep then + if bp == 0 then Ploc.dummy else Ploc.after (!floc (bp - 1)) 0 1 + else + let loc1 = !floc bp in let loc2 = !floc (pred ep) in Ploc.encl loc1 loc2 + +let name_of_symbol entry = + function + Snterm e -> "[" ^ e.ename ^ "]" + | Snterml (e, l) -> "[" ^ e.ename ^ " level " ^ l ^ "]" + | Sself | Snext -> "[" ^ entry.ename ^ "]" + | Stoken tok -> entry.egram.glexer.Plexing.tok_text tok + | _ -> "???" + +let rec get_token_list entry rev_tokl last_tok tree = + match tree with + Node {node = Stoken tok; son = son; brother = DeadEnd} -> + get_token_list entry (last_tok :: rev_tokl) (tok, None) son + | Node {node = Svala (ls, Stoken tok); son = son; brother = DeadEnd} -> + get_token_list entry (last_tok :: rev_tokl) (tok, Some ls) son + | _ -> if rev_tokl = [] then None else Some (rev_tokl, last_tok, tree) + +let rec name_of_symbol_failed entry = + function + Sfacto s -> name_of_symbol_failed entry s + | Slist0 s -> name_of_symbol_failed entry s + | Slist0sep (s, _, _) -> name_of_symbol_failed entry s + | Slist1 s -> name_of_symbol_failed entry s + | Slist1sep (s, _, _) -> name_of_symbol_failed entry s + | Sopt s -> name_of_symbol_failed entry s + | Sflag s -> name_of_symbol_failed entry s + | Stree t -> name_of_tree_failed entry t + | Svala (_, s) -> name_of_symbol_failed entry s + | Smeta (_, s :: _, _) -> name_of_symbol_failed entry s + | s -> name_of_symbol entry s +and name_of_tree_failed entry = + function + Node {node = s; brother = bro; son = son} -> + let tokl = + match s with + Stoken tok -> get_token_list entry [] (tok, None) son + | Svala (ls, Stoken tok) 
-> get_token_list entry [] (tok, Some ls) son + | _ -> None + in + begin match tokl with + None -> + let txt = name_of_symbol_failed entry s in + let txt = + match s, son with + Sopt _, Node _ -> txt ^ " or " ^ name_of_tree_failed entry son + | _ -> txt + in + let txt = + match bro with + DeadEnd | LocAct (_, _) -> txt + | Node _ -> txt ^ " or " ^ name_of_tree_failed entry bro + in + txt + | Some (rev_tokl, last_tok, son) -> + List.fold_left + (fun s (tok, _) -> + (if s = "" then "" else s ^ " ") ^ + entry.egram.glexer.Plexing.tok_text tok) + "" (List.rev (last_tok :: rev_tokl)) + end + | DeadEnd | LocAct (_, _) -> "???" + +let search_tree_in_entry prev_symb tree = + function + Dlevels levels -> + let rec search_levels = + function + [] -> tree + | level :: levels -> + match search_level level with + Some tree -> tree + | None -> search_levels levels + and search_level level = + match search_tree level.lsuffix with + Some t -> Some (Node {node = Sself; son = t; brother = DeadEnd}) + | None -> search_tree level.lprefix + and search_tree t = + if tree <> DeadEnd && t == tree then Some t + else + match t with + Node n -> + begin match search_symbol n.node with + Some symb -> + Some (Node {node = symb; son = n.son; brother = DeadEnd}) + | None -> + match search_tree n.son with + Some t -> + Some (Node {node = n.node; son = t; brother = DeadEnd}) + | None -> search_tree n.brother + end + | LocAct (_, _) | DeadEnd -> None + and search_symbol symb = + match symb with + Snterm _ | Snterml (_, _) | Slist0 _ | Slist0sep (_, _, _) | + Slist1 _ | Slist1sep (_, _, _) | Sopt _ | Stoken _ | Stree _ + when symb == prev_symb -> + Some symb + | Slist0 symb -> + begin match search_symbol symb with + Some symb -> Some (Slist0 symb) + | None -> None + end + | Slist0sep (symb, sep, b) -> + begin match search_symbol symb with + Some symb -> Some (Slist0sep (symb, sep, b)) + | None -> + match search_symbol sep with + Some sep -> Some (Slist0sep (symb, sep, b)) + | None -> None + end + | 
Slist1 symb -> + begin match search_symbol symb with + Some symb -> Some (Slist1 symb) + | None -> None + end + | Slist1sep (symb, sep, b) -> + begin match search_symbol symb with + Some symb -> Some (Slist1sep (symb, sep, b)) + | None -> + match search_symbol sep with + Some sep -> Some (Slist1sep (symb, sep, b)) + | None -> None + end + | Sopt symb -> + begin match search_symbol symb with + Some symb -> Some (Sopt symb) + | None -> None + end + | Stree t -> + begin match search_tree t with + Some t -> Some (Stree t) + | None -> None + end + | _ -> None + in + search_levels levels + | Dparser _ -> tree + +let error_verbose = ref false + +let tree_failed entry prev_symb_result prev_symb tree = + let txt = name_of_tree_failed entry tree in + let txt = + match prev_symb with + Slist0 s -> + let txt1 = name_of_symbol_failed entry s in + txt1 ^ " or " ^ txt ^ " expected" + | Slist1 s -> + let txt1 = name_of_symbol_failed entry s in + txt1 ^ " or " ^ txt ^ " expected" + | Slist0sep (s, sep, _) -> + begin match Obj.magic prev_symb_result with + [] -> + let txt1 = name_of_symbol_failed entry s in + txt1 ^ " or " ^ txt ^ " expected" + | _ -> + let txt1 = name_of_symbol_failed entry sep in + txt1 ^ " or " ^ txt ^ " expected" + end + | Slist1sep (s, sep, _) -> + begin match Obj.magic prev_symb_result with + [] -> + let txt1 = name_of_symbol_failed entry s in + txt1 ^ " or " ^ txt ^ " expected" + | _ -> + let txt1 = name_of_symbol_failed entry sep in + txt1 ^ " or " ^ txt ^ " expected" + end + | Sopt _ | Sflag _ | Stree _ | Svala (_, _) -> txt ^ " expected" + | _ -> txt ^ " expected after " ^ name_of_symbol_failed entry prev_symb + in + if !error_verbose then + begin let tree = search_tree_in_entry prev_symb tree entry.edesc in + let ppf = err_formatter in + fprintf ppf "@[<v 0>@,"; + fprintf ppf "----------------------------------@,"; + fprintf ppf "Parse error in entry [%s], rule:@;<0 2>" entry.ename; + fprintf ppf "@["; + print_level ppf pp_force_newline (flatten_tree 
tree); + fprintf ppf "@]@,"; + fprintf ppf "----------------------------------@,"; + fprintf ppf "@]@." + end; + txt ^ " (in [" ^ entry.ename ^ "])" + +let symb_failed entry prev_symb_result prev_symb symb = + let tree = Node {node = symb; brother = DeadEnd; son = DeadEnd} in + tree_failed entry prev_symb_result prev_symb tree + +external app : Obj.t -> 'a = "%identity" + +let is_level_labelled n lev = + match lev.lname with + Some n1 -> n = n1 + | None -> false + +let level_number entry lab = + let rec lookup levn = + function + [] -> failwith ("unknown level " ^ lab) + | lev :: levs -> + if is_level_labelled lab lev then levn else lookup (succ levn) levs + in + match entry.edesc with + Dlevels elev -> lookup 0 elev + | Dparser _ -> raise Not_found + +let rec top_symb entry = + function + Sself | Snext -> Snterm entry + | Snterml (e, _) -> Snterm e + | Slist1sep (s, sep, b) -> Slist1sep (top_symb entry s, sep, b) + | _ -> raise Stream.Failure + +let entry_of_symb entry = + function + Sself | Snext -> entry + | Snterm e -> e + | Snterml (e, _) -> e + | _ -> raise Stream.Failure + +let top_tree entry = + function + Node {node = s; brother = bro; son = son} -> + Node {node = top_symb entry s; brother = bro; son = son} + | LocAct (_, _) | DeadEnd -> raise Stream.Failure + +let skip_if_empty bp p strm = + if Stream.count strm == bp then Gramext.action (fun a -> p strm) + else raise Stream.Failure + +let continue entry bp a s son p1 (strm__ : _ Stream.t) = + let a = (entry_of_symb entry s).econtinue 0 bp a strm__ in + let act = + try p1 strm__ with + Stream.Failure -> raise (Stream.Error (tree_failed entry a s son)) + in + Gramext.action (fun _ -> app act a) + +let do_recover parser_of_tree entry nlevn alevn bp a s son + (strm__ : _ Stream.t) = + try parser_of_tree entry nlevn alevn (top_tree entry son) strm__ with + Stream.Failure -> + try + skip_if_empty bp (fun (strm__ : _ Stream.t) -> raise Stream.Failure) + strm__ + with Stream.Failure -> + continue entry bp a s 
son (parser_of_tree entry nlevn alevn son) + strm__ + +(* When [true], disable the error-recovery fallback below: parse errors are raised immediately. *) let strict_parsing = ref false + +(* Recover after the suffix parser for symbol [s] failed at position [bp]: in strict mode raise [Stream.Error]; otherwise delegate to [do_recover]. *) let recover parser_of_tree entry nlevn alevn bp a s son strm = + if !strict_parsing then raise (Stream.Error (tree_failed entry a s son)) + else do_recover parser_of_tree entry nlevn alevn bp a s son strm + +(* Highest token position peeked so far; maintained by [peek_nth]. *) let token_count = ref 0 + +(* [peek_nth n strm] returns the [n]-th token of [strm] (1-based) without consuming it, or [None] if the stream ends first; records the peeked position in [token_count]. *) let peek_nth n strm = + let list = Stream.npeek n strm in + token_count := Stream.count strm + n; + let rec loop list n = + match list, n with + x :: _, 1 -> Some x + | _ :: l, n -> loop l (n - 1) + | [], _ -> None + in + loop list n + +(* When set by a sub-parser, [call_and_push] drops the value just parsed instead of accumulating it. *) let item_skipped = ref false + +(* Run parser [ps] and cons its result onto accumulator [al], unless [ps] set [item_skipped]; the flag is cleared before and after the call. *) let call_and_push ps al strm = + item_skipped := false; + let a = ps strm in + let al = if !item_skipped then al else a :: al in item_skipped := false; al + +(* Build the matcher for token pattern [tok]. With [vala = Some al] the token may also match as an antiquotation ("V" patterns): the result is then wrapped in [Ploc.VaAnt] (antiquotation matched) or [Ploc.VaVal] (plain token). NOTE(review): result is deliberately untyped via [Obj.repr]/[Obj.magic]. *) let token_ematch gram (tok, vala) = + let tematch = gram.glexer.Plexing.tok_match tok in + match vala with + Some al -> + let pa = + match al with + [] -> + let t = "V " ^ fst tok in gram.glexer.Plexing.tok_match (t, "") + | al -> + let rec loop = + function + a :: al -> + let pa = gram.glexer.Plexing.tok_match ("V", a) in + let pal = loop al in + (fun tok -> try pa tok with Stream.Failure -> pal tok) + | [] -> fun tok -> raise Stream.Failure + in + loop al + in + (fun tok -> + try Obj.repr (Ploc.VaAnt (Obj.magic (pa tok : string))) with + Stream.Failure -> Obj.repr (Ploc.VaVal (tematch tok))) + | None -> fun tok -> Obj.repr (tematch tok : string) + +(* Compile a parsing tree into a stream parser. [nlevn] is the entry level used for [Snext]; [alevn] the level used for [Sself] continuations. *) let rec parser_of_tree entry nlevn alevn = + function + DeadEnd -> (fun (strm__ : _ Stream.t) -> raise Stream.Failure) + | LocAct (act, _) -> (fun (strm__ : _ Stream.t) -> act) + | Node {node = Sself; son = LocAct (act, _); brother = DeadEnd} -> + (fun (strm__ : _ Stream.t) -> + let a = entry.estart alevn strm__ in app act a) + | Node {node = Scut; son = son; brother = _} -> + parser_of_tree entry nlevn alevn son + | Node {node = Sself; son = LocAct (act, _); brother = bro} -> + let p2 = parser_of_tree entry nlevn alevn bro in + (fun (strm__ : _ Stream.t) -> + match + try Some
(entry.estart alevn strm__) with Stream.Failure -> None + with + Some a -> app act a + | _ -> p2 strm__) + | Node {node = s; son = son; brother = DeadEnd} -> + let tokl = + match s with + Stoken tok -> get_token_list entry [] (tok, None) son + | Svala (ls, Stoken tok) -> get_token_list entry [] (tok, Some ls) son + | _ -> None + in + begin match tokl with + None -> + let ps = parser_of_symbol entry nlevn s in + let p1 = parser_of_tree entry nlevn alevn son in + let p1 = parser_cont p1 entry nlevn alevn s son in + (fun (strm__ : _ Stream.t) -> + let bp = Stream.count strm__ in + let a = ps strm__ in + let act = + try p1 bp a strm__ with + Stream.Failure -> + raise (Stream.Error (tree_failed entry a s son)) + in + app act a) + | Some (rev_tokl, (last_tok, svala), son) -> + let lt = + let t = Stoken last_tok in + match svala with + Some l -> Svala (l, t) + | None -> t + in + let p1 = parser_of_tree entry nlevn alevn son in + let p1 = parser_cont p1 entry nlevn alevn lt son in + parser_of_token_list entry s son p1 + (fun (strm__ : _ Stream.t) -> raise Stream.Failure) rev_tokl + (last_tok, svala) + end + | Node {node = s; son = son; brother = bro} -> + let tokl = + match s with + Stoken tok -> get_token_list entry [] (tok, None) son + | Svala (ls, Stoken tok) -> get_token_list entry [] (tok, Some ls) son + | _ -> None + in + match tokl with + None -> + let ps = parser_of_symbol entry nlevn s in + let p1 = parser_of_tree entry nlevn alevn son in + let p1 = parser_cont p1 entry nlevn alevn s son in + let p2 = parser_of_tree entry nlevn alevn bro in + (fun (strm : _ Stream.t) -> + let bp = Stream.count strm in + match try Some (ps strm) with Stream.Failure -> None with + Some a -> + begin match + (try Some (p1 bp a strm) with Stream.Failure -> None) + with + Some act -> app act a + | None -> raise (Stream.Error (tree_failed entry a s son)) + end + | None -> p2 strm) + | Some (rev_tokl, (last_tok, vala), son) -> + let lt = + let t = Stoken last_tok in + match vala with + 
Some ls -> Svala (ls, t) + | None -> t + in + let p2 = parser_of_tree entry nlevn alevn bro in + let p1 = parser_of_tree entry nlevn alevn son in + let p1 = parser_cont p1 entry nlevn alevn lt son in + let p1 = + parser_of_token_list entry lt son p1 p2 rev_tokl (last_tok, vala) + in + fun (strm__ : _ Stream.t) -> + try p1 strm__ with Stream.Failure -> p2 strm__ +and parser_cont p1 entry nlevn alevn s son bp a (strm__ : _ Stream.t) = + try p1 strm__ with + Stream.Failure -> + recover parser_of_tree entry nlevn alevn bp a s son strm__ +and parser_of_token_list entry s son p1 p2 rev_tokl last_tok = + let plast = + let n = List.length rev_tokl + 1 in + let tematch = token_ematch entry.egram last_tok in + let ps strm = + match peek_nth n strm with + Some tok -> + let r = tematch tok in + for _i = 1 to n do Stream.junk strm done; Obj.repr r + | None -> raise Stream.Failure + in + fun (strm : _ Stream.t) -> + let bp = Stream.count strm in + let a = ps strm in + match try Some (p1 bp a strm) with Stream.Failure -> None with + Some act -> app act a + | None -> raise (Stream.Error (tree_failed entry a s son)) + in + match List.rev rev_tokl with + [] -> (fun (strm__ : _ Stream.t) -> plast strm__) + | tok :: tokl -> + let tematch = token_ematch entry.egram tok in + let ps strm = + match peek_nth 1 strm with + Some tok -> tematch tok + | None -> raise Stream.Failure + in + let p1 = + let rec loop n = + function + [] -> plast + | tok :: tokl -> + let tematch = token_ematch entry.egram tok in + let ps strm = + match peek_nth n strm with + Some tok -> tematch tok + | None -> raise Stream.Failure + in + let p1 = loop (n + 1) tokl in + fun (strm__ : _ Stream.t) -> + let a = ps strm__ in let act = p1 strm__ in app act a + in + loop 2 tokl + in + fun (strm__ : _ Stream.t) -> + let a = ps strm__ in let act = p1 strm__ in app act a +and parser_of_symbol entry nlevn = + function + Sfacto s -> parser_of_symbol entry nlevn s + | Smeta (_, symbl, act) -> + let act = Obj.magic act entry 
symbl in + Obj.magic + (List.fold_left + (fun act symb -> Obj.magic act (parser_of_symbol entry nlevn symb)) + act symbl) + | Slist0 s -> + let ps = call_and_push (parser_of_symbol entry nlevn s) in + let rec loop al (strm__ : _ Stream.t) = + match try Some (ps al strm__) with Stream.Failure -> None with + Some al -> loop al strm__ + | _ -> al + in + (fun (strm__ : _ Stream.t) -> + let a = loop [] strm__ in Obj.repr (List.rev a)) + | Slist0sep (symb, sep, false) -> + let ps = call_and_push (parser_of_symbol entry nlevn symb) in + let pt = parser_of_symbol entry nlevn sep in + let rec kont al (strm__ : _ Stream.t) = + match try Some (pt strm__) with Stream.Failure -> None with + Some v -> + let al = + try ps al strm__ with + Stream.Failure -> + raise (Stream.Error (symb_failed entry v sep symb)) + in + kont al strm__ + | _ -> al + in + (fun (strm__ : _ Stream.t) -> + match try Some (ps [] strm__) with Stream.Failure -> None with + Some al -> let a = kont al strm__ in Obj.repr (List.rev a) + | _ -> Obj.repr []) + | Slist0sep (symb, sep, true) -> + let ps = call_and_push (parser_of_symbol entry nlevn symb) in + let pt = parser_of_symbol entry nlevn sep in + let rec kont al (strm__ : _ Stream.t) = + match try Some (pt strm__) with Stream.Failure -> None with + Some v -> + begin match + (try Some (ps al strm__) with Stream.Failure -> None) + with + Some al -> kont al strm__ + | _ -> al + end + | _ -> al + in + (fun (strm__ : _ Stream.t) -> + match try Some (ps [] strm__) with Stream.Failure -> None with + Some al -> let a = kont al strm__ in Obj.repr (List.rev a) + | _ -> Obj.repr []) + | Slist1 s -> + let ps = call_and_push (parser_of_symbol entry nlevn s) in + let rec loop al (strm__ : _ Stream.t) = + match try Some (ps al strm__) with Stream.Failure -> None with + Some al -> loop al strm__ + | _ -> al + in + (fun (strm__ : _ Stream.t) -> + let al = ps [] strm__ in + let a = loop al strm__ in Obj.repr (List.rev a)) + | Slist1sep (symb, sep, false) -> + let ps = 
call_and_push (parser_of_symbol entry nlevn symb) in + let pt = parser_of_symbol entry nlevn sep in + let rec kont al (strm__ : _ Stream.t) = + match try Some (pt strm__) with Stream.Failure -> None with + Some v -> + let al = + try ps al strm__ with + Stream.Failure -> + let a = + try parse_top_symb entry symb strm__ with + Stream.Failure -> + raise (Stream.Error (symb_failed entry v sep symb)) + in + a :: al + in + kont al strm__ + | _ -> al + in + (fun (strm__ : _ Stream.t) -> + let al = ps [] strm__ in + let a = kont al strm__ in Obj.repr (List.rev a)) + | Slist1sep (symb, sep, true) -> + let ps = call_and_push (parser_of_symbol entry nlevn symb) in + let pt = parser_of_symbol entry nlevn sep in + let rec kont al (strm__ : _ Stream.t) = + match try Some (pt strm__) with Stream.Failure -> None with + Some v -> + begin match + (try Some (ps al strm__) with Stream.Failure -> None) + with + Some al -> kont al strm__ + | _ -> + match + try Some (parse_top_symb entry symb strm__) with + Stream.Failure -> None + with + Some a -> kont (a :: al) strm__ + | _ -> al + end + | _ -> al + in + (fun (strm__ : _ Stream.t) -> + let al = ps [] strm__ in + let a = kont al strm__ in Obj.repr (List.rev a)) + | Sopt s -> + let ps = parser_of_symbol entry nlevn s in + (fun (strm__ : _ Stream.t) -> + match try Some (ps strm__) with Stream.Failure -> None with + Some a -> Obj.repr (Some a) + | _ -> Obj.repr None) + | Sflag s -> + let ps = parser_of_symbol entry nlevn s in + (fun (strm__ : _ Stream.t) -> + match try Some (ps strm__) with Stream.Failure -> None with + Some _ -> Obj.repr true + | _ -> Obj.repr false) + | Stree t -> + let pt = parser_of_tree entry 1 0 t in + (fun (strm__ : _ Stream.t) -> + let bp = Stream.count strm__ in + let a = pt strm__ in + let ep = Stream.count strm__ in + let loc = loc_of_token_interval bp ep in app a loc) + | Svala (al, s) -> + let pa = + match al with + [] -> + let t = + match s with + Sflag _ -> Some "V FLAG" + | Sopt _ -> Some "V OPT" + | Slist0 
_ | Slist0sep (_, _, _) -> Some "V LIST" + | Slist1 _ | Slist1sep (_, _, _) -> Some "V LIST" + | Stoken (con, "") -> Some ("V " ^ con) + | _ -> None + in + begin match t with + Some t -> parser_of_token entry (t, "") + | None -> fun (strm__ : _ Stream.t) -> raise Stream.Failure + end + | al -> + let rec loop = + function + a :: al -> + let pa = parser_of_token entry ("V", a) in + let pal = loop al in + (fun (strm__ : _ Stream.t) -> + try pa strm__ with Stream.Failure -> pal strm__) + | [] -> fun (strm__ : _ Stream.t) -> raise Stream.Failure + in + loop al + in + let ps = parser_of_symbol entry nlevn s in + (fun (strm__ : _ Stream.t) -> + match try Some (pa strm__) with Stream.Failure -> None with + Some a -> Obj.repr (Ploc.VaAnt (Obj.magic a : string)) + | _ -> let a = ps strm__ in Obj.repr (Ploc.VaVal a)) + | Snterm e -> (fun (strm__ : _ Stream.t) -> e.estart 0 strm__) + | Snterml (e, l) -> + (fun (strm__ : _ Stream.t) -> e.estart (level_number e l) strm__) + | Sself -> (fun (strm__ : _ Stream.t) -> entry.estart 0 strm__) + | Snext -> (fun (strm__ : _ Stream.t) -> entry.estart nlevn strm__) + | Scut -> (fun (strm__ : _ Stream.t) -> Obj.repr ()) + | Stoken tok -> parser_of_token entry tok +and parser_of_token entry tok = + let f = entry.egram.glexer.Plexing.tok_match tok in + fun strm -> + match Stream.peek strm with + Some tok -> let r = f tok in Stream.junk strm; Obj.repr r + | None -> raise Stream.Failure +and parse_top_symb entry symb = parser_of_symbol entry 0 (top_symb entry symb) + +let rec start_parser_of_levels entry clevn = + function + [] -> (fun levn (strm__ : _ Stream.t) -> raise Stream.Failure) + | lev :: levs -> + let p1 = start_parser_of_levels entry (succ clevn) levs in + match lev.lprefix with + DeadEnd -> p1 + | tree -> + let alevn = + match lev.assoc with + LeftA | NonA -> succ clevn + | RightA -> clevn + in + let p2 = parser_of_tree entry (succ clevn) alevn tree in + match levs with + [] -> + (fun levn strm -> + (* this code should be there but 
is commented to preserve + compatibility with previous versions... with this code, + the grammar entry e: [[ "x"; a = e | "y" ]] should fail + because it should be: e: [RIGHTA[ "x"; a = e | "y" ]]... + if levn > clevn then match strm with parser [] + else + *) + let (strm__ : _ Stream.t) = strm in + let bp = Stream.count strm__ in + let act = p2 strm__ in + let ep = Stream.count strm__ in + let a = app act (loc_of_token_interval bp ep) in + entry.econtinue levn bp a strm) + | _ -> + fun levn strm -> + if levn > clevn then p1 levn strm + else + let (strm__ : _ Stream.t) = strm in + let bp = Stream.count strm__ in + match try Some (p2 strm__) with Stream.Failure -> None with + Some act -> + let ep = Stream.count strm__ in + let a = app act (loc_of_token_interval bp ep) in + entry.econtinue levn bp a strm + | _ -> p1 levn strm__ + +let rec continue_parser_of_levels entry clevn = + function + [] -> (fun levn bp a (strm__ : _ Stream.t) -> raise Stream.Failure) + | lev :: levs -> + let p1 = continue_parser_of_levels entry (succ clevn) levs in + match lev.lsuffix with + DeadEnd -> p1 + | tree -> + let alevn = + match lev.assoc with + LeftA | NonA -> succ clevn + | RightA -> clevn + in + let p2 = parser_of_tree entry (succ clevn) alevn tree in + fun levn bp a strm -> + if levn > clevn then p1 levn bp a strm + else + let (strm__ : _ Stream.t) = strm in + try p1 levn bp a strm__ with + Stream.Failure -> + let act = p2 strm__ in + let ep = Stream.count strm__ in + let a = app act a (loc_of_token_interval bp ep) in + entry.econtinue levn bp a strm + +let continue_parser_of_entry entry = + match entry.edesc with + Dlevels elev -> + let p = continue_parser_of_levels entry 0 elev in + (fun levn bp a (strm__ : _ Stream.t) -> + try p levn bp a strm__ with Stream.Failure -> a) + | Dparser p -> fun levn bp a (strm__ : _ Stream.t) -> raise Stream.Failure + +let empty_entry ename levn strm = + raise (Stream.Error ("entry [" ^ ename ^ "] is empty")) + +let start_parser_of_entry entry = 
+ match entry.edesc with + Dlevels [] -> empty_entry entry.ename + | Dlevels elev -> start_parser_of_levels entry 0 elev + | Dparser p -> fun levn strm -> p strm + +(* Extend syntax *) + +(* Install lazily-compiled parser fields on [entry]: the first call to [estart]/[econtinue] compiles the parser from [edesc], caches it by overwriting the field, then runs it. *) let init_entry_functions entry = + entry.estart <- + (fun lev strm -> + let f = start_parser_of_entry entry in entry.estart <- f; f lev strm); + entry.econtinue <- + (fun lev bp a strm -> + let f = continue_parser_of_entry entry in + entry.econtinue <- f; f lev bp a strm) + +(* Add [rules] at [position] in [entry] and reset its lazy parser fields; a [Plexing.Error] raised while building the levels is reported on stderr and aborts via [failwith]. *) let extend_entry entry position rules = + try + let elev = Gramext.levels_of_rules entry position rules in + entry.edesc <- Dlevels elev; init_entry_functions entry + with Plexing.Error s -> + Printf.eprintf "Lexer initialization error:\n- %s\n" s; + flush stderr; + failwith "Grammar.extend" + +(* Deleting a rule *) + +(* Remove the rule identified by symbol list [sl] from [entry] and invalidate its compiled parsers; no-op for [Dparser] entries. *) let delete_rule entry sl = + match entry.edesc with + Dlevels levs -> + let levs = Gramext.delete_rule_in_level_list entry sl levs in + entry.edesc <- Dlevels levs; + entry.estart <- + (fun lev strm -> + let f = start_parser_of_entry entry in + entry.estart <- f; f lev strm); + entry.econtinue <- + (fun lev bp a strm -> + let f = continue_parser_of_entry entry in + entry.econtinue <- f; f lev bp a strm) + | Dparser _ -> () + +(* Normal interface *) + +type token = string * string +type g = token Gramext.grammar + +type ('self, 'a) ty_symbol = token Gramext.g_symbol +type ('self, 'f, 'r) ty_rule = ('self, Obj.t) ty_symbol list +type 'a ty_production = ('a, Obj.t, Obj.t) ty_rule * Gramext.g_action + +(* Fresh token-usage table mapping a token pattern to its use counter. *) let create_toktab () = Hashtbl.create 301 +(* Create a grammar from a lexer, with an empty token table. *) let gcreate glexer = + {gtokens = create_toktab (); glexer = glexer } + +(* [tokens g con] lists the (token string, use count) pairs registered in [g] for constructor [con]. *) let tokens g con = + let list = ref [] in + Hashtbl.iter + (fun (p_con, p_prm) c -> if p_con = con then list := (p_prm, !c) :: !list) + g.gtokens; + !list + +let glexer g = g.glexer + +(* A parsing session: the source character stream together with the token stream and location function produced by the lexer. *) type 'te gen_parsable = + { pa_chr_strm : char Stream.t; + pa_tok_strm : 'te Stream.t; + pa_loc_func : Plexing.location_function } + +type parsable = token gen_parsable + +let parsable g cs = + let (ts,
lf) = g.glexer.Plexing.tok_func cs in + {pa_chr_strm = cs; pa_tok_strm = ts; pa_loc_func = lf} + +let parse_parsable entry p = + let efun = entry.estart 0 in + let ts = p.pa_tok_strm in + let cs = p.pa_chr_strm in + let fun_loc = p.pa_loc_func in + let restore = + let old_floc = !floc in + let old_tc = !token_count in + fun () -> floc := old_floc; token_count := old_tc + in + let get_loc () = + try + let cnt = Stream.count ts in + let loc = fun_loc cnt in + if !token_count - 1 <= cnt then loc + else Ploc.encl loc (fun_loc (!token_count - 1)) + with Failure _ -> Ploc.make_unlined (Stream.count cs, Stream.count cs + 1) + in + floc := fun_loc; + token_count := 0; + try let r = efun ts in restore (); r with + Stream.Failure -> + let loc = get_loc () in + restore (); + Ploc.raise loc (Stream.Error ("illegal begin of " ^ entry.ename)) + | Stream.Error _ as exc -> + let loc = get_loc () in restore (); Ploc.raise loc exc + | exc -> + let loc = Stream.count cs, Stream.count cs + 1 in + restore (); Ploc.raise (Ploc.make_unlined loc) exc + +let find_entry e s = + let rec find_levels = + function + [] -> None + | lev :: levs -> + match find_tree lev.lsuffix with + None -> + begin match find_tree lev.lprefix with + None -> find_levels levs + | x -> x + end + | x -> x + and find_symbol = + function + Sfacto s -> find_symbol s + | Snterm e -> if e.ename = s then Some e else None + | Snterml (e, _) -> if e.ename = s then Some e else None + | Smeta (_, sl, _) -> find_symbol_list sl + | Slist0 s -> find_symbol s + | Slist0sep (s, _, _) -> find_symbol s + | Slist1 s -> find_symbol s + | Slist1sep (s, _, _) -> find_symbol s + | Sopt s -> find_symbol s + | Sflag s -> find_symbol s + | Stree t -> find_tree t + | Svala (_, s) -> find_symbol s + | Sself | Snext | Scut | Stoken _ -> None + and find_symbol_list = + function + s :: sl -> + begin match find_symbol s with + None -> find_symbol_list sl + | x -> x + end + | [] -> None + and find_tree = + function + Node {node = s; brother = bro; 
son = son} -> + begin match find_symbol s with + None -> + begin match find_tree bro with + None -> find_tree son + | x -> x + end + | x -> x + end + | LocAct (_, _) | DeadEnd -> None + in + match e.edesc with + Dlevels levs -> + begin match find_levels levs with + Some e -> e + | None -> raise Not_found + end + | Dparser _ -> raise Not_found +module Entry = + struct + type te = token + type 'a e = te g_entry + let create g n = + {egram = g; ename = n; elocal = false; estart = empty_entry n; + econtinue = (fun _ _ _ (strm__ : _ Stream.t) -> raise Stream.Failure); + edesc = Dlevels []} + let parse_parsable (entry : 'a e) p : 'a = + Obj.magic (parse_parsable entry p : Obj.t) + let parse (entry : 'a e) cs : 'a = + let parsable = parsable entry.egram cs in parse_parsable entry parsable + let parse_parsable_all (entry : 'a e) p : 'a = + begin try Obj.magic [(parse_parsable entry p : Obj.t)] with + Stream.Failure | Stream.Error _ -> [] + end + let parse_all (entry : 'a e) cs : 'a = + let parsable = parsable entry.egram cs in + parse_parsable_all entry parsable + let parse_token_stream (entry : 'a e) ts : 'a = + Obj.magic (entry.estart 0 ts : Obj.t) + let _warned_using_parse_token = ref false + let parse_token (entry : 'a e) ts : 'a = + (* commented: too often warned in Coq... + if not warned_using_parse_token.val then do { + eprintf "<W> use of Grammar.Entry.parse_token "; + eprintf "deprecated since 2017-06-16\n%!"; + eprintf "use Grammar.Entry.parse_token_stream instead\n%! "; + warned_using_parse_token.val := True + } + else (); + *) + parse_token_stream entry ts + let name e = e.ename + let of_parser g n (p : te Stream.t -> 'a) : 'a e = + {egram = g; ename = n; elocal = false; + estart = (fun _ -> (Obj.magic p : te Stream.t -> Obj.t)); + econtinue = (fun _ _ _ (strm__ : _ Stream.t) -> raise Stream.Failure); + edesc = Dparser (Obj.magic p : te Stream.t -> Obj.t)} + external obj : 'a e -> te Gramext.g_entry = "%identity" + let print ppf e = fprintf ppf "%a@." 
print_entry (obj e) + let find e s = find_entry (obj e) s + end + +(* Unsafe *) + +let clear_entry e = + e.estart <- (fun _ (strm__ : _ Stream.t) -> raise Stream.Failure); + e.econtinue <- (fun _ _ _ (strm__ : _ Stream.t) -> raise Stream.Failure); + match e.edesc with + Dlevels _ -> e.edesc <- Dlevels [] + | Dparser _ -> () + +let gram_reinit g glexer = Hashtbl.clear g.gtokens; g.glexer <- glexer + +module Unsafe = + struct + let gram_reinit = gram_reinit + let clear_entry = clear_entry + end + +(* Functorial interface *) + +module type GLexerType = sig type te val lexer : te Plexing.lexer end + +module type S = + sig + type te + type parsable + val parsable : char Stream.t -> parsable + val tokens : string -> (string * int) list + val glexer : te Plexing.lexer + module Entry : + sig + type 'a e + val create : string -> 'a e + val parse : 'a e -> parsable -> 'a + val name : 'a e -> string + val of_parser : string -> (te Stream.t -> 'a) -> 'a e + val parse_token_stream : 'a e -> te Stream.t -> 'a + val print : Format.formatter -> 'a e -> unit + external obj : 'a e -> te Gramext.g_entry = "%identity" + val parse_token : 'a e -> te Stream.t -> 'a + end + type ('self, 'a) ty_symbol + type ('self, 'f, 'r) ty_rule + type 'a ty_production + val s_facto : ('self, 'a) ty_symbol -> ('self, 'a) ty_symbol + val s_nterm : 'a Entry.e -> ('self, 'a) ty_symbol + val s_nterml : 'a Entry.e -> string -> ('self, 'a) ty_symbol + val s_list0 : ('self, 'a) ty_symbol -> ('self, 'a list) ty_symbol + val s_list0sep : + ('self, 'a) ty_symbol -> ('self, 'b) ty_symbol -> bool -> + ('self, 'a list) ty_symbol + val s_list1 : ('self, 'a) ty_symbol -> ('self, 'a list) ty_symbol + val s_list1sep : + ('self, 'a) ty_symbol -> ('self, 'b) ty_symbol -> bool -> + ('self, 'a list) ty_symbol + val s_opt : ('self, 'a) ty_symbol -> ('self, 'a option) ty_symbol + val s_flag : ('self, 'a) ty_symbol -> ('self, bool) ty_symbol + val s_self : ('self, 'self) ty_symbol + val s_next : ('self, 'self) ty_symbol + val 
s_token : Plexing.pattern -> ('self, string) ty_symbol + val s_rules : 'a ty_production list -> ('self, 'a) ty_symbol + val s_vala : + string list -> ('self, 'a) ty_symbol -> ('self, 'a Ploc.vala) ty_symbol + val r_stop : ('self, 'r, 'r) ty_rule + val r_next : + ('self, 'a, 'r) ty_rule -> ('self, 'b) ty_symbol -> + ('self, 'b -> 'a, 'r) ty_rule + val r_cut : ('self, 'a, 'r) ty_rule -> ('self, 'a, 'r) ty_rule + val production : ('a, 'f, Ploc.t -> 'a) ty_rule * 'f -> 'a ty_production + module Unsafe : + sig + val gram_reinit : te Plexing.lexer -> unit + val clear_entry : 'a Entry.e -> unit + end + val extend : + 'a Entry.e -> Gramext.position option -> + (string option * Gramext.g_assoc option * + (te Gramext.g_symbol list * Gramext.g_action) list) + list -> + unit + val safe_extend : + 'a Entry.e -> Gramext.position option -> + (string option * Gramext.g_assoc option * 'a ty_production list) + list -> + unit + val delete_rule : 'a Entry.e -> te Gramext.g_symbol list -> unit + val safe_delete_rule : 'a Entry.e -> ('a, 'r, 'f) ty_rule -> unit + end + +module GMake (L : GLexerType) = + struct + type te = L.te + type parsable = te gen_parsable + let gram = gcreate L.lexer + let parsable cs = + let (ts, lf) = L.lexer.Plexing.tok_func cs in + {pa_chr_strm = cs; pa_tok_strm = ts; pa_loc_func = lf} + let tokens = tokens gram + let glexer = glexer gram + module Entry = + struct + type 'a e = te g_entry + let create n = + {egram = gram; ename = n; elocal = false; estart = empty_entry n; + econtinue = + (fun _ _ _ (strm__ : _ Stream.t) -> raise Stream.Failure); + edesc = Dlevels []} + external obj : 'a e -> te Gramext.g_entry = "%identity" + let parse (e : 'a e) p : 'a = + Obj.magic (parse_parsable e p : Obj.t) + let parse_token_stream (e : 'a e) ts : 'a = + Obj.magic (e.estart 0 ts : Obj.t) + let _warned_using_parse_token = ref false + let parse_token (entry : 'a e) ts : 'a = + (* commented: too often warned in Coq... 
+ if not warned_using_parse_token.val then do { + eprintf "<W> use of Entry.parse_token "; + eprintf "deprecated since 2017-06-16\n%!"; + eprintf "use Entry.parse_token_stream instead\n%! "; + warned_using_parse_token.val := True + } + else (); + *) + parse_token_stream entry ts + let name e = e.ename + let of_parser n (p : te Stream.t -> 'a) : 'a e = + {egram = gram; ename = n; elocal = false; + estart = (fun _ -> (Obj.magic p : te Stream.t -> Obj.t)); + econtinue = + (fun _ _ _ (strm__ : _ Stream.t) -> raise Stream.Failure); + edesc = Dparser (Obj.magic p : te Stream.t -> Obj.t)} + let print ppf e = fprintf ppf "%a@." print_entry (obj e) + end + type ('self, 'a) ty_symbol = te Gramext.g_symbol + type ('self, 'f, 'r) ty_rule = ('self, Obj.t) ty_symbol list + type 'a ty_production = ('a, Obj.t, Obj.t) ty_rule * Gramext.g_action + let s_facto s = Sfacto s + let s_nterm e = Snterm e + let s_nterml e l = Snterml (e, l) + let s_list0 s = Slist0 s + let s_list0sep s sep b = Slist0sep (s, sep, b) + let s_list1 s = Slist1 s + let s_list1sep s sep b = Slist1sep (s, sep, b) + let s_opt s = Sopt s + let s_flag s = Sflag s + let s_self = Sself + let s_next = Snext + let s_token tok = Stoken tok + let s_rules (t : Obj.t ty_production list) = Gramext.srules (Obj.magic t) + let s_vala sl s = Svala (sl, s) + let r_stop = [] + let r_next r s = r @ [s] + let r_cut r = r @ [Scut] + let production + (p : ('a, 'f, Ploc.t -> 'a) ty_rule * 'f) : 'a ty_production = + Obj.magic p + module Unsafe = + struct + let gram_reinit = gram_reinit gram + let clear_entry = clear_entry + end + let extend = extend_entry + let safe_extend e pos + (r : + (string option * Gramext.g_assoc option * Obj.t ty_production list) + list) = + extend e pos (Obj.magic r) + let delete_rule e r = delete_rule (Entry.obj e) r + let safe_delete_rule = delete_rule + end diff --git a/gramlib/grammar.mli b/gramlib/grammar.mli new file mode 100644 index 0000000000..54b7eb5539 --- /dev/null +++ b/gramlib/grammar.mli @@ -0,0 
+1,170 @@ +(* camlp5r *) +(* grammar.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +(** Extensible grammars. + + This module implements the Camlp5 extensible grammars system. + Grammars entries can be extended using the [EXTEND] statement, + added by loading the Camlp5 [pa_extend.cmo] file. *) + +type g + (** The type for grammars, holding entries. *) +type token = string * string + +type parsable +val parsable : g -> char Stream.t -> parsable + (** Type and value allowing to keep the same token stream between + several calls of entries of the same grammar, to prevent possible + loss of tokens. To be used with [Entry.parse_parsable] below *) + +module Entry : + sig + type 'a e + val create : g -> string -> 'a e + val parse : 'a e -> char Stream.t -> 'a + val parse_all : 'a e -> char Stream.t -> 'a list + val parse_parsable : 'a e -> parsable -> 'a + val name : 'a e -> string + val of_parser : g -> string -> (token Stream.t -> 'a) -> 'a e + val parse_token_stream : 'a e -> token Stream.t -> 'a + val print : Format.formatter -> 'a e -> unit + val find : 'a e -> string -> Obj.t e + external obj : 'a e -> token Gramext.g_entry = "%identity" + val parse_token : 'a e -> token Stream.t -> 'a + end + (** Module to handle entries. +- [Entry.e] is the type for entries returning values of type ['a]. +- [Entry.create g n] creates a new entry named [n] in the grammar [g]. +- [Entry.parse e] returns the stream parser of the entry [e]. +- [Entry.parse_all e] returns the stream parser returning all possible + values while parsing with the entry [e]: may return more than one + value when the parsing algorithm is [Backtracking] +- [Entry.parse_all e] returns the parser returning all possible values. +- [Entry.parse_parsable e] returns the parsable parser of the entry [e]. +- [Entry.name e] returns the name of the entry [e]. +- [Entry.of_parser g n p] makes an entry from a token stream parser. +- [Entry.parse_token_stream e] returns the token stream parser of the + entry [e]. 
+- [Entry.print e] displays the entry [e] using [Format]. +- [Entry.find e s] finds the entry named [s] in the rules of [e]. +- [Entry.obj e] converts an entry into a [Gramext.g_entry] allowing + to see what it holds. +- [Entry.parse_token]: deprecated since 2017-06-16; old name for + [Entry.parse_token_stream] *) + +type ('self, 'a) ty_symbol +(** Type of grammar symbols. A type-safe wrapper around Gramext.symbol. The + first type argument is the type of the ambient entry, the second one is the + type of the produced value. *) + +type ('self, 'f, 'r) ty_rule + +type 'a ty_production + +(** {6 Clearing grammars and entries} *) + +module Unsafe : + sig + val gram_reinit : g -> token Plexing.lexer -> unit + val clear_entry : 'a Entry.e -> unit + end + (** Module for clearing grammars and entries. To be manipulated with + care, because: 1) reinitializing a grammar destroys all tokens + and there may have problems with the associated lexer if there + are keywords; 2) clearing an entry does not destroy the tokens + used only by itself. +- [Unsafe.reinit_gram g lex] removes the tokens of the grammar +- and sets [lex] as a new lexer for [g]. Warning: the lexer +- itself is not reinitialized. +- [Unsafe.clear_entry e] removes all rules of the entry [e]. *) + +(** {6 Functorial interface} *) + + (** Alternative for grammars use. Grammars are no more Ocaml values: + there is no type for them. Modules generated preserve the + rule "an entry cannot call an entry of another grammar" by + normal OCaml typing. *) + +module type GLexerType = sig type te val lexer : te Plexing.lexer end + (** The input signature for the functor [Grammar.GMake]: [te] is the + type of the tokens. 
*) + +module type S = + sig + type te + type parsable + val parsable : char Stream.t -> parsable + val tokens : string -> (string * int) list + val glexer : te Plexing.lexer + module Entry : + sig + type 'a e + val create : string -> 'a e + val parse : 'a e -> parsable -> 'a + val name : 'a e -> string + val of_parser : string -> (te Stream.t -> 'a) -> 'a e + val parse_token_stream : 'a e -> te Stream.t -> 'a + val print : Format.formatter -> 'a e -> unit + external obj : 'a e -> te Gramext.g_entry = "%identity" + val parse_token : 'a e -> te Stream.t -> 'a + end + type ('self, 'a) ty_symbol + type ('self, 'f, 'r) ty_rule + type 'a ty_production + val s_facto : ('self, 'a) ty_symbol -> ('self, 'a) ty_symbol + val s_nterm : 'a Entry.e -> ('self, 'a) ty_symbol + val s_nterml : 'a Entry.e -> string -> ('self, 'a) ty_symbol + val s_list0 : ('self, 'a) ty_symbol -> ('self, 'a list) ty_symbol + val s_list0sep : + ('self, 'a) ty_symbol -> ('self, 'b) ty_symbol -> bool -> + ('self, 'a list) ty_symbol + val s_list1 : ('self, 'a) ty_symbol -> ('self, 'a list) ty_symbol + val s_list1sep : + ('self, 'a) ty_symbol -> ('self, 'b) ty_symbol -> bool -> + ('self, 'a list) ty_symbol + val s_opt : ('self, 'a) ty_symbol -> ('self, 'a option) ty_symbol + val s_flag : ('self, 'a) ty_symbol -> ('self, bool) ty_symbol + val s_self : ('self, 'self) ty_symbol + val s_next : ('self, 'self) ty_symbol + val s_token : Plexing.pattern -> ('self, string) ty_symbol + val s_rules : 'a ty_production list -> ('self, 'a) ty_symbol + val s_vala : + string list -> ('self, 'a) ty_symbol -> ('self, 'a Ploc.vala) ty_symbol + val r_stop : ('self, 'r, 'r) ty_rule + val r_next : + ('self, 'a, 'r) ty_rule -> ('self, 'b) ty_symbol -> + ('self, 'b -> 'a, 'r) ty_rule + val r_cut : ('self, 'a, 'r) ty_rule -> ('self, 'a, 'r) ty_rule + val production : ('a, 'f, Ploc.t -> 'a) ty_rule * 'f -> 'a ty_production + + module Unsafe : + sig + val gram_reinit : te Plexing.lexer -> unit + val clear_entry : 'a Entry.e -> unit 
+ end + val extend : + 'a Entry.e -> Gramext.position option -> + (string option * Gramext.g_assoc option * + (te Gramext.g_symbol list * Gramext.g_action) list) + list -> + unit + val safe_extend : + 'a Entry.e -> Gramext.position option -> + (string option * Gramext.g_assoc option * 'a ty_production list) + list -> + unit + val delete_rule : 'a Entry.e -> te Gramext.g_symbol list -> unit + val safe_delete_rule : 'a Entry.e -> ('a, 'f, 'r) ty_rule -> unit + end + (** Signature type of the functor [Grammar.GMake]. The types and + functions are almost the same than in generic interface, but: +- Grammars are not values. Functions holding a grammar as parameter + do not have this parameter yet. +- The type [parsable] is used in function [parse] instead of + the char stream, avoiding the possible loss of tokens. +- The type of tokens (expressions and patterns) can be any + type (instead of (string * string)); the module parameter + must specify a way to show them as (string * string) *) + +module GMake (L : GLexerType) : S with type te = L.te diff --git a/gramlib/plexing.ml b/gramlib/plexing.ml new file mode 100644 index 0000000000..beebcd016e --- /dev/null +++ b/gramlib/plexing.ml @@ -0,0 +1,217 @@ +(* camlp5r *) +(* plexing.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +type pattern = string * string + +exception Error of string + +type location = Ploc.t +type location_function = int -> location +type 'te lexer_func = char Stream.t -> 'te Stream.t * location_function + +type 'te lexer = + { tok_func : 'te lexer_func; + tok_using : pattern -> unit; + tok_removing : pattern -> unit; + mutable tok_match : pattern -> 'te -> string; + tok_text : pattern -> string; + mutable tok_comm : location list option } + +let make_loc = Ploc.make_unlined +let dummy_loc = Ploc.dummy + +let lexer_text (con, prm) = + if con = "" then "'" ^ prm ^ "'" + else if prm = "" then con + else con ^ " '" ^ prm ^ "'" + +let locerr () = failwith "Lexer: location function" +let loct_create () = ref 
(Array.make 1024 None), ref false +let loct_func (loct, ov) i = + match + if i < 0 || i >= Array.length !loct then + if !ov then Some dummy_loc else None + else Array.unsafe_get !loct i + with + Some loc -> loc + | None -> locerr () +let loct_add (loct, ov) i loc = + if i >= Array.length !loct then + let new_tmax = Array.length !loct * 2 in + if new_tmax < Sys.max_array_length then + let new_loct = Array.make new_tmax None in + Array.blit !loct 0 new_loct 0 (Array.length !loct); + loct := new_loct; + !loct.(i) <- Some loc + else ov := true + else !loct.(i) <- Some loc + +let make_stream_and_location next_token_loc = + let loct = loct_create () in + let ts = + Stream.from + (fun i -> + let (tok, loc) = next_token_loc () in loct_add loct i loc; Some tok) + in + ts, loct_func loct + +let lexer_func_of_parser next_token_loc cs = + let line_nb = ref 1 in + let bolpos = ref 0 in + make_stream_and_location (fun () -> next_token_loc (cs, line_nb, bolpos)) + +let lexer_func_of_ocamllex lexfun cs = + let lb = + Lexing.from_function + (fun s n -> + try Bytes.set s 0 (Stream.next cs); 1 with Stream.Failure -> 0) + in + let next_token_loc _ = + let tok = lexfun lb in + let loc = make_loc (Lexing.lexeme_start lb, Lexing.lexeme_end lb) in + tok, loc + in + make_stream_and_location next_token_loc + +(* Char and string tokens to real chars and string *) + +let buff = ref (Bytes.create 80) +let store len x = + if len >= Bytes.length !buff then + buff := Bytes.(cat !buff (create (length !buff))); + Bytes.set !buff len x; + succ len +let get_buff len = Bytes.sub !buff 0 len + +let valch x = Char.code x - Char.code '0' +let valch_a x = Char.code x - Char.code 'a' + 10 +let valch_A x = Char.code x - Char.code 'A' + 10 + +let rec backslash s i = + if i = String.length s then raise Not_found + else + match s.[i] with + 'n' -> '\n', i + 1 + | 'r' -> '\r', i + 1 + | 't' -> '\t', i + 1 + | 'b' -> '\b', i + 1 + | '\\' -> '\\', i + 1 + | '"' -> '"', i + 1 + | '\'' -> '\'', i + 1 + | '0'..'9' 
as c -> backslash1 (valch c) s (i + 1) + | 'x' -> backslash1h s (i + 1) + | _ -> raise Not_found +and backslash1 cod s i = + if i = String.length s then '\\', i - 1 + else + match s.[i] with + '0'..'9' as c -> backslash2 (10 * cod + valch c) s (i + 1) + | _ -> '\\', i - 1 +and backslash2 cod s i = + if i = String.length s then '\\', i - 2 + else + match s.[i] with + '0'..'9' as c -> Char.chr (10 * cod + valch c), i + 1 + | _ -> '\\', i - 2 +and backslash1h s i = + if i = String.length s then '\\', i - 1 + else + match s.[i] with + '0'..'9' as c -> backslash2h (valch c) s (i + 1) + | 'a'..'f' as c -> backslash2h (valch_a c) s (i + 1) + | 'A'..'F' as c -> backslash2h (valch_A c) s (i + 1) + | _ -> '\\', i - 1 +and backslash2h cod s i = + if i = String.length s then '\\', i - 2 + else + match s.[i] with + '0'..'9' as c -> Char.chr (16 * cod + valch c), i + 1 + | 'a'..'f' as c -> Char.chr (16 * cod + valch_a c), i + 1 + | 'A'..'F' as c -> Char.chr (16 * cod + valch_A c), i + 1 + | _ -> '\\', i - 2 + +let rec skip_indent s i = + if i = String.length s then i + else + match s.[i] with + ' ' | '\t' -> skip_indent s (i + 1) + | _ -> i + +let skip_opt_linefeed s i = + if i = String.length s then i else if s.[i] = '\010' then i + 1 else i + +let eval_char s = + if String.length s = 1 then s.[0] + else if String.length s = 0 then failwith "invalid char token" + else if s.[0] = '\\' then + if String.length s = 2 && s.[1] = '\'' then '\'' + else + try + let (c, i) = backslash s 1 in + if i = String.length s then c else raise Not_found + with Not_found -> failwith "invalid char token" + else failwith "invalid char token" + +let eval_string loc s = + let rec loop len i = + if i = String.length s then get_buff len + else + let (len, i) = + if s.[i] = '\\' then + let i = i + 1 in + if i = String.length s then failwith "invalid string token" + else if s.[i] = '"' then store len '"', i + 1 + else + match s.[i] with + '\010' -> len, skip_indent s (i + 1) + | '\013' -> len, skip_indent 
s (skip_opt_linefeed s (i + 1)) + | c -> + try let (c, i) = backslash s i in store len c, i with + Not_found -> store (store len '\\') c, i + 1 + else store len s.[i], i + 1 + in + loop len i + in + Bytes.to_string (loop 0 0) + +let default_match = + function + "ANY", "" -> (fun (con, prm) -> prm) + | "ANY", v -> + (fun (con, prm) -> if v = prm then v else raise Stream.Failure) + | p_con, "" -> + (fun (con, prm) -> if con = p_con then prm else raise Stream.Failure) + | p_con, p_prm -> + fun (con, prm) -> + if con = p_con && prm = p_prm then prm else raise Stream.Failure + +let input_file = ref "" +let line_nb = ref (ref 0) +let bol_pos = ref (ref 0) +let restore_lexing_info = ref None + +(* The lexing buffer used by pa_lexer.cmo *) + +let rev_implode l = + let s = Bytes.create (List.length l) in + let rec loop i = + function + c :: l -> Bytes.unsafe_set s i c; loop (i - 1) l + | [] -> s + in + Bytes.to_string (loop (Bytes.length s - 1) l) + +module Lexbuf : + sig + type t + val empty : t + val add : char -> t -> t + val get : t -> string + end = + struct + type t = char list + let empty = [] + let add c l = c :: l + let get = rev_implode + end diff --git a/gramlib/plexing.mli b/gramlib/plexing.mli new file mode 100644 index 0000000000..6b5f718bc3 --- /dev/null +++ b/gramlib/plexing.mli @@ -0,0 +1,108 @@ +(* camlp5r *) +(* plexing.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +(** Lexing for Camlp5 grammars. + + This module defines the Camlp5 lexer type to be used in extensible + grammars (see module [Grammar]). It also provides some useful functions + to create lexers. *) + +type pattern = string * string + (* Type for values used by the generated code of the EXTEND + statement to represent terminals in entry rules. +- The first string is the constructor name (must start with + an uppercase character). When it is empty, the second string + is supposed to be a keyword. +- The second string is the constructor parameter. 
Empty if it + has no parameter (corresponding to the 'wildcard' pattern). +- The way tokens patterns are interpreted to parse tokens is done + by the lexer, function [tok_match] below. *) + +exception Error of string + (** A lexing error exception to be used by lexers. *) + +(** Lexer type *) + +type 'te lexer = + { tok_func : 'te lexer_func; + tok_using : pattern -> unit; + tok_removing : pattern -> unit; + mutable tok_match : pattern -> 'te -> string; + tok_text : pattern -> string; + mutable tok_comm : Ploc.t list option } +and 'te lexer_func = char Stream.t -> 'te Stream.t * location_function +and location_function = int -> Ploc.t + (** The type of a function giving the location of a token in the + source from the token number in the stream (starting from zero). *) + +val lexer_text : pattern -> string + (** A simple [tok_text] function. *) + +val default_match : pattern -> string * string -> string + (** A simple [tok_match] function, appling to the token type + [(string * string)] *) + +(** Lexers from parsers or ocamllex + + The functions below create lexer functions either from a [char stream] + parser or for an [ocamllex] function. With the returned function [f], + it is possible to get a simple lexer (of the type [Plexing.glexer] above): + {[ + { Plexing.tok_func = f; + Plexing.tok_using = (fun _ -> ()); + Plexing.tok_removing = (fun _ -> ()); + Plexing.tok_match = Plexing.default_match; + Plexing.tok_text = Plexing.lexer_text; + Plexing.tok_comm = None } + ]} + Note that a better [tok_using] function should check the used tokens + and raise [Plexing.Error] for incorrect ones. The other functions + [tok_removing], [tok_match] and [tok_text] may have other implementations + as well. *) + +val lexer_func_of_parser : + (char Stream.t * int ref * int ref -> 'te * Ploc.t) -> 'te lexer_func + (** A lexer function from a lexer written as a char stream parser + returning the next token and its location. 
The two references + with the char stream contain the current line number and the + position of the beginning of the current line. *) +val lexer_func_of_ocamllex : (Lexing.lexbuf -> 'te) -> 'te lexer_func + (** A lexer function from a lexer created by [ocamllex] *) + +(** Function to build a stream and a location function *) + +val make_stream_and_location : + (unit -> 'te * Ploc.t) -> 'te Stream.t * location_function + (** General function *) + +(** Useful functions and values *) + +val eval_char : string -> char +val eval_string : Ploc.t -> string -> string + (** Convert a char or a string token, where the backslashes had not + been interpreted into a real char or string; raise [Failure] if + bad backslash sequence found; [Plexing.eval_char (Char.escaped c)] + would return [c] and [Plexing.eval_string (String.escaped s)] would + return [s] *) + +val restore_lexing_info : (int * int) option ref +val input_file : string ref +val line_nb : int ref ref +val bol_pos : int ref ref + (** Special variables used to reinitialize line numbers and position + of beginning of line with their correct current values when a parser + is called several times with the same character stream. Necessary + for directives (e.g. #load or #use) which interrupt the parsing. + Without usage of these variables, locations after the directives + can be wrong. 
*) + +(** The lexing buffer used by streams lexers *) + +module Lexbuf : + sig + type t + val empty : t + val add : char -> t -> t + val get : t -> string + end diff --git a/gramlib/ploc.ml b/gramlib/ploc.ml new file mode 100644 index 0000000000..cb71f72678 --- /dev/null +++ b/gramlib/ploc.ml @@ -0,0 +1,176 @@ +(* camlp5r *) +(* ploc.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +type t = + { fname : string; + line_nb : int; + bol_pos : int; + line_nb_last : int; + bol_pos_last : int; + bp : int; + ep : int; + comm : string; + ecomm : string } + +let make_loc fname line_nb bol_pos (bp, ep) comm = + {fname = fname; line_nb = line_nb; bol_pos = bol_pos; + line_nb_last = line_nb; bol_pos_last = bol_pos; bp = bp; ep = ep; + comm = comm; ecomm = ""} + +let make_unlined (bp, ep) = + {fname = ""; line_nb = 1; bol_pos = 0; line_nb_last = -1; bol_pos_last = 0; + bp = bp; ep = ep; comm = ""; ecomm = ""} + +let dummy = + {fname = ""; line_nb = 1; bol_pos = 0; line_nb_last = -1; bol_pos_last = 0; + bp = 0; ep = 0; comm = ""; ecomm = ""} + +let file_name loc = loc.fname +let first_pos loc = loc.bp +let last_pos loc = loc.ep +let line_nb loc = loc.line_nb +let bol_pos loc = loc.bol_pos +let line_nb_last loc = loc.line_nb_last +let bol_pos_last loc = loc.bol_pos_last +let comment loc = loc.comm +let comment_last loc = loc.ecomm + +(* *) + +let encl loc1 loc2 = + if loc1.bp < loc2.bp then + if loc1.ep < loc2.ep then + {fname = loc1.fname; line_nb = loc1.line_nb; bol_pos = loc1.bol_pos; + line_nb_last = loc2.line_nb_last; bol_pos_last = loc2.bol_pos_last; + bp = loc1.bp; ep = loc2.ep; comm = loc1.comm; ecomm = loc2.comm} + else loc1 + else if loc2.ep < loc1.ep then + {fname = loc2.fname; line_nb = loc2.line_nb; bol_pos = loc2.bol_pos; + line_nb_last = loc1.line_nb_last; bol_pos_last = loc1.bol_pos_last; + bp = loc2.bp; ep = loc1.ep; comm = loc2.comm; ecomm = loc1.comm} + else loc2 +let shift sh loc = {loc with bp = sh + loc.bp; ep = sh + loc.ep} +let sub loc sh len = {loc with bp 
= loc.bp + sh; ep = loc.bp + sh + len} +let after loc sh len = {loc with bp = loc.ep + sh; ep = loc.ep + sh + len} +let with_comment loc comm = {loc with comm = comm} + +let name = ref "loc" + +let from_file fname loc = + let (bp, ep) = first_pos loc, last_pos loc in + try + let ic = open_in_bin fname in + let strm = Stream.of_channel ic in + let rec loop fname lin = + let rec not_a_line_dir col (strm__ : _ Stream.t) = + let cnt = Stream.count strm__ in + match Stream.peek strm__ with + Some c -> + Stream.junk strm__; + let s = strm__ in + if cnt < bp then + if c = '\n' then loop fname (lin + 1) + else not_a_line_dir (col + 1) s + else let col = col - (cnt - bp) in fname, lin, col, col + ep - bp + | _ -> fname, lin, col, col + 1 + in + let rec a_line_dir str n col (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some '\n' -> Stream.junk strm__; loop str n + | Some _ -> Stream.junk strm__; a_line_dir str n (col + 1) strm__ + | _ -> raise Stream.Failure + in + let rec spaces col (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some ' ' -> Stream.junk strm__; spaces (col + 1) strm__ + | _ -> col + in + let rec check_string str n col (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some '"' -> + Stream.junk strm__; + let col = + try spaces (col + 1) strm__ with + Stream.Failure -> raise (Stream.Error "") + in + a_line_dir str n col strm__ + | Some c when c <> '\n' -> + Stream.junk strm__; + check_string (str ^ String.make 1 c) n (col + 1) strm__ + | _ -> not_a_line_dir col strm__ + in + let check_quote n col (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some '"' -> Stream.junk strm__; check_string "" n (col + 1) strm__ + | _ -> not_a_line_dir col strm__ + in + let rec check_num n col (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some ('0'..'9' as c) -> + Stream.junk strm__; + check_num (10 * n + Char.code c - Char.code '0') (col + 1) strm__ + | _ -> let col = spaces col strm__ in check_quote n col strm__ + in + let 
begin_line (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some '#' -> + Stream.junk strm__; + let col = + try spaces 1 strm__ with + Stream.Failure -> raise (Stream.Error "") + in + check_num 0 col strm__ + | _ -> not_a_line_dir 0 strm__ + in + begin_line strm + in + let r = + try loop fname 1 with + Stream.Failure -> + let bol = bol_pos loc in fname, line_nb loc, bp - bol, ep - bol + in + close_in ic; r + with Sys_error _ -> fname, 1, bp, ep + +let second_line fname ep0 (line, bp) ep = + let ic = open_in fname in + seek_in ic bp; + let rec loop line bol p = + if p = ep then + begin close_in ic; if bol = bp then line, ep0 else line, ep - bol end + else + let (line, bol) = + match input_char ic with + '\n' -> line + 1, p + 1 + | _ -> line, bol + in + loop line bol (p + 1) + in + loop line bp bp + +let get loc = + if loc.fname = "" || loc.fname = "-" then + loc.line_nb, loc.bp - loc.bol_pos, loc.line_nb, loc.ep - loc.bol_pos, + loc.ep - loc.bp + else + let (bl, bc, ec) = + loc.line_nb, loc.bp - loc.bol_pos, loc.ep - loc.bol_pos + in + let (el, eep) = second_line loc.fname ec (bl, loc.bp) loc.ep in + bl, bc, el, eep, ec - bc + +let call_with r v f a = + let saved = !r in + try r := v; let b = f a in r := saved; b with e -> r := saved; raise e + +exception Exc of t * exn + +let raise loc exc = + match exc with + Exc (_, _) -> raise exc + | _ -> raise (Exc (loc, exc)) + +type 'a vala = + VaAnt of string + | VaVal of 'a diff --git a/gramlib/ploc.mli b/gramlib/ploc.mli new file mode 100644 index 0000000000..d2ab62db06 --- /dev/null +++ b/gramlib/ploc.mli @@ -0,0 +1,122 @@ +(* camlp5r *) +(* ploc.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +(** Locations and some pervasive type and value. *) + +type t + +(* located exceptions *) + +exception Exc of t * exn + (** [Ploc.Exc loc e] is an encapsulation of the exception [e] with + the input location [loc]. To be used to specify a location + for an error. 
This exception must not be raised by [raise] but + rather by [Ploc.raise] (see below), to prevent the risk of several + encapsulations of [Ploc.Exc]. *) +val raise : t -> exn -> 'a + (** [Ploc.raise loc e], if [e] is already the exception [Ploc.Exc], + re-raise it (ignoring the new location [loc]), else raise the + exception [Ploc.Exc loc e]. *) + +(* making locations *) + +val make_loc : string -> int -> int -> int * int -> string -> t + (** [Ploc.make_loc fname line_nb bol_pos (bp, ep) comm] creates a location + starting at line number [line_nb], where the position of the beginning + of the line is [bol_pos] and between the positions [bp] (included) and + [ep] excluded. And [comm] is the comment before the location. The + positions are in number of characters since the begin of the stream. *) +val make_unlined : int * int -> t + (** [Ploc.make_unlined] is like [Ploc.make] except that the line number + is not provided (to be used e.g. when the line number is unknown. *) + +val dummy : t + (** [Ploc.dummy] is a dummy location, used in situations when location + has no meaning. *) + +(* getting location info *) + +val file_name : t -> string + (** [Ploc.file_name loc] returns the file name of the location. *) +val first_pos : t -> int + (** [Ploc.first_pos loc] returns the position of the begin of the location + in number of characters since the beginning of the stream. *) +val last_pos : t -> int + (** [Ploc.last_pos loc] returns the position of the first character not + in the location in number of characters since the beginning of the + stream. *) +val line_nb : t -> int + (** [Ploc.line_nb loc] returns the line number of the location or [-1] if + the location does not contain a line number (i.e. built with + [Ploc.make_unlined]. 
*) +val bol_pos : t -> int + (** [Ploc.bol_pos loc] returns the position of the beginning of the line + of the location in number of characters since the beginning of + the stream, or [0] if the location does not contain a line number + (i.e. built with [Ploc.make_unlined]. *) +val line_nb_last : t -> int +val bol_pos_last : t -> int + (** Return the line number and the position of the beginning of the line + of the last position. *) +val comment : t -> string + (** [Ploc.comment loc] returns the comment before the location. *) +val comment_last : t -> string + (** [Ploc.comment loc] returns the last comment of the location. *) + +(* combining locations *) + +val encl : t -> t -> t + (** [Ploc.encl loc1 loc2] returns the location starting at the + smallest start of [loc1] and [loc2] and ending at the greatest end + of them. In other words, it is the location enclosing [loc1] and + [loc2]. *) +val shift : int -> t -> t + (** [Ploc.shift sh loc] returns the location [loc] shifted with [sh] + characters. The line number is not recomputed. *) +val sub : t -> int -> int -> t + (** [Ploc.sub loc sh len] is the location [loc] shifted with [sh] + characters and with length [len]. The previous ending position + of the location is lost. *) +val after : t -> int -> int -> t + (** [Ploc.after loc sh len] is the location just after loc (starting at + the end position of [loc]) shifted with [sh] characters and of length + [len]. *) +val with_comment : t -> string -> t + (** Change the comment part of the given location *) + +(* miscellaneous *) + +val name : string ref + (** [Ploc.name.val] is the name of the location variable used in grammars + and in the predefined quotations for OCaml syntax trees. 
Default: + ["loc"] *) + +val get : t -> int * int * int * int * int + (** [Ploc.get loc] returns in order: 1/ the line number of the begin + of the location, 2/ its column, 3/ the line number of the first + character not in the location, 4/ its column and 5/ the length + of the location. The file where the location occurs (if any) may + be read during this operation. *) + +val from_file : string -> t -> string * int * int * int + (** [Ploc.from_file fname loc] reads the file [fname] up to the + location [loc] and returns the real input file, the line number + and the characters location in the line; the real input file + can be different from [fname] because of possibility of line + directives typically generated by /lib/cpp. *) + +(* pervasives *) + +type 'a vala = + VaAnt of string + | VaVal of 'a + (** Encloser of many abstract syntax tree nodes types, in "strict" mode. + This allow the system of antiquotations of abstract syntax tree + quotations to work when using the quotation kit [q_ast.cmo]. *) + +val call_with : 'a ref -> 'a -> ('b -> 'c) -> 'b -> 'c + (** [Ploc.call_with r v f a] sets the reference [r] to the value [v], + then call [f a], and resets [r] to its initial value. If [f a] raises + an exception, its initial value is also reset and the exception is + re-raised. The result is the result of [f a]. 
*) diff --git a/gramlib/token.ml b/gramlib/token.ml new file mode 100644 index 0000000000..77c737b880 --- /dev/null +++ b/gramlib/token.ml @@ -0,0 +1,37 @@ +(* camlp5r *) +(* token.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +type pattern = Plexing.pattern + +exception Error of string + +type location = Ploc.t +type location_function = int -> location +type 'te lexer_func = char Stream.t -> 'te Stream.t * location_function + +type 'te glexer = + 'te Plexing.lexer = + { tok_func : 'te lexer_func; + tok_using : pattern -> unit; + tok_removing : pattern -> unit; + mutable tok_match : pattern -> 'te -> string; + tok_text : pattern -> string; + mutable tok_comm : location list option } + +let make_loc = Ploc.make_unlined +let dummy_loc = Ploc.dummy + +let make_stream_and_location = Plexing.make_stream_and_location +let lexer_func_of_parser = Plexing.lexer_func_of_parser +let lexer_func_of_ocamllex = Plexing.lexer_func_of_ocamllex + +let eval_char = Plexing.eval_char +let eval_string = Plexing.eval_string + +let lexer_text = Plexing.lexer_text +let default_match = Plexing.default_match + +let line_nb = Plexing.line_nb +let bol_pos = Plexing.bol_pos +let restore_lexing_info = Plexing.restore_lexing_info diff --git a/gramlib/token.mli b/gramlib/token.mli new file mode 100644 index 0000000000..c1de5cefff --- /dev/null +++ b/gramlib/token.mli @@ -0,0 +1,56 @@ +(* camlp5r *) +(* token.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +(** Module deprecated since Camlp5 version 5.00. Use now module Plexing. + Compatibility assumed. 
*) + +type pattern = Plexing.pattern + +exception Error of string + (** Use now [Plexing.Error] *) + +type 'te glexer = + 'te Plexing.lexer = + { tok_func : 'te Plexing.lexer_func; + tok_using : pattern -> unit; + tok_removing : pattern -> unit; + mutable tok_match : pattern -> 'te -> string; + tok_text : pattern -> string; + mutable tok_comm : Ploc.t list option } + +type 'te lexer_func = char Stream.t -> 'te Stream.t * location_function +and location_function = int -> Ploc.t + +val lexer_text : pattern -> string + (** Use now [Plexing.lexer_text] *) +val default_match : pattern -> string * string -> string + (** Use now [Plexing.default_match] *) + +val lexer_func_of_parser : + (char Stream.t * int ref * int ref -> 'te * Ploc.t) -> 'te lexer_func + (** Use now [Plexing.lexer_func_of_parser] *) +val lexer_func_of_ocamllex : (Lexing.lexbuf -> 'te) -> 'te lexer_func + (** Use now [Plexing.lexer_func_of_ocamllex] *) + +val make_stream_and_location : + (unit -> 'te * Ploc.t) -> 'te Stream.t * location_function + (** Use now [Plexing.make_stream_and_location] *) + +val eval_char : string -> char + (** Use now [Plexing.eval_char] *) +val eval_string : Ploc.t -> string -> string + (** Use now [Plexing.eval_string] *) + +val restore_lexing_info : (int * int) option ref + (** Use now [Plexing.restore_lexing_info] *) +val line_nb : int ref ref + (** Use now [Plexing.line_nb] *) +val bol_pos : int ref ref + (** Use now [Plexing.bol_pos] *) + +(* deprecated since version 4.08 *) + +type location = Ploc.t +val make_loc : int * int -> Ploc.t +val dummy_loc : Ploc.t |
