diff options
| -rw-r--r-- | Makefile | 1 | ||||
| -rw-r--r-- | gramlib/LICENSE | 29 | ||||
| -rw-r--r-- | gramlib/fstream.ml | 152 | ||||
| -rw-r--r-- | gramlib/fstream.mli | 95 | ||||
| -rw-r--r-- | gramlib/gramext.ml | 622 | ||||
| -rw-r--r-- | gramlib/gramext.mli | 87 | ||||
| -rw-r--r-- | gramlib/grammar.ml | 2695 | ||||
| -rw-r--r-- | gramlib/grammar.mli | 338 | ||||
| -rw-r--r-- | gramlib/plexing.ml | 241 | ||||
| -rw-r--r-- | gramlib/plexing.mli | 143 | ||||
| -rw-r--r-- | gramlib/ploc.ml | 217 | ||||
| -rw-r--r-- | gramlib/ploc.mli | 129 | ||||
| -rw-r--r-- | gramlib/token.ml | 37 | ||||
| -rw-r--r-- | gramlib/token.mli | 56 |
14 files changed, 4842 insertions, 0 deletions
@@ -57,6 +57,7 @@ FIND_SKIP_DIRS:='(' \ -name '_build' -o \ -name '_build_ci' -o \ -name '_install_ci' -o \ + -name 'gramlib' -o \ -name 'user-contrib' -o \ -name 'test-suite' -o \ -name '.opamcache' -o \ diff --git a/gramlib/LICENSE b/gramlib/LICENSE new file mode 100644 index 0000000000..b696affde7 --- /dev/null +++ b/gramlib/LICENSE @@ -0,0 +1,29 @@ +gramlib was derived from Daniel de Rauglaudre's camlp5 +(https://github.com/camlp5/camlp5) whose licence follows: + +* Copyright (c) 2007-2017, INRIA (Institut National de Recherches en +* Informatique et Automatique). All rights reserved. +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* * Neither the name of INRIA, nor the names of its contributors may be +* used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY INRIA AND CONTRIBUTORS ``AS IS'' AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INRIA AND +* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGE. diff --git a/gramlib/fstream.ml b/gramlib/fstream.ml new file mode 100644 index 0000000000..928df3b0c4 --- /dev/null +++ b/gramlib/fstream.ml @@ -0,0 +1,152 @@ +(* camlp5r *) +(* fstream.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +exception Cut; + +type mlazy_c 'a = + [ Lfun of unit -> 'a + | Lval of 'a ] +; +type mlazy 'a = + [ Cval of 'a + | Clazy of ref (mlazy_c 'a) ] +; +value mlazy f = Clazy (ref (Lfun f)); +value mlazy_force l = + match l with + [ Cval v -> v + | Clazy l -> + match l.val with + [ Lfun f -> do { let x = f () in l.val := Lval x; x } + | Lval v -> v ] ] +; +value mlazy_is_val l = + match l with + [ Cval _ -> True + | Clazy l -> + match l.val with + [ Lval _ -> True + | Lfun _ -> False ] ] +; + +type t 'a = { count : int; data : mlazy (data 'a) } +and data 'a = + [ Nil + | Cons of 'a and t 'a + | App of t 'a and t 'a ] +; + +value from f = + loop 0 where rec loop i = + {count = 0; + data = + mlazy + (fun () -> + match f i with + [ Some x -> Cons x (loop (i + 1)) + | None -> Nil ])} +; + +value rec next s = + let count = s.count + 1 in + match mlazy_force s.data with + [ Nil -> None + | Cons a s -> Some (a, {count = count; data = s.data}) + | App s1 s2 -> + match next s1 with + [ Some (a, s1) -> + Some (a, {count = count; data = mlazy (fun () -> App s1 s2)}) + | None -> + match next s2 with + [ Some (a, s2) -> Some (a, {count = count; data = s2.data}) + | None -> None ] ] ] +; + +value empty s = + match next s with + [ Some _ -> None + | None -> Some ((), s) ] +; + +value nil = {count = 0; data = Cval Nil}; +value cons a s = Cons a s; +value app s1 s2 = App s1 s2; +value flazy f = {count = 0; data = mlazy f}; + +value of_list l = + List.fold_right (fun x s -> flazy (fun () -> cons x s)) l nil +; + +value of_string s = + from (fun c -> if c < String.length s then Some s.[c] else None) +; + +value of_channel ic = + from (fun _ -> try Some (input_char ic) with [ End_of_file -> None ]) +; + +value iter f = + do_rec where rec do_rec strm = + match next strm with + [ Some (a, strm) -> + let _ = f a in + do_rec strm + | None -> () ] +; + +value count s = s.count; + +value count_unfrozen s = + loop 0 s where rec loop cnt s = + if mlazy_is_val s.data then + match mlazy_force s.data with + [ Cons _ s -> loop (cnt + 1) s + | _ -> cnt ] + else cnt +; + +(* backtracking parsers *) + +type kont 'a 'b = [ K of unit -> option ('b * t 'a * kont 'a 'b) ]; +type bp 'a 'b = t 'a -> option ('b * t 'a * kont 'a 'b); + +value bcontinue = fun [ (K k) -> k () ]; + +value bparse_all p strm = + loop (fun () -> p strm) where rec loop p = + match p () with + [ Some (r, _, K k) -> [r :: loop k] + | None -> [] ] +; + +value b_seq a b strm = + let rec app_a kont1 () = + match kont1 () with + [ Some (x, strm, K kont1) -> app_b (fun () -> b x strm) kont1 () + | None -> None ] + and app_b kont2 kont1 () = + match kont2 () with + [ Some (y, strm, K kont2) -> Some (y, strm, K (app_b kont2 kont1)) + | None -> app_a kont1 () ] + in + app_a (fun () -> a strm) () +; + +value b_or a b strm = + loop (fun () -> a strm) () where rec loop kont () = + match kont () with + [ Some (x, strm, K kont) -> Some (x, strm, K (loop kont)) + | None -> b strm ] +; + +value b_term f strm = + match next strm with + [ Some (x, strm) -> + match f x with + [ Some y -> Some (y, strm, K (fun _ -> None)) + | None -> None ] + | None -> None ] +; + +value b_act a strm = Some (a, strm, K (fun _ -> None)); diff --git a/gramlib/fstream.mli b/gramlib/fstream.mli new file mode 100644 index 0000000000..a6d0391173 --- /dev/null +++ b/gramlib/fstream.mli @@ -0,0 +1,95 @@ +(* camlp5r *) +(* fstream.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +(* Module [Fstream]: functional streams *) + +(* This module implement functional streams and parsers together with + backtracking parsers. To be used with syntax [pa_fstream.cmo]. The + syntax is: + For functional streams: +- stream: [fstream [: ... :]] + For functional parsers: +- parser: [fparser [ [: ... :] -> ... | ... ]] + For backtracking parsers: +- parser: [bparser [ [: ... :] -> ... | ... ]] + + Functional parsers are of type: + [Fstream.t 'a -> option ('b * Fstream.t 'a)] + Backtracking parsers are of type: + [Fstream.t 'a -> option ('b * Fstream.t 'a * Fstream.kont 'a 'b)] + + Functional parsers have limited backtrack, i.e if a rule fails, the + next rule is tested with the initial stream; limited because when + in case of a rule with two consecutive symbols [a] and [b], if [b] + fails, the rule fails: there is no try with the next rule of [a]. + + Backtracking parsers have full backtrack. If a rule fails, the next + case of the previous rule is tested. +*) + +exception Cut; + +(** Functional streams *) + +type t 'a = 'x; + (* The type of 'a functional streams *) +value from : (int -> option 'a) -> t 'a; + (* [Fstream.from f] returns a stream built from the function [f]. + To create a new stream element, the function [f] is called with + the current stream count. The user function [f] must return either + [Some <value>] for a value or [None] to specify the end of the + stream. *) + +value of_list : list 'a -> t 'a; + (* Return the stream holding the elements of the list in the same + order. *) +value of_string : string -> t char; + (* Return the stream of the characters of the string parameter. *) +value of_channel : in_channel -> t char; + (* Return the stream of the characters read from the input channel. *) + +value iter : ('a -> unit) -> t 'a -> unit; + (* [Fstream.iter f s] scans the whole stream s, applying function [f] + in turn to each stream element encountered. *) + +value next : t 'a -> option ('a * t 'a); + (* Return [Some (a, s)] where [a] is the first element of the stream + and [s] the remaining stream, or [None] if the stream is empty. *) +value empty : t 'a -> option (unit * t 'a); + (* Return [Some ((), s)] if the stream is empty where [s] is itself, + else [None] *) +value count : t 'a -> int; + (* Return the current count of the stream elements, i.e. the number + of the stream elements discarded. *) +value count_unfrozen : t 'a -> int; + (* Return the number of unfrozen elements in the beginning of the + stream; useful to determine the position of a parsing error (longuest + path). *) + +(** Backtracking parsers *) + +type kont 'a 'b = [ K of unit -> option ('b * t 'a * kont 'a 'b) ]; + (* The type of continuation of a backtracking parser. *) +type bp 'a 'b = t 'a -> option ('b * t 'a * kont 'a 'b); + (* The type of a backtracking parser. *) + +value bcontinue : kont 'a 'b -> option ('b * t 'a * kont 'a 'b); + (* [bcontinue k] return the next solution of a backtracking parser. *) + +value bparse_all : bp 'a 'b -> t 'a -> list 'b; + (* [bparse_all p strm] return the list of all solutions of a + backtracking parser applied to a functional stream. *) + +(*--*) + +value nil : t 'a; +type data 'a = 'x; +value cons : 'a -> t 'a -> data 'a; +value app : t 'a -> t 'a -> data 'a; +value flazy : (unit -> data 'a) -> t 'a; + +value b_seq : bp 'a 'b -> ('b -> bp 'a 'c) -> bp 'a 'c; +value b_or : bp 'a 'b -> bp 'a 'b -> bp 'a 'b; +value b_term : ('a -> option 'b) -> bp 'a 'b; +value b_act : 'b -> bp 'a 'b; diff --git a/gramlib/gramext.ml b/gramlib/gramext.ml new file mode 100644 index 0000000000..1e50bdc14f --- /dev/null +++ b/gramlib/gramext.ml @@ -0,0 +1,622 @@ +(* camlp5r *) +(* gramext.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +open Printf; + +type parser_t 'a = Stream.t 'a -> Obj.t; +type fparser_t 'a = Fstream.t 'a -> option (Obj.t * Fstream.t 'a); +type bparser_t 'a = Fstream.bp 'a Obj.t; + +type parse_algorithm = + [ Predictive | Functional | Backtracking | DefaultAlgorithm ]; + +type grammar 'te = + { gtokens : Hashtbl.t Plexing.pattern (ref int); + glexer : mutable Plexing.lexer 'te; + galgo : mutable parse_algorithm } +; + +type g_entry 'te = + { egram : grammar 'te; + ename : string; + elocal : bool; + estart : mutable int -> parser_t 'te; + econtinue : mutable int -> int -> Obj.t -> parser_t 'te; + fstart : mutable int -> err_fun -> fparser_t 'te; + fcontinue : mutable int -> int -> Obj.t -> err_fun -> fparser_t 'te; + bstart : mutable int -> err_fun -> bparser_t 'te; + bcontinue : mutable int -> int -> Obj.t -> err_fun -> bparser_t 'te; + edesc : mutable g_desc 'te } +and g_desc 'te = + [ Dlevels of list (g_level 'te) + | Dparser of parser_t 'te ] +and g_level 'te = + { assoc : g_assoc; + lname : option string; + lsuffix : g_tree 'te; + lprefix : g_tree 'te } +and g_assoc = [ NonA | RightA | LeftA ] +and g_symbol 'te = + [ Sfacto of g_symbol 'te + | Smeta of string and list (g_symbol 'te) and Obj.t + | Snterm of g_entry 'te + | Snterml of g_entry 'te and string + | Slist0 of g_symbol 'te + | Slist0sep of g_symbol 'te and g_symbol 'te and bool + | Slist1 of g_symbol 'te + | Slist1sep of g_symbol 'te and g_symbol 'te and bool + | Sopt of g_symbol 'te + | Sflag of g_symbol 'te + | Sself + | Snext + | Scut + | Stoken of Plexing.pattern + | Stree of g_tree 'te + | Svala of list string and g_symbol 'te ] +and g_action = Obj.t +and g_tree 'te = + [ Node of g_node 'te + | LocAct of g_action and list g_action + | DeadEnd ] +and g_node 'te = + { node : g_symbol 'te; son : g_tree 'te; brother : g_tree 'te } +and err_fun = unit -> string; + +type position = + [ First + | Last + | Before of string + | After of string + | Like of string + | Level of string ] +; + +value warning_verbose = ref True; + +value rec derive_eps = + fun + [ Slist0 _ -> True + | Slist0sep _ _ _ -> True + | Sopt _ | Sflag _ -> True + | Sfacto s -> derive_eps s + | Stree t -> tree_derive_eps t + | Svala _ s -> derive_eps s + | Smeta _ _ _ | Slist1 _ | Slist1sep _ _ _ | Snterm _ | Snterml _ _ | + Snext | Sself | Scut | Stoken _ -> + False ] +and tree_derive_eps = + fun + [ LocAct _ _ -> True + | Node {node = s; brother = bro; son = son} -> + derive_eps s && tree_derive_eps son || tree_derive_eps bro + | DeadEnd -> False ] +; + +value rec eq_symbol s1 s2 = + match (s1, s2) with + [ (Snterm e1, Snterm e2) -> e1 == e2 + | (Snterml e1 l1, Snterml e2 l2) -> e1 == e2 && l1 = l2 + | (Slist0 s1, Slist0 s2) -> eq_symbol s1 s2 + | (Slist0sep s1 sep1 b1, Slist0sep s2 sep2 b2) -> + eq_symbol s1 s2 && eq_symbol sep1 sep2 && b1 = b2 + | (Slist1 s1, Slist1 s2) -> eq_symbol s1 s2 + | (Slist1sep s1 sep1 b1, Slist1sep s2 sep2 b2) -> + eq_symbol s1 s2 && eq_symbol sep1 sep2 && b1 = b2 + | (Sflag s1, Sflag s2) -> eq_symbol s1 s2 + | (Sopt s1, Sopt s2) -> eq_symbol s1 s2 + | (Svala ls1 s1, Svala ls2 s2) -> ls1 = ls2 && eq_symbol s1 s2 + | (Stree _, Stree _) -> False + | (Sfacto (Stree t1), Sfacto (Stree t2)) -> + (* The only goal of the node 'Sfacto' is to allow tree comparison + (therefore factorization) without looking at the semantic + actions; allow factorization of rules like "SV foo" which are + actually expanded into a tree. *) + eq_tree t1 t2 where rec eq_tree t1 t2 = + match (t1, t2) with + [ (Node n1, Node n2) -> + eq_symbol n1.node n2.node && eq_tree n1.son n2.son && + eq_tree n1.brother n2.brother + | (LocAct _ _, LocAct _ _) -> True + | (DeadEnd, DeadEnd) -> True + | _ -> False ] + | _ -> s1 = s2 ] +; + +value is_before s1 s2 = + let s1 = match s1 with [ Svala _ s -> s | _ -> s1 ] in + let s2 = match s2 with [ Svala _ s -> s | _ -> s2 ] in + match (s1, s2) with + [ (Stoken ("ANY", _), _) -> False + | (_, Stoken ("ANY", _)) -> True + | (Stoken (_, s), Stoken (_, "")) when s <> "" -> True + | (Stoken _, Stoken _) -> False + | (Stoken _, _) -> True + | _ -> False ] +; + +value insert_tree entry_name gsymbols action tree = + let rec insert symbols tree = + match symbols with + [ [s :: sl] -> insert_in_tree s sl tree + | [] -> + match tree with + [ Node {node = s; son = son; brother = bro} -> + Node {node = s; son = son; brother = insert [] bro} + | LocAct old_action action_list -> do { + if warning_verbose.val then do { + eprintf "<W> Grammar extension: "; + if entry_name <> "" then eprintf "in [%s], " entry_name else (); + eprintf "some rule has been masked\n"; + flush stderr + } + else (); + LocAct action [old_action :: action_list] + } + | DeadEnd -> LocAct action [] ] ] + and insert_in_tree s sl tree = + match try_insert s sl tree with + [ Some t -> t + | None -> Node {node = s; son = insert sl DeadEnd; brother = tree} ] + and try_insert s sl tree = + match tree with + [ Node {node = s1; son = son; brother = bro} -> + if eq_symbol s s1 then + let t = Node {node = s1; son = insert sl son; brother = bro} in + Some t + else if s = Scut then + try_insert s sl (Node {node = s; son = tree; brother = DeadEnd}) + else if s1 = Scut then + try_insert s1 [s :: sl] tree + else if is_before s1 s || derive_eps s && not (derive_eps s1) then + let bro = + match try_insert s sl bro with + [ Some bro -> bro + | None -> + Node {node = s; son = insert sl DeadEnd; brother = bro} ] + in + let t = Node {node = s1; son = son; brother = bro} in + Some t + else + match try_insert s sl bro with + [ Some bro -> + let t = Node {node = s1; son = son; brother = bro} in + Some t + | None -> None ] + | LocAct _ _ | DeadEnd -> None ] + in + insert gsymbols tree +; + +value srules rl = + let t = + List.fold_left + (fun tree (symbols, action) -> insert_tree "" symbols action tree) + DeadEnd rl + in + Stree t +; + +external action : 'a -> g_action = "%identity"; + +value is_level_labelled n lev = + match lev.lname with + [ Some n1 -> n = n1 + | None -> False ] +; + +value rec token_exists_in_level f lev = + token_exists_in_tree f lev.lprefix || token_exists_in_tree f lev.lsuffix +and token_exists_in_tree f = + fun + [ Node n -> + token_exists_in_symbol f n.node || token_exists_in_tree f n.brother || + token_exists_in_tree f n.son + | LocAct _ _ | DeadEnd -> False ] +and token_exists_in_symbol f = + fun + [ Sfacto sy -> token_exists_in_symbol f sy + | Smeta _ syl _ -> List.exists (token_exists_in_symbol f) syl + | Slist0 sy -> token_exists_in_symbol f sy + | Slist0sep sy sep _ -> + token_exists_in_symbol f sy || token_exists_in_symbol f sep + | Slist1 sy -> token_exists_in_symbol f sy + | Slist1sep sy sep _ -> + token_exists_in_symbol f sy || token_exists_in_symbol f sep + | Sopt sy -> token_exists_in_symbol f sy + | Sflag sy -> token_exists_in_symbol f sy + | Stoken tok -> f tok + | Stree t -> token_exists_in_tree f t + | Svala _ sy -> token_exists_in_symbol f sy + | Snterm _ | Snterml _ _ | Snext | Sself | Scut -> False ] +; + +value insert_level entry_name e1 symbols action slev = + match e1 with + [ True -> + {assoc = slev.assoc; lname = slev.lname; + lsuffix = insert_tree entry_name symbols action slev.lsuffix; + lprefix = slev.lprefix} + | False -> + {assoc = slev.assoc; lname = slev.lname; lsuffix = slev.lsuffix; + lprefix = insert_tree entry_name symbols action slev.lprefix} ] +; + +value empty_lev lname assoc = + let assoc = + match assoc with + [ Some a -> a + | None -> LeftA ] + in + {assoc = assoc; lname = lname; lsuffix = DeadEnd; lprefix = DeadEnd} +; + +value change_lev lev n lname assoc = do { + let a = + match assoc with + [ None -> lev.assoc + | Some a -> do { + if a <> lev.assoc && warning_verbose.val then do { + eprintf "<W> Changing associativity of level \"%s\"\n" n; + flush stderr + } + else (); + a + } ] + in + match lname with + [ Some n -> + if lname <> lev.lname && warning_verbose.val then do { + eprintf "<W> Level label \"%s\" ignored\n" n; + flush stderr + } + else () + | None -> () ]; + {assoc = a; lname = lev.lname; lsuffix = lev.lsuffix; lprefix = lev.lprefix} +}; + +value get_level entry position levs = + match position with + [ Some First -> ([], empty_lev, levs) + | Some Last -> (levs, empty_lev, []) + | Some (Level n) -> + get levs where rec get = + fun + [ [] -> do { + eprintf "No level labelled \"%s\" in entry \"%s\"\n" n + entry.ename; + flush stderr; + failwith "Grammar.extend" + } + | [lev :: levs] -> + if is_level_labelled n lev then ([], change_lev lev n, levs) + else + let (levs1, rlev, levs2) = get levs in + ([lev :: levs1], rlev, levs2) ] + | Some (Before n) -> + get levs where rec get = + fun + [ [] -> do { + eprintf "No level labelled \"%s\" in entry \"%s\"\n" n + entry.ename; + flush stderr; + failwith "Grammar.extend" + } + | [lev :: levs] -> + if is_level_labelled n lev then ([], empty_lev, [lev :: levs]) + else + let (levs1, rlev, levs2) = get levs in + ([lev :: levs1], rlev, levs2) ] + | Some (After n) -> + get levs where rec get = + fun + [ [] -> do { + eprintf "No level labelled \"%s\" in entry \"%s\"\n" n + entry.ename; + flush stderr; + failwith "Grammar.extend" + } + | [lev :: levs] -> + if is_level_labelled n lev then ([lev], empty_lev, levs) + else + let (levs1, rlev, levs2) = get levs in + ([lev :: levs1], rlev, levs2) ] + | Some (Like n) -> + let f (tok, prm) = n = tok || n = prm in + get levs where rec get = + fun + [ [] -> do { + eprintf "No level with \"%s\" in entry \"%s\"\n" n entry.ename; + flush stderr; + failwith "Grammar.extend" + } + | [lev :: levs] -> + if token_exists_in_level f lev then ([], change_lev lev n, levs) + else + let (levs1, rlev, levs2) = get levs in + ([lev :: levs1], rlev, levs2) ] + | None -> + match levs with + [ [lev :: levs] -> ([], change_lev lev "<top>", levs) + | [] -> ([], empty_lev, []) ] ] +; + +value rec check_gram entry = + fun + [ Snterm e -> + if e.egram != entry.egram then do { + eprintf "\ +Error: entries \"%s\" and \"%s\" do not belong to the same grammar.\n" + entry.ename e.ename; + flush stderr; + failwith "Grammar.extend error" + } + else () + | Snterml e _ -> + if e.egram != entry.egram then do { + eprintf "\ +Error: entries \"%s\" and \"%s\" do not belong to the same grammar.\n" + entry.ename e.ename; + flush stderr; + failwith "Grammar.extend error" + } + else () + | Sfacto s -> check_gram entry s + | Smeta _ sl _ -> List.iter (check_gram entry) sl + | Slist0sep s t _ -> do { check_gram entry t; check_gram entry s } + | Slist1sep s t _ -> do { check_gram entry t; check_gram entry s } + | Slist0 s -> check_gram entry s + | Slist1 s -> check_gram entry s + | Sopt s -> check_gram entry s + | Sflag s -> check_gram entry s + | Stree t -> tree_check_gram entry t + | Svala _ s -> check_gram entry s + | Snext | Sself | Scut | Stoken _ -> () ] +and tree_check_gram entry = + fun + [ Node {node = n; brother = bro; son = son} -> do { + check_gram entry n; + tree_check_gram entry bro; + tree_check_gram entry son + } + | LocAct _ _ | DeadEnd -> () ] +; + +value change_to_self entry = + fun + [ Snterm e when e == entry -> Sself + | x -> x ] +; + +value get_initial entry = + fun + [ [Sself :: symbols] -> (True, symbols) + | symbols -> (False, symbols) ] +; + +value insert_tokens gram symbols = + let rec insert = + fun + [ Sfacto s -> insert s + | Smeta _ sl _ -> List.iter insert sl + | Slist0 s -> insert s + | Slist1 s -> insert s + | Slist0sep s t _ -> do { insert s; insert t } + | Slist1sep s t _ -> do { insert s; insert t } + | Sopt s -> insert s + | Sflag s -> insert s + | Stree t -> tinsert t + | Svala _ s -> insert s + | Stoken ("ANY", _) -> () + | Stoken tok -> do { + gram.glexer.Plexing.tok_using tok; + let r = + try Hashtbl.find gram.gtokens tok with + [ Not_found -> do { + let r = ref 0 in + Hashtbl.add gram.gtokens tok r; + r + } ] + in + incr r + } + | Snterm _ | Snterml _ _ | Snext | Sself | Scut -> () ] + and tinsert = + fun + [ Node {node = s; brother = bro; son = son} -> do { + insert s; + tinsert bro; + tinsert son + } + | LocAct _ _ | DeadEnd -> () ] + in + List.iter insert symbols +; + +value levels_of_rules entry position rules = + let elev = + match entry.edesc with + [ Dlevels elev -> elev + | Dparser _ -> do { + eprintf "Error: entry not extensible: \"%s\"\n" entry.ename; + flush stderr; + failwith "Grammar.extend" + } ] + in + if rules = [] then elev + else + let (levs1, make_lev, levs2) = get_level entry position elev in + let (levs, _) = + List.fold_left + (fun (levs, make_lev) (lname, assoc, level) -> + let lev = make_lev lname assoc in + let lev = + List.fold_left + (fun lev (symbols, action) -> do { + let symbols = List.map (change_to_self entry) symbols in + List.iter (check_gram entry) symbols; + let (e1, symbols) = get_initial entry symbols in + insert_tokens entry.egram symbols; + insert_level entry.ename e1 symbols action lev + }) + lev level + in + ([lev :: levs], empty_lev)) + ([], make_lev) rules + in + levs1 @ List.rev levs @ levs2 +; + +value logically_eq_symbols entry = + let rec eq_symbols s1 s2 = + match (s1, s2) with + [ (Snterm e1, Snterm e2) -> e1.ename = e2.ename + | (Snterm e1, Sself) -> e1.ename = entry.ename + | (Sself, Snterm e2) -> entry.ename = e2.ename + | (Snterml e1 l1, Snterml e2 l2) -> e1.ename = e2.ename && l1 = l2 + | (Slist0 s1, Slist0 s2) -> eq_symbols s1 s2 + | (Slist0sep s1 sep1 b1, Slist0sep s2 sep2 b2) -> + eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2 + | (Slist1 s1, Slist1 s2) -> eq_symbols s1 s2 + | (Slist1sep s1 sep1 b1, Slist1sep s2 sep2 b2) -> + eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2 + | (Sopt s1, Sopt s2) -> eq_symbols s1 s2 + | (Stree t1, Stree t2) -> eq_trees t1 t2 + | _ -> s1 = s2 ] + and eq_trees t1 t2 = + match (t1, t2) with + [ (Node n1, Node n2) -> + eq_symbols n1.node n2.node && eq_trees n1.son n2.son && + eq_trees n1.brother n2.brother + | (LocAct _ _ | DeadEnd, LocAct _ _ | DeadEnd) -> True + | _ -> False ] + in + eq_symbols +; + +(* [delete_rule_in_tree] returns + [Some (dsl, t)] if success + [dsl] = + Some (list of deleted nodes) if branch deleted + None if action replaced by previous version of action + [t] = remaining tree + [None] if failure *) + +value delete_rule_in_tree entry = + let rec delete_in_tree symbols tree = + match (symbols, tree) with + [ ([s :: sl], Node n) -> + if logically_eq_symbols entry s n.node then delete_son sl n + else + match delete_in_tree symbols n.brother with + [ Some (dsl, t) -> + Some (dsl, Node {node = n.node; son = n.son; brother = t}) + | None -> None ] + | ([s :: sl], _) -> None + | ([], Node n) -> + match delete_in_tree [] n.brother with + [ Some (dsl, t) -> + Some (dsl, Node {node = n.node; son = n.son; brother = t}) + | None -> None ] + | ([], DeadEnd) -> None + | ([], LocAct _ []) -> Some (Some [], DeadEnd) + | ([], LocAct _ [action :: list]) -> Some (None, LocAct action list) ] + and delete_son sl n = + match delete_in_tree sl n.son with + [ Some (Some dsl, DeadEnd) -> Some (Some [n.node :: dsl], n.brother) + | Some (Some dsl, t) -> + let t = Node {node = n.node; son = t; brother = n.brother} in + Some (Some [n.node :: dsl], t) + | Some (None, t) -> + let t = Node {node = n.node; son = t; brother = n.brother} in + Some (None, t) + | None -> None ] + in + delete_in_tree +; + +value rec decr_keyw_use gram = + fun + [ Stoken tok -> do { + let r = Hashtbl.find gram.gtokens tok in + decr r; + if r.val == 0 then do { + Hashtbl.remove gram.gtokens tok; + gram.glexer.Plexing.tok_removing tok + } + else () + } + | Sfacto s -> decr_keyw_use gram s + | Smeta _ sl _ -> List.iter (decr_keyw_use gram) sl + | Slist0 s -> decr_keyw_use gram s + | Slist1 s -> decr_keyw_use gram s + | Slist0sep s1 s2 _ -> do { decr_keyw_use gram s1; decr_keyw_use gram s2 } + | Slist1sep s1 s2 _ -> do { decr_keyw_use gram s1; decr_keyw_use gram s2 } + | Sopt s -> decr_keyw_use gram s + | Sflag s -> decr_keyw_use gram s + | Stree t -> decr_keyw_use_in_tree gram t + | Svala _ s -> decr_keyw_use gram s + | Sself | Snext | Scut | Snterm _ | Snterml _ _ -> () ] +and decr_keyw_use_in_tree gram = + fun + [ DeadEnd | LocAct _ _ -> () + | Node n -> do { + decr_keyw_use gram n.node; + decr_keyw_use_in_tree gram n.son; + decr_keyw_use_in_tree gram n.brother + } ] +; + +value rec delete_rule_in_suffix entry symbols = + fun + [ [lev :: levs] -> + match delete_rule_in_tree entry symbols lev.lsuffix with + [ Some (dsl, t) -> do { + match dsl with + [ Some dsl -> List.iter (decr_keyw_use entry.egram) dsl + | None -> () ]; + match t with + [ DeadEnd when lev.lprefix == DeadEnd -> levs + | _ -> + let lev = + {assoc = lev.assoc; lname = lev.lname; lsuffix = t; + lprefix = lev.lprefix} + in + [lev :: levs] ] + } + | None -> + let levs = delete_rule_in_suffix entry symbols levs in + [lev :: levs] ] + | [] -> raise Not_found ] +; + +value rec delete_rule_in_prefix entry symbols = + fun + [ [lev :: levs] -> + match delete_rule_in_tree entry symbols lev.lprefix with + [ Some (dsl, t) -> do { + match dsl with + [ Some dsl -> List.iter (decr_keyw_use entry.egram) dsl + | None -> () ]; + match t with + [ DeadEnd when lev.lsuffix == DeadEnd -> levs + | _ -> + let lev = + {assoc = lev.assoc; lname = lev.lname; lsuffix = lev.lsuffix; + lprefix = t} + in + [lev :: levs] ] + } + | None -> + let levs = delete_rule_in_prefix entry symbols levs in + [lev :: levs] ] + | [] -> raise Not_found ] +; + +value rec delete_rule_in_level_list entry symbols levs = + match symbols with + [ [Sself :: symbols] -> delete_rule_in_suffix entry symbols levs + | [Snterm e :: symbols] when e == entry -> + delete_rule_in_suffix entry symbols levs + | _ -> delete_rule_in_prefix entry symbols levs ] +; diff --git a/gramlib/gramext.mli b/gramlib/gramext.mli new file mode 100644 index 0000000000..69498f6d40 --- /dev/null +++ b/gramlib/gramext.mli @@ -0,0 +1,87 @@ +(* camlp5r *) +(* gramext.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +type parser_t 'a = Stream.t 'a -> Obj.t; +type fparser_t 'a = Fstream.t 'a -> option (Obj.t * Fstream.t 'a); +type bparser_t 'a = Fstream.bp 'a Obj.t; + +type parse_algorithm = + [ Predictive | Functional | Backtracking | DefaultAlgorithm ]; + +type grammar 'te = + { gtokens : Hashtbl.t Plexing.pattern (ref int); + glexer : mutable Plexing.lexer 'te; + galgo : mutable parse_algorithm } +; + +type g_entry 'te = + { egram : grammar 'te; + ename : string; + elocal : bool; + estart : mutable int -> parser_t 'te; + econtinue : mutable int -> int -> Obj.t -> parser_t 'te; + fstart : mutable int -> err_fun -> fparser_t 'te; + fcontinue : mutable int -> int -> Obj.t -> err_fun -> fparser_t 'te; + bstart : mutable int -> err_fun -> bparser_t 'te; + bcontinue : mutable int -> int -> Obj.t -> err_fun -> bparser_t 'te; + edesc : mutable g_desc 'te } +and g_desc 'te = + [ Dlevels of list (g_level 'te) + | Dparser of parser_t 'te ] +and g_level 'te = + { assoc : g_assoc; + lname : option string; + lsuffix : g_tree 'te; + lprefix : g_tree 'te } +and g_assoc = [ NonA | RightA | LeftA ] +and g_symbol 'te = + [ Sfacto of g_symbol 'te + | Smeta of string and list (g_symbol 'te) and Obj.t + | Snterm of g_entry 'te + | Snterml of g_entry 'te and string + | Slist0 of g_symbol 'te + | Slist0sep of g_symbol 'te and g_symbol 'te and bool + | Slist1 of g_symbol 'te + | Slist1sep of g_symbol 'te and g_symbol 'te and bool + | Sopt of g_symbol 'te + | Sflag of g_symbol 'te + | Sself + | Snext + | Scut + | Stoken of Plexing.pattern + | Stree of g_tree 'te + | Svala of list string and g_symbol 'te ] +and g_action = Obj.t +and g_tree 'te = + [ Node of g_node 'te + | LocAct of g_action and list g_action + | DeadEnd ] +and g_node 'te = + { node : g_symbol 'te; son : g_tree 'te; brother : g_tree 'te } +and err_fun = unit -> string; + +type position = + [ First + | Last + | Before of string + | After of string + | Like of string + | Level of string ] +; + +value levels_of_rules : + g_entry 'te -> option position -> + list + (option string * option g_assoc * + list (list (g_symbol 'te) * g_action)) -> + list (g_level 'te); +value srules : list (list (g_symbol 'te) * g_action) -> g_symbol 'te; +external action : 'a -> g_action = "%identity"; +value eq_symbol : g_symbol 'a -> g_symbol 'a -> bool; + +value delete_rule_in_level_list : + g_entry 'te -> list (g_symbol 'te) -> list (g_level 'te) -> + list (g_level 'te); + +value warning_verbose : ref bool; diff --git a/gramlib/grammar.ml b/gramlib/grammar.ml new file mode 100644 index 0000000000..d6a2019c20 --- /dev/null +++ b/gramlib/grammar.ml @@ -0,0 +1,2695 @@ +(* camlp5r *) +(* grammar.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +#load "pa_fstream.cmo"; + +value pervasives_stderr = stderr; + +open Gramext; +open Format; + +value stderr = pervasives_stderr; + +value rec flatten_tree = + fun + [ DeadEnd -> [] + | LocAct _ _ -> [[]] + | Node {node = n; brother = b; son = s} -> + List.map (fun l -> [n :: l]) (flatten_tree s) @ flatten_tree b ] +; + +value utf8_print = ref True; + +value utf8_string_escaped s = + let b = Buffer.create (String.length s) in + loop 0 where rec loop i = + if i = String.length s then Buffer.contents b + else do { + match s.[i] with + | '"' → Buffer.add_string b "\\\"" + | '\\' → Buffer.add_string b "\\\\" + | '\n' → Buffer.add_string b "\\n" + | '\t' → Buffer.add_string b "\\t" + | '\r' → Buffer.add_string b "\\r" + | '\b' → Buffer.add_string b "\\b" + | c → Buffer.add_char b c + end; + loop (i + 1) + } +; + +value string_escaped s = + if utf8_print.val then utf8_string_escaped s + else String.escaped s +; + +value print_str ppf s = fprintf ppf "\"%s\"" (string_escaped s); + +value rec print_symbol ppf = + fun + [ Sfacto s -> print_symbol ppf s + | Smeta n sl _ -> print_meta ppf n sl + | Slist0 s -> fprintf ppf "LIST0 %a" print_symbol1 s + | Slist0sep s t osep -> + fprintf ppf "LIST0 %a SEP %a%s" print_symbol1 s print_symbol1 t + (if osep then " OPT_SEP" else "") + | Slist1 s -> fprintf ppf "LIST1 %a" print_symbol1 s + | Slist1sep s t osep -> + fprintf ppf "LIST1 %a SEP %a%s" print_symbol1 s print_symbol1 t + (if osep then " OPT_SEP" else "") + | Sopt s -> fprintf ppf "OPT %a" print_symbol1 s + | Sflag s -> fprintf ppf "FLAG %a" print_symbol1 s + | Stoken (con, prm) when con <> "" && prm <> "" -> + fprintf ppf "%s@ %a" con print_str prm + | Svala _ s -> fprintf ppf "V %a" print_symbol s + | Snterml e l -> + fprintf ppf "%s%s@ LEVEL@ %a" e.ename (if e.elocal then "*" else "") + print_str l + | Snterm _ | Snext | Sself | Scut | Stoken _ | Stree _ as s -> + print_symbol1 ppf s ] +and print_meta ppf n sl = + loop 0 sl where rec loop i = + fun + [ [] -> () + | [s :: sl] -> do { + let j = + try String.index_from n i ' ' with [ Not_found -> String.length n ] + in + fprintf ppf "%s %a" (String.sub n i (j - i)) print_symbol1 s; + if sl = [] then () + else do { fprintf ppf " "; loop (min (j + 1) (String.length n)) sl } + } ] +and print_symbol1 ppf = + fun + [ Sfacto s -> print_symbol1 ppf s + | Snterm e -> fprintf ppf "%s%s" e.ename (if e.elocal then "*" else "") + | Sself -> pp_print_string ppf "SELF" + | Snext -> pp_print_string ppf "NEXT" + | Scut -> pp_print_string ppf "/" + | Stoken ("", s) -> print_str ppf s + | Stoken (con, "") -> pp_print_string ppf con + | Stree t -> print_level ppf pp_print_space (flatten_tree t) + | Smeta _ _ _ | Snterml _ _ | Slist0 _ | Slist0sep _ _ _ | Slist1 _ | + Slist1sep _ _ _ | Sopt _ | Sflag _ | Stoken _ | Svala _ _ as s -> + fprintf ppf "(%a)" print_symbol s ] +and print_rule ppf symbols = do { + fprintf ppf "@[<hov 0>"; + let _ = + List.fold_left + (fun sep symbol -> do { + fprintf ppf "%t%a" sep print_symbol symbol; + fun ppf -> fprintf ppf ";@ " + }) + (fun ppf -> ()) symbols + in + fprintf ppf "@]" +} +and print_level ppf pp_print_space rules = do { + fprintf ppf "@[<hov 0>[ "; + let _ = + List.fold_left + (fun sep rule -> do { + fprintf ppf "%t%a" sep print_rule rule; + fun ppf -> fprintf ppf "%a| " pp_print_space () + }) + (fun ppf -> ()) rules + in + fprintf ppf " ]@]" +}; + +value print_levels ppf elev = + let _ = + List.fold_left + (fun sep lev -> do { + let rules = + List.map (fun t -> [Sself :: t]) (flatten_tree lev.lsuffix) @ + flatten_tree lev.lprefix + in + fprintf ppf "%t@[<hov 2>" sep; + match lev.lname with + [ Some n -> fprintf ppf "%a@;<1 2>" print_str n + | None -> () ]; + match lev.assoc with + [ LeftA -> fprintf ppf "LEFTA" + | RightA -> fprintf ppf "RIGHTA" + | NonA -> fprintf ppf "NONA" ]; + fprintf ppf "@]@;<1 2>"; + print_level ppf pp_force_newline rules; + fun ppf -> fprintf ppf "@,| " + }) + (fun ppf -> ()) elev + in + () +; + +value print_entry ppf e = do { + fprintf ppf "@[<v 0>[ "; + match e.edesc with + [ Dlevels elev -> print_levels ppf elev + | Dparser _ -> fprintf ppf "<parser>" ]; + fprintf ppf " ]@]" +}; + +value iter_entry f e = + let treated = ref [] in + let rec do_entry e = + if List.memq e treated.val then () + else do { + treated.val := [e :: treated.val]; + f e; + match e.edesc with + [ Dlevels ll -> List.iter do_level ll + | Dparser _ -> () ] + } + and do_level lev = do { do_tree lev.lsuffix; do_tree lev.lprefix } + and do_tree = + fun + [ Node n -> do_node n + | LocAct _ _ | DeadEnd -> () ] + and do_node n = do { do_symbol n.node; do_tree n.son; do_tree n.brother } + and do_symbol = + fun + [ Sfacto s -> do_symbol s + | Smeta _ sl _ -> List.iter do_symbol sl + | Snterm e -> do_entry e + | Snterml e _ -> do_entry e + | Slist0 s -> do_symbol s + | Slist1 s -> do_symbol s + | Sopt s -> do_symbol s + | Sflag s -> do_symbol s + | Slist0sep s1 s2 _ -> do { do_symbol s1; do_symbol s2 } + | Slist1sep s1 s2 _ -> do { do_symbol s1; do_symbol s2 } + | Stree t -> do_tree t + | Svala _ s -> do_symbol s + | Sself | Snext | Scut | Stoken _ -> () ] + in + do_entry e +; + +value fold_entry f e init = + let treated = ref [] in + let rec do_entry accu e = + if List.memq e treated.val then accu + else do { + treated.val := [e :: treated.val]; + let accu = f e accu in + match e.edesc with + [ Dlevels ll -> List.fold_left do_level accu ll + | Dparser _ -> accu ] + } + and do_level accu lev = + let accu = do_tree accu lev.lsuffix in + do_tree accu lev.lprefix + and do_tree accu = + fun + [ Node n -> do_node accu n + | LocAct _ _ | DeadEnd -> accu ] + and do_node accu n = + let accu = do_symbol accu n.node in + let accu = do_tree accu n.son in + do_tree accu n.brother + and do_symbol accu = + fun + [ Sfacto s -> do_symbol accu s + | Smeta _ sl _ -> List.fold_left do_symbol accu sl + | Snterm e -> do_entry accu e + | Snterml e _ -> do_entry accu e + | Slist0 s -> do_symbol accu s + | Slist1 s -> do_symbol accu s + | Sopt s -> do_symbol accu s + | Sflag s -> do_symbol accu s + | Slist0sep s1 s2 _ -> do_symbol (do_symbol accu s1) s2 + | Slist1sep s1 s2 _ -> do_symbol (do_symbol accu s1) s2 + | Stree t -> do_tree accu t + | Svala _ s -> do_symbol accu s + | Sself | Snext | Scut | Stoken _ -> accu ] + in + do_entry init e +; + +value floc = ref (fun _ -> failwith "internal error when computing location"); + +value loc_of_token_interval bp ep = + if bp == ep then + if bp == 0 then Ploc.dummy + else Ploc.after (floc.val (bp - 1)) 0 1 + else + let loc1 = floc.val bp in + let loc2 = floc.val (pred ep) in + Ploc.encl loc1 loc2 +; + +value rec name_of_symbol entry = + fun + [ Snterm e -> "[" ^ e.ename ^ "]" + | Snterml e l -> "[" ^ e.ename ^ " level " ^ l ^ "]" + | Sself | Snext -> "[" ^ entry.ename ^ "]" + | Stoken tok -> entry.egram.glexer.Plexing.tok_text tok + | _ -> "???" ] +; + +value rec get_token_list entry rev_tokl last_tok tree = + match tree with + [ Node {node = Stoken tok; son = son; brother = DeadEnd} -> + get_token_list entry [last_tok :: rev_tokl] (tok, None) son + | Node {node = Svala ls (Stoken tok); son = son; brother = DeadEnd} -> + get_token_list entry [last_tok :: rev_tokl] (tok, Some ls) son + | _ -> + if rev_tokl = [] then None + else Some (rev_tokl, last_tok, tree) ] +; + +value rec name_of_symbol_failed entry = + fun + [ Sfacto s -> name_of_symbol_failed entry s + | Slist0 s -> name_of_symbol_failed entry s + | Slist0sep s _ _ -> name_of_symbol_failed entry s + | Slist1 s -> name_of_symbol_failed entry s + | Slist1sep s _ _ -> name_of_symbol_failed entry s + | Sopt s -> name_of_symbol_failed entry s + | Sflag s -> name_of_symbol_failed entry s + | Stree t -> name_of_tree_failed entry t + | Svala _ s -> name_of_symbol_failed entry s + | Smeta _ [s :: _] _ -> name_of_symbol_failed entry s + | s -> name_of_symbol entry s ] +and name_of_tree_failed entry = + fun + [ Node {node = s; brother = bro; son = son} -> + let tokl = + match s with + [ Stoken tok -> get_token_list entry [] (tok, None) son + | Svala ls (Stoken tok) -> get_token_list entry [] (tok, Some ls) son + | _ -> None ] + in + match tokl with + [ None -> + let txt = name_of_symbol_failed entry s in + let txt = + match (s, son) with + [ (Sopt _, Node _) -> txt ^ " or " ^ name_of_tree_failed entry son + | _ -> txt ] + in + let txt = + match bro with + [ DeadEnd | LocAct _ _ -> txt + | Node _ -> txt ^ " or " ^ name_of_tree_failed entry bro ] + in + txt + | Some (rev_tokl, last_tok, son) -> + List.fold_left + (fun s (tok, _) -> + (if s = "" then "" else s ^ " ") ^ + entry.egram.glexer.Plexing.tok_text tok) + "" (List.rev [last_tok :: rev_tokl]) ] + | DeadEnd | LocAct _ _ -> "???" ] +; + +value search_tree_in_entry prev_symb tree = + fun + [ Dlevels levels -> + let rec search_levels = + fun + [ [] -> tree + | [level :: levels] -> + match search_level level with + [ Some tree -> tree + | None -> search_levels levels ] ] + and search_level level = + match search_tree level.lsuffix with + [ Some t -> Some (Node {node = Sself; son = t; brother = DeadEnd}) + | None -> search_tree level.lprefix ] + and search_tree t = + if tree <> DeadEnd && t == tree then Some t + else + match t with + [ Node n -> + match search_symbol n.node with + [ Some symb -> + Some (Node {node = symb; son = n.son; brother = DeadEnd}) + | None -> + match search_tree n.son with + [ Some t -> + Some (Node {node = n.node; son = t; brother = DeadEnd}) + | None -> search_tree n.brother ] ] + | LocAct _ _ | DeadEnd -> None ] + and search_symbol symb = + match symb with + [ Snterm _ | Snterml _ _ | Slist0 _ | Slist0sep _ _ _ | Slist1 _ | + Slist1sep _ _ _ | Sopt _ | Stoken _ | Stree _ + when symb == prev_symb -> + Some symb + | Slist0 symb -> + match search_symbol symb with + [ Some symb -> Some (Slist0 symb) + | None -> None ] + | Slist0sep symb sep b -> + match search_symbol symb with + [ Some symb -> Some (Slist0sep symb sep b) + | None -> + match search_symbol sep with + [ Some sep -> Some (Slist0sep symb sep b) + | None -> None ] ] + | Slist1 symb -> + match search_symbol symb with + [ Some symb -> Some (Slist1 symb) + | None -> None ] + | Slist1sep symb sep b -> + match search_symbol symb with + [ Some symb -> Some (Slist1sep symb sep b) + | None -> + match search_symbol sep with + [ Some sep -> Some (Slist1sep symb sep b) + | None -> None ] ] + | Sopt symb -> + match search_symbol symb with + [ Some symb -> Some (Sopt symb) + | None -> None ] + | Stree t -> + match search_tree t with + [ Some t -> Some (Stree t) + | None -> None ] + | _ -> None ] + in + search_levels levels + | Dparser _ -> tree ] +; + +value error_verbose = ref False; + +value tree_failed entry prev_symb_result prev_symb tree = do { + let txt = name_of_tree_failed entry tree in + let txt = + match prev_symb with + [ Slist0 s -> + let txt1 = name_of_symbol_failed entry s in + txt1 ^ " or " ^ txt ^ " expected" + | Slist1 s -> + let txt1 = name_of_symbol_failed entry s in + txt1 ^ " or " ^ txt ^ " expected" + | Slist0sep s sep _ -> + match Obj.magic prev_symb_result with + [ [] -> + let txt1 = name_of_symbol_failed entry s in + txt1 ^ " or " ^ txt ^ " expected" + | _ -> + let txt1 = name_of_symbol_failed entry sep in + txt1 ^ " or " ^ txt ^ " expected" ] + | Slist1sep s sep _ -> + match Obj.magic prev_symb_result with + [ [] -> + let txt1 = name_of_symbol_failed entry s in + txt1 ^ " or " ^ txt ^ " expected" + | _ -> + let txt1 = name_of_symbol_failed entry sep in + txt1 ^ " or " ^ txt ^ " expected" ] + | Sopt _ | Sflag _ | Stree _ | Svala _ _ -> txt ^ " expected" + | _ -> txt ^ " expected after " ^ name_of_symbol_failed entry prev_symb ] + in + if error_verbose.val then do { + let tree = search_tree_in_entry prev_symb tree entry.edesc in + let ppf = err_formatter in + fprintf ppf "@[<v 0>@,"; + fprintf ppf "----------------------------------@,"; + fprintf ppf "Parse error in entry [%s], rule:@;<0 2>" entry.ename; + fprintf ppf "@["; + print_level ppf pp_force_newline (flatten_tree tree); + fprintf ppf "@]@,"; + fprintf ppf "----------------------------------@,"; + fprintf ppf "@]@." + } + else (); + txt ^ " (in [" ^ entry.ename ^ "])" +}; + +value symb_failed entry prev_symb_result prev_symb symb = + let tree = Node {node = symb; brother = DeadEnd; son = DeadEnd} in + tree_failed entry prev_symb_result prev_symb tree +; + +external app : Obj.t -> 'a = "%identity"; + +value is_level_labelled n lev = + match lev.lname with + [ Some n1 -> n = n1 + | None -> False ] +; + +value level_number entry lab = + let rec lookup levn = + fun + [ [] -> failwith ("unknown level " ^ lab) + | [lev :: levs] -> + if is_level_labelled lab lev then levn else lookup (succ levn) levs ] + in + match entry.edesc with + [ Dlevels elev -> lookup 0 elev + | Dparser _ -> raise Not_found ] +; + +value rec top_symb entry = + fun + [ Sself | Snext -> Snterm entry + | Snterml e _ -> Snterm e + | Slist1sep s sep b -> Slist1sep (top_symb entry s) sep b + | _ -> raise Stream.Failure ] +; + +value entry_of_symb entry = + fun + [ Sself | Snext -> entry + | Snterm e -> e + | Snterml e _ -> e + | _ -> raise Stream.Failure ] +; + +value top_tree entry = + fun + [ Node {node = s; brother = bro; son = son} -> + Node {node = top_symb entry s; brother = bro; son = son} + | LocAct _ _ | DeadEnd -> raise Stream.Failure ] +; + +value skip_if_empty bp p strm = + if Stream.count strm == bp then Gramext.action (fun a -> p strm) + else raise Stream.Failure +; + +value continue entry bp a s son p1 = + parser + [: a = (entry_of_symb entry s).econtinue 0 bp a; + act = p1 ? tree_failed entry a s son :] -> + Gramext.action (fun _ -> app act a) +; + +value do_recover parser_of_tree entry nlevn alevn bp a s son = + parser + [ [: a = parser_of_tree entry nlevn alevn (top_tree entry son) :] -> a + | [: a = skip_if_empty bp (parser []) :] -> a + | [: a = + continue entry bp a s son + (parser_of_tree entry nlevn alevn son) :] -> + a ] +; + +value strict_parsing = ref False; + +value recover parser_of_tree entry nlevn alevn bp a s son strm = + if strict_parsing.val then raise (Stream.Error (tree_failed entry a s son)) + else do_recover parser_of_tree entry nlevn alevn bp a s son strm +; + +value token_count = ref 0; + +value peek_nth n strm = do { + let list = Stream.npeek n strm in + token_count.val := Stream.count strm + n; + let rec loop list n = + match (list, n) with + [ ([x :: _], 1) -> Some x + | ([_ :: l], n) -> loop l (n - 1) + | ([], _) -> None ] + in + loop list n +}; + +value item_skipped = ref False; +value skip_item a = do { item_skipped.val := True; a }; + +value call_and_push ps al strm = do { + item_skipped.val := False; + let a = ps strm in + let al = if item_skipped.val then al else [a :: al] in + item_skipped.val := False; + al +}; + +value fcall_and_push ps al err strm = do { + item_skipped.val := False; + match ps err strm with + [ Some (a, strm) -> do { + let al = if item_skipped.val then al else [a :: al] in + item_skipped.val := False; + Some (al, strm) + } + | None -> None ] +}; + +value bcall_and_push ps al err strm = do { + item_skipped.val := False; + match ps err strm with + [ Some (a, strm, Fstream.K kont) -> do { + let rec kont2 kont () = do { + item_skipped.val := False; + match kont () with + [ Some (a, strm, Fstream.K kont) -> do { + let al = if item_skipped.val then al else [a :: al] in + item_skipped.val := False; + Some (al, strm, Fstream.K (kont2 kont)) + } + | None -> None ] + } + in + let al = if item_skipped.val then al else [a :: al] in + item_skipped.val := False; + Some (al, strm, Fstream.K (kont2 kont)) + } + | None -> None ] +}; + +value token_ematch gram (tok, vala) = + let tematch = gram.glexer.Plexing.tok_match tok in + match vala with + | Some al -> + let pa = + match al with + [ [] -> + let t = "V " ^ fst tok in + gram.glexer.Plexing.tok_match (t, "") + | al -> + loop al where rec loop = + fun + [ [a :: al] -> + let pa = gram.glexer.Plexing.tok_match ("V", a) in + let pal = loop al in + fun tok -> + try pa tok with [ Stream.Failure -> pal tok ] + | [] -> fun tok -> raise Stream.Failure ] ] + in + fun tok -> + try Obj.repr (Ploc.VaAnt (Obj.magic (pa tok : string))) with + [ Stream.Failure -> Obj.repr (Ploc.VaVal (tematch tok)) ] + | None -> + fun tok -> Obj.repr (tematch tok : string) + end +; + +value rec parser_of_tree entry nlevn alevn = + fun + [ DeadEnd -> parser [] + | LocAct act _ -> parser [: :] -> act + | Node {node = Sself; son = LocAct act _; brother = DeadEnd} -> + parser [: a = entry.estart alevn :] -> app act a + | Node {node = Scut; son = son; brother = _} -> + parser_of_tree entry nlevn alevn son + | Node {node = Sself; son = LocAct act _; brother = bro} -> + let p2 = parser_of_tree entry nlevn alevn bro in + parser + [ [: a = entry.estart alevn :] -> app act a + | [: a = p2 :] -> a ] + | Node {node = s; son = son; brother = DeadEnd} -> + let tokl = + match s with + [ Stoken tok -> get_token_list entry [] (tok, None) son + | Svala ls (Stoken tok) -> get_token_list entry [] (tok, Some ls) son + | _ -> None ] + in + match tokl with + [ None -> + let ps = parser_of_symbol entry nlevn s in + let p1 = parser_of_tree entry nlevn alevn son in + let p1 = parser_cont p1 entry nlevn alevn s son in + parser bp + [: a = ps; + act = p1 bp a ? tree_failed entry a s son :] -> app act a + | Some (rev_tokl, (last_tok, svala), son) -> + let lt = + let t = Stoken last_tok in + match svala with + [ Some l -> Svala l t + | None -> t ] + in + let p1 = parser_of_tree entry nlevn alevn son in + let p1 = parser_cont p1 entry nlevn alevn lt son in + parser_of_token_list entry s son p1 (parser []) rev_tokl + (last_tok, svala) ] + | Node {node = s; son = son; brother = bro} -> + let tokl = + match s with + [ Stoken tok -> get_token_list entry [] (tok, None) son + | Svala ls (Stoken tok) -> get_token_list entry [] (tok, Some ls) son + | _ -> None ] + in + match tokl with + [ None -> + let ps = parser_of_symbol entry nlevn s in + let p1 = parser_of_tree entry nlevn alevn son in + let p1 = parser_cont p1 entry nlevn alevn s son in + let p2 = parser_of_tree entry nlevn alevn bro in + fun (strm : Stream.t _) -> + let bp = Stream.count strm in + match try Some (ps strm) with [ Stream.Failure -> None ] with + | Some a -> + match + try Some (p1 bp a strm) with [ Stream.Failure -> None ] + with + | Some act -> app act a + | None -> raise (Stream.Error (tree_failed entry a s son)) + end + | None -> p2 strm + end + | Some (rev_tokl, (last_tok, vala), son) -> + let lt = + let t = Stoken last_tok in + match vala with + [ Some ls -> Svala ls t + | None -> t ] + in + let p2 = parser_of_tree entry nlevn alevn bro in + let p1 = parser_of_tree entry nlevn alevn son in + let p1 = parser_cont p1 entry nlevn alevn lt son in + let p1 = + parser_of_token_list entry lt son p1 p2 rev_tokl + (last_tok, vala) + in + parser + [ [: a = p1 :] -> a + | [: a = p2 :] -> a ] ] ] +and parser_cont p1 entry nlevn alevn s son bp a = + parser + [ [: a = p1 :] -> a + | [: a = recover parser_of_tree entry nlevn alevn bp a s son :] -> a ] +and parser_of_token_list entry s son p1 p2 rev_tokl last_tok = + let plast = + let n = List.length rev_tokl + 1 in + let tematch = token_ematch entry.egram last_tok in + let ps strm = + match peek_nth n strm with + [ Some tok -> do { + let r = tematch tok in + for i = 1 to n do { Stream.junk strm }; + Obj.repr r + } + | None -> raise Stream.Failure ] + in + fun (strm : Stream.t _) -> + let bp = Stream.count strm in + let a = ps strm in + match try Some (p1 bp a strm) with [ Stream.Failure -> None ] with + | Some act -> app act a + | None -> raise (Stream.Error (tree_failed entry a s son)) + end + in + match List.rev rev_tokl with + | [] -> parser [: a = plast :] -> a + | [tok :: tokl] -> + let tematch = token_ematch entry.egram tok in + let ps strm = + match peek_nth 1 strm with + [ Some tok -> tematch tok + | None -> raise Stream.Failure ] + in + let p1 = + loop 2 tokl where rec loop n = + fun + | [] -> plast + | [tok :: tokl] -> + let tematch = token_ematch entry.egram tok in + let ps strm = + match peek_nth n strm with + | Some tok -> tematch tok + | None -> raise Stream.Failure + end + in + let p1 = loop (n + 1) tokl in + parser [: a = ps; act = p1 ! :] -> app act a + end + in + parser [: a = ps; act = p1 ! :] -> app act a + end +and parser_of_symbol entry nlevn = + fun + [ Sfacto s -> parser_of_symbol entry nlevn s + | Smeta _ symbl act -> + let act = Obj.magic act entry symbl in + Obj.magic + (List.fold_left + (fun act symb -> Obj.magic act (parser_of_symbol entry nlevn symb)) + act symbl) + | Slist0 s -> + let ps = call_and_push (parser_of_symbol entry nlevn s) in + let rec loop al = + parser + [ [: al = ps al; a = loop al ! :] -> a + | [: :] -> al ] + in + parser [: a = loop [] :] -> Obj.repr (List.rev a) + | Slist0sep symb sep False -> + let ps = call_and_push (parser_of_symbol entry nlevn symb) in + let pt = parser_of_symbol entry nlevn sep in + let rec kont al = + parser + [ [: v = pt; al = ps al ? symb_failed entry v sep symb; + a = kont al ! :] -> + a + | [: :] -> al ] + in + parser + [ [: al = ps []; a = kont al ! :] -> Obj.repr (List.rev a) + | [: :] -> Obj.repr [] ] + | Slist0sep symb sep True -> + let ps = call_and_push (parser_of_symbol entry nlevn symb) in + let pt = parser_of_symbol entry nlevn sep in + let rec kont al = + parser + [ [: v = pt; al = ps al; a = kont al ! :] -> a + | [: v = pt :] -> al + | [: :] -> al ] + in + parser + [ [: al = ps []; a = kont al ! :] -> Obj.repr (List.rev a) + | [: :] -> Obj.repr [] ] + | Slist1 s -> + let ps = call_and_push (parser_of_symbol entry nlevn s) in + let rec loop al = + parser + [ [: al = ps al; a = loop al ! :] -> a + | [: :] -> al ] + in + parser [: al = ps []; a = loop al ! :] -> Obj.repr (List.rev a) + | Slist1sep symb sep False -> + let ps = call_and_push (parser_of_symbol entry nlevn symb) in + let pt = parser_of_symbol entry nlevn sep in + let rec kont al = + parser + [ [: v = pt; + al = + parser + [ [: a = ps al :] -> a + | [: a = parse_top_symb entry symb :] -> [a :: al] + | [: :] -> + raise (Stream.Error (symb_failed entry v sep symb)) ] !; + a = kont al ! :] -> + a + | [: :] -> al ] + in + parser [: al = ps []; a = kont al ! :] -> Obj.repr (List.rev a) + | Slist1sep symb sep True -> + let ps = call_and_push (parser_of_symbol entry nlevn symb) in + let pt = parser_of_symbol entry nlevn sep in + let rec kont al = + parser + [ [: v = pt; al = ps al; al = kont al ! :] -> al + | [: v = pt; a = parse_top_symb entry symb; + al = kont [a :: al] ! :] -> al + | [: v = pt :] -> al + | [: :] -> al ] + in + parser [: al = ps []; a = kont al ! :] -> Obj.repr (List.rev a) + | Sopt s -> + let ps = parser_of_symbol entry nlevn s in + parser + [ [: a = ps :] -> Obj.repr (Some a) + | [: :] -> Obj.repr None ] + | Sflag s -> + let ps = parser_of_symbol entry nlevn s in + parser + [ [: _ = ps :] -> Obj.repr True + | [: :] -> Obj.repr False ] + | Stree t -> + let pt = parser_of_tree entry 1 0 t in + parser bp + [: a = pt :] ep -> + let loc = loc_of_token_interval bp ep in + app a loc + | Svala al s -> + let pa = + match al with + [ [] -> + let t = + match s with + [ Sflag _ -> Some "V FLAG" + | Sopt _ -> Some "V OPT" + | Slist0 _ | Slist0sep _ _ _ -> Some "V LIST" + | Slist1 _ | Slist1sep _ _ _ -> Some "V LIST" + | Stoken (con, "") -> Some ("V " ^ con) + | _ -> None ] + in + match t with + [ Some t -> parser_of_token entry (t, "") + | None -> parser [] ] + | al -> + loop al where rec loop = + fun + [ [a :: al] -> + let pa = parser_of_token entry ("V", a) in + let pal = loop al in + parser + [ [: a = pa :] -> a + | [: a = pal :] -> a ] + | [] -> parser [] ] ] + in + let ps = parser_of_symbol entry nlevn s in + parser + [ [: a = pa :] -> Obj.repr (Ploc.VaAnt (Obj.magic a : string)) + | [: a = ps :] -> Obj.repr (Ploc.VaVal a) ] + | Snterm e -> parser [: a = e.estart 0 :] -> a + | Snterml e l -> parser [: a = e.estart (level_number e l) :] -> a + | Sself -> parser [: a = entry.estart 0 :] -> a + | Snext -> parser [: a = entry.estart nlevn :] -> a + | Scut -> parser [: :] -> Obj.repr () + | Stoken tok -> parser_of_token entry tok ] +and parser_of_token entry tok = + let f = entry.egram.glexer.Plexing.tok_match tok in + fun strm -> + match Stream.peek strm with + [ Some tok -> do { + let r = f tok in + Stream.junk strm; + Obj.repr r + } + | None -> raise Stream.Failure ] +and parse_top_symb entry symb = + parser_of_symbol entry 0 (top_symb entry symb) +; + +value symb_failed_txt e s1 s2 = symb_failed e 0 s1 s2; + +value rec start_parser_of_levels entry clevn = + fun + [ [] -> fun levn -> parser [] + | [lev :: levs] -> + let p1 = start_parser_of_levels entry (succ clevn) levs in + match lev.lprefix with + [ DeadEnd -> p1 + | tree -> + let alevn = + match lev.assoc with + [ LeftA | NonA -> succ clevn + | RightA -> clevn ] + in + let p2 = parser_of_tree entry (succ clevn) alevn tree in + match levs with + [ [] -> + fun levn strm -> + (* this code should be there but is commented to preserve + compatibility with previous versions... with this code, + the grammar entry e: [[ "x"; a = e | "y" ]] should fail + because it should be: e: [RIGHTA[ "x"; a = e | "y" ]]... + if levn > clevn then match strm with parser [] + else + *) + match strm with parser bp + [: act = p2 :] ep -> + let a = app act (loc_of_token_interval bp ep) in + entry.econtinue levn bp a strm + | _ -> + fun levn strm -> + if levn > clevn then p1 levn strm + else + match strm with parser bp + [ [: act = p2 :] ep -> + let a = app act (loc_of_token_interval bp ep) in + entry.econtinue levn bp a strm + | [: a = p1 levn :] -> a ] ] ] ] +; + +value rec continue_parser_of_levels entry clevn = + fun + [ [] -> fun levn bp a -> parser [] + | [lev :: levs] -> + let p1 = continue_parser_of_levels entry (succ clevn) levs in + match lev.lsuffix with + [ DeadEnd -> p1 + | tree -> + let alevn = + match lev.assoc with + [ LeftA | NonA -> succ clevn + | RightA -> clevn ] + in + let p2 = parser_of_tree entry (succ clevn) alevn tree in + fun levn bp a strm -> + if levn > clevn then p1 levn bp a strm + else + match strm with parser + [ [: a = p1 levn bp a :] -> a + | [: act = p2 :] ep -> + let a = app act a (loc_of_token_interval bp ep) in + entry.econtinue levn bp a strm ] ] ] +; + +value continue_parser_of_entry entry = + match entry.edesc with + [ Dlevels elev -> + let p = continue_parser_of_levels entry 0 elev in + fun levn bp a -> + parser + [ [: a = p levn bp a :] -> a + | [: :] -> a ] + | Dparser p -> fun levn bp a -> parser [] ] +; + +value empty_entry ename levn strm = + raise (Stream.Error ("entry [" ^ ename ^ "] is empty")) +; + +value start_parser_of_entry entry = + match entry.edesc with + [ Dlevels [] -> empty_entry entry.ename + | Dlevels elev -> start_parser_of_levels entry 0 elev + | Dparser p -> fun levn strm -> p strm ] +; + +value default_algorithm_var = ref DefaultAlgorithm; +value set_default_algorithm algo = default_algorithm_var.val := algo; +value default_algorithm () = default_algorithm_var.val; + +(* deprecated since 2017-06-06: use 'set_default_algorithm' instead *) +value backtrack_parse = ref False; +value warned_using_backtrack_parse = ref False; +value compatible_deprecated_backtrack_parse () = + if backtrack_parse.val then do { + if not warned_using_backtrack_parse.val then do { + eprintf "<W> use of Grammar.backtrace_parse "; + eprintf "deprecated since 2017-06-06\n%!"; + warned_using_backtrack_parse.val := True + } + else (); + backtrack_parse.val := False; + set_default_algorithm Backtracking + } + else () +; + +(* parsing with functional streams *) + +value backtrack_trace = ref False; +value backtrack_stalling_limit = ref 10000; +value backtrack_trace_try = ref False; +value tind = ref ""; +value max_fcount = ref None; +value nb_ftry = ref 0; + +value no_err () = ""; +value ftree_failed entry prev_symb_result prev_symb tree () = + tree_failed entry prev_symb_result prev_symb tree +; +value fsymb_failed entry prev_symb_result prev_symb symb () = + symb_failed entry prev_symb_result prev_symb symb +; + +value bfparser_of_token entry tok return_value = + let f = entry.egram.glexer.Plexing.tok_match tok in + fun err strm -> + let _ = + if backtrack_trace.val then do { + Printf.eprintf "%stesting (\"%s\", \"%s\") ..." tind.val (fst tok) + (snd tok); + flush stderr; + } + else () + in + let _ = + if backtrack_stalling_limit.val > 0 || backtrack_trace_try.val then + let m = + match max_fcount.val with + | Some (m, _, _) -> m + | None -> 0 + end + in + if Fstream.count strm > m then do { + if backtrack_trace.val then + Printf.eprintf " (token count max %d)%!" (Fstream.count strm) + else (); + let e : g_entry Obj.t = Obj.magic (entry : g_entry _) in + let cnt = Fstream.count strm in + max_fcount.val := Some (cnt, e, err); + nb_ftry.val := 0 + } + else do { + if backtrack_trace.val then + Printf.eprintf " (token count %d/%d)%!" (Fstream.count strm) m + else (); + incr nb_ftry; + if backtrack_trace_try.val then do { + Printf.eprintf "\ntokens read: %d; tokens tests: %d" m + nb_ftry.val; + flush stderr; + } + else (); + if backtrack_stalling_limit.val > 0 && + nb_ftry.val >= backtrack_stalling_limit.val + then do { + if backtrack_trace.val || backtrack_trace_try.val then + Printf.eprintf " (stalling limit reached)\n%!" + else (); + raise Stream.Failure + } + else () + } + else () + in + match Fstream.next strm with + [ Some (tok, strm) -> + try do { + let r = f tok in + let _ = + if backtrack_trace.val then Printf.eprintf " yes \"%s\"\n%!" r + else () + in + nb_ftry.val := 0; + return_value r strm + } + with + [ Stream.Failure -> + let _ = + if backtrack_trace.val then Printf.eprintf " not found\n%!" + else () + in + None ] + | None -> + let _ = + if backtrack_trace.val then do { + Printf.eprintf " eos\n"; + flush stderr; + } + else () + in + None ] +; + +let s = try Sys.getenv "CAMLP5PARAM" with [ Not_found -> "" ] in +loop 0 where rec loop i = + if i = String.length s then () + else if s.[i] = 'b' then do { + set_default_algorithm Backtracking; + loop (i + 1) + } + else if s.[i] = 'f' then do { + set_default_algorithm Functional; + loop (i + 1) + } + else if s.[i] = 'p' then do { + set_default_algorithm Predictive; + loop (i + 1) + } + else if s.[i] = 'l' && i + 1 < String.length s && s.[i+1] = '=' then do { + let (n, i) = + loop 0 (i + 2) where rec loop n i = + if i = String.length s then (n, i) + else if s.[i] >= '0' && s.[i] <= '9' then + loop (10 * n + Char.code s.[i] - Char.code '0') (i + 1) + else (n, i) + in + backtrack_stalling_limit.val := n; + loop i + } + else if s.[i] = 't' then do { + backtrack_trace.val := True; + loop (i + 1) + } + else if s.[i] = 'y' then do { + backtrack_trace_try.val := True; + loop (i + 1) + } + else + loop (i + 1) +; + +(* version with functional streams and limited backtracking *) + +value fcount = fparser bp [: :] → bp; + +value rec ftop_symb entry = + fun + [ Sself | Snext -> Some (Snterm entry) + | Snterml e _ -> Some (Snterm e) + | Slist1sep s sep b -> + match ftop_symb entry s with + [ Some s -> Some (Slist1sep s sep b) + | None -> None ] + | _ -> None ] +; + +value ftop_tree entry son strm = + match son with + [ Node {node = s; brother = bro; son = son} -> + match ftop_symb entry s with + [ Some sy -> + let r = Node {node = sy; brother = bro; son = son} in + let _ = + if backtrack_trace.val then + Printf.eprintf "%srecovering pos %d\n%!" tind.val + (Fstream.count strm) + else () + in + match strm with fparser [: :] -> r + | None -> + None ] + | LocAct _ _ | DeadEnd -> + None ] +; + +value frecover fparser_of_tree entry next_levn assoc_levn son err = + fparser + [ [: t = ftop_tree entry son; + a = fparser_of_tree entry next_levn assoc_levn t err :] -> a ] +; + +value fparser_of_token entry tok = + let return_value r strm = match strm with fparser [: :] -> Obj.repr r in + bfparser_of_token entry tok return_value +; + +value rec fparser_of_tree entry next_levn assoc_levn = + fun + [ DeadEnd -> fun err -> fparser [] + | LocAct act _ -> fun err -> fparser [: :] -> act + | Node {node = Sself; son = LocAct act _; brother = DeadEnd} -> + fun err -> fparser [: a = entry.fstart assoc_levn err :] -> app act a + | Node {node = Sself; son = LocAct act _; brother = bro} -> + let p2 = fparser_of_tree entry next_levn assoc_levn bro in + fun err -> + fparser + [ [: a = entry.fstart assoc_levn err :] -> app act a + | [: a = p2 err :] -> a ] + | Node {node = Scut; son = son; brother = _} -> + let p1 = fparser_of_tree entry next_levn assoc_levn son in + fun err -> + fparser + [ [: !; a = p1 err :] -> a ] + | Node {node = s; son = son; brother = DeadEnd} -> + let ps = fparser_of_symbol entry next_levn s in + let p1 = fparser_of_tree entry next_levn assoc_levn son in + let p1 = fparser_cont p1 entry next_levn assoc_levn son in + fun err -> + fparser [: a = ps err; act = p1 (ftree_failed entry a s son) :] -> + app act a + | Node {node = s; son = son; brother = bro} -> + let ps = fparser_of_symbol entry next_levn s in + let p1 = fparser_of_tree entry next_levn assoc_levn son in + let p1 = fparser_cont p1 entry next_levn assoc_levn son in + let p2 = fparser_of_tree entry next_levn assoc_levn bro in + fun err -> + fparser + [ [: a = ps err; act = p1 (ftree_failed entry a s son) :] -> app act a + | [: a = p2 err :] -> a ] ] +and fparser_cont p1 entry next_levn assoc_levn son err = + fparser + [ [: a = p1 err :] -> a + | [: a = frecover fparser_of_tree entry next_levn assoc_levn son err :] -> + a ] +and fparser_of_symbol entry next_levn = + fun + [ Sfacto s -> fparser_of_symbol entry next_levn s + | Smeta _ symbl act -> + let _ = failwith "Smeta for fparser not impl" in + let act = Obj.magic act entry symbl in + Obj.magic + (List.fold_left + (fun act symb -> + Obj.magic act (fparser_of_symbol entry next_levn symb)) + act symbl) + | Slist0 s -> + let ps = fparser_of_symbol entry next_levn s in + let ps = fcall_and_push ps in + let rec loop al err = + fparser + [ [: al = ps al err; a = loop al err :] -> a + | [: :] -> al ] + in + fun err -> fparser [: a = loop [] err :] -> Obj.repr (List.rev a) + | Slist0sep symb sep False -> + let ps = fparser_of_symbol entry next_levn symb in + let ps = fcall_and_push ps in + let pt = fparser_of_symbol entry next_levn sep in + let rec kont al err = + fparser + [ [: v = pt err; al = ps al (fsymb_failed entry v sep symb); + a = kont al err :] -> a + | [: :] -> al ] + in + fun err -> + fparser + [ [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a) + | [: :] -> Obj.repr [] ] + | Slist1 s -> + let ps = fparser_of_symbol entry next_levn s in + let ps = fcall_and_push ps in + let rec loop al err = + fparser + [ [: al = ps al err; a = loop al err :] -> a + | [: :] -> al ] + in + fun err -> + fparser [: al = ps [] err; a = loop al err :] -> Obj.repr (List.rev a) + | Slist0sep symb sep True -> + failwith "LIST0 _ SEP _ OPT_SEP not implemented; please report" + | Slist1sep symb sep False -> + let ps = fparser_of_symbol entry next_levn symb in + let ps = fcall_and_push ps in + let pt = fparser_of_symbol entry next_levn sep in + let pts = fparse_top_symb entry symb in + let rec kont al err = + fparser + [ [: v = pt err; + al = + fparser + [ [: a = ps al (fsymb_failed entry v sep symb) :] -> a + | [: a = pts (fsymb_failed entry v sep symb) :] -> [a :: al] ]; + a = kont al err :] -> + a + | [: :] -> al ] + in + fun err -> + fparser [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a) + | Slist1sep symb sep True -> + let ps = fparser_of_symbol entry next_levn symb in + let ps = fcall_and_push ps in + let pt = fparser_of_symbol entry next_levn sep in + let pts = fparse_top_symb entry symb in + let rec kont al err = + fparser + [ [: v = pt err; al = ps al err; al = kont al err :] -> al + | [: v = pt err; a = pts err; al = kont [a :: al] err :] -> al + | [: v = pt err :] -> al + | [: :] -> al ] + in + fun err -> + fparser [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a) + | Sopt s -> + let ps = fparser_of_symbol entry next_levn s in + fun err -> + fparser + [ [: a = ps err :] -> Obj.repr (Some a) + | [: :] -> Obj.repr None ] + | Sflag s -> + let ps = fparser_of_symbol entry next_levn s in + fun err -> + fparser + [ [: _ = ps err :] -> Obj.repr True + | [: :] -> Obj.repr False ] + | Stree t -> + let pt = fparser_of_tree entry 1 0 t in + fun err -> + fparser bp + [: a = pt err :] ep -> + let loc = loc_of_token_interval bp ep in + app a loc + | Svala al s -> + let pa = + match al with + [ [] -> + let t = + match s with + [ Sflag _ -> Some "V FLAG" + | Sopt _ -> Some "V OPT" + | Slist0 _ | Slist0sep _ _ _ -> Some "V LIST" + | Slist1 _ | Slist1sep _ _ _ -> Some "V LIST" + | Stoken (con, "") -> Some ("V " ^ con) + | _ -> None ] + in + match t with + [ Some t -> fparser_of_token entry (t, "") + | None -> fun err -> fparser [] ] + | al -> + loop al where rec loop = + fun + [ [a :: al] -> + let pa = fparser_of_token entry ("V", a) in + let pal = loop al in + fun err -> + fparser + [ [: a = pa err :] -> a + | [: a = pal err :] -> a ] + | [] -> fun err -> fparser [] ] ] + in + let ps = fparser_of_symbol entry next_levn s in + fun err -> + fparser + [ [: a = pa err :] -> Obj.repr (Ploc.VaAnt (Obj.magic a : string)) + | [: a = ps err :] -> Obj.repr (Ploc.VaVal a) ] + | Snterm e -> + fun err -> fparser [: a = e.fstart 0 err :] -> a + | Snterml e l -> + fun err -> fparser [: a = e.fstart (level_number e l) err :] -> a + | Sself -> fun err -> fparser [: a = entry.fstart 0 err :] -> a + | Snext -> fun err -> fparser [: a = entry.fstart next_levn err :] -> a + | Scut -> fun err -> fparser [: ! :] -> Obj.repr () + | Stoken tok -> fparser_of_token entry tok ] +and fparse_top_symb entry symb = + match ftop_symb entry symb with + [ Some sy -> fparser_of_symbol entry 0 sy + | None -> fun err -> fparser [] ] +; + +value rec fstart_parser_of_levels entry clevn = + fun + [ [] -> fun levn err -> fparser [] + | [lev :: levs] -> + let p1 = fstart_parser_of_levels entry (succ clevn) levs in + match lev.lprefix with + [ DeadEnd -> p1 + | tree -> + let alevn = + match lev.assoc with + [ LeftA | NonA -> succ clevn + | RightA -> clevn ] + in + let p2 = fparser_of_tree entry (succ clevn) alevn tree in + match levs with + [ [] -> + fun levn err strm -> + match strm with fparser bp + [: act = p2 err; ep = fcount; + a = + entry.fcontinue levn bp + (app act (loc_of_token_interval bp ep)) err :] -> + a + | _ -> + fun levn err strm -> + if levn > clevn then p1 levn err strm + else + match strm with fparser bp + [ [: act = p2 err; ep = fcount; + a = + entry.fcontinue levn bp + (app act (loc_of_token_interval bp ep)) err :] -> + a + | [: a = p1 levn err :] -> a ] ] ] ] +; + +value rec fcontinue_parser_of_levels entry clevn = + fun + [ [] -> fun levn bp a err -> fparser [] + | [lev :: levs] -> + let p1 = fcontinue_parser_of_levels entry (succ clevn) levs in + match lev.lsuffix with + [ DeadEnd -> p1 + | tree -> + let alevn = + match lev.assoc with + [ LeftA | NonA -> succ clevn + | RightA -> clevn ] + in + let p2 = fparser_of_tree entry (succ clevn) alevn tree in + fun levn bp a err strm -> + if levn > clevn then p1 levn bp a err strm + else + match strm with fparser + [ [: a = p1 levn bp a err :] -> a + | [: act = p2 err; ep = fcount; + a = + entry.fcontinue levn bp + (app act a (loc_of_token_interval bp ep)) err :] -> + a ] ] ] +; + +value fstart_parser_of_entry entry = + match entry.edesc with + [ Dlevels [] -> fun _ err -> fparser [] + | Dlevels elev -> fstart_parser_of_levels entry 0 elev + | Dparser p -> fun levn err strm -> failwith "Dparser for Fstream" ] +; + +value fcontinue_parser_of_entry entry = + match entry.edesc with + [ Dlevels elev -> + let p = fcontinue_parser_of_levels entry 0 elev in + fun levn bp a err -> + fparser + [ [: a = p levn bp a err :] -> a + | [: :] -> a ] + | Dparser p -> fun levn bp a err -> fparser [] ] +; + +(* version with functional streams and full backtracking *) + +value rec btop_symb entry = + fun + [ Sself | Snext -> Some (Snterm entry) + | Snterml e _ -> Some (Snterm e) + | Slist1sep s sep b -> + match btop_symb entry s with + [ Some s -> Some (Slist1sep s sep b) + | None -> None ] + | _ -> None ] +; + +value btop_tree entry son strm = + match son with + [ Node {node = s; brother = bro; son = son} -> + match btop_symb entry s with + [ Some sy -> + let r = Node {node = sy; brother = bro; son = son} in + let _ = + if backtrack_trace.val then + Printf.eprintf "%srecovering pos %d\n%!" tind.val + (Fstream.count strm) + else () + in + match strm with bparser [: :] -> r + | None -> + None ] + | LocAct _ _ | DeadEnd -> + None ] +; + +value brecover bparser_of_tree entry next_levn assoc_levn son err = + bparser + [ [: t = btop_tree entry son; + a = bparser_of_tree entry next_levn assoc_levn t err :] -> a ] +; + +value bparser_of_token entry tok = + let return_value r strm = match strm with bparser [: :] -> Obj.repr r in + bfparser_of_token entry tok return_value +; + +value rec bparser_of_tree entry next_levn assoc_levn = + fun + [ DeadEnd -> fun err -> bparser [] + | LocAct act _ -> fun err -> bparser [: :] -> act + | Node {node = Sself; son = LocAct act _; brother = DeadEnd} -> + fun err -> bparser [: a = entry.bstart assoc_levn err :] -> app act a + | Node {node = Sself; son = LocAct act _; brother = bro} -> + let p2 = bparser_of_tree entry next_levn assoc_levn bro in + fun err -> + bparser + [ [: a = entry.bstart assoc_levn err :] -> app act a + | [: a = p2 err :] -> a ] + | Node {node = Scut; son = son; brother = _} -> + let p1 = bparser_of_tree entry next_levn assoc_levn son in + fun err -> + bparser + [ [: !; a = p1 err :] -> a ] + | Node {node = s; son = son; brother = DeadEnd} -> + let ps = bparser_of_symbol entry next_levn s in + let p1 = bparser_of_tree entry next_levn assoc_levn son in + let p1 = bparser_cont p1 entry next_levn assoc_levn son in + fun err -> + bparser [: a = ps err; act = p1 (ftree_failed entry a s son) :] -> + app act a + | Node {node = s; son = son; brother = bro} -> + let ps = bparser_of_symbol entry next_levn s in + let p1 = bparser_of_tree entry next_levn assoc_levn son in + let p1 = bparser_cont p1 entry next_levn assoc_levn son in + let p2 = bparser_of_tree entry next_levn assoc_levn bro in + fun err -> + bparser + [ [: a = ps err; act = p1 (ftree_failed entry a s son) :] -> app act a + | [: a = p2 err :] -> a ] ] +and bparser_cont p1 entry next_levn assoc_levn son err = + bparser + [ [: a = p1 err :] -> a + | [: a = brecover bparser_of_tree entry next_levn assoc_levn son err :] -> + a ] +and bparser_of_symbol entry next_levn = + fun + [ Sfacto s -> bparser_of_symbol entry next_levn s + | Smeta _ symbl act -> + let _ = failwith "Smeta for bparser not impl" in + let act = Obj.magic act entry symbl in + Obj.magic + (List.fold_left + (fun act symb -> + Obj.magic act (bparser_of_symbol entry next_levn symb)) + act symbl) + | Slist0 s -> + let ps = bparser_of_symbol entry next_levn s in + let ps = bcall_and_push ps in + let rec loop al err = + bparser + [ [: al = ps al err; a = loop al err :] -> a + | [: :] -> al ] + in + fun err -> bparser [: a = loop [] err :] -> Obj.repr (List.rev a) + | Slist0sep symb sep False -> + let ps = bparser_of_symbol entry next_levn symb in + let ps = bcall_and_push ps in + let pt = bparser_of_symbol entry next_levn sep in + let rec kont al err = + bparser + [ [: v = pt err; al = ps al (fsymb_failed entry v sep symb); + a = kont al err :] -> a + | [: :] -> al ] + in + fun err -> + bparser + [ [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a) + | [: :] -> Obj.repr [] ] + | Slist1 s -> + let ps = bparser_of_symbol entry next_levn s in + let ps = bcall_and_push ps in + let rec loop al err = + bparser + [ [: al = ps al err; a = loop al err :] -> a + | [: :] -> al ] + in + fun err -> + bparser [: al = ps [] err; a = loop al err :] -> Obj.repr (List.rev a) + | Slist0sep symb sep True -> + failwith "LIST0 _ SEP _ OPT_SEP not implemented; please report" + | Slist1sep symb sep False -> + let ps = bparser_of_symbol entry next_levn symb in + let ps = bcall_and_push ps in + let pt = bparser_of_symbol entry next_levn sep in + let pts = bparse_top_symb entry symb in + let rec kont al err = + bparser + [ [: v = pt err; + al = + bparser + [ [: a = ps al (fsymb_failed entry v sep symb) :] -> a + | [: a = pts (fsymb_failed entry v sep symb) :] -> [a :: al] ]; + a = kont al err :] -> + a + | [: :] -> al ] + in + fun err -> + bparser [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a) + | Slist1sep symb sep True -> + let ps = bparser_of_symbol entry next_levn symb in + let ps = bcall_and_push ps in + let pt = bparser_of_symbol entry next_levn sep in + let pts = bparse_top_symb entry symb in + let rec kont al err = + bparser + [ [: v = pt err; al = ps al err; al = kont al err :] -> al + | [: v = pt err; a = pts err; al = kont [a :: al] err :] -> al + | [: v = pt err :] -> al + | [: :] -> al ] + in + fun err -> + bparser [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a) + | Sopt s -> + let ps = bparser_of_symbol entry next_levn s in + fun err -> + bparser + [ [: a = ps err :] -> Obj.repr (Some a) + | [: :] -> Obj.repr None ] + | Sflag s -> + let ps = bparser_of_symbol entry next_levn s in + fun err -> + bparser + [ [: _ = ps err :] -> Obj.repr True + | [: :] -> Obj.repr False ] + | Stree t -> + let pt = bparser_of_tree entry 1 0 t in + fun err -> + bparser bp + [: a = pt err :] ep -> + let loc = loc_of_token_interval bp ep in + app a loc + | Svala al s -> + let pa = + match al with + [ [] -> + let t = + match s with + [ Sflag _ -> Some "V FLAG" + | Sopt _ -> Some "V OPT" + | Slist0 _ | Slist0sep _ _ _ -> Some "V LIST" + | Slist1 _ | Slist1sep _ _ _ -> Some "V LIST" + | Stoken (con, "") -> Some ("V " ^ con) + | _ -> None ] + in + match t with + [ Some t -> bparser_of_token entry (t, "") + | None -> fun err -> bparser [] ] + | al -> + loop al where rec loop = + fun + [ [a :: al] -> + let pa = bparser_of_token entry ("V", a) in + let pal = loop al in + fun err -> + bparser + [ [: a = pa err :] -> a + | [: a = pal err :] -> a ] + | [] -> fun err -> bparser [] ] ] + in + let ps = bparser_of_symbol entry next_levn s in + fun err -> + bparser + [ [: a = pa err :] -> Obj.repr (Ploc.VaAnt (Obj.magic a : string)) + | [: a = ps err :] -> Obj.repr (Ploc.VaVal a) ] + | Snterm e -> + fun err -> bparser [: a = e.bstart 0 err :] -> a + | Snterml e l -> + fun err -> bparser [: a = e.bstart (level_number e l) err :] -> a + | Sself -> fun err -> bparser [: a = entry.bstart 0 err :] -> a + | Snext -> fun err -> bparser [: a = entry.bstart next_levn err :] -> a + | Scut -> fun err -> bparser [: ! :] -> Obj.repr () + | Stoken tok -> bparser_of_token entry tok ] +and bparse_top_symb entry symb = + match btop_symb entry symb with + [ Some sy -> bparser_of_symbol entry 0 sy + | None -> fun err -> bparser [] ] +; + +value bcount strm = match strm with bparser [: :] -> Fstream.count strm; + +value rec bstart_parser_of_levels entry clevn = + fun + [ [] -> fun levn err -> bparser [] + | [lev :: levs] -> + let p1 = bstart_parser_of_levels entry (succ clevn) levs in + match lev.lprefix with + [ DeadEnd -> p1 + | tree -> + let alevn = + match lev.assoc with + [ LeftA | NonA -> succ clevn + | RightA -> clevn ] + in + let p2 = bparser_of_tree entry (succ clevn) alevn tree in + match levs with + [ [] -> + fun levn err strm -> + match strm with bparser bp + [: act = p2 err; ep = bcount; + a = + entry.bcontinue levn bp + (app act (loc_of_token_interval bp ep)) err :] -> + a + | _ -> + fun levn err strm -> + if levn > clevn then p1 levn err strm + else + match strm with bparser bp + [ [: act = p2 err; ep = bcount; + a = + entry.bcontinue levn bp + (app act (loc_of_token_interval bp ep)) err :] -> + a + | [: a = p1 levn err :] -> a ] ] ] ] +; + +value rec bcontinue_parser_of_levels entry clevn = + fun + [ [] -> fun levn bp a err -> bparser [] + | [lev :: levs] -> + let p1 = bcontinue_parser_of_levels entry (succ clevn) levs in + match lev.lsuffix with + [ DeadEnd -> p1 + | tree -> + let alevn = + match lev.assoc with + [ LeftA | NonA -> succ clevn + | RightA -> clevn ] + in + let p2 = bparser_of_tree entry (succ clevn) alevn tree in + fun levn bp a err strm -> + if levn > clevn then p1 levn bp a err strm + else + match strm with bparser + [ [: a = p1 levn bp a err :] -> a + | [: act = p2 err; ep = bcount; + a = + entry.bcontinue levn bp + (app act a (loc_of_token_interval bp ep)) err :] -> + a ] ] ] +; + +value bstart_parser_of_entry entry = + match entry.edesc with + [ Dlevels [] -> fun _ err -> bparser [] + | Dlevels elev -> bstart_parser_of_levels entry 0 elev + | Dparser p -> fun levn err strm -> failwith "Dparser for Fstream" ] +; + +value bcontinue_parser_of_entry entry = + match entry.edesc with + [ Dlevels elev -> + let p = bcontinue_parser_of_levels entry 0 elev in + fun levn bp a err -> + bparser + [ [: a = p levn bp a err :] -> a + | [: :] -> a ] + | Dparser p -> fun levn bp a err -> bparser [] ] +; + +(* Extend syntax *) + +value trace_entry_lev_name entry lev = + match entry.edesc with + | Dlevels ll -> + if lev < List.length ll then + let glev = List.nth ll lev in + match glev.lname with + | Some "" | None -> () + | Some s -> Printf.eprintf " (\"%s\")" s + end + else () + | Dparser _ -> () + end +; + +value may_trace_start entry f = + if backtrack_trace.val then + fun lev err strm -> do { + let t = tind.val in + Printf.eprintf "%s>> start %s lev %d" tind.val entry.ename lev; + trace_entry_lev_name entry lev; + Printf.eprintf "\n%!"; + tind.val := tind.val ^ " "; + try do { + let r = f lev err strm in + tind.val := t; + Printf.eprintf "%s<< end %s lev %d" tind.val entry.ename lev; + trace_entry_lev_name entry lev; + Printf.eprintf "\n%!"; + r + } + with e -> do { + tind.val := t; + Printf.eprintf "%sexception \"%s\"\n" tind.val + (Printexc.to_string e); + flush stderr; + raise e + } + } + else f +; + +value may_trace_continue entry f = + if backtrack_trace.val then + fun lev bp a err strm -> do { + let t = tind.val in + Printf.eprintf "%s>> continue %s lev %d bp %d pos %d" tind.val + entry.ename lev bp (Fstream.count strm); + trace_entry_lev_name entry lev; + Printf.eprintf "\n%!"; + tind.val := tind.val ^ " "; + try do { + let r = f lev bp a err strm in + tind.val := t; + Printf.eprintf "%s<< end continue %s lev %d %d" tind.val + entry.ename lev bp; + trace_entry_lev_name entry lev; + Printf.eprintf "\n%!"; + r + } + with e -> do { + tind.val := t; + Printf.eprintf "%sexception \"%s\"" tind.val + (Printexc.to_string e); + trace_entry_lev_name entry lev; + Printf.eprintf "\n%!"; + raise e + } + } + else f +; + +value init_entry_functions entry = do { + entry.estart := + fun lev strm -> do { + let f = start_parser_of_entry entry in + entry.estart := f; + f lev strm + }; + entry.econtinue := + fun lev bp a strm -> do { + let f = continue_parser_of_entry entry in + entry.econtinue := f; + f lev bp a strm + }; + entry.fstart := + fun lev err strm -> do { + let f = fstart_parser_of_entry entry in + let f = may_trace_start entry f in + entry.fstart := f; + f lev err strm + }; + entry.fcontinue := + fun lev bp a err strm -> do { + let f = fcontinue_parser_of_entry entry in + let f = may_trace_continue entry f in + entry.fcontinue := f; + f lev bp a err strm + }; + entry.bstart := + fun lev err strm -> do { + let f = bstart_parser_of_entry entry in + let f = may_trace_start entry f in + entry.bstart := f; + f lev err strm + }; + entry.bcontinue := + fun lev bp a err strm -> do { + let f = bcontinue_parser_of_entry entry in + let f = may_trace_continue entry f in + entry.bcontinue := f; + f lev bp a err strm + } +}; + +value reinit_entry_functions entry = + match entry.edesc with + [ Dlevels elev -> init_entry_functions entry + | _ -> () ] +; + +value extend_entry entry position rules = + try do { + let elev = Gramext.levels_of_rules entry position rules in + entry.edesc := Dlevels elev; + init_entry_functions entry + } + with + [ Plexing.Error s -> do { + Printf.eprintf "Lexer initialization error:\n- %s\n" s; + flush stderr; + failwith "Grammar.extend" + } ] +; + +value extend entry_rules_list = + let gram = ref None in + List.iter + (fun (entry, position, rules) -> do { + match gram.val with + [ Some g -> + if g != entry.egram then do { + Printf.eprintf "Error: entries with different grammars\n"; + flush stderr; + failwith "Grammar.extend" + } + else () + | None -> gram.val := Some entry.egram ]; + extend_entry entry position rules + }) + entry_rules_list +; + +(* Deleting a rule *) + +value delete_rule entry sl = + match entry.edesc with + [ Dlevels levs -> do { + let levs = Gramext.delete_rule_in_level_list entry sl levs in + entry.edesc := Dlevels levs; + entry.estart := + fun lev strm -> do { + let f = start_parser_of_entry entry in + entry.estart := f; + f lev strm + }; + entry.econtinue := + fun lev bp a strm -> do { + let f = continue_parser_of_entry entry in + entry.econtinue := f; + f lev bp a strm + }; + entry.fstart := + fun lev err strm -> do { + let f = fstart_parser_of_entry entry in + entry.fstart := f; + f lev err strm + }; + entry.fcontinue := + fun lev bp a err strm -> do { + let f = fcontinue_parser_of_entry entry in + entry.fcontinue := f; + f lev bp a err strm + }; + entry.bstart := + fun lev err strm -> do { + let f = bstart_parser_of_entry entry in + entry.bstart := f; + f lev err strm + }; + entry.bcontinue := + fun lev bp a err strm -> do { + let f = bcontinue_parser_of_entry entry in + entry.bcontinue := f; + f lev bp a err strm + } + } + | Dparser _ -> () ] +; + +value safe_delete_rule = delete_rule; + +type parse_algorithm = Gramext.parse_algorithm == + [ Predictive | Functional | Backtracking | DefaultAlgorithm ] +; + +value warning_verbose = Gramext.warning_verbose; + +(* Normal interface *) + +type token = (string * string); +type g = Gramext.grammar token; + +type ty_symbol 'self 'a = Gramext.g_symbol token; +type ty_rule 'self 'f 'r = list (ty_symbol 'self Obj.t); +type ty_production 'a = (ty_rule 'a Obj.t Obj.t * Gramext.g_action); +type ty_extension = + (Gramext.g_entry token * option Gramext.position * list (option string * option Gramext.g_assoc * list (ty_production Obj.t))); + +value s_facto s = Sfacto s; +value s_nterm e = Snterm e; +value s_nterml e l = Snterml e l; +value s_list0 s = Slist0 s; +value s_list0sep s sep b = Slist0sep s sep b; +value s_list1 s = Slist1 s; +value s_list1sep s sep b = Slist1sep s sep b; +value s_opt s = Sopt s; +value s_flag s = Sflag s; +value s_self = Sself; +value s_next = Snext; +value s_token tok = Stoken tok; +value s_rules (t : list (ty_production Obj.t)) = Gramext.srules (Obj.magic t); +value s_vala sl s = Svala sl s; + +value r_stop = []; +value r_next r s = r @ [s]; +value r_cut r = r @ [Scut]; + +value production (p : (ty_rule 'a 'f (Ploc.t -> 'a) * 'f)) = (Obj.magic p : ty_production 'a); +value extension e pos (r : list (option string * option Gramext.g_assoc * list (ty_production Obj.t))) = ((e, pos, Obj.magic r) : ty_extension); + +value safe_extend (l : list ty_extension) = extend (Obj.magic l); + +value create_toktab () = Hashtbl.create 301; +value gcreate glexer = + {gtokens = create_toktab (); glexer = glexer; galgo = DefaultAlgorithm} +; + +value set_algorithm g algo = g.galgo := algo; + +value tokens g con = do { + let list = ref [] in + Hashtbl.iter + (fun (p_con, p_prm) c -> + if p_con = con then list.val := [(p_prm, c.val) :: list.val] else ()) + g.gtokens; + list.val +}; + +value glexer g = g.glexer; + +type gen_parsable 'te = + { pa_chr_strm : Stream.t char; + pa_tok_strm : Stream.t 'te; + pa_tok_fstrm : mutable Fstream.t 'te; + pa_loc_func : Plexing.location_function } +; + +type parsable = gen_parsable token; + +value fstream_of_stream ts = + Fstream.from + (fun _ -> + match Stream.peek ts with + | None -> None + | x -> do { Stream.junk ts; x } + end) +; + +value parsable g cs = + let (ts, lf) = g.glexer.Plexing.tok_func cs in + let fts = fstream_of_stream ts in + {pa_chr_strm = cs; pa_tok_strm = ts; pa_tok_fstrm = fts; pa_loc_func = lf} +; + +value parse_parsable entry p = do { + let efun = entry.estart 0 in + let ts = p.pa_tok_strm in + let cs = p.pa_chr_strm in + let fun_loc = p.pa_loc_func in + let restore = + let old_floc = floc.val in + let old_tc = token_count.val in + fun () -> do { + floc.val := old_floc; + token_count.val := old_tc; + } + in + let get_loc () = + try + let cnt = Stream.count ts in + let loc = fun_loc cnt in + if token_count.val - 1 <= cnt then loc + else Ploc.encl loc (fun_loc (token_count.val - 1)) + with + [ Failure _ -> Ploc.make_unlined (Stream.count cs, Stream.count cs + 1) ] + in + floc.val := fun_loc; + token_count.val := 0; + try do { + let r = efun ts in + restore (); + r + } + with + [ Stream.Failure -> do { + let loc = get_loc () in + restore (); + Ploc.raise loc (Stream.Error ("illegal begin of " ^ entry.ename)) + } + | Stream.Error _ as exc -> do { + let loc = get_loc () in + restore (); + Ploc.raise loc exc + } + | exc -> do { + let loc = (Stream.count cs, Stream.count cs + 1) in + restore (); + Ploc.raise (Ploc.make_unlined loc) exc + } ] +}; + +value bfparse entry efun restore2 p = do { + let default_loc () = + let cs = p.pa_chr_strm in + Ploc.make_unlined (Stream.count cs, Stream.count cs + 1) + in + let restore = + let old_tc = token_count.val in + let old_nb_ftry = nb_ftry.val in + fun () -> do { + token_count.val := old_tc; + nb_ftry.val := old_nb_ftry; + restore2 (); + } + in + let get_loc cnt = + try + let loc = p.pa_loc_func cnt in + if token_count.val - 1 <= cnt then loc + else Ploc.encl loc (p.pa_loc_func (token_count.val - 1)) + with + [ Failure _ -> default_loc () + | e -> do { restore (); raise e } ] + in + token_count.val := 0; + nb_ftry.val := 0; + if backtrack_trace_try.val then do { + Printf.eprintf "\n"; + flush stderr; + } + else (); + let r = + let fts = p.pa_tok_fstrm in + try efun no_err fts with + [ Stream.Failure | Fstream.Cut -> do { + let cnt = Fstream.count fts + Fstream.count_unfrozen fts - 1 in + let loc = get_loc cnt in + let mess = + match max_fcount.val with + | Some (cnt, entry, err) -> + let mess = err () in + let mess = + if mess = "" then sprintf "failure in [%s]" entry.ename + else mess + in + if backtrack_trace.val then + mess ^ Printf.sprintf " (max token count %d)" cnt + else mess + | None -> + sprintf "[%s] failed" entry.ename + end + in + let mess = + if backtrack_trace.val then + mess ^ Printf.sprintf " (cnt %d) (cnt+unfrozen %d)" + token_count.val cnt + else mess + in + restore (); + Ploc.raise loc (Stream.Error mess) + } + | exc -> do { + restore (); + Ploc.raise (default_loc ()) exc + } ] + in + restore (); r +}; + +value bfparse_token_stream entry efun ts = do { + let restore2 () = () in + if backtrack_trace.val then + Printf.eprintf "%sbfparse_token_stream [%s]\n%!" tind.val entry.ename + else (); + let p = + {pa_chr_strm = [: :]; + pa_tok_strm = ts; + pa_tok_fstrm = fstream_of_stream ts; + pa_loc_func = floc.val} + in + bfparse entry efun restore2 p +}; + +value bfparse_parsable entry p efun = do { + let restore2 = + let old_floc = floc.val in + let old_max_fcount = max_fcount.val in + fun () -> do { + floc.val := old_floc; + max_fcount.val := old_max_fcount; + } + in + floc.val := p.pa_loc_func; + max_fcount.val := None; + if backtrack_trace.val then + Printf.eprintf "%sbfparse_parsable [%s]\n%!" tind.val entry.ename + else (); + bfparse entry efun restore2 p +}; + +value fparse_token_stream entry ts = + let efun err fts = + match entry.fstart 0 err fts with + | Some (a, _) -> Obj.magic a + | None -> raise Stream.Failure + end + in + bfparse_token_stream entry efun ts +; + +value fparse_parsable entry p = + let efun err fts = + match entry.fstart 0 err fts with + [ Some (r, strm) -> do { p.pa_tok_fstrm := strm; r } + | None -> raise Stream.Failure ] + in + bfparse_parsable entry p efun +; + +value bparse_token_stream entry ts = + let efun err fts = + match entry.bstart 0 err fts with + | Some (a, _, _) -> Obj.magic a + | None -> raise Stream.Failure + end + in + bfparse_token_stream entry efun ts +; + +value bparse_parsable entry p = + let efun err fts = + match entry.bstart 0 err fts with + [ Some (r, strm, _) -> do { p.pa_tok_fstrm := strm; r } + | None -> raise Stream.Failure ] + in + bfparse_parsable entry p efun +; + +value bparse_parsable_all entry p = do { + let efun = entry.bstart 0 in + let fts = p.pa_tok_fstrm in + let cs = p.pa_chr_strm in + let fun_loc = p.pa_loc_func in + let restore = + let old_floc = floc.val in + let old_tc = token_count.val in + let old_max_fcount = max_fcount.val in + let old_nb_ftry = nb_ftry.val in + fun () -> do { + floc.val := old_floc; + token_count.val := old_tc; + max_fcount.val := old_max_fcount; + nb_ftry.val := old_nb_ftry; + } + in + floc.val := fun_loc; + token_count.val := 0; + max_fcount.val := None; + if backtrack_trace.val then + Printf.eprintf "%sbparse_parsable_all [%s]: max token count reset\n%!" + tind.val entry.ename + else + nb_ftry.val := 0; + if backtrack_trace_try.val then do { + Printf.eprintf "\n"; + flush stderr; + } + else (); + try do { + let rl = + loop [] (efun no_err fts) where rec loop rev_rl = + fun + [ Some (r, strm, k) -> + let _ = + if backtrack_trace.val then do { + Printf.eprintf "result found !\n\n"; + flush stderr; + } + else () + in + loop [r :: rev_rl] (Fstream.bcontinue k) + | None -> List.rev rev_rl ] + in + restore (); + rl + } + with exc -> do { + let loc = (Stream.count cs, Stream.count cs + 1) in + restore (); + Ploc.raise (Ploc.make_unlined loc) exc + } +}; + +value find_entry e s = + let rec find_levels = + fun + [ [] -> None + | [lev :: levs] -> + match find_tree lev.lsuffix with + [ None -> + match find_tree lev.lprefix with + [ None -> find_levels levs + | x -> x ] + | x -> x ] ] + and find_symbol = + fun + [ Sfacto s -> find_symbol s + | Snterm e -> if e.ename = s then Some e else None + | Snterml e _ -> if e.ename = s then Some e else None + | Smeta _ sl _ -> find_symbol_list sl + | Slist0 s -> find_symbol s + | Slist0sep s _ _ -> find_symbol s + | Slist1 s -> find_symbol s + | Slist1sep s _ _ -> find_symbol s + | Sopt s -> find_symbol s + | Sflag s -> find_symbol s + | Stree t -> find_tree t + | Svala _ s -> find_symbol s + | Sself | Snext | Scut | Stoken _ -> None ] + and find_symbol_list = + fun + [ [s :: sl] -> + match find_symbol s with + [ None -> find_symbol_list sl + | x -> x ] + | [] -> None ] + and find_tree = + fun + [ Node {node = s; brother = bro; son = son} -> + match find_symbol s with + [ None -> + match find_tree bro with + [ None -> find_tree son + | x -> x ] + | x -> x ] + | LocAct _ _ | DeadEnd -> None ] + in + match e.edesc with + [ Dlevels levs -> + match find_levels levs with + [ Some e -> e + | None -> raise Not_found ] + | Dparser _ -> raise Not_found ] +; + +value bfparser_of_parser p fstrm return_value = do { + let shift_token_number = Fstream.count fstrm in + let old_floc = floc.val in + let restore () = floc.val := old_floc in + floc.val := fun i -> old_floc (shift_token_number + i); + let ts = + let fts = ref fstrm in + Stream.from + (fun _ -> + match Fstream.next fts.val with + [ Some (v, fstrm) -> do { fts.val := fstrm; Some v } + | None -> None ]) + in + let r = + try + let r = (Obj.magic p ts : Obj.t) in + let fstrm = + loop fstrm (Stream.count ts) where rec loop fstrm i = + if i = 0 then fstrm + else + match Fstream.next fstrm with + [ Some (_, fstrm) -> loop fstrm (i - 1) + | None -> failwith "internal error in Entry.of_parser" ] + in + return_value r fstrm + with e -> do { + restore (); + match e with + | Stream.Failure -> None + | _ -> raise e + end + } + in + do { restore (); r } +}; + +value fparser_of_parser p err fstrm = + let return_value r fstrm = match fstrm with fparser [: :] -> r in + bfparser_of_parser p fstrm return_value +; + +value bparser_of_parser p err fstrm = + let return_value r fstrm = match fstrm with bparser [: :] -> r in + bfparser_of_parser p fstrm return_value +; + +module Entry = + struct + type te = token; + type e 'a = g_entry te; + value create g n = + {egram = g; ename = n; elocal = False; estart = empty_entry n; + econtinue _ _ _ = parser []; fstart _ _ = fparser []; + fcontinue _ _ _ _ = fparser []; bstart _ _ = bparser []; + bcontinue _ _ _ _ = bparser []; edesc = Dlevels []} + ; + value parse_parsable (entry : e 'a) p : 'a = + let _ = compatible_deprecated_backtrack_parse () in + match entry.egram.galgo with + [ DefaultAlgorithm -> + match default_algorithm_var.val with + | Predictive | DefaultAlgorithm -> + Obj.magic (parse_parsable entry p : Obj.t) + | Backtracking -> + Obj.magic (bparse_parsable entry p : Obj.t) + | Functional -> + Obj.magic (fparse_parsable entry p : Obj.t) + end + | Predictive -> + Obj.magic (parse_parsable entry p : Obj.t) + | Functional -> + Obj.magic (fparse_parsable entry p : Obj.t) + | Backtracking -> + Obj.magic (bparse_parsable entry p : Obj.t) ] + ; + value parse (entry : e 'a) cs : 'a = + let parsable = parsable entry.egram cs in + parse_parsable entry parsable + ; + value parse_parsable_all (entry : e 'a) p : 'a = + let _ = compatible_deprecated_backtrack_parse () in + match entry.egram.galgo with + [ DefaultAlgorithm -> + match default_algorithm_var.val with + | Predictive | DefaultAlgorithm -> + try Obj.magic [(parse_parsable entry p : Obj.t)] with + [ Stream.Failure | Stream.Error _ -> [] ] + | Backtracking -> + Obj.magic (bparse_parsable_all entry p : list Obj.t) + | Functional -> + failwith "Entry.parse_parsable_all: func parsing not impl" + end + | Predictive -> + try Obj.magic [(parse_parsable entry p : Obj.t)] with + [ Stream.Failure | Stream.Error _ -> [] ] + | Functional -> + failwith "parse_parsable_all: functional parsing not impl" + | Backtracking -> + Obj.magic (bparse_parsable_all entry p : list Obj.t) ] + ; + value parse_all (entry : e 'a) cs : 'a = + let parsable = parsable entry.egram cs in + parse_parsable_all entry parsable + ; + value parse_token_stream (entry : e 'a) ts : 'a = + let _ = compatible_deprecated_backtrack_parse () in + match entry.egram.galgo with + | DefaultAlgorithm -> + match default_algorithm_var.val with + | Predictive | DefaultAlgorithm -> + Obj.magic (entry.estart 0 ts : Obj.t) + | Backtracking -> + failwith "not impl Entry.parse_token_stream default backtrack" + | Functional -> + failwith "Entry.parse_token_stream: func parsing not impl" + end + | Predictive -> Obj.magic (entry.estart 0 ts : Obj.t) + | Functional -> + failwith "not impl Entry.parse_token_stream functional" + | Backtracking -> + failwith "not impl Entry.parse_token_stream backtrack" + end + ; + value warned_using_parse_token = ref False; + value parse_token (entry : e 'a) ts : 'a = do { + (* commented: too often warned in Coq... + if not warned_using_parse_token.val then do { + eprintf "<W> use of Grammar.Entry.parse_token "; + eprintf "deprecated since 2017-06-16\n%!"; + eprintf "use Grammar.Entry.parse_token_stream instead\n%! "; + warned_using_parse_token.val := True + } + else (); + *) + parse_token_stream entry ts + }; + value name e = e.ename; + value of_parser g n (p : Stream.t te -> 'a) : e 'a = + {egram = g; ename = n; elocal = False; + estart _ = (Obj.magic p : Stream.t te -> Obj.t); + econtinue _ _ _ = parser []; + fstart _ = fparser_of_parser p; + fcontinue _ _ _ _ = fparser []; + bstart _ = bparser_of_parser p; + bcontinue _ _ _ _ = bparser []; + edesc = Dparser (Obj.magic p : Stream.t te -> Obj.t)} + ; + external obj : e 'a -> Gramext.g_entry te = "%identity"; + value print ppf e = fprintf ppf "%a@." print_entry (obj e); + value find e s = find_entry (obj e) s; + end +; + +value of_entry e = e.egram; + +value create_local_entry g n = + {egram = g; ename = n; elocal = True; estart = empty_entry n; + econtinue _ _ _ = parser []; fstart _ _ = fparser []; + fcontinue _ _ _ _ = fparser []; bstart _ _ = bparser []; + bcontinue _ _ _ _ = bparser []; edesc = Dlevels []} +; + +(* Unsafe *) + +value clear_entry e = do { + e.estart := fun _ -> parser []; + e.econtinue := fun _ _ _ -> parser []; + e.fstart := fun _ _ -> fparser []; + e.fcontinue := fun _ _ _ _ -> fparser []; + e.bstart := fun _ _ -> bparser []; + e.bcontinue := fun _ _ _ _ -> bparser []; + match e.edesc with + [ Dlevels _ -> e.edesc := Dlevels [] + | Dparser _ -> () ] +}; + +value gram_reinit g glexer = do { + Hashtbl.clear g.gtokens; + g.glexer := glexer +}; + +module Unsafe = + struct + value gram_reinit = gram_reinit; + value clear_entry = clear_entry; + end +; + +(* Functorial interface *) + +module type GLexerType = + sig + type te = 'x; + value lexer : Plexing.lexer te; + end +; + +module type S = + sig + type te = 'x; + type parsable = 'x; + value parsable : Stream.t char -> parsable; + value tokens : string -> list (string * int); + value glexer : Plexing.lexer te; + value set_algorithm : parse_algorithm -> unit; + module Entry : + sig + type e 'a = 'x; + value create : string -> e 'a; + value parse : e 'a -> parsable -> 'a; + value name : e 'a -> string; + value of_parser : string -> (Stream.t te -> 'a) -> e 'a; + value parse_token_stream : e 'a -> Stream.t te -> 'a; + value print : Format.formatter -> e 'a -> unit; + external obj : e 'a -> Gramext.g_entry te = "%identity"; + value parse_token : e 'a -> Stream.t te -> 'a; + end + ; + type ty_symbol 'self 'a = 'x; + (** Type of grammar symbols. A type-safe wrapper around Gramext.symbol. The + first type argument is the type of the ambient entry, the second one is the + type of the produced value. *) + + type ty_rule 'self 'f 'r = 'x; + + type ty_production 'a = 'x; + + value s_facto : ty_symbol 'self 'a -> ty_symbol 'self 'a; + (* | Smeta of string and list (g_symbol 'te) and Obj.t *) + value s_nterm : Entry.e 'a -> ty_symbol 'self 'a; + value s_nterml : Entry.e 'a -> string -> ty_symbol 'self 'a; + value s_list0 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a); + value s_list0sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a); + value s_list1 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a); + value s_list1sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a); + value s_opt : ty_symbol 'self 'a -> ty_symbol 'self (option 'a); + value s_flag : ty_symbol 'self 'a -> ty_symbol 'self bool; + value s_self : ty_symbol 'self 'self; + value s_next : ty_symbol 'self 'self; + value s_token : Plexing.pattern -> ty_symbol 'self string; + value s_rules : list (ty_production 'a) -> ty_symbol 'self 'a; + value s_vala : list string -> ty_symbol 'self 'a -> ty_symbol 'self (Ploc.vala 'a); + + value r_stop : ty_rule 'self 'r 'r; + value r_next : ty_rule 'self 'a 'r -> ty_symbol 'self 'b -> ty_rule 'self ('b -> 'a) 'r; + value r_cut : ty_rule 'self 'a 'r -> ty_rule 'self 'a 'r; + + value production : (ty_rule 'a 'f (Ploc.t -> 'a) * 'f) -> ty_production 'a; + + module Unsafe : + sig + value gram_reinit : Plexing.lexer te -> unit; + value clear_entry : Entry.e 'a -> unit; + end + ; + value extend : + Entry.e 'a -> option Gramext.position -> + list + (option string * option Gramext.g_assoc * + list (list (Gramext.g_symbol te) * Gramext.g_action)) -> + unit; + value safe_extend : + Entry.e 'a -> option Gramext.position -> + list + (option string * option Gramext.g_assoc * + list (ty_production 'a)) -> + unit; + value delete_rule : Entry.e 'a -> list (Gramext.g_symbol te) -> unit; + value safe_delete_rule : Entry.e 'a -> ty_rule 'a 'r 'f -> unit; + end +; + +module GMake (L : GLexerType) = + struct + type te = L.te; + type parsable = gen_parsable te; + value gram = gcreate L.lexer; + value parsable cs = + let (ts, lf) = L.lexer.Plexing.tok_func cs in + let fts = fstream_of_stream ts in + {pa_chr_strm = cs; pa_tok_strm = ts; pa_tok_fstrm = fts; + pa_loc_func = lf} + ; + value tokens = tokens gram; + value glexer = glexer gram; + value set_algorithm algo = gram.galgo := algo; + module Entry = + struct + type e 'a = g_entry te; + value create n = + {egram = gram; ename = n; elocal = False; estart = empty_entry n; + econtinue _ _ _ = parser []; fstart _ _ = fparser []; + fcontinue _ _ _ _ = bparser []; bstart _ _ = bparser []; + bcontinue _ _ _ _ = bparser []; edesc = Dlevels []} + ; + external obj : e 'a -> Gramext.g_entry te = "%identity"; + value parse (e : e 'a) p : 'a = + let _ = compatible_deprecated_backtrack_parse () in + match gram.galgo with + [ DefaultAlgorithm -> + match default_algorithm_var.val with + | Predictive | DefaultAlgorithm -> + Obj.magic (parse_parsable e p : Obj.t) + | Backtracking -> + Obj.magic (bparse_parsable e p : Obj.t) + | Functional -> + Obj.magic (fparse_parsable e p : Obj.t) + end + | Predictive -> + Obj.magic (parse_parsable e p : Obj.t) + | Functional -> + Obj.magic (fparse_parsable e p : Obj.t) + | Backtracking -> + Obj.magic (bparse_parsable e p : Obj.t) ] + ; + value parse_token_stream (e : e 'a) ts : 'a = + let _ = compatible_deprecated_backtrack_parse () in + match e.egram.galgo with + | DefaultAlgorithm -> + match default_algorithm_var.val with + | Predictive | DefaultAlgorithm -> + Obj.magic (e.estart 0 ts : Obj.t) + | Backtracking -> + bparse_token_stream e ts + | Functional -> + fparse_token_stream e ts + end + | Predictive -> Obj.magic (e.estart 0 ts : Obj.t) + | Functional -> fparse_token_stream e ts + | Backtracking -> bparse_token_stream e ts + end + ; + value warned_using_parse_token = ref False; + value parse_token (entry : e 'a) ts : 'a = do { + (* commented: too often warned in Coq... + if not warned_using_parse_token.val then do { + eprintf "<W> use of Entry.parse_token "; + eprintf "deprecated since 2017-06-16\n%!"; + eprintf "use Entry.parse_token_stream instead\n%! "; + warned_using_parse_token.val := True + } + else (); + *) + parse_token_stream entry ts + }; + value name e = e.ename; + value of_parser n (p : Stream.t te -> 'a) : e 'a = + {egram = gram; ename = n; elocal = False; + estart _ = (Obj.magic p : Stream.t te -> Obj.t); + econtinue _ _ _ = parser []; + fstart _ = fparser_of_parser p; + fcontinue _ _ _ _ = fparser []; + bstart _ = bparser_of_parser p; + bcontinue _ _ _ _ = bparser []; + edesc = Dparser (Obj.magic p : Stream.t te -> Obj.t)} + ; + value print ppf e = fprintf ppf "%a@." print_entry (obj e); + end + ; + type ty_symbol 'self 'a = Gramext.g_symbol te; + type ty_rule 'self 'f 'r = list (ty_symbol 'self Obj.t); + type ty_production 'a = (ty_rule 'a Obj.t Obj.t * Gramext.g_action); + + value s_facto s = Sfacto s; + value s_nterm e = Snterm e; + value s_nterml e l = Snterml e l; + value s_list0 s = Slist0 s; + value s_list0sep s sep b = Slist0sep s sep b; + value s_list1 s = Slist1 s; + value s_list1sep s sep b = Slist1sep s sep b; + value s_opt s = Sopt s; + value s_flag s = Sflag s; + value s_self = Sself; + value s_next = Snext; + value s_token tok = Stoken tok; + value s_rules (t : list (ty_production Obj.t)) = Gramext.srules (Obj.magic t); + value s_vala sl s = Svala sl s; + + value r_stop = []; + value r_next r s = r @ [s]; + value r_cut r = r @ [Scut]; + + value production (p : (ty_rule 'a 'f (Ploc.t -> 'a) * 'f)) = (Obj.magic p : ty_production 'a); + + module Unsafe = + struct + value gram_reinit = gram_reinit gram; + value clear_entry = clear_entry; + end + ; + value extend = extend_entry; + value safe_extend e pos (r : list (option string * option Gramext.g_assoc * list (ty_production Obj.t))) = extend e pos (Obj.magic r); + value delete_rule e r = delete_rule (Entry.obj e) r; + value safe_delete_rule = delete_rule; + end +; diff --git a/gramlib/grammar.mli b/gramlib/grammar.mli new file mode 100644 index 0000000000..ae2fa25f2f --- /dev/null +++ b/gramlib/grammar.mli @@ -0,0 +1,338 @@ +(* camlp5r *) +(* grammar.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +(** Extensible grammars. + + This module implements the Camlp5 extensible grammars system. + Grammars entries can be extended using the [EXTEND] statement, + added by loading the Camlp5 [pa_extend.cmo] file. *) + +type g = 'x; + (** The type for grammars, holding entries. *) +type token = (string * string); + +value gcreate : Plexing.lexer token -> g; + (** Create a new grammar, without keywords, using the lexer given + as parameter. *) +value tokens : g -> string -> list (string * int); + (** Given a grammar and a token pattern constructor, returns the list of + the corresponding values currently used in all entries of this grammar. + The integer is the number of times this pattern value is used. + + Examples: +- The call [Grammar.tokens g ""] returns the keywords list. +- The call [Grammar.tokens g "IDENT"] returns the list of all usages + of the pattern "IDENT" in the [EXTEND] statements. *) +value glexer : g -> Plexing.lexer token; + (** Return the lexer used by the grammar *) + +type parsable = 'abstract; +value parsable : g -> Stream.t char -> parsable; + (** Type and value allowing to keep the same token stream between + several calls of entries of the same grammar, to prevent possible + loss of tokens. To be used with [Entry.parse_parsable] below *) + +module Entry : + sig + type e 'a = 'x; + value create : g -> string -> e 'a; + value parse : e 'a -> Stream.t char -> 'a; + value parse_all : e 'a -> Stream.t char -> list 'a; + value parse_parsable : e 'a -> parsable -> 'a; + value name : e 'a -> string; + value of_parser : g -> string -> (Stream.t token -> 'a) -> e 'a; + value parse_token_stream : e 'a -> Stream.t token -> 'a; + value print : Format.formatter -> e 'a -> unit; + value find : e 'a -> string -> e Obj.t; + external obj : e 'a -> Gramext.g_entry token = "%identity"; + (* deprecated since 2017-06-17 *) + value parse_token : e 'a -> Stream.t token -> 'a; + end +; + (** Module to handle entries. +- [Entry.e] is the type for entries returning values of type ['a]. +- [Entry.create g n] creates a new entry named [n] in the grammar [g]. +- [Entry.parse e] returns the stream parser of the entry [e]. +- [Entry.parse_all e] returns the stream parser returning all possible + values while parsing with the entry [e]: may return more than one + value when the parsing algorithm is [Backtracking] +- [Entry.parse_all e] returns the parser returning all possible values. +- [Entry.parse_parsable e] returns the parsable parser of the entry [e]. +- [Entry.name e] returns the name of the entry [e]. +- [Entry.of_parser g n p] makes an entry from a token stream parser. +- [Entry.parse_token_stream e] returns the token stream parser of the + entry [e]. +- [Entry.print e] displays the entry [e] using [Format]. +- [Entry.find e s] finds the entry named [s] in the rules of [e]. +- [Entry.obj e] converts an entry into a [Gramext.g_entry] allowing + to see what it holds. +- [Entry.parse_token]: deprecated since 2017-06-16; old name for + [Entry.parse_token_stream] *) + +value of_entry : Entry.e 'a -> g; + (** Return the grammar associated with an entry. *) + +type ty_symbol 'self 'a = 'x; +(** Type of grammar symbols. A type-safe wrapper around Gramext.symbol. The + first type argument is the type of the ambient entry, the second one is the + type of the produced value. *) + +type ty_rule 'self 'f 'r = 'x; + +type ty_production 'a = 'x; + +type ty_extension = 'x; + +value s_facto : ty_symbol 'self 'a -> ty_symbol 'self 'a; +(* | Smeta of string and list (g_symbol 'te) and Obj.t *) +value s_nterm : Entry.e 'a -> ty_symbol 'self 'a; +value s_nterml : Entry.e 'a -> string -> ty_symbol 'self 'a; +value s_list0 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a); +value s_list0sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a); +value s_list1 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a); +value s_list1sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a); +value s_opt : ty_symbol 'self 'a -> ty_symbol 'self (option 'a); +value s_flag : ty_symbol 'self 'a -> ty_symbol 'self bool; +value s_self : ty_symbol 'self 'self; +value s_next : ty_symbol 'self 'self; +value s_token : Plexing.pattern -> ty_symbol 'self string; +value s_rules : list (ty_production 'a) -> ty_symbol 'self 'a; +value s_vala : list string -> ty_symbol 'self 'a -> ty_symbol 'self (Ploc.vala 'a); + +value r_stop : ty_rule 'self 'r 'r; +value r_next : ty_rule 'self 'a 'r -> ty_symbol 'self 'b -> ty_rule 'self ('b -> 'a) 'r; +value r_cut : ty_rule 'self 'a 'r -> ty_rule 'self 'a 'r; + +value production : (ty_rule 'a 'f (Ploc.t -> 'a) * 'f) -> ty_production 'a; + +value extension : Entry.e 'a -> option Gramext.position -> + list (option string * option Gramext.g_assoc * list (ty_production 'a)) -> ty_extension; + +value safe_extend : list ty_extension -> unit; +value safe_delete_rule : Entry.e 'a -> ty_rule 'a 'f 'r -> unit; + +(** {6 Clearing grammars and entries} *) + +module Unsafe : + sig + value gram_reinit : g -> Plexing.lexer token -> unit; + value clear_entry : Entry.e 'a -> unit; + end +; + (** Module for clearing grammars and entries. To be manipulated with + care, because: 1) reinitializing a grammar destroys all tokens + and there may have problems with the associated lexer if there + are keywords; 2) clearing an entry does not destroy the tokens + used only by itself. +- [Unsafe.reinit_gram g lex] removes the tokens of the grammar +- and sets [lex] as a new lexer for [g]. Warning: the lexer +- itself is not reinitialized. +- [Unsafe.clear_entry e] removes all rules of the entry [e]. *) + +(** {6 Parsing algorithm} *) + +type parse_algorithm = Gramext.parse_algorithm == + [ Predictive | Functional | Backtracking | DefaultAlgorithm ] +; + (** Type of algorithm used in grammar entries. + [Predictive]: use imperative streams with predictive parsing + [Functional]: use functional streams with limited backtracking + [Backtracking]: use functional streams with full backtracking + [DefaultAlgorithm]: use the general default algorithm set by the + function [set_default_algorithm] below or through the environment + variablefound in the variable CAMLP5PARAM. + The default, when a grammar is created, is [DefaultAlgorithm]. *) + +value set_algorithm : g -> parse_algorithm -> unit; + (** Set the parsing algorithm for all entries of a given grammar. *) + +value set_default_algorithm : parse_algorithm -> unit; + (** Set the default parsing algorithm for all grammars. + If the environment variable CAMLP5PARAM contains "b", the + default is [Backtracking]; if it contains 'f', the default is + [Functional]; if it contains 'p', the default is [Predictive]. *) +value default_algorithm : unit -> parse_algorithm; + (** Return the current default algorithm. *) + +value backtrack_stalling_limit : ref int; + (** Limitation of backtracking to prevent stalling in case of syntax + error. In backtracking algorithm, when there is a syntax error, + the parsing continues trying to find another solution. It some + grammars, it can be very long before checking all possibilities. + This number limits the number of tokens tests after a backtrack. + (The number of tokens tests is reset to zero when the token + stream overtakes the last reached token.) The default is 10000. + If set to 0, there is no limit. Can be set by the environment + variable CAMLP5PARAM by "l=value". *) + +(** {6 Functorial interface} *) + + (** Alternative for grammars use. Grammars are no more Ocaml values: + there is no type for them. Modules generated preserve the + rule "an entry cannot call an entry of another grammar" by + normal OCaml typing. *) + +module type GLexerType = + sig + type te = 'x; + value lexer : Plexing.lexer te; + end +; + (** The input signature for the functor [Grammar.GMake]: [te] is the + type of the tokens. *) + +module type S = + sig + type te = 'x; + type parsable = 'x; + value parsable : Stream.t char -> parsable; + value tokens : string -> list (string * int); + value glexer : Plexing.lexer te; + value set_algorithm : parse_algorithm -> unit; + module Entry : + sig + type e 'a = 'y; + value create : string -> e 'a; + value parse : e 'a -> parsable -> 'a; + value name : e 'a -> string; + value of_parser : string -> (Stream.t te -> 'a) -> e 'a; + value parse_token_stream : e 'a -> Stream.t te -> 'a; + value print : Format.formatter -> e 'a -> unit; + external obj : e 'a -> Gramext.g_entry te = "%identity"; + (* deprecated since 2017-06-17 *) + value parse_token : e 'a -> Stream.t te -> 'a; + end + ; + + type ty_symbol 'self 'a = 'x; + (** Type of grammar symbols. A type-safe wrapper around Gramext.symbol. The + first type argument is the type of the ambient entry, the second one is the + type of the produced value. *) + + type ty_rule 'self 'f 'r = 'x; + + type ty_production 'a = 'x; + + value s_facto : ty_symbol 'self 'a -> ty_symbol 'self 'a; + (* | Smeta of string and list (g_symbol 'te) and Obj.t *) + value s_nterm : Entry.e 'a -> ty_symbol 'self 'a; + value s_nterml : Entry.e 'a -> string -> ty_symbol 'self 'a; + value s_list0 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a); + value s_list0sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a); + value s_list1 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a); + value s_list1sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a); + value s_opt : ty_symbol 'self 'a -> ty_symbol 'self (option 'a); + value s_flag : ty_symbol 'self 'a -> ty_symbol 'self bool; + value s_self : ty_symbol 'self 'self; + value s_next : ty_symbol 'self 'self; + value s_token : Plexing.pattern -> ty_symbol 'self string; + value s_rules : list (ty_production 'a) -> ty_symbol 'self 'a; + value s_vala : list string -> ty_symbol 'self 'a -> ty_symbol 'self (Ploc.vala 'a); + + value r_stop : ty_rule 'self 'r 'r; + value r_next : ty_rule 'self 'a 'r -> ty_symbol 'self 'b -> ty_rule 'self ('b -> 'a) 'r; + value r_cut : ty_rule 'self 'a 'r -> ty_rule 'self 'a 'r; + + value production : (ty_rule 'a 'f (Ploc.t -> 'a) * 'f) -> ty_production 'a; + + module Unsafe : + sig + value gram_reinit : Plexing.lexer te -> unit; + value clear_entry : Entry.e 'a -> unit; + end + ; + value extend : + Entry.e 'a -> option Gramext.position -> + list + (option string * option Gramext.g_assoc * + list (list (Gramext.g_symbol te) * Gramext.g_action)) -> + unit; + value safe_extend : + Entry.e 'a -> option Gramext.position -> + list + (option string * option Gramext.g_assoc * + list (ty_production 'a)) -> + unit; + value delete_rule : Entry.e 'a -> list (Gramext.g_symbol te) -> unit; + value safe_delete_rule : Entry.e 'a -> ty_rule 'a 'f 'r -> unit; + end +; + (** Signature type of the functor [Grammar.GMake]. The types and + functions are almost the same than in generic interface, but: +- Grammars are not values. Functions holding a grammar as parameter + do not have this parameter yet. +- The type [parsable] is used in function [parse] instead of + the char stream, avoiding the possible loss of tokens. +- The type of tokens (expressions and patterns) can be any + type (instead of (string * string)); the module parameter + must specify a way to show them as (string * string) *) + +module GMake (L : GLexerType) : S with type te = L.te; + +(** {6 Miscellaneous} *) + +value skip_item : 'a -> 'a; + (** [Grammar.skip_item x] can be called in a semantic action of + a grammar rule to ask the grammar to skip that item if it + is called in a list (LIST0 or LIST1). The function returns + the item itself (for typing reasons) but its value is ignored. + This function is used to allow IFDEF and IFNDEF for cases of + constructor declarations and pattern matchings. *) + +value error_verbose : ref bool; + (** Flag for displaying more information in case of parsing error; + default = [False] *) + +value warning_verbose : ref bool; + (** Flag for displaying warnings while extension; default = [True] *) + +value strict_parsing : ref bool; + (** Flag to apply strict parsing, without trying to recover errors; + default = [False] *) + +value utf8_print : ref bool; + (** Flag to consider strings as utf8-encoded when printing them; + default = [True] *) + +value print_entry : Format.formatter -> Gramext.g_entry 'te -> unit; + (** General printer for all kinds of entries (obj entries) *) + +value iter_entry : + (Gramext.g_entry 'te -> unit) -> Gramext.g_entry 'te -> unit; + (** [Grammar.iter_entry f e] applies [f] to the entry [e] and + transitively all entries called by [e]. The order in which + the entries are passed to [f] is the order they appear in + each entry. Each entry is passed only once. *) + +value fold_entry : + (Gramext.g_entry 'te -> 'a -> 'a) -> Gramext.g_entry 'te -> 'a -> 'a; + (** [Grammar.fold_entry f e init] computes [(f eN .. (f e2 (f e1 init)))], + where [e1 .. eN] are [e] and transitively all entries called by [e]. + The order in which the entries are passed to [f] is the order they + appear in each entry. Each entry is passed only once. *) + +value reinit_entry_functions : Gramext.g_entry 'te -> unit; + +(*** For system use *) + +value loc_of_token_interval : int -> int -> Ploc.t; +value extend : + list + (Gramext.g_entry 'te * option Gramext.position * + list + (option string * option Gramext.g_assoc * + list (list (Gramext.g_symbol 'te) * Gramext.g_action))) -> + unit; +value delete_rule : Entry.e 'a -> list (Gramext.g_symbol token) -> unit; + +value parse_top_symb : + Gramext.g_entry 'te -> Gramext.g_symbol 'te -> Stream.t 'te -> Obj.t; +value symb_failed_txt : + Gramext.g_entry 'te -> Gramext.g_symbol 'te -> Gramext.g_symbol 'te -> + string; +value create_local_entry : g -> string -> Entry.e 'a; + +(* deprecated since 2017-06-06 *) +(* rather use "set_default_algorithm Backtracking" *) +value backtrack_parse : ref bool; diff --git a/gramlib/plexing.ml b/gramlib/plexing.ml new file mode 100644 index 0000000000..947e1980b5 --- /dev/null +++ b/gramlib/plexing.ml @@ -0,0 +1,241 @@ +(* camlp5r *) +(* plexing.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +open Versdep; + +type pattern = (string * string); + +exception Error of string; + +type location = Ploc.t; +type location_function = int -> location; +type lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function); + +type lexer 'te = + { tok_func : lexer_func 'te; + tok_using : pattern -> unit; + tok_removing : pattern -> unit; + tok_match : mutable pattern -> 'te -> string; + tok_text : pattern -> string; + tok_comm : mutable option (list location) } +; + +value make_loc = Ploc.make_unlined; +value dummy_loc = Ploc.dummy; + +value lexer_text (con, prm) = + if con = "" then "'" ^ prm ^ "'" + else if prm = "" then con + else con ^ " '" ^ prm ^ "'" +; + +value locerr () = failwith "Lexer: location function"; +value loct_create () = (ref (array_create 1024 None), ref False); +value loct_func (loct, ov) i = + match + if i < 0 || i >= Array.length loct.val then + if ov.val then Some dummy_loc else None + else Array.unsafe_get loct.val i + with + [ Some loc -> loc + | None -> locerr () ] +; +value loct_add (loct, ov) i loc = + if i >= Array.length loct.val then + let new_tmax = Array.length loct.val * 2 in + if new_tmax < Sys.max_array_length then do { + let new_loct = array_create new_tmax None in + Array.blit loct.val 0 new_loct 0 (Array.length loct.val); + loct.val := new_loct; + loct.val.(i) := Some loc + } + else ov.val := True + else loct.val.(i) := Some loc +; + +value make_stream_and_location next_token_loc = + let loct = loct_create () in + let ts = + Stream.from + (fun i -> do { + let (tok, loc) = next_token_loc () in + loct_add loct i loc; + Some tok + }) + in + (ts, loct_func loct) +; + +value lexer_func_of_parser next_token_loc cs = + let line_nb = ref 1 in + let bolpos = ref 0 in + make_stream_and_location (fun () -> next_token_loc (cs, line_nb, bolpos)) +; + +value lexer_func_of_ocamllex lexfun cs = + let lb = + Lexing.from_function + (fun s n -> + try do { string_set s 0 (Stream.next cs); 1 } with + [ Stream.Failure -> 0 ]) + in + let next_token_loc _ = + let tok = lexfun lb in + let loc = make_loc (Lexing.lexeme_start lb, Lexing.lexeme_end lb) in + (tok, loc) + in + make_stream_and_location next_token_loc +; + +(* Char and string tokens to real chars and string *) + +value buff = ref (string_create 80); +value store len x = do { + if len >= string_length buff.val then + buff.val := string_cat buff.val (string_create (string_length buff.val)) + else (); + string_set buff.val len x; + succ len +}; +value get_buff len = string_sub buff.val 0 len; + +value valch x = Char.code x - Char.code '0'; +value valch_a x = Char.code x - Char.code 'a' + 10; +value valch_A x = Char.code x - Char.code 'A' + 10; + +value rec backslash s i = + if i = String.length s then raise Not_found + else + match s.[i] with + [ 'n' -> ('\n', i + 1) + | 'r' -> ('\r', i + 1) + | 't' -> ('\t', i + 1) + | 'b' -> ('\b', i + 1) + | '\\' -> ('\\', i + 1) + | '"' -> ('"', i + 1) + | ''' -> (''', i + 1) + | '0'..'9' as c -> backslash1 (valch c) s (i + 1) + | 'x' -> backslash1h s (i + 1) + | _ -> raise Not_found ] +and backslash1 cod s i = + if i = String.length s then ('\\', i - 1) + else + match s.[i] with + [ '0'..'9' as c -> backslash2 (10 * cod + valch c) s (i + 1) + | _ -> ('\\', i - 1) ] +and backslash2 cod s i = + if i = String.length s then ('\\', i - 2) + else + match s.[i] with + [ '0'..'9' as c -> (Char.chr (10 * cod + valch c), i + 1) + | _ -> ('\\', i - 2) ] +and backslash1h s i = + if i = String.length s then ('\\', i - 1) + else + match s.[i] with + [ '0'..'9' as c -> backslash2h (valch c) s (i + 1) + | 'a'..'f' as c -> backslash2h (valch_a c) s (i + 1) + | 'A'..'F' as c -> backslash2h (valch_A c) s (i + 1) + | _ -> ('\\', i - 1) ] +and backslash2h cod s i = + if i = String.length s then ('\\', i - 2) + else + match s.[i] with + [ '0'..'9' as c -> (Char.chr (16 * cod + valch c), i + 1) + | 'a'..'f' as c -> (Char.chr (16 * cod + valch_a c), i + 1) + | 'A'..'F' as c -> (Char.chr (16 * cod + valch_A c), i + 1) + | _ -> ('\\', i - 2) ] +; + +value rec skip_indent s i = + if i = String.length s then i + else + match s.[i] with + [ ' ' | '\t' -> skip_indent s (i + 1) + | _ -> i ] +; + +value skip_opt_linefeed s i = + if i = String.length s then i else if s.[i] = '\010' then i + 1 else i +; + +value eval_char s = + if String.length s = 1 then s.[0] + else if String.length s = 0 then failwith "invalid char token" + else if s.[0] = '\\' then + if String.length s = 2 && s.[1] = ''' then ''' + else + try + let (c, i) = backslash s 1 in + if i = String.length s then c else raise Not_found + with + [ Not_found -> failwith "invalid char token" ] + else failwith "invalid char token" +; + +value eval_string loc s = + bytes_to_string (loop 0 0) where rec loop len i = + if i = String.length s then get_buff len + else + let (len, i) = + if s.[i] = '\\' then + let i = i + 1 in + if i = String.length s then failwith "invalid string token" + else if s.[i] = '"' then (store len '"', i + 1) + else + match s.[i] with + [ '\010' -> (len, skip_indent s (i + 1)) + | '\013' -> (len, skip_indent s (skip_opt_linefeed s (i + 1))) + | c -> + try + let (c, i) = backslash s i in + (store len c, i) + with + [ Not_found -> (store (store len '\\') c, i + 1) ] ] + else (store len s.[i], i + 1) + in + loop len i +; + +value default_match = + fun + [ ("ANY", "") -> fun (con, prm) -> prm + | ("ANY", v) -> + fun (con, prm) -> if v = prm then v else raise Stream.Failure + | (p_con, "") -> + fun (con, prm) -> if con = p_con then prm else raise Stream.Failure + | (p_con, p_prm) -> + fun (con, prm) -> + if con = p_con && prm = p_prm then prm else raise Stream.Failure ] +; + +value input_file = ref ""; +value line_nb = ref (ref 0); +value bol_pos = ref (ref 0); +value restore_lexing_info = ref None; + +(* The lexing buffer used by pa_lexer.cmo *) + +value rev_implode l = + let s = string_create (List.length l) in + bytes_to_string (loop (string_length s - 1) l) where rec loop i = + fun + [ [c :: l] -> do { string_unsafe_set s i c; loop (i - 1) l } + | [] -> s ] +; + +module Lexbuf : + sig + type t = 'abstract; + value empty : t; + value add : char -> t -> t; + value get : t -> string; + end = + struct + type t = list char; + value empty = []; + value add c l = [c :: l]; + value get = rev_implode; + end +; diff --git a/gramlib/plexing.mli b/gramlib/plexing.mli new file mode 100644 index 0000000000..f0a03da583 --- /dev/null +++ b/gramlib/plexing.mli @@ -0,0 +1,143 @@ +(* camlp5r *) +(* plexing.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +(** Lexing for Camlp5 grammars. + + This module defines the Camlp5 lexer type to be used in extensible + grammars (see module [Grammar]). It also provides some useful functions + to create lexers. *) + +type pattern = (string * string); + (* Type for values used by the generated code of the EXTEND + statement to represent terminals in entry rules. +- The first string is the constructor name (must start with + an uppercase character). When it is empty, the second string + is supposed to be a keyword. +- The second string is the constructor parameter. Empty if it + has no parameter (corresponding to the 'wildcard' pattern). +- The way tokens patterns are interpreted to parse tokens is done + by the lexer, function [tok_match] below. *) + +exception Error of string; + (** A lexing error exception to be used by lexers. *) + +(** Lexer type *) + +type lexer 'te = + { tok_func : lexer_func 'te; + tok_using : pattern -> unit; + tok_removing : pattern -> unit; + tok_match : mutable pattern -> 'te -> string; + tok_text : pattern -> string; + tok_comm : mutable option (list Ploc.t) } + (** The type for lexers compatible with camlp5 grammars. The parameter + type ['te] is the type of the tokens. +- The field [tok_func] is the main lexer function. See [lexer_func] + type below. +- The field [tok_using] is a function called by the [EXTEND] + statement to warn the lexer that a rule uses this pattern + (given as parameter). This allow the lexer 1/ to check that + the pattern constructor is really among its possible constructors + 2/ to enter the keywords in its tables. +- The field [tok_removing] is a function possibly called by the + [DELETE_RULE] statement to warn the lexer that this pattern + (given as parameter) is no more used in the grammar (the grammar + system maintains a number of usages of all patterns and calls this + function when this number falls to zero). If it is a keyword, this + allow the lexer to remove it in its tables. +- The field [tok_match] is a function called by the camlp5 + grammar system to ask the lexer how the input tokens have to + be matched against the patterns. Warning: for efficiency, this + function has to be written as a function taking patterns as + parameters and, for each pattern value, returning a function + matching a token, *not* as a function with two parameters. +- The field [tok_text] is a function called by the grammar + system to get the name of the tokens for the error messages, + in case of syntax error, or for the displaying of the rules + of an entry. +- The field [tok_comm] is a mutable place where the lexer can + put the locations of the comments, if its initial value is not + [None]. If it is [None], nothing has to be done by the lexer. *) + +and lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function) + (** The type of a lexer function (field [tok_func] of the type + [glexer]). The character stream is the input stream to be + lexed. The result is a pair of a token stream and a location + function (see below) for this tokens stream. *) + +and location_function = int -> Ploc.t; + (** The type of a function giving the location of a token in the + source from the token number in the stream (starting from zero). *) + +value lexer_text : pattern -> string; + (** A simple [tok_text] function. *) + +value default_match : pattern -> (string * string) -> string; + (** A simple [tok_match] function, appling to the token type + [(string * string)] *) + +(** Lexers from parsers or ocamllex + + The functions below create lexer functions either from a [char stream] + parser or for an [ocamllex] function. With the returned function [f], + it is possible to get a simple lexer (of the type [Plexing.glexer] above): + {[ + { Plexing.tok_func = f; + Plexing.tok_using = (fun _ -> ()); + Plexing.tok_removing = (fun _ -> ()); + Plexing.tok_match = Plexing.default_match; + Plexing.tok_text = Plexing.lexer_text; + Plexing.tok_comm = None } + ]} + Note that a better [tok_using] function should check the used tokens + and raise [Plexing.Error] for incorrect ones. The other functions + [tok_removing], [tok_match] and [tok_text] may have other implementations + as well. *) + +value lexer_func_of_parser : + ((Stream.t char * ref int * ref int) -> ('te * Ploc.t)) -> lexer_func 'te; + (** A lexer function from a lexer written as a char stream parser + returning the next token and its location. The two references + with the char stream contain the current line number and the + position of the beginning of the current line. *) +value lexer_func_of_ocamllex : (Lexing.lexbuf -> 'te) -> lexer_func 'te; + (** A lexer function from a lexer created by [ocamllex] *) + +(** Function to build a stream and a location function *) + +value make_stream_and_location : + (unit -> ('te * Ploc.t)) -> (Stream.t 'te * location_function); + (** General function *) + +(** Useful functions and values *) + +value eval_char : string -> char; +value eval_string : Ploc.t -> string -> string; + (** Convert a char or a string token, where the backslashes had not + been interpreted into a real char or string; raise [Failure] if + bad backslash sequence found; [Plexing.eval_char (Char.escaped c)] + would return [c] and [Plexing.eval_string (String.escaped s)] would + return [s] *) + +value restore_lexing_info : ref (option (int * int)); +value input_file : ref string; +value line_nb : ref (ref int); +value bol_pos : ref (ref int); + (** Special variables used to reinitialize line numbers and position + of beginning of line with their correct current values when a parser + is called several times with the same character stream. Necessary + for directives (e.g. #load or #use) which interrupt the parsing. + Without usage of these variables, locations after the directives + can be wrong. *) + +(** The lexing buffer used by streams lexers *) + +module Lexbuf : + sig + type t = 'a; + value empty : t; + value add : char -> t -> t; + value get : t -> string; + end +; diff --git a/gramlib/ploc.ml b/gramlib/ploc.ml new file mode 100644 index 0000000000..30ae4eaa21 --- /dev/null +++ b/gramlib/ploc.ml @@ -0,0 +1,217 @@ +(* camlp5r *) +(* ploc.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +#load "pa_macro.cmo"; + +type t = + { fname : string; + line_nb : int; + bol_pos : int; + line_nb_last : int; + bol_pos_last : int; + bp : int; + ep : int; + comm : string; + ecomm : string } +; + +value make_loc fname line_nb bol_pos (bp, ep) comm = + {fname = fname; line_nb = line_nb; bol_pos = bol_pos; + line_nb_last = line_nb; bol_pos_last = bol_pos; + bp = bp; ep = ep; comm = comm; ecomm = ""} +; + +value make_unlined (bp, ep) = + {fname = ""; line_nb = 1; bol_pos = 0; line_nb_last = -1; bol_pos_last = 0; + bp = bp; ep = ep; comm = ""; ecomm = ""} +; + +value dummy = + {fname = ""; line_nb = 1; bol_pos = 0; line_nb_last = -1; bol_pos_last = 0; + bp = 0; ep = 0; comm = ""; ecomm = ""} +; + +value file_name loc = loc.fname; +value first_pos loc = loc.bp; +value last_pos loc = loc.ep; +value line_nb loc = loc.line_nb; +value bol_pos loc = loc.bol_pos; +value line_nb_last loc = loc.line_nb_last; +value bol_pos_last loc = loc.bol_pos_last; +value comment loc = loc.comm; +value comment_last loc = loc.ecomm; + +IFDEF OCAML_VERSION <= OCAML_1_07 OR COMPATIBLE_WITH_OLD_OCAML THEN + value with_bp_ep l bp ep = + {fname = l.fname; line_nb = l.line_nb; bol_pos = l.bol_pos; + line_nb_last = l.line_nb_last; bol_pos_last = l.bol_pos_last; bp = bp; + ep = ep; comm = l.comm; ecomm = l.ecomm} + ; + value with_comm l comm = + {fname = l.fname; line_nb = l.line_nb; bol_pos = l.bol_pos; + line_nb_last = l.line_nb_last; bol_pos_last = l.bol_pos_last; bp = l.bp; + ep = l.ep; comm = comm; ecomm = l.ecomm} + ; +END; + +value encl loc1 loc2 = + if loc1.bp < loc2.bp then + if loc1.ep < loc2.ep then + {fname = loc1.fname; line_nb = loc1.line_nb; bol_pos = loc1.bol_pos; + line_nb_last = loc2.line_nb_last; bol_pos_last = loc2.bol_pos_last; + bp = loc1.bp; ep = loc2.ep; comm = loc1.comm; ecomm = loc2.comm} + else + loc1 + else + if loc2.ep < loc1.ep then + {fname = loc2.fname; line_nb = loc2.line_nb; bol_pos = loc2.bol_pos; + line_nb_last = loc1.line_nb_last; bol_pos_last = loc1.bol_pos_last; + bp = loc2.bp; ep = loc1.ep; comm = loc2.comm; ecomm = loc1.comm} + else + loc2 +; +value shift sh loc = {(loc) with bp = sh + loc.bp; ep = sh + loc.ep}; +value sub loc sh len = {(loc) with bp = loc.bp + sh; ep = loc.bp + sh + len}; +value after loc sh len = + {(loc) with bp = loc.ep + sh; ep = loc.ep + sh + len} +; +value with_comment loc comm = {(loc) with comm = comm}; + +value name = ref "loc"; + +value from_file fname loc = + let (bp, ep) = (first_pos loc, last_pos loc) in + try + let ic = open_in_bin fname in + let strm = Stream.of_channel ic in + let rec loop fname lin = + let rec not_a_line_dir col = + parser cnt + [ [: `c; s :] -> + if cnt < bp then + if c = '\n' then loop fname (lin + 1) + else not_a_line_dir (col + 1) s + else + let col = col - (cnt - bp) in + (fname, lin, col, col + ep - bp) + | [: :] -> + (fname, lin, col, col + 1) ] + in + let rec a_line_dir str n col = + parser + [ [: `'\n' :] -> loop str n + | [: `_; s :] -> a_line_dir str n (col + 1) s ] + in + let rec spaces col = + parser + [ [: `' '; s :] -> spaces (col + 1) s + | [: :] -> col ] + in + let rec check_string str n col = + parser + [ [: `'"'; col = spaces (col + 1); s :] -> a_line_dir str n col s + | [: `c when c <> '\n'; s :] -> + check_string (str ^ String.make 1 c) n (col + 1) s + | [: a = not_a_line_dir col :] -> a ] + in + let check_quote n col = + parser + [ [: `'"'; s :] -> check_string "" n (col + 1) s + | [: a = not_a_line_dir col :] -> a ] + in + let rec check_num n col = + parser + [ [: `('0'..'9' as c); s :] -> + check_num (10 * n + Char.code c - Char.code '0') (col + 1) s + | [: col = spaces col; s :] -> check_quote n col s ] + in + let begin_line = + parser + [ [: `'#'; col = spaces 1; s :] -> check_num 0 col s + | [: a = not_a_line_dir 0 :] -> a ] + in + begin_line strm + in + let r = + try loop fname 1 with + [ Stream.Failure -> + let bol = bol_pos loc in + (fname, line_nb loc, bp - bol, ep - bol) ] + in + do { close_in ic; r } + with + [ Sys_error _ -> (fname, 1, bp, ep) ] +; + +value second_line fname ep0 (line, bp) ep = do { + let ic = open_in fname in + seek_in ic bp; + loop line bp bp where rec loop line bol p = + if p = ep then do { + close_in ic; + if bol = bp then (line, ep0) + else (line, ep - bol) + } + else do { + let (line, bol) = + match input_char ic with + [ '\n' -> (line + 1, p + 1) + | _ -> (line, bol) ] + in + loop line bol (p + 1) + } +}; + +value get loc = do { + if loc.fname = "" || loc.fname = "-" then do { + (loc.line_nb, loc.bp - loc.bol_pos, loc.line_nb, loc.ep - loc.bol_pos, + loc.ep - loc.bp) + } + else do { + let (bl, bc, ec) = + (loc.line_nb, loc.bp - loc.bol_pos, loc.ep - loc.bol_pos) + in + let (el, eep) = second_line loc.fname ec (bl, loc.bp) loc.ep in + (bl, bc, el, eep, ec - bc) + } +}; + +value call_with r v f a = + let saved = r.val in + try do { + r.val := v; + let b = f a in + r.val := saved; + b + } + with e -> do { r.val := saved; raise e } +; + +exception Exc of t and exn; + +value raise loc exc = + match exc with + [ Exc _ _ -> raise exc + | _ -> raise (Exc loc exc) ] +; + +type vala 'a = + [ VaAnt of string + | VaVal of 'a ] +; + +value warned = ref True; +value warning_deprecated_since_6_00 name = + if not warned.val then do { + Printf.eprintf "<W> %s deprecated since version 6.00" name; + warned.val := True + } + else () +; + +value make line_nb bol_pos (bp, ep) = + let _ = warning_deprecated_since_6_00 "Ploc.make" in + {fname = ""; line_nb = line_nb; bol_pos = bol_pos; line_nb_last = line_nb; + bol_pos_last = bol_pos; bp = bp; ep = ep; comm = ""; ecomm = ""} +; diff --git a/gramlib/ploc.mli b/gramlib/ploc.mli new file mode 100644 index 0000000000..1d6df4d322 --- /dev/null +++ b/gramlib/ploc.mli @@ -0,0 +1,129 @@ +(* camlp5r *) +(* ploc.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +(** Locations and some pervasive type and value. *) + +type t = 'abstract; + +(* located exceptions *) + +exception Exc of t and exn; + (** [Ploc.Exc loc e] is an encapsulation of the exception [e] with + the input location [loc]. To be used to specify a location + for an error. This exception must not be raised by [raise] but + rather by [Ploc.raise] (see below), to prevent the risk of several + encapsulations of [Ploc.Exc]. *) +value raise : t -> exn -> 'a; + (** [Ploc.raise loc e], if [e] is already the exception [Ploc.Exc], + re-raise it (ignoring the new location [loc]), else raise the + exception [Ploc.Exc loc e]. *) + +(* making locations *) + +value make_loc : string -> int -> int -> (int * int) -> string -> t; + (** [Ploc.make_loc fname line_nb bol_pos (bp, ep) comm] creates a location + starting at line number [line_nb], where the position of the beginning + of the line is [bol_pos] and between the positions [bp] (included) and + [ep] excluded. And [comm] is the comment before the location. The + positions are in number of characters since the begin of the stream. *) +value make_unlined : (int * int) -> t; + (** [Ploc.make_unlined] is like [Ploc.make] except that the line number + is not provided (to be used e.g. when the line number is unknown. *) + +value dummy : t; + (** [Ploc.dummy] is a dummy location, used in situations when location + has no meaning. *) + +(* getting location info *) + +value file_name : t -> string; + (** [Ploc.file_name loc] returns the file name of the location. *) +value first_pos : t -> int; + (** [Ploc.first_pos loc] returns the position of the begin of the location + in number of characters since the beginning of the stream. *) +value last_pos : t -> int; + (** [Ploc.last_pos loc] returns the position of the first character not + in the location in number of characters since the beginning of the + stream. *) +value line_nb : t -> int; + (** [Ploc.line_nb loc] returns the line number of the location or [-1] if + the location does not contain a line number (i.e. built with + [Ploc.make_unlined]. *) +value bol_pos : t -> int; + (** [Ploc.bol_pos loc] returns the position of the beginning of the line + of the location in number of characters since the beginning of + the stream, or [0] if the location does not contain a line number + (i.e. built with [Ploc.make_unlined]. *) +value line_nb_last : t -> int; +value bol_pos_last : t -> int; + (** Return the line number and the position of the beginning of the line + of the last position. *) +value comment : t -> string; + (** [Ploc.comment loc] returns the comment before the location. *) +value comment_last : t -> string; + (** [Ploc.comment loc] returns the last comment of the location. *) + +(* combining locations *) + +value encl : t -> t -> t; + (** [Ploc.encl loc1 loc2] returns the location starting at the + smallest start of [loc1] and [loc2] and ending at the greatest end + of them. In other words, it is the location enclosing [loc1] and + [loc2]. *) +value shift : int -> t -> t; + (** [Ploc.shift sh loc] returns the location [loc] shifted with [sh] + characters. The line number is not recomputed. *) +value sub : t -> int -> int -> t; + (** [Ploc.sub loc sh len] is the location [loc] shifted with [sh] + characters and with length [len]. The previous ending position + of the location is lost. *) +value after : t -> int -> int -> t; + (** [Ploc.after loc sh len] is the location just after loc (starting at + the end position of [loc]) shifted with [sh] characters and of length + [len]. *) +value with_comment : t -> string -> t; + (** Change the comment part of the given location *) + +(* miscellaneous *) + +value name : ref string; + (** [Ploc.name.val] is the name of the location variable used in grammars + and in the predefined quotations for OCaml syntax trees. Default: + ["loc"] *) + +value get : t -> (int * int * int * int * int); + (** [Ploc.get loc] returns in order: 1/ the line number of the begin + of the location, 2/ its column, 3/ the line number of the first + character not in the location, 4/ its column and 5/ the length + of the location. The file where the location occurs (if any) may + be read during this operation. *) + +value from_file : string -> t -> (string * int * int * int); + (** [Ploc.from_file fname loc] reads the file [fname] up to the + location [loc] and returns the real input file, the line number + and the characters location in the line; the real input file + can be different from [fname] because of possibility of line + directives typically generated by /lib/cpp. *) + +(* pervasives *) + +type vala 'a = + [ VaAnt of string + | VaVal of 'a ] +; + (** Encloser of many abstract syntax tree nodes types, in "strict" mode. + This allow the system of antiquotations of abstract syntax tree + quotations to work when using the quotation kit [q_ast.cmo]. *) + +value call_with : ref 'a -> 'a -> ('b -> 'c) -> 'b -> 'c; + (** [Ploc.call_with r v f a] sets the reference [r] to the value [v], + then call [f a], and resets [r] to its initial value. If [f a] raises + an exception, its initial value is also reset and the exception is + re-raised. The result is the result of [f a]. *) + +(**/**) + +value make : int -> int -> (int * int) -> t; + (** deprecated function since version 6.00; use [make_loc] instead + with the empty string *) diff --git a/gramlib/token.ml b/gramlib/token.ml new file mode 100644 index 0000000000..9c1664ccd9 --- /dev/null +++ b/gramlib/token.ml @@ -0,0 +1,37 @@ +(* camlp5r *) +(* token.ml,v *) +(* Copyright (c) INRIA 2007-2017 *) + +type pattern = Plexing.pattern; + +exception Error of string; + +type location = Ploc.t; +type location_function = int -> location; +type lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function); + +type glexer 'te = Plexing.lexer 'te == + { tok_func : lexer_func 'te; + tok_using : pattern -> unit; + tok_removing : pattern -> unit; + tok_match : mutable pattern -> 'te -> string; + tok_text : pattern -> string; + tok_comm : mutable option (list location) } +; + +value make_loc = Ploc.make_unlined; +value dummy_loc = Ploc.dummy; + +value make_stream_and_location = Plexing.make_stream_and_location; +value lexer_func_of_parser = Plexing.lexer_func_of_parser; +value lexer_func_of_ocamllex = Plexing.lexer_func_of_ocamllex; + +value eval_char = Plexing.eval_char; +value eval_string = Plexing.eval_string; + +value lexer_text = Plexing.lexer_text; +value default_match = Plexing.default_match; + +value line_nb = Plexing.line_nb; +value bol_pos = Plexing.bol_pos; +value restore_lexing_info = Plexing.restore_lexing_info; diff --git a/gramlib/token.mli b/gramlib/token.mli new file mode 100644 index 0000000000..333d53ce86 --- /dev/null +++ b/gramlib/token.mli @@ -0,0 +1,56 @@ +(* camlp5r *) +(* token.mli,v *) +(* Copyright (c) INRIA 2007-2017 *) + +(** Module deprecated since Camlp5 version 5.00. Use now module Plexing. + Compatibility assumed. *) + +type pattern = Plexing.pattern; + +exception Error of string; + (** Use now [Plexing.Error] *) + +type glexer 'te = Plexing.lexer 'te == + { tok_func : Plexing.lexer_func 'te; + tok_using : pattern -> unit; + tok_removing : pattern -> unit; + tok_match : mutable pattern -> 'te -> string; + tok_text : pattern -> string; + tok_comm : mutable option (list Ploc.t) } +; + +type lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function) +and location_function = int -> Ploc.t; + +value lexer_text : pattern -> string; + (** Use now [Plexing.lexer_text] *) +value default_match : pattern -> (string * string) -> string; + (** Use now [Plexing.default_match] *) + +value lexer_func_of_parser : + ((Stream.t char * ref int * ref int) -> ('te * Ploc.t)) -> lexer_func 'te; + (** Use now [Plexing.lexer_func_of_parser] *) +value lexer_func_of_ocamllex : (Lexing.lexbuf -> 'te) -> lexer_func 'te; + (** Use now [Plexing.lexer_func_of_ocamllex] *) + +value make_stream_and_location : + (unit -> ('te * Ploc.t)) -> (Stream.t 'te * location_function); + (** Use now [Plexing.make_stream_and_location] *) + +value eval_char : string -> char; + (** Use now [Plexing.eval_char] *) +value eval_string : Ploc.t -> string -> string; + (** Use now [Plexing.eval_string] *) + +value restore_lexing_info : ref (option (int * int)); + (** Use now [Plexing.restore_lexing_info] *) +value line_nb : ref (ref int); + (** Use now [Plexing.line_nb] *) +value bol_pos : ref (ref int); + (** Use now [Plexing.bol_pos] *) + +(* deprecated since version 4.08 *) + +type location = Ploc.t; +value make_loc : (int * int) -> Ploc.t; +value dummy_loc : Ploc.t; |
