aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEmilio Jesus Gallego Arias2018-10-06 17:54:24 +0200
committerEmilio Jesus Gallego Arias2018-10-29 01:25:21 +0100
commit06979f87959866e6ed1214e745893dcd2e8ddbb3 (patch)
tree458274f8a8afedc314535db28e0936b7fe3bec3c
parent665146168720c094ce4fbb3d7d044d9904099f95 (diff)
[gramlib] Original Import from Camlp5 repos.
-rw-r--r--Makefile1
-rw-r--r--gramlib/LICENSE29
-rw-r--r--gramlib/fstream.ml152
-rw-r--r--gramlib/fstream.mli95
-rw-r--r--gramlib/gramext.ml622
-rw-r--r--gramlib/gramext.mli87
-rw-r--r--gramlib/grammar.ml2695
-rw-r--r--gramlib/grammar.mli338
-rw-r--r--gramlib/plexing.ml241
-rw-r--r--gramlib/plexing.mli143
-rw-r--r--gramlib/ploc.ml217
-rw-r--r--gramlib/ploc.mli129
-rw-r--r--gramlib/token.ml37
-rw-r--r--gramlib/token.mli56
14 files changed, 4842 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 9ac32625ab..f2dc6d7750 100644
--- a/Makefile
+++ b/Makefile
@@ -57,6 +57,7 @@ FIND_SKIP_DIRS:='(' \
-name '_build' -o \
-name '_build_ci' -o \
-name '_install_ci' -o \
+ -name 'gramlib' -o \
-name 'user-contrib' -o \
-name 'test-suite' -o \
-name '.opamcache' -o \
diff --git a/gramlib/LICENSE b/gramlib/LICENSE
new file mode 100644
index 0000000000..b696affde7
--- /dev/null
+++ b/gramlib/LICENSE
@@ -0,0 +1,29 @@
+gramlib was derived from Daniel de Rauglaudre's camlp5
+(https://github.com/camlp5/camlp5) whose licence follows:
+
+* Copyright (c) 2007-2017, INRIA (Institut National de Recherches en
+* Informatique et Automatique). All rights reserved.
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution.
+* * Neither the name of INRIA, nor the names of its contributors may be
+* used to endorse or promote products derived from this software without
+* specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY INRIA AND CONTRIBUTORS ``AS IS'' AND
+* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INRIA AND
+* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+* SUCH DAMAGE.
diff --git a/gramlib/fstream.ml b/gramlib/fstream.ml
new file mode 100644
index 0000000000..928df3b0c4
--- /dev/null
+++ b/gramlib/fstream.ml
@@ -0,0 +1,152 @@
+(* camlp5r *)
+(* fstream.ml,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+exception Cut;
+
+type mlazy_c 'a =
+ [ Lfun of unit -> 'a
+ | Lval of 'a ]
+;
+type mlazy 'a =
+ [ Cval of 'a
+ | Clazy of ref (mlazy_c 'a) ]
+;
+value mlazy f = Clazy (ref (Lfun f));
+value mlazy_force l =
+ match l with
+ [ Cval v -> v
+ | Clazy l ->
+ match l.val with
+ [ Lfun f -> do { let x = f () in l.val := Lval x; x }
+ | Lval v -> v ] ]
+;
+value mlazy_is_val l =
+ match l with
+ [ Cval _ -> True
+ | Clazy l ->
+ match l.val with
+ [ Lval _ -> True
+ | Lfun _ -> False ] ]
+;
+
+type t 'a = { count : int; data : mlazy (data 'a) }
+and data 'a =
+ [ Nil
+ | Cons of 'a and t 'a
+ | App of t 'a and t 'a ]
+;
+
+value from f =
+ loop 0 where rec loop i =
+ {count = 0;
+ data =
+ mlazy
+ (fun () ->
+ match f i with
+ [ Some x -> Cons x (loop (i + 1))
+ | None -> Nil ])}
+;
+
+value rec next s =
+ let count = s.count + 1 in
+ match mlazy_force s.data with
+ [ Nil -> None
+ | Cons a s -> Some (a, {count = count; data = s.data})
+ | App s1 s2 ->
+ match next s1 with
+ [ Some (a, s1) ->
+ Some (a, {count = count; data = mlazy (fun () -> App s1 s2)})
+ | None ->
+ match next s2 with
+ [ Some (a, s2) -> Some (a, {count = count; data = s2.data})
+ | None -> None ] ] ]
+;
+
+value empty s =
+ match next s with
+ [ Some _ -> None
+ | None -> Some ((), s) ]
+;
+
+value nil = {count = 0; data = Cval Nil};
+value cons a s = Cons a s;
+value app s1 s2 = App s1 s2;
+value flazy f = {count = 0; data = mlazy f};
+
+value of_list l =
+ List.fold_right (fun x s -> flazy (fun () -> cons x s)) l nil
+;
+
+value of_string s =
+ from (fun c -> if c < String.length s then Some s.[c] else None)
+;
+
+value of_channel ic =
+ from (fun _ -> try Some (input_char ic) with [ End_of_file -> None ])
+;
+
+value iter f =
+ do_rec where rec do_rec strm =
+ match next strm with
+ [ Some (a, strm) ->
+ let _ = f a in
+ do_rec strm
+ | None -> () ]
+;
+
+value count s = s.count;
+
+value count_unfrozen s =
+ loop 0 s where rec loop cnt s =
+ if mlazy_is_val s.data then
+ match mlazy_force s.data with
+ [ Cons _ s -> loop (cnt + 1) s
+ | _ -> cnt ]
+ else cnt
+;
+
+(* backtracking parsers *)
+
+type kont 'a 'b = [ K of unit -> option ('b * t 'a * kont 'a 'b) ];
+type bp 'a 'b = t 'a -> option ('b * t 'a * kont 'a 'b);
+
+value bcontinue = fun [ (K k) -> k () ];
+
+value bparse_all p strm =
+ loop (fun () -> p strm) where rec loop p =
+ match p () with
+ [ Some (r, _, K k) -> [r :: loop k]
+ | None -> [] ]
+;
+
+value b_seq a b strm =
+ let rec app_a kont1 () =
+ match kont1 () with
+ [ Some (x, strm, K kont1) -> app_b (fun () -> b x strm) kont1 ()
+ | None -> None ]
+ and app_b kont2 kont1 () =
+ match kont2 () with
+ [ Some (y, strm, K kont2) -> Some (y, strm, K (app_b kont2 kont1))
+ | None -> app_a kont1 () ]
+ in
+ app_a (fun () -> a strm) ()
+;
+
+value b_or a b strm =
+ loop (fun () -> a strm) () where rec loop kont () =
+ match kont () with
+ [ Some (x, strm, K kont) -> Some (x, strm, K (loop kont))
+ | None -> b strm ]
+;
+
+value b_term f strm =
+ match next strm with
+ [ Some (x, strm) ->
+ match f x with
+ [ Some y -> Some (y, strm, K (fun _ -> None))
+ | None -> None ]
+ | None -> None ]
+;
+
+value b_act a strm = Some (a, strm, K (fun _ -> None));
diff --git a/gramlib/fstream.mli b/gramlib/fstream.mli
new file mode 100644
index 0000000000..a6d0391173
--- /dev/null
+++ b/gramlib/fstream.mli
@@ -0,0 +1,95 @@
+(* camlp5r *)
+(* fstream.mli,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+(* Module [Fstream]: functional streams *)
+
+(* This module implement functional streams and parsers together with
+ backtracking parsers. To be used with syntax [pa_fstream.cmo]. The
+ syntax is:
+ For functional streams:
+- stream: [fstream [: ... :]]
+ For functional parsers:
+- parser: [fparser [ [: ... :] -> ... | ... ]]
+ For backtracking parsers:
+- parser: [bparser [ [: ... :] -> ... | ... ]]
+
+ Functional parsers are of type:
+ [Fstream.t 'a -> option ('b * Fstream.t 'a)]
+ Backtracking parsers are of type:
+ [Fstream.t 'a -> option ('b * Fstream.t 'a * Fstream.kont 'a 'b)]
+
+ Functional parsers have limited backtrack, i.e if a rule fails, the
+ next rule is tested with the initial stream; limited because when
+ in case of a rule with two consecutive symbols [a] and [b], if [b]
+ fails, the rule fails: there is no try with the next rule of [a].
+
+ Backtracking parsers have full backtrack. If a rule fails, the next
+ case of the previous rule is tested.
+*)
+
+exception Cut;
+
+(** Functional streams *)
+
+type t 'a = 'x;
+ (* The type of 'a functional streams *)
+value from : (int -> option 'a) -> t 'a;
+ (* [Fstream.from f] returns a stream built from the function [f].
+ To create a new stream element, the function [f] is called with
+ the current stream count. The user function [f] must return either
+ [Some <value>] for a value or [None] to specify the end of the
+ stream. *)
+
+value of_list : list 'a -> t 'a;
+ (* Return the stream holding the elements of the list in the same
+ order. *)
+value of_string : string -> t char;
+ (* Return the stream of the characters of the string parameter. *)
+value of_channel : in_channel -> t char;
+ (* Return the stream of the characters read from the input channel. *)
+
+value iter : ('a -> unit) -> t 'a -> unit;
+ (* [Fstream.iter f s] scans the whole stream s, applying function [f]
+ in turn to each stream element encountered. *)
+
+value next : t 'a -> option ('a * t 'a);
+ (* Return [Some (a, s)] where [a] is the first element of the stream
+ and [s] the remaining stream, or [None] if the stream is empty. *)
+value empty : t 'a -> option (unit * t 'a);
+ (* Return [Some ((), s)] if the stream is empty where [s] is itself,
+ else [None] *)
+value count : t 'a -> int;
+ (* Return the current count of the stream elements, i.e. the number
+ of the stream elements discarded. *)
+value count_unfrozen : t 'a -> int;
+ (* Return the number of unfrozen elements in the beginning of the
+ stream; useful to determine the position of a parsing error (longuest
+ path). *)
+
+(** Backtracking parsers *)
+
+type kont 'a 'b = [ K of unit -> option ('b * t 'a * kont 'a 'b) ];
+ (* The type of continuation of a backtracking parser. *)
+type bp 'a 'b = t 'a -> option ('b * t 'a * kont 'a 'b);
+ (* The type of a backtracking parser. *)
+
+value bcontinue : kont 'a 'b -> option ('b * t 'a * kont 'a 'b);
+ (* [bcontinue k] return the next solution of a backtracking parser. *)
+
+value bparse_all : bp 'a 'b -> t 'a -> list 'b;
+ (* [bparse_all p strm] return the list of all solutions of a
+ backtracking parser applied to a functional stream. *)
+
+(*--*)
+
+value nil : t 'a;
+type data 'a = 'x;
+value cons : 'a -> t 'a -> data 'a;
+value app : t 'a -> t 'a -> data 'a;
+value flazy : (unit -> data 'a) -> t 'a;
+
+value b_seq : bp 'a 'b -> ('b -> bp 'a 'c) -> bp 'a 'c;
+value b_or : bp 'a 'b -> bp 'a 'b -> bp 'a 'b;
+value b_term : ('a -> option 'b) -> bp 'a 'b;
+value b_act : 'b -> bp 'a 'b;
diff --git a/gramlib/gramext.ml b/gramlib/gramext.ml
new file mode 100644
index 0000000000..1e50bdc14f
--- /dev/null
+++ b/gramlib/gramext.ml
@@ -0,0 +1,622 @@
+(* camlp5r *)
+(* gramext.ml,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+open Printf;
+
+type parser_t 'a = Stream.t 'a -> Obj.t;
+type fparser_t 'a = Fstream.t 'a -> option (Obj.t * Fstream.t 'a);
+type bparser_t 'a = Fstream.bp 'a Obj.t;
+
+type parse_algorithm =
+ [ Predictive | Functional | Backtracking | DefaultAlgorithm ];
+
+type grammar 'te =
+ { gtokens : Hashtbl.t Plexing.pattern (ref int);
+ glexer : mutable Plexing.lexer 'te;
+ galgo : mutable parse_algorithm }
+;
+
+type g_entry 'te =
+ { egram : grammar 'te;
+ ename : string;
+ elocal : bool;
+ estart : mutable int -> parser_t 'te;
+ econtinue : mutable int -> int -> Obj.t -> parser_t 'te;
+ fstart : mutable int -> err_fun -> fparser_t 'te;
+ fcontinue : mutable int -> int -> Obj.t -> err_fun -> fparser_t 'te;
+ bstart : mutable int -> err_fun -> bparser_t 'te;
+ bcontinue : mutable int -> int -> Obj.t -> err_fun -> bparser_t 'te;
+ edesc : mutable g_desc 'te }
+and g_desc 'te =
+ [ Dlevels of list (g_level 'te)
+ | Dparser of parser_t 'te ]
+and g_level 'te =
+ { assoc : g_assoc;
+ lname : option string;
+ lsuffix : g_tree 'te;
+ lprefix : g_tree 'te }
+and g_assoc = [ NonA | RightA | LeftA ]
+and g_symbol 'te =
+ [ Sfacto of g_symbol 'te
+ | Smeta of string and list (g_symbol 'te) and Obj.t
+ | Snterm of g_entry 'te
+ | Snterml of g_entry 'te and string
+ | Slist0 of g_symbol 'te
+ | Slist0sep of g_symbol 'te and g_symbol 'te and bool
+ | Slist1 of g_symbol 'te
+ | Slist1sep of g_symbol 'te and g_symbol 'te and bool
+ | Sopt of g_symbol 'te
+ | Sflag of g_symbol 'te
+ | Sself
+ | Snext
+ | Scut
+ | Stoken of Plexing.pattern
+ | Stree of g_tree 'te
+ | Svala of list string and g_symbol 'te ]
+and g_action = Obj.t
+and g_tree 'te =
+ [ Node of g_node 'te
+ | LocAct of g_action and list g_action
+ | DeadEnd ]
+and g_node 'te =
+ { node : g_symbol 'te; son : g_tree 'te; brother : g_tree 'te }
+and err_fun = unit -> string;
+
+type position =
+ [ First
+ | Last
+ | Before of string
+ | After of string
+ | Like of string
+ | Level of string ]
+;
+
+value warning_verbose = ref True;
+
+value rec derive_eps =
+ fun
+ [ Slist0 _ -> True
+ | Slist0sep _ _ _ -> True
+ | Sopt _ | Sflag _ -> True
+ | Sfacto s -> derive_eps s
+ | Stree t -> tree_derive_eps t
+ | Svala _ s -> derive_eps s
+ | Smeta _ _ _ | Slist1 _ | Slist1sep _ _ _ | Snterm _ | Snterml _ _ |
+ Snext | Sself | Scut | Stoken _ ->
+ False ]
+and tree_derive_eps =
+ fun
+ [ LocAct _ _ -> True
+ | Node {node = s; brother = bro; son = son} ->
+ derive_eps s && tree_derive_eps son || tree_derive_eps bro
+ | DeadEnd -> False ]
+;
+
+value rec eq_symbol s1 s2 =
+ match (s1, s2) with
+ [ (Snterm e1, Snterm e2) -> e1 == e2
+ | (Snterml e1 l1, Snterml e2 l2) -> e1 == e2 && l1 = l2
+ | (Slist0 s1, Slist0 s2) -> eq_symbol s1 s2
+ | (Slist0sep s1 sep1 b1, Slist0sep s2 sep2 b2) ->
+ eq_symbol s1 s2 && eq_symbol sep1 sep2 && b1 = b2
+ | (Slist1 s1, Slist1 s2) -> eq_symbol s1 s2
+ | (Slist1sep s1 sep1 b1, Slist1sep s2 sep2 b2) ->
+ eq_symbol s1 s2 && eq_symbol sep1 sep2 && b1 = b2
+ | (Sflag s1, Sflag s2) -> eq_symbol s1 s2
+ | (Sopt s1, Sopt s2) -> eq_symbol s1 s2
+ | (Svala ls1 s1, Svala ls2 s2) -> ls1 = ls2 && eq_symbol s1 s2
+ | (Stree _, Stree _) -> False
+ | (Sfacto (Stree t1), Sfacto (Stree t2)) ->
+ (* The only goal of the node 'Sfacto' is to allow tree comparison
+ (therefore factorization) without looking at the semantic
+ actions; allow factorization of rules like "SV foo" which are
+ actually expanded into a tree. *)
+ eq_tree t1 t2 where rec eq_tree t1 t2 =
+ match (t1, t2) with
+ [ (Node n1, Node n2) ->
+ eq_symbol n1.node n2.node && eq_tree n1.son n2.son &&
+ eq_tree n1.brother n2.brother
+ | (LocAct _ _, LocAct _ _) -> True
+ | (DeadEnd, DeadEnd) -> True
+ | _ -> False ]
+ | _ -> s1 = s2 ]
+;
+
+value is_before s1 s2 =
+ let s1 = match s1 with [ Svala _ s -> s | _ -> s1 ] in
+ let s2 = match s2 with [ Svala _ s -> s | _ -> s2 ] in
+ match (s1, s2) with
+ [ (Stoken ("ANY", _), _) -> False
+ | (_, Stoken ("ANY", _)) -> True
+ | (Stoken (_, s), Stoken (_, "")) when s <> "" -> True
+ | (Stoken _, Stoken _) -> False
+ | (Stoken _, _) -> True
+ | _ -> False ]
+;
+
+value insert_tree entry_name gsymbols action tree =
+ let rec insert symbols tree =
+ match symbols with
+ [ [s :: sl] -> insert_in_tree s sl tree
+ | [] ->
+ match tree with
+ [ Node {node = s; son = son; brother = bro} ->
+ Node {node = s; son = son; brother = insert [] bro}
+ | LocAct old_action action_list -> do {
+ if warning_verbose.val then do {
+ eprintf "<W> Grammar extension: ";
+ if entry_name <> "" then eprintf "in [%s], " entry_name else ();
+ eprintf "some rule has been masked\n";
+ flush stderr
+ }
+ else ();
+ LocAct action [old_action :: action_list]
+ }
+ | DeadEnd -> LocAct action [] ] ]
+ and insert_in_tree s sl tree =
+ match try_insert s sl tree with
+ [ Some t -> t
+ | None -> Node {node = s; son = insert sl DeadEnd; brother = tree} ]
+ and try_insert s sl tree =
+ match tree with
+ [ Node {node = s1; son = son; brother = bro} ->
+ if eq_symbol s s1 then
+ let t = Node {node = s1; son = insert sl son; brother = bro} in
+ Some t
+ else if s = Scut then
+ try_insert s sl (Node {node = s; son = tree; brother = DeadEnd})
+ else if s1 = Scut then
+ try_insert s1 [s :: sl] tree
+ else if is_before s1 s || derive_eps s && not (derive_eps s1) then
+ let bro =
+ match try_insert s sl bro with
+ [ Some bro -> bro
+ | None ->
+ Node {node = s; son = insert sl DeadEnd; brother = bro} ]
+ in
+ let t = Node {node = s1; son = son; brother = bro} in
+ Some t
+ else
+ match try_insert s sl bro with
+ [ Some bro ->
+ let t = Node {node = s1; son = son; brother = bro} in
+ Some t
+ | None -> None ]
+ | LocAct _ _ | DeadEnd -> None ]
+ in
+ insert gsymbols tree
+;
+
+value srules rl =
+ let t =
+ List.fold_left
+ (fun tree (symbols, action) -> insert_tree "" symbols action tree)
+ DeadEnd rl
+ in
+ Stree t
+;
+
+external action : 'a -> g_action = "%identity";
+
+value is_level_labelled n lev =
+ match lev.lname with
+ [ Some n1 -> n = n1
+ | None -> False ]
+;
+
+value rec token_exists_in_level f lev =
+ token_exists_in_tree f lev.lprefix || token_exists_in_tree f lev.lsuffix
+and token_exists_in_tree f =
+ fun
+ [ Node n ->
+ token_exists_in_symbol f n.node || token_exists_in_tree f n.brother ||
+ token_exists_in_tree f n.son
+ | LocAct _ _ | DeadEnd -> False ]
+and token_exists_in_symbol f =
+ fun
+ [ Sfacto sy -> token_exists_in_symbol f sy
+ | Smeta _ syl _ -> List.exists (token_exists_in_symbol f) syl
+ | Slist0 sy -> token_exists_in_symbol f sy
+ | Slist0sep sy sep _ ->
+ token_exists_in_symbol f sy || token_exists_in_symbol f sep
+ | Slist1 sy -> token_exists_in_symbol f sy
+ | Slist1sep sy sep _ ->
+ token_exists_in_symbol f sy || token_exists_in_symbol f sep
+ | Sopt sy -> token_exists_in_symbol f sy
+ | Sflag sy -> token_exists_in_symbol f sy
+ | Stoken tok -> f tok
+ | Stree t -> token_exists_in_tree f t
+ | Svala _ sy -> token_exists_in_symbol f sy
+ | Snterm _ | Snterml _ _ | Snext | Sself | Scut -> False ]
+;
+
+value insert_level entry_name e1 symbols action slev =
+ match e1 with
+ [ True ->
+ {assoc = slev.assoc; lname = slev.lname;
+ lsuffix = insert_tree entry_name symbols action slev.lsuffix;
+ lprefix = slev.lprefix}
+ | False ->
+ {assoc = slev.assoc; lname = slev.lname; lsuffix = slev.lsuffix;
+ lprefix = insert_tree entry_name symbols action slev.lprefix} ]
+;
+
+value empty_lev lname assoc =
+ let assoc =
+ match assoc with
+ [ Some a -> a
+ | None -> LeftA ]
+ in
+ {assoc = assoc; lname = lname; lsuffix = DeadEnd; lprefix = DeadEnd}
+;
+
+value change_lev lev n lname assoc = do {
+ let a =
+ match assoc with
+ [ None -> lev.assoc
+ | Some a -> do {
+ if a <> lev.assoc && warning_verbose.val then do {
+ eprintf "<W> Changing associativity of level \"%s\"\n" n;
+ flush stderr
+ }
+ else ();
+ a
+ } ]
+ in
+ match lname with
+ [ Some n ->
+ if lname <> lev.lname && warning_verbose.val then do {
+ eprintf "<W> Level label \"%s\" ignored\n" n;
+ flush stderr
+ }
+ else ()
+ | None -> () ];
+ {assoc = a; lname = lev.lname; lsuffix = lev.lsuffix; lprefix = lev.lprefix}
+};
+
+value get_level entry position levs =
+ match position with
+ [ Some First -> ([], empty_lev, levs)
+ | Some Last -> (levs, empty_lev, [])
+ | Some (Level n) ->
+ get levs where rec get =
+ fun
+ [ [] -> do {
+ eprintf "No level labelled \"%s\" in entry \"%s\"\n" n
+ entry.ename;
+ flush stderr;
+ failwith "Grammar.extend"
+ }
+ | [lev :: levs] ->
+ if is_level_labelled n lev then ([], change_lev lev n, levs)
+ else
+ let (levs1, rlev, levs2) = get levs in
+ ([lev :: levs1], rlev, levs2) ]
+ | Some (Before n) ->
+ get levs where rec get =
+ fun
+ [ [] -> do {
+ eprintf "No level labelled \"%s\" in entry \"%s\"\n" n
+ entry.ename;
+ flush stderr;
+ failwith "Grammar.extend"
+ }
+ | [lev :: levs] ->
+ if is_level_labelled n lev then ([], empty_lev, [lev :: levs])
+ else
+ let (levs1, rlev, levs2) = get levs in
+ ([lev :: levs1], rlev, levs2) ]
+ | Some (After n) ->
+ get levs where rec get =
+ fun
+ [ [] -> do {
+ eprintf "No level labelled \"%s\" in entry \"%s\"\n" n
+ entry.ename;
+ flush stderr;
+ failwith "Grammar.extend"
+ }
+ | [lev :: levs] ->
+ if is_level_labelled n lev then ([lev], empty_lev, levs)
+ else
+ let (levs1, rlev, levs2) = get levs in
+ ([lev :: levs1], rlev, levs2) ]
+ | Some (Like n) ->
+ let f (tok, prm) = n = tok || n = prm in
+ get levs where rec get =
+ fun
+ [ [] -> do {
+ eprintf "No level with \"%s\" in entry \"%s\"\n" n entry.ename;
+ flush stderr;
+ failwith "Grammar.extend"
+ }
+ | [lev :: levs] ->
+ if token_exists_in_level f lev then ([], change_lev lev n, levs)
+ else
+ let (levs1, rlev, levs2) = get levs in
+ ([lev :: levs1], rlev, levs2) ]
+ | None ->
+ match levs with
+ [ [lev :: levs] -> ([], change_lev lev "<top>", levs)
+ | [] -> ([], empty_lev, []) ] ]
+;
+
+value rec check_gram entry =
+ fun
+ [ Snterm e ->
+ if e.egram != entry.egram then do {
+ eprintf "\
+Error: entries \"%s\" and \"%s\" do not belong to the same grammar.\n"
+ entry.ename e.ename;
+ flush stderr;
+ failwith "Grammar.extend error"
+ }
+ else ()
+ | Snterml e _ ->
+ if e.egram != entry.egram then do {
+ eprintf "\
+Error: entries \"%s\" and \"%s\" do not belong to the same grammar.\n"
+ entry.ename e.ename;
+ flush stderr;
+ failwith "Grammar.extend error"
+ }
+ else ()
+ | Sfacto s -> check_gram entry s
+ | Smeta _ sl _ -> List.iter (check_gram entry) sl
+ | Slist0sep s t _ -> do { check_gram entry t; check_gram entry s }
+ | Slist1sep s t _ -> do { check_gram entry t; check_gram entry s }
+ | Slist0 s -> check_gram entry s
+ | Slist1 s -> check_gram entry s
+ | Sopt s -> check_gram entry s
+ | Sflag s -> check_gram entry s
+ | Stree t -> tree_check_gram entry t
+ | Svala _ s -> check_gram entry s
+ | Snext | Sself | Scut | Stoken _ -> () ]
+and tree_check_gram entry =
+ fun
+ [ Node {node = n; brother = bro; son = son} -> do {
+ check_gram entry n;
+ tree_check_gram entry bro;
+ tree_check_gram entry son
+ }
+ | LocAct _ _ | DeadEnd -> () ]
+;
+
+value change_to_self entry =
+ fun
+ [ Snterm e when e == entry -> Sself
+ | x -> x ]
+;
+
+value get_initial entry =
+ fun
+ [ [Sself :: symbols] -> (True, symbols)
+ | symbols -> (False, symbols) ]
+;
+
+value insert_tokens gram symbols =
+ let rec insert =
+ fun
+ [ Sfacto s -> insert s
+ | Smeta _ sl _ -> List.iter insert sl
+ | Slist0 s -> insert s
+ | Slist1 s -> insert s
+ | Slist0sep s t _ -> do { insert s; insert t }
+ | Slist1sep s t _ -> do { insert s; insert t }
+ | Sopt s -> insert s
+ | Sflag s -> insert s
+ | Stree t -> tinsert t
+ | Svala _ s -> insert s
+ | Stoken ("ANY", _) -> ()
+ | Stoken tok -> do {
+ gram.glexer.Plexing.tok_using tok;
+ let r =
+ try Hashtbl.find gram.gtokens tok with
+ [ Not_found -> do {
+ let r = ref 0 in
+ Hashtbl.add gram.gtokens tok r;
+ r
+ } ]
+ in
+ incr r
+ }
+ | Snterm _ | Snterml _ _ | Snext | Sself | Scut -> () ]
+ and tinsert =
+ fun
+ [ Node {node = s; brother = bro; son = son} -> do {
+ insert s;
+ tinsert bro;
+ tinsert son
+ }
+ | LocAct _ _ | DeadEnd -> () ]
+ in
+ List.iter insert symbols
+;
+
+value levels_of_rules entry position rules =
+ let elev =
+ match entry.edesc with
+ [ Dlevels elev -> elev
+ | Dparser _ -> do {
+ eprintf "Error: entry not extensible: \"%s\"\n" entry.ename;
+ flush stderr;
+ failwith "Grammar.extend"
+ } ]
+ in
+ if rules = [] then elev
+ else
+ let (levs1, make_lev, levs2) = get_level entry position elev in
+ let (levs, _) =
+ List.fold_left
+ (fun (levs, make_lev) (lname, assoc, level) ->
+ let lev = make_lev lname assoc in
+ let lev =
+ List.fold_left
+ (fun lev (symbols, action) -> do {
+ let symbols = List.map (change_to_self entry) symbols in
+ List.iter (check_gram entry) symbols;
+ let (e1, symbols) = get_initial entry symbols in
+ insert_tokens entry.egram symbols;
+ insert_level entry.ename e1 symbols action lev
+ })
+ lev level
+ in
+ ([lev :: levs], empty_lev))
+ ([], make_lev) rules
+ in
+ levs1 @ List.rev levs @ levs2
+;
+
+value logically_eq_symbols entry =
+ let rec eq_symbols s1 s2 =
+ match (s1, s2) with
+ [ (Snterm e1, Snterm e2) -> e1.ename = e2.ename
+ | (Snterm e1, Sself) -> e1.ename = entry.ename
+ | (Sself, Snterm e2) -> entry.ename = e2.ename
+ | (Snterml e1 l1, Snterml e2 l2) -> e1.ename = e2.ename && l1 = l2
+ | (Slist0 s1, Slist0 s2) -> eq_symbols s1 s2
+ | (Slist0sep s1 sep1 b1, Slist0sep s2 sep2 b2) ->
+ eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2
+ | (Slist1 s1, Slist1 s2) -> eq_symbols s1 s2
+ | (Slist1sep s1 sep1 b1, Slist1sep s2 sep2 b2) ->
+ eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2
+ | (Sopt s1, Sopt s2) -> eq_symbols s1 s2
+ | (Stree t1, Stree t2) -> eq_trees t1 t2
+ | _ -> s1 = s2 ]
+ and eq_trees t1 t2 =
+ match (t1, t2) with
+ [ (Node n1, Node n2) ->
+ eq_symbols n1.node n2.node && eq_trees n1.son n2.son &&
+ eq_trees n1.brother n2.brother
+ | (LocAct _ _ | DeadEnd, LocAct _ _ | DeadEnd) -> True
+ | _ -> False ]
+ in
+ eq_symbols
+;
+
+(* [delete_rule_in_tree] returns
+ [Some (dsl, t)] if success
+ [dsl] =
+ Some (list of deleted nodes) if branch deleted
+ None if action replaced by previous version of action
+ [t] = remaining tree
+ [None] if failure *)
+
+value delete_rule_in_tree entry =
+ let rec delete_in_tree symbols tree =
+ match (symbols, tree) with
+ [ ([s :: sl], Node n) ->
+ if logically_eq_symbols entry s n.node then delete_son sl n
+ else
+ match delete_in_tree symbols n.brother with
+ [ Some (dsl, t) ->
+ Some (dsl, Node {node = n.node; son = n.son; brother = t})
+ | None -> None ]
+ | ([s :: sl], _) -> None
+ | ([], Node n) ->
+ match delete_in_tree [] n.brother with
+ [ Some (dsl, t) ->
+ Some (dsl, Node {node = n.node; son = n.son; brother = t})
+ | None -> None ]
+ | ([], DeadEnd) -> None
+ | ([], LocAct _ []) -> Some (Some [], DeadEnd)
+ | ([], LocAct _ [action :: list]) -> Some (None, LocAct action list) ]
+ and delete_son sl n =
+ match delete_in_tree sl n.son with
+ [ Some (Some dsl, DeadEnd) -> Some (Some [n.node :: dsl], n.brother)
+ | Some (Some dsl, t) ->
+ let t = Node {node = n.node; son = t; brother = n.brother} in
+ Some (Some [n.node :: dsl], t)
+ | Some (None, t) ->
+ let t = Node {node = n.node; son = t; brother = n.brother} in
+ Some (None, t)
+ | None -> None ]
+ in
+ delete_in_tree
+;
+
+value rec decr_keyw_use gram =
+ fun
+ [ Stoken tok -> do {
+ let r = Hashtbl.find gram.gtokens tok in
+ decr r;
+ if r.val == 0 then do {
+ Hashtbl.remove gram.gtokens tok;
+ gram.glexer.Plexing.tok_removing tok
+ }
+ else ()
+ }
+ | Sfacto s -> decr_keyw_use gram s
+ | Smeta _ sl _ -> List.iter (decr_keyw_use gram) sl
+ | Slist0 s -> decr_keyw_use gram s
+ | Slist1 s -> decr_keyw_use gram s
+ | Slist0sep s1 s2 _ -> do { decr_keyw_use gram s1; decr_keyw_use gram s2 }
+ | Slist1sep s1 s2 _ -> do { decr_keyw_use gram s1; decr_keyw_use gram s2 }
+ | Sopt s -> decr_keyw_use gram s
+ | Sflag s -> decr_keyw_use gram s
+ | Stree t -> decr_keyw_use_in_tree gram t
+ | Svala _ s -> decr_keyw_use gram s
+ | Sself | Snext | Scut | Snterm _ | Snterml _ _ -> () ]
+and decr_keyw_use_in_tree gram =
+ fun
+ [ DeadEnd | LocAct _ _ -> ()
+ | Node n -> do {
+ decr_keyw_use gram n.node;
+ decr_keyw_use_in_tree gram n.son;
+ decr_keyw_use_in_tree gram n.brother
+ } ]
+;
+
+value rec delete_rule_in_suffix entry symbols =
+ fun
+ [ [lev :: levs] ->
+ match delete_rule_in_tree entry symbols lev.lsuffix with
+ [ Some (dsl, t) -> do {
+ match dsl with
+ [ Some dsl -> List.iter (decr_keyw_use entry.egram) dsl
+ | None -> () ];
+ match t with
+ [ DeadEnd when lev.lprefix == DeadEnd -> levs
+ | _ ->
+ let lev =
+ {assoc = lev.assoc; lname = lev.lname; lsuffix = t;
+ lprefix = lev.lprefix}
+ in
+ [lev :: levs] ]
+ }
+ | None ->
+ let levs = delete_rule_in_suffix entry symbols levs in
+ [lev :: levs] ]
+ | [] -> raise Not_found ]
+;
+
+value rec delete_rule_in_prefix entry symbols =
+ fun
+ [ [lev :: levs] ->
+ match delete_rule_in_tree entry symbols lev.lprefix with
+ [ Some (dsl, t) -> do {
+ match dsl with
+ [ Some dsl -> List.iter (decr_keyw_use entry.egram) dsl
+ | None -> () ];
+ match t with
+ [ DeadEnd when lev.lsuffix == DeadEnd -> levs
+ | _ ->
+ let lev =
+ {assoc = lev.assoc; lname = lev.lname; lsuffix = lev.lsuffix;
+ lprefix = t}
+ in
+ [lev :: levs] ]
+ }
+ | None ->
+ let levs = delete_rule_in_prefix entry symbols levs in
+ [lev :: levs] ]
+ | [] -> raise Not_found ]
+;
+
+value rec delete_rule_in_level_list entry symbols levs =
+ match symbols with
+ [ [Sself :: symbols] -> delete_rule_in_suffix entry symbols levs
+ | [Snterm e :: symbols] when e == entry ->
+ delete_rule_in_suffix entry symbols levs
+ | _ -> delete_rule_in_prefix entry symbols levs ]
+;
diff --git a/gramlib/gramext.mli b/gramlib/gramext.mli
new file mode 100644
index 0000000000..69498f6d40
--- /dev/null
+++ b/gramlib/gramext.mli
@@ -0,0 +1,87 @@
+(* camlp5r *)
+(* gramext.mli,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+type parser_t 'a = Stream.t 'a -> Obj.t;
+type fparser_t 'a = Fstream.t 'a -> option (Obj.t * Fstream.t 'a);
+type bparser_t 'a = Fstream.bp 'a Obj.t;
+
+type parse_algorithm =
+ [ Predictive | Functional | Backtracking | DefaultAlgorithm ];
+
+type grammar 'te =
+ { gtokens : Hashtbl.t Plexing.pattern (ref int);
+ glexer : mutable Plexing.lexer 'te;
+ galgo : mutable parse_algorithm }
+;
+
+type g_entry 'te =
+ { egram : grammar 'te;
+ ename : string;
+ elocal : bool;
+ estart : mutable int -> parser_t 'te;
+ econtinue : mutable int -> int -> Obj.t -> parser_t 'te;
+ fstart : mutable int -> err_fun -> fparser_t 'te;
+ fcontinue : mutable int -> int -> Obj.t -> err_fun -> fparser_t 'te;
+ bstart : mutable int -> err_fun -> bparser_t 'te;
+ bcontinue : mutable int -> int -> Obj.t -> err_fun -> bparser_t 'te;
+ edesc : mutable g_desc 'te }
+and g_desc 'te =
+ [ Dlevels of list (g_level 'te)
+ | Dparser of parser_t 'te ]
+and g_level 'te =
+ { assoc : g_assoc;
+ lname : option string;
+ lsuffix : g_tree 'te;
+ lprefix : g_tree 'te }
+and g_assoc = [ NonA | RightA | LeftA ]
+and g_symbol 'te =
+ [ Sfacto of g_symbol 'te
+ | Smeta of string and list (g_symbol 'te) and Obj.t
+ | Snterm of g_entry 'te
+ | Snterml of g_entry 'te and string
+ | Slist0 of g_symbol 'te
+ | Slist0sep of g_symbol 'te and g_symbol 'te and bool
+ | Slist1 of g_symbol 'te
+ | Slist1sep of g_symbol 'te and g_symbol 'te and bool
+ | Sopt of g_symbol 'te
+ | Sflag of g_symbol 'te
+ | Sself
+ | Snext
+ | Scut
+ | Stoken of Plexing.pattern
+ | Stree of g_tree 'te
+ | Svala of list string and g_symbol 'te ]
+and g_action = Obj.t
+and g_tree 'te =
+ [ Node of g_node 'te
+ | LocAct of g_action and list g_action
+ | DeadEnd ]
+and g_node 'te =
+ { node : g_symbol 'te; son : g_tree 'te; brother : g_tree 'te }
+and err_fun = unit -> string;
+
+type position =
+ [ First
+ | Last
+ | Before of string
+ | After of string
+ | Like of string
+ | Level of string ]
+;
+
+value levels_of_rules :
+ g_entry 'te -> option position ->
+ list
+ (option string * option g_assoc *
+ list (list (g_symbol 'te) * g_action)) ->
+ list (g_level 'te);
+value srules : list (list (g_symbol 'te) * g_action) -> g_symbol 'te;
+external action : 'a -> g_action = "%identity";
+value eq_symbol : g_symbol 'a -> g_symbol 'a -> bool;
+
+value delete_rule_in_level_list :
+ g_entry 'te -> list (g_symbol 'te) -> list (g_level 'te) ->
+ list (g_level 'te);
+
+value warning_verbose : ref bool;
diff --git a/gramlib/grammar.ml b/gramlib/grammar.ml
new file mode 100644
index 0000000000..d6a2019c20
--- /dev/null
+++ b/gramlib/grammar.ml
@@ -0,0 +1,2695 @@
+(* camlp5r *)
+(* grammar.ml,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+#load "pa_fstream.cmo";
+
+value pervasives_stderr = stderr;
+
+open Gramext;
+open Format;
+
+value stderr = pervasives_stderr;
+
+value rec flatten_tree =
+ fun
+ [ DeadEnd -> []
+ | LocAct _ _ -> [[]]
+ | Node {node = n; brother = b; son = s} ->
+ List.map (fun l -> [n :: l]) (flatten_tree s) @ flatten_tree b ]
+;
+
+value utf8_print = ref True;
+
+value utf8_string_escaped s =
+ let b = Buffer.create (String.length s) in
+ loop 0 where rec loop i =
+ if i = String.length s then Buffer.contents b
+ else do {
+ match s.[i] with
+ | '"' → Buffer.add_string b "\\\""
+ | '\\' → Buffer.add_string b "\\\\"
+ | '\n' → Buffer.add_string b "\\n"
+ | '\t' → Buffer.add_string b "\\t"
+ | '\r' → Buffer.add_string b "\\r"
+ | '\b' → Buffer.add_string b "\\b"
+ | c → Buffer.add_char b c
+ end;
+ loop (i + 1)
+ }
+;
+
+value string_escaped s =
+ if utf8_print.val then utf8_string_escaped s
+ else String.escaped s
+;
+
+value print_str ppf s = fprintf ppf "\"%s\"" (string_escaped s);
+
+value rec print_symbol ppf =
+ fun
+ [ Sfacto s -> print_symbol ppf s
+ | Smeta n sl _ -> print_meta ppf n sl
+ | Slist0 s -> fprintf ppf "LIST0 %a" print_symbol1 s
+ | Slist0sep s t osep ->
+ fprintf ppf "LIST0 %a SEP %a%s" print_symbol1 s print_symbol1 t
+ (if osep then " OPT_SEP" else "")
+ | Slist1 s -> fprintf ppf "LIST1 %a" print_symbol1 s
+ | Slist1sep s t osep ->
+ fprintf ppf "LIST1 %a SEP %a%s" print_symbol1 s print_symbol1 t
+ (if osep then " OPT_SEP" else "")
+ | Sopt s -> fprintf ppf "OPT %a" print_symbol1 s
+ | Sflag s -> fprintf ppf "FLAG %a" print_symbol1 s
+ | Stoken (con, prm) when con <> "" && prm <> "" ->
+ fprintf ppf "%s@ %a" con print_str prm
+ | Svala _ s -> fprintf ppf "V %a" print_symbol s
+ | Snterml e l ->
+ fprintf ppf "%s%s@ LEVEL@ %a" e.ename (if e.elocal then "*" else "")
+ print_str l
+ | Snterm _ | Snext | Sself | Scut | Stoken _ | Stree _ as s ->
+ print_symbol1 ppf s ]
+and print_meta ppf n sl =
+ loop 0 sl where rec loop i =
+ fun
+ [ [] -> ()
+ | [s :: sl] -> do {
+ let j =
+ try String.index_from n i ' ' with [ Not_found -> String.length n ]
+ in
+ fprintf ppf "%s %a" (String.sub n i (j - i)) print_symbol1 s;
+ if sl = [] then ()
+ else do { fprintf ppf " "; loop (min (j + 1) (String.length n)) sl }
+ } ]
+and print_symbol1 ppf =
+ fun
+ [ Sfacto s -> print_symbol1 ppf s
+ | Snterm e -> fprintf ppf "%s%s" e.ename (if e.elocal then "*" else "")
+ | Sself -> pp_print_string ppf "SELF"
+ | Snext -> pp_print_string ppf "NEXT"
+ | Scut -> pp_print_string ppf "/"
+ | Stoken ("", s) -> print_str ppf s
+ | Stoken (con, "") -> pp_print_string ppf con
+ | Stree t -> print_level ppf pp_print_space (flatten_tree t)
+ | Smeta _ _ _ | Snterml _ _ | Slist0 _ | Slist0sep _ _ _ | Slist1 _ |
+ Slist1sep _ _ _ | Sopt _ | Sflag _ | Stoken _ | Svala _ _ as s ->
+ fprintf ppf "(%a)" print_symbol s ]
+and print_rule ppf symbols = do {
+ fprintf ppf "@[<hov 0>";
+ let _ =
+ List.fold_left
+ (fun sep symbol -> do {
+ fprintf ppf "%t%a" sep print_symbol symbol;
+ fun ppf -> fprintf ppf ";@ "
+ })
+ (fun ppf -> ()) symbols
+ in
+ fprintf ppf "@]"
+}
+and print_level ppf pp_print_space rules = do {
+ fprintf ppf "@[<hov 0>[ ";
+ let _ =
+ List.fold_left
+ (fun sep rule -> do {
+ fprintf ppf "%t%a" sep print_rule rule;
+ fun ppf -> fprintf ppf "%a| " pp_print_space ()
+ })
+ (fun ppf -> ()) rules
+ in
+ fprintf ppf " ]@]"
+};
+
+value print_levels ppf elev =
+ let _ =
+ List.fold_left
+ (fun sep lev -> do {
+ let rules =
+ List.map (fun t -> [Sself :: t]) (flatten_tree lev.lsuffix) @
+ flatten_tree lev.lprefix
+ in
+ fprintf ppf "%t@[<hov 2>" sep;
+ match lev.lname with
+ [ Some n -> fprintf ppf "%a@;<1 2>" print_str n
+ | None -> () ];
+ match lev.assoc with
+ [ LeftA -> fprintf ppf "LEFTA"
+ | RightA -> fprintf ppf "RIGHTA"
+ | NonA -> fprintf ppf "NONA" ];
+ fprintf ppf "@]@;<1 2>";
+ print_level ppf pp_force_newline rules;
+ fun ppf -> fprintf ppf "@,| "
+ })
+ (fun ppf -> ()) elev
+ in
+ ()
+;
+
+value print_entry ppf e = do {
+ fprintf ppf "@[<v 0>[ ";
+ match e.edesc with
+ [ Dlevels elev -> print_levels ppf elev
+ | Dparser _ -> fprintf ppf "<parser>" ];
+ fprintf ppf " ]@]"
+};
+
+value iter_entry f e =
+ let treated = ref [] in
+ let rec do_entry e =
+ if List.memq e treated.val then ()
+ else do {
+ treated.val := [e :: treated.val];
+ f e;
+ match e.edesc with
+ [ Dlevels ll -> List.iter do_level ll
+ | Dparser _ -> () ]
+ }
+ and do_level lev = do { do_tree lev.lsuffix; do_tree lev.lprefix }
+ and do_tree =
+ fun
+ [ Node n -> do_node n
+ | LocAct _ _ | DeadEnd -> () ]
+ and do_node n = do { do_symbol n.node; do_tree n.son; do_tree n.brother }
+ and do_symbol =
+ fun
+ [ Sfacto s -> do_symbol s
+ | Smeta _ sl _ -> List.iter do_symbol sl
+ | Snterm e -> do_entry e
+ | Snterml e _ -> do_entry e
+ | Slist0 s -> do_symbol s
+ | Slist1 s -> do_symbol s
+ | Sopt s -> do_symbol s
+ | Sflag s -> do_symbol s
+ | Slist0sep s1 s2 _ -> do { do_symbol s1; do_symbol s2 }
+ | Slist1sep s1 s2 _ -> do { do_symbol s1; do_symbol s2 }
+ | Stree t -> do_tree t
+ | Svala _ s -> do_symbol s
+ | Sself | Snext | Scut | Stoken _ -> () ]
+ in
+ do_entry e
+;
+
+value fold_entry f e init =
+ let treated = ref [] in
+ let rec do_entry accu e =
+ if List.memq e treated.val then accu
+ else do {
+ treated.val := [e :: treated.val];
+ let accu = f e accu in
+ match e.edesc with
+ [ Dlevels ll -> List.fold_left do_level accu ll
+ | Dparser _ -> accu ]
+ }
+ and do_level accu lev =
+ let accu = do_tree accu lev.lsuffix in
+ do_tree accu lev.lprefix
+ and do_tree accu =
+ fun
+ [ Node n -> do_node accu n
+ | LocAct _ _ | DeadEnd -> accu ]
+ and do_node accu n =
+ let accu = do_symbol accu n.node in
+ let accu = do_tree accu n.son in
+ do_tree accu n.brother
+ and do_symbol accu =
+ fun
+ [ Sfacto s -> do_symbol accu s
+ | Smeta _ sl _ -> List.fold_left do_symbol accu sl
+ | Snterm e -> do_entry accu e
+ | Snterml e _ -> do_entry accu e
+ | Slist0 s -> do_symbol accu s
+ | Slist1 s -> do_symbol accu s
+ | Sopt s -> do_symbol accu s
+ | Sflag s -> do_symbol accu s
+ | Slist0sep s1 s2 _ -> do_symbol (do_symbol accu s1) s2
+ | Slist1sep s1 s2 _ -> do_symbol (do_symbol accu s1) s2
+ | Stree t -> do_tree accu t
+ | Svala _ s -> do_symbol accu s
+ | Sself | Snext | Scut | Stoken _ -> accu ]
+ in
+ do_entry init e
+;
+
+value floc = ref (fun _ -> failwith "internal error when computing location");
+
+value loc_of_token_interval bp ep =
+ if bp == ep then
+ if bp == 0 then Ploc.dummy
+ else Ploc.after (floc.val (bp - 1)) 0 1
+ else
+ let loc1 = floc.val bp in
+ let loc2 = floc.val (pred ep) in
+ Ploc.encl loc1 loc2
+;
+
+value rec name_of_symbol entry =
+ fun
+ [ Snterm e -> "[" ^ e.ename ^ "]"
+ | Snterml e l -> "[" ^ e.ename ^ " level " ^ l ^ "]"
+ | Sself | Snext -> "[" ^ entry.ename ^ "]"
+ | Stoken tok -> entry.egram.glexer.Plexing.tok_text tok
+ | _ -> "???" ]
+;
+
+value rec get_token_list entry rev_tokl last_tok tree =
+ match tree with
+ [ Node {node = Stoken tok; son = son; brother = DeadEnd} ->
+ get_token_list entry [last_tok :: rev_tokl] (tok, None) son
+ | Node {node = Svala ls (Stoken tok); son = son; brother = DeadEnd} ->
+ get_token_list entry [last_tok :: rev_tokl] (tok, Some ls) son
+ | _ ->
+ if rev_tokl = [] then None
+ else Some (rev_tokl, last_tok, tree) ]
+;
+
+value rec name_of_symbol_failed entry =
+ fun
+ [ Sfacto s -> name_of_symbol_failed entry s
+ | Slist0 s -> name_of_symbol_failed entry s
+ | Slist0sep s _ _ -> name_of_symbol_failed entry s
+ | Slist1 s -> name_of_symbol_failed entry s
+ | Slist1sep s _ _ -> name_of_symbol_failed entry s
+ | Sopt s -> name_of_symbol_failed entry s
+ | Sflag s -> name_of_symbol_failed entry s
+ | Stree t -> name_of_tree_failed entry t
+ | Svala _ s -> name_of_symbol_failed entry s
+ | Smeta _ [s :: _] _ -> name_of_symbol_failed entry s
+ | s -> name_of_symbol entry s ]
+and name_of_tree_failed entry =
+ fun
+ [ Node {node = s; brother = bro; son = son} ->
+ let tokl =
+ match s with
+ [ Stoken tok -> get_token_list entry [] (tok, None) son
+ | Svala ls (Stoken tok) -> get_token_list entry [] (tok, Some ls) son
+ | _ -> None ]
+ in
+ match tokl with
+ [ None ->
+ let txt = name_of_symbol_failed entry s in
+ let txt =
+ match (s, son) with
+ [ (Sopt _, Node _) -> txt ^ " or " ^ name_of_tree_failed entry son
+ | _ -> txt ]
+ in
+ let txt =
+ match bro with
+ [ DeadEnd | LocAct _ _ -> txt
+ | Node _ -> txt ^ " or " ^ name_of_tree_failed entry bro ]
+ in
+ txt
+ | Some (rev_tokl, last_tok, son) ->
+ List.fold_left
+ (fun s (tok, _) ->
+ (if s = "" then "" else s ^ " ") ^
+ entry.egram.glexer.Plexing.tok_text tok)
+ "" (List.rev [last_tok :: rev_tokl]) ]
+ | DeadEnd | LocAct _ _ -> "???" ]
+;
+
+value search_tree_in_entry prev_symb tree =
+ fun
+ [ Dlevels levels ->
+ let rec search_levels =
+ fun
+ [ [] -> tree
+ | [level :: levels] ->
+ match search_level level with
+ [ Some tree -> tree
+ | None -> search_levels levels ] ]
+ and search_level level =
+ match search_tree level.lsuffix with
+ [ Some t -> Some (Node {node = Sself; son = t; brother = DeadEnd})
+ | None -> search_tree level.lprefix ]
+ and search_tree t =
+ if tree <> DeadEnd && t == tree then Some t
+ else
+ match t with
+ [ Node n ->
+ match search_symbol n.node with
+ [ Some symb ->
+ Some (Node {node = symb; son = n.son; brother = DeadEnd})
+ | None ->
+ match search_tree n.son with
+ [ Some t ->
+ Some (Node {node = n.node; son = t; brother = DeadEnd})
+ | None -> search_tree n.brother ] ]
+ | LocAct _ _ | DeadEnd -> None ]
+ and search_symbol symb =
+ match symb with
+ [ Snterm _ | Snterml _ _ | Slist0 _ | Slist0sep _ _ _ | Slist1 _ |
+ Slist1sep _ _ _ | Sopt _ | Stoken _ | Stree _
+ when symb == prev_symb ->
+ Some symb
+ | Slist0 symb ->
+ match search_symbol symb with
+ [ Some symb -> Some (Slist0 symb)
+ | None -> None ]
+ | Slist0sep symb sep b ->
+ match search_symbol symb with
+ [ Some symb -> Some (Slist0sep symb sep b)
+ | None ->
+ match search_symbol sep with
+ [ Some sep -> Some (Slist0sep symb sep b)
+ | None -> None ] ]
+ | Slist1 symb ->
+ match search_symbol symb with
+ [ Some symb -> Some (Slist1 symb)
+ | None -> None ]
+ | Slist1sep symb sep b ->
+ match search_symbol symb with
+ [ Some symb -> Some (Slist1sep symb sep b)
+ | None ->
+ match search_symbol sep with
+ [ Some sep -> Some (Slist1sep symb sep b)
+ | None -> None ] ]
+ | Sopt symb ->
+ match search_symbol symb with
+ [ Some symb -> Some (Sopt symb)
+ | None -> None ]
+ | Stree t ->
+ match search_tree t with
+ [ Some t -> Some (Stree t)
+ | None -> None ]
+ | _ -> None ]
+ in
+ search_levels levels
+ | Dparser _ -> tree ]
+;
+
+value error_verbose = ref False;
+
+value tree_failed entry prev_symb_result prev_symb tree = do {
+ let txt = name_of_tree_failed entry tree in
+ let txt =
+ match prev_symb with
+ [ Slist0 s ->
+ let txt1 = name_of_symbol_failed entry s in
+ txt1 ^ " or " ^ txt ^ " expected"
+ | Slist1 s ->
+ let txt1 = name_of_symbol_failed entry s in
+ txt1 ^ " or " ^ txt ^ " expected"
+ | Slist0sep s sep _ ->
+ match Obj.magic prev_symb_result with
+ [ [] ->
+ let txt1 = name_of_symbol_failed entry s in
+ txt1 ^ " or " ^ txt ^ " expected"
+ | _ ->
+ let txt1 = name_of_symbol_failed entry sep in
+ txt1 ^ " or " ^ txt ^ " expected" ]
+ | Slist1sep s sep _ ->
+ match Obj.magic prev_symb_result with
+ [ [] ->
+ let txt1 = name_of_symbol_failed entry s in
+ txt1 ^ " or " ^ txt ^ " expected"
+ | _ ->
+ let txt1 = name_of_symbol_failed entry sep in
+ txt1 ^ " or " ^ txt ^ " expected" ]
+ | Sopt _ | Sflag _ | Stree _ | Svala _ _ -> txt ^ " expected"
+ | _ -> txt ^ " expected after " ^ name_of_symbol_failed entry prev_symb ]
+ in
+ if error_verbose.val then do {
+ let tree = search_tree_in_entry prev_symb tree entry.edesc in
+ let ppf = err_formatter in
+ fprintf ppf "@[<v 0>@,";
+ fprintf ppf "----------------------------------@,";
+ fprintf ppf "Parse error in entry [%s], rule:@;<0 2>" entry.ename;
+ fprintf ppf "@[";
+ print_level ppf pp_force_newline (flatten_tree tree);
+ fprintf ppf "@]@,";
+ fprintf ppf "----------------------------------@,";
+ fprintf ppf "@]@."
+ }
+ else ();
+ txt ^ " (in [" ^ entry.ename ^ "])"
+};
+
+value symb_failed entry prev_symb_result prev_symb symb =
+ let tree = Node {node = symb; brother = DeadEnd; son = DeadEnd} in
+ tree_failed entry prev_symb_result prev_symb tree
+;
+
+external app : Obj.t -> 'a = "%identity";
+
+value is_level_labelled n lev =
+ match lev.lname with
+ [ Some n1 -> n = n1
+ | None -> False ]
+;
+
+value level_number entry lab =
+ let rec lookup levn =
+ fun
+ [ [] -> failwith ("unknown level " ^ lab)
+ | [lev :: levs] ->
+ if is_level_labelled lab lev then levn else lookup (succ levn) levs ]
+ in
+ match entry.edesc with
+ [ Dlevels elev -> lookup 0 elev
+ | Dparser _ -> raise Not_found ]
+;
+
+value rec top_symb entry =
+ fun
+ [ Sself | Snext -> Snterm entry
+ | Snterml e _ -> Snterm e
+ | Slist1sep s sep b -> Slist1sep (top_symb entry s) sep b
+ | _ -> raise Stream.Failure ]
+;
+
+value entry_of_symb entry =
+ fun
+ [ Sself | Snext -> entry
+ | Snterm e -> e
+ | Snterml e _ -> e
+ | _ -> raise Stream.Failure ]
+;
+
+value top_tree entry =
+ fun
+ [ Node {node = s; brother = bro; son = son} ->
+ Node {node = top_symb entry s; brother = bro; son = son}
+ | LocAct _ _ | DeadEnd -> raise Stream.Failure ]
+;
+
+value skip_if_empty bp p strm =
+ if Stream.count strm == bp then Gramext.action (fun a -> p strm)
+ else raise Stream.Failure
+;
+
+value continue entry bp a s son p1 =
+ parser
+ [: a = (entry_of_symb entry s).econtinue 0 bp a;
+ act = p1 ? tree_failed entry a s son :] ->
+ Gramext.action (fun _ -> app act a)
+;
+
+value do_recover parser_of_tree entry nlevn alevn bp a s son =
+ parser
+ [ [: a = parser_of_tree entry nlevn alevn (top_tree entry son) :] -> a
+ | [: a = skip_if_empty bp (parser []) :] -> a
+ | [: a =
+ continue entry bp a s son
+ (parser_of_tree entry nlevn alevn son) :] ->
+ a ]
+;
+
+value strict_parsing = ref False;
+
+value recover parser_of_tree entry nlevn alevn bp a s son strm =
+ if strict_parsing.val then raise (Stream.Error (tree_failed entry a s son))
+ else do_recover parser_of_tree entry nlevn alevn bp a s son strm
+;
+
+value token_count = ref 0;
+
+value peek_nth n strm = do {
+ let list = Stream.npeek n strm in
+ token_count.val := Stream.count strm + n;
+ let rec loop list n =
+ match (list, n) with
+ [ ([x :: _], 1) -> Some x
+ | ([_ :: l], n) -> loop l (n - 1)
+ | ([], _) -> None ]
+ in
+ loop list n
+};
+
+value item_skipped = ref False;
+value skip_item a = do { item_skipped.val := True; a };
+
+value call_and_push ps al strm = do {
+ item_skipped.val := False;
+ let a = ps strm in
+ let al = if item_skipped.val then al else [a :: al] in
+ item_skipped.val := False;
+ al
+};
+
+value fcall_and_push ps al err strm = do {
+ item_skipped.val := False;
+ match ps err strm with
+ [ Some (a, strm) -> do {
+ let al = if item_skipped.val then al else [a :: al] in
+ item_skipped.val := False;
+ Some (al, strm)
+ }
+ | None -> None ]
+};
+
+value bcall_and_push ps al err strm = do {
+ item_skipped.val := False;
+ match ps err strm with
+ [ Some (a, strm, Fstream.K kont) -> do {
+ let rec kont2 kont () = do {
+ item_skipped.val := False;
+ match kont () with
+ [ Some (a, strm, Fstream.K kont) -> do {
+ let al = if item_skipped.val then al else [a :: al] in
+ item_skipped.val := False;
+ Some (al, strm, Fstream.K (kont2 kont))
+ }
+ | None -> None ]
+ }
+ in
+ let al = if item_skipped.val then al else [a :: al] in
+ item_skipped.val := False;
+ Some (al, strm, Fstream.K (kont2 kont))
+ }
+ | None -> None ]
+};
+
+value token_ematch gram (tok, vala) =
+ let tematch = gram.glexer.Plexing.tok_match tok in
+ match vala with
+ | Some al ->
+ let pa =
+ match al with
+ [ [] ->
+ let t = "V " ^ fst tok in
+ gram.glexer.Plexing.tok_match (t, "")
+ | al ->
+ loop al where rec loop =
+ fun
+ [ [a :: al] ->
+ let pa = gram.glexer.Plexing.tok_match ("V", a) in
+ let pal = loop al in
+ fun tok ->
+ try pa tok with [ Stream.Failure -> pal tok ]
+ | [] -> fun tok -> raise Stream.Failure ] ]
+ in
+ fun tok ->
+ try Obj.repr (Ploc.VaAnt (Obj.magic (pa tok : string))) with
+ [ Stream.Failure -> Obj.repr (Ploc.VaVal (tematch tok)) ]
+ | None ->
+ fun tok -> Obj.repr (tematch tok : string)
+ end
+;
+
+value rec parser_of_tree entry nlevn alevn =
+ fun
+ [ DeadEnd -> parser []
+ | LocAct act _ -> parser [: :] -> act
+ | Node {node = Sself; son = LocAct act _; brother = DeadEnd} ->
+ parser [: a = entry.estart alevn :] -> app act a
+ | Node {node = Scut; son = son; brother = _} ->
+ parser_of_tree entry nlevn alevn son
+ | Node {node = Sself; son = LocAct act _; brother = bro} ->
+ let p2 = parser_of_tree entry nlevn alevn bro in
+ parser
+ [ [: a = entry.estart alevn :] -> app act a
+ | [: a = p2 :] -> a ]
+ | Node {node = s; son = son; brother = DeadEnd} ->
+ let tokl =
+ match s with
+ [ Stoken tok -> get_token_list entry [] (tok, None) son
+ | Svala ls (Stoken tok) -> get_token_list entry [] (tok, Some ls) son
+ | _ -> None ]
+ in
+ match tokl with
+ [ None ->
+ let ps = parser_of_symbol entry nlevn s in
+ let p1 = parser_of_tree entry nlevn alevn son in
+ let p1 = parser_cont p1 entry nlevn alevn s son in
+ parser bp
+ [: a = ps;
+ act = p1 bp a ? tree_failed entry a s son :] -> app act a
+ | Some (rev_tokl, (last_tok, svala), son) ->
+ let lt =
+ let t = Stoken last_tok in
+ match svala with
+ [ Some l -> Svala l t
+ | None -> t ]
+ in
+ let p1 = parser_of_tree entry nlevn alevn son in
+ let p1 = parser_cont p1 entry nlevn alevn lt son in
+ parser_of_token_list entry s son p1 (parser []) rev_tokl
+ (last_tok, svala) ]
+ | Node {node = s; son = son; brother = bro} ->
+ let tokl =
+ match s with
+ [ Stoken tok -> get_token_list entry [] (tok, None) son
+ | Svala ls (Stoken tok) -> get_token_list entry [] (tok, Some ls) son
+ | _ -> None ]
+ in
+ match tokl with
+ [ None ->
+ let ps = parser_of_symbol entry nlevn s in
+ let p1 = parser_of_tree entry nlevn alevn son in
+ let p1 = parser_cont p1 entry nlevn alevn s son in
+ let p2 = parser_of_tree entry nlevn alevn bro in
+ fun (strm : Stream.t _) ->
+ let bp = Stream.count strm in
+ match try Some (ps strm) with [ Stream.Failure -> None ] with
+ | Some a ->
+ match
+ try Some (p1 bp a strm) with [ Stream.Failure -> None ]
+ with
+ | Some act -> app act a
+ | None -> raise (Stream.Error (tree_failed entry a s son))
+ end
+ | None -> p2 strm
+ end
+ | Some (rev_tokl, (last_tok, vala), son) ->
+ let lt =
+ let t = Stoken last_tok in
+ match vala with
+ [ Some ls -> Svala ls t
+ | None -> t ]
+ in
+ let p2 = parser_of_tree entry nlevn alevn bro in
+ let p1 = parser_of_tree entry nlevn alevn son in
+ let p1 = parser_cont p1 entry nlevn alevn lt son in
+ let p1 =
+ parser_of_token_list entry lt son p1 p2 rev_tokl
+ (last_tok, vala)
+ in
+ parser
+ [ [: a = p1 :] -> a
+ | [: a = p2 :] -> a ] ] ]
+and parser_cont p1 entry nlevn alevn s son bp a =
+ parser
+ [ [: a = p1 :] -> a
+ | [: a = recover parser_of_tree entry nlevn alevn bp a s son :] -> a ]
+and parser_of_token_list entry s son p1 p2 rev_tokl last_tok =
+ let plast =
+ let n = List.length rev_tokl + 1 in
+ let tematch = token_ematch entry.egram last_tok in
+ let ps strm =
+ match peek_nth n strm with
+ [ Some tok -> do {
+ let r = tematch tok in
+ for i = 1 to n do { Stream.junk strm };
+ Obj.repr r
+ }
+ | None -> raise Stream.Failure ]
+ in
+ fun (strm : Stream.t _) ->
+ let bp = Stream.count strm in
+ let a = ps strm in
+ match try Some (p1 bp a strm) with [ Stream.Failure -> None ] with
+ | Some act -> app act a
+ | None -> raise (Stream.Error (tree_failed entry a s son))
+ end
+ in
+ match List.rev rev_tokl with
+ | [] -> parser [: a = plast :] -> a
+ | [tok :: tokl] ->
+ let tematch = token_ematch entry.egram tok in
+ let ps strm =
+ match peek_nth 1 strm with
+ [ Some tok -> tematch tok
+ | None -> raise Stream.Failure ]
+ in
+ let p1 =
+ loop 2 tokl where rec loop n =
+ fun
+ | [] -> plast
+ | [tok :: tokl] ->
+ let tematch = token_ematch entry.egram tok in
+ let ps strm =
+ match peek_nth n strm with
+ | Some tok -> tematch tok
+ | None -> raise Stream.Failure
+ end
+ in
+ let p1 = loop (n + 1) tokl in
+ parser [: a = ps; act = p1 ! :] -> app act a
+ end
+ in
+ parser [: a = ps; act = p1 ! :] -> app act a
+ end
+and parser_of_symbol entry nlevn =
+ fun
+ [ Sfacto s -> parser_of_symbol entry nlevn s
+ | Smeta _ symbl act ->
+ let act = Obj.magic act entry symbl in
+ Obj.magic
+ (List.fold_left
+ (fun act symb -> Obj.magic act (parser_of_symbol entry nlevn symb))
+ act symbl)
+ | Slist0 s ->
+ let ps = call_and_push (parser_of_symbol entry nlevn s) in
+ let rec loop al =
+ parser
+ [ [: al = ps al; a = loop al ! :] -> a
+ | [: :] -> al ]
+ in
+ parser [: a = loop [] :] -> Obj.repr (List.rev a)
+ | Slist0sep symb sep False ->
+ let ps = call_and_push (parser_of_symbol entry nlevn symb) in
+ let pt = parser_of_symbol entry nlevn sep in
+ let rec kont al =
+ parser
+ [ [: v = pt; al = ps al ? symb_failed entry v sep symb;
+ a = kont al ! :] ->
+ a
+ | [: :] -> al ]
+ in
+ parser
+ [ [: al = ps []; a = kont al ! :] -> Obj.repr (List.rev a)
+ | [: :] -> Obj.repr [] ]
+ | Slist0sep symb sep True ->
+ let ps = call_and_push (parser_of_symbol entry nlevn symb) in
+ let pt = parser_of_symbol entry nlevn sep in
+ let rec kont al =
+ parser
+ [ [: v = pt; al = ps al; a = kont al ! :] -> a
+ | [: v = pt :] -> al
+ | [: :] -> al ]
+ in
+ parser
+ [ [: al = ps []; a = kont al ! :] -> Obj.repr (List.rev a)
+ | [: :] -> Obj.repr [] ]
+ | Slist1 s ->
+ let ps = call_and_push (parser_of_symbol entry nlevn s) in
+ let rec loop al =
+ parser
+ [ [: al = ps al; a = loop al ! :] -> a
+ | [: :] -> al ]
+ in
+ parser [: al = ps []; a = loop al ! :] -> Obj.repr (List.rev a)
+ | Slist1sep symb sep False ->
+ let ps = call_and_push (parser_of_symbol entry nlevn symb) in
+ let pt = parser_of_symbol entry nlevn sep in
+ let rec kont al =
+ parser
+ [ [: v = pt;
+ al =
+ parser
+ [ [: a = ps al :] -> a
+ | [: a = parse_top_symb entry symb :] -> [a :: al]
+ | [: :] ->
+ raise (Stream.Error (symb_failed entry v sep symb)) ] !;
+ a = kont al ! :] ->
+ a
+ | [: :] -> al ]
+ in
+ parser [: al = ps []; a = kont al ! :] -> Obj.repr (List.rev a)
+ | Slist1sep symb sep True ->
+ let ps = call_and_push (parser_of_symbol entry nlevn symb) in
+ let pt = parser_of_symbol entry nlevn sep in
+ let rec kont al =
+ parser
+ [ [: v = pt; al = ps al; al = kont al ! :] -> al
+ | [: v = pt; a = parse_top_symb entry symb;
+ al = kont [a :: al] ! :] -> al
+ | [: v = pt :] -> al
+ | [: :] -> al ]
+ in
+ parser [: al = ps []; a = kont al ! :] -> Obj.repr (List.rev a)
+ | Sopt s ->
+ let ps = parser_of_symbol entry nlevn s in
+ parser
+ [ [: a = ps :] -> Obj.repr (Some a)
+ | [: :] -> Obj.repr None ]
+ | Sflag s ->
+ let ps = parser_of_symbol entry nlevn s in
+ parser
+ [ [: _ = ps :] -> Obj.repr True
+ | [: :] -> Obj.repr False ]
+ | Stree t ->
+ let pt = parser_of_tree entry 1 0 t in
+ parser bp
+ [: a = pt :] ep ->
+ let loc = loc_of_token_interval bp ep in
+ app a loc
+ | Svala al s ->
+ let pa =
+ match al with
+ [ [] ->
+ let t =
+ match s with
+ [ Sflag _ -> Some "V FLAG"
+ | Sopt _ -> Some "V OPT"
+ | Slist0 _ | Slist0sep _ _ _ -> Some "V LIST"
+ | Slist1 _ | Slist1sep _ _ _ -> Some "V LIST"
+ | Stoken (con, "") -> Some ("V " ^ con)
+ | _ -> None ]
+ in
+ match t with
+ [ Some t -> parser_of_token entry (t, "")
+ | None -> parser [] ]
+ | al ->
+ loop al where rec loop =
+ fun
+ [ [a :: al] ->
+ let pa = parser_of_token entry ("V", a) in
+ let pal = loop al in
+ parser
+ [ [: a = pa :] -> a
+ | [: a = pal :] -> a ]
+ | [] -> parser [] ] ]
+ in
+ let ps = parser_of_symbol entry nlevn s in
+ parser
+ [ [: a = pa :] -> Obj.repr (Ploc.VaAnt (Obj.magic a : string))
+ | [: a = ps :] -> Obj.repr (Ploc.VaVal a) ]
+ | Snterm e -> parser [: a = e.estart 0 :] -> a
+ | Snterml e l -> parser [: a = e.estart (level_number e l) :] -> a
+ | Sself -> parser [: a = entry.estart 0 :] -> a
+ | Snext -> parser [: a = entry.estart nlevn :] -> a
+ | Scut -> parser [: :] -> Obj.repr ()
+ | Stoken tok -> parser_of_token entry tok ]
+and parser_of_token entry tok =
+ let f = entry.egram.glexer.Plexing.tok_match tok in
+ fun strm ->
+ match Stream.peek strm with
+ [ Some tok -> do {
+ let r = f tok in
+ Stream.junk strm;
+ Obj.repr r
+ }
+ | None -> raise Stream.Failure ]
+and parse_top_symb entry symb =
+ parser_of_symbol entry 0 (top_symb entry symb)
+;
+
+value symb_failed_txt e s1 s2 = symb_failed e 0 s1 s2;
+
+value rec start_parser_of_levels entry clevn =
+ fun
+ [ [] -> fun levn -> parser []
+ | [lev :: levs] ->
+ let p1 = start_parser_of_levels entry (succ clevn) levs in
+ match lev.lprefix with
+ [ DeadEnd -> p1
+ | tree ->
+ let alevn =
+ match lev.assoc with
+ [ LeftA | NonA -> succ clevn
+ | RightA -> clevn ]
+ in
+ let p2 = parser_of_tree entry (succ clevn) alevn tree in
+ match levs with
+ [ [] ->
+ fun levn strm ->
+ (* this code should be there but is commented to preserve
+ compatibility with previous versions... with this code,
+ the grammar entry e: [[ "x"; a = e | "y" ]] should fail
+ because it should be: e: [RIGHTA[ "x"; a = e | "y" ]]...
+ if levn > clevn then match strm with parser []
+ else
+ *)
+ match strm with parser bp
+ [: act = p2 :] ep ->
+ let a = app act (loc_of_token_interval bp ep) in
+ entry.econtinue levn bp a strm
+ | _ ->
+ fun levn strm ->
+ if levn > clevn then p1 levn strm
+ else
+ match strm with parser bp
+ [ [: act = p2 :] ep ->
+ let a = app act (loc_of_token_interval bp ep) in
+ entry.econtinue levn bp a strm
+ | [: a = p1 levn :] -> a ] ] ] ]
+;
+
+value rec continue_parser_of_levels entry clevn =
+ fun
+ [ [] -> fun levn bp a -> parser []
+ | [lev :: levs] ->
+ let p1 = continue_parser_of_levels entry (succ clevn) levs in
+ match lev.lsuffix with
+ [ DeadEnd -> p1
+ | tree ->
+ let alevn =
+ match lev.assoc with
+ [ LeftA | NonA -> succ clevn
+ | RightA -> clevn ]
+ in
+ let p2 = parser_of_tree entry (succ clevn) alevn tree in
+ fun levn bp a strm ->
+ if levn > clevn then p1 levn bp a strm
+ else
+ match strm with parser
+ [ [: a = p1 levn bp a :] -> a
+ | [: act = p2 :] ep ->
+ let a = app act a (loc_of_token_interval bp ep) in
+ entry.econtinue levn bp a strm ] ] ]
+;
+
+value continue_parser_of_entry entry =
+ match entry.edesc with
+ [ Dlevels elev ->
+ let p = continue_parser_of_levels entry 0 elev in
+ fun levn bp a ->
+ parser
+ [ [: a = p levn bp a :] -> a
+ | [: :] -> a ]
+ | Dparser p -> fun levn bp a -> parser [] ]
+;
+
+value empty_entry ename levn strm =
+ raise (Stream.Error ("entry [" ^ ename ^ "] is empty"))
+;
+
+value start_parser_of_entry entry =
+ match entry.edesc with
+ [ Dlevels [] -> empty_entry entry.ename
+ | Dlevels elev -> start_parser_of_levels entry 0 elev
+ | Dparser p -> fun levn strm -> p strm ]
+;
+
+value default_algorithm_var = ref DefaultAlgorithm;
+value set_default_algorithm algo = default_algorithm_var.val := algo;
+value default_algorithm () = default_algorithm_var.val;
+
+(* deprecated since 2017-06-06: use 'set_default_algorithm' instead *)
+value backtrack_parse = ref False;
+value warned_using_backtrack_parse = ref False;
+value compatible_deprecated_backtrack_parse () =
+ if backtrack_parse.val then do {
+ if not warned_using_backtrack_parse.val then do {
+ eprintf "<W> use of Grammar.backtrace_parse ";
+ eprintf "deprecated since 2017-06-06\n%!";
+ warned_using_backtrack_parse.val := True
+ }
+ else ();
+ backtrack_parse.val := False;
+ set_default_algorithm Backtracking
+ }
+ else ()
+;
+
+(* parsing with functional streams *)
+
+value backtrack_trace = ref False;
+value backtrack_stalling_limit = ref 10000;
+value backtrack_trace_try = ref False;
+value tind = ref "";
+value max_fcount = ref None;
+value nb_ftry = ref 0;
+
+value no_err () = "";
+value ftree_failed entry prev_symb_result prev_symb tree () =
+ tree_failed entry prev_symb_result prev_symb tree
+;
+value fsymb_failed entry prev_symb_result prev_symb symb () =
+ symb_failed entry prev_symb_result prev_symb symb
+;
+
+value bfparser_of_token entry tok return_value =
+ let f = entry.egram.glexer.Plexing.tok_match tok in
+ fun err strm ->
+ let _ =
+ if backtrack_trace.val then do {
+ Printf.eprintf "%stesting (\"%s\", \"%s\") ..." tind.val (fst tok)
+ (snd tok);
+ flush stderr;
+ }
+ else ()
+ in
+ let _ =
+ if backtrack_stalling_limit.val > 0 || backtrack_trace_try.val then
+ let m =
+ match max_fcount.val with
+ | Some (m, _, _) -> m
+ | None -> 0
+ end
+ in
+ if Fstream.count strm > m then do {
+ if backtrack_trace.val then
+ Printf.eprintf " (token count max %d)%!" (Fstream.count strm)
+ else ();
+ let e : g_entry Obj.t = Obj.magic (entry : g_entry _) in
+ let cnt = Fstream.count strm in
+ max_fcount.val := Some (cnt, e, err);
+ nb_ftry.val := 0
+ }
+ else do {
+ if backtrack_trace.val then
+ Printf.eprintf " (token count %d/%d)%!" (Fstream.count strm) m
+ else ();
+ incr nb_ftry;
+ if backtrack_trace_try.val then do {
+ Printf.eprintf "\ntokens read: %d; tokens tests: %d" m
+ nb_ftry.val;
+ flush stderr;
+ }
+ else ();
+ if backtrack_stalling_limit.val > 0 &&
+ nb_ftry.val >= backtrack_stalling_limit.val
+ then do {
+ if backtrack_trace.val || backtrack_trace_try.val then
+ Printf.eprintf " (stalling limit reached)\n%!"
+ else ();
+ raise Stream.Failure
+ }
+ else ()
+ }
+ else ()
+ in
+ match Fstream.next strm with
+ [ Some (tok, strm) ->
+ try do {
+ let r = f tok in
+ let _ =
+ if backtrack_trace.val then Printf.eprintf " yes \"%s\"\n%!" r
+ else ()
+ in
+ nb_ftry.val := 0;
+ return_value r strm
+ }
+ with
+ [ Stream.Failure ->
+ let _ =
+ if backtrack_trace.val then Printf.eprintf " not found\n%!"
+ else ()
+ in
+ None ]
+ | None ->
+ let _ =
+ if backtrack_trace.val then do {
+ Printf.eprintf " eos\n";
+ flush stderr;
+ }
+ else ()
+ in
+ None ]
+;
+
+let s = try Sys.getenv "CAMLP5PARAM" with [ Not_found -> "" ] in
+loop 0 where rec loop i =
+ if i = String.length s then ()
+ else if s.[i] = 'b' then do {
+ set_default_algorithm Backtracking;
+ loop (i + 1)
+ }
+ else if s.[i] = 'f' then do {
+ set_default_algorithm Functional;
+ loop (i + 1)
+ }
+ else if s.[i] = 'p' then do {
+ set_default_algorithm Predictive;
+ loop (i + 1)
+ }
+ else if s.[i] = 'l' && i + 1 < String.length s && s.[i+1] = '=' then do {
+ let (n, i) =
+ loop 0 (i + 2) where rec loop n i =
+ if i = String.length s then (n, i)
+ else if s.[i] >= '0' && s.[i] <= '9' then
+ loop (10 * n + Char.code s.[i] - Char.code '0') (i + 1)
+ else (n, i)
+ in
+ backtrack_stalling_limit.val := n;
+ loop i
+ }
+ else if s.[i] = 't' then do {
+ backtrack_trace.val := True;
+ loop (i + 1)
+ }
+ else if s.[i] = 'y' then do {
+ backtrack_trace_try.val := True;
+ loop (i + 1)
+ }
+ else
+ loop (i + 1)
+;
+
+(* version with functional streams and limited backtracking *)
+
+value fcount = fparser bp [: :] → bp;
+
+value rec ftop_symb entry =
+ fun
+ [ Sself | Snext -> Some (Snterm entry)
+ | Snterml e _ -> Some (Snterm e)
+ | Slist1sep s sep b ->
+ match ftop_symb entry s with
+ [ Some s -> Some (Slist1sep s sep b)
+ | None -> None ]
+ | _ -> None ]
+;
+
+value ftop_tree entry son strm =
+ match son with
+ [ Node {node = s; brother = bro; son = son} ->
+ match ftop_symb entry s with
+ [ Some sy ->
+ let r = Node {node = sy; brother = bro; son = son} in
+ let _ =
+ if backtrack_trace.val then
+ Printf.eprintf "%srecovering pos %d\n%!" tind.val
+ (Fstream.count strm)
+ else ()
+ in
+ match strm with fparser [: :] -> r
+ | None ->
+ None ]
+ | LocAct _ _ | DeadEnd ->
+ None ]
+;
+
+value frecover fparser_of_tree entry next_levn assoc_levn son err =
+ fparser
+ [ [: t = ftop_tree entry son;
+ a = fparser_of_tree entry next_levn assoc_levn t err :] -> a ]
+;
+
+value fparser_of_token entry tok =
+ let return_value r strm = match strm with fparser [: :] -> Obj.repr r in
+ bfparser_of_token entry tok return_value
+;
+
+value rec fparser_of_tree entry next_levn assoc_levn =
+ fun
+ [ DeadEnd -> fun err -> fparser []
+ | LocAct act _ -> fun err -> fparser [: :] -> act
+ | Node {node = Sself; son = LocAct act _; brother = DeadEnd} ->
+ fun err -> fparser [: a = entry.fstart assoc_levn err :] -> app act a
+ | Node {node = Sself; son = LocAct act _; brother = bro} ->
+ let p2 = fparser_of_tree entry next_levn assoc_levn bro in
+ fun err ->
+ fparser
+ [ [: a = entry.fstart assoc_levn err :] -> app act a
+ | [: a = p2 err :] -> a ]
+ | Node {node = Scut; son = son; brother = _} ->
+ let p1 = fparser_of_tree entry next_levn assoc_levn son in
+ fun err ->
+ fparser
+ [ [: !; a = p1 err :] -> a ]
+ | Node {node = s; son = son; brother = DeadEnd} ->
+ let ps = fparser_of_symbol entry next_levn s in
+ let p1 = fparser_of_tree entry next_levn assoc_levn son in
+ let p1 = fparser_cont p1 entry next_levn assoc_levn son in
+ fun err ->
+ fparser [: a = ps err; act = p1 (ftree_failed entry a s son) :] ->
+ app act a
+ | Node {node = s; son = son; brother = bro} ->
+ let ps = fparser_of_symbol entry next_levn s in
+ let p1 = fparser_of_tree entry next_levn assoc_levn son in
+ let p1 = fparser_cont p1 entry next_levn assoc_levn son in
+ let p2 = fparser_of_tree entry next_levn assoc_levn bro in
+ fun err ->
+ fparser
+ [ [: a = ps err; act = p1 (ftree_failed entry a s son) :] -> app act a
+ | [: a = p2 err :] -> a ] ]
+and fparser_cont p1 entry next_levn assoc_levn son err =
+ fparser
+ [ [: a = p1 err :] -> a
+ | [: a = frecover fparser_of_tree entry next_levn assoc_levn son err :] ->
+ a ]
+and fparser_of_symbol entry next_levn =
+ fun
+ [ Sfacto s -> fparser_of_symbol entry next_levn s
+ | Smeta _ symbl act ->
+ let _ = failwith "Smeta for fparser not impl" in
+ let act = Obj.magic act entry symbl in
+ Obj.magic
+ (List.fold_left
+ (fun act symb ->
+ Obj.magic act (fparser_of_symbol entry next_levn symb))
+ act symbl)
+ | Slist0 s ->
+ let ps = fparser_of_symbol entry next_levn s in
+ let ps = fcall_and_push ps in
+ let rec loop al err =
+ fparser
+ [ [: al = ps al err; a = loop al err :] -> a
+ | [: :] -> al ]
+ in
+ fun err -> fparser [: a = loop [] err :] -> Obj.repr (List.rev a)
+ | Slist0sep symb sep False ->
+ let ps = fparser_of_symbol entry next_levn symb in
+ let ps = fcall_and_push ps in
+ let pt = fparser_of_symbol entry next_levn sep in
+ let rec kont al err =
+ fparser
+ [ [: v = pt err; al = ps al (fsymb_failed entry v sep symb);
+ a = kont al err :] -> a
+ | [: :] -> al ]
+ in
+ fun err ->
+ fparser
+ [ [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a)
+ | [: :] -> Obj.repr [] ]
+ | Slist1 s ->
+ let ps = fparser_of_symbol entry next_levn s in
+ let ps = fcall_and_push ps in
+ let rec loop al err =
+ fparser
+ [ [: al = ps al err; a = loop al err :] -> a
+ | [: :] -> al ]
+ in
+ fun err ->
+ fparser [: al = ps [] err; a = loop al err :] -> Obj.repr (List.rev a)
+ | Slist0sep symb sep True ->
+ failwith "LIST0 _ SEP _ OPT_SEP not implemented; please report"
+ | Slist1sep symb sep False ->
+ let ps = fparser_of_symbol entry next_levn symb in
+ let ps = fcall_and_push ps in
+ let pt = fparser_of_symbol entry next_levn sep in
+ let pts = fparse_top_symb entry symb in
+ let rec kont al err =
+ fparser
+ [ [: v = pt err;
+ al =
+ fparser
+ [ [: a = ps al (fsymb_failed entry v sep symb) :] -> a
+ | [: a = pts (fsymb_failed entry v sep symb) :] -> [a :: al] ];
+ a = kont al err :] ->
+ a
+ | [: :] -> al ]
+ in
+ fun err ->
+ fparser [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a)
+ | Slist1sep symb sep True ->
+ let ps = fparser_of_symbol entry next_levn symb in
+ let ps = fcall_and_push ps in
+ let pt = fparser_of_symbol entry next_levn sep in
+ let pts = fparse_top_symb entry symb in
+ let rec kont al err =
+ fparser
+ [ [: v = pt err; al = ps al err; al = kont al err :] -> al
+ | [: v = pt err; a = pts err; al = kont [a :: al] err :] -> al
+ | [: v = pt err :] -> al
+ | [: :] -> al ]
+ in
+ fun err ->
+ fparser [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a)
+ | Sopt s ->
+ let ps = fparser_of_symbol entry next_levn s in
+ fun err ->
+ fparser
+ [ [: a = ps err :] -> Obj.repr (Some a)
+ | [: :] -> Obj.repr None ]
+ | Sflag s ->
+ let ps = fparser_of_symbol entry next_levn s in
+ fun err ->
+ fparser
+ [ [: _ = ps err :] -> Obj.repr True
+ | [: :] -> Obj.repr False ]
+ | Stree t ->
+ let pt = fparser_of_tree entry 1 0 t in
+ fun err ->
+ fparser bp
+ [: a = pt err :] ep ->
+ let loc = loc_of_token_interval bp ep in
+ app a loc
+ | Svala al s ->
+ let pa =
+ match al with
+ [ [] ->
+ let t =
+ match s with
+ [ Sflag _ -> Some "V FLAG"
+ | Sopt _ -> Some "V OPT"
+ | Slist0 _ | Slist0sep _ _ _ -> Some "V LIST"
+ | Slist1 _ | Slist1sep _ _ _ -> Some "V LIST"
+ | Stoken (con, "") -> Some ("V " ^ con)
+ | _ -> None ]
+ in
+ match t with
+ [ Some t -> fparser_of_token entry (t, "")
+ | None -> fun err -> fparser [] ]
+ | al ->
+ loop al where rec loop =
+ fun
+ [ [a :: al] ->
+ let pa = fparser_of_token entry ("V", a) in
+ let pal = loop al in
+ fun err ->
+ fparser
+ [ [: a = pa err :] -> a
+ | [: a = pal err :] -> a ]
+ | [] -> fun err -> fparser [] ] ]
+ in
+ let ps = fparser_of_symbol entry next_levn s in
+ fun err ->
+ fparser
+ [ [: a = pa err :] -> Obj.repr (Ploc.VaAnt (Obj.magic a : string))
+ | [: a = ps err :] -> Obj.repr (Ploc.VaVal a) ]
+ | Snterm e ->
+ fun err -> fparser [: a = e.fstart 0 err :] -> a
+ | Snterml e l ->
+ fun err -> fparser [: a = e.fstart (level_number e l) err :] -> a
+ | Sself -> fun err -> fparser [: a = entry.fstart 0 err :] -> a
+ | Snext -> fun err -> fparser [: a = entry.fstart next_levn err :] -> a
+ | Scut -> fun err -> fparser [: ! :] -> Obj.repr ()
+ | Stoken tok -> fparser_of_token entry tok ]
+and fparse_top_symb entry symb =
+ match ftop_symb entry symb with
+ [ Some sy -> fparser_of_symbol entry 0 sy
+ | None -> fun err -> fparser [] ]
+;
+
+value rec fstart_parser_of_levels entry clevn =
+ fun
+ [ [] -> fun levn err -> fparser []
+ | [lev :: levs] ->
+ let p1 = fstart_parser_of_levels entry (succ clevn) levs in
+ match lev.lprefix with
+ [ DeadEnd -> p1
+ | tree ->
+ let alevn =
+ match lev.assoc with
+ [ LeftA | NonA -> succ clevn
+ | RightA -> clevn ]
+ in
+ let p2 = fparser_of_tree entry (succ clevn) alevn tree in
+ match levs with
+ [ [] ->
+ fun levn err strm ->
+ match strm with fparser bp
+ [: act = p2 err; ep = fcount;
+ a =
+ entry.fcontinue levn bp
+ (app act (loc_of_token_interval bp ep)) err :] ->
+ a
+ | _ ->
+ fun levn err strm ->
+ if levn > clevn then p1 levn err strm
+ else
+ match strm with fparser bp
+ [ [: act = p2 err; ep = fcount;
+ a =
+ entry.fcontinue levn bp
+ (app act (loc_of_token_interval bp ep)) err :] ->
+ a
+ | [: a = p1 levn err :] -> a ] ] ] ]
+;
+
+value rec fcontinue_parser_of_levels entry clevn =
+ fun
+ [ [] -> fun levn bp a err -> fparser []
+ | [lev :: levs] ->
+ let p1 = fcontinue_parser_of_levels entry (succ clevn) levs in
+ match lev.lsuffix with
+ [ DeadEnd -> p1
+ | tree ->
+ let alevn =
+ match lev.assoc with
+ [ LeftA | NonA -> succ clevn
+ | RightA -> clevn ]
+ in
+ let p2 = fparser_of_tree entry (succ clevn) alevn tree in
+ fun levn bp a err strm ->
+ if levn > clevn then p1 levn bp a err strm
+ else
+ match strm with fparser
+ [ [: a = p1 levn bp a err :] -> a
+ | [: act = p2 err; ep = fcount;
+ a =
+ entry.fcontinue levn bp
+ (app act a (loc_of_token_interval bp ep)) err :] ->
+ a ] ] ]
+;
+
+value fstart_parser_of_entry entry =
+ match entry.edesc with
+ [ Dlevels [] -> fun _ err -> fparser []
+ | Dlevels elev -> fstart_parser_of_levels entry 0 elev
+ | Dparser p -> fun levn err strm -> failwith "Dparser for Fstream" ]
+;
+
+value fcontinue_parser_of_entry entry =
+ match entry.edesc with
+ [ Dlevels elev ->
+ let p = fcontinue_parser_of_levels entry 0 elev in
+ fun levn bp a err ->
+ fparser
+ [ [: a = p levn bp a err :] -> a
+ | [: :] -> a ]
+ | Dparser p -> fun levn bp a err -> fparser [] ]
+;
+
+(* version with functional streams and full backtracking *)
+
+value rec btop_symb entry =
+ fun
+ [ Sself | Snext -> Some (Snterm entry)
+ | Snterml e _ -> Some (Snterm e)
+ | Slist1sep s sep b ->
+ match btop_symb entry s with
+ [ Some s -> Some (Slist1sep s sep b)
+ | None -> None ]
+ | _ -> None ]
+;
+
+value btop_tree entry son strm =
+ match son with
+ [ Node {node = s; brother = bro; son = son} ->
+ match btop_symb entry s with
+ [ Some sy ->
+ let r = Node {node = sy; brother = bro; son = son} in
+ let _ =
+ if backtrack_trace.val then
+ Printf.eprintf "%srecovering pos %d\n%!" tind.val
+ (Fstream.count strm)
+ else ()
+ in
+ match strm with bparser [: :] -> r
+ | None ->
+ None ]
+ | LocAct _ _ | DeadEnd ->
+ None ]
+;
+
+value brecover bparser_of_tree entry next_levn assoc_levn son err =
+ bparser
+ [ [: t = btop_tree entry son;
+ a = bparser_of_tree entry next_levn assoc_levn t err :] -> a ]
+;
+
+value bparser_of_token entry tok =
+ let return_value r strm = match strm with bparser [: :] -> Obj.repr r in
+ bfparser_of_token entry tok return_value
+;
+
+value rec bparser_of_tree entry next_levn assoc_levn =
+ fun
+ [ DeadEnd -> fun err -> bparser []
+ | LocAct act _ -> fun err -> bparser [: :] -> act
+ | Node {node = Sself; son = LocAct act _; brother = DeadEnd} ->
+ fun err -> bparser [: a = entry.bstart assoc_levn err :] -> app act a
+ | Node {node = Sself; son = LocAct act _; brother = bro} ->
+ let p2 = bparser_of_tree entry next_levn assoc_levn bro in
+ fun err ->
+ bparser
+ [ [: a = entry.bstart assoc_levn err :] -> app act a
+ | [: a = p2 err :] -> a ]
+ | Node {node = Scut; son = son; brother = _} ->
+ let p1 = bparser_of_tree entry next_levn assoc_levn son in
+ fun err ->
+ bparser
+ [ [: !; a = p1 err :] -> a ]
+ | Node {node = s; son = son; brother = DeadEnd} ->
+ let ps = bparser_of_symbol entry next_levn s in
+ let p1 = bparser_of_tree entry next_levn assoc_levn son in
+ let p1 = bparser_cont p1 entry next_levn assoc_levn son in
+ fun err ->
+ bparser [: a = ps err; act = p1 (ftree_failed entry a s son) :] ->
+ app act a
+ | Node {node = s; son = son; brother = bro} ->
+ let ps = bparser_of_symbol entry next_levn s in
+ let p1 = bparser_of_tree entry next_levn assoc_levn son in
+ let p1 = bparser_cont p1 entry next_levn assoc_levn son in
+ let p2 = bparser_of_tree entry next_levn assoc_levn bro in
+ fun err ->
+ bparser
+ [ [: a = ps err; act = p1 (ftree_failed entry a s son) :] -> app act a
+ | [: a = p2 err :] -> a ] ]
+and bparser_cont p1 entry next_levn assoc_levn son err =
+ bparser
+ [ [: a = p1 err :] -> a
+ | [: a = brecover bparser_of_tree entry next_levn assoc_levn son err :] ->
+ a ]
+and bparser_of_symbol entry next_levn =
+ fun
+ [ Sfacto s -> bparser_of_symbol entry next_levn s
+ | Smeta _ symbl act ->
+ let _ = failwith "Smeta for bparser not impl" in
+ let act = Obj.magic act entry symbl in
+ Obj.magic
+ (List.fold_left
+ (fun act symb ->
+ Obj.magic act (bparser_of_symbol entry next_levn symb))
+ act symbl)
+ | Slist0 s ->
+ let ps = bparser_of_symbol entry next_levn s in
+ let ps = bcall_and_push ps in
+ let rec loop al err =
+ bparser
+ [ [: al = ps al err; a = loop al err :] -> a
+ | [: :] -> al ]
+ in
+ fun err -> bparser [: a = loop [] err :] -> Obj.repr (List.rev a)
+ | Slist0sep symb sep False ->
+ let ps = bparser_of_symbol entry next_levn symb in
+ let ps = bcall_and_push ps in
+ let pt = bparser_of_symbol entry next_levn sep in
+ let rec kont al err =
+ bparser
+ [ [: v = pt err; al = ps al (fsymb_failed entry v sep symb);
+ a = kont al err :] -> a
+ | [: :] -> al ]
+ in
+ fun err ->
+ bparser
+ [ [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a)
+ | [: :] -> Obj.repr [] ]
+ | Slist1 s ->
+ let ps = bparser_of_symbol entry next_levn s in
+ let ps = bcall_and_push ps in
+ let rec loop al err =
+ bparser
+ [ [: al = ps al err; a = loop al err :] -> a
+ | [: :] -> al ]
+ in
+ fun err ->
+ bparser [: al = ps [] err; a = loop al err :] -> Obj.repr (List.rev a)
+ | Slist0sep symb sep True ->
+ failwith "LIST0 _ SEP _ OPT_SEP not implemented; please report"
+ | Slist1sep symb sep False ->
+ let ps = bparser_of_symbol entry next_levn symb in
+ let ps = bcall_and_push ps in
+ let pt = bparser_of_symbol entry next_levn sep in
+ let pts = bparse_top_symb entry symb in
+ let rec kont al err =
+ bparser
+ [ [: v = pt err;
+ al =
+ bparser
+ [ [: a = ps al (fsymb_failed entry v sep symb) :] -> a
+ | [: a = pts (fsymb_failed entry v sep symb) :] -> [a :: al] ];
+ a = kont al err :] ->
+ a
+ | [: :] -> al ]
+ in
+ fun err ->
+ bparser [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a)
+ | Slist1sep symb sep True ->
+ let ps = bparser_of_symbol entry next_levn symb in
+ let ps = bcall_and_push ps in
+ let pt = bparser_of_symbol entry next_levn sep in
+ let pts = bparse_top_symb entry symb in
+ let rec kont al err =
+ bparser
+ [ [: v = pt err; al = ps al err; al = kont al err :] -> al
+ | [: v = pt err; a = pts err; al = kont [a :: al] err :] -> al
+ | [: v = pt err :] -> al
+ | [: :] -> al ]
+ in
+ fun err ->
+ bparser [: al = ps [] err; a = kont al err :] -> Obj.repr (List.rev a)
+ | Sopt s ->
+ let ps = bparser_of_symbol entry next_levn s in
+ fun err ->
+ bparser
+ [ [: a = ps err :] -> Obj.repr (Some a)
+ | [: :] -> Obj.repr None ]
+ | Sflag s ->
+ let ps = bparser_of_symbol entry next_levn s in
+ fun err ->
+ bparser
+ [ [: _ = ps err :] -> Obj.repr True
+ | [: :] -> Obj.repr False ]
+ | Stree t ->
+ let pt = bparser_of_tree entry 1 0 t in
+ fun err ->
+ bparser bp
+ [: a = pt err :] ep ->
+ let loc = loc_of_token_interval bp ep in
+ app a loc
+ | Svala al s ->
+ let pa =
+ match al with
+ [ [] ->
+ let t =
+ match s with
+ [ Sflag _ -> Some "V FLAG"
+ | Sopt _ -> Some "V OPT"
+ | Slist0 _ | Slist0sep _ _ _ -> Some "V LIST"
+ | Slist1 _ | Slist1sep _ _ _ -> Some "V LIST"
+ | Stoken (con, "") -> Some ("V " ^ con)
+ | _ -> None ]
+ in
+ match t with
+ [ Some t -> bparser_of_token entry (t, "")
+ | None -> fun err -> bparser [] ]
+ | al ->
+ loop al where rec loop =
+ fun
+ [ [a :: al] ->
+ let pa = bparser_of_token entry ("V", a) in
+ let pal = loop al in
+ fun err ->
+ bparser
+ [ [: a = pa err :] -> a
+ | [: a = pal err :] -> a ]
+ | [] -> fun err -> bparser [] ] ]
+ in
+ let ps = bparser_of_symbol entry next_levn s in
+ fun err ->
+ bparser
+ [ [: a = pa err :] -> Obj.repr (Ploc.VaAnt (Obj.magic a : string))
+ | [: a = ps err :] -> Obj.repr (Ploc.VaVal a) ]
+ | Snterm e ->
+ fun err -> bparser [: a = e.bstart 0 err :] -> a
+ | Snterml e l ->
+ fun err -> bparser [: a = e.bstart (level_number e l) err :] -> a
+ | Sself -> fun err -> bparser [: a = entry.bstart 0 err :] -> a
+ | Snext -> fun err -> bparser [: a = entry.bstart next_levn err :] -> a
+ | Scut -> fun err -> bparser [: ! :] -> Obj.repr ()
+ | Stoken tok -> bparser_of_token entry tok ]
+and bparse_top_symb entry symb =
+ match btop_symb entry symb with
+ [ Some sy -> bparser_of_symbol entry 0 sy
+ | None -> fun err -> bparser [] ]
+;
+
+value bcount strm = match strm with bparser [: :] -> Fstream.count strm;
+
+value rec bstart_parser_of_levels entry clevn =
+ fun
+ [ [] -> fun levn err -> bparser []
+ | [lev :: levs] ->
+ let p1 = bstart_parser_of_levels entry (succ clevn) levs in
+ match lev.lprefix with
+ [ DeadEnd -> p1
+ | tree ->
+ let alevn =
+ match lev.assoc with
+ [ LeftA | NonA -> succ clevn
+ | RightA -> clevn ]
+ in
+ let p2 = bparser_of_tree entry (succ clevn) alevn tree in
+ match levs with
+ [ [] ->
+ fun levn err strm ->
+ match strm with bparser bp
+ [: act = p2 err; ep = bcount;
+ a =
+ entry.bcontinue levn bp
+ (app act (loc_of_token_interval bp ep)) err :] ->
+ a
+ | _ ->
+ fun levn err strm ->
+ if levn > clevn then p1 levn err strm
+ else
+ match strm with bparser bp
+ [ [: act = p2 err; ep = bcount;
+ a =
+ entry.bcontinue levn bp
+ (app act (loc_of_token_interval bp ep)) err :] ->
+ a
+ | [: a = p1 levn err :] -> a ] ] ] ]
+;
+
+value rec bcontinue_parser_of_levels entry clevn =
+ fun
+ [ [] -> fun levn bp a err -> bparser []
+ | [lev :: levs] ->
+ let p1 = bcontinue_parser_of_levels entry (succ clevn) levs in
+ match lev.lsuffix with
+ [ DeadEnd -> p1
+ | tree ->
+ let alevn =
+ match lev.assoc with
+ [ LeftA | NonA -> succ clevn
+ | RightA -> clevn ]
+ in
+ let p2 = bparser_of_tree entry (succ clevn) alevn tree in
+ fun levn bp a err strm ->
+ if levn > clevn then p1 levn bp a err strm
+ else
+ match strm with bparser
+ [ [: a = p1 levn bp a err :] -> a
+ | [: act = p2 err; ep = bcount;
+ a =
+ entry.bcontinue levn bp
+ (app act a (loc_of_token_interval bp ep)) err :] ->
+ a ] ] ]
+;
+
+value bstart_parser_of_entry entry =
+ match entry.edesc with
+ [ Dlevels [] -> fun _ err -> bparser []
+ | Dlevels elev -> bstart_parser_of_levels entry 0 elev
+ | Dparser p -> fun levn err strm -> failwith "Dparser for Fstream" ]
+;
+
+value bcontinue_parser_of_entry entry =
+ match entry.edesc with
+ [ Dlevels elev ->
+ let p = bcontinue_parser_of_levels entry 0 elev in
+ fun levn bp a err ->
+ bparser
+ [ [: a = p levn bp a err :] -> a
+ | [: :] -> a ]
+ | Dparser p -> fun levn bp a err -> bparser [] ]
+;
+
+(* Extend syntax *)
+
+value trace_entry_lev_name entry lev =
+ match entry.edesc with
+ | Dlevels ll ->
+ if lev < List.length ll then
+ let glev = List.nth ll lev in
+ match glev.lname with
+ | Some "" | None -> ()
+ | Some s -> Printf.eprintf " (\"%s\")" s
+ end
+ else ()
+ | Dparser _ -> ()
+ end
+;
+
+value may_trace_start entry f =
+ if backtrack_trace.val then
+ fun lev err strm -> do {
+ let t = tind.val in
+ Printf.eprintf "%s>> start %s lev %d" tind.val entry.ename lev;
+ trace_entry_lev_name entry lev;
+ Printf.eprintf "\n%!";
+ tind.val := tind.val ^ " ";
+ try do {
+ let r = f lev err strm in
+ tind.val := t;
+ Printf.eprintf "%s<< end %s lev %d" tind.val entry.ename lev;
+ trace_entry_lev_name entry lev;
+ Printf.eprintf "\n%!";
+ r
+ }
+ with e -> do {
+ tind.val := t;
+ Printf.eprintf "%sexception \"%s\"\n" tind.val
+ (Printexc.to_string e);
+ flush stderr;
+ raise e
+ }
+ }
+ else f
+;
+
+value may_trace_continue entry f =
+ if backtrack_trace.val then
+ fun lev bp a err strm -> do {
+ let t = tind.val in
+ Printf.eprintf "%s>> continue %s lev %d bp %d pos %d" tind.val
+ entry.ename lev bp (Fstream.count strm);
+ trace_entry_lev_name entry lev;
+ Printf.eprintf "\n%!";
+ tind.val := tind.val ^ " ";
+ try do {
+ let r = f lev bp a err strm in
+ tind.val := t;
+ Printf.eprintf "%s<< end continue %s lev %d %d" tind.val
+ entry.ename lev bp;
+ trace_entry_lev_name entry lev;
+ Printf.eprintf "\n%!";
+ r
+ }
+ with e -> do {
+ tind.val := t;
+ Printf.eprintf "%sexception \"%s\"" tind.val
+ (Printexc.to_string e);
+ trace_entry_lev_name entry lev;
+ Printf.eprintf "\n%!";
+ raise e
+ }
+ }
+ else f
+;
+
+value init_entry_functions entry = do {
+ entry.estart :=
+ fun lev strm -> do {
+ let f = start_parser_of_entry entry in
+ entry.estart := f;
+ f lev strm
+ };
+ entry.econtinue :=
+ fun lev bp a strm -> do {
+ let f = continue_parser_of_entry entry in
+ entry.econtinue := f;
+ f lev bp a strm
+ };
+ entry.fstart :=
+ fun lev err strm -> do {
+ let f = fstart_parser_of_entry entry in
+ let f = may_trace_start entry f in
+ entry.fstart := f;
+ f lev err strm
+ };
+ entry.fcontinue :=
+ fun lev bp a err strm -> do {
+ let f = fcontinue_parser_of_entry entry in
+ let f = may_trace_continue entry f in
+ entry.fcontinue := f;
+ f lev bp a err strm
+ };
+ entry.bstart :=
+ fun lev err strm -> do {
+ let f = bstart_parser_of_entry entry in
+ let f = may_trace_start entry f in
+ entry.bstart := f;
+ f lev err strm
+ };
+ entry.bcontinue :=
+ fun lev bp a err strm -> do {
+ let f = bcontinue_parser_of_entry entry in
+ let f = may_trace_continue entry f in
+ entry.bcontinue := f;
+ f lev bp a err strm
+ }
+};
+
+value reinit_entry_functions entry =
+ match entry.edesc with
+ [ Dlevels elev -> init_entry_functions entry
+ | _ -> () ]
+;
+
+value extend_entry entry position rules =
+ try do {
+ let elev = Gramext.levels_of_rules entry position rules in
+ entry.edesc := Dlevels elev;
+ init_entry_functions entry
+ }
+ with
+ [ Plexing.Error s -> do {
+ Printf.eprintf "Lexer initialization error:\n- %s\n" s;
+ flush stderr;
+ failwith "Grammar.extend"
+ } ]
+;
+
+value extend entry_rules_list =
+ let gram = ref None in
+ List.iter
+ (fun (entry, position, rules) -> do {
+ match gram.val with
+ [ Some g ->
+ if g != entry.egram then do {
+ Printf.eprintf "Error: entries with different grammars\n";
+ flush stderr;
+ failwith "Grammar.extend"
+ }
+ else ()
+ | None -> gram.val := Some entry.egram ];
+ extend_entry entry position rules
+ })
+ entry_rules_list
+;
+
+(* Deleting a rule *)
+
+value delete_rule entry sl =
+ match entry.edesc with
+ [ Dlevels levs -> do {
+ let levs = Gramext.delete_rule_in_level_list entry sl levs in
+ entry.edesc := Dlevels levs;
+ entry.estart :=
+ fun lev strm -> do {
+ let f = start_parser_of_entry entry in
+ entry.estart := f;
+ f lev strm
+ };
+ entry.econtinue :=
+ fun lev bp a strm -> do {
+ let f = continue_parser_of_entry entry in
+ entry.econtinue := f;
+ f lev bp a strm
+ };
+ entry.fstart :=
+ fun lev err strm -> do {
+ let f = fstart_parser_of_entry entry in
+ entry.fstart := f;
+ f lev err strm
+ };
+ entry.fcontinue :=
+ fun lev bp a err strm -> do {
+ let f = fcontinue_parser_of_entry entry in
+ entry.fcontinue := f;
+ f lev bp a err strm
+ };
+ entry.bstart :=
+ fun lev err strm -> do {
+ let f = bstart_parser_of_entry entry in
+ entry.bstart := f;
+ f lev err strm
+ };
+ entry.bcontinue :=
+ fun lev bp a err strm -> do {
+ let f = bcontinue_parser_of_entry entry in
+ entry.bcontinue := f;
+ f lev bp a err strm
+ }
+ }
+ | Dparser _ -> () ]
+;
+
+value safe_delete_rule = delete_rule;
+
+type parse_algorithm = Gramext.parse_algorithm ==
+ [ Predictive | Functional | Backtracking | DefaultAlgorithm ]
+;
+
+value warning_verbose = Gramext.warning_verbose;
+
+(* Normal interface *)
+
+type token = (string * string);
+type g = Gramext.grammar token;
+
+type ty_symbol 'self 'a = Gramext.g_symbol token;
+type ty_rule 'self 'f 'r = list (ty_symbol 'self Obj.t);
+type ty_production 'a = (ty_rule 'a Obj.t Obj.t * Gramext.g_action);
+type ty_extension =
+ (Gramext.g_entry token * option Gramext.position * list (option string * option Gramext.g_assoc * list (ty_production Obj.t)));
+
+value s_facto s = Sfacto s;
+value s_nterm e = Snterm e;
+value s_nterml e l = Snterml e l;
+value s_list0 s = Slist0 s;
+value s_list0sep s sep b = Slist0sep s sep b;
+value s_list1 s = Slist1 s;
+value s_list1sep s sep b = Slist1sep s sep b;
+value s_opt s = Sopt s;
+value s_flag s = Sflag s;
+value s_self = Sself;
+value s_next = Snext;
+value s_token tok = Stoken tok;
+value s_rules (t : list (ty_production Obj.t)) = Gramext.srules (Obj.magic t);
+value s_vala sl s = Svala sl s;
+
+value r_stop = [];
+value r_next r s = r @ [s];
+value r_cut r = r @ [Scut];
+
+value production (p : (ty_rule 'a 'f (Ploc.t -> 'a) * 'f)) = (Obj.magic p : ty_production 'a);
+value extension e pos (r : list (option string * option Gramext.g_assoc * list (ty_production Obj.t))) = ((e, pos, Obj.magic r) : ty_extension);
+
+value safe_extend (l : list ty_extension) = extend (Obj.magic l);
+
+value create_toktab () = Hashtbl.create 301;
+value gcreate glexer =
+ {gtokens = create_toktab (); glexer = glexer; galgo = DefaultAlgorithm}
+;
+
+value set_algorithm g algo = g.galgo := algo;
+
+value tokens g con = do {
+ let list = ref [] in
+ Hashtbl.iter
+ (fun (p_con, p_prm) c ->
+ if p_con = con then list.val := [(p_prm, c.val) :: list.val] else ())
+ g.gtokens;
+ list.val
+};
+
+value glexer g = g.glexer;
+
+type gen_parsable 'te =
+ { pa_chr_strm : Stream.t char;
+ pa_tok_strm : Stream.t 'te;
+ pa_tok_fstrm : mutable Fstream.t 'te;
+ pa_loc_func : Plexing.location_function }
+;
+
+type parsable = gen_parsable token;
+
+value fstream_of_stream ts =
+ Fstream.from
+ (fun _ ->
+ match Stream.peek ts with
+ | None -> None
+ | x -> do { Stream.junk ts; x }
+ end)
+;
+
+value parsable g cs =
+ let (ts, lf) = g.glexer.Plexing.tok_func cs in
+ let fts = fstream_of_stream ts in
+ {pa_chr_strm = cs; pa_tok_strm = ts; pa_tok_fstrm = fts; pa_loc_func = lf}
+;
+
+value parse_parsable entry p = do {
+ let efun = entry.estart 0 in
+ let ts = p.pa_tok_strm in
+ let cs = p.pa_chr_strm in
+ let fun_loc = p.pa_loc_func in
+ let restore =
+ let old_floc = floc.val in
+ let old_tc = token_count.val in
+ fun () -> do {
+ floc.val := old_floc;
+ token_count.val := old_tc;
+ }
+ in
+ let get_loc () =
+ try
+ let cnt = Stream.count ts in
+ let loc = fun_loc cnt in
+ if token_count.val - 1 <= cnt then loc
+ else Ploc.encl loc (fun_loc (token_count.val - 1))
+ with
+ [ Failure _ -> Ploc.make_unlined (Stream.count cs, Stream.count cs + 1) ]
+ in
+ floc.val := fun_loc;
+ token_count.val := 0;
+ try do {
+ let r = efun ts in
+ restore ();
+ r
+ }
+ with
+ [ Stream.Failure -> do {
+ let loc = get_loc () in
+ restore ();
+ Ploc.raise loc (Stream.Error ("illegal begin of " ^ entry.ename))
+ }
+ | Stream.Error _ as exc -> do {
+ let loc = get_loc () in
+ restore ();
+ Ploc.raise loc exc
+ }
+ | exc -> do {
+ let loc = (Stream.count cs, Stream.count cs + 1) in
+ restore ();
+ Ploc.raise (Ploc.make_unlined loc) exc
+ } ]
+};
+
+value bfparse entry efun restore2 p = do {
+ let default_loc () =
+ let cs = p.pa_chr_strm in
+ Ploc.make_unlined (Stream.count cs, Stream.count cs + 1)
+ in
+ let restore =
+ let old_tc = token_count.val in
+ let old_nb_ftry = nb_ftry.val in
+ fun () -> do {
+ token_count.val := old_tc;
+ nb_ftry.val := old_nb_ftry;
+ restore2 ();
+ }
+ in
+ let get_loc cnt =
+ try
+ let loc = p.pa_loc_func cnt in
+ if token_count.val - 1 <= cnt then loc
+ else Ploc.encl loc (p.pa_loc_func (token_count.val - 1))
+ with
+ [ Failure _ -> default_loc ()
+ | e -> do { restore (); raise e } ]
+ in
+ token_count.val := 0;
+ nb_ftry.val := 0;
+ if backtrack_trace_try.val then do {
+ Printf.eprintf "\n";
+ flush stderr;
+ }
+ else ();
+ let r =
+ let fts = p.pa_tok_fstrm in
+ try efun no_err fts with
+ [ Stream.Failure | Fstream.Cut -> do {
+ let cnt = Fstream.count fts + Fstream.count_unfrozen fts - 1 in
+ let loc = get_loc cnt in
+ let mess =
+ match max_fcount.val with
+ | Some (cnt, entry, err) ->
+ let mess = err () in
+ let mess =
+ if mess = "" then sprintf "failure in [%s]" entry.ename
+ else mess
+ in
+ if backtrack_trace.val then
+ mess ^ Printf.sprintf " (max token count %d)" cnt
+ else mess
+ | None ->
+ sprintf "[%s] failed" entry.ename
+ end
+ in
+ let mess =
+ if backtrack_trace.val then
+ mess ^ Printf.sprintf " (cnt %d) (cnt+unfrozen %d)"
+ token_count.val cnt
+ else mess
+ in
+ restore ();
+ Ploc.raise loc (Stream.Error mess)
+ }
+ | exc -> do {
+ restore ();
+ Ploc.raise (default_loc ()) exc
+ } ]
+ in
+ restore (); r
+};
+
+value bfparse_token_stream entry efun ts = do {
+ let restore2 () = () in
+ if backtrack_trace.val then
+ Printf.eprintf "%sbfparse_token_stream [%s]\n%!" tind.val entry.ename
+ else ();
+ let p =
+ {pa_chr_strm = [: :];
+ pa_tok_strm = ts;
+ pa_tok_fstrm = fstream_of_stream ts;
+ pa_loc_func = floc.val}
+ in
+ bfparse entry efun restore2 p
+};
+
+value bfparse_parsable entry p efun = do {
+ let restore2 =
+ let old_floc = floc.val in
+ let old_max_fcount = max_fcount.val in
+ fun () -> do {
+ floc.val := old_floc;
+ max_fcount.val := old_max_fcount;
+ }
+ in
+ floc.val := p.pa_loc_func;
+ max_fcount.val := None;
+ if backtrack_trace.val then
+ Printf.eprintf "%sbfparse_parsable [%s]\n%!" tind.val entry.ename
+ else ();
+ bfparse entry efun restore2 p
+};
+
+value fparse_token_stream entry ts =
+ let efun err fts =
+ match entry.fstart 0 err fts with
+ | Some (a, _) -> Obj.magic a
+ | None -> raise Stream.Failure
+ end
+ in
+ bfparse_token_stream entry efun ts
+;
+
+value fparse_parsable entry p =
+ let efun err fts =
+ match entry.fstart 0 err fts with
+ [ Some (r, strm) -> do { p.pa_tok_fstrm := strm; r }
+ | None -> raise Stream.Failure ]
+ in
+ bfparse_parsable entry p efun
+;
+
+value bparse_token_stream entry ts =
+ let efun err fts =
+ match entry.bstart 0 err fts with
+ | Some (a, _, _) -> Obj.magic a
+ | None -> raise Stream.Failure
+ end
+ in
+ bfparse_token_stream entry efun ts
+;
+
+value bparse_parsable entry p =
+ let efun err fts =
+ match entry.bstart 0 err fts with
+ [ Some (r, strm, _) -> do { p.pa_tok_fstrm := strm; r }
+ | None -> raise Stream.Failure ]
+ in
+ bfparse_parsable entry p efun
+;
+
+value bparse_parsable_all entry p = do {
+ let efun = entry.bstart 0 in
+ let fts = p.pa_tok_fstrm in
+ let cs = p.pa_chr_strm in
+ let fun_loc = p.pa_loc_func in
+ let restore =
+ let old_floc = floc.val in
+ let old_tc = token_count.val in
+ let old_max_fcount = max_fcount.val in
+ let old_nb_ftry = nb_ftry.val in
+ fun () -> do {
+ floc.val := old_floc;
+ token_count.val := old_tc;
+ max_fcount.val := old_max_fcount;
+ nb_ftry.val := old_nb_ftry;
+ }
+ in
+ floc.val := fun_loc;
+ token_count.val := 0;
+ max_fcount.val := None;
+ if backtrack_trace.val then
+ Printf.eprintf "%sbparse_parsable_all [%s]: max token count reset\n%!"
+ tind.val entry.ename
+ else
+ nb_ftry.val := 0;
+ if backtrack_trace_try.val then do {
+ Printf.eprintf "\n";
+ flush stderr;
+ }
+ else ();
+ try do {
+ let rl =
+ loop [] (efun no_err fts) where rec loop rev_rl =
+ fun
+ [ Some (r, strm, k) ->
+ let _ =
+ if backtrack_trace.val then do {
+ Printf.eprintf "result found !\n\n";
+ flush stderr;
+ }
+ else ()
+ in
+ loop [r :: rev_rl] (Fstream.bcontinue k)
+ | None -> List.rev rev_rl ]
+ in
+ restore ();
+ rl
+ }
+ with exc -> do {
+ let loc = (Stream.count cs, Stream.count cs + 1) in
+ restore ();
+ Ploc.raise (Ploc.make_unlined loc) exc
+ }
+};
+
+value find_entry e s =
+ let rec find_levels =
+ fun
+ [ [] -> None
+ | [lev :: levs] ->
+ match find_tree lev.lsuffix with
+ [ None ->
+ match find_tree lev.lprefix with
+ [ None -> find_levels levs
+ | x -> x ]
+ | x -> x ] ]
+ and find_symbol =
+ fun
+ [ Sfacto s -> find_symbol s
+ | Snterm e -> if e.ename = s then Some e else None
+ | Snterml e _ -> if e.ename = s then Some e else None
+ | Smeta _ sl _ -> find_symbol_list sl
+ | Slist0 s -> find_symbol s
+ | Slist0sep s _ _ -> find_symbol s
+ | Slist1 s -> find_symbol s
+ | Slist1sep s _ _ -> find_symbol s
+ | Sopt s -> find_symbol s
+ | Sflag s -> find_symbol s
+ | Stree t -> find_tree t
+ | Svala _ s -> find_symbol s
+ | Sself | Snext | Scut | Stoken _ -> None ]
+ and find_symbol_list =
+ fun
+ [ [s :: sl] ->
+ match find_symbol s with
+ [ None -> find_symbol_list sl
+ | x -> x ]
+ | [] -> None ]
+ and find_tree =
+ fun
+ [ Node {node = s; brother = bro; son = son} ->
+ match find_symbol s with
+ [ None ->
+ match find_tree bro with
+ [ None -> find_tree son
+ | x -> x ]
+ | x -> x ]
+ | LocAct _ _ | DeadEnd -> None ]
+ in
+ match e.edesc with
+ [ Dlevels levs ->
+ match find_levels levs with
+ [ Some e -> e
+ | None -> raise Not_found ]
+ | Dparser _ -> raise Not_found ]
+;
+
+value bfparser_of_parser p fstrm return_value = do {
+ let shift_token_number = Fstream.count fstrm in
+ let old_floc = floc.val in
+ let restore () = floc.val := old_floc in
+ floc.val := fun i -> old_floc (shift_token_number + i);
+ let ts =
+ let fts = ref fstrm in
+ Stream.from
+ (fun _ ->
+ match Fstream.next fts.val with
+ [ Some (v, fstrm) -> do { fts.val := fstrm; Some v }
+ | None -> None ])
+ in
+ let r =
+ try
+ let r = (Obj.magic p ts : Obj.t) in
+ let fstrm =
+ loop fstrm (Stream.count ts) where rec loop fstrm i =
+ if i = 0 then fstrm
+ else
+ match Fstream.next fstrm with
+ [ Some (_, fstrm) -> loop fstrm (i - 1)
+ | None -> failwith "internal error in Entry.of_parser" ]
+ in
+ return_value r fstrm
+ with e -> do {
+ restore ();
+ match e with
+ | Stream.Failure -> None
+ | _ -> raise e
+ end
+ }
+ in
+ do { restore (); r }
+};
+
+value fparser_of_parser p err fstrm =
+ let return_value r fstrm = match fstrm with fparser [: :] -> r in
+ bfparser_of_parser p fstrm return_value
+;
+
+value bparser_of_parser p err fstrm =
+ let return_value r fstrm = match fstrm with bparser [: :] -> r in
+ bfparser_of_parser p fstrm return_value
+;
+
+module Entry =
+ struct
+ type te = token;
+ type e 'a = g_entry te;
+ value create g n =
+ {egram = g; ename = n; elocal = False; estart = empty_entry n;
+ econtinue _ _ _ = parser []; fstart _ _ = fparser [];
+ fcontinue _ _ _ _ = fparser []; bstart _ _ = bparser [];
+ bcontinue _ _ _ _ = bparser []; edesc = Dlevels []}
+ ;
+ value parse_parsable (entry : e 'a) p : 'a =
+ let _ = compatible_deprecated_backtrack_parse () in
+ match entry.egram.galgo with
+ [ DefaultAlgorithm ->
+ match default_algorithm_var.val with
+ | Predictive | DefaultAlgorithm ->
+ Obj.magic (parse_parsable entry p : Obj.t)
+ | Backtracking ->
+ Obj.magic (bparse_parsable entry p : Obj.t)
+ | Functional ->
+ Obj.magic (fparse_parsable entry p : Obj.t)
+ end
+ | Predictive ->
+ Obj.magic (parse_parsable entry p : Obj.t)
+ | Functional ->
+ Obj.magic (fparse_parsable entry p : Obj.t)
+ | Backtracking ->
+ Obj.magic (bparse_parsable entry p : Obj.t) ]
+ ;
+ value parse (entry : e 'a) cs : 'a =
+ let parsable = parsable entry.egram cs in
+ parse_parsable entry parsable
+ ;
+ value parse_parsable_all (entry : e 'a) p : 'a =
+ let _ = compatible_deprecated_backtrack_parse () in
+ match entry.egram.galgo with
+ [ DefaultAlgorithm ->
+ match default_algorithm_var.val with
+ | Predictive | DefaultAlgorithm ->
+ try Obj.magic [(parse_parsable entry p : Obj.t)] with
+ [ Stream.Failure | Stream.Error _ -> [] ]
+ | Backtracking ->
+ Obj.magic (bparse_parsable_all entry p : list Obj.t)
+ | Functional ->
+ failwith "Entry.parse_parsable_all: func parsing not impl"
+ end
+ | Predictive ->
+ try Obj.magic [(parse_parsable entry p : Obj.t)] with
+ [ Stream.Failure | Stream.Error _ -> [] ]
+ | Functional ->
+ failwith "parse_parsable_all: functional parsing not impl"
+ | Backtracking ->
+ Obj.magic (bparse_parsable_all entry p : list Obj.t) ]
+ ;
+ value parse_all (entry : e 'a) cs : 'a =
+ let parsable = parsable entry.egram cs in
+ parse_parsable_all entry parsable
+ ;
+ value parse_token_stream (entry : e 'a) ts : 'a =
+ let _ = compatible_deprecated_backtrack_parse () in
+ match entry.egram.galgo with
+ | DefaultAlgorithm ->
+ match default_algorithm_var.val with
+ | Predictive | DefaultAlgorithm ->
+ Obj.magic (entry.estart 0 ts : Obj.t)
+ | Backtracking ->
+ failwith "not impl Entry.parse_token_stream default backtrack"
+ | Functional ->
+ failwith "Entry.parse_token_stream: func parsing not impl"
+ end
+ | Predictive -> Obj.magic (entry.estart 0 ts : Obj.t)
+ | Functional ->
+ failwith "not impl Entry.parse_token_stream functional"
+ | Backtracking ->
+ failwith "not impl Entry.parse_token_stream backtrack"
+ end
+ ;
+ value warned_using_parse_token = ref False;
+ value parse_token (entry : e 'a) ts : 'a = do {
+ (* commented: too often warned in Coq...
+ if not warned_using_parse_token.val then do {
+ eprintf "<W> use of Grammar.Entry.parse_token ";
+ eprintf "deprecated since 2017-06-16\n%!";
+ eprintf "use Grammar.Entry.parse_token_stream instead\n%! ";
+ warned_using_parse_token.val := True
+ }
+ else ();
+ *)
+ parse_token_stream entry ts
+ };
+ value name e = e.ename;
+ value of_parser g n (p : Stream.t te -> 'a) : e 'a =
+ {egram = g; ename = n; elocal = False;
+ estart _ = (Obj.magic p : Stream.t te -> Obj.t);
+ econtinue _ _ _ = parser [];
+ fstart _ = fparser_of_parser p;
+ fcontinue _ _ _ _ = fparser [];
+ bstart _ = bparser_of_parser p;
+ bcontinue _ _ _ _ = bparser [];
+ edesc = Dparser (Obj.magic p : Stream.t te -> Obj.t)}
+ ;
+ external obj : e 'a -> Gramext.g_entry te = "%identity";
+ value print ppf e = fprintf ppf "%a@." print_entry (obj e);
+ value find e s = find_entry (obj e) s;
+ end
+;
+
+value of_entry e = e.egram;
+
+value create_local_entry g n =
+ {egram = g; ename = n; elocal = True; estart = empty_entry n;
+ econtinue _ _ _ = parser []; fstart _ _ = fparser [];
+ fcontinue _ _ _ _ = fparser []; bstart _ _ = bparser [];
+ bcontinue _ _ _ _ = bparser []; edesc = Dlevels []}
+;
+
+(* Unsafe *)
+
+value clear_entry e = do {
+ e.estart := fun _ -> parser [];
+ e.econtinue := fun _ _ _ -> parser [];
+ e.fstart := fun _ _ -> fparser [];
+ e.fcontinue := fun _ _ _ _ -> fparser [];
+ e.bstart := fun _ _ -> bparser [];
+ e.bcontinue := fun _ _ _ _ -> bparser [];
+ match e.edesc with
+ [ Dlevels _ -> e.edesc := Dlevels []
+ | Dparser _ -> () ]
+};
+
+value gram_reinit g glexer = do {
+ Hashtbl.clear g.gtokens;
+ g.glexer := glexer
+};
+
+module Unsafe =
+ struct
+ value gram_reinit = gram_reinit;
+ value clear_entry = clear_entry;
+ end
+;
+
+(* Functorial interface *)
+
+module type GLexerType =
+ sig
+ type te = 'x;
+ value lexer : Plexing.lexer te;
+ end
+;
+
+module type S =
+ sig
+ type te = 'x;
+ type parsable = 'x;
+ value parsable : Stream.t char -> parsable;
+ value tokens : string -> list (string * int);
+ value glexer : Plexing.lexer te;
+ value set_algorithm : parse_algorithm -> unit;
+ module Entry :
+ sig
+ type e 'a = 'x;
+ value create : string -> e 'a;
+ value parse : e 'a -> parsable -> 'a;
+ value name : e 'a -> string;
+ value of_parser : string -> (Stream.t te -> 'a) -> e 'a;
+ value parse_token_stream : e 'a -> Stream.t te -> 'a;
+ value print : Format.formatter -> e 'a -> unit;
+ external obj : e 'a -> Gramext.g_entry te = "%identity";
+ value parse_token : e 'a -> Stream.t te -> 'a;
+ end
+ ;
+ type ty_symbol 'self 'a = 'x;
+ (** Type of grammar symbols. A type-safe wrapper around Gramext.symbol. The
+ first type argument is the type of the ambient entry, the second one is the
+ type of the produced value. *)
+
+ type ty_rule 'self 'f 'r = 'x;
+
+ type ty_production 'a = 'x;
+
+ value s_facto : ty_symbol 'self 'a -> ty_symbol 'self 'a;
+ (* | Smeta of string and list (g_symbol 'te) and Obj.t *)
+ value s_nterm : Entry.e 'a -> ty_symbol 'self 'a;
+ value s_nterml : Entry.e 'a -> string -> ty_symbol 'self 'a;
+ value s_list0 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a);
+ value s_list0sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a);
+ value s_list1 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a);
+ value s_list1sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a);
+ value s_opt : ty_symbol 'self 'a -> ty_symbol 'self (option 'a);
+ value s_flag : ty_symbol 'self 'a -> ty_symbol 'self bool;
+ value s_self : ty_symbol 'self 'self;
+ value s_next : ty_symbol 'self 'self;
+ value s_token : Plexing.pattern -> ty_symbol 'self string;
+ value s_rules : list (ty_production 'a) -> ty_symbol 'self 'a;
+ value s_vala : list string -> ty_symbol 'self 'a -> ty_symbol 'self (Ploc.vala 'a);
+
+ value r_stop : ty_rule 'self 'r 'r;
+ value r_next : ty_rule 'self 'a 'r -> ty_symbol 'self 'b -> ty_rule 'self ('b -> 'a) 'r;
+ value r_cut : ty_rule 'self 'a 'r -> ty_rule 'self 'a 'r;
+
+ value production : (ty_rule 'a 'f (Ploc.t -> 'a) * 'f) -> ty_production 'a;
+
+ module Unsafe :
+ sig
+ value gram_reinit : Plexing.lexer te -> unit;
+ value clear_entry : Entry.e 'a -> unit;
+ end
+ ;
+ value extend :
+ Entry.e 'a -> option Gramext.position ->
+ list
+ (option string * option Gramext.g_assoc *
+ list (list (Gramext.g_symbol te) * Gramext.g_action)) ->
+ unit;
+ value safe_extend :
+ Entry.e 'a -> option Gramext.position ->
+ list
+ (option string * option Gramext.g_assoc *
+ list (ty_production 'a)) ->
+ unit;
+ value delete_rule : Entry.e 'a -> list (Gramext.g_symbol te) -> unit;
+ value safe_delete_rule : Entry.e 'a -> ty_rule 'a 'r 'f -> unit;
+ end
+;
+
+module GMake (L : GLexerType) =
+ struct
+ type te = L.te;
+ type parsable = gen_parsable te;
+ value gram = gcreate L.lexer;
+ value parsable cs =
+ let (ts, lf) = L.lexer.Plexing.tok_func cs in
+ let fts = fstream_of_stream ts in
+ {pa_chr_strm = cs; pa_tok_strm = ts; pa_tok_fstrm = fts;
+ pa_loc_func = lf}
+ ;
+ value tokens = tokens gram;
+ value glexer = glexer gram;
+ value set_algorithm algo = gram.galgo := algo;
+ module Entry =
+ struct
+ type e 'a = g_entry te;
+ value create n =
+ {egram = gram; ename = n; elocal = False; estart = empty_entry n;
+ econtinue _ _ _ = parser []; fstart _ _ = fparser [];
+ fcontinue _ _ _ _ = bparser []; bstart _ _ = bparser [];
+ bcontinue _ _ _ _ = bparser []; edesc = Dlevels []}
+ ;
+ external obj : e 'a -> Gramext.g_entry te = "%identity";
+ value parse (e : e 'a) p : 'a =
+ let _ = compatible_deprecated_backtrack_parse () in
+ match gram.galgo with
+ [ DefaultAlgorithm ->
+ match default_algorithm_var.val with
+ | Predictive | DefaultAlgorithm ->
+ Obj.magic (parse_parsable e p : Obj.t)
+ | Backtracking ->
+ Obj.magic (bparse_parsable e p : Obj.t)
+ | Functional ->
+ Obj.magic (fparse_parsable e p : Obj.t)
+ end
+ | Predictive ->
+ Obj.magic (parse_parsable e p : Obj.t)
+ | Functional ->
+ Obj.magic (fparse_parsable e p : Obj.t)
+ | Backtracking ->
+ Obj.magic (bparse_parsable e p : Obj.t) ]
+ ;
+ value parse_token_stream (e : e 'a) ts : 'a =
+ let _ = compatible_deprecated_backtrack_parse () in
+ match e.egram.galgo with
+ | DefaultAlgorithm ->
+ match default_algorithm_var.val with
+ | Predictive | DefaultAlgorithm ->
+ Obj.magic (e.estart 0 ts : Obj.t)
+ | Backtracking ->
+ bparse_token_stream e ts
+ | Functional ->
+ fparse_token_stream e ts
+ end
+ | Predictive -> Obj.magic (e.estart 0 ts : Obj.t)
+ | Functional -> fparse_token_stream e ts
+ | Backtracking -> bparse_token_stream e ts
+ end
+ ;
+ value warned_using_parse_token = ref False;
+ value parse_token (entry : e 'a) ts : 'a = do {
+ (* commented: too often warned in Coq...
+ if not warned_using_parse_token.val then do {
+ eprintf "<W> use of Entry.parse_token ";
+ eprintf "deprecated since 2017-06-16\n%!";
+ eprintf "use Entry.parse_token_stream instead\n%! ";
+ warned_using_parse_token.val := True
+ }
+ else ();
+ *)
+ parse_token_stream entry ts
+ };
+ value name e = e.ename;
+ value of_parser n (p : Stream.t te -> 'a) : e 'a =
+ {egram = gram; ename = n; elocal = False;
+ estart _ = (Obj.magic p : Stream.t te -> Obj.t);
+ econtinue _ _ _ = parser [];
+ fstart _ = fparser_of_parser p;
+ fcontinue _ _ _ _ = fparser [];
+ bstart _ = bparser_of_parser p;
+ bcontinue _ _ _ _ = bparser [];
+ edesc = Dparser (Obj.magic p : Stream.t te -> Obj.t)}
+ ;
+ value print ppf e = fprintf ppf "%a@." print_entry (obj e);
+ end
+ ;
+ type ty_symbol 'self 'a = Gramext.g_symbol te;
+ type ty_rule 'self 'f 'r = list (ty_symbol 'self Obj.t);
+ type ty_production 'a = (ty_rule 'a Obj.t Obj.t * Gramext.g_action);
+
+ value s_facto s = Sfacto s;
+ value s_nterm e = Snterm e;
+ value s_nterml e l = Snterml e l;
+ value s_list0 s = Slist0 s;
+ value s_list0sep s sep b = Slist0sep s sep b;
+ value s_list1 s = Slist1 s;
+ value s_list1sep s sep b = Slist1sep s sep b;
+ value s_opt s = Sopt s;
+ value s_flag s = Sflag s;
+ value s_self = Sself;
+ value s_next = Snext;
+ value s_token tok = Stoken tok;
+ value s_rules (t : list (ty_production Obj.t)) = Gramext.srules (Obj.magic t);
+ value s_vala sl s = Svala sl s;
+
+ value r_stop = [];
+ value r_next r s = r @ [s];
+ value r_cut r = r @ [Scut];
+
+ value production (p : (ty_rule 'a 'f (Ploc.t -> 'a) * 'f)) = (Obj.magic p : ty_production 'a);
+
+ module Unsafe =
+ struct
+ value gram_reinit = gram_reinit gram;
+ value clear_entry = clear_entry;
+ end
+ ;
+ value extend = extend_entry;
+ value safe_extend e pos (r : list (option string * option Gramext.g_assoc * list (ty_production Obj.t))) = extend e pos (Obj.magic r);
+ value delete_rule e r = delete_rule (Entry.obj e) r;
+ value safe_delete_rule = delete_rule;
+ end
+;
diff --git a/gramlib/grammar.mli b/gramlib/grammar.mli
new file mode 100644
index 0000000000..ae2fa25f2f
--- /dev/null
+++ b/gramlib/grammar.mli
@@ -0,0 +1,338 @@
+(* camlp5r *)
+(* grammar.mli,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+(** Extensible grammars.
+
+ This module implements the Camlp5 extensible grammars system.
+ Grammars entries can be extended using the [EXTEND] statement,
+ added by loading the Camlp5 [pa_extend.cmo] file. *)
+
+type g = 'x;
+ (** The type for grammars, holding entries. *)
+type token = (string * string);
+
+value gcreate : Plexing.lexer token -> g;
+ (** Create a new grammar, without keywords, using the lexer given
+ as parameter. *)
+value tokens : g -> string -> list (string * int);
+ (** Given a grammar and a token pattern constructor, returns the list of
+ the corresponding values currently used in all entries of this grammar.
+ The integer is the number of times this pattern value is used.
+
+ Examples:
+- The call [Grammar.tokens g ""] returns the keywords list.
+- The call [Grammar.tokens g "IDENT"] returns the list of all usages
+ of the pattern "IDENT" in the [EXTEND] statements. *)
+value glexer : g -> Plexing.lexer token;
+ (** Return the lexer used by the grammar *)
+
+type parsable = 'abstract;
+value parsable : g -> Stream.t char -> parsable;
+ (** Type and value allowing to keep the same token stream between
+ several calls of entries of the same grammar, to prevent possible
+ loss of tokens. To be used with [Entry.parse_parsable] below *)
+
+module Entry :
+ sig
+ type e 'a = 'x;
+ value create : g -> string -> e 'a;
+ value parse : e 'a -> Stream.t char -> 'a;
+ value parse_all : e 'a -> Stream.t char -> list 'a;
+ value parse_parsable : e 'a -> parsable -> 'a;
+ value name : e 'a -> string;
+ value of_parser : g -> string -> (Stream.t token -> 'a) -> e 'a;
+ value parse_token_stream : e 'a -> Stream.t token -> 'a;
+ value print : Format.formatter -> e 'a -> unit;
+ value find : e 'a -> string -> e Obj.t;
+ external obj : e 'a -> Gramext.g_entry token = "%identity";
+ (* deprecated since 2017-06-17 *)
+ value parse_token : e 'a -> Stream.t token -> 'a;
+ end
+;
+ (** Module to handle entries.
+- [Entry.e] is the type for entries returning values of type ['a].
+- [Entry.create g n] creates a new entry named [n] in the grammar [g].
+- [Entry.parse e] returns the stream parser of the entry [e].
+- [Entry.parse_all e] returns the stream parser returning all possible
+ values while parsing with the entry [e]: may return more than one
+ value when the parsing algorithm is [Backtracking]
+- [Entry.parse_all e] returns the parser returning all possible values.
+- [Entry.parse_parsable e] returns the parsable parser of the entry [e].
+- [Entry.name e] returns the name of the entry [e].
+- [Entry.of_parser g n p] makes an entry from a token stream parser.
+- [Entry.parse_token_stream e] returns the token stream parser of the
+ entry [e].
+- [Entry.print e] displays the entry [e] using [Format].
+- [Entry.find e s] finds the entry named [s] in the rules of [e].
+- [Entry.obj e] converts an entry into a [Gramext.g_entry] allowing
+ to see what it holds.
+- [Entry.parse_token]: deprecated since 2017-06-16; old name for
+ [Entry.parse_token_stream] *)
+
+value of_entry : Entry.e 'a -> g;
+ (** Return the grammar associated with an entry. *)
+
+type ty_symbol 'self 'a = 'x;
+(** Type of grammar symbols. A type-safe wrapper around Gramext.symbol. The
+ first type argument is the type of the ambient entry, the second one is the
+ type of the produced value. *)
+
+type ty_rule 'self 'f 'r = 'x;
+
+type ty_production 'a = 'x;
+
+type ty_extension = 'x;
+
+value s_facto : ty_symbol 'self 'a -> ty_symbol 'self 'a;
+(* | Smeta of string and list (g_symbol 'te) and Obj.t *)
+value s_nterm : Entry.e 'a -> ty_symbol 'self 'a;
+value s_nterml : Entry.e 'a -> string -> ty_symbol 'self 'a;
+value s_list0 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a);
+value s_list0sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a);
+value s_list1 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a);
+value s_list1sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a);
+value s_opt : ty_symbol 'self 'a -> ty_symbol 'self (option 'a);
+value s_flag : ty_symbol 'self 'a -> ty_symbol 'self bool;
+value s_self : ty_symbol 'self 'self;
+value s_next : ty_symbol 'self 'self;
+value s_token : Plexing.pattern -> ty_symbol 'self string;
+value s_rules : list (ty_production 'a) -> ty_symbol 'self 'a;
+value s_vala : list string -> ty_symbol 'self 'a -> ty_symbol 'self (Ploc.vala 'a);
+
+value r_stop : ty_rule 'self 'r 'r;
+value r_next : ty_rule 'self 'a 'r -> ty_symbol 'self 'b -> ty_rule 'self ('b -> 'a) 'r;
+value r_cut : ty_rule 'self 'a 'r -> ty_rule 'self 'a 'r;
+
+value production : (ty_rule 'a 'f (Ploc.t -> 'a) * 'f) -> ty_production 'a;
+
+value extension : Entry.e 'a -> option Gramext.position ->
+ list (option string * option Gramext.g_assoc * list (ty_production 'a)) -> ty_extension;
+
+value safe_extend : list ty_extension -> unit;
+value safe_delete_rule : Entry.e 'a -> ty_rule 'a 'f 'r -> unit;
+
+(** {6 Clearing grammars and entries} *)
+
+module Unsafe :
+ sig
+ value gram_reinit : g -> Plexing.lexer token -> unit;
+ value clear_entry : Entry.e 'a -> unit;
+ end
+;
+ (** Module for clearing grammars and entries. To be manipulated with
+ care, because: 1) reinitializing a grammar destroys all tokens
+ and there may have problems with the associated lexer if there
+ are keywords; 2) clearing an entry does not destroy the tokens
+ used only by itself.
+- [Unsafe.reinit_gram g lex] removes the tokens of the grammar
+- and sets [lex] as a new lexer for [g]. Warning: the lexer
+- itself is not reinitialized.
+- [Unsafe.clear_entry e] removes all rules of the entry [e]. *)
+
+(** {6 Parsing algorithm} *)
+
+type parse_algorithm = Gramext.parse_algorithm ==
+ [ Predictive | Functional | Backtracking | DefaultAlgorithm ]
+;
+ (** Type of algorithm used in grammar entries.
+ [Predictive]: use imperative streams with predictive parsing
+ [Functional]: use functional streams with limited backtracking
+ [Backtracking]: use functional streams with full backtracking
+ [DefaultAlgorithm]: use the general default algorithm set by the
+ function [set_default_algorithm] below or through the environment
+ variablefound in the variable CAMLP5PARAM.
+ The default, when a grammar is created, is [DefaultAlgorithm]. *)
+
+value set_algorithm : g -> parse_algorithm -> unit;
+ (** Set the parsing algorithm for all entries of a given grammar. *)
+
+value set_default_algorithm : parse_algorithm -> unit;
+ (** Set the default parsing algorithm for all grammars.
+ If the environment variable CAMLP5PARAM contains "b", the
+ default is [Backtracking]; if it contains 'f', the default is
+ [Functional]; if it contains 'p', the default is [Predictive]. *)
+value default_algorithm : unit -> parse_algorithm;
+ (** Return the current default algorithm. *)
+
+value backtrack_stalling_limit : ref int;
+ (** Limitation of backtracking to prevent stalling in case of syntax
+ error. In backtracking algorithm, when there is a syntax error,
+ the parsing continues trying to find another solution. It some
+ grammars, it can be very long before checking all possibilities.
+ This number limits the number of tokens tests after a backtrack.
+ (The number of tokens tests is reset to zero when the token
+ stream overtakes the last reached token.) The default is 10000.
+ If set to 0, there is no limit. Can be set by the environment
+ variable CAMLP5PARAM by "l=value". *)
+
+(** {6 Functorial interface} *)
+
+ (** Alternative for grammars use. Grammars are no more Ocaml values:
+ there is no type for them. Modules generated preserve the
+ rule "an entry cannot call an entry of another grammar" by
+ normal OCaml typing. *)
+
+module type GLexerType =
+ sig
+ type te = 'x;
+ value lexer : Plexing.lexer te;
+ end
+;
+ (** The input signature for the functor [Grammar.GMake]: [te] is the
+ type of the tokens. *)
+
+module type S =
+ sig
+ type te = 'x;
+ type parsable = 'x;
+ value parsable : Stream.t char -> parsable;
+ value tokens : string -> list (string * int);
+ value glexer : Plexing.lexer te;
+ value set_algorithm : parse_algorithm -> unit;
+ module Entry :
+ sig
+ type e 'a = 'y;
+ value create : string -> e 'a;
+ value parse : e 'a -> parsable -> 'a;
+ value name : e 'a -> string;
+ value of_parser : string -> (Stream.t te -> 'a) -> e 'a;
+ value parse_token_stream : e 'a -> Stream.t te -> 'a;
+ value print : Format.formatter -> e 'a -> unit;
+ external obj : e 'a -> Gramext.g_entry te = "%identity";
+ (* deprecated since 2017-06-17 *)
+ value parse_token : e 'a -> Stream.t te -> 'a;
+ end
+ ;
+
+ type ty_symbol 'self 'a = 'x;
+ (** Type of grammar symbols. A type-safe wrapper around Gramext.symbol. The
+ first type argument is the type of the ambient entry, the second one is the
+ type of the produced value. *)
+
+ type ty_rule 'self 'f 'r = 'x;
+
+ type ty_production 'a = 'x;
+
+ value s_facto : ty_symbol 'self 'a -> ty_symbol 'self 'a;
+ (* | Smeta of string and list (g_symbol 'te) and Obj.t *)
+ value s_nterm : Entry.e 'a -> ty_symbol 'self 'a;
+ value s_nterml : Entry.e 'a -> string -> ty_symbol 'self 'a;
+ value s_list0 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a);
+ value s_list0sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a);
+ value s_list1 : ty_symbol 'self 'a -> ty_symbol 'self (list 'a);
+ value s_list1sep : ty_symbol 'self 'a -> ty_symbol 'self 'b -> bool -> ty_symbol 'self (list 'a);
+ value s_opt : ty_symbol 'self 'a -> ty_symbol 'self (option 'a);
+ value s_flag : ty_symbol 'self 'a -> ty_symbol 'self bool;
+ value s_self : ty_symbol 'self 'self;
+ value s_next : ty_symbol 'self 'self;
+ value s_token : Plexing.pattern -> ty_symbol 'self string;
+ value s_rules : list (ty_production 'a) -> ty_symbol 'self 'a;
+ value s_vala : list string -> ty_symbol 'self 'a -> ty_symbol 'self (Ploc.vala 'a);
+
+ value r_stop : ty_rule 'self 'r 'r;
+ value r_next : ty_rule 'self 'a 'r -> ty_symbol 'self 'b -> ty_rule 'self ('b -> 'a) 'r;
+ value r_cut : ty_rule 'self 'a 'r -> ty_rule 'self 'a 'r;
+
+ value production : (ty_rule 'a 'f (Ploc.t -> 'a) * 'f) -> ty_production 'a;
+
+ module Unsafe :
+ sig
+ value gram_reinit : Plexing.lexer te -> unit;
+ value clear_entry : Entry.e 'a -> unit;
+ end
+ ;
+ value extend :
+ Entry.e 'a -> option Gramext.position ->
+ list
+ (option string * option Gramext.g_assoc *
+ list (list (Gramext.g_symbol te) * Gramext.g_action)) ->
+ unit;
+ value safe_extend :
+ Entry.e 'a -> option Gramext.position ->
+ list
+ (option string * option Gramext.g_assoc *
+ list (ty_production 'a)) ->
+ unit;
+ value delete_rule : Entry.e 'a -> list (Gramext.g_symbol te) -> unit;
+ value safe_delete_rule : Entry.e 'a -> ty_rule 'a 'f 'r -> unit;
+ end
+;
+ (** Signature type of the functor [Grammar.GMake]. The types and
+ functions are almost the same than in generic interface, but:
+- Grammars are not values. Functions holding a grammar as parameter
+ do not have this parameter yet.
+- The type [parsable] is used in function [parse] instead of
+ the char stream, avoiding the possible loss of tokens.
+- The type of tokens (expressions and patterns) can be any
+ type (instead of (string * string)); the module parameter
+ must specify a way to show them as (string * string) *)
+
+module GMake (L : GLexerType) : S with type te = L.te;
+
+(** {6 Miscellaneous} *)
+
+value skip_item : 'a -> 'a;
+ (** [Grammar.skip_item x] can be called in a semantic action of
+ a grammar rule to ask the grammar to skip that item if it
+ is called in a list (LIST0 or LIST1). The function returns
+ the item itself (for typing reasons) but its value is ignored.
+ This function is used to allow IFDEF and IFNDEF for cases of
+ constructor declarations and pattern matchings. *)
+
+value error_verbose : ref bool;
+ (** Flag for displaying more information in case of parsing error;
+ default = [False] *)
+
+value warning_verbose : ref bool;
+ (** Flag for displaying warnings while extension; default = [True] *)
+
+value strict_parsing : ref bool;
+ (** Flag to apply strict parsing, without trying to recover errors;
+ default = [False] *)
+
+value utf8_print : ref bool;
+ (** Flag to consider strings as utf8-encoded when printing them;
+ default = [True] *)
+
+value print_entry : Format.formatter -> Gramext.g_entry 'te -> unit;
+ (** General printer for all kinds of entries (obj entries) *)
+
+value iter_entry :
+ (Gramext.g_entry 'te -> unit) -> Gramext.g_entry 'te -> unit;
+ (** [Grammar.iter_entry f e] applies [f] to the entry [e] and
+ transitively all entries called by [e]. The order in which
+ the entries are passed to [f] is the order they appear in
+ each entry. Each entry is passed only once. *)
+
+value fold_entry :
+ (Gramext.g_entry 'te -> 'a -> 'a) -> Gramext.g_entry 'te -> 'a -> 'a;
+ (** [Grammar.fold_entry f e init] computes [(f eN .. (f e2 (f e1 init)))],
+ where [e1 .. eN] are [e] and transitively all entries called by [e].
+ The order in which the entries are passed to [f] is the order they
+ appear in each entry. Each entry is passed only once. *)
+
+value reinit_entry_functions : Gramext.g_entry 'te -> unit;
+
+(*** For system use *)
+
+value loc_of_token_interval : int -> int -> Ploc.t;
+value extend :
+ list
+ (Gramext.g_entry 'te * option Gramext.position *
+ list
+ (option string * option Gramext.g_assoc *
+ list (list (Gramext.g_symbol 'te) * Gramext.g_action))) ->
+ unit;
+value delete_rule : Entry.e 'a -> list (Gramext.g_symbol token) -> unit;
+
+value parse_top_symb :
+ Gramext.g_entry 'te -> Gramext.g_symbol 'te -> Stream.t 'te -> Obj.t;
+value symb_failed_txt :
+ Gramext.g_entry 'te -> Gramext.g_symbol 'te -> Gramext.g_symbol 'te ->
+ string;
+value create_local_entry : g -> string -> Entry.e 'a;
+
+(* deprecated since 2017-06-06 *)
+(* rather use "set_default_algorithm Backtracking" *)
+value backtrack_parse : ref bool;
diff --git a/gramlib/plexing.ml b/gramlib/plexing.ml
new file mode 100644
index 0000000000..947e1980b5
--- /dev/null
+++ b/gramlib/plexing.ml
@@ -0,0 +1,241 @@
+(* camlp5r *)
+(* plexing.ml,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+open Versdep;
+
+type pattern = (string * string);
+
+exception Error of string;
+
+type location = Ploc.t;
+type location_function = int -> location;
+type lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function);
+
+type lexer 'te =
+ { tok_func : lexer_func 'te;
+ tok_using : pattern -> unit;
+ tok_removing : pattern -> unit;
+ tok_match : mutable pattern -> 'te -> string;
+ tok_text : pattern -> string;
+ tok_comm : mutable option (list location) }
+;
+
+value make_loc = Ploc.make_unlined;
+value dummy_loc = Ploc.dummy;
+
+value lexer_text (con, prm) =
+ if con = "" then "'" ^ prm ^ "'"
+ else if prm = "" then con
+ else con ^ " '" ^ prm ^ "'"
+;
+
+value locerr () = failwith "Lexer: location function";
+value loct_create () = (ref (array_create 1024 None), ref False);
+value loct_func (loct, ov) i =
+ match
+ if i < 0 || i >= Array.length loct.val then
+ if ov.val then Some dummy_loc else None
+ else Array.unsafe_get loct.val i
+ with
+ [ Some loc -> loc
+ | None -> locerr () ]
+;
+value loct_add (loct, ov) i loc =
+ if i >= Array.length loct.val then
+ let new_tmax = Array.length loct.val * 2 in
+ if new_tmax < Sys.max_array_length then do {
+ let new_loct = array_create new_tmax None in
+ Array.blit loct.val 0 new_loct 0 (Array.length loct.val);
+ loct.val := new_loct;
+ loct.val.(i) := Some loc
+ }
+ else ov.val := True
+ else loct.val.(i) := Some loc
+;
+
+value make_stream_and_location next_token_loc =
+ let loct = loct_create () in
+ let ts =
+ Stream.from
+ (fun i -> do {
+ let (tok, loc) = next_token_loc () in
+ loct_add loct i loc;
+ Some tok
+ })
+ in
+ (ts, loct_func loct)
+;
+
+value lexer_func_of_parser next_token_loc cs =
+ let line_nb = ref 1 in
+ let bolpos = ref 0 in
+ make_stream_and_location (fun () -> next_token_loc (cs, line_nb, bolpos))
+;
+
+value lexer_func_of_ocamllex lexfun cs =
+ let lb =
+ Lexing.from_function
+ (fun s n ->
+ try do { string_set s 0 (Stream.next cs); 1 } with
+ [ Stream.Failure -> 0 ])
+ in
+ let next_token_loc _ =
+ let tok = lexfun lb in
+ let loc = make_loc (Lexing.lexeme_start lb, Lexing.lexeme_end lb) in
+ (tok, loc)
+ in
+ make_stream_and_location next_token_loc
+;
+
+(* Char and string tokens to real chars and string *)
+
+value buff = ref (string_create 80);
+value store len x = do {
+ if len >= string_length buff.val then
+ buff.val := string_cat buff.val (string_create (string_length buff.val))
+ else ();
+ string_set buff.val len x;
+ succ len
+};
+value get_buff len = string_sub buff.val 0 len;
+
+value valch x = Char.code x - Char.code '0';
+value valch_a x = Char.code x - Char.code 'a' + 10;
+value valch_A x = Char.code x - Char.code 'A' + 10;
+
+value rec backslash s i =
+ if i = String.length s then raise Not_found
+ else
+ match s.[i] with
+ [ 'n' -> ('\n', i + 1)
+ | 'r' -> ('\r', i + 1)
+ | 't' -> ('\t', i + 1)
+ | 'b' -> ('\b', i + 1)
+ | '\\' -> ('\\', i + 1)
+ | '"' -> ('"', i + 1)
+ | ''' -> (''', i + 1)
+ | '0'..'9' as c -> backslash1 (valch c) s (i + 1)
+ | 'x' -> backslash1h s (i + 1)
+ | _ -> raise Not_found ]
+and backslash1 cod s i =
+ if i = String.length s then ('\\', i - 1)
+ else
+ match s.[i] with
+ [ '0'..'9' as c -> backslash2 (10 * cod + valch c) s (i + 1)
+ | _ -> ('\\', i - 1) ]
+and backslash2 cod s i =
+ if i = String.length s then ('\\', i - 2)
+ else
+ match s.[i] with
+ [ '0'..'9' as c -> (Char.chr (10 * cod + valch c), i + 1)
+ | _ -> ('\\', i - 2) ]
+and backslash1h s i =
+ if i = String.length s then ('\\', i - 1)
+ else
+ match s.[i] with
+ [ '0'..'9' as c -> backslash2h (valch c) s (i + 1)
+ | 'a'..'f' as c -> backslash2h (valch_a c) s (i + 1)
+ | 'A'..'F' as c -> backslash2h (valch_A c) s (i + 1)
+ | _ -> ('\\', i - 1) ]
+and backslash2h cod s i =
+ if i = String.length s then ('\\', i - 2)
+ else
+ match s.[i] with
+ [ '0'..'9' as c -> (Char.chr (16 * cod + valch c), i + 1)
+ | 'a'..'f' as c -> (Char.chr (16 * cod + valch_a c), i + 1)
+ | 'A'..'F' as c -> (Char.chr (16 * cod + valch_A c), i + 1)
+ | _ -> ('\\', i - 2) ]
+;
+
+value rec skip_indent s i =
+ if i = String.length s then i
+ else
+ match s.[i] with
+ [ ' ' | '\t' -> skip_indent s (i + 1)
+ | _ -> i ]
+;
+
+value skip_opt_linefeed s i =
+ if i = String.length s then i else if s.[i] = '\010' then i + 1 else i
+;
+
+value eval_char s =
+ if String.length s = 1 then s.[0]
+ else if String.length s = 0 then failwith "invalid char token"
+ else if s.[0] = '\\' then
+ if String.length s = 2 && s.[1] = ''' then '''
+ else
+ try
+ let (c, i) = backslash s 1 in
+ if i = String.length s then c else raise Not_found
+ with
+ [ Not_found -> failwith "invalid char token" ]
+ else failwith "invalid char token"
+;
+
+value eval_string loc s =
+ bytes_to_string (loop 0 0) where rec loop len i =
+ if i = String.length s then get_buff len
+ else
+ let (len, i) =
+ if s.[i] = '\\' then
+ let i = i + 1 in
+ if i = String.length s then failwith "invalid string token"
+ else if s.[i] = '"' then (store len '"', i + 1)
+ else
+ match s.[i] with
+ [ '\010' -> (len, skip_indent s (i + 1))
+ | '\013' -> (len, skip_indent s (skip_opt_linefeed s (i + 1)))
+ | c ->
+ try
+ let (c, i) = backslash s i in
+ (store len c, i)
+ with
+ [ Not_found -> (store (store len '\\') c, i + 1) ] ]
+ else (store len s.[i], i + 1)
+ in
+ loop len i
+;
+
+value default_match =
+ fun
+ [ ("ANY", "") -> fun (con, prm) -> prm
+ | ("ANY", v) ->
+ fun (con, prm) -> if v = prm then v else raise Stream.Failure
+ | (p_con, "") ->
+ fun (con, prm) -> if con = p_con then prm else raise Stream.Failure
+ | (p_con, p_prm) ->
+ fun (con, prm) ->
+ if con = p_con && prm = p_prm then prm else raise Stream.Failure ]
+;
+
+value input_file = ref "";
+value line_nb = ref (ref 0);
+value bol_pos = ref (ref 0);
+value restore_lexing_info = ref None;
+
+(* The lexing buffer used by pa_lexer.cmo *)
+
+value rev_implode l =
+ let s = string_create (List.length l) in
+ bytes_to_string (loop (string_length s - 1) l) where rec loop i =
+ fun
+ [ [c :: l] -> do { string_unsafe_set s i c; loop (i - 1) l }
+ | [] -> s ]
+;
+
+module Lexbuf :
+ sig
+ type t = 'abstract;
+ value empty : t;
+ value add : char -> t -> t;
+ value get : t -> string;
+ end =
+ struct
+ type t = list char;
+ value empty = [];
+ value add c l = [c :: l];
+ value get = rev_implode;
+ end
+;
diff --git a/gramlib/plexing.mli b/gramlib/plexing.mli
new file mode 100644
index 0000000000..f0a03da583
--- /dev/null
+++ b/gramlib/plexing.mli
@@ -0,0 +1,143 @@
+(* camlp5r *)
+(* plexing.mli,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+(** Lexing for Camlp5 grammars.
+
+ This module defines the Camlp5 lexer type to be used in extensible
+ grammars (see module [Grammar]). It also provides some useful functions
+ to create lexers. *)
+
+type pattern = (string * string);
+ (* Type for values used by the generated code of the EXTEND
+ statement to represent terminals in entry rules.
+- The first string is the constructor name (must start with
+ an uppercase character). When it is empty, the second string
+ is supposed to be a keyword.
+- The second string is the constructor parameter. Empty if it
+ has no parameter (corresponding to the 'wildcard' pattern).
+- The way tokens patterns are interpreted to parse tokens is done
+ by the lexer, function [tok_match] below. *)
+
+exception Error of string;
+ (** A lexing error exception to be used by lexers. *)
+
+(** Lexer type *)
+
+type lexer 'te =
+ { tok_func : lexer_func 'te;
+ tok_using : pattern -> unit;
+ tok_removing : pattern -> unit;
+ tok_match : mutable pattern -> 'te -> string;
+ tok_text : pattern -> string;
+ tok_comm : mutable option (list Ploc.t) }
+ (** The type for lexers compatible with camlp5 grammars. The parameter
+ type ['te] is the type of the tokens.
+- The field [tok_func] is the main lexer function. See [lexer_func]
+ type below.
+- The field [tok_using] is a function called by the [EXTEND]
+ statement to warn the lexer that a rule uses this pattern
+ (given as parameter). This allow the lexer 1/ to check that
+ the pattern constructor is really among its possible constructors
+ 2/ to enter the keywords in its tables.
+- The field [tok_removing] is a function possibly called by the
+ [DELETE_RULE] statement to warn the lexer that this pattern
+ (given as parameter) is no more used in the grammar (the grammar
+ system maintains a number of usages of all patterns and calls this
+ function when this number falls to zero). If it is a keyword, this
+ allow the lexer to remove it in its tables.
+- The field [tok_match] is a function called by the camlp5
+ grammar system to ask the lexer how the input tokens have to
+ be matched against the patterns. Warning: for efficiency, this
+ function has to be written as a function taking patterns as
+ parameters and, for each pattern value, returning a function
+ matching a token, *not* as a function with two parameters.
+- The field [tok_text] is a function called by the grammar
+ system to get the name of the tokens for the error messages,
+ in case of syntax error, or for the displaying of the rules
+ of an entry.
+- The field [tok_comm] is a mutable place where the lexer can
+ put the locations of the comments, if its initial value is not
+ [None]. If it is [None], nothing has to be done by the lexer. *)
+
+and lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function)
+ (** The type of a lexer function (field [tok_func] of the type
+ [glexer]). The character stream is the input stream to be
+ lexed. The result is a pair of a token stream and a location
+ function (see below) for this tokens stream. *)
+
+and location_function = int -> Ploc.t;
+ (** The type of a function giving the location of a token in the
+ source from the token number in the stream (starting from zero). *)
+
+value lexer_text : pattern -> string;
+ (** A simple [tok_text] function. *)
+
+value default_match : pattern -> (string * string) -> string;
+ (** A simple [tok_match] function, appling to the token type
+ [(string * string)] *)
+
+(** Lexers from parsers or ocamllex
+
+ The functions below create lexer functions either from a [char stream]
+ parser or for an [ocamllex] function. With the returned function [f],
+ it is possible to get a simple lexer (of the type [Plexing.glexer] above):
+ {[
+ { Plexing.tok_func = f;
+ Plexing.tok_using = (fun _ -> ());
+ Plexing.tok_removing = (fun _ -> ());
+ Plexing.tok_match = Plexing.default_match;
+ Plexing.tok_text = Plexing.lexer_text;
+ Plexing.tok_comm = None }
+ ]}
+ Note that a better [tok_using] function should check the used tokens
+ and raise [Plexing.Error] for incorrect ones. The other functions
+ [tok_removing], [tok_match] and [tok_text] may have other implementations
+ as well. *)
+
+value lexer_func_of_parser :
+ ((Stream.t char * ref int * ref int) -> ('te * Ploc.t)) -> lexer_func 'te;
+ (** A lexer function from a lexer written as a char stream parser
+ returning the next token and its location. The two references
+ with the char stream contain the current line number and the
+ position of the beginning of the current line. *)
+value lexer_func_of_ocamllex : (Lexing.lexbuf -> 'te) -> lexer_func 'te;
+ (** A lexer function from a lexer created by [ocamllex] *)
+
+(** Function to build a stream and a location function *)
+
+value make_stream_and_location :
+ (unit -> ('te * Ploc.t)) -> (Stream.t 'te * location_function);
+ (** General function *)
+
+(** Useful functions and values *)
+
+value eval_char : string -> char;
+value eval_string : Ploc.t -> string -> string;
+ (** Convert a char or a string token, where the backslashes had not
+ been interpreted into a real char or string; raise [Failure] if
+ bad backslash sequence found; [Plexing.eval_char (Char.escaped c)]
+ would return [c] and [Plexing.eval_string (String.escaped s)] would
+ return [s] *)
+
+value restore_lexing_info : ref (option (int * int));
+value input_file : ref string;
+value line_nb : ref (ref int);
+value bol_pos : ref (ref int);
+ (** Special variables used to reinitialize line numbers and position
+ of beginning of line with their correct current values when a parser
+ is called several times with the same character stream. Necessary
+ for directives (e.g. #load or #use) which interrupt the parsing.
+ Without usage of these variables, locations after the directives
+ can be wrong. *)
+
+(** The lexing buffer used by streams lexers *)
+
+module Lexbuf :
+ sig
+ type t = 'a;
+ value empty : t;
+ value add : char -> t -> t;
+ value get : t -> string;
+ end
+;
diff --git a/gramlib/ploc.ml b/gramlib/ploc.ml
new file mode 100644
index 0000000000..30ae4eaa21
--- /dev/null
+++ b/gramlib/ploc.ml
@@ -0,0 +1,217 @@
+(* camlp5r *)
+(* ploc.ml,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+#load "pa_macro.cmo";
+
+type t =
+ { fname : string;
+ line_nb : int;
+ bol_pos : int;
+ line_nb_last : int;
+ bol_pos_last : int;
+ bp : int;
+ ep : int;
+ comm : string;
+ ecomm : string }
+;
+
+value make_loc fname line_nb bol_pos (bp, ep) comm =
+ {fname = fname; line_nb = line_nb; bol_pos = bol_pos;
+ line_nb_last = line_nb; bol_pos_last = bol_pos;
+ bp = bp; ep = ep; comm = comm; ecomm = ""}
+;
+
+value make_unlined (bp, ep) =
+ {fname = ""; line_nb = 1; bol_pos = 0; line_nb_last = -1; bol_pos_last = 0;
+ bp = bp; ep = ep; comm = ""; ecomm = ""}
+;
+
+value dummy =
+ {fname = ""; line_nb = 1; bol_pos = 0; line_nb_last = -1; bol_pos_last = 0;
+ bp = 0; ep = 0; comm = ""; ecomm = ""}
+;
+
+value file_name loc = loc.fname;
+value first_pos loc = loc.bp;
+value last_pos loc = loc.ep;
+value line_nb loc = loc.line_nb;
+value bol_pos loc = loc.bol_pos;
+value line_nb_last loc = loc.line_nb_last;
+value bol_pos_last loc = loc.bol_pos_last;
+value comment loc = loc.comm;
+value comment_last loc = loc.ecomm;
+
+IFDEF OCAML_VERSION <= OCAML_1_07 OR COMPATIBLE_WITH_OLD_OCAML THEN
+ value with_bp_ep l bp ep =
+ {fname = l.fname; line_nb = l.line_nb; bol_pos = l.bol_pos;
+ line_nb_last = l.line_nb_last; bol_pos_last = l.bol_pos_last; bp = bp;
+ ep = ep; comm = l.comm; ecomm = l.ecomm}
+ ;
+ value with_comm l comm =
+ {fname = l.fname; line_nb = l.line_nb; bol_pos = l.bol_pos;
+ line_nb_last = l.line_nb_last; bol_pos_last = l.bol_pos_last; bp = l.bp;
+ ep = l.ep; comm = comm; ecomm = l.ecomm}
+ ;
+END;
+
+value encl loc1 loc2 =
+ if loc1.bp < loc2.bp then
+ if loc1.ep < loc2.ep then
+ {fname = loc1.fname; line_nb = loc1.line_nb; bol_pos = loc1.bol_pos;
+ line_nb_last = loc2.line_nb_last; bol_pos_last = loc2.bol_pos_last;
+ bp = loc1.bp; ep = loc2.ep; comm = loc1.comm; ecomm = loc2.comm}
+ else
+ loc1
+ else
+ if loc2.ep < loc1.ep then
+ {fname = loc2.fname; line_nb = loc2.line_nb; bol_pos = loc2.bol_pos;
+ line_nb_last = loc1.line_nb_last; bol_pos_last = loc1.bol_pos_last;
+ bp = loc2.bp; ep = loc1.ep; comm = loc2.comm; ecomm = loc1.comm}
+ else
+ loc2
+;
+value shift sh loc = {(loc) with bp = sh + loc.bp; ep = sh + loc.ep};
+value sub loc sh len = {(loc) with bp = loc.bp + sh; ep = loc.bp + sh + len};
+value after loc sh len =
+ {(loc) with bp = loc.ep + sh; ep = loc.ep + sh + len}
+;
+value with_comment loc comm = {(loc) with comm = comm};
+
+value name = ref "loc";
+
+value from_file fname loc =
+ let (bp, ep) = (first_pos loc, last_pos loc) in
+ try
+ let ic = open_in_bin fname in
+ let strm = Stream.of_channel ic in
+ let rec loop fname lin =
+ let rec not_a_line_dir col =
+ parser cnt
+ [ [: `c; s :] ->
+ if cnt < bp then
+ if c = '\n' then loop fname (lin + 1)
+ else not_a_line_dir (col + 1) s
+ else
+ let col = col - (cnt - bp) in
+ (fname, lin, col, col + ep - bp)
+ | [: :] ->
+ (fname, lin, col, col + 1) ]
+ in
+ let rec a_line_dir str n col =
+ parser
+ [ [: `'\n' :] -> loop str n
+ | [: `_; s :] -> a_line_dir str n (col + 1) s ]
+ in
+ let rec spaces col =
+ parser
+ [ [: `' '; s :] -> spaces (col + 1) s
+ | [: :] -> col ]
+ in
+ let rec check_string str n col =
+ parser
+ [ [: `'"'; col = spaces (col + 1); s :] -> a_line_dir str n col s
+ | [: `c when c <> '\n'; s :] ->
+ check_string (str ^ String.make 1 c) n (col + 1) s
+ | [: a = not_a_line_dir col :] -> a ]
+ in
+ let check_quote n col =
+ parser
+ [ [: `'"'; s :] -> check_string "" n (col + 1) s
+ | [: a = not_a_line_dir col :] -> a ]
+ in
+ let rec check_num n col =
+ parser
+ [ [: `('0'..'9' as c); s :] ->
+ check_num (10 * n + Char.code c - Char.code '0') (col + 1) s
+ | [: col = spaces col; s :] -> check_quote n col s ]
+ in
+ let begin_line =
+ parser
+ [ [: `'#'; col = spaces 1; s :] -> check_num 0 col s
+ | [: a = not_a_line_dir 0 :] -> a ]
+ in
+ begin_line strm
+ in
+ let r =
+ try loop fname 1 with
+ [ Stream.Failure ->
+ let bol = bol_pos loc in
+ (fname, line_nb loc, bp - bol, ep - bol) ]
+ in
+ do { close_in ic; r }
+ with
+ [ Sys_error _ -> (fname, 1, bp, ep) ]
+;
+
+value second_line fname ep0 (line, bp) ep = do {
+ let ic = open_in fname in
+ seek_in ic bp;
+ loop line bp bp where rec loop line bol p =
+ if p = ep then do {
+ close_in ic;
+ if bol = bp then (line, ep0)
+ else (line, ep - bol)
+ }
+ else do {
+ let (line, bol) =
+ match input_char ic with
+ [ '\n' -> (line + 1, p + 1)
+ | _ -> (line, bol) ]
+ in
+ loop line bol (p + 1)
+ }
+};
+
+value get loc = do {
+ if loc.fname = "" || loc.fname = "-" then do {
+ (loc.line_nb, loc.bp - loc.bol_pos, loc.line_nb, loc.ep - loc.bol_pos,
+ loc.ep - loc.bp)
+ }
+ else do {
+ let (bl, bc, ec) =
+ (loc.line_nb, loc.bp - loc.bol_pos, loc.ep - loc.bol_pos)
+ in
+ let (el, eep) = second_line loc.fname ec (bl, loc.bp) loc.ep in
+ (bl, bc, el, eep, ec - bc)
+ }
+};
+
+value call_with r v f a =
+ let saved = r.val in
+ try do {
+ r.val := v;
+ let b = f a in
+ r.val := saved;
+ b
+ }
+ with e -> do { r.val := saved; raise e }
+;
+
+exception Exc of t and exn;
+
+value raise loc exc =
+ match exc with
+ [ Exc _ _ -> raise exc
+ | _ -> raise (Exc loc exc) ]
+;
+
+type vala 'a =
+ [ VaAnt of string
+ | VaVal of 'a ]
+;
+
+value warned = ref True;
+value warning_deprecated_since_6_00 name =
+ if not warned.val then do {
+ Printf.eprintf "<W> %s deprecated since version 6.00" name;
+ warned.val := True
+ }
+ else ()
+;
+
+value make line_nb bol_pos (bp, ep) =
+ let _ = warning_deprecated_since_6_00 "Ploc.make" in
+ {fname = ""; line_nb = line_nb; bol_pos = bol_pos; line_nb_last = line_nb;
+ bol_pos_last = bol_pos; bp = bp; ep = ep; comm = ""; ecomm = ""}
+;
diff --git a/gramlib/ploc.mli b/gramlib/ploc.mli
new file mode 100644
index 0000000000..1d6df4d322
--- /dev/null
+++ b/gramlib/ploc.mli
@@ -0,0 +1,129 @@
+(* camlp5r *)
+(* ploc.mli,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+(** Locations and some pervasive type and value. *)
+
+type t = 'abstract;
+
+(* located exceptions *)
+
+exception Exc of t and exn;
+ (** [Ploc.Exc loc e] is an encapsulation of the exception [e] with
+ the input location [loc]. To be used to specify a location
+ for an error. This exception must not be raised by [raise] but
+ rather by [Ploc.raise] (see below), to prevent the risk of several
+ encapsulations of [Ploc.Exc]. *)
+value raise : t -> exn -> 'a;
+ (** [Ploc.raise loc e], if [e] is already the exception [Ploc.Exc],
+ re-raise it (ignoring the new location [loc]), else raise the
+ exception [Ploc.Exc loc e]. *)
+
+(* making locations *)
+
+value make_loc : string -> int -> int -> (int * int) -> string -> t;
+ (** [Ploc.make_loc fname line_nb bol_pos (bp, ep) comm] creates a location
+ starting at line number [line_nb], where the position of the beginning
+ of the line is [bol_pos] and between the positions [bp] (included) and
+ [ep] excluded. And [comm] is the comment before the location. The
+ positions are in number of characters since the begin of the stream. *)
+value make_unlined : (int * int) -> t;
+ (** [Ploc.make_unlined] is like [Ploc.make] except that the line number
+ is not provided (to be used e.g. when the line number is unknown. *)
+
+value dummy : t;
+ (** [Ploc.dummy] is a dummy location, used in situations when location
+ has no meaning. *)
+
+(* getting location info *)
+
+value file_name : t -> string;
+ (** [Ploc.file_name loc] returns the file name of the location. *)
+value first_pos : t -> int;
+ (** [Ploc.first_pos loc] returns the position of the begin of the location
+ in number of characters since the beginning of the stream. *)
+value last_pos : t -> int;
+ (** [Ploc.last_pos loc] returns the position of the first character not
+ in the location in number of characters since the beginning of the
+ stream. *)
+value line_nb : t -> int;
+ (** [Ploc.line_nb loc] returns the line number of the location or [-1] if
+ the location does not contain a line number (i.e. built with
+ [Ploc.make_unlined]. *)
+value bol_pos : t -> int;
+ (** [Ploc.bol_pos loc] returns the position of the beginning of the line
+ of the location in number of characters since the beginning of
+ the stream, or [0] if the location does not contain a line number
+ (i.e. built with [Ploc.make_unlined]. *)
+value line_nb_last : t -> int;
+value bol_pos_last : t -> int;
+ (** Return the line number and the position of the beginning of the line
+ of the last position. *)
+value comment : t -> string;
+ (** [Ploc.comment loc] returns the comment before the location. *)
+value comment_last : t -> string;
+ (** [Ploc.comment loc] returns the last comment of the location. *)
+
+(* combining locations *)
+
+value encl : t -> t -> t;
+ (** [Ploc.encl loc1 loc2] returns the location starting at the
+ smallest start of [loc1] and [loc2] and ending at the greatest end
+ of them. In other words, it is the location enclosing [loc1] and
+ [loc2]. *)
+value shift : int -> t -> t;
+ (** [Ploc.shift sh loc] returns the location [loc] shifted with [sh]
+ characters. The line number is not recomputed. *)
+value sub : t -> int -> int -> t;
+ (** [Ploc.sub loc sh len] is the location [loc] shifted with [sh]
+ characters and with length [len]. The previous ending position
+ of the location is lost. *)
+value after : t -> int -> int -> t;
+ (** [Ploc.after loc sh len] is the location just after loc (starting at
+ the end position of [loc]) shifted with [sh] characters and of length
+ [len]. *)
+value with_comment : t -> string -> t;
+ (** Change the comment part of the given location *)
+
+(* miscellaneous *)
+
+value name : ref string;
+ (** [Ploc.name.val] is the name of the location variable used in grammars
+ and in the predefined quotations for OCaml syntax trees. Default:
+ ["loc"] *)
+
+value get : t -> (int * int * int * int * int);
+ (** [Ploc.get loc] returns in order: 1/ the line number of the begin
+ of the location, 2/ its column, 3/ the line number of the first
+ character not in the location, 4/ its column and 5/ the length
+ of the location. The file where the location occurs (if any) may
+ be read during this operation. *)
+
+value from_file : string -> t -> (string * int * int * int);
+ (** [Ploc.from_file fname loc] reads the file [fname] up to the
+ location [loc] and returns the real input file, the line number
+ and the characters location in the line; the real input file
+ can be different from [fname] because of possibility of line
+ directives typically generated by /lib/cpp. *)
+
+(* pervasives *)
+
+type vala 'a =
+ [ VaAnt of string
+ | VaVal of 'a ]
+;
+ (** Encloser of many abstract syntax tree nodes types, in "strict" mode.
+ This allow the system of antiquotations of abstract syntax tree
+ quotations to work when using the quotation kit [q_ast.cmo]. *)
+
+value call_with : ref 'a -> 'a -> ('b -> 'c) -> 'b -> 'c;
+ (** [Ploc.call_with r v f a] sets the reference [r] to the value [v],
+ then call [f a], and resets [r] to its initial value. If [f a] raises
+ an exception, its initial value is also reset and the exception is
+ re-raised. The result is the result of [f a]. *)
+
+(**/**)
+
+value make : int -> int -> (int * int) -> t;
+ (** deprecated function since version 6.00; use [make_loc] instead
+ with the empty string *)
diff --git a/gramlib/token.ml b/gramlib/token.ml
new file mode 100644
index 0000000000..9c1664ccd9
--- /dev/null
+++ b/gramlib/token.ml
@@ -0,0 +1,37 @@
+(* camlp5r *)
+(* token.ml,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+type pattern = Plexing.pattern;
+
+exception Error of string;
+
+type location = Ploc.t;
+type location_function = int -> location;
+type lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function);
+
+type glexer 'te = Plexing.lexer 'te ==
+ { tok_func : lexer_func 'te;
+ tok_using : pattern -> unit;
+ tok_removing : pattern -> unit;
+ tok_match : mutable pattern -> 'te -> string;
+ tok_text : pattern -> string;
+ tok_comm : mutable option (list location) }
+;
+
+value make_loc = Ploc.make_unlined;
+value dummy_loc = Ploc.dummy;
+
+value make_stream_and_location = Plexing.make_stream_and_location;
+value lexer_func_of_parser = Plexing.lexer_func_of_parser;
+value lexer_func_of_ocamllex = Plexing.lexer_func_of_ocamllex;
+
+value eval_char = Plexing.eval_char;
+value eval_string = Plexing.eval_string;
+
+value lexer_text = Plexing.lexer_text;
+value default_match = Plexing.default_match;
+
+value line_nb = Plexing.line_nb;
+value bol_pos = Plexing.bol_pos;
+value restore_lexing_info = Plexing.restore_lexing_info;
diff --git a/gramlib/token.mli b/gramlib/token.mli
new file mode 100644
index 0000000000..333d53ce86
--- /dev/null
+++ b/gramlib/token.mli
@@ -0,0 +1,56 @@
+(* camlp5r *)
+(* token.mli,v *)
+(* Copyright (c) INRIA 2007-2017 *)
+
+(** Module deprecated since Camlp5 version 5.00. Use now module Plexing.
+ Compatibility assumed. *)
+
+type pattern = Plexing.pattern;
+
+exception Error of string;
+ (** Use now [Plexing.Error] *)
+
+type glexer 'te = Plexing.lexer 'te ==
+ { tok_func : Plexing.lexer_func 'te;
+ tok_using : pattern -> unit;
+ tok_removing : pattern -> unit;
+ tok_match : mutable pattern -> 'te -> string;
+ tok_text : pattern -> string;
+ tok_comm : mutable option (list Ploc.t) }
+;
+
+type lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function)
+and location_function = int -> Ploc.t;
+
+value lexer_text : pattern -> string;
+ (** Use now [Plexing.lexer_text] *)
+value default_match : pattern -> (string * string) -> string;
+ (** Use now [Plexing.default_match] *)
+
+value lexer_func_of_parser :
+ ((Stream.t char * ref int * ref int) -> ('te * Ploc.t)) -> lexer_func 'te;
+ (** Use now [Plexing.lexer_func_of_parser] *)
+value lexer_func_of_ocamllex : (Lexing.lexbuf -> 'te) -> lexer_func 'te;
+ (** Use now [Plexing.lexer_func_of_ocamllex] *)
+
+value make_stream_and_location :
+ (unit -> ('te * Ploc.t)) -> (Stream.t 'te * location_function);
+ (** Use now [Plexing.make_stream_and_location] *)
+
+value eval_char : string -> char;
+ (** Use now [Plexing.eval_char] *)
+value eval_string : Ploc.t -> string -> string;
+ (** Use now [Plexing.eval_string] *)
+
+value restore_lexing_info : ref (option (int * int));
+ (** Use now [Plexing.restore_lexing_info] *)
+value line_nb : ref (ref int);
+ (** Use now [Plexing.line_nb] *)
+value bol_pos : ref (ref int);
+ (** Use now [Plexing.bol_pos] *)
+
+(* deprecated since version 4.08 *)
+
+type location = Ploc.t;
+value make_loc : (int * int) -> Ploc.t;
+value dummy_loc : Ploc.t;