about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Pierre-Marie Pédrot, 2015-02-06 21:35:36 +0100
committer: Pierre-Marie Pédrot, 2015-02-06 22:26:57 +0100
commit: cdfb6705e0a2d01b7c01d83bfe898a64ee148c34 (patch)
tree: 438c12a04e313dbebbbee0f7c838f043d1a0c550 /lib
parent: 1fe296cd7de29c37a735c4bef4979310c25bffb3 (diff)
More efficient Richpp.
We build the rich XML at once without generating the printed string.
Diffstat (limited to 'lib')
-rw-r--r-- lib/richpp.ml  173
-rw-r--r-- lib/richpp.mli 4
2 files changed, 84 insertions, 93 deletions
diff --git a/lib/richpp.ml b/lib/richpp.ml
index 442050cf89..c4a9c39d5a 100644
--- a/lib/richpp.ml
+++ b/lib/richpp.ml
@@ -15,126 +15,117 @@ type 'annotation located = {
endpos : int
}
-type context =
+type 'a stack =
| Leaf
-| Node of string * xml list * context
+| Node of string * 'a located gxml list * int * 'a stack
+
+type 'a context = {
+ mutable stack : 'a stack;
+ (** Pending opened nodes *)
+ mutable offset : int;
+ (** Quantity of characters printed so far *)
+ mutable annotations : 'a option Int.Map.t;
+ (** Map associating annotations to indexes *)
+ mutable index : int;
+ (** Current index of annotations *)
+}
+
+(** We use Format to introduce tags inside the pretty-printed document.
+ Each inserted tag is a fresh index that we keep in sync with the contents
+ of annotations.
+ We build an XML tree on the fly, by plugging ourselves in Format tag
+ marking functions. As those functions are called when actually writing to
+ the device, the resulting tree is correct.
+*)
let rich_pp annotate ppcmds =
- (** First, we use Format to introduce tags inside the pretty-printed document.
- Each inserted tag is a fresh index that we keep in sync with the contents
- of annotations.
-
- We build an XML tree on the fly, by plugging ourselves in Format tag
- marking functions. As those functions are called when actually writing to
- the device, the resulting tree is correct.
- *)
- let annotations = ref [] in
- let index = ref (-1) in
+
+ let context = {
+ stack = Leaf;
+ offset = 0;
+ annotations = Int.Map.empty;
+ index = (-1);
+ } in
+
let pp_tag obj =
- let () = incr index in
- let () = annotations := obj :: !annotations in
- string_of_int !index
+ let index = context.index + 1 in
+ let () = context.index <- index in
+ let obj = annotate obj in
+ let () = context.annotations <- Int.Map.add index obj context.annotations in
+ string_of_int index
in
let pp_buffer = Buffer.create 13 in
- let push_pcdata context =
+ let push_pcdata () =
(** Push the optional PCData on the above node *)
- if (Buffer.length pp_buffer = 0) then ()
- else match !context with
+ let len = Buffer.length pp_buffer in
+ if len = 0 then ()
+ else match context.stack with
| Leaf -> assert false
- | Node (node, child, ctx) ->
+ | Node (node, child, pos, ctx) ->
let data = Buffer.contents pp_buffer in
let () = Buffer.clear pp_buffer in
- context := Node (node, PCData data :: child, ctx)
+ let () = context.stack <- Node (node, PCData data :: child, pos, ctx) in
+ context.offset <- context.offset + len
in
- let open_xml_tag context tag =
- let () = push_pcdata context in
- context := Node (tag, [], !context)
+ let open_xml_tag tag =
+ let () = push_pcdata () in
+ context.stack <- Node (tag, [], context.offset, context.stack)
in
- let close_xml_tag context tag =
- let () = push_pcdata context in
- match !context with
+ let close_xml_tag tag =
+ let () = push_pcdata () in
+ match context.stack with
| Leaf -> assert false
- | Node (node, child, ctx) ->
+ | Node (node, child, pos, ctx) ->
let () = assert (String.equal tag node) in
- let xml = Element (node, [], List.rev child) in
+ let annotation =
+ try Int.Map.find (int_of_string node) context.annotations
+ with _ -> None
+ in
+ let annotation = {
+ annotation = annotation;
+ startpos = pos;
+ endpos = context.offset;
+ } in
+ let xml = Element (node, annotation, List.rev child) in
match ctx with
| Leaf ->
(** Final node: we keep the result in a dummy context *)
- context := Node ("", [xml], Leaf)
- | Node (node, child, ctx) ->
- context := Node (node, xml :: child, ctx)
+ context.stack <- Node ("", [xml], 0, Leaf)
+ | Node (node, child, pos, ctx) ->
+ context.stack <- Node (node, xml :: child, pos, ctx)
in
- let xml_pp = Format.(
-
- let ft = formatter_of_buffer pp_buffer in
+ let open Format in
- let context = ref Leaf in
+ let ft = formatter_of_buffer pp_buffer in
- let tag_functions = {
- mark_open_tag = (fun tag -> let () = open_xml_tag context tag in "");
- mark_close_tag = (fun tag -> let () = close_xml_tag context tag in "");
- print_open_tag = ignore;
- print_close_tag = ignore;
- } in
+ let tag_functions = {
+ mark_open_tag = (fun tag -> let () = open_xml_tag tag in "");
+ mark_close_tag = (fun tag -> let () = close_xml_tag tag in "");
+ print_open_tag = ignore;
+ print_close_tag = ignore;
+ } in
- pp_set_formatter_tag_functions ft tag_functions;
- pp_set_mark_tags ft true;
+ pp_set_formatter_tag_functions ft tag_functions;
+ pp_set_mark_tags ft true;
- (** The whole output must be a valid document. To that
- end, we nest the document inside <pp> tags. *)
- pp_open_tag ft "pp";
- Pp.(pp_with ~pp_tag ft ppcmds);
- pp_close_tag ft ();
+ (** The whole output must be a valid document. To that
+ end, we nest the document inside <pp> tags. *)
+ pp_open_tag ft "pp";
+ Pp.(pp_with ~pp_tag ft ppcmds);
+ pp_close_tag ft ();
- (** Get the resulting XML tree. *)
- let () = pp_print_flush ft () in
- let () = assert (Buffer.length pp_buffer = 0) in
- match !context with
- | Node ("", [xml], Leaf) -> xml
- | _ -> assert false
- )
- in
- (** Second, we retrieve the final function that relates
- each tag to an annotation. *)
- let objs = CArray.rev_of_list !annotations in
- let get index = annotate objs.(index) in
-
- (** Third, the low-level XML is turned into a high-level
- semi-structured document that contains a located annotation in
- every node. During the traversal of the low-level XML document,
- we build a raw string representation of the pretty-print. *)
- let rec node buffer = function
- | Element (index, [], cs) ->
- let startpos, endpos, cs = children buffer cs in
- let annotation = try get (int_of_string index) with _ -> None in
- (Element (index, { annotation; startpos; endpos }, cs), endpos)
-
- | PCData s ->
- Buffer.add_string buffer s;
- (PCData s, Buffer.length buffer)
-
- | _ ->
- assert false (* Because of the form of XML produced by Format. *)
-
- and children buffer cs =
- let startpos = Buffer.length buffer in
- let cs, endpos =
- List.fold_left (fun (cs, endpos) c ->
- let c, endpos = node buffer c in
- (c :: cs, endpos)
- ) ([], startpos) cs
- in
- (startpos, endpos, List.rev cs)
- in
- let xml, _ = node pp_buffer xml_pp in
+ (** Get the resulting XML tree. *)
+ let () = pp_print_flush ft () in
+ let () = assert (Buffer.length pp_buffer = 0) in
+ match context.stack with
+ | Node ("", [xml], 0, Leaf) -> xml
+ | _ -> assert false
- (** We return the raw pretty-printing and its annotations tree. *)
- (Buffer.contents pp_buffer, xml)
let annotations_positions xml =
let rec node accu = function
diff --git a/lib/richpp.mli b/lib/richpp.mli
index 446ee1a046..bf80c8dc8c 100644
--- a/lib/richpp.mli
+++ b/lib/richpp.mli
@@ -17,13 +17,13 @@ type 'annotation located = {
}
(** [rich_pp get_annotations ppcmds] returns the interpretation
- of [ppcmds] as a string as well as a semi-structured document
+ of [ppcmds] as a semi-structured document
that represents (located) annotations of this string.
The [get_annotations] function is used to convert tags into the desired
annotation. If this function returns [None], then no annotation is put. *)
val rich_pp :
(Pp.Tag.t -> 'annotation option) -> Pp.std_ppcmds ->
- string * 'annotation located Xml_datatype.gxml
+ 'annotation located Xml_datatype.gxml
(** [annotations_positions ssdoc] returns a list associating each
annotations with its position in the string from which [ssdoc] is