aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorEmilio Jesus Gallego Arias2018-07-24 11:03:52 +0200
committerEmilio Jesus Gallego Arias2018-07-24 11:03:52 +0200
commit4ab54f3cca76632cb6e258c84abc259e15e9e9f8 (patch)
treec4374a0986acd6d4f6cac1a03e6bfa5ba7c972c9 /lib
parent580a026070ab74d05f38e1177632be83a8756566 (diff)
parent97069f69ab3a58cc4ccbaa1a835912c6c31dde4d (diff)
Merge PR #6801: Highlight differences between successive proof steps (color, underline, etc.)
Diffstat (limited to 'lib')
-rw-r--r--lib/lib.mllib1
-rw-r--r--lib/pp.ml75
-rw-r--r--lib/pp.mli19
-rw-r--r--lib/pp_diff.ml303
-rw-r--r--lib/pp_diff.mli116
5 files changed, 513 insertions, 1 deletions
diff --git a/lib/lib.mllib b/lib/lib.mllib
index 0891859423..41b3622a99 100644
--- a/lib/lib.mllib
+++ b/lib/lib.mllib
@@ -6,6 +6,7 @@ Control
Util
Pp
+Pp_diff
Stateid
Loc
Feedback
diff --git a/lib/pp.ml b/lib/pp.ml
index cd81f6e768..7f132686db 100644
--- a/lib/pp.ml
+++ b/lib/pp.ml
@@ -139,7 +139,7 @@ let v n s = Ppcmd_box(Pp_vbox n,s)
let hv n s = Ppcmd_box(Pp_hvbox n,s)
let hov n s = Ppcmd_box(Pp_hovbox n,s)
-(* Opening and closed of tags *)
+(* Opening and closing of tags *)
let tag t s = Ppcmd_tag(t,s)
(* In new syntax only double quote char is escaped by repeating it *)
@@ -167,6 +167,20 @@ let rec pr_com ft s =
Some s2 -> Format.pp_force_newline ft (); pr_com ft s2
| None -> ()
+let start_pfx = "start."
+let end_pfx = "end."
+
+let split_pfx pfx str =
+ let (str_len, pfx_len) = (String.length str, String.length pfx) in
+ if str_len >= pfx_len && (String.sub str 0 pfx_len) = pfx then
+ (pfx, String.sub str pfx_len (str_len - pfx_len)) else ("", str);;
+
+let split_tag tag =
+ let (pfx, ttag) = split_pfx start_pfx tag in
+ if pfx <> "" then (pfx, ttag) else
+ let (pfx, ttag) = split_pfx end_pfx tag in
+ (pfx, ttag);;
+
(* pretty printing functions *)
let pp_with ft pp =
let cpp_open_box = function
@@ -297,3 +311,62 @@ let prvect_with_sep sep elem v = prvecti_with_sep sep (fun _ -> elem) v
let prvect elem v = prvect_with_sep mt elem v
let surround p = hov 1 (str"(" ++ p ++ str")")
+
+(*** DEBUG code ***)
+
+let db_print_pp fmt pp =
+ let open Format in
+ let block_type fmt btype =
+ let (bt, v) =
+ match btype with
+ | Pp_hbox v -> ("Pp_hbox", v)
+ | Pp_vbox v -> ("Pp_vbox", v)
+ | Pp_hvbox v -> ("Pp_hvbox", v)
+ | Pp_hovbox v -> ("Pp_hovbox", v)
+ in
+ fprintf fmt "%s %d" bt v
+ in
+ let rec db_print_pp_r indent pp =
+ let ind () = fprintf fmt "%s" (String.make (2 * indent) ' ') in
+ ind();
+ match pp with
+ | Ppcmd_empty ->
+ fprintf fmt "Ppcmd_empty@;"
+ | Ppcmd_string str ->
+ fprintf fmt "Ppcmd_string '%s'@;" str
+ | Ppcmd_glue list ->
+ fprintf fmt "Ppcmd_glue@;";
+ List.iter (fun x -> db_print_pp_r (indent + 1) (repr x)) list;
+ | Ppcmd_box (block, pp) ->
+ fprintf fmt "Ppcmd_box %a@;" block_type block;
+ db_print_pp_r (indent + 1) (repr pp);
+ | Ppcmd_tag (tag, pp) ->
+ fprintf fmt "Ppcmd_tag %s@;" tag;
+ db_print_pp_r (indent + 1) (repr pp);
+ | Ppcmd_print_break (i, j) ->
+ fprintf fmt "Ppcmd_print_break %d %d@;" i j
+ | Ppcmd_force_newline ->
+ fprintf fmt "Ppcmd_force_newline@;"
+ | Ppcmd_comment list ->
+ fprintf fmt "Ppcmd_comment@;";
+ List.iter (fun x -> ind(); (fprintf fmt "%s@;" x)) list
+ in
+ pp_open_vbox fmt 0;
+ db_print_pp_r 0 pp;
+ pp_close_box fmt ();
+ pp_print_flush fmt ()
+
+let db_string_of_pp pp =
+ Format.asprintf "%a" db_print_pp pp
+
+let rec flatten pp =
+ match pp with
+ | Ppcmd_glue l -> Ppcmd_glue (List.concat (List.map
+ (fun x -> let x = flatten x in
+ match x with
+ | Ppcmd_glue l2 -> l2
+ | p -> [p])
+ l))
+ | Ppcmd_box (block, pp) -> Ppcmd_box (block, flatten pp)
+ | Ppcmd_tag (tag, pp) -> Ppcmd_tag (tag, flatten pp)
+ | p -> p
diff --git a/lib/pp.mli b/lib/pp.mli
index f3a0a29b8a..ed31daa561 100644
--- a/lib/pp.mli
+++ b/lib/pp.mli
@@ -189,3 +189,22 @@ val pr_vertical_list : ('b -> t) -> 'b list -> t
val pp_with : Format.formatter -> t -> unit
val string_of_ppcmds : t -> string
+
+
+(** Tag prefix to start a multi-token diff span *)
+val start_pfx : string
+
+(** Tag prefix to end a multi-token diff span *)
+val end_pfx : string
+
+(** Split a tag into prefix and base tag *)
+val split_tag : string -> string * string
+
+(** Print the Pp in tree form for debugging *)
+val db_print_pp : Format.formatter -> t -> unit
+
+(** Print the Pp in tree form for debugging, return as a string *)
+val db_string_of_pp : t -> string
+
+(** Combine nested Ppcmd_glues *)
+val flatten : t -> t
diff --git a/lib/pp_diff.ml b/lib/pp_diff.ml
new file mode 100644
index 0000000000..7b4b1eab73
--- /dev/null
+++ b/lib/pp_diff.ml
@@ -0,0 +1,303 @@
+(************************************************************************)
+(* * The Coq Proof Assistant / The Coq Development Team *)
+(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *)
+(* <O___,, * (see CREDITS file for the list of authors) *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(* * (see LICENSE file for the text of the license) *)
+(************************************************************************)
+
+(* DEBUG/UNIT TEST *)
+let cfprintf oc = Printf.(kfprintf (fun oc -> fprintf oc "") oc)
+let log_out_ch = ref stdout
+let cprintf s = cfprintf !log_out_ch s
+
+
+module StringDiff = Diff2.Make(struct
+ type elem = String.t
+ type t = elem array
+ let get t i = Array.get t i
+ let length t = Array.length t
+end)
+
+type diff_type =
+ [ `Removed
+ | `Added
+ | `Common
+ ]
+
+type diff_list = StringDiff.elem Diff2.edit list
+
+(* debug print diff data structure *)
+let db_print_diffs fmt diffs =
+ let open Format in
+ let print_diff = function
+ | `Common (opos, npos, s) ->
+ fprintf fmt "Common '%s' opos = %d npos = %d\n" s opos npos;
+ | `Removed (pos, s) ->
+ fprintf fmt "Removed '%s' opos = %d\n" s pos;
+ | `Added (pos, s) ->
+ fprintf fmt "Added '%s' npos = %d\n" s pos;
+ in
+ pp_open_vbox fmt 0;
+ List.iter print_diff diffs;
+ pp_close_box fmt ();
+ pp_print_flush fmt ()
+
+let string_of_diffs diffs =
+ Format.asprintf "%a" db_print_diffs diffs
+
+(* Adjust the diffs returned by the Myers algorithm to reduce the span of the
+changes. This gives more natural-looking diffs.
+
+While the Myers algorithm minimizes the number of changes between two
+sequences, it doesn't minimize the span of the changes. For example,
+representing elements in common in lower case and inserted elements in upper
+case (but ignoring case in the algorithm), ABabC and abABC both have 3 changes
+(A, B and C). However the span of the first sequence is 5 elements (ABabC)
+while the span of the second is 3 elements (ABC).
+
+The algorithm modifies the changes iteratively, for example ABabC -> aBAbC -> abABC
+
+dtype: identifies which of Added OR Removed to use; the other one is ignored.
+diff_list: output from the Myers algorithm
+*)
+let shorten_diff_span dtype diff_list =
+ let changed = ref false in
+ let diffs = Array.of_list diff_list in
+ let len = Array.length diffs in
+ let vinfo index =
+ match diffs.(index) with
+ | `Common (opos, npos, s) -> (`Common, opos, npos, s)
+ | `Removed (pos, s) -> (`Removed, pos, 0, s)
+ | `Added (pos, s) -> (`Added, 0, pos, s) in
+ let get_variant index =
+ let (v, _, _, _) = vinfo index in
+ v in
+ let get_str index =
+ let (_, _, _, s) = vinfo index in
+ s in
+
+ let iter start len lt incr = begin
+ let src = ref start in
+ let dst = ref start in
+ while (lt !src len) do
+ if (get_variant !src) = dtype then begin
+ if (lt !dst !src) then
+ dst := !src;
+ while (lt !dst len) && (get_variant !dst) <> `Common do
+ dst := !dst + incr;
+ done;
+ if (lt !dst len) && (get_str !src) = (get_str !dst) then begin
+ (* swap diff *)
+ let (_, c_opos, c_npos, str) = vinfo !dst
+ and (_, v_opos, v_npos, _) = vinfo !src in
+ changed := true;
+ if dtype = `Added then begin
+ diffs.(!src) <- `Common (c_opos, v_npos, str);
+ diffs.(!dst) <- `Added (c_npos, str);
+ end else begin
+ diffs.(!src) <- `Common (v_opos, c_npos, str);
+ diffs.(!dst) <- `Removed (c_opos, str)
+ end
+ end
+ end;
+ src := !src + incr
+ done
+ end in
+
+ iter 0 len (<) 1; (* left to right *)
+ iter (len-1) (-1) (>) (-1); (* right to left *)
+ if !changed then Array.to_list diffs else diff_list;;
+
+let has_changes diffs =
+ let rec has_changes_r diffs added removed =
+ match diffs with
+ | `Added _ :: t -> has_changes_r t true removed
+ | `Removed _ :: t -> has_changes_r t added true
+ | h :: t -> has_changes_r t added removed
+ | [] -> (added, removed) in
+ has_changes_r diffs false false;;
+
+(* get the Myers diff of 2 lists of strings *)
+let diff_strs old_strs new_strs =
+ let diffs = List.rev (StringDiff.diff old_strs new_strs) in
+ shorten_diff_span `Removed (shorten_diff_span `Added diffs);;
+
+(* Default string tokenizer. Makes each character a separate strin.
+Whitespace is not ignored. Doesn't handle UTF-8 differences well. *)
+let def_tokenize_string s =
+ let limit = (String.length s) - 1 in
+ let strs : string list ref = ref [] in
+ for i = 0 to limit do
+ strs := (String.make 1 s.[i]) :: !strs
+ done;
+ List.rev !strs
+
+(* get the Myers diff of 2 strings *)
+let diff_str ?(tokenize_string=def_tokenize_string) old_str new_str =
+ let old_toks = Array.of_list (tokenize_string old_str)
+ and new_toks = Array.of_list (tokenize_string new_str) in
+ diff_strs old_toks new_toks;;
+
+let get_dinfo = function
+ | `Common (_, _, s) -> (`Common, s)
+ | `Removed (_, s) -> (`Removed, s)
+ | `Added (_, s) -> (`Added, s)
+
+[@@@ocaml.warning "-32"]
+let string_of_diff_type = function
+ | `Common -> "Common"
+ | `Removed -> "Removed"
+ | `Added -> "Added"
+[@@@ocaml.warning "+32"]
+
+let wrap_in_bg diff_tag pp =
+ let open Pp in
+ (tag (Pp.start_pfx ^ diff_tag ^ ".bg") (str "")) ++ pp ++
+ (tag (Pp.end_pfx ^ diff_tag ^ ".bg") (str ""))
+
+exception Diff_Failure of string
+
+let add_diff_tags which pp diffs =
+ let open Pp in
+ let diff_tag = if which = `Added then "diff.added" else "diff.removed" in
+ let diffs : diff_list ref = ref diffs in
+ let in_diff = ref false in (* true = buf chars need a tag *)
+ let in_span = ref false in (* true = last pp had a start tag *)
+ let trans = ref false in (* true = this diff starts/ends highlight *)
+ let buf = Buffer.create 16 in
+ let acc_pp = ref [] in
+ let diff_str, diff_ind, diff_len = ref "", ref 0, ref 0 in
+ let prev_dtype, dtype, next_dtype = ref `Common, ref `Common, ref `Common in
+ let is_white c = List.mem c [' '; '\t'; '\n'; '\r'] in
+
+ let skip () =
+ while !diffs <> [] &&
+ (let (t, _) = get_dinfo (List.hd !diffs) in
+ t <> `Common && t <> which)
+ do
+ diffs := List.tl !diffs
+ done
+ in
+
+ let put_tagged case =
+ if Buffer.length buf > 0 then begin
+ let pp = str (Buffer.contents buf) in
+ Buffer.clear buf;
+ let tagged = match case with
+ | "" -> pp
+ | "tag" -> tag diff_tag pp
+ | "start" -> in_span := true; tag (start_pfx ^ diff_tag) pp
+ | "end" -> in_span := false; tag (end_pfx ^ diff_tag) pp
+ | _ -> raise (Diff_Failure "invalid tag id in put_tagged, should be impossible") in
+ acc_pp := tagged :: !acc_pp
+ end
+ in
+
+ let output_pps () =
+ let next_diff_char_hl = if !diff_ind < !diff_len then !dtype = which else !next_dtype = which in
+ let tag = if not !in_diff then ""
+ else if !in_span then
+ if next_diff_char_hl then "" else "end"
+ else
+ if next_diff_char_hl then "start" else "tag" in
+ put_tagged tag; (* flush any remainder *)
+ let l = !acc_pp in
+ acc_pp := [];
+ match List.length l with
+ | 0 -> str ""
+ | 1 -> List.hd l
+ | _ -> seq (List.rev l)
+ in
+
+ let maybe_next_diff () =
+ if !diff_ind = !diff_len && (skip(); !diffs <> []) then begin
+ let (t, s) = get_dinfo (List.hd !diffs) in
+ diff_str := s; diff_ind := 0; diff_len := String.length !diff_str;
+ diffs := List.tl !diffs; skip();
+ prev_dtype := !dtype;
+ dtype := t;
+ next_dtype := (match !diffs with
+ | diff2 :: _ -> let (nt, _) = get_dinfo diff2 in nt
+ | [] -> `Common);
+ trans := !dtype <> !prev_dtype
+ end;
+ in
+
+ let s_char c =
+ maybe_next_diff ();
+ (* matching first should handle tokens with spaces, e.g. in comments/strings *)
+ if !diff_ind < !diff_len && c = !diff_str.[!diff_ind] then begin
+ if !dtype = which && !trans && !diff_ind = 0 then begin
+ put_tagged "";
+ in_diff := true
+ end;
+ Buffer.add_char buf c;
+ diff_ind := !diff_ind + 1;
+ if !dtype = which && !dtype <> !next_dtype && !diff_ind = !diff_len then begin
+ put_tagged (if !in_span then "end" else "tag");
+ in_diff := false
+ end
+ end else if is_white c then
+ Buffer.add_char buf c
+ else begin
+ cprintf "mismatch: expected '%c' but got '%c'\n" !diff_str.[!diff_ind] c;
+ raise (Diff_Failure "string mismatch, shouldn't happen")
+ end
+ in
+
+ (* rearrange so existing tags are inside diff tags, provided that those tags
+ only contain Ppcmd_string's. Other cases (e.g. tag of a box) are not supported. *)
+ (* todo: Is there a better way to do this in OCaml without multiple 'repr's? *)
+ let reorder_tags child pp_tag pp =
+ match repr child with
+ | Ppcmd_tag (t1, pp) -> tag t1 (tag pp_tag pp)
+ | Ppcmd_glue l ->
+ if List.exists (fun x ->
+ match repr x with
+ | Ppcmd_tag (_, _) -> true
+ | _ -> false) l
+ then seq (List.map (fun x ->
+ match repr x with
+ | Ppcmd_tag (t2, pp2) -> tag t2 (tag pp_tag pp2)
+ | pp2 -> tag pp_tag (unrepr pp2)) l)
+ else child
+ | _ -> tag pp_tag child
+ in
+
+ let rec add_tags_r pp =
+ let r_pp = repr pp in
+ match r_pp with
+ | Ppcmd_string s -> String.iter s_char s; output_pps ()
+ | Ppcmd_glue l -> seq (List.map add_tags_r l)
+ | Ppcmd_box (block_type, pp) -> unrepr (Ppcmd_box (block_type, add_tags_r pp))
+ | Ppcmd_tag (pp_tag, pp) -> reorder_tags (add_tags_r pp) pp_tag pp
+ | _ -> pp
+ in
+ let (has_added, has_removed) = has_changes !diffs in
+ let rv = add_tags_r pp in
+ skip ();
+ if !diffs <> [] then
+ raise (Diff_Failure "left-over diff info at end of Pp.t, should be impossible");
+ if has_added || has_removed then wrap_in_bg diff_tag rv else rv;;
+
+let diff_pp ?(tokenize_string=def_tokenize_string) o_pp n_pp =
+ let open Pp in
+ let o_str = string_of_ppcmds o_pp in
+ let n_str = string_of_ppcmds n_pp in
+ let diffs = diff_str ~tokenize_string o_str n_str in
+ (add_diff_tags `Removed o_pp diffs, add_diff_tags `Added n_pp diffs);;
+
+let diff_pp_combined ?(tokenize_string=def_tokenize_string) ?(show_removed=false) o_pp n_pp =
+ let open Pp in
+ let o_str = string_of_ppcmds o_pp in
+ let n_str = string_of_ppcmds n_pp in
+ let diffs = diff_str ~tokenize_string o_str n_str in
+ let (_, has_removed) = has_changes diffs in
+ let added = add_diff_tags `Added n_pp diffs in
+ if show_removed && has_removed then
+ let removed = add_diff_tags `Removed o_pp diffs in
+ (v 0 (removed ++ cut() ++ added))
+ else added;;
diff --git a/lib/pp_diff.mli b/lib/pp_diff.mli
new file mode 100644
index 0000000000..03468271d2
--- /dev/null
+++ b/lib/pp_diff.mli
@@ -0,0 +1,116 @@
+(************************************************************************)
+(* * The Coq Proof Assistant / The Coq Development Team *)
+(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *)
+(* <O___,, * (see CREDITS file for the list of authors) *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(* * (see LICENSE file for the text of the license) *)
+(************************************************************************)
+
+(**
+Computes the differences between 2 Pp's and adds additional tags to a Pp
+to highlight them. Strings are split into tokens using the Coq lexer,
+then the lists of tokens are diffed using the Myers algorithm. A fixup routine,
+shorten_diff_span, shortens the span of the diff result in some cases.
+
+Highlights use 4 tags to specify the color and underline/strikeout. These are
+"diffs.added", "diffs.removed", "diffs.added.bg" and "diffs.removed.bg". The
+first two are for added or removed text; the last two are for unmodified parts
+of a modified item. Diffs that span multiple strings in the Pp are tagged with
+"start.diff.*" and "end.diff.*", but only on the first and last strings of the span.
+
+If the inputs are not acceptable to the lexer, break the strings into
+lists of tokens and call diff_strs, then add_diff_tags with a Pp.t that matches
+the input lists of strings. Tokens that the lexer doesn't return exactly as they
+appeared in the input will raise an exception in add_diff_tags (e.g. comments
+and quoted strings). Fixing that requires tweaking the lexer.
+
+Limitations/Possible enhancements:
+
+- Make diff_pp immune to unlexable strings by adding a flag to the lexer.
+*)
+
+(** Compute the diff between two Pp.t structures and return
+versions of each with diffs highlighted as (old, new) *)
+val diff_pp : ?tokenize_string:(string -> string list) -> Pp.t -> Pp.t -> Pp.t * Pp.t
+
+(** Compute the diff between two Pp.t structures and return
+a highlighted Pp.t. If [show_removed] is true, show separate lines for
+removals and additions, otherwise only show additions *)
+val diff_pp_combined : ?tokenize_string:(string -> string list) -> ?show_removed:bool -> Pp.t -> Pp.t -> Pp.t
+
+(** Raised if the diff fails *)
+exception Diff_Failure of string
+
+module StringDiff :
+sig
+ type elem = String.t
+ type t = elem array
+end
+
+type diff_type =
+ [ `Removed
+ | `Added
+ | `Common
+ ]
+
+type diff_list = StringDiff.elem Diff2.edit list
+
+(** Compute the difference between 2 strings in terms of tokens, using the
+lexer to identify tokens.
+
+If the strings are not lexable, this routine will raise Diff_Failure.
+(I expect to modify the lexer soon so this won't happen.)
+
+Therefore you should catch any exceptions. The workaround for now is for the
+caller to tokenize the strings itself and then call diff_strs.
+*)
+val diff_str : ?tokenize_string:(string -> string list) -> string -> string -> StringDiff.elem Diff2.edit list
+
+(** Compute the differences between 2 lists of strings, treating the strings
+in the lists as indivisible units.
+*)
+val diff_strs : StringDiff.t -> StringDiff.t -> StringDiff.elem Diff2.edit list
+
+(** Generate a new Pp that adds tags marking diffs to a Pp structure:
+which: either `Added or `Removed, indicates which type of diffs to add
+pp: the original structure. For `Added, must be the new pp passed to diff_pp
+ For `Removed, must be the old pp passed to diff_pp. Passing the wrong one
+ will likely raise Diff_Failure.
+diffs: the diff list returned by diff_pp
+
+Diffs of single strings in the Pp are tagged with "diff.added" or "diff.removed".
+Diffs that span multiple strings in the Pp are tagged with "start.diff.*" or
+"end.diff.*", but only on the first and last strings of the span.
+
+Ppcmd_strings will be split into multiple Ppcmd_strings if a diff starts or ends
+in the middle of the string. Whitespace just before or just after a diff will
+not be part of the highlight.
+
+Prexisting tags in pp may contain only a single Ppcmd_string. Those tags will be
+placed inside the diff tags to ensure proper nesting of tags within spans of
+"start.diff.*" ... "end.diff.*".
+
+Under some "impossible" conditions, this routine may raise Diff_Failure.
+If you want to make your call especially bulletproof, catch this
+exception, print a user-visible message, then recall this routine with
+the first argument set to None, which will skip the diff.
+*)
+val add_diff_tags : diff_type -> Pp.t -> StringDiff.elem Diff2.edit list -> Pp.t
+
+(** Returns a boolean pair (added, removed) for [diffs] where a true value
+indicates that something was added/removed in the diffs.
+*)
+val has_changes : diff_list -> bool * bool
+
+val get_dinfo : StringDiff.elem Diff2.edit -> diff_type * string
+
+(** Returns a modified [pp] with the background highlighted with
+"start.<diff_tag>.bg" and "end.<diff_tag>.bg" tags at the beginning
+and end of the returned Pp.t
+*)
+val wrap_in_bg : string -> Pp.t -> Pp.t
+
+(** Displays the diffs to a printable format for debugging *)
+val string_of_diffs : diff_list -> string