Merge PR #6801: Highlight differences between successive proof steps (color, underline, etc.)

author: Emilio Jesus Gallego Arias 2018-07-24 11:03:52 +0200
committer: Emilio Jesus Gallego Arias 2018-07-24 11:03:52 +0200
commit: 4ab54f3cca76632cb6e258c84abc259e15e9e9f8 (patch)
tree: c4374a0986acd6d4f6cac1a03e6bfa5ba7c972c9 /lib
parent: 580a026070ab74d05f38e1177632be83a8756566 (diff)
parent: 97069f69ab3a58cc4ccbaa1a835912c6c31dde4d (diff)
5 files changed, 513 insertions, 1 deletions
diff --git a/lib/lib.mllib b/lib/lib.mllib
index 0891859423..41b3622a99 100644
--- a/lib/lib.mllib
+++ b/lib/lib.mllib
@@ -6,6 +6,7 @@ Control
 Util
 
 Pp
+Pp_diff
 Stateid
 Loc
 Feedback
diff --git a/lib/pp.ml b/lib/pp.ml
index cd81f6e768..7f132686db 100644
--- a/lib/pp.ml
+++ b/lib/pp.ml
@@ -139,7 +139,7 @@ let v   n s = Ppcmd_box(Pp_vbox n,s)
 let hv  n s = Ppcmd_box(Pp_hvbox n,s)
 let hov n s = Ppcmd_box(Pp_hovbox n,s)
 
-(* Opening and closed of tags *)
+(* Opening and closing of tags *)
 let tag t s = Ppcmd_tag(t,s)
 
 (* In new syntax only double quote char is escaped by repeating it *)
@@ -167,6 +167,20 @@ let rec pr_com ft s =
       Some s2 -> Format.pp_force_newline ft (); pr_com ft s2
     | None -> ()
 
+let start_pfx = "start."
+let end_pfx = "end."
+
+let split_pfx pfx str =
+  let (str_len, pfx_len) = (String.length str, String.length pfx) in
+  if str_len >= pfx_len && (String.sub str 0 pfx_len) = pfx then
+    (pfx, String.sub str pfx_len (str_len - pfx_len)) else ("", str);;
+
+let split_tag tag =
+  let (pfx, ttag) = split_pfx start_pfx tag in
+  if pfx <> "" then (pfx, ttag) else
+    let (pfx, ttag) = split_pfx end_pfx tag in
+    (pfx, ttag);;
+
 (* pretty printing functions *)
 let pp_with ft pp =
   let cpp_open_box = function
@@ -297,3 +311,62 @@ let prvect_with_sep sep elem v = prvecti_with_sep sep (fun _ -> elem) v
 let prvect elem v = prvect_with_sep mt elem v
 
 let surround p = hov 1 (str"(" ++ p ++ str")")
+
+(*** DEBUG code ***)
+
+let db_print_pp fmt pp =
+  let open Format in
+  let block_type fmt btype =
+    let (bt, v) =
+      match btype with
+      | Pp_hbox v -> ("Pp_hbox", v)
+      | Pp_vbox v -> ("Pp_vbox", v)
+      | Pp_hvbox v -> ("Pp_hvbox", v)
+      | Pp_hovbox v -> ("Pp_hovbox", v)
+    in
+    fprintf fmt "%s %d" bt v
+  in
+  let rec db_print_pp_r indent pp =
+    let ind () = fprintf fmt "%s" (String.make (2 * indent) ' ') in
+    ind();
+    match pp with
+    | Ppcmd_empty ->
+        fprintf fmt "Ppcmd_empty@;"
+    | Ppcmd_string str ->
+        fprintf fmt "Ppcmd_string '%s'@;" str
+    | Ppcmd_glue list ->
+        fprintf fmt "Ppcmd_glue@;";
+        List.iter (fun x -> db_print_pp_r (indent + 1) (repr x)) list;
+    | Ppcmd_box (block, pp) ->
+        fprintf fmt "Ppcmd_box %a@;" block_type block;
+        db_print_pp_r (indent + 1) (repr pp);
+    | Ppcmd_tag (tag, pp) ->
+        fprintf fmt "Ppcmd_tag %s@;" tag;
+        db_print_pp_r (indent + 1) (repr pp);
+    | Ppcmd_print_break (i, j) ->
+        fprintf fmt "Ppcmd_print_break %d %d@;" i j
+    | Ppcmd_force_newline ->
+        fprintf fmt "Ppcmd_force_newline@;"
+    | Ppcmd_comment list ->
+        fprintf fmt "Ppcmd_comment@;";
+        List.iter (fun x -> ind(); (fprintf fmt "%s@;" x)) list
+  in
+  pp_open_vbox fmt 0;
+  db_print_pp_r 0 pp;
+  pp_close_box fmt ();
+  pp_print_flush fmt ()
+
+let db_string_of_pp pp =
+  Format.asprintf "%a" db_print_pp pp
+
+let rec flatten pp =
+  match pp with
+  | Ppcmd_glue l -> Ppcmd_glue (List.concat (List.map
+      (fun x -> let x = flatten x in
+                  match x with
+                  | Ppcmd_glue l2 -> l2
+                  | p -> [p])
+      l))
+  | Ppcmd_box (block, pp) -> Ppcmd_box (block, flatten pp)
+  | Ppcmd_tag (tag, pp) -> Ppcmd_tag (tag, flatten pp)
+  | p -> p
diff --git a/lib/pp.mli b/lib/pp.mli
index f3a0a29b8a..ed31daa561 100644
--- a/lib/pp.mli
+++ b/lib/pp.mli
@@ -189,3 +189,22 @@ val pr_vertical_list : ('b -> t) -> 'b list -> t
 val pp_with          : Format.formatter -> t -> unit
 
 val string_of_ppcmds : t -> string
+
+
+(** Tag prefix to start a multi-token diff span *)
+val start_pfx : string
+
+(** Tag prefix to end a multi-token diff span *)
+val end_pfx : string
+
+(** Split a tag into prefix and base tag *)
+val split_tag : string -> string * string
+
+(** Print the Pp in tree form for debugging *)
+val db_print_pp : Format.formatter -> t -> unit
+
+(** Print the Pp in tree form for debugging, return as a string *)
+val db_string_of_pp : t -> string
+
+(** Combine nested Ppcmd_glues *)
+val flatten : t -> t
diff --git a/lib/pp_diff.ml b/lib/pp_diff.ml
new file mode 100644
index 0000000000..7b4b1eab73
--- /dev/null
+++ b/lib/pp_diff.ml
@@ -0,0 +1,303 @@
+(************************************************************************)
+(*         *   The Coq Proof Assistant / The Coq Development Team       *)
+(*  v      *   INRIA, CNRS and contributors - Copyright 1999-2018       *)
+(* <O___,, *       (see CREDITS file for the list of authors)           *)
+(*   \VV/  **************************************************************)
+(*    //   *    This file is distributed under the terms of the         *)
+(*         *     GNU Lesser General Public License Version 2.1          *)
+(*         *     (see LICENSE file for the text of the license)         *)
+(************************************************************************)
+
+(* DEBUG/UNIT TEST *)
+let cfprintf oc = Printf.(kfprintf (fun oc -> fprintf oc "") oc)
+let log_out_ch = ref stdout
+let cprintf s = cfprintf !log_out_ch s
+
+
+module StringDiff = Diff2.Make(struct
+  type elem = String.t
+  type t = elem array
+  let get t i = Array.get t i
+  let length t = Array.length t
+end)
+
+type diff_type =
+  [ `Removed
+  | `Added
+  | `Common
+  ]
+
+type diff_list = StringDiff.elem Diff2.edit list
+
+(* debug print diff data structure *)
+let db_print_diffs fmt diffs =
+  let open Format in
+  let print_diff = function
+    | `Common (opos, npos, s) ->
+      fprintf fmt "Common '%s' opos = %d npos = %d\n" s opos npos;
+    | `Removed (pos, s) ->
+      fprintf fmt "Removed '%s' opos = %d\n" s pos;
+    | `Added (pos, s) ->
+      fprintf fmt "Added '%s' npos = %d\n" s pos;
+  in
+  pp_open_vbox fmt 0;
+  List.iter print_diff diffs;
+  pp_close_box fmt ();
+  pp_print_flush fmt ()
+
+let string_of_diffs diffs =
+  Format.asprintf "%a" db_print_diffs diffs
+
+(* Adjust the diffs returned by the Myers algorithm to reduce the span of the
+changes.  This gives more natural-looking diffs.
+
+While the Myers algorithm minimizes the number of changes between two
+sequences, it doesn't minimize the span of the changes.  For example,
+representing elements in common in lower case and inserted elements in upper
+case (but ignoring case in the algorithm), ABabC and abABC both have 3 changes
+(A, B and C).  However the span of the first sequence is 5 elements (ABabC)
+while the span of the second is 3 elements (ABC).
+
+The algorithm modifies the changes iteratively, for example ABabC -> aBAbC -> abABC
+
+dtype: identifies which of Added OR Removed to use; the other one is ignored.
+diff_list: output from the Myers algorithm
+*)
+let shorten_diff_span dtype diff_list =
+  let changed = ref false in
+  let diffs = Array.of_list diff_list in
+  let len = Array.length diffs in
+  let vinfo index =
+    match diffs.(index) with
+    | `Common (opos, npos, s) -> (`Common, opos, npos, s)
+    | `Removed (pos, s) -> (`Removed, pos, 0, s)
+    | `Added (pos, s) -> (`Added, 0, pos, s) in
+  let get_variant index =
+    let (v, _, _, _) = vinfo index in
+    v in
+  let get_str index =
+    let (_, _, _, s) = vinfo index in
+    s in
+
+  let iter start len lt incr = begin
+    let src = ref start in
+    let dst = ref start in
+    while (lt !src len) do
+      if (get_variant !src) = dtype then begin
+        if (lt !dst !src) then
+          dst := !src;
+        while (lt !dst len) && (get_variant !dst) <> `Common do
+          dst := !dst + incr;
+        done;
+        if (lt !dst len) && (get_str !src) = (get_str !dst) then begin
+          (* swap diff *)
+          let (_, c_opos, c_npos, str) = vinfo !dst
+          and (_, v_opos, v_npos, _) = vinfo !src in
+          changed := true;
+          if dtype = `Added then begin
+            diffs.(!src) <- `Common (c_opos, v_npos, str);
+            diffs.(!dst) <- `Added (c_npos, str);
+          end else begin
+            diffs.(!src) <- `Common (v_opos, c_npos, str);
+            diffs.(!dst) <- `Removed (c_opos, str)
+          end
+        end
+      end;
+      src := !src + incr
+    done
+  end in
+
+  iter 0 len (<) 1; (* left to right *)
+  iter (len-1) (-1) (>) (-1); (* right to left *)
+  if !changed then Array.to_list diffs else diff_list;;
+
+let has_changes diffs =
+  let rec has_changes_r diffs added removed =
+    match diffs with
+    | `Added _ :: t   -> has_changes_r t true removed
+    | `Removed _ :: t -> has_changes_r t added true
+    | h :: t -> has_changes_r t added removed
+    | [] -> (added, removed) in
+  has_changes_r diffs false false;;
+
+(* get the Myers diff of 2 lists of strings *)
+let diff_strs old_strs new_strs =
+  let diffs = List.rev (StringDiff.diff old_strs new_strs) in
+  shorten_diff_span `Removed (shorten_diff_span `Added diffs);;
+
+(* Default string tokenizer.  Makes each character a separate strin.
+Whitespace is not ignored.  Doesn't handle UTF-8 differences well. *)
+let def_tokenize_string s =
+  let limit = (String.length s) - 1 in
+  let strs : string list ref = ref [] in
+  for i = 0 to limit do
+    strs := (String.make 1 s.[i]) :: !strs
+  done;
+  List.rev !strs
+
+(* get the Myers diff of 2 strings *)
+let diff_str ?(tokenize_string=def_tokenize_string) old_str new_str =
+  let old_toks = Array.of_list (tokenize_string old_str)
+  and new_toks = Array.of_list (tokenize_string new_str) in
+  diff_strs old_toks new_toks;;
+
+let get_dinfo = function
+  | `Common (_, _, s) -> (`Common, s)
+  | `Removed (_, s) -> (`Removed, s)
+  | `Added (_, s) -> (`Added, s)
+
+[@@@ocaml.warning "-32"]
+let string_of_diff_type = function
+  | `Common  -> "Common"
+  | `Removed -> "Removed"
+  | `Added -> "Added"
+[@@@ocaml.warning "+32"]
+
+let wrap_in_bg diff_tag pp =
+  let open Pp in
+  (tag (Pp.start_pfx ^ diff_tag ^ ".bg") (str "")) ++ pp ++
+  (tag (Pp.end_pfx   ^ diff_tag ^ ".bg") (str ""))
+
+exception Diff_Failure of string
+
+let add_diff_tags which pp diffs  =
+  let open Pp in
+  let diff_tag = if which = `Added then "diff.added" else "diff.removed" in
+  let diffs : diff_list ref = ref diffs in
+  let in_diff = ref false in (* true = buf chars need a tag *)
+  let in_span = ref false in (* true = last pp had a start tag *)
+  let trans = ref false in   (* true = this diff starts/ends highlight *)
+  let buf = Buffer.create 16 in
+  let acc_pp = ref [] in
+  let diff_str, diff_ind, diff_len = ref "", ref 0, ref 0 in
+  let prev_dtype, dtype, next_dtype = ref `Common, ref `Common, ref `Common in
+  let is_white c = List.mem c [' '; '\t'; '\n'; '\r'] in
+
+  let skip () =
+    while !diffs <> [] &&
+      (let (t, _) = get_dinfo (List.hd !diffs) in
+        t <> `Common && t <> which)
+    do
+      diffs := List.tl !diffs
+    done
+  in
+
+  let put_tagged case =
+    if Buffer.length buf > 0 then begin
+      let pp = str (Buffer.contents buf) in
+      Buffer.clear buf;
+      let tagged = match case with
+      | ""      -> pp
+      | "tag"   -> tag diff_tag pp
+      | "start" -> in_span := true;  tag (start_pfx ^ diff_tag) pp
+      | "end"   -> in_span := false; tag (end_pfx ^ diff_tag) pp
+      | _ -> raise (Diff_Failure "invalid tag id in put_tagged, should be impossible") in
+      acc_pp := tagged :: !acc_pp
+    end
+  in
+
+  let output_pps () =
+    let next_diff_char_hl = if !diff_ind < !diff_len then !dtype = which else !next_dtype = which in
+    let tag = if not !in_diff then ""
+              else  if !in_span then
+                      if next_diff_char_hl then "" else "end"
+                    else
+                      if next_diff_char_hl then "start" else "tag" in
+    put_tagged tag;  (* flush any remainder *)
+    let l = !acc_pp in
+    acc_pp := [];
+    match List.length l with
+    | 0 -> str ""
+    | 1 -> List.hd l
+    | _ -> seq (List.rev l)
+  in
+
+  let maybe_next_diff () =
+    if !diff_ind = !diff_len && (skip(); !diffs <> []) then begin
+      let (t, s) = get_dinfo (List.hd !diffs) in
+      diff_str := s; diff_ind := 0; diff_len := String.length !diff_str;
+      diffs := List.tl !diffs; skip();
+      prev_dtype := !dtype;
+      dtype := t;
+      next_dtype := (match !diffs with
+        | diff2 :: _ -> let (nt, _) = get_dinfo diff2 in nt
+        | [] -> `Common);
+      trans := !dtype <> !prev_dtype
+    end;
+  in
+
+  let s_char c =
+    maybe_next_diff ();
+    (* matching first should handle tokens with spaces, e.g. in comments/strings *)
+    if !diff_ind < !diff_len && c = !diff_str.[!diff_ind] then begin
+      if !dtype = which && !trans && !diff_ind = 0 then begin
+        put_tagged "";
+        in_diff := true
+      end;
+      Buffer.add_char buf c;
+      diff_ind := !diff_ind + 1;
+      if !dtype = which && !dtype <> !next_dtype && !diff_ind = !diff_len then begin
+        put_tagged (if !in_span then "end" else "tag");
+        in_diff := false
+      end
+    end else if is_white c then
+      Buffer.add_char buf c
+    else begin
+      cprintf "mismatch: expected '%c' but got '%c'\n" !diff_str.[!diff_ind] c;
+      raise (Diff_Failure "string mismatch, shouldn't happen")
+    end
+  in
+
+  (* rearrange so existing tags are inside diff tags, provided that those tags
+    only contain Ppcmd_string's.  Other cases (e.g. tag of a box) are not supported. *)
+  (* todo: Is there a better way to do this in OCaml without multiple 'repr's? *)
+  let reorder_tags child pp_tag pp =
+    match repr child with
+    | Ppcmd_tag (t1, pp) -> tag t1 (tag pp_tag pp)
+    | Ppcmd_glue l ->
+      if List.exists (fun x ->
+          match repr x with
+          | Ppcmd_tag (_, _) -> true
+          | _ -> false)  l
+      then seq (List.map (fun x ->
+          match repr x with
+          | Ppcmd_tag (t2, pp2) -> tag t2 (tag pp_tag pp2)
+          | pp2 -> tag pp_tag (unrepr pp2))   l)
+      else child
+    | _ -> tag pp_tag child
+  in
+
+  let rec add_tags_r pp =
+    let r_pp = repr pp in
+    match r_pp with
+    | Ppcmd_string s -> String.iter s_char s; output_pps ()
+    | Ppcmd_glue l -> seq (List.map add_tags_r l)
+    | Ppcmd_box (block_type, pp) -> unrepr (Ppcmd_box (block_type, add_tags_r pp))
+    | Ppcmd_tag (pp_tag, pp) -> reorder_tags (add_tags_r pp) pp_tag pp
+    | _ -> pp
+  in
+  let (has_added, has_removed) = has_changes !diffs in
+  let rv = add_tags_r pp in
+  skip ();
+  if !diffs <> [] then
+    raise (Diff_Failure "left-over diff info at end of Pp.t, should be impossible");
+  if has_added || has_removed then wrap_in_bg diff_tag rv else rv;;
+
+let diff_pp ?(tokenize_string=def_tokenize_string) o_pp n_pp =
+  let open Pp in
+  let o_str = string_of_ppcmds o_pp in
+  let n_str = string_of_ppcmds n_pp in
+  let diffs = diff_str ~tokenize_string o_str n_str in
+  (add_diff_tags `Removed o_pp diffs, add_diff_tags `Added n_pp diffs);;
+
+let diff_pp_combined ?(tokenize_string=def_tokenize_string) ?(show_removed=false) o_pp n_pp =
+  let open Pp in
+  let o_str = string_of_ppcmds o_pp in
+  let n_str = string_of_ppcmds n_pp in
+  let diffs = diff_str ~tokenize_string o_str n_str in
+  let (_, has_removed) = has_changes diffs in
+  let added = add_diff_tags `Added n_pp diffs in
+  if show_removed && has_removed then
+    let removed = add_diff_tags `Removed o_pp diffs in
+    (v 0 (removed ++ cut() ++ added))
+  else added;;
diff --git a/lib/pp_diff.mli b/lib/pp_diff.mli
new file mode 100644
index 0000000000..03468271d2
--- /dev/null
+++ b/lib/pp_diff.mli
@@ -0,0 +1,116 @@
+(************************************************************************)
+(*         *   The Coq Proof Assistant / The Coq Development Team       *)
+(*  v      *   INRIA, CNRS and contributors - Copyright 1999-2018       *)
+(* <O___,, *       (see CREDITS file for the list of authors)           *)
+(*   \VV/  **************************************************************)
+(*    //   *    This file is distributed under the terms of the         *)
+(*         *     GNU Lesser General Public License Version 2.1          *)
+(*         *     (see LICENSE file for the text of the license)         *)
+(************************************************************************)
+
+(**
+Computes the differences between 2 Pp's and adds additional tags to a Pp
+to highlight them.  Strings are split into tokens using the Coq lexer,
+then the lists of tokens are diffed using the Myers algorithm.  A fixup routine,
+shorten_diff_span, shortens the span of the diff result in some cases.
+
+Highlights use 4 tags to specify the color and underline/strikeout.  These are
+"diffs.added", "diffs.removed", "diffs.added.bg" and "diffs.removed.bg".  The
+first two are for added or removed text; the last two are for unmodified parts
+of a modified item.  Diffs that span multiple strings in the Pp are tagged with
+"start.diff.*" and "end.diff.*", but only on the first and last strings of the span.
+
+If the inputs are not acceptable to the lexer, break the strings into
+lists of tokens and call diff_strs, then add_diff_tags with a Pp.t that matches
+the input lists of strings.  Tokens that the lexer doesn't return exactly as they
+appeared in the input will raise an exception in add_diff_tags (e.g. comments
+and quoted strings).  Fixing that requires tweaking the lexer.
+
+Limitations/Possible enhancements:
+
+- Make diff_pp immune to unlexable strings by adding a flag to the lexer.
+*)
+
+(** Compute the diff between two Pp.t structures and return
+versions of each with diffs highlighted as (old, new) *)
+val diff_pp : ?tokenize_string:(string -> string list) -> Pp.t -> Pp.t -> Pp.t * Pp.t
+
+(** Compute the diff between two Pp.t structures and return
+a highlighted Pp.t.  If [show_removed] is true, show separate lines for
+removals and additions, otherwise only show additions *)
+val diff_pp_combined : ?tokenize_string:(string -> string list) -> ?show_removed:bool -> Pp.t -> Pp.t -> Pp.t
+
+(** Raised if the diff fails *)
+exception Diff_Failure of string
+
+module StringDiff :
+sig
+  type elem = String.t
+  type t = elem array
+end
+
+type diff_type =
+  [ `Removed
+  | `Added
+  | `Common
+  ]
+
+type diff_list = StringDiff.elem Diff2.edit list
+
+(** Compute the difference between 2 strings in terms of tokens, using the
+lexer to identify tokens.
+
+If the strings are not lexable, this routine will raise Diff_Failure.
+(I expect to modify the lexer soon so this won't happen.)
+
+Therefore you should catch any exceptions.  The workaround for now is for the
+caller to tokenize the strings itself and then call diff_strs.
+*)
+val diff_str : ?tokenize_string:(string -> string list) -> string -> string -> StringDiff.elem Diff2.edit list
+
+(** Compute the differences between 2 lists of strings, treating the strings
+in the lists as indivisible units.
+*)
+val diff_strs : StringDiff.t -> StringDiff.t -> StringDiff.elem Diff2.edit list
+
+(** Generate a new Pp that adds tags marking diffs to a Pp structure:
+which: either `Added or `Removed, indicates which type of diffs to add
+pp: the original structure. For `Added, must be the new pp passed to diff_pp
+  For `Removed, must be the old pp passed to diff_pp.  Passing the wrong one
+  will likely raise Diff_Failure.
+diffs: the diff list returned by diff_pp
+
+Diffs of single strings in the Pp are tagged with "diff.added" or "diff.removed".
+Diffs that span multiple strings in the Pp are tagged with "start.diff.*" or
+"end.diff.*", but only on the first and last strings of the span.
+
+Ppcmd_strings will be split into multiple Ppcmd_strings if a diff starts or ends
+in the middle of the string.  Whitespace just before or just after a diff will
+not be part of the highlight.
+
+Prexisting tags in pp may contain only a single Ppcmd_string.  Those tags will be
+placed inside the diff tags to ensure proper nesting of tags within spans of
+"start.diff.*" ... "end.diff.*".
+
+Under some  "impossible" conditions, this routine may raise Diff_Failure.
+If you want to make your call especially bulletproof, catch this
+exception, print a user-visible message, then recall this routine with
+the first argument set to None, which will skip the diff.
+*)
+val add_diff_tags : diff_type -> Pp.t -> StringDiff.elem Diff2.edit list -> Pp.t
+
+(** Returns a boolean pair (added, removed) for [diffs] where a true value
+indicates that something was added/removed in the diffs.
+*)
+val has_changes : diff_list -> bool * bool
+
+val get_dinfo : StringDiff.elem Diff2.edit -> diff_type * string
+
+(** Returns a modified [pp] with the background highlighted with
+"start.<diff_tag>.bg" and "end.<diff_tag>.bg" tags at the beginning
+and end of the returned Pp.t
+*)
+val wrap_in_bg : string -> Pp.t -> Pp.t
+
+(** Displays the diffs to a printable format for debugging *)
+val string_of_diffs : diff_list -> string
author	Emilio Jesus Gallego Arias	2018-07-24 11:03:52 +0200
committer	Emilio Jesus Gallego Arias	2018-07-24 11:03:52 +0200
commit	4ab54f3cca76632cb6e258c84abc259e15e9e9f8 (patch)
tree	c4374a0986acd6d4f6cac1a03e6bfa5ba7c972c9 /lib
parent	580a026070ab74d05f38e1177632be83a8756566 (diff)
parent	97069f69ab3a58cc4ccbaa1a835912c6c31dde4d (diff)