From 70a139733dfaa42d665a85736ee4324d926723ed Mon Sep 17 00:00:00 2001 From: Jim Fehrle Date: Sun, 7 Oct 2018 15:14:56 -0700 Subject: Make diffs work for more input strings Diff code uses the lexer to recognize tokens in the inputs, which can be Pp.t's or strings. To add the highlights in the Pp.t, the diff code matches characters in the input to characters in the tokens. Current code fails for inputs containing quote marks or "(*" because the quote marks and comments don't appear in the tokens. This commit adds a "diff mode" to the lexer to return those characters, making the diff routine more robust. --- test-suite/unit-tests/printing/proof_diffs_test.ml | 35 ++++++++++++---------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'test-suite') diff --git a/test-suite/unit-tests/printing/proof_diffs_test.ml b/test-suite/unit-tests/printing/proof_diffs_test.ml index 7f9e6cc6e0..d0b8d21b69 100644 --- a/test-suite/unit-tests/printing/proof_diffs_test.ml +++ b/test-suite/unit-tests/printing/proof_diffs_test.ml @@ -51,23 +51,28 @@ let t () = assert_equal ~msg:"has `Removed" ~printer:string_of_bool true has_removed let _ = add_test "diff_str add/remove" t -(* example of a limitation, not really a test *) -let t () = - try - let _ = diff_str "a" ">" in - assert_failure "unlexable string gives an exception" - with _ -> () -let _ = add_test "diff_str unlexable" t - -(* problematic examples for tokenize_string: - comments omitted - quoted string loses quote marks (are escapes supported/handled?) - char constant split into 2 +(* lexer tweaks: + comments are lexed as multiple tokens + strings tokens include begin/end quotes and embedded "" + single multibyte characters returned even if they're not keywords + + inputs that give a lexer failure (but no use case needs them yet): + ".12" + unterminated string + invalid UTF-8 sequences *) let t () = - List.iter (fun x -> cprintf "'%s' " x) (tokenize_string "(* comment *) \"string\" 'c' xx"); - cprintf "\n" -let _ = add_test "tokenize_string examples" t + let str = "(* comment.field *) ?id () \"str\"\"ing\" \\ := Ж > ∃ 'c' xx" in + let toks = tokenize_string str in + (*List.iter (fun x -> cprintf "'%s' " x) toks;*) + (*cprintf "\n";*) + let str_no_white = String.concat "" (String.split_on_char ' ' str) in + assert_equal ~printer:(fun x -> x) str_no_white (String.concat "" toks); + List.iter (fun s -> + assert_equal ~msg:("'" ^ s ^ "' is a single token") ~printer:string_of_bool true (List.mem s toks)) + [ "(*"; "()"; ":="] + +let _ = add_test "tokenize_string/diff_mode in lexer" t open Pp -- cgit v1.2.3