aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorPierre-Marie Pédrot2016-03-03 18:59:51 +0100
committerPierre-Marie Pédrot2016-03-03 19:33:49 +0100
commit508d5a99101097948b6de342295eec0d5c8cbe72 (patch)
tree6287f344756f10dc67b1c36182480a0b76ad3c4f /lib
parentdb2c6f0054d3e05f82da7494ce790c04b1976401 (diff)
Fixing bug #4105: poor escaping in the protocol between CoqIDE and coqtop.
Printing invalid UTF-8 string startled GTK too much, leading to CoqIDE dying improperly. We now check that all strings outputed by Coq are proper UTF-8. This is not perfect, as CoqIDE will sometimes truncate strings which contains the null character, but at least it should not crash.
Diffstat (limited to 'lib')
-rw-r--r--lib/unicode.ml7
-rw-r--r--lib/unicode.mli3
2 files changed, 10 insertions, 0 deletions
diff --git a/lib/unicode.ml b/lib/unicode.ml
index 1765e93dcd..cfaa73cc11 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -168,6 +168,13 @@ let next_utf8 s i =
(c land 0x3F) lsl 6 + (d land 0x3F)
else err ()
+let is_utf8 s =
+ let rec check i =
+ let (off, _) = next_utf8 s i in
+ check (i + off)
+ in
+ try check 0 with End_of_input -> true | Invalid_argument _ -> false
+
(* Check the well-formedness of an identifier *)
let initial_refutation j n s =
diff --git a/lib/unicode.mli b/lib/unicode.mli
index 520203d435..65e75a20d6 100644
--- a/lib/unicode.mli
+++ b/lib/unicode.mli
@@ -26,3 +26,6 @@ val lowercase_first_char : string -> string
(** For extraction, turn a unicode string into an ascii-only one *)
val is_basic_ascii : string -> bool
val ascii_of_ident : string -> string
+
+(** Validate an UTF-8 string *)
+val is_utf8 : string -> bool