From 508d5a99101097948b6de342295eec0d5c8cbe72 Mon Sep 17 00:00:00 2001 From: Pierre-Marie Pédrot Date: Thu, 3 Mar 2016 18:59:51 +0100 Subject: Fixing bug #4105: poor escaping in the protocol between CoqIDE and coqtop. Printing invalid UTF-8 string startled GTK too much, leading to CoqIDE dying improperly. We now check that all strings outputed by Coq are proper UTF-8. This is not perfect, as CoqIDE will sometimes truncate strings which contains the null character, but at least it should not crash. --- lib/unicode.ml | 7 +++++++ lib/unicode.mli | 3 +++ 2 files changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/unicode.ml b/lib/unicode.ml index 1765e93dcd..cfaa73cc11 100644 --- a/lib/unicode.ml +++ b/lib/unicode.ml @@ -168,6 +168,13 @@ let next_utf8 s i = (c land 0x3F) lsl 6 + (d land 0x3F) else err () +let is_utf8 s = + let rec check i = + let (off, _) = next_utf8 s i in + check (i + off) + in + try check 0 with End_of_input -> true | Invalid_argument _ -> false + (* Check the well-formedness of an identifier *) let initial_refutation j n s = diff --git a/lib/unicode.mli b/lib/unicode.mli index 520203d435..65e75a20d6 100644 --- a/lib/unicode.mli +++ b/lib/unicode.mli @@ -26,3 +26,6 @@ val lowercase_first_char : string -> string (** For extraction, turn a unicode string into an ascii-only one *) val is_basic_ascii : string -> bool val ascii_of_ident : string -> string + +(** Validate an UTF-8 string *) +val is_utf8 : string -> bool -- cgit v1.2.3