From 5174ee7e118d2bc57fc2d8a6619101735af79b16 Mon Sep 17 00:00:00 2001 From: Matej Kosik Date: Wed, 16 Dec 2015 12:55:40 +0100 Subject: COMMENTS: added to the "Unicode" module. --- lib/unicode.ml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib/unicode.ml') diff --git a/lib/unicode.ml b/lib/unicode.ml index 1765e93dcd..05998bb804 100644 --- a/lib/unicode.ml +++ b/lib/unicode.ml @@ -18,7 +18,7 @@ exception Unsupported to simplify the masking process. (This choice seems to be a good trade-off between speed and space after some benchmarks.) *) -(* A 256ko table, initially filled with zeros. *) +(* A 256 KiB table, initially filled with zeros. *) let table = Array.make (1 lsl 17) 0 (* Associate a 2-bit pattern to each status at position [i]. @@ -147,6 +147,11 @@ let utf8_of_unicode n = s end +(* If [s] is some UTF-8 encoded string + and [i] is a position of some UTF-8 character within [s] + then [next_utf8 s i] returns [(j,n)] where: + - [j] indicates the position of the next UTF-8 character + - [n] represents the UTF-8 character at index [i] *) let next_utf8 s i = let err () = invalid_arg "utf8" in let l = String.length s - i in -- cgit v1.2.3 From 508d5a99101097948b6de342295eec0d5c8cbe72 Mon Sep 17 00:00:00 2001 From: Pierre-Marie Pédrot Date: Thu, 3 Mar 2016 18:59:51 +0100 Subject: Fixing bug #4105: poor escaping in the protocol between CoqIDE and coqtop. Printing invalid UTF-8 string startled GTK too much, leading to CoqIDE dying improperly. We now check that all strings outputed by Coq are proper UTF-8. This is not perfect, as CoqIDE will sometimes truncate strings which contains the null character, but at least it should not crash. --- lib/unicode.ml | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib/unicode.ml') diff --git a/lib/unicode.ml b/lib/unicode.ml index 1765e93dcd..cfaa73cc11 100644 --- a/lib/unicode.ml +++ b/lib/unicode.ml @@ -168,6 +168,13 @@ let next_utf8 s i = (c land 0x3F) lsl 6 + (d land 0x3F) else err () +let is_utf8 s = + let rec check i = + let (off, _) = next_utf8 s i in + check (i + off) + in + try check 0 with End_of_input -> true | Invalid_argument _ -> false + (* Check the well-formedness of an identifier *) let initial_refutation j n s = -- cgit v1.2.3