From 57c6ffd23836364168ffd1c66dbddbecf830c7c6 Mon Sep 17 00:00:00 2001
From: Hugo Herbelin
Date: Thu, 13 Oct 2016 15:57:14 +0200
Subject: Stopping warning on unrecognized unicode character in notation
 (fixing #5136).

The warning was pointless since the notation was accepted and parsed
anyway.

We now treat unrecognized Unicode characters like ordinary
undefined tokens (e.g. "#" in a bare Coq session).

For instance, "aₚ", or ".ₚ", or "?ₚ" now fail with "Undefined token"
rather than "Unsupported Unicode character".
---
 lib/unicode.ml | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'lib/unicode.ml')

diff --git a/lib/unicode.ml b/lib/unicode.ml
index dc852d9819..ced5e258c2 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -8,9 +8,7 @@
 
 (** Unicode utilities *)
 
-type status = Letter | IdentPart | Symbol
-
-exception Unsupported
+type status = Letter | IdentPart | Symbol | Unknown
 
 (* The following table stores classes of Unicode characters that
    are used by the lexer. There are 3 different classes so 2 bits are
@@ -29,6 +27,7 @@ let mask i = function
   | Letter    -> 1 lsl ((i land 7) lsl 1) (* 01 *)
   | IdentPart -> 2 lsl ((i land 7) lsl 1) (* 10 *)
   | Symbol    -> 3 lsl ((i land 7) lsl 1) (* 11 *)
+  | Unknown   -> 0 lsl ((i land 7) lsl 1) (* 00 *)
 
 (* Helper to reset 2 bits in a word. *)
 let reset_mask i =
@@ -55,7 +54,7 @@ let lookup x =
     if      v = 1 then Letter
     else if v = 2 then IdentPart
     else if v = 3 then Symbol
-    else raise Unsupported
+    else Unknown
 
 (* [classify] discriminates between 3 different kinds of
    symbols based on the standard unicode classification (extracted from
@@ -215,7 +214,6 @@ let ident_refutation s =
         |x -> x
   with
   | End_of_input -> Some (true,"The empty string is not an identifier.")
-  | Unsupported -> Some (true,s^": unsupported character in utf8 sequence.")
   | Invalid_argument _ -> Some (true,s^": invalid utf8 sequence.")
 
 let lowercase_unicode =
-- 
cgit v1.2.3