diff options
| author | Hugo Herbelin | 2016-10-13 15:57:14 +0200 |
|---|---|---|
| committer | Hugo Herbelin | 2016-10-17 20:22:17 +0200 |
| commit | 57c6ffd23836364168ffd1c66dbddbecf830c7c6 (patch) | |
| tree | 051f302085ef8a33f90bd0533053d158e1b29b6d /lib/unicode.ml | |
| parent | 4204581ccb8bdf0f6c4298029c010c6deb643594 (diff) | |
Stopping warning on unrecognized unicode character in notation (fixing #5136).
The warning was pointless since the notation was accepted and parsed
anyway.
We now treat unrecognized unicode characters like ordinary
undefined tokens (e.g. "#" in a bare Coq).
For instance, "aₚ", or ".ₚ", or "?ₚ" now fail with "Undefined token"
rather than "Unsupported Unicode character".
Diffstat (limited to 'lib/unicode.ml')
| -rw-r--r-- | lib/unicode.ml | 8 |
1 files changed, 3 insertions, 5 deletions
```diff
diff --git a/lib/unicode.ml b/lib/unicode.ml
index dc852d9819..ced5e258c2 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -8,9 +8,7 @@
 
 (** Unicode utilities *)
 
-type status = Letter | IdentPart | Symbol
-
-exception Unsupported
+type status = Letter | IdentPart | Symbol | Unknown
 
 (* The following table stores classes of Unicode characters that are
    used by the lexer. There are 3 different classes so 2 bits are
@@ -29,6 +27,7 @@ let mask i = function
   | Letter -> 1 lsl ((i land 7) lsl 1) (* 01 *)
   | IdentPart -> 2 lsl ((i land 7) lsl 1) (* 10 *)
   | Symbol -> 3 lsl ((i land 7) lsl 1) (* 11 *)
+  | Unknown -> 0 lsl ((i land 7) lsl 1) (* 00 *)
 
 (* Helper to reset 2 bits in a word. *)
 let reset_mask i =
@@ -55,7 +54,7 @@ let lookup x =
     if v = 1 then Letter
     else if v = 2 then IdentPart
     else if v = 3 then Symbol
-    else raise Unsupported
+    else Unknown
 
 (* [classify] discriminates between 3 different kinds of symbols based on
    the standard unicode classification (extracted from
@@ -215,7 +214,6 @@ let ident_refutation s =
     |x -> x
   with
   | End_of_input -> Some (true,"The empty string is not an identifier.")
-  | Unsupported -> Some (true,s^": unsupported character in utf8 sequence.")
   | Invalid_argument _ -> Some (true,s^": invalid utf8 sequence.")
 
 let lowercase_unicode =
```
