diff options
| author | Pierre Letouzey | 2016-05-19 15:18:26 +0200 |
|---|---|---|
| committer | Pierre Letouzey | 2016-05-19 15:18:26 +0200 |
| commit | 244d7a9aafe7ad613dd2095ca3126560cb3ea1d0 (patch) | |
| tree | 26102e433f0072ab32f724fa231693510119c37b /lib/unicode.ml | |
| parent | c14e6eebc6c3696623a440cd7eaa4a8d8fe4f492 (diff) | |
Unicode.ascii_of_ident is now truly injective
A non-ASCII character is now converted to _UUxxxx_, where xxxx is its Unicode
code point in hexadecimal. Any preexisting _UU substring in the identifier is converted to _UUU.
The switch from __Uxxxx_ to _UUxxxx_ is cosmetic: it just helps the extraction
(fewer __ in names). But the other part of the patch (detection of preexisting
_UU substrings) is critical to make ascii_of_ident truly injective. Previously,
α was encoded as __U03b1_, the same string as the plain ASCII identifier
__U03b1_, which allowed the following kind of proof of False via native_compute:
Definition α := 1.
Definition __U03b1_ := 2.
Lemma oups : False.
Proof.
assert (α = __U03b1_). { native_compute. reflexivity. }
discriminate.
Qed.
Diffstat (limited to 'lib/unicode.ml')
| -rw-r--r-- | lib/unicode.ml | 30 |
1 files changed, 20 insertions, 10 deletions
diff --git a/lib/unicode.ml b/lib/unicode.ml index 938e8f1a99..7aa8d9d513 100644 --- a/lib/unicode.ml +++ b/lib/unicode.ml @@ -240,14 +240,24 @@ let is_basic_ascii s = !ok let ascii_of_ident s = - if is_basic_ascii s then s else - let i = ref 0 and out = ref "" in - begin try while true do + let len = String.length s in + let has_UU i = + i+2 < len && s.[i]='_' && s.[i+1]='U' && s.[i+2]='U' + in + let i = ref 0 in + while !i < len && Char.code s.[!i] < 128 && not (has_UU !i) do + incr i + done; + if !i = len then s else + let out = Buffer.create (2*len) in + Buffer.add_substring out s 0 !i; + while !i < len do let j, n = next_utf8 s !i in - out := - if n >= 128 - then Printf.sprintf "%s__U%04x_" !out n - else Printf.sprintf "%s%c" !out s.[!i]; - i := !i + j - done with End_of_input -> () end; - !out + if n >= 128 then + (Printf.bprintf out "_UU%04x_" n; i := !i + j) + else if has_UU !i then + (Buffer.add_string out "_UUU"; i := !i + 3) + else + (Buffer.add_char out s.[!i]; incr i) + done; + Buffer.contents out |
