diff options
| author | Emilio Jesus Gallego Arias | 2017-01-30 15:28:01 +0100 |
|---|---|---|
| committer | Emilio Jesus Gallego Arias | 2017-02-15 00:35:26 +0100 |
| commit | 3cdcad29ee9d28b0cb39740004da90a0fe291543 (patch) | |
| tree | d5518ea65638cc486aeeb652530b725925e96d98 /lib | |
| parent | a92492652c146c4c51a94922345ddf4c168cdcf4 (diff) | |
[unicode] Address comments in PR#314.
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/unicode.ml | 27 |
1 files changed, 4 insertions, 23 deletions
```diff
diff --git a/lib/unicode.ml b/lib/unicode.ml
index 3ac4e8ca7c..959ccaf73c 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -124,30 +124,11 @@ exception End_of_input
 let utf8_of_unicode n =
   if n < 128 then
     String.make 1 (Char.chr n)
-  else if n < 2048 then
-    String.init 2 (fun idx ->
-      match idx with
-      | 0 -> Char.chr (192 + n / 64)
-      | 1 -> Char.chr (128 + n mod 64)
-      | _ -> 'x'
-    )
-  else if n < 65536 then
-    String.init 3 (fun idx ->
-      match idx with
-      | 0 -> Char.chr (224 + n / 4096)
-      | 1 -> Char.chr (128 + (n / 64) mod 64)
-      | 2 -> Char.chr (128 + n mod 64)
-      | _ -> 'x'
-    )
   else
-    String.init 4 (fun idx ->
-      match idx with
-      | 0 -> Char.chr (240 + n / 262144)
-      | 1 -> Char.chr (128 + (n / 4096) mod 64)
-      | 2 -> Char.chr (128 + (n / 64) mod 64)
-      | 4 -> Char.chr (128 + n mod 64)
-      | _ -> 'x'
-    )
+    let (m,s) = if n < 2048 then (2,192) else if n < 65536 then (3,224) else (4,240) in
+    String.init m (fun i ->
+      let j = (n lsr ((m - 1 - i) * 6)) land 63 in
+      Char.chr (j + if i = 0 then s else 128))
 
 (* If [s] is some UTF-8 encoded string
    and [i] is a position of some UTF-8 character within [s]
```
