diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/util.ml | 20 | ||||
| -rw-r--r-- | lib/util.mli | 1 |
2 files changed, 21 insertions, 0 deletions
diff --git a/lib/util.ml b/lib/util.ml index ea5da9e3c2..ad48e7981a 100644 --- a/lib/util.ml +++ b/lib/util.ml @@ -363,6 +363,26 @@ let lowercase_first_char_utf8 s = let j, n = next_utf8 s 0 in utf8_of_unicode (lowercase_unicode n) +(** For extraction, we need to encode unicode character into ascii ones *) + +let ascii_of_ident s = + let check_ascii s = + let ok = ref true in + String.iter (fun c -> if Char.code c >= 128 then ok := false) s; + !ok + in + if check_ascii s then s else + let i = ref 0 and out = ref "" in + begin try while true do + let j, n = next_utf8 s !i in + out := + if n >= 128 + then Printf.sprintf "%s__U%04x_" !out n + else Printf.sprintf "%s%c" !out s.[!i]; + i := !i + j + done with End_of_input -> () end; + !out + (* Lists *) let rec list_compare cmp l1 l2 = diff --git a/lib/util.mli b/lib/util.mli index 8c811d4390..0976d742c8 100644 --- a/lib/util.mli +++ b/lib/util.mli @@ -107,6 +107,7 @@ val classify_unicode : int -> utf8_status val check_ident : string -> unit val check_ident_soft : string -> unit val lowercase_first_char_utf8 : string -> string +val ascii_of_ident : string -> string (** {6 Lists. } *) |
