diff options
Diffstat (limited to 'lib/unicode.ml')
| -rw-r--r-- | lib/unicode.ml | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/lib/unicode.ml b/lib/unicode.ml index 1765e93dcd..05998bb804 100644 --- a/lib/unicode.ml +++ b/lib/unicode.ml @@ -18,7 +18,7 @@ exception Unsupported to simplify the masking process. (This choice seems to be a good trade-off between speed and space after some benchmarks.) *) -(* A 256ko table, initially filled with zeros. *) +(* A 256 KiB table, initially filled with zeros. *) let table = Array.make (1 lsl 17) 0 (* Associate a 2-bit pattern to each status at position [i]. @@ -147,6 +147,11 @@ let utf8_of_unicode n = s end +(* If [s] is some UTF-8 encoded string + and [i] is a position of some UTF-8 character within [s] + then [next_utf8 s i] returns [(j,n)] where: + - [j] indicates the position of the next UTF-8 character + - [n] represents the UTF-8 character at index [i] *) let next_utf8 s i = let err () = invalid_arg "utf8" in let l = String.length s - i in |
