diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/pp.ml | 32 | ||||
| -rw-r--r-- | lib/pp.mli | 3 |
2 files changed, 35 insertions, 0 deletions
@@ -168,6 +168,38 @@ let utf8_length s = done ; !cnt +(* Variant of String.sub for UTF8 character positions *) +let utf8_sub s start_u len_u = + let len_b = String.length s + and end_u = start_u + len_u + and cnt = ref 0 + and nc = ref 0 + and p = ref 0 in + let start_b = ref len_b in + while !p < len_b && !cnt < end_u do + if !cnt <= start_u then start_b := !p ; + begin + match s.[!p] with + | '\000'..'\127' -> nc := 0 (* ascii char *) + | '\128'..'\191' -> nc := 0 (* cannot start with a continuation byte *) + | '\192'..'\223' -> nc := 1 (* expect 1 continuation byte *) + | '\224'..'\239' -> nc := 2 (* expect 2 continuation bytes *) + | '\240'..'\247' -> nc := 3 (* expect 3 continuation bytes *) + | '\248'..'\251' -> nc := 4 (* expect 4 continuation bytes *) + | '\252'..'\253' -> nc := 5 (* expect 5 continuation bytes *) + | '\254'..'\255' -> nc := 0 (* invalid byte *) + end ; + incr p ; + while !p < len_b && !nc > 0 do + match s.[!p] with + | '\128'..'\191' (* next continuation byte *) -> incr p ; decr nc + | _ (* not a continuation byte *) -> nc := 0 + done ; + incr cnt + done ; + let end_b = !p in + String.sub s !start_b (end_b - !start_b) + (* formatting commands *) let str s = Glue.atom(Ppcmd_print (Str_def s)) let stras (i, s) = Glue.atom(Ppcmd_print (Str_len (s, i))) diff --git a/lib/pp.mli b/lib/pp.mli index a18744c376..56b82e4489 100644 --- a/lib/pp.mli +++ b/lib/pp.mli @@ -100,6 +100,9 @@ val close_tag : unit -> std_ppcmds val string_of_ppcmds : std_ppcmds -> string +val utf8_length : string -> int +val utf8_sub : string -> int -> int -> string + (** {6 Printing combinators} *) val pr_comma : unit -> std_ppcmds |
