COMMENTS: added to the "Unicode" module.

author: Matej Kosik 2015-12-16 12:55:40 +0100
committer: Matej Kosik 2015-12-18 15:57:49 +0100
commit: 5174ee7e118d2bc57fc2d8a6619101735af79b16 (patch)
tree: d387c3bcf3f7358b7aefbfa5e3d556b46f8b32ec /lib
parent: ca42472322013714050b98756aeaa222908fbe67 (diff)
2 files changed, 22 insertions, 7 deletions
diff --git a/lib/unicode.ml b/lib/unicode.ml
index 1765e93dcd..05998bb804 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -18,7 +18,7 @@ exception Unsupported
    to simplify the masking process. (This choice seems to be a good
    trade-off between speed and space after some benchmarks.) *)
 
-(* A 256ko table, initially filled with zeros. *)
+(* A 256 KiB table, initially filled with zeros. *)
 let table = Array.make (1 lsl 17) 0
 
 (* Associate a 2-bit pattern to each status at position [i].
@@ -147,6 +147,11 @@ let utf8_of_unicode n =
       s
     end
 
+(* If [s] is some UTF-8 encoded string
+   and [i] is a position of some UTF-8 character within [s]
+   then [next_utf8 s i] returns [(j,n)] where:
+   - [j] indicates the position of the next UTF-8 character
+   - [n] represents the UTF-8 character at index [i] *)
 let next_utf8 s i =
   let err () = invalid_arg "utf8" in
   let l = String.length s - i in
diff --git a/lib/unicode.mli b/lib/unicode.mli
index 098f6c919d..eb75f00c28 100644
--- a/lib/unicode.mli
+++ b/lib/unicode.mli
@@ -10,19 +10,29 @@
 
 type status = Letter | IdentPart | Symbol
 
+(** This exception is raised when the input string contains unsupported UTF-8 characters. *)
 exception Unsupported
 
-(** Classify a unicode char into 3 classes, or raise [Unsupported] *)
+(** Classify a unicode char into 3 classes.
+    @raise Unsupported if the given unicode char is not supported. *)
 val classify : int -> status
 
-(** Check whether a given string be used as a legal identifier.
-    - [None] means yes
-    - [Some (b,s)] means no, with explanation [s] and severity [b] *)
+(** Return [None] if a given string can be used as a (Coq) identifier.
+    Return [Some (b,s)] otherwise, where [s] is an explanation and [b] is the severity.
+    @raise Unsupported if the input string contains unsupported UTF-8 characters. *)
 val ident_refutation : string -> (bool * string) option
 
-(** First char of a string, converted to lowercase *)
+(** First char of a string, converted to lowercase.
+    @raise Unsupported if the input string contains unsupported UTF-8 characters.
+    @raise Assert_failure if the input string is empty. *)
 val lowercase_first_char : string -> string
 
-(** For extraction, turn a unicode string into an ascii-only one *)
+(** Return [true] if all UTF-8 characters in the input string are plain ASCII characters.
+    Return [false] otherwise. *)
 val is_basic_ascii : string -> bool
+
+(** [ascii_of_ident s] maps a UTF-8 string to a string composed solely of ASCII characters.
+    Those UTF-8 characters which do not have their ASCII counterparts are
+    translated to ["__Uxxxx_"] where {i xxxx} are four hexadecimal digits.
+    @raise Unsupported if the input string contains unsupported UTF-8 characters. *)
 val ascii_of_ident : string -> string
author	Matej Kosik	2015-12-16 12:55:40 +0100
committer	Matej Kosik	2015-12-18 15:57:49 +0100
commit	5174ee7e118d2bc57fc2d8a6619101735af79b16 (patch)
tree	d387c3bcf3f7358b7aefbfa5e3d556b46f8b32ec /lib
parent	ca42472322013714050b98756aeaa222908fbe67 (diff)