From 526791d917f9b0804376eae02a462a3b32dd7cba Mon Sep 17 00:00:00 2001 From: Hugo Herbelin Date: Wed, 4 Oct 2017 22:00:19 +0200 Subject: Distinguishing pseudo-letters out of the set of unicode letters. This includes _ and the non-breaking space, which can be used in idents; this allows more precise heuristics. --- lib/unicode.mli | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'lib/unicode.mli') diff --git a/lib/unicode.mli b/lib/unicode.mli index c7d7424801..a608d5f026 100644 --- a/lib/unicode.mli +++ b/lib/unicode.mli @@ -8,7 +8,7 @@ (** Unicode utilities *) -type status = Letter | IdentPart | Symbol | Unknown +type status (** Classify a unicode char into 3 classes or unknown. *) val classify : int -> status @@ -17,6 +17,15 @@ val classify : int -> status Return [Some (b,s)] otherwise, where [s] is an explanation and [b] is severity. *) val ident_refutation : string -> (bool * string) option +(** Tells if a valid initial character for an identifier *) +val is_valid_ident_initial : status -> bool + +(** Tells if a valid non-initial character for an identifier *) +val is_valid_ident_trailing : status -> bool + +(** Tells if a character is unclassified *) +val is_unknown : status -> bool + (** First char of a string, converted to lowercase @raise Assert_failure if the input string is empty. *) val lowercase_first_char : string -> string -- cgit v1.2.3 From 40260a31cd197f655e6d3e0570a88d96fc1a9cac Mon Sep 17 00:00:00 2001 From: Hugo Herbelin Date: Wed, 4 Oct 2017 15:37:36 +0200 Subject: Fixing BZ#5769 (variable of type "_something" was named after invalid "_"). --- lib/unicode.mli | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/unicode.mli') diff --git a/lib/unicode.mli b/lib/unicode.mli index a608d5f026..49b8444937 100644 --- a/lib/unicode.mli +++ b/lib/unicode.mli @@ -30,6 +30,10 @@ val is_unknown : status -> bool @raise Assert_failure if the input string is empty. 
*) val lowercase_first_char : string -> string +(** Split a string supposed to be an ident at the first letter; + as an optimization, return None if the first character is a letter *) +val split_at_first_letter : string -> (string * string) option + (** Return [true] if all UTF-8 characters in the input string are just plain ASCII characters. Returns [false] otherwise. *) val is_basic_ascii : string -> bool -- cgit v1.2.3