diff options
| author | herbelin | 2008-04-13 21:41:54 +0000 |
|---|---|---|
| committer | herbelin | 2008-04-13 21:41:54 +0000 |
| commit | bd0219b62a60cfc58c3c25858b41a005727c68be (patch) | |
| tree | d718b8cca8d3e1f9c5c75a4be8e90dcd0f2f009c /lib | |
| parent | db49598f897eec7482e2c26a311f77a52201416e (diff) | |
Bugs, nettoyage, et améliorations diverses
- vérification de la cohérence des ident pour éviter une option -R
avec des noms non parsables (la vérification est faite dans
id_of_string ce qui est très exigeant; faudrait-il une solution plus
souple ?)
- correction message d'erreur inapproprié dans le apply qui descend dans les
conjonctions
- nettoyage autour de l'échec en présence de métas dans le prim_refiner
- nouveau message d'erreur quand des variables ne peuvent être instanciées
- quelques simplifications et davantage de robustesse dans inversion
- factorisation du code de constructor and co avec celui de econstructor and co
Documentation des tactiques
- edestruct/einduction/ecase/eelim et nouveautés apply
- nouvelle sémantique des intropatterns disjonctifs et documentation des
pattern -> et <-
- relecture de certaines parties du chapitre tactique
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@10785 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/util.ml | 182 | ||||
| -rw-r--r-- | lib/util.mli | 7 |
2 files changed, 181 insertions, 8 deletions
diff --git a/lib/util.ml b/lib/util.ml index ce336ac34c..b317c849cc 100644 --- a/lib/util.ml +++ b/lib/util.ml @@ -50,15 +50,8 @@ let pi3 (_,_,a) = a (* Characters *) -let is_letter c = - (c >= 'a' && c <= 'z') or - (c >= 'A' && c <= 'Z') or - (c >= '\248' && c <= '\255') or - (c >= '\192' && c <= '\214') or - (c >= '\216' && c <= '\246') - +let is_letter c = (c >= 'a' && c <= 'z') or (c >= 'A' && c <= 'Z') let is_digit c = (c >= '0' && c <= '9') - let is_ident_tail c = is_letter c or is_digit c or c = '\'' or c = '_' @@ -126,6 +119,179 @@ module Stringset = Set.Make(struct type t = string let compare = compare end) module Stringmap = Map.Make(struct type t = string let compare = compare end) +type utf8_status = Utf8Letter | Utf8IdentPart | Utf8Symbol + +exception UnsupportedUtf8 + +let classify_utf8 unicode = + match unicode land 0x1F000 with + | 0x0 -> + begin match unicode with + (* utf-8 Basic Latin underscore *) + | x when x = 0x005F -> Utf8Letter + (* utf-8 Basic Latin letters *) + | x when 0x0041 <= x & x <= 0x005A -> Utf8Letter + | x when 0x0061 <= x & x <= 0x007A -> Utf8Letter + (* utf-8 Basic Latin digits and quote *) + | x when 0x0030 <= x & x <= 0x0039 or x = 0x0027 -> Utf8IdentPart + (* utf-8 Basic Latin symbols *) + | x when x <= 0x007F -> Utf8Symbol + (* utf-8 Latin-1 non breaking space U00A0 *) + | 0x00A0 -> Utf8Letter + (* utf-8 Latin-1 symbols U00A1-00BF *) + | x when 0x00A0 <= x & x <= 0x00BF -> Utf8Symbol + (* utf-8 Latin-1 letters U00C0-00D6 *) + | x when 0x00C0 <= x & x <= 0x00D6 -> Utf8Letter + (* utf-8 Latin-1 symbol U00D7 *) + | 0x00D7 -> Utf8Symbol + (* utf-8 Latin-1 letters U00D8-00F6 *) + | x when 0x00D8 <= x & x <= 0x00F6 -> Utf8Letter + (* utf-8 Latin-1 symbol U00F7 *) + | 0x00F7 -> Utf8Symbol + (* utf-8 Latin-1 letters U00F8-00FF *) + | x when 0x00F8 <= x & x <= 0x00FF -> Utf8Letter + (* utf-8 Latin Extended A U0100-017F and Latin Extended B U0180-U0241 *) + | x when 0x0100 <= x & x <= 0x0241 -> Utf8Letter + (* utf-8 Phonetic letters U0250-02AF *) + | x when 0x0250 <= x & x <= 0x02AF -> Utf8Letter + (* utf-8 what do to with diacritics U0300-U036F ? *) + (* utf-8 Greek letters U0380-03FF *) + | x when 0x0380 <= x & x <= 0x03FF -> Utf8Letter + (* utf-8 Cyrillic letters U0400-0481 *) + | x when 0x0400 <= x & x <= 0x0481 -> Utf8Letter + (* utf-8 Cyrillic symbol U0482 *) + | 0x0482 -> Utf8Symbol + (* utf-8 what do to with diacritics U0483-U0489 \ U0487 ? *) + (* utf-8 Cyrillic letters U048A-U4F9 (Warning: 04CF) *) + | x when 0x048A <= x & x <= 0x04F9 -> Utf8Letter + (* utf-8 Cyrillic supplement letters U0500-U050F *) + | x when 0x0500 <= x & x <= 0x050F -> Utf8Letter + (* utf-8 Hebrew letters U05D0-05EA *) + | x when 0x05D0 <= x & x <= 0x05EA -> Utf8Letter + (* utf-8 Arabic letters U0621-064A *) + | x when 0x0621 <= x & x <= 0x064A -> Utf8Letter + (* utf-8 Arabic supplement letters U0750-076D *) + | x when 0x0750 <= x & x <= 0x076D -> Utf8Letter + | _ -> raise UnsupportedUtf8 + end + | 0x1000 -> + begin match unicode with + (* utf-8 Georgian U10A0-10FF (has holes) *) + | x when 0x10A0 <= x & x <= 0x10FF -> Utf8Letter + (* utf-8 Hangul Jamo U1100-11FF (has holes) *) + | x when 0x1100 <= x & x <= 0x11FF -> Utf8Letter + (* utf-8 Latin additional letters U1E00-1E9B and U1EA0-1EF9 *) + | x when 0x1E00 <= x & x <= 0x1E9B -> Utf8Letter + | x when 0x1EA0 <= x & x <= 0x1EF9 -> Utf8Letter + | _ -> raise UnsupportedUtf8 + end + | 0x2000 -> + begin match unicode with + (* utf-8 general punctuation U2080-2089 *) + (* Hyphens *) + | x when 0x2010 <= x & x <= 0x2011 -> Utf8Letter + (* Dashes and other symbols *) + | x when 0x2012 <= x & x <= 0x2027 -> Utf8Symbol + (* Per mille and per ten thousand signs *) + | x when 0x2030 <= x & x <= 0x2031 -> Utf8Symbol + (* Prime letters *) + | x when 0x2032 <= x & x <= 0x2034 or x = 0x2057 -> Utf8IdentPart + (* Miscellaneous punctuation *) + | x when 0x2039 <= x & x <= 0x2056 -> Utf8Symbol + | x when 0x2058 <= x & x <= 0x205E -> Utf8Symbol + (* Invisible mathematical operators *) + | x when 0x2061 <= x & x <= 0x2063 -> Utf8Symbol + (* utf-8 superscript U2070-207C *) + | x when 0x2070 <= x & x <= 0x207C -> Utf8Symbol + (* utf-8 subscript U2080-2089 *) + | x when 0x2080 <= x & x <= 0x2089 -> Utf8IdentPart + (* utf-8 letter-like U2100-214F *) + | x when 0x2100 <= x & x <= 0x214F -> Utf8Letter + (* utf-8 number-forms U2153-2183 *) + | x when 0x2153 <= x & x <= 0x2183 -> Utf8Symbol + (* utf-8 arrows A U2190-21FF *) + (* utf-8 mathematical operators U2200-22FF *) + (* utf-8 miscellaneous technical U2300-23FF *) + | x when 0x2190 <= x & x <= 0x23FF -> Utf8Symbol + (* utf-8 box drawing U2500-257F has ceiling, etc. *) + (* utf-8 block elements U2580-259F *) + (* utf-8 geom. shapes U25A0-25FF (has triangles, losange, etc) *) + (* utf-8 miscellaneous symbols U2600-26FF *) + | x when 0x2500 <= x & x <= 0x26FF -> Utf8Symbol + (* utf-8 arrows B U2900-297F *) + | x when 0x2900 <= x & x <= 0x297F -> Utf8Symbol + (* utf-8 mathematical operators U2A00-2AFF *) + | x when 0x2A00 <= x & x <= 0x2AFF -> Utf8Symbol + (* utf-8 bold symbols U2768-U2775 *) + | x when 0x2768 <= x & x <= 0x2775 -> Utf8Symbol + (* utf-8 arrows and brackets U27E0-U27FF *) + | x when 0x27E0 <= x & x <= 0x27FF -> Utf8Symbol + (* utf-8 brackets, braces and parentheses *) + | x when 0x2980 <= x & x <= 0x299F -> Utf8Symbol + (* utf-8 miscellaneous including double-plus U29F0-U29FF *) + | x when 0x29F0 <= x & x <= 0x29FF -> Utf8Symbol + | _ -> raise UnsupportedUtf8 + end + | _ -> + begin match unicode with + (* utf-8 Hiragana U3040-309F and Katakana U30A0-30FF *) + | x when 0x3040 <= x & x <= 0x30FF -> Utf8Letter + (* utf-8 Unified CJK Ideographs U4E00-9FA5 *) + | x when 0x4E00 <= x & x <= 0x9FA5 -> Utf8Letter + (* utf-8 Hangul syllables UAC00-D7AF *) + | x when 0xAC00 <= x & x <= 0xD7AF -> Utf8Letter + (* utf-8 Gothic U10330-1034A *) + | x when 0x10330 <= x & x <= 0x1034A -> Utf8Letter + | _ -> raise UnsupportedUtf8 + end + +exception End_of_input + +let next_utf8 s i = + let err () = invalid_arg "utf8" in + let l = String.length s - i in + if l = 0 then raise End_of_input + else let a = Char.code s.[i] in if a <= 0x7F then + 1, a + else if a land 0x40 = 0 then err () + else if l = 1 then err () + else let b = Char.code s.[i+1] in if b land 0xC0 <> 0x80 then err () + else if a land 0x20 = 0 then + 2, (a land 0x1F) lsl 6 + (b land 0x3F) + else if l = 2 then err () + else let c = Char.code s.[i+2] in if c land 0xC0 <> 0x80 then err () + else if a land 0x10 = 0 then + 3, (a land 0x0F) lsl 12 + (b land 0x3F) lsl 6 + (c land 0x3F) + else if l = 3 then err () + else let d = Char.code s.[i+3] in if d land 0xC0 <> 0x80 then err () + else if a land 0x08 = 0 then + 4, (a land 0x07) lsl 18 + (b land 0x3F) lsl 12 + + (c land 0x3F) lsl 6 + (d land 0x3F) + else err () + +(* Check the well-formedness of an identifier *) + +let check_ident s = + let i = ref 0 in + if s <> ".." then try + let j, n = next_utf8 s 0 in + match classify_utf8 n with + | Utf8Letter -> + i := !i + j; + begin try + while true do + let j, n = next_utf8 s !i in + match classify_utf8 n with + | Utf8Letter | Utf8IdentPart -> i := !i + j + | _ -> error + ("invalid character "^(String.sub s !i j)^" in identifier "^s) + done + with End_of_input -> () end + | _ -> error (s^": an identifier should start with a letter") + with + | End_of_input -> error "The empty string is not an identifier" + | Invalid_argument _ -> error (s^": invalid utf8 sequence") + (* Lists *) let list_intersect l1 l2 = diff --git a/lib/util.mli b/lib/util.mli index befe844bc8..bcaa08b2ec 100644 --- a/lib/util.mli +++ b/lib/util.mli @@ -78,6 +78,13 @@ val parse_loadpath : string -> string list module Stringset : Set.S with type elt = string module Stringmap : Map.S with type key = string +type utf8_status = Utf8Letter | Utf8IdentPart | Utf8Symbol + +exception UnsupportedUtf8 + +val classify_utf8 : int -> utf8_status +val check_ident : string -> unit + (*s Lists. *) val list_add_set : 'a -> 'a list -> 'a list |
