diff options
| author | regisgia | 2013-09-02 09:20:59 +0000 |
|---|---|---|
| committer | regisgia | 2013-09-02 09:20:59 +0000 |
| commit | a318e3c86bf8e9e849817a96069268bcf0a9d2e2 (patch) | |
| tree | e165622ea994d70789436767abcf17aa3d2abae2 | |
| parent | 3ea0f070e68af32a696a859b3c863dbd2d4f8a4f (diff) | |
* parsing/Lexer: Cosmetics.
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@16754 85f007b7-540e-0410-9357-904b9bb8a0f7
| -rw-r--r-- | parsing/lexer.ml4 | 135 |
1 files changed, 74 insertions, 61 deletions
diff --git a/parsing/lexer.ml4 b/parsing/lexer.ml4 index 680139b12e..4173270c10 100644 --- a/parsing/lexer.ml4 +++ b/parsing/lexer.ml4 @@ -87,13 +87,13 @@ module Error = struct let to_string x = "Syntax Error: Lexer: " ^ (match x with - | Illegal_character -> "Illegal character" - | Unterminated_comment -> "Unterminated comment" - | Unterminated_string -> "Unterminated string" - | Undefined_token -> "Undefined token" - | Bad_token tok -> Format.sprintf "Bad token %S" tok - | UnsupportedUnicode x -> - Printf.sprintf "Unsupported Unicode character (0x%x)" x) + | Illegal_character -> "Illegal character" + | Unterminated_comment -> "Unterminated comment" + | Unterminated_string -> "Unterminated string" + | Undefined_token -> "Undefined token" + | Bad_token tok -> Format.sprintf "Bad token %S" tok + | UnsupportedUnicode x -> + Printf.sprintf "Unsupported Unicode character (0x%x)" x) (* Require to fix the Camlp4 signature *) let print ppf x = Pp.pp_with ppf (Pp.str (to_string x)) @@ -144,22 +144,22 @@ let lookup_utf8_tail c cs = if Int.equal (c1 land 0x20) 0 then match Stream.npeek 2 cs with | [_;c2] -> - check_utf8_trailing_byte cs c2; - 2, (c1 land 0x1F) lsl 6 + (Char.code c2 land 0x3F) + check_utf8_trailing_byte cs c2; + 2, (c1 land 0x1F) lsl 6 + (Char.code c2 land 0x3F) | _ -> error_utf8 cs else if Int.equal (c1 land 0x10) 0 then match Stream.npeek 3 cs with | [_;c2;c3] -> - check_utf8_trailing_byte cs c2; check_utf8_trailing_byte cs c3; - 3, (c1 land 0x0F) lsl 12 + (Char.code c2 land 0x3F) lsl 6 + - (Char.code c3 land 0x3F) + check_utf8_trailing_byte cs c2; check_utf8_trailing_byte cs c3; + 3, (c1 land 0x0F) lsl 12 + (Char.code c2 land 0x3F) lsl 6 + + (Char.code c3 land 0x3F) | _ -> error_utf8 cs else match Stream.npeek 4 cs with | [_;c2;c3;c4] -> - check_utf8_trailing_byte cs c2; check_utf8_trailing_byte cs c3; - check_utf8_trailing_byte cs c4; - 4, (c1 land 0x07) lsl 18 + (Char.code c2 land 0x3F) lsl 12 + - (Char.code c3 land 0x3F) lsl 6 + (Char.code c4 land 0x3F) + check_utf8_trailing_byte cs c2; check_utf8_trailing_byte cs c3; + check_utf8_trailing_byte cs c4; + 4, (c1 land 0x07) lsl 18 + (Char.code c2 land 0x3F) lsl 12 + + (Char.code c3 land 0x3F) lsl 6 + (Char.code c4 land 0x3F) | _ -> error_utf8 cs in try Unicode.classify unicode, n @@ -180,10 +180,10 @@ let check_keyword str = let rec loop_symb = parser | [< ' (' ' | '\n' | '\r' | '\t' | '"') >] -> bad_token str | [< s >] -> - match unlocated lookup_utf8 s with - | Utf8Token (_,n) -> njunk n s; loop_symb s - | AsciiChar -> Stream.junk s; loop_symb s - | EmptyStream -> () + match unlocated lookup_utf8 s with + | Utf8Token (_,n) -> njunk n s; loop_symb s + | AsciiChar -> Stream.junk s; loop_symb s + | EmptyStream -> () in loop_symb (Stream.of_string str) @@ -191,7 +191,8 @@ let check_keyword_to_add s = try check_keyword s with Error.E (UnsupportedUnicode unicode) -> Flags.if_verbose msg_warning - (strbrk (Printf.sprintf "Token '%s' contains unicode character 0x%x which will not be parsable." s unicode)) + (strbrk (Printf.sprintf "Token '%s' contains unicode character 0x%x \ + which will not be parsable." s unicode)) let check_ident str = let rec loop_id intail = parser @@ -200,11 +201,13 @@ let check_ident str = | [< ' ('0'..'9' | ''') when intail; s >] -> loop_id true s | [< s >] -> - match unlocated lookup_utf8 s with - | Utf8Token (Unicode.Letter, n) -> njunk n s; loop_id true s - | Utf8Token (Unicode.IdentPart, n) when intail -> njunk n s; loop_id true s - | EmptyStream -> () - | Utf8Token _ | AsciiChar -> bad_token str + match unlocated lookup_utf8 s with + | Utf8Token (Unicode.Letter, n) -> njunk n s; loop_id true s + | Utf8Token (Unicode.IdentPart, n) when intail -> + njunk n s; + loop_id true s + | EmptyStream -> () + | Utf8Token _ | AsciiChar -> bad_token str in loop_id false (Stream.of_string str) @@ -257,7 +260,7 @@ let rec ident_tail len = parser | [< s >] -> match lookup_utf8 s with | Utf8Token ((Unicode.IdentPart | Unicode.Letter), n) -> - ident_tail (nstore n len s) s + ident_tail (nstore n len s) s | _ -> len let rec number len = parser @@ -270,21 +273,28 @@ let rec string in_comments bp len = parser | [< ''('; s >] -> (parser | [< ''*'; s >] -> - string (Option.map succ in_comments) bp (store (store len '(') '*') s + string + (Option.map succ in_comments) + bp (store (store len '(') '*') + s | [< >] -> - string in_comments bp (store len '(') s) s + string in_comments bp (store len '(') s) s | [< ''*'; s >] -> (parser | [< '')'; s >] -> let () = match in_comments with | Some 0 -> - msg_warning (strbrk "Not interpreting \"*)\" as the end of current non-terminated comment because it occurs in a non-terminated string of the comment.") + msg_warning + (strbrk + "Not interpreting \"*)\" as the end of current \ + non-terminated comment because it occurs in a \ + non-terminated string of the comment.") | _ -> () in let in_comments = Option.map pred in_comments in - string in_comments bp (store (store len '*') ')') s + string in_comments bp (store (store len '*') ')') s | [< >] -> - string in_comments bp (store len '*') s) s + string in_comments bp (store len '*') s) s | [< 'c; s >] -> string in_comments bp (store len c) s | [< _ = Stream.empty >] ep -> err (bp, ep) Unterminated_string @@ -320,7 +330,7 @@ let push_char c = (List.mem c [' ';'\t']&& (Int.equal (Buffer.length current) 0 || not (let s = Buffer.contents current in - List.mem s.[String.length s - 1] [' ';'\t';'\n';'\r']))) + List.mem s.[String.length s - 1] [' ';'\t';'\n';'\r']))) then real_push_char c @@ -341,7 +351,9 @@ let comment_stop ep = let bp = match !comment_begin with Some bp -> bp | None -> - msgerrnl(str"No begin location for comment '"++str current_s ++str"' ending at "++int ep); + msgerrnl(str "No begin location for comment '" + ++ str current_s ++str"' ending at " + ++ int ep); ep-1 in Pp.comments := ((bp,ep),current_s) :: !Pp.comments); Buffer.clear current; @@ -406,11 +418,11 @@ and progress_utf8 last nj n c tt cs = else match Util.List.skipn (nj+1) (Stream.npeek (nj+n) cs) with | l when Int.equal (List.length l) (n - 1) -> - List.iter (check_utf8_trailing_byte cs) l; - let tt = List.fold_left (fun tt c -> CharMap.find c tt.branch) tt l in - update_longest_valid_token last (nj+n) tt cs + List.iter (check_utf8_trailing_byte cs) l; + let tt = List.fold_left (fun tt c -> CharMap.find c tt.branch) tt l in + update_longest_valid_token last (nj+n) tt cs | _ -> - error_utf8 cs + error_utf8 cs with Not_found -> last @@ -430,9 +442,9 @@ let process_chars bp c cs = match t with | Some t -> (KEYWORD t, (bp, ep)) | None -> - let ep' = bp + utf8_char_size cs c in - njunk (ep' - ep) cs; - err (bp, ep') Undefined_token + let ep' = bp + utf8_char_size cs c in + njunk (ep' - ep) cs; + err (bp, ep') Undefined_token let token_of_special c s = match c with | '$' -> METAIDENT s @@ -448,7 +460,7 @@ let parse_after_special c bp = | [< s >] -> match lookup_utf8 s with | Utf8Token (Unicode.Letter, n) -> - token_of_special c (get_buff (ident_tail (nstore n 0 s) s)) + token_of_special c (get_buff (ident_tail (nstore n 0 s) s)) | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars bp c s) (* Parse what follows a question mark *) @@ -458,9 +470,10 @@ let parse_after_qmark bp s = | Some ('a'..'z' | 'A'..'Z' | '_') -> LEFTQMARK | None -> KEYWORD "?" | _ -> - match lookup_utf8 s with - | Utf8Token (Unicode.Letter, _) -> LEFTQMARK - | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars bp '?' s) + match lookup_utf8 s with + | Utf8Token (Unicode.Letter, _) -> LEFTQMARK + | AsciiChar | Utf8Token _ | EmptyStream -> + fst (process_chars bp '?' s) let blank_or_eof cs = match Stream.peek cs with @@ -481,8 +494,8 @@ let rec next_token = parser bp for instance ".(", or followed by a blank or eof. *) let () = match t with | KEYWORD "." -> - if not (blank_or_eof s) then err (bp,ep+1) Undefined_token; - if Flags.do_beautify() then between_com := true; + if not (blank_or_eof s) then err (bp,ep+1) Undefined_token; + if Flags.do_beautify() then between_com := true; | _ -> () in (t, (bp,ep)) @@ -504,23 +517,23 @@ let rec next_token = parser bp | [< ''*'; s >] -> comm_loc bp; push_string "(*"; - comment bp s; - next_token s + comment bp s; + next_token s | [< t = process_chars bp c >] -> comment_stop bp; t >] -> t | [< s >] -> match lookup_utf8 s with - | Utf8Token (Unicode.Letter, n) -> - let len = ident_tail (nstore n 0 s) s in - let id = get_buff len in - let ep = Stream.count s in - comment_stop bp; - (try find_keyword id s with Not_found -> IDENT id), (bp, ep) - | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart), _) -> - let t = process_chars bp (Stream.next s) s in - comment_stop bp; t - | EmptyStream -> - comment_stop bp; (EOI, (bp, bp + 1)) + | Utf8Token (Unicode.Letter, n) -> + let len = ident_tail (nstore n 0 s) s in + let id = get_buff len in + let ep = Stream.count s in + comment_stop bp; + (try find_keyword id s with Not_found -> IDENT id), (bp, ep) + | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart), _) -> + let t = process_chars bp (Stream.next s) s in + comment_stop bp; t + | EmptyStream -> + comment_stop bp; (EOI, (bp, bp + 1)) (* (* Debug: uncomment this for tracing tokens seen by coq...*) let next_token s = @@ -579,7 +592,7 @@ let func cs = Stream.from (fun i -> let (tok, loc) = next_token cs in - loct_add loct i (make_loc loc); Some tok) + loct_add loct i (make_loc loc); Some tok) in current_location_table := loct; (ts, loct_func loct) |
