aboutsummaryrefslogtreecommitdiff
path: root/parsing/cLexer.mli
blob: af4b7ba334bc26d129b0121c0c69a759e20c942a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
(************************************************************************)
(*         *   The Coq Proof Assistant / The Coq Development Team       *)
(*  v      *         Copyright INRIA, CNRS and contributors             *)
(* <O___,, * (see version control and CREDITS file for authors & dates) *)
(*   \VV/  **************************************************************)
(*    //   *    This file is distributed under the terms of the         *)
(*         *     GNU Lesser General Public License Version 2.1          *)
(*         *     (see LICENSE file for the text of the license)         *)
(************************************************************************)

(** Whether a keyword starts a quotation.

  When registering a keyword, one can declare that it starts a quotation.
  In particular, using QUOTATION("name:") in a grammar rule
  declares "name:" as a keyword, and the token QUOTATION is
  matched whenever the keyword is followed by an identifier or a
  parenthesized text. E.g.

    constr:x
    string:[....]
    ltac:(....)
    ltac:{....}

  The delimiter is made of one or more occurrences of the same parenthesis,
  e.g. ((.....)) or [[[[....]]]]. The idea is that if the text happens to
  contain the closing delimiter, one can make the delimiter longer and avoid
  confusion (no escaping is needed). E.g.

    string:[[ .. ']' .. ]]


  Nesting the delimiter is allowed, e.g. ((..((...))..)) is OK.

  Keywords don't need to end in ':'. *)
type starts_quotation = NoQuotation | Quotation

(** [add_keyword ?quotation s] registers [s] as a keyword. The optional
    [quotation] argument tells whether [s] starts a quotation (see
    [starts_quotation]); the value used when it is omitted is not visible
    from this interface.

    This should be functional but it is not due to the interface: the
    function returns [unit], so it presumably updates the state exposed by
    [get_keyword_state] / [set_keyword_state] (TODO confirm against the
    implementation). *)
val add_keyword : ?quotation:starts_quotation -> string -> unit

(** [remove_keyword s] presumably unregisters the keyword [s], undoing
    [add_keyword]. NOTE(review): the behavior when [s] is not a keyword is
    not specified here. *)
val remove_keyword : string -> unit

(** [is_keyword s] tells whether [s] is a keyword (presumably: currently
    registered through [add_keyword]). *)
val is_keyword : string -> bool

(** [keywords ()] returns a set of strings, presumably the keywords that are
    currently registered. *)
val keywords : unit -> CString.Set.t

(** Abstract type of keyword states: values are obtained from
    [get_keyword_state] and consumed by [set_keyword_state]. Its
    representation is hidden by this interface. *)
type keyword_state

(** [set_keyword_state st] makes [st] the current keyword state, presumably
    so that a state saved earlier with [get_keyword_state] can be
    restored. *)
val set_keyword_state : keyword_state -> unit

(** [get_keyword_state ()] returns the current keyword state. *)
val get_keyword_state : unit -> keyword_state

(** [check_ident s] checks that [s] is an identifier. It returns [unit], so
    a failed check is presumably reported by raising an exception; which
    one is not visible from this interface (TODO confirm). *)
val check_ident : string -> unit

(** [is_ident s] tells whether [s] is an identifier; presumably the boolean
    counterpart of [check_ident]. *)
val is_ident : string -> bool

(** [check_keyword s] checks that [s] is acceptable as a keyword. It returns
    [unit], so a failed check is presumably reported by raising an
    exception (TODO confirm). *)
val check_keyword : string -> unit

(** [terminal s] returns the token pattern associated with the string [s].
    When [s] is not an ident, returns a keyword.
    NOTE(review): the result for a string that is an ident is not stated
    here; presumably an identifier pattern -- confirm against the
    implementation. *)
val terminal : string -> string Tok.p

(** [terminal_number s] returns the token pattern associated with the
    number [s].
    Precondition: the input is a number (cf. [NumTok.t]). The behavior when
    the precondition does not hold is not specified here. *)
val terminal_number : string -> NumTok.Unsigned.t Tok.p

(** The lexer of Coq: an instance of the [Gramlib.Plexing.S] signature whose
    token type [te] is [Tok.t] and whose pattern type is [Tok.p]. *)

module Lexer :
  Gramlib.Plexing.S with type te = Tok.t and type 'c pattern = 'c Tok.p

(** Errors of this module (presumably lexing errors -- TODO confirm). *)
module Error : sig
  (** Abstract description of an error; its representation is hidden. *)
  type t

  (** Exception carrying an error description. NOTE(review): presumably
      raised by the lexers declared in this file; the raising sites are not
      visible from this interface. *)
  exception E of t

  (** [to_string e] returns a textual rendering of the error [e]. *)
  val to_string : t -> string
end

(** A lexer with alternate handling for computing diffs.
It ensures that, ignoring white space, the concatenated tokens equal the input
string.  Specifically:
- for strings, return the enclosing quotes as tokens and treat the quoted value
as if it was unquoted, possibly becoming multiple tokens
- for comments, return the "(*" as a token and treat the contents of the comment as if
it was not in a comment, possibly becoming multiple tokens
- return any unrecognized ASCII or UTF-8 character as a string

NOTE(review): an earlier wording of this comment described a lexer-creating
function taking a boolean flag; no such function is declared in this
interface, so the description is presumed to apply to the module below.
It has the same signature constraints as [Lexer].
*)

module LexerDiff :
  Gramlib.Plexing.S with type te = Tok.t and type 'c pattern = 'c Tok.p