author     Jim Fehrle    2019-07-21 14:28:10 -0700
committer  Jim Fehrle    2019-07-28 14:43:26 -0700
commit     e33f5d2d3930ab7818abccef4bf2326c72b348eb (patch)
tree       dcc6c734a8ab85fae627851b9591bde48d3e69d0
parent     cd6fc50854285f02bf151e94bdfb819988531fd2 (diff)
Update documentation on tokens, use "int" and "num"
for integers and natural nums
-rw-r--r--  doc/sphinx/language/coq-library.rst                      36
-rw-r--r--  doc/sphinx/language/gallina-specification-language.rst  107
-rw-r--r--  doc/sphinx/proof-engine/ltac.rst                          18
-rw-r--r--  doc/sphinx/proof-engine/ltac2.rst                          8
-rw-r--r--  doc/tools/docgram/doc_grammar.ml                          38
5 files changed, 126 insertions, 81 deletions
diff --git a/doc/sphinx/language/coq-library.rst b/doc/sphinx/language/coq-library.rst
index d1b95e6203..ac75240cfb 100644
--- a/doc/sphinx/language/coq-library.rst
+++ b/doc/sphinx/language/coq-library.rst
@@ -7,22 +7,20 @@ The |Coq| library
single: Theories
-The |Coq| library is structured into two parts:
+The |Coq| library has two parts:
- * **The initial library**: it contains elementary logical notions and
- data-types. It constitutes the basic state of the system directly
- available when running |Coq|;
+ * **The basic library**: definitions and theorems for
+ the most commonly used elementary logical notions and
+ data types. |Coq| normally loads these files automatically when it starts.
- * **The standard library**: general-purpose libraries containing various
- developments of |Coq| axiomatizations about sets, lists, sorting,
- arithmetic, etc. This library comes with the system and its modules
- are directly accessible through the ``Require`` command (see
- Section :ref:`compiled-files`);
+ * **The standard library**: general-purpose libraries with
+ definitions and theorems for sets, lists, sorting,
+ arithmetic, etc. To use these files, users must load them explicitly
+ with the ``Require`` command (see :ref:`compiled-files`).
-In addition, user-provided libraries or developments are provided by
-|Coq| users' community. These libraries and developments are available
-for download at http://coq.inria.fr (see
-Section :ref:`userscontributions`).
+There are also many libraries provided by the |Coq| users' community.
+These libraries and developments are available
+for download at http://coq.inria.fr (see :ref:`userscontributions`).
This chapter briefly reviews the |Coq| libraries whose contents can
also be browsed at http://coq.inria.fr/stdlib.
@@ -514,8 +512,8 @@ realizability interpretation.
forall (A B:Prop) (P:Type), (A -> B -> P) -> A /\ B -> P.
-Basic Arithmetics
-~~~~~~~~~~~~~~~~~
+Basic Arithmetic
+~~~~~~~~~~~~~~~~
The basic library includes a few elementary properties of natural
numbers, together with the definitions of predecessor, addition and
@@ -804,8 +802,8 @@ Notation Interpretation
=============== ===================
-Notations for integer arithmetics
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Notations for integer arithmetic
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. index::
single: Arithmetical notations
@@ -822,7 +820,7 @@ Notations for integer arithmetics
The following table describes the syntax of expressions
-for integer arithmetics. It is provided by requiring and opening the module ``ZArith`` and opening scope ``Z_scope``.
+for integer arithmetic. It is provided by requiring and opening the module ``ZArith`` and opening scope ``Z_scope``.
It specifies how notations are interpreted and, when not
already reserved, the precedence and associativity.
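
As a minimal sketch of the documented usage (assuming only the standard
``ZArith`` module), the integer notations become available once the module is
required and ``Z_scope`` is open::

    Require Import ZArith.
    Open Scope Z_scope.
    Check 3 + 4 * 2.   (* 3 + 4 * 2 : Z *)
    Compute 5 - 7.     (* = -2 : Z *)
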
@@ -866,7 +864,7 @@ Notations for real numbers
This is provided by requiring and opening the module ``Reals`` and
opening scope ``R_scope``. This set of notations is very similar to
-the notation for integer arithmetics. The inverse function was added.
+the notation for integer arithmetic. The inverse function was added.
=============== ===================
Notation Interpretation
diff --git a/doc/sphinx/language/gallina-specification-language.rst b/doc/sphinx/language/gallina-specification-language.rst
index 38f6714f46..91dfa34494 100644
--- a/doc/sphinx/language/gallina-specification-language.rst
+++ b/doc/sphinx/language/gallina-specification-language.rst
@@ -44,78 +44,91 @@ Lexical conventions
===================
Blanks
- Space, newline and horizontal tabulation are considered as blanks.
+ Space, newline and horizontal tab are considered blanks.
Blanks are ignored but they separate tokens.
Comments
- Comments in Coq are enclosed between ``(*`` and ``*)``, and can be nested.
- They can contain any character. However, :token:`string` literals must be
+ Comments are enclosed between ``(*`` and ``*)``. They can be nested.
+ They can contain any character. However, embedded :token:`string` literals must be
correctly closed. Comments are treated as blanks.
-Identifiers and access identifiers
+Identifiers and field identifiers
Identifiers, written :token:`ident`, are sequences of letters, digits, ``_`` and
- ``'``, that do not start with a digit or ``'``. That is, they are
- recognized by the following lexical class:
+ ``'``, that do not start with a digit or ``'``. That is, they are
+ recognized by the following grammar (except that the string ``_`` is reserved;
+ it is not a valid identifier):
.. productionlist:: coq
- first_letter : a..z ∣ A..Z ∣ _ ∣ unicode-letter
- subsequent_letter : a..z ∣ A..Z ∣ 0..9 ∣ _ ∣ ' ∣ unicode-letter ∣ unicode-id-part
ident : `first_letter`[`subsequent_letter`…`subsequent_letter`]
- access_ident : .`ident`
+ field : .`ident`
+ first_letter : a..z ∣ A..Z ∣ _ ∣ `unicode_letter`
+ subsequent_letter : `first_letter` ∣ 0..9 ∣ ' ∣ `unicode_id_part`
All characters are meaningful. In particular, identifiers are case-sensitive.
- The entry ``unicode-letter`` non-exhaustively includes Latin,
+ :production:`unicode_letter` non-exhaustively includes Latin,
Greek, Gothic, Cyrillic, Arabic, Hebrew, Georgian, Hangul, Hiragana
and Katakana characters, CJK ideographs, mathematical letter-like
- symbols, hyphens, non-breaking space, … The entry ``unicode-id-part``
+ symbols and non-breaking space. :production:`unicode_id_part`
non-exhaustively includes symbols for prime letters and subscripts.
- Access identifiers, written :token:`access_ident`, are identifiers prefixed by
- `.` (dot) without blank. They are used in the syntax of qualified
- identifiers.
+ Field identifiers, written :token:`field`, are identifiers prefixed by
+ `.` (dot) with no blank between the dot and the identifier. They are used in
+ the syntax of qualified identifiers.
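
For instance (a minimal sketch using only names from the prelude), a qualified
identifier is a module name followed by one or more field identifiers::

    Check Datatypes.nat.             (* one field identifier after a module name *)
    Check Coq.Init.Datatypes.nat.    (* fully qualified name: several field identifiers *)
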
Numerals
- Numerals are sequences of digits with a potential fractional part
- and exponent. Integers are numerals without fractional nor exponent
- part and optionally preceded by a minus sign. Underscores ``_`` can
- be used as comments in numerals.
+ Numerals are sequences of digits with an optional fractional part
+ and exponent. :token:`int` is an integer: a numeral with no fractional
+ or exponent part, optionally preceded by a minus sign. :token:`num` is a
+ non-negative integer. Underscores embedded in the digits are ignored;
+ for example, ``1_000_000`` is the same as ``1000000``.
.. productionlist:: coq
- digit : 0..9
+ numeral : `num`[. `num`][`exp`[`sign`]`num`]
+ int : [-]`num`
num : `digit`…`digit`
- integer : [-]`num`
- dot : .
+ digit : 0..9
exp : e | E
sign : + | -
- numeral : `num`[`dot` `num`][`exp`[`sign`]`num`]
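
A small sketch of the numeral forms above (it assumes the underscore syntax
described here is supported by the version of |Coq| being run)::

    Check 1000.       (* 1000 : nat *)
    Check 1_000.      (* underscores in the digits are ignored: same as 1000 *)
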
Strings
- Strings are delimited by ``"`` (double quote), and enclose a sequence of
- any characters different from ``"`` or the sequence ``""`` to denote the
- double quote character. In grammars, the entry for quoted strings is
- :production:`string`.
+ Strings begin and end with ``"`` (double quote). Use ``""`` to represent
+ a double quote character within a string. In the grammar, strings are
+ identified with :production:`string`.
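
A short sketch of the quoting rule (assuming the standard ``String`` library)::

    Require Import Coq.Strings.String.
    Open Scope string_scope.
    Check "say ""hi""".   (* a string literal containing embedded double quotes *)
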
Keywords
- The following identifiers are reserved keywords, and cannot be
- employed otherwise::
-
- _ as at cofix else end exists exists2 fix for
- forall fun if IF in let match mod return
- SProp Prop Set Type then using where with
-
-Special tokens
- The following sequences of characters are special tokens::
-
- ! % & && ( () ) * + ++ , - -> . .( ..
- / /\ : :: :< := :> ; < <- <-> <: <= <> =
- => =_D > >-> >= ? ?= @ [ \/ ] ^ { | |-
- || } ~ #[
-
- Lexical ambiguities are resolved according to the “longest match”
- rule: when a sequence of non alphanumerical characters can be
- decomposed into several different ways, then the first token is the
- longest possible one (among all tokens defined at this moment), and so
- on.
+ The following character sequences are reserved keywords that cannot be
+ used as identifiers::
+
+ _ Axiom CoFixpoint Definition Fixpoint Hypothesis IF Parameter Prop
+ SProp Set Theorem Type Variable as at by cofix discriminated else
+ end exists exists2 fix for forall fun if in lazymatch let match
+ multimatch return then using where with
+
+ Note that plugins may define additional keywords when they are loaded.
+
+Other tokens
+ The set of
+ tokens defined at any given time can vary because the :cmd:`Notation`
+ command can define new tokens. A :cmd:`Require` command may load more notation definitions,
+ while the end of a :cmd:`Section` may remove notations. Some notations
+ are defined in the basic library (see :ref:`thecoqlibrary`) and are normally
+ loaded automatically at startup time.
+
+ Here are the character sequences that Coq directly defines as tokens
+ without using :cmd:`Notation` (omitting 25 specialized tokens that begin with
+ ``#int63_``)::
+
+ ! #[ % & ' ( () (bfs) (dfs) ) * ** + , - ->
+ . .( .. ... / : ::= := :> :>> ; < <+ <- <:
+ <<: <= = => > >-> >= ? @ @{ [ [= ] _ _eqn
+ `( `{ { {| | |- || }
+
+ When multiple tokens match the beginning of a sequence of characters,
+ the longest matching token is used.
+ Occasionally you may need to insert spaces to separate tokens. For example,
+ if ``~`` and ``~~`` are both defined as tokens, the inputs ``~ ~`` and
+ ``~~`` generate different tokens, whereas if ``~~`` is not defined, then the
+ two inputs are equivalent.
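
To make the longest-match rule concrete, here is a sketch that introduces
``~~`` as a new token (the notation itself is an illustrative assumption,
similar to the one defined by ssreflect)::

    Notation "~~ b" := (negb b) (at level 35, right associativity).
    Check ~ ~ True.       (* two "~" tokens: double negation of a Prop *)
    Check ~~ false.       (* one "~~" token: boolean negation *)
    Fail Check ~~ True.   (* longest match reads "~~", and negb does not apply to a Prop *)
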
.. _term:
@@ -164,7 +177,7 @@ is described in Chapter :ref:`syntaxextensionsandinterpretationscopes`.
: ( `name` [: `term`] := `term` )
: ' `pattern`
name : `ident` | _
- qualid : `ident` | `qualid` `access_ident`
+ qualid : `ident` | `qualid` `field`
sort : SProp | Prop | Set | Type
fix_bodies : `fix_body`
: `fix_body` with `fix_body` with … with `fix_body` for `ident`
diff --git a/doc/sphinx/proof-engine/ltac.rst b/doc/sphinx/proof-engine/ltac.rst
index 46f9826e41..362c3da6cb 100644
--- a/doc/sphinx/proof-engine/ltac.rst
+++ b/doc/sphinx/proof-engine/ltac.rst
@@ -31,10 +31,10 @@ Syntax
The syntax of the tactic language is given below. See Chapter
:ref:`gallinaspecificationlanguage` for a description of the BNF metasyntax used
in these grammar rules. Various already defined entries will be used in this
-chapter: entries :token:`natural`, :token:`integer`, :token:`ident`,
+chapter: entries :token:`num`, :token:`int`, :token:`ident`,
:token:`qualid`, :token:`term`, :token:`cpattern` and :token:`tactic`
-represent respectively the natural and integer numbers, the authorized
-identificators and qualified names, Coq terms and patterns and all the atomic
+represent respectively natural and integer numbers,
+identifiers, qualified names, Coq terms, patterns and the atomic
tactics described in Chapter :ref:`tactics`.
The syntax of :production:`cpattern` is
@@ -141,10 +141,10 @@ mode but it can also be used in toplevel definitions as shown below.
: `atom`
atom : `qualid`
: ()
- : `integer`
+ : `int`
: ( `ltac_expr` )
component : `string` | `qualid`
- message_token : `string` | `ident` | `integer`
+ message_token : `string` | `ident` | `int`
tacarg : `qualid`
: ()
: ltac : `atom`
@@ -159,11 +159,11 @@ mode but it can also be used in toplevel definitions as shown below.
match_rule : `cpattern` => `ltac_expr`
: context [`ident`] [ `cpattern` ] => `ltac_expr`
: _ => `ltac_expr`
- test : `integer` = `integer`
- : `integer` (< | <= | > | >=) `integer`
+ test : `int` = `int`
+ : `int` (< | <= | > | >=) `int`
selector : [`ident`]
- : `integer`
- : (`integer` | `integer` - `integer`), ..., (`integer` | `integer` - `integer`)
+ : `int`
+ : (`int` | `int` - `int`), ..., (`int` | `int` - `int`)
toplevel_selector : `selector`
: all
: par
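
As a self-contained sketch of the selector syntax above (only the prelude is
needed)::

    Goal True /\ True.
    Proof.
      split.
      1-2: exact I.   (* an integer range selector: goals 1 through 2 *)
    Qed.

The ``all`` toplevel selector, as in ``all: exact I.``, would close both goals
as well.
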
diff --git a/doc/sphinx/proof-engine/ltac2.rst b/doc/sphinx/proof-engine/ltac2.rst
index 3036648b08..ceaa2775bf 100644
--- a/doc/sphinx/proof-engine/ltac2.rst
+++ b/doc/sphinx/proof-engine/ltac2.rst
@@ -179,7 +179,7 @@ constructions from ML.
: let `ltac2_var` := `ltac2_term` in `ltac2_term`
: let rec `ltac2_var` := `ltac2_term` in `ltac2_term`
: match `ltac2_term` with `ltac2_branch` ... `ltac2_branch` end
- : `integer`
+ : `int`
: `string`
: `ltac2_term` ; `ltac2_term`
: [| `ltac2_term` ; ... ; `ltac2_term` |]
@@ -670,7 +670,7 @@ A scope is a name given to a grammar entry used to produce some Ltac2 expression
at parsing time. Scopes are described using a form of S-expression.
.. prodn::
- ltac2_scope ::= {| @string | @integer | @lident ({+, @ltac2_scope}) }
+ ltac2_scope ::= {| @string | @int | @lident ({+, @ltac2_scope}) }
A few scopes contain antiquotation features. For sake of uniformity, all
antiquotations are introduced by the syntax :n:`$@lident`.
@@ -719,7 +719,7 @@ The following scopes are built-in.
+ parses a Ltac2 expression at the next level and return it as is.
-- :n:`tactic(n = @integer)`:
+- :n:`tactic(n = @int)`:
+ parses a Ltac2 expression at the provided level :n:`n` and return it as is.
@@ -760,7 +760,7 @@ Notations
The Ltac2 parser can be extended by syntactic notations.
-.. cmd:: Ltac2 Notation {+ {| @lident (@ltac2_scope) | @string } } {? : @integer} := @ltac2_term
+.. cmd:: Ltac2 Notation {+ {| @lident (@ltac2_scope) | @string } } {? : @int} := @ltac2_term
:name: Ltac2 Notation
A Ltac2 notation adds a parsing rule to the Ltac2 grammar, which is expanded
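
A minimal sketch of such a notation (it assumes Ltac2 is available and loaded
with ``From Ltac2 Require Import Ltac2``, which also switches the default proof
mode to Ltac2)::

    From Ltac2 Require Import Ltac2.
    Ltac2 Notation "hello" := Message.print (Message.of_string "Hello from Ltac2").

    Goal True.
    Proof.
      hello.      (* prints the message *)
      exact I.
    Qed.
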
diff --git a/doc/tools/docgram/doc_grammar.ml b/doc/tools/docgram/doc_grammar.ml
index 9f0a1942f9..eb86bab37e 100644
--- a/doc/tools/docgram/doc_grammar.ml
+++ b/doc/tools/docgram/doc_grammar.ml
@@ -1020,6 +1020,37 @@ let apply_edit_file g edits =
in
List.rev (List.concat (List.map traverse prod))
+ (* get the special tokens in the grammar *)
+let print_special_tokens g =
+ let rec traverse set = function
+ | Sterm s ->
+ let c = s.[0] in
+ if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') then set
+ else StringSet.add s set
+ | Snterm s -> set
+ | Slist1 sym
+ | Slist0 sym
+ | Sopt sym
+ -> traverse set sym
+ | Slist1sep (sym, sep)
+ | Slist0sep (sym, sep)
+ -> traverse (traverse set sym) sep
+ | Sparen sym_list -> traverse_prod set sym_list
+ | Sprod sym_list_list -> traverse_prods set sym_list_list
+ | Sedit _
+ | Sedit2 _ -> set
+ and traverse_prod set prod = List.fold_left traverse set prod
+ and traverse_prods set prods = List.fold_left traverse_prod set prods
+ in
+ let spec_toks = List.fold_left (fun set b ->
+ let nt, prods = b in
+ traverse_prods set prods)
+ StringSet.empty (NTMap.bindings !g.map)
+ in
+ Printf.printf "Special tokens:";
+ StringSet.iter (fun t -> Printf.printf " %s" t) spec_toks;
+ Printf.printf "\n\n"
+
(* get the transitive closure of a non-terminal excluding "stops" symbols.
Preserve ordering to the extent possible *)
(* todo: at the moment, the code doesn't use the ordering; consider switching to using
@@ -1099,8 +1130,9 @@ let print_chunks g out fmt () =
(*seen := StringSet.diff !seen (StringSet.of_list ssr_tops);*)
(*print_chunk out g seen fmt "vernac_toplevel" ["vernac_toplevel"] [];*)
-let start_symbols = ["vernac_toplevel"; "tactic_mode"]
-let tokens = [ "BULLET"; "FIELD"; "IDENT"; "NUMERAL"; "STRING" ] (* don't report as undefined *)
+let start_symbols = ["vernac_toplevel"]
+(* don't report tokens as undefined *)
+let tokens = [ "bullet"; "field"; "ident"; "int"; "num"; "numeral"; "string" ]
let report_bad_nts g file =
let rec get_nts refd defd bindings =
@@ -1468,6 +1500,8 @@ let process_grammar args =
print_in_order out g `MLG !g.order StringSet.empty;
close_out out;
finish_with_file (dir "fullGrammar") args.verify;
+ if args.verbose then
+ print_special_tokens g;
if not args.fullGrammar then begin
(* do shared edits *)