Merge PR #10548: Refine documentation of tokens

Reviewed-by: Zimmi48 Ack-by: cpitclaudel Ack-by: herbelin
author: Théo Zimmermann 2019-07-29 16:11:31 +0200
committer: Théo Zimmermann 2019-07-29 16:11:31 +0200
commit: 807b1e18575914f9956569ab71bb5fe29716cbdf (patch)
tree: 0cafe066f6f340891581f5077a12315069e4af8d /doc
parent: fd9185ba9f72bb7631dbd0d113717ac15804451a (diff)
parent: e33f5d2d3930ab7818abccef4bf2326c72b348eb (diff)
5 files changed, 126 insertions, 81 deletions
diff --git a/doc/sphinx/language/coq-library.rst b/doc/sphinx/language/coq-library.rst
index d1b95e6203..ac75240cfb 100644
--- a/doc/sphinx/language/coq-library.rst
+++ b/doc/sphinx/language/coq-library.rst
@@ -7,22 +7,20 @@ The |Coq| library
    single: Theories
 
 
-The |Coq| library is structured into two parts:
+The |Coq| library has two parts:
 
-  * **The initial library**: it contains elementary logical notions and
-    data-types. It constitutes the basic state of the system directly
-    available when running |Coq|;
+  * **The basic library**: definitions and theorems for
+    the most commonly used elementary logical notions and
+    data types. |Coq| normally loads these files automatically when it starts.
 
-  * **The standard library**: general-purpose libraries containing various
-    developments of |Coq| axiomatizations about sets, lists, sorting,
-    arithmetic, etc. This library comes with the system and its modules
-    are directly accessible through the ``Require`` command (see
-    Section :ref:`compiled-files`);
+  * **The standard library**: general-purpose libraries with
+    definitions and theorems for sets, lists, sorting,
+    arithmetic, etc. To use these files, users must load them explicitly
+    with the ``Require`` command (see :ref:`compiled-files`)
 
-In addition, user-provided libraries or developments are provided by
-|Coq| users' community. These libraries and developments are available
-for download at http://coq.inria.fr (see
-Section :ref:`userscontributions`).
+There are also many libraries provided by |Coq| users' community.
+These libraries and developments are available
+for download at http://coq.inria.fr (see :ref:`userscontributions`).
 
 This chapter briefly reviews the |Coq| libraries whose contents can
 also be browsed at http://coq.inria.fr/stdlib.
@@ -514,8 +512,8 @@ realizability interpretation.
    forall (A B:Prop) (P:Type), (A -> B -> P) -> A /\ B -> P.
 
 
-Basic Arithmetics
-~~~~~~~~~~~~~~~~~
+Basic Arithmetic
+~~~~~~~~~~~~~~~~
 
 The basic library includes a few elementary properties of natural
 numbers, together with the definitions of predecessor, addition and
@@ -804,8 +802,8 @@ Notation          Interpretation
 ===============   ===================
 
 
-Notations for integer arithmetics
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Notations for integer arithmetic
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. index::
   single: Arithmetical notations
@@ -822,7 +820,7 @@ Notations for integer arithmetics
 
 
 The following table describes the syntax of expressions
-for integer arithmetics. It is provided by requiring and opening the module ``ZArith`` and opening scope ``Z_scope``.
+for integer arithmetic. It is provided by requiring and opening the module ``ZArith`` and opening scope ``Z_scope``.
 It specifies how notations are interpreted and, when not
 already reserved, the precedence and associativity.
 
@@ -866,7 +864,7 @@ Notations for real numbers
 
 This is provided by requiring and opening the module ``Reals`` and
 opening scope ``R_scope``. This set of notations is very similar to
-the notation for integer arithmetics. The inverse function was added.
+the notation for integer arithmetic. The inverse function was added.
 
 ===============   ===================
 Notation          Interpretation
diff --git a/doc/sphinx/language/gallina-specification-language.rst b/doc/sphinx/language/gallina-specification-language.rst
index 38f6714f46..91dfa34494 100644
--- a/doc/sphinx/language/gallina-specification-language.rst
+++ b/doc/sphinx/language/gallina-specification-language.rst
@@ -44,78 +44,91 @@ Lexical conventions
 ===================
 
 Blanks
-  Space, newline and horizontal tabulation are considered as blanks.
+  Space, newline and horizontal tab are considered blanks.
   Blanks are ignored but they separate tokens.
 
 Comments
-  Comments in Coq are enclosed between ``(*`` and ``*)``, and can be nested.
-  They can contain any character. However, :token:`string` literals must be
+  Comments are enclosed between ``(*`` and ``*)``.  They can be nested.
+  They can contain any character. However, embedded :token:`string` literals must be
   correctly closed. Comments are treated as blanks.
 
-Identifiers and access identifiers
+Identifiers and field identifiers
   Identifiers, written :token:`ident`, are sequences of letters, digits, ``_`` and
-  ``'``, that do not start with a digit or ``'``. That is, they are
-  recognized by the following lexical class:
+  ``'``, that do not start with a digit or ``'``.  That is, they are
+  recognized by the following grammar (except that the string ``_`` is reserved;
+  it is not a valid identifier):
 
   .. productionlist:: coq
-     first_letter      : a..z ∣ A..Z ∣ _ ∣ unicode-letter
-     subsequent_letter : a..z ∣ A..Z ∣ 0..9 ∣ _ ∣ ' ∣ unicode-letter ∣ unicode-id-part
      ident             : `first_letter`[`subsequent_letter`…`subsequent_letter`]
-     access_ident      : .`ident`
+     field             : .`ident`
+     first_letter      : a..z ∣ A..Z ∣ _ ∣ `unicode_letter`
+     subsequent_letter : `first_letter` ∣ 0..9 ∣ ' ∣ `unicode_id_part`
 
   All characters are meaningful. In particular, identifiers are case-sensitive.
-  The entry ``unicode-letter`` non-exhaustively includes Latin,
+  :production:`unicode_letter` non-exhaustively includes Latin,
   Greek, Gothic, Cyrillic, Arabic, Hebrew, Georgian, Hangul, Hiragana
   and Katakana characters, CJK ideographs, mathematical letter-like
-  symbols, hyphens, non-breaking space, … The entry ``unicode-id-part``
+  symbols and non-breaking space. :production:`unicode_id_part`
   non-exhaustively includes symbols for prime letters and subscripts.
 
-  Access identifiers, written :token:`access_ident`, are identifiers prefixed by
-  `.` (dot) without blank. They are used in the syntax of qualified
-  identifiers.
+  Field identifiers, written :token:`field`, are identifiers prefixed by
+  `.` (dot) with no blank between the dot and the identifier. They are used in
+  the syntax of qualified identifiers.
 
 Numerals
-  Numerals are sequences of digits with a potential fractional part
-  and exponent. Integers are numerals without fractional nor exponent
-  part and optionally preceded by a minus sign. Underscores ``_`` can
-  be used as comments in numerals.
+  Numerals are sequences of digits with an optional fractional part
+  and exponent, optionally preceded by a minus sign. :token:`int` is an integer;
+  a numeral without fractional or exponent parts. :token:`num` is a non-negative
+  integer.  Underscores embedded in the digits are ignored, for example
+  ``1_000_000`` is the same as ``1000000``.
 
   .. productionlist:: coq
-     digit   : 0..9
+     numeral : `num`[. `num`][`exp`[`sign`]`num`]
+     int     : [-]`num`
      num     : `digit`…`digit`
-     integer : [-]`num`
-     dot     : .
+     digit   : 0..9
      exp     : e | E
      sign    : + | -
-     numeral : `num`[`dot` `num`][`exp`[`sign`]`num`]
 
 Strings
-  Strings are delimited by ``"`` (double quote), and enclose a sequence of
-  any characters different from ``"`` or the sequence ``""`` to denote the
-  double quote character. In grammars, the entry for quoted strings is
-  :production:`string`.
+  Strings begin and end with ``"`` (double quote).  Use ``""`` to represent
+  a double quote character within a string.  In the grammar, strings are
+  identified with :production:`string`.
 
 Keywords
-  The following identifiers are reserved keywords, and cannot be
-  employed otherwise::
-
-    _ as at cofix else end exists exists2 fix for
-    forall fun if IF in let match mod return
-    SProp Prop Set Type then using where with
-
-Special tokens
-  The following sequences of characters are special tokens::
-
-    ! % & && ( () ) * + ++ , - -> . .( ..
-    / /\ : :: :< := :> ; < <- <-> <: <= <> =
-    => =_D > >-> >= ? ?= @ [ \/ ] ^ { | |-
-    || } ~ #[
-
-  Lexical ambiguities are resolved according to the “longest match”
-  rule: when a sequence of non alphanumerical characters can be
-  decomposed into several different ways, then the first token is the
-  longest possible one (among all tokens defined at this moment), and so
-  on.
+  The following character sequences are reserved keywords that cannot be
+  used as identifiers::
+
+    _ Axiom CoFixpoint Definition Fixpoint Hypothesis IF Parameter Prop
+    SProp Set Theorem Type Variable as at by cofix discriminated else
+    end exists exists2 fix for forall fun if in lazymatch let match
+    multimatch return then using where with
+
+  Note that plugins may define additional keywords when they are loaded.
+
+Other tokens
+  The set of
+  tokens defined at any given time can vary because the :cmd:`Notation`
+  command can define new tokens.  A :cmd:`Require` command may load more notation definitions,
+  while the end of a :cmd:`Section` may remove notations.  Some notations
+  are defined in the basic library (see :ref:`thecoqlibrary`) and are normallly
+  loaded automatically at startup time.
+
+  Here are the character sequences that Coq directly defines as tokens
+  without using :cmd:`Notation` (omitting 25 specialized tokens that begin with
+  ``#int63_``)::
+
+    ! #[ % & ' ( () (bfs) (dfs) ) * ** + , - ->
+    . .( .. ... / : ::= := :> :>> ; < <+ <- <:
+    <<: <= = => > >-> >= ? @ @{ [ [= ] _ _eqn
+    `( `{ { {| | |- || }
+
+  When multiple tokens match the beginning of a sequence of characters,
+  the longest matching token is used.
+  Occasionally you may need to insert spaces to separate tokens.  For example,
+  if ``~`` and ``~~`` are both defined as tokens, the inputs ``~ ~`` and
+  ``~~`` generate different tokens, whereas if `~~` is not defined, then the
+  two inputs are equivalent.
 
 .. _term:
 
@@ -164,7 +177,7 @@ is described in Chapter :ref:`syntaxextensionsandinterpretationscopes`.
                     : ( `name` [: `term`] := `term` )
                     : ' `pattern`
    name             : `ident` | _
-   qualid           : `ident` | `qualid` `access_ident`
+   qualid           : `ident` | `qualid` `field`
    sort             : SProp | Prop | Set | Type
    fix_bodies       : `fix_body`
                     : `fix_body` with `fix_body` with … with `fix_body` for `ident`
diff --git a/doc/sphinx/proof-engine/ltac.rst b/doc/sphinx/proof-engine/ltac.rst
index 46f9826e41..362c3da6cb 100644
--- a/doc/sphinx/proof-engine/ltac.rst
+++ b/doc/sphinx/proof-engine/ltac.rst
@@ -31,10 +31,10 @@ Syntax
 The syntax of the tactic language is given below. See Chapter
 :ref:`gallinaspecificationlanguage` for a description of the BNF metasyntax used
 in these grammar rules. Various already defined entries will be used in this
-chapter: entries :token:`natural`, :token:`integer`, :token:`ident`,
+chapter: entries :token:`num`, :token:`int`, :token:`ident`
 :token:`qualid`, :token:`term`, :token:`cpattern` and :token:`tactic`
-represent respectively the natural and integer numbers, the authorized
-identificators and qualified names, Coq terms and patterns and all the atomic
+represent respectively natural and integer numbers,
+identifiers, qualified names, Coq terms, patterns and the atomic
 tactics described in Chapter :ref:`tactics`.
 
 The syntax of :production:`cpattern` is
@@ -141,10 +141,10 @@ mode but it can also be used in toplevel definitions as shown below.
                      : `atom`
    atom              : `qualid`
                      : ()
-                     : `integer`
+                     : `int`
                      : ( `ltac_expr` )
    component : `string` | `qualid`
-   message_token     : `string` | `ident` | `integer`
+   message_token     : `string` | `ident` | `int`
    tacarg            : `qualid`
                      : ()
                      : ltac : `atom`
@@ -159,11 +159,11 @@ mode but it can also be used in toplevel definitions as shown below.
    match_rule        : `cpattern` => `ltac_expr`
                      : context [`ident`] [ `cpattern` ] => `ltac_expr`
                      : _ => `ltac_expr`
-   test              : `integer` = `integer`
-                     : `integer` (< | <= | > | >=) `integer`
+   test              : `int` = `int`
+                     : `int` (< | <= | > | >=) `int`
    selector          : [`ident`]
-                     : `integer`
-                     : (`integer` | `integer` - `integer`), ..., (`integer` | `integer` - `integer`)
+                     : `int`
+                     : (`int` | `int` - `int`), ..., (`int` | `int` - `int`)
    toplevel_selector : `selector`
                      : all
                      : par
diff --git a/doc/sphinx/proof-engine/ltac2.rst b/doc/sphinx/proof-engine/ltac2.rst
index 3036648b08..ceaa2775bf 100644
--- a/doc/sphinx/proof-engine/ltac2.rst
+++ b/doc/sphinx/proof-engine/ltac2.rst
@@ -179,7 +179,7 @@ constructions from ML.
                     : let `ltac2_var` := `ltac2_term` in `ltac2_term`
                     : let rec `ltac2_var` := `ltac2_term` in `ltac2_term`
                     : match `ltac2_term` with `ltac2_branch` ... `ltac2_branch` end
-                    : `integer`
+                    : `int`
                     : `string`
                     : `ltac2_term` ; `ltac2_term`
                     : [| `ltac2_term` ; ... ; `ltac2_term` |]
@@ -670,7 +670,7 @@ A scope is a name given to a grammar entry used to produce some Ltac2 expression
 at parsing time. Scopes are described using a form of S-expression.
 
 .. prodn::
-   ltac2_scope ::= {| @string | @integer | @lident ({+, @ltac2_scope}) }
+   ltac2_scope ::= {| @string | @int | @lident ({+, @ltac2_scope}) }
 
 A few scopes contain antiquotation features. For sake of uniformity, all
 antiquotations are introduced by the syntax :n:`$@lident`.
@@ -719,7 +719,7 @@ The following scopes are built-in.
 
   + parses a Ltac2 expression at the next level and return it as is.
 
-- :n:`tactic(n = @integer)`:
+- :n:`tactic(n = @int)`:
 
   + parses a Ltac2 expression at the provided level :n:`n` and return it as is.
 
@@ -760,7 +760,7 @@ Notations
 
 The Ltac2 parser can be extended by syntactic notations.
 
-.. cmd:: Ltac2 Notation {+ {| @lident (@ltac2_scope) | @string } } {? : @integer} := @ltac2_term
+.. cmd:: Ltac2 Notation {+ {| @lident (@ltac2_scope) | @string } } {? : @int} := @ltac2_term
    :name: Ltac2 Notation
 
    A Ltac2 notation adds a parsing rule to the Ltac2 grammar, which is expanded
diff --git a/doc/tools/docgram/doc_grammar.ml b/doc/tools/docgram/doc_grammar.ml
index 9f0a1942f9..eb86bab37e 100644
--- a/doc/tools/docgram/doc_grammar.ml
+++ b/doc/tools/docgram/doc_grammar.ml
@@ -1020,6 +1020,37 @@ let apply_edit_file g edits =
   in
   List.rev (List.concat (List.map traverse prod))
 
+  (* get the special tokens in the grammar *)
+let print_special_tokens g =
+  let rec traverse set = function
+  | Sterm s ->
+    let c = s.[0] in
+    if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') then set
+    else StringSet.add s set
+  | Snterm s -> set
+  | Slist1 sym
+  | Slist0 sym
+  | Sopt sym
+    -> traverse set sym
+  | Slist1sep (sym, sep)
+  | Slist0sep (sym, sep)
+    -> traverse (traverse set sym) sep
+  | Sparen sym_list -> traverse_prod set sym_list
+  | Sprod sym_list_list -> traverse_prods set sym_list_list
+  | Sedit _
+  | Sedit2 _ -> set
+  and traverse_prod set prod = List.fold_left traverse set prod
+  and traverse_prods set prods = List.fold_left traverse_prod set prods
+  in
+  let spec_toks = List.fold_left (fun set b ->
+      let nt, prods = b in
+      traverse_prods set prods)
+    StringSet.empty (NTMap.bindings !g.map)
+  in
+  Printf.printf "Special tokens:";
+  StringSet.iter (fun t -> Printf.printf " %s" t) spec_toks;
+  Printf.printf "\n\n"
+
 (* get the transitive closure of a non-terminal excluding "stops" symbols.
    Preserve ordering to the extent possible *)
 (* todo: at the moment, the code doesn't use the ordering; consider switching to using
@@ -1099,8 +1130,9 @@ let print_chunks g out fmt () =
     (*seen := StringSet.diff !seen (StringSet.of_list ssr_tops);*)
     (*print_chunk out g seen fmt "vernac_toplevel" ["vernac_toplevel"] [];*)
 
-let start_symbols = ["vernac_toplevel"; "tactic_mode"]
-let tokens = [ "BULLET"; "FIELD"; "IDENT"; "NUMERAL"; "STRING" ] (* don't report as undefined *)
+let start_symbols = ["vernac_toplevel"]
+(* don't report tokens as undefined *)
+let tokens = [ "bullet"; "field"; "ident"; "int"; "num"; "numeral"; "string" ]
 
 let report_bad_nts g file =
   let rec get_nts refd defd bindings =
@@ -1468,6 +1500,8 @@ let process_grammar args =
   print_in_order out g `MLG !g.order StringSet.empty;
   close_out out;
   finish_with_file (dir "fullGrammar") args.verify;
+  if args.verbose then
+    print_special_tokens g;
 
   if not args.fullGrammar then begin
     (* do shared edits *)
author	Théo Zimmermann	2019-07-29 16:11:31 +0200
committer	Théo Zimmermann	2019-07-29 16:11:31 +0200
commit	807b1e18575914f9956569ab71bb5fe29716cbdf (patch)
tree	0cafe066f6f340891581f5077a12315069e4af8d /doc
parent	fd9185ba9f72bb7631dbd0d113717ac15804451a (diff)
parent	e33f5d2d3930ab7818abccef4bf2326c72b348eb (diff)