diff options
Diffstat (limited to 'dev')
| -rw-r--r-- | dev/README.md | 6 | ||||
| -rwxr-xr-x | dev/bench/gitlab.sh | 510 | ||||
| -rwxr-xr-x | dev/bench/render_results | 434 | ||||
| -rw-r--r-- | dev/bench/sort-by-deps | 33 | ||||
| -rwxr-xr-x | dev/bench/sort-by-deps.sh | 15 | ||||
| -rwxr-xr-x | dev/bench/timelog2html | 141 | ||||
| -rwxr-xr-x | dev/build/windows/MakeCoq_MinGW.bat | 2 | ||||
| -rwxr-xr-x | dev/ci/ci-metacoq.sh | 2 | ||||
| -rw-r--r-- | dev/ci/user-overlays/12565-ppedrot-fix-tc-search-opacity.sh | 6 | ||||
| -rw-r--r-- | dev/ci/user-overlays/12709-ppedrot-hint-pattern-out.sh | 6 | ||||
| -rw-r--r-- | dev/ci/user-overlays/12720-ppedrot-factor-class-hint-clenv.sh | 6 | ||||
| -rw-r--r-- | dev/ci/user-overlays/12756-jashug-dont-refresh-argument-names.sh | 9 | ||||
| -rw-r--r-- | dev/doc/parsing.md | 397 |
13 files changed, 1561 insertions, 6 deletions
diff --git a/dev/README.md b/dev/README.md index 0c6b8020f1..0a6b196ec0 100644 --- a/dev/README.md +++ b/dev/README.md @@ -22,14 +22,12 @@ | [`dev/doc/changes.md`](doc/changes.md) | (partial) Per-version summary of the evolution of Coq ML source | | [`dev/doc/style.txt`](doc/style.txt) | A few style recommendations for writing Coq ML files | | [`dev/doc/debugging.md`](doc/debugging.md) | Help for debugging or profiling | -| [`dev/doc/universes.txt`](doc/universes.txt) | Help for debugging universes | -| [`dev/doc/extensions.txt`](doc/extensions.txt) | Some help about TACTIC EXTEND | -| [`dev/doc/perf-analysis`](doc/perf-analysis)| Analysis of perfs measured on the compilation of user contribs | +| [`dev/doc/universes.md`](doc/universes.md) | Help for debugging universes | | [`dev/doc/econstr.md`](doc/econstr.md) | Describes `Econstr`, implementation of treatment of `evar` in the engine | | [`dev/doc/primproj.md`](doc/primproj.md) | Describes primitive projections | +| [`dev/doc/parsing.md`](doc/parsing.md) | Grammar and parsing overview | | [`dev/doc/proof-engine.md`](doc/proof-engine.md) | Tutorial on new proof engine | | [`dev/doc/xml-protocol.md`](doc/xml-protocol.md) | XML protocol that coqtop and IDEs use to communicate | -| [`dev/doc/MERGING.md`](doc/MERGING.md) | How pull requests should be merged into `master` | | [`dev/doc/release-process.md`](doc/release-process.md) | Process of creating a new Coq release | diff --git a/dev/bench/gitlab.sh b/dev/bench/gitlab.sh new file mode 100755 index 0000000000..5423f30aba --- /dev/null +++ b/dev/bench/gitlab.sh @@ -0,0 +1,510 @@ +#! /usr/bin/env bash + +# ASSUMPTIONS: +# - the OPAM packages, specified by the user, are topologically sorted wrt. to the dependency relationship. +# - all the variables below are set. + +set -e + +BENCH_DEBUG=1 + +r='\033[0m' # reset (all attributes off) +b='\033[1m' # bold +u='\033[4m' # underline +nl=$'\n' + +number_of_processors=$(cat /proc/cpuinfo | grep '^processor *' | wc -l) + +program_name="$0" +program_path=$(readlink -f "${program_name%/*}") + +coqbot_url_prefix="https://coqbot.herokuapp.com/pendulum/" + +# Check that the required arguments are provided + +check_variable () { + if [ ! -v "$1" ] + then + echo "Variable $1 should be set" + exit 1 + fi +} + +echo $PWD + +#check_variable "BUILD_ID" +#check_variable "BUILD_URL" +#check_variable "JOB_NAME" +#check_variable "JENKINS_URL" +check_variable "coq_pr_number" +check_variable "coq_pr_comment_id" +check_variable "new_ocaml_switch" +check_variable "new_coq_repository" +check_variable "new_coq_commit" +check_variable "new_coq_opam_archive_git_uri" +check_variable "new_coq_opam_archive_git_branch" +check_variable "old_ocaml_switch" +check_variable "old_coq_repository" +old_coq_commit="609152467f4d717713b7ea700f5155fc9f341cd7" +check_variable "old_coq_opam_archive_git_uri" +check_variable "old_coq_opam_archive_git_branch" +check_variable "num_of_iterations" +check_variable "coq_opam_packages" + +if which jq > /dev/null; then + : +else + echo > /dev/stderr + echo "ERROR: \"jq\" program is not available." > /dev/stderr + echo > /dev/stderr + exit 1 +fi + +if echo "$num_of_iterations" | grep '^[1-9][0-9]*$' 2> /dev/null > /dev/null; then + : +else + echo + echo "ERROR: num_of_iterations \"$num_of_iterations\" is not a positive integer." > /dev/stderr + print_man_page_hint + exit 1 +fi + +mkdir -p "_bench" +working_dir="$PWD/_bench" + +log_dir=$working_dir/logs +mkdir "$log_dir" + +if [ ! -z "${coq_pr_number}" ]; then + github_response="$(curl "https://api.github.com/repos/coq/coq/pulls/${coq_pr_number}")" + new_coq_repository="$(echo "${github_response}" | jq -r '.head.repo.clone_url')" + new_coq_commit="$(echo "${github_response}" | jq -r '.head.sha')" + old_coq_repository="$(echo "${github_response}" | jq -r '.base.repo.clone_url')" + old_coq_commit="$(echo "${github_response}" | jq -r '.base.sha')" + coq_pr_title="$(echo "${github_response}" | jq -r '.title')" + # for coqbot parsing purposes, coq_pr_number and coq_pr_comment_id must not have newlines + coq_pr_number="$(echo "${coq_pr_number}" | tr -d '\n' | tr -d '\r')" + coq_pr_comment_id="$(echo "${coq_pr_comment_id}" | tr -d '\n' | tr -d '\r')" + + for val in "${new_coq_repository}" "${new_coq_commit}" "${old_coq_repository}" "${old_coq_commit}" "${coq_pr_title}"; do + if [ -z "$val" ] || [ "val" == "null" ]; then + echo 'ERROR: Invalid Response:' > /dev/stderr + echo "${github_response}" > /dev/stderr + echo "Info:" > /dev/stderr + curl -i "https://api.github.com/repos/coq/coq/pulls/${coq_pr_number}" > /dev/stderr + exit 1 + fi + done + + if [ -z "$BENCH_DEBUG" ]; then # if it's non-empty, this'll get + # printed later anyway. But we + # want to see it always if we're + # automatically computing values + echo "DEBUG: new_coq_repository = $new_coq_repository" + echo "DEBUG: new_coq_commit = $new_coq_commit" + echo "DEBUG: old_coq_repository = $old_coq_repository" + echo "DEBUG: old_coq_commit = $old_coq_commit" + fi + +fi + +if [ ! -z "$BENCH_DEBUG" ] +then + echo "DEBUG: ocaml -version = `ocaml -version`" + echo "DEBUG: working_dir = $working_dir" + echo "DEBUG: new_ocaml_switch = $new_ocaml_switch" + echo "DEBUG: new_coq_repository = $new_coq_repository" + echo "DEBUG: new_coq_commit = $new_coq_commit" + echo "DEBUG: new_coq_opam_archive_git_uri = $new_coq_opam_archive_git_uri" + echo "DEBUG: new_coq_opam_archive_git_branch = $new_coq_opam_archive_git_branch" + echo "DEBUG: old_ocaml_switch = $old_ocaml_switch" + echo "DEBUG: old_coq_repository = $old_coq_repository" + echo "DEBUG: old_coq_commit = $old_coq_commit" + echo "DEBUG: old_coq_opam_archive_git_uri = $old_coq_opam_archive_git_uri" + echo "DEBUG: old_coq_opam_archive_git_branch = $old_coq_opam_archive_git_branch" + echo "DEBUG: num_of_iterations = $num_of_iterations" + echo "DEBUG: coq_opam_packages = $coq_opam_packages" + echo "DEBUG: coq_pr_number = $coq_pr_number" + echo "DEBUG: coq_pr_comment_id = $coq_pr_comment_id" +fi + +# -------------------------------------------------------------------------------- + +# Some sanity checks of command-line arguments provided by the user that can be done right now. + +if which perf > /dev/null; then + echo -n +else + echo > /dev/stderr + echo "ERROR: \"perf\" program is not available." > /dev/stderr + echo > /dev/stderr + exit 1 +fi + +if which curl > /dev/null; then + : +else + echo > /dev/stderr + echo "ERROR: \"curl\" program is not available." > /dev/stderr + echo > /dev/stderr + exit 1 +fi + +if [ ! -e "$working_dir" ]; then + echo > /dev/stderr + echo "ERROR: \"$working_dir\" does not exist." > /dev/stderr + echo > /dev/stderr + exit 1 +fi + +if [ ! -d "$working_dir" ]; then + echo > /dev/stderr + echo "ERROR: \"$working_dir\" is not a directory." > /dev/stderr + echo > /dev/stderr + exit 1 +fi + +if [ ! -w "$working_dir" ]; then + echo > /dev/stderr + echo "ERROR: \"$working_dir\" is not writable." > /dev/stderr + echo > /dev/stderr + exit 1 +fi + +coq_opam_packages_on_separate_lines=$(echo "$coq_opam_packages" | sed 's/ /\n/g') +if [ $(echo "$coq_opam_packages_on_separate_lines" | wc -l) != $(echo "$coq_opam_packages_on_separate_lines" | sort | uniq | wc -l) ]; then + echo "ERROR: The provided set of OPAM packages contains duplicates." + exit 1 +fi + +# -------------------------------------------------------------------------------- + +# Tell coqbot to update the initial comment, if we know which one to update +function coqbot_update_comment() { + is_done="$1" + comment_body="$2" + uninstallable_packages="$3" + + if [ ! -z "${coq_pr_number}" ]; then + comment_text="" + + if [ -z "${is_done}" ]; then + comment_text="in progress, " + else + comment_text="" + fi + comment_text="Benchmarking ${comment_text}log available [here](${BUILD_URL}/console), workspace available [here](${JENKINS_URL}/view/benchmarking/job/${JOB_NAME}/ws/${BUILD_ID})" + + if [ ! -z "${comment_body}" ]; then + comment_text="${comment_text}${nl}"'```'"${nl}${comment_body}${nl}"'```' + fi + + if [ ! -z "${uninstallable_packages}" ]; then + comment_text="${comment_text}${nl}The following packages failed to install: ${uninstallable_packages}" + fi + + # if there's a comment id, we update the comment while we're + # in progress; otherwise, we wait until the end to post a new + # comment + if [ ! -z "${coq_pr_comment_id}" ]; then + # Tell coqbot to update the in-progress comment + curl -X POST --data-binary "${coq_pr_number}${nl}${coq_pr_comment_id}${nl}${comment_text}" "${coqbot_url_prefix}/update-comment" + elif [ ! -z "${is_done}" ]; then + # Tell coqbot to post a new comment that we're done benchmarking + curl -X POST --data-binary "${coq_pr_number}${nl}${comment_text}" "${coqbot_url_prefix}/new-comment" + fi + if [ ! -z "${is_done}" ]; then + # Tell coqbot to remove the `needs: benchmarking` label + curl -X POST --data-binary "${coq_pr_number}" "${coqbot_url_prefix}/benchmarking-done" + fi + fi +} + +# initial update to the comment, to say that we're in progress +coqbot_update_comment "" "" "" + +# -------------------------------------------------------------------------------- + +# Clone the indicated git-repository. + +coq_dir="$working_dir/coq" +git clone -q "$new_coq_repository" "$coq_dir" +cd "$coq_dir" +git remote rename origin new_coq_repository +git remote add old_coq_repository "$old_coq_repository" +git fetch -q "$old_coq_repository" +git checkout -q $new_coq_commit + +official_coq_branch=master +coq_opam_version=dev + +# -------------------------------------------------------------------------------- + +new_opam_root="$working_dir/opam.NEW" +old_opam_root="$working_dir/opam.OLD" + +# -------------------------------------------------------------------------------- + +old_coq_opam_archive_dir="$working_dir/old_coq_opam_archive" +git clone -q --depth 1 -b "$old_coq_opam_archive_git_branch" "$old_coq_opam_archive_git_uri" "$old_coq_opam_archive_dir" +new_coq_opam_archive_dir="$working_dir/new_coq_opam_archive" +git clone -q --depth 1 -b "$new_coq_opam_archive_git_branch" "$new_coq_opam_archive_git_uri" "$new_coq_opam_archive_dir" + +initial_opam_packages="num ocamlfind dune" + +# Create an opam root and install Coq +# $1 = root_name {ex: NEW / OLD} +# $2 = compiler name +# $3 = git hash of Coq to be installed +# $4 = directory of coq opam archive +create_opam() { + + local RUNNER="$1" + local OPAM_DIR="$working_dir/opam.$RUNNER" + local OPAM_COMP="$2" + local COQ_HASH="$3" + local OPAM_COQ_DIR="$4" + + export OPAMROOT="$OPAM_DIR" + + opam init --disable-sandboxing -qn -j$number_of_processors --bare + # Allow beta compiler switches + opam repo add -q --set-default beta https://github.com/ocaml/ocaml-beta-repository.git + # Allow experimental compiler switches + opam repo add -q --set-default ocaml-pr https://github.com/ejgallego/ocaml-pr-repository.git + # Rest of default switches + opam repo add -q --set-default iris-dev "https://gitlab.mpi-sws.org/FP/opam-dev.git" + + opam switch create -qy -j$number_of_processors "$OPAM_COMP" + eval $(opam env) + + # For some reason opam guesses an incorrect upper bound on the + # number of jobs available on Travis, so we set it here manually: + opam config set-global jobs $number_of_processors + if [ ! -z "$BENCH_DEBUG" ]; then opam config list; fi + + opam repo add -q --this-switch coq-extra-dev "$OPAM_COQ_DIR/extra-dev" + opam repo add -q --this-switch coq-released "$OPAM_COQ_DIR/released" + + opam install -qy -j$number_of_processors $initial_opam_packages + if [ ! -z "$BENCH_DEBUG" ]; then opam repo list; fi + + cd "$coq_dir" + if [ ! -z "$BENCH_DEBUG" ]; then echo "DEBUG: $1_coq_commit = $COQ_HASH"; fi + + git checkout -q $COQ_HASH + COQ_HASH_LONG=$(git log --pretty=%H | head -n 1) + + echo "$1_coq_commit_long = $COQ_HASH_LONG" + + _RES=0 + /usr/bin/time -o "$log_dir/coq.$RUNNER.1.time" --format="%U %M %F" \ + perf stat -e instructions:u,cycles:u -o "$log_dir/coq.$RUNNER.1.perf" \ + opam pin add -y -b -j "$number_of_processors" --kind=path coq.dev . \ + 3>$log_dir/coq.$RUNNER.opam_install.1.stdout 1>&3 \ + 4>$log_dir/coq.$RUNNER.opam_install.1.stderr 2>&4 || \ + _RES=$? + if [ $_RES = 0 ]; then + echo "Coq ($RUNNER) installed successfully" + else + echo "ERROR: \"opam install coq.$coq_opam_version\" has failed (for the $RUNNER commit = $COQ_HASH_LONG)." + exit 1 + fi + + # we don't multi compile coq for now (TODO some other time) + # the render needs all the files so copy them around + for it in $(seq 2 $num_of_iterations); do + cp "$log_dir/coq.$RUNNER.1.time" "$log_dir/coq.$RUNNER.$it.time" + cp "$log_dir/coq.$RUNNER.1.perf" "$log_dir/coq.$RUNNER.$it.perf" + done + +} + +# Create an OPAM-root to which we will install the NEW version of Coq. +create_opam "NEW" "$new_ocaml_switch" "$new_coq_commit" "$new_coq_opam_archive_dir" +new_coq_commit_long="$COQ_HASH_LONG" + +# Create an OPAM-root to which we will install the OLD version of Coq. +create_opam "OLD" "$old_ocaml_switch" "$old_coq_commit" "$old_coq_opam_archive_dir" +old_coq_commit_long="$COQ_HASH_LONG" +# -------------------------------------------------------------------------------- +# Measure the compilation times of the specified OPAM packages in both switches + +# Sort the opam packages +sorted_coq_opam_packages=$("${program_path}/sort-by-deps.sh" ${coq_opam_packages}) +if [ ! -z "$BENCH_DEBUG" ] +then + echo "DEBUG: sorted_coq_opam_packages = ${sorted_coq_opam_packages}" +fi + +# Generate per line timing info in devs that use coq_makefile +export TIMING=1 + +# The following variable will be set in the following cycle: +installable_coq_opam_packages=coq + +for coq_opam_package in $sorted_coq_opam_packages; do + + if [ ! -z "$BENCH_DEBUG" ]; then + opam list + echo "DEBUG: coq_opam_package = $coq_opam_package" + opam show $coq_opam_package || continue 2 + else + # cause to skip with error if unknown package + opam show $coq_opam_package >/dev/null || continue 2 + fi + + for RUNNER in NEW OLD; do + + # perform measurements for the NEW/OLD commit (provided by the user) + if [ $RUNNER = "NEW" ]; then + export OPAMROOT="$new_opam_root" + echo "Testing NEW commit: $(date)" + else + export OPAMROOT="$old_opam_root" + echo "Testing OLD commit: $(date)" + fi + + eval $(opam env) + + # If a given OPAM-package was already installed (as a + # dependency of some OPAM-package that we have benchmarked + # before), remove it. + opam uninstall -q $coq_opam_package + + # OPAM 2.0 likes to ignore the -j when it feels like :S so we + # workaround that here. + opam config set-global jobs $number_of_processors + + opam install $coq_opam_package -v -b -j$number_of_processors --deps-only -y \ + 3>$log_dir/$coq_opam_package.$RUNNER.opam_install.deps_only.stdout 1>&3 \ + 4>$log_dir/$coq_opam_package.$RUNNER.opam_install.deps_only.stderr 2>&4 || continue 2 + + opam config set-global jobs 1 + + if [ ! -z "$BENCH_DEBUG" ]; then ls -l $working_dir; fi + + for iteration in $(seq $num_of_iterations); do + _RES=0 + /usr/bin/time -o "$log_dir/$coq_opam_package.$RUNNER.$iteration.time" --format="%U %M %F" \ + perf stat -e instructions:u,cycles:u -o "$log_dir/$coq_opam_package.$RUNNER.$iteration.perf" \ + opam install -v -b -j1 $coq_opam_package \ + 3>$log_dir/$coq_opam_package.$RUNNER.opam_install.$iteration.stdout 1>&3 \ + 4>$log_dir/$coq_opam_package.$RUNNER.opam_install.$iteration.stderr 2>&4 || \ + _RES=$? + if [ $_RES = 0 ]; + then + echo $_RES > $log_dir/$coq_opam_package.$RUNNER.opam_install.$iteration.exit_status + # "opam install" was successful. + + # Remove the benchmarked OPAM-package, unless this is the + # very last iteration (we want to keep this OPAM-package + # because other OPAM-packages we will benchmark later + # might depend on it --- it would be a waste of time to + # remove it now just to install it later) + if [ $iteration != $num_of_iterations ]; then + opam uninstall -q $coq_opam_package + fi + else + # "opam install" failed. + echo $_RES > $log_dir/$coq_opam_package.$RUNNER.opam_install.$iteration.exit_status + continue 3 + fi + done + done + + installable_coq_opam_packages="$installable_coq_opam_packages $coq_opam_package" + + # -------------------------------------------------------------- + + # Print the intermediate results after we finish benchmarking each OPAM package + if [ "$coq_opam_package" = "$(echo $sorted_coq_opam_packages | sed 's/ /\n/g' | tail -n 1)" ]; then + + # It does not make sense to print the intermediate results when + # we finished bechmarking the very last OPAM package because the + # next thing will do is that we will print the final results. + # It would look lame to print the same table twice. + : + else + + echo "DEBUG: $program_path/render_results "$log_dir" $num_of_iterations $new_coq_commit_long $old_coq_commit_long 0 user_time_pdiff $installable_coq_opam_packages" + if [ ! -z "$BENCH_DEBUG" ]; then + cat $log_dir/$coq_opam_package.$RUNNER.1.time || true + cat $log_dir/$coq_opam_package.$RUNNER.1.perf || true + fi + rendered_results="$($program_path/render_results "$log_dir" $num_of_iterations $new_coq_commit_long $old_coq_commit_long 0 user_time_pdiff $installable_coq_opam_packages)" + echo "${rendered_results}" + # update the comment + coqbot_update_comment "" "${rendered_results}" "" + fi + + # Generate HTML report for LAST run + + # N.B. Not all packages end in .dev, e.g., coq-lambda-rust uses .dev.timestamp. + # So we use a wildcard to catch such packages. This will have to be updated if + # ever there is a package that uses some different naming scheme. + new_base_path=$new_ocaml_switch/.opam-switch/build/$coq_opam_package.dev*/ + old_base_path=$old_ocaml_switch/.opam-switch/build/$coq_opam_package.dev*/ + for vo in `cd $new_opam_root/$new_base_path/; find -name '*.vo'`; do + if [ -e $old_opam_root/$old_base_path/${vo%%o}.timing -a \ + -e $new_opam_root/$new_base_path/${vo%%o}.timing ]; then + mkdir -p $working_dir/html/$coq_opam_package/`dirname $vo`/ + $program_path/timelog2html $new_opam_root/$new_base_path/${vo%%o} \ + $old_opam_root/$old_base_path/${vo%%o}.timing \ + $new_opam_root/$new_base_path/${vo%%o}.timing > \ + $working_dir/html/$coq_opam_package/${vo%%o}.html + fi + done +done + +# The following directories in $working_dir are no longer used: +# +# - coq, opam.OLD, opam.NEW + +# Measured data for each `$coq_opam_package`, `$iteration`, `status \in {NEW,OLD}`: +# +# - $working_dir/$coq_opam_package.$status.$iteration.time +# => output of /usr/bin/time --format="%U" ... +# +# - $working_dir/$coq_opam_package.NEW.$iteration.perf +# => output of perf stat -e instructions:u,cycles:u ... +# +# The next script processes all these files and prints results in a table. + +echo "INFO: workspace = https://ci.inria.fr/coq/view/benchmarking/job/$JOB_NAME/ws/$BUILD_ID" + +# Print the final results. +if [ -z "$installable_coq_opam_packages" ]; then + # Tell the user that none of the OPAM-package(s) the user provided + # /are installable. + printf "\n\nINFO: failed to install: $sorted_coq_opam_packages" + coqbot_update_comment "done" "" "$sorted_coq_opam_packages" + exit 1 +else + echo "DEBUG: $program_path/render_results "$log_dir" $num_of_iterations $new_coq_commit_long $old_coq_commit_long 0 user_time_pdiff $installable_coq_opam_packages" + rendered_results="$($program_path/render_results "$log_dir" $num_of_iterations $new_coq_commit_long $old_coq_commit_long 0 user_time_pdiff $installable_coq_opam_packages)" + echo "${rendered_results}" + + echo "INFO: per line timing: https://ci.inria.fr/coq/job/$JOB_NAME/ws/$BUILD_ID/html/" + + cd "$coq_dir" + echo INFO: Old Coq version + git log -n 1 "$old_coq_commit" + echo INFO: New Coq version + git log -n 1 "$new_coq_commit" + + not_installable_coq_opam_packages=`comm -23 <(echo $sorted_coq_opam_packages | sed 's/ /\n/g' | sort | uniq) <(echo $installable_coq_opam_packages | sed 's/ /\n/g' | sort | uniq) | sed 's/\t//g'` + + coqbot_update_comment "done" "${rendered_results}" "${not_installable_coq_opam_packages}" + + exit_code=0 + + if [ ! -z "$not_installable_coq_opam_packages" ]; then + # Tell the user that some of the provided OPAM-package(s) + # is/are not installable. + printf '\n\nINFO: failed to install %s\n' "$not_installable_coq_opam_packages" + exit_code=1 + fi + + exit 0 +fi diff --git a/dev/bench/render_results b/dev/bench/render_results new file mode 100755 index 0000000000..72affd70b2 --- /dev/null +++ b/dev/bench/render_results @@ -0,0 +1,434 @@ +#! /usr/bin/env ocaml + +(* ASSUMPTIONS: + - the 1-st command line argument (working directory): + - designates an existing readable directory + - which contains *.time and *.perf files produced by bench.sh script + - the 2-nd command line argument (number of iterations): + - is a positive integer + - the 3-rd command line argument (minimal user time): + - is a positive floating point number + - the 4-th command line argument determines the name of the column according to which the resulting table will be sorted. + Valid values are: + - package_name + - user_time_pdiff + - the rest of the command line-arguments + - are names of benchamarked Coq OPAM packages for which bench.sh script generated *.time and *.perf files + *) + +#use "topfind";; +#require "unix";; +#print_depth 100000000;; +#print_length 100000000;; + +open Printf +open Unix +;; + +let _ = Printexc.record_backtrace true +;; + +type ('a,'b) pkg_timings = { + user_time : 'a; + num_instr : 'b; + num_cycles : 'b; + num_mem : 'b; + num_faults : 'b; +} +;; + +let reduce_pkg_timings (m_f : 'a list -> 'c) (m_a : 'b list -> 'd) (t : ('a,'b) pkg_timings list) : ('c,'d) pkg_timings = + { user_time = m_f @@ List.map (fun x -> x.user_time) t + ; num_instr = m_a @@ List.map (fun x -> x.num_instr) t + ; num_cycles = m_a @@ List.map (fun x -> x.num_cycles) t + ; num_mem = m_a @@ List.map (fun x -> x.num_mem) t + ; num_faults = m_a @@ List.map (fun x -> x.num_faults) t + } +;; + +(******************************************************************************) +(* BEGIN Copied from batteries, to remove *) +(******************************************************************************) +let run_and_read cmd = + (* This code is before the open of BatInnerIO + to avoid using batteries' wrapped IOs *) + let string_of_file fn = + let buff_size = 1024 in + let buff = Buffer.create buff_size in + let ic = open_in fn in + let line_buff = Bytes.create buff_size in + begin + let was_read = ref (input ic line_buff 0 buff_size) in + while !was_read <> 0 do + Buffer.add_subbytes buff line_buff 0 !was_read; + was_read := input ic line_buff 0 buff_size; + done; + close_in ic; + end; + Buffer.contents buff + in + let tmp_fn = Filename.temp_file "" "" in + let cmd_to_run = cmd ^ " > " ^ tmp_fn in + let status = Unix.system cmd_to_run in + let output = string_of_file tmp_fn in + Unix.unlink tmp_fn; + (status, output) +;; + +let ( %> ) f g x = g (f x) +;; + +let run = run_and_read %> snd +;; + +module Float = struct + let nan = Pervasives.nan +end + +module Tuple4 = struct + + let first (x,_,_,_) = x + let second (_,y,_,_) = y + let third (_,_,z,_) = z + let fourth (_,_,_,z) = z + +end +;; + +module List = struct + include List + + let rec init_tailrec_aux acc i n f = + if i >= n then acc + else init_tailrec_aux (f i :: acc) (i+1) n f + + let rec init_aux i n f = + if i >= n then [] + else + let r = f i in + r :: init_aux (i+1) n f + + let rev_init_threshold = + match Sys.backend_type with + | Sys.Native | Sys.Bytecode -> 10_000 + (* We don't known the size of the stack, better be safe and assume it's small. *) + | Sys.Other _ -> 50 + + let init len f = + if len < 0 then invalid_arg "List.init" else + if len > rev_init_threshold then rev (init_tailrec_aux [] 0 len f) + else init_aux 0 len f + + let rec drop n = function + | _ :: l when n > 0 -> drop (n-1) l + | l -> l + + let reduce f = function + | [] -> + invalid_arg "List.reduce: Empty List" + | h :: t -> + fold_left f h t + + let min l = reduce Pervasives.min l + let max l = reduce Pervasives.max l + +end +;; + +module String = struct + + include String + + let rchop ?(n = 1) s = + if n < 0 then + invalid_arg "String.rchop: number of characters to chop is negative" + else + let slen = length s in + if slen <= n then "" else sub s 0 (slen - n) + +end +;; + +(******************************************************************************) +(* END Copied from batteries, to remove *) +(******************************************************************************) + +let mk_pkg_timings work_dir pkg_name suffix iteration = + let command_prefix = "cat " ^ work_dir ^ "/" ^ pkg_name ^ suffix ^ string_of_int iteration in + let time_command_output = command_prefix ^ ".time" |> run |> String.rchop ~n:1 |> String.split_on_char ' ' in + + let nth x i = List.nth i x in + + { user_time = time_command_output |> nth 0 |> float_of_string + (* Perf can indeed be not supported in some systems, so we must fail gracefully *) + ; num_instr = + (try command_prefix ^ ".perf | grep instructions:u | awk '{print $1}' | sed 's/,//g'" |> + run |> String.rchop ~n:1 |> int_of_string + with Failure _ -> 0) + ; num_cycles = + (try command_prefix ^ ".perf | grep cycles:u | awk '{print $1}' | sed 's/,//g'" |> + run |> String.rchop ~n:1 |> int_of_string + with Failure _ -> 0) + ; num_mem = time_command_output |> nth 1 |> int_of_string + ; num_faults = time_command_output |> nth 2 |> int_of_string + } +;; + +(* process command line paramters *) +assert (Array.length Sys.argv > 5); +let work_dir = Sys.argv.(1) in +let num_of_iterations = int_of_string Sys.argv.(2) in +let new_coq_version = Sys.argv.(3) in +let old_coq_version = Sys.argv.(4) in +let minimal_user_time = float_of_string Sys.argv.(5) in +let sorting_column = Sys.argv.(6) in +let coq_opam_packages = Sys.argv |> Array.to_list |> List.drop 7 in + +(* ASSUMPTIONS: + + "working_dir" contains all the files produced by the following command: + + two_points_on_the_same_branch.sh $working_directory $coq_repository $coq_branch[:$new:$old] $num_of_iterations coq_opam_package_1 coq_opam_package_2 ... coq_opam_package_N +-sf +*) + +(* Run a given bash command; + wait until it termines; + check if its exit status is 0; + return its whole stdout as a string. *) + +let proportional_difference_of_integers new_value old_value = + if old_value = 0 + then Float.nan + else float_of_int (new_value - old_value) /. float_of_int old_value *. 100.0 +in + +let count_number_of_digits_before_decimal_point = + log10 %> floor %> int_of_float %> succ %> max 1 +in + +(* parse the *.time and *.perf files *) +coq_opam_packages +|> List.map + (fun package_name -> + package_name,(* compilation_results_for_NEW : (float * int * int * int) list *) + List.init num_of_iterations succ |> List.map (mk_pkg_timings work_dir package_name ".NEW."), + List.init num_of_iterations succ |> List.map (mk_pkg_timings work_dir package_name ".OLD.")) + +(* from the list of measured values, select just the minimal ones *) + +|> List.map + (fun ((package_name : string), + (new_measurements : (float, int) pkg_timings list), + (old_measurements : (float, int) pkg_timings list)) -> + let f_min : float list -> float = List.min in + let i_min : int list -> int = List.min in + package_name, + reduce_pkg_timings f_min i_min new_measurements, + reduce_pkg_timings f_min i_min old_measurements + ) + +(* compute the "proportional differences in % of the NEW measurement and the OLD measurement" of all measured values *) +|> List.map + (fun (package_name, new_t, old_t) -> + package_name, new_t, old_t, + { user_time = (new_t.user_time -. old_t.user_time) /. old_t.user_time *. 100.0 + ; num_instr = proportional_difference_of_integers new_t.num_instr old_t.num_instr + ; num_cycles = proportional_difference_of_integers new_t.num_cycles old_t.num_cycles + ; num_mem = proportional_difference_of_integers new_t.num_mem old_t.num_mem + ; num_faults = proportional_difference_of_integers new_t.num_faults old_t.num_faults + }) + +(* sort the table with results *) +|> List.sort + (match sorting_column with + | "user_time_pdiff" -> + fun (_,_,_,perf1) (_,_,_,perf2) -> + compare perf1.user_time perf2.user_time + | "package_name" -> + fun (n1,_,_,_) (n2,_,_,_) -> compare n1 n2 + | _ -> + assert false + ) + +(* Keep only measurements that took at least "minimal_user_time" (in seconds). *) + +|> List.filter + (fun (_, new_t, old_t, _) -> + minimal_user_time <= new_t.user_time && minimal_user_time <= old_t.user_time) + +(* Below we take the measurements and format them to stdout. *) + +|> fun measurements -> + + let precision = 2 in + + (* the labels that we will print *) + let package_name__label = "package_name" in + let new__label = "NEW" in + let old__label = "OLD" in + let proportional_difference__label = "PDIFF" in + + (* the lengths of labels that we will print *) + let new__label__length = String.length new__label in + let proportional_difference__label__length = String.length proportional_difference__label in + + (* widths of individual columns of the table *) + let package_name__width = + max (measurements |> List.map (Tuple4.first %> String.length) |> List.max) + (String.length package_name__label) in + + let llf proj = + let lls = count_number_of_digits_before_decimal_point (List.max proj) + 1 + precision in + max lls new__label__length in + + let lli proj = + let lls = count_number_of_digits_before_decimal_point (float_of_int (List.(max proj))) + 1 + precision in + max lls new__label__length in + + let new_timing_width = reduce_pkg_timings llf lli @@ List.map Tuple4.second measurements in + let old_timing_width = reduce_pkg_timings llf lli @@ List.map Tuple4.third measurements in + + let llp proj = + let lls = + count_number_of_digits_before_decimal_point List.(max List.(map abs_float proj)) + 2 + precision in + max lls proportional_difference__label__length in + + let perc_timing_width = reduce_pkg_timings llp llp @@ List.map Tuple4.fourth measurements in + + (* print the table *) + let rec make_dashes = function + | 0 -> "" + | count -> "─" ^ make_dashes (pred count) + in + + let vertical_separator left_glyph middle_glyph right_glyph = + sprintf "%s─%s─%s─%s─%s─%s───%s─%s─%s─%s───%s─%s─%s─%s───%s─%s─%s─%s───%s─%s─%s─%s───%s\n" + left_glyph + (make_dashes package_name__width) + middle_glyph + (make_dashes new_timing_width.user_time) + (make_dashes old_timing_width.user_time) + (make_dashes perc_timing_width.user_time) + middle_glyph + (make_dashes new_timing_width.num_cycles) + (make_dashes old_timing_width.num_cycles) + (make_dashes perc_timing_width.num_cycles) + middle_glyph + (make_dashes new_timing_width.num_instr) + (make_dashes old_timing_width.num_instr) + (make_dashes perc_timing_width.num_instr) + middle_glyph + (make_dashes new_timing_width.num_mem) + (make_dashes old_timing_width.num_mem) + (make_dashes perc_timing_width.num_mem) + middle_glyph + (make_dashes new_timing_width.num_faults) + (make_dashes old_timing_width.num_faults) + (make_dashes perc_timing_width.num_faults) + right_glyph + in + + let center_string string width = + let string_length = String.length string in + let width = max width string_length in + let left_hfill = (width - string_length) / 2 in + let right_hfill = width - left_hfill - string_length in + String.make left_hfill ' ' ^ string ^ String.make right_hfill ' ' + in + printf "\n"; + print_string (vertical_separator "┌" "┬" "┐"); + "│" ^ String.make (1 + package_name__width + 1) ' ' ^ "│" + ^ center_string "user time [s]" (1 + new_timing_width.user_time + 1 + old_timing_width.user_time + 1 + perc_timing_width.user_time + 3) ^ "│" + ^ center_string "CPU cycles" (1 + new_timing_width.num_cycles + 1 + old_timing_width.num_cycles + 1 + perc_timing_width.num_cycles + 3) ^ "│" + ^ center_string "CPU instructions" (1 + new_timing_width.num_instr + 1 + old_timing_width.num_instr + 1 + perc_timing_width.num_instr + 3) ^ "│" + ^ center_string "max resident mem [KB]" (1 + new_timing_width.num_mem + 1 + old_timing_width.num_mem + 1 + perc_timing_width.num_mem + 3) ^ "│" + ^ center_string "mem faults" (1 + new_timing_width.num_faults + 1 + old_timing_width.num_faults + 1 + perc_timing_width.num_faults + 3) + ^ "│\n" |> print_string; + printf "│%*s │ %*s│ %*s│ %*s│ %*s│ %*s│\n" + (1 + package_name__width) "" + (new_timing_width.user_time + 1 + old_timing_width.user_time + 1 + perc_timing_width.user_time + 3) "" + (new_timing_width.num_cycles + 1 + old_timing_width.num_cycles + 1 + perc_timing_width.num_cycles + 3) "" + (new_timing_width.num_instr + 1 + old_timing_width.num_instr + 1 + perc_timing_width.num_instr + 3) "" + (new_timing_width.num_mem + 1 + old_timing_width.num_mem + 1 + perc_timing_width.num_mem + 3) "" + (new_timing_width.num_faults + 1 + old_timing_width.num_faults + 1 + perc_timing_width.num_faults + 3) ""; + printf "│ %*s │ %*s %*s %*s │ %*s %*s %*s │ %*s %*s %*s │ %*s %*s %*s │ %*s %*s %*s │\n" + package_name__width package_name__label + new_timing_width.user_time new__label + old_timing_width.user_time old__label + perc_timing_width.user_time proportional_difference__label + new_timing_width.num_cycles new__label + old_timing_width.num_cycles old__label + perc_timing_width.num_cycles proportional_difference__label + new_timing_width.num_instr new__label + old_timing_width.num_instr old__label + perc_timing_width.num_instr proportional_difference__label + new_timing_width.num_mem new__label + old_timing_width.num_mem old__label + perc_timing_width.num_mem proportional_difference__label + new_timing_width.num_faults new__label + old_timing_width.num_faults old__label + perc_timing_width.num_faults proportional_difference__label; + measurements |> List.iter + (fun (package_name, new_t, old_t, perc) -> + print_string (vertical_separator "├" "┼" "┤"); + printf "│ %*s │ %*.*f %*.*f %+*.*f %% │ %*d %*d %+*.*f %% │ %*d %*d %+*.*f %% │ %*d %*d %+*.*f %% │ %*d %*d %+*.*f %% │\n" + package_name__width package_name + new_timing_width.user_time precision new_t.user_time + old_timing_width.user_time precision old_t.user_time + perc_timing_width.user_time precision perc.user_time + new_timing_width.num_cycles new_t.num_cycles + old_timing_width.num_cycles old_t.num_cycles + perc_timing_width.num_cycles precision perc.num_cycles + new_timing_width.num_instr new_t.num_instr + old_timing_width.num_instr old_t.num_instr + perc_timing_width.num_instr precision perc.num_instr + new_timing_width.num_mem new_t.num_mem + old_timing_width.num_mem old_t.num_mem + perc_timing_width.num_mem precision perc.num_mem + new_timing_width.num_faults new_t.num_faults + old_timing_width.num_faults old_t.num_faults + perc_timing_width.num_faults precision perc.num_faults); + +print_string (vertical_separator "└" "┴" "┘"); + +(* ejgallego: disable this as it is very verbose and brings up little info in the log. *) +if false then begin +printf " + +PDIFF = proportional difference between measurements done for the NEW and the OLD Coq version + = (NEW_measurement - OLD_measurement) / OLD_measurement * 100%% + +NEW = %s +OLD = %s + +Columns: + + 1. user time [s] + + Total number of CPU-seconds that the process used directly (in user mode), in seconds. + (In other words, \"%%U\" quantity provided by the \"/usr/bin/time\" command.) + + 2. CPU cycles + + Total number of CPU-cycles that the process used directly (in user mode). + (In other words, \"cycles:u\" quantity provided by the \"/usr/bin/perf\" command.) + + 3. CPU instructions + + Total number of CPU-instructions that the process used directly (in user mode). + (In other words, \"instructions:u\" quantity provided by the \"/usr/bin/perf\" command.) + + 4. max resident mem [KB] + + Maximum resident set size of the process during its lifetime, in Kilobytes. + (In other words, \"%%M\" quantity provided by the \"/usr/bin/time\" command.) + + 5. mem faults + + Number of major, or I/O-requiring, page faults that occurred while the process was running. + These are faults where the page has actually migrated out of primary memory. + (In other words, \"%%F\" quantity provided by the \"/usr/bin/time\" command.) + +" new_coq_version old_coq_version; +end diff --git a/dev/bench/sort-by-deps b/dev/bench/sort-by-deps new file mode 100644 index 0000000000..e1da4e0ed5 --- /dev/null +++ b/dev/bench/sort-by-deps @@ -0,0 +1,33 @@ +#!/usr/bin/env ocaml + +let get_pkg_name arg = + List.nth (String.split_on_char ':' arg) 0 + +let get_pkg_deps arg = + String.split_on_char ',' (List.nth (String.split_on_char ':' arg) 1) + +let split_pkg arg = get_pkg_name arg, get_pkg_deps arg + +let depends_on arg1 arg2 = + let pkg1, deps1 = split_pkg arg1 in + let pkg2, deps2 = split_pkg arg2 in + pkg1 != pkg2 && List.mem pkg2 deps1 + +let rec sort = function + | [], [] -> [] + | [], deferred -> sort (List.rev deferred, []) + | arg :: rest, deferred -> + (* check if any remaining package reverse-depends on this one *) + if List.exists (fun other_arg -> depends_on arg other_arg) rest + then (* defer this package *) + sort (rest, arg :: deferred) + else (* emit this package, and then try again with any deferred packages *) + arg :: sort (List.rev deferred @ rest, []) + +let main () = + let args = Array.to_list Sys.argv in + let pkgs = List.tl args in + let sorted_pkgs = sort (pkgs, []) in + Printf.printf "%s\n%!" (String.concat " " (List.map get_pkg_name sorted_pkgs)) + +let () = main () diff --git a/dev/bench/sort-by-deps.sh b/dev/bench/sort-by-deps.sh new file mode 100755 index 0000000000..075976c17d --- /dev/null +++ b/dev/bench/sort-by-deps.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +program_name="$0" +program_path=$(readlink -f "${program_name%/*}") + +# We add || true (which may not be needed without set -e) to be +# explicit about the fact that this script does not fail even if `opam +# install --show-actions` does, e.g., because of a non-existent +# package +# +# TODO: Figure out how to use the OPAM API +# (https://opam.ocaml.org/doc/api/) to call this from OCaml. +for i in "$@"; do + echo -n "$i:"; ((echo -n "$(opam install --show-actions "$i" | grep -o '∗\s*install\s*[^ ]*' | sed 's/∗\s*install\s*//g')" | tr '\n' ',') || true); echo +done | xargs ocaml "${program_path}/sort-by-deps" diff --git a/dev/bench/timelog2html b/dev/bench/timelog2html new file mode 100755 index 0000000000..abbeb5936d --- /dev/null +++ b/dev/bench/timelog2html @@ -0,0 +1,141 @@ +#!/usr/bin/env lua5.1 + +args = {...} + +vfile = assert(args[1], "arg1 missing: .v file") +table.remove(args,1) +assert(#args > 0, "arg missing: at lease one aux file") +data_files = args + +source = assert(io.open(vfile), "unable to open "..vfile):read("*a") + +function htmlescape(s) + return (s:gsub("&","&"):gsub("<","<"):gsub(">",">")) +end + +colors = { + '#F08080', '#EEE8AA', '#98FB98' +} + +assert(#data_files <= #colors, "only ".. #colors .." data files are supported") + +vname = vfile:match("([^/]+.v)$") + +print([[ +<html> +<head> +<title>]]..vname..[[</title> +<style>]]) +for i,k in ipairs(colors) do + print( + ".time" .. i .. " {".. + "background-color: " .. k .. ";".. + "height: ".. 100 / #data_files .."%;".. + "top: " .. 100 / #data_files * (i - 1) .. "%;".. + "z-index: -1; position: absolute; opacity: 50%; }") +end +print([[.code { + z-index: 0; + position: relative; + border-style: solid; + border-color: transparent; + border-width: 1px; +} +.code:hover { + border-color: black; +} +pre { + display: inline; +} +</style> +</head> +<body> +<h1>Timings for ]]..vname..[[</h1> +<ol> +]]) +for i,data_file in ipairs(data_files) do + print('<li style="background-color: '..colors[i]..'">' .. data_file .. "</li>") +end +print("</ol>") + +all_data = {} + +for _, data_file in ipairs(data_files) do + local data = {} + local last_end = -1 + local lines = 1 + for l in io.lines(data_file) do + local b,e,t = l:match('^Chars ([%d]+) %- ([%d]+) %S+ ([%d%.]+) secs') + if b then + if tonumber(b) > last_end + 1 then + local text = string.sub(source,last_end+1,b-1) + if not text:match('^%s+$') then + local _, n = text:gsub('\n','') + data[#data+1] = { + start = last_end+1; stop = b-1; time = 0; + text = text; lines = lines + } + lines = lines + n + last_end = b + end + end + local text = string.sub(source,last_end+1,e) + local _, n = text:gsub('\n','') + local _, eoln = text:match('^[%s\n]*'):gsub('\n','') + data[#data+1] = { + start = b; stop = e; time = tonumber(t); text = text; + lines = lines + } + lines = lines + n + last_end = tonumber(e) + end + end + if last_end + 1 <= string.len(source) then + local text = string.sub(source,last_end+1,string.len(source)) + data[#data+1] = { + start = last_end+1; stop = string.len(source); time = 0; + text = text; lines = lines+1 + } + end +all_data[#all_data+1] = data +end + +max = 0; +for _, data in ipairs(all_data) do + for _,d in ipairs(data) do + max = math.max(max,d.time) + end +end + +data = all_data[1] +for j,d in ipairs(data) do + print('<div class="code" title="File: '..vname.. + '\nLine: '..d.lines..'\n') + for k=1,#all_data do + print('Time'..k..': '..all_data[k][j].time..'s') + end + print('">') + for k=1,#all_data do + print('<div class="time'..k..'" style="width: '.. + all_data[k][j].time * 100 / max ..'%"></div>') + end + if d.text == '\n' then + print('<pre>\n\n</pre>') + elseif d.text:match('\n$') then + print('<pre>'..htmlescape(d.text)..'\n</pre>') + else + print('<pre>'..htmlescape(d.text)..'</pre>') + end + print("</div>") +end + +print [[ +</body> +</html> +]] + +-- vim: set ts=4: + +--for i = 1,#data do +-- io.stderr:write(data[i].text) +--end diff --git a/dev/build/windows/MakeCoq_MinGW.bat b/dev/build/windows/MakeCoq_MinGW.bat index 577ce35aae..fd6ea9bb09 100755 --- a/dev/build/windows/MakeCoq_MinGW.bat +++ b/dev/build/windows/MakeCoq_MinGW.bat @@ -55,7 +55,7 @@ IF DEFINED HTTP_PROXY ( )
REM see -cygrepo in ReadMe.txt
-SET CYGWIN_REPOSITORY=http://mirror.easyname.at/cygwin
+SET CYGWIN_REPOSITORY=https://mirrors.kernel.org/sourceware/cygwin
REM see -cygcache in ReadMe.txt
SET CYGWIN_LOCAL_CACHE_WFMT=%BATCHDIR%cygwin_cache
diff --git a/dev/ci/ci-metacoq.sh b/dev/ci/ci-metacoq.sh index 1302065961..27876d68de 100755 --- a/dev/ci/ci-metacoq.sh +++ b/dev/ci/ci-metacoq.sh @@ -5,4 +5,4 @@ ci_dir="$(dirname "$0")" git_download metacoq -( cd "${CI_BUILD_DIR}/metacoq" && ./configure.sh local && make ci-local && make install ) +( cd "${CI_BUILD_DIR}/metacoq" && ./configure.sh local && make .merlin && make ci-local && make install ) diff --git a/dev/ci/user-overlays/12565-ppedrot-fix-tc-search-opacity.sh b/dev/ci/user-overlays/12565-ppedrot-fix-tc-search-opacity.sh new file mode 100644 index 0000000000..7c04608403 --- /dev/null +++ b/dev/ci/user-overlays/12565-ppedrot-fix-tc-search-opacity.sh @@ -0,0 +1,6 @@ +if [ "$CI_PULL_REQUEST" = "12565" ] || [ "$CI_BRANCH" = "fix-tc-search-opacity" ]; then + + coqhammer_CI_REF=fix-tc-search-opacity + coqhammer_CI_GITURL=https://github.com/ppedrot/coqhammer + +fi diff --git a/dev/ci/user-overlays/12709-ppedrot-hint-pattern-out.sh b/dev/ci/user-overlays/12709-ppedrot-hint-pattern-out.sh new file mode 100644 index 0000000000..56a69abbf7 --- /dev/null +++ b/dev/ci/user-overlays/12709-ppedrot-hint-pattern-out.sh @@ -0,0 +1,6 @@ +if [ "$CI_PULL_REQUEST" = "12709" ] || [ "$CI_BRANCH" = "hint-pattern-out" ]; then + + coqhammer_CI_REF=hint-pattern-out + coqhammer_CI_GITURL=https://github.com/ppedrot/coqhammer + +fi diff --git a/dev/ci/user-overlays/12720-ppedrot-factor-class-hint-clenv.sh b/dev/ci/user-overlays/12720-ppedrot-factor-class-hint-clenv.sh new file mode 100644 index 0000000000..e57f95ef19 --- /dev/null +++ b/dev/ci/user-overlays/12720-ppedrot-factor-class-hint-clenv.sh @@ -0,0 +1,6 @@ +if [ "$CI_PULL_REQUEST" = "12720" ] || [ "$CI_BRANCH" = "factor-class-hint-clenv" ]; then + + coqhammer_CI_REF=factor-class-hint-clenv + coqhammer_CI_GITURL=https://github.com/ppedrot/coqhammer + +fi diff --git a/dev/ci/user-overlays/12756-jashug-dont-refresh-argument-names.sh b/dev/ci/user-overlays/12756-jashug-dont-refresh-argument-names.sh new file mode 100644 index 0000000000..54fdd87566 --- /dev/null +++ b/dev/ci/user-overlays/12756-jashug-dont-refresh-argument-names.sh @@ -0,0 +1,9 @@ +if [ "$CI_PULL_REQUEST" = "12756" ] || [ "$CI_BRANCH" = "dont-refresh-argument-names" ]; then + + mathcomp_CI_REF=dont-refresh-argument-names-overlay + mathcomp_CI_GITURL=https://github.com/jashug/math-comp + + oddorder_CI_REF=dont-refresh-argument-names-overlay + oddorder_CI_GITURL=https://github.com/jashug/odd-order + +fi diff --git a/dev/doc/parsing.md b/dev/doc/parsing.md new file mode 100644 index 0000000000..f8b4537e77 --- /dev/null +++ b/dev/doc/parsing.md @@ -0,0 +1,397 @@ +# Parsing + +Coq's parser is based on Camlp5 using an extensible grammar. Somewhat helpful +Camlp5 documentation is available [here](http://camlp5.github.io/doc/htmlc/grammars.html). +However, the Camlp5 code has been copied into the Coq source tree and may differ +from the Camlp5 release. + +Notable attributes of the parser include: + +* The grammar is extensible at run time. This is essential for supporting notations + and optionally-loaded plugins that extend the grammar. + +* The grammar is split into multiple source files. Nonterminals can be local to a file + or global. + +* While 95% of the nonterminals and almost all the productions are defined in the grammar, + a few are defined directly in OCaml code. Since many developers have worked on the parser + over the years, this code can be idiosyncratic, reflecting various coding styles. + +* The parser is a recursive descent parser that, by default, only looks at the next token + to make a parsing decision. It's possible to hand-code additional lookahead where + necessary by writing OCaml code. + +* There's no code that checks whether a grammar is ambiguous or whether every production + can be recognized. Developers who modify the grammar may, in some cases, need to structure their + added productions in specific ways to ensure that their additions are parsable and that they + don't break existing productions. + +## Contents ## + +- [Grammars: `*.mlg` File Structure](#grammars-mlg-file-structure) +- [Grammars: Nonterminals and Productions](#grammars-nonterminals-and-productions) + - [Alternate production syntax](#alternate-production-syntax) +- [Usage notes](#usage-notes) + - [Other components](#other-components) + - [Parsing productions](#parsing-productions) + - [Lookahead](#lookahead) + +## Grammars: `*.mlg` File Structure ## + +Grammars are defined in `*.mlg` files, which `coqpp` compiles into `*.ml` files at build time. +`coqpp` code is in the `coqpp` directory. `coqpp` uses yacc and lex to parse the grammar files. +You can examine its yacc and lex input files in `coqpp_lex.mll` and `coqpp_parse.mly` for +details not fully covered here. + +In addition, there is a `doc_grammar` build utility that uses the `coqpp` parser to extract the +grammar, then edits and inserts it into the documentation. This is described in +[`doc/tools/docgram/README.md`](../../doc/tools/docgram/README.md). +`doc_grammar` generates +[`doc/tools/docgram/fullGrammar`](../../doc/tools/docgram/fullGrammar), +which has the full grammar for Coq +(not including some optionally-loaded plugins). This may be easier to read since everything is +in one file and the parser action routines and other OCaml code are omitted. + +`*.mlg` files contain the following types of nodes (See `node` in the yacc grammar). This part is +very specific to Coq (not so similar to Camlp5): + +* OCaml code - OCaml code enclosed in curly braces, which is copied verbatim to the generated `*.ml` file + +* Comments - comments in the `*.mlg` file in the form `(* … *)`, which are not copied + to the generated `*.ml` file. Comments in OCaml code are preserved. + +* `DECLARE_PLUGIN "*_plugin"` - associates the file with a specific plugin, for example "ltac_plugin" + +* `GRAMMAR EXTEND` - adds additional nonterminals and productions to the grammar and declares global + nonterminals referenced in the `GRAMMAR EXTEND`: + + ``` + GRAMMAR EXTEND Gram + GLOBAL: + bignat bigint …; + <nonterminal definitions> + END + ``` + + Global nonterminals are declared in `pcoq.ml`, e.g. `let bignat = Entry.create "Prim.bignat"`. + All the `*.mlg` files include `open Pcoq` and often its modules, e.g. `open Pcoq.Prim`. + + `GRAMMAR EXTEND` should be used only for large syntax additions. To add new commands + and tactics, use these instead: + + - `VERNAC COMMAND EXTEND` to add new commands + - `TACTIC EXTEND` to add new tactics + - `ARGUMENT EXTEND` to add new nonterminals + + These constructs provide essential semantic information that's provided in a more complex, + less readable way with `GRAMMAR EXTEND`. + +* `VERNAC COMMAND EXTEND` - adds new command syntax by adding productions to the + `command` nonterminal. For example: + + ``` + VERNAC COMMAND EXTEND ExtractionLibrary CLASSIFIED AS QUERY + | [ "Extraction" "Library" ident(m) ] + -> { extraction_library false m } + END + ``` + + Productions here are represented with alternate syntax, described later. + + New commands should be added using this construct rather than `GRAMMAR EXTEND` so + they are correctly registered, such as having the correct command classifier. + + TODO: explain "ExtractionLibrary", CLASSIFIED AS, CLASSIFIED BY, "{ tactic_mode }", STATE + +* `VERNAC { … } EXTEND` - TODO. A variant. The `{ … }` is a block of OCaml code. + +* `TACTIC EXTEND` - adds new tactic syntax by adding productions to `simple_tactic`. + For example: + + ``` + TACTIC EXTEND btauto + | [ "btauto" ] -> { Refl_btauto.Btauto.tac } + END + ``` + + adds a new nonterminal `btauto`. + + New tactics should be added using this construct rather than `GRAMMAR EXTEND`. + + TODO: explain DEPRECATED, LEVEL (not shown) + +* `ARGUMENT EXTEND` - defines a new nonterminal + + ``` + ARGUMENT EXTEND ast_closure_term + PRINTED BY { pp_ast_closure_term } + INTERPRETED BY { interp_ast_closure_term } + GLOBALIZED BY { glob_ast_closure_term } + SUBSTITUTED BY { subst_ast_closure_term } + RAW_PRINTED BY { pp_ast_closure_term } + GLOB_PRINTED BY { pp_ast_closure_term } + | [ term_annotation(a) constr(c) ] -> { mk_ast_closure_term a c } + END + ``` + + See comments in `tacentries.mli` for partial information on the various + arguments. + +* `VERNAC ARGUMENT EXTEND` - (part of `argument_extend` in the yacc grammar) defines + productions for a single nonterminal. For example: + + ``` + VERNAC ARGUMENT EXTEND language + PRINTED BY { pr_language } + | [ "Ocaml" ] -> { let _ = warn_deprecated_ocaml_spelling () in Ocaml } + | [ "OCaml" ] -> { Ocaml } + | [ "Haskell" ] -> { Haskell } + | [ "Scheme" ] -> { Scheme } + | [ "JSON" ] -> { JSON } + END + ``` + + TODO: explain PRINTED BY, CODE + +* DOC_GRAMMAR - Used in `doc_grammar`-generated files to permit simplified syntax + +Note that you can reverse engineer many details by comparing the `.mlg` input file with +the `.ml` generated by `coqpp`. + +## Grammars: Nonterminals and Productions + +Here's a simple nonterminal definition in the Camlp5 format: + + ``` + universe: + [ [ IDENT "max"; "("; ids = LIST1 universe_expr SEP ","; ")" -> { ids } + | u = universe_expr -> { [u] } ] ] + ; + ``` + +In which: +* `universe` is the nonterminal being defined +* productions are separated by `|` and, as a group, are enclosed in `[ [ … ] ];` +* `u = universe_expr` refers to the `universe_expr` nonterminal. `u` is bound to + the value returned by that nonterminal's action routine, which can be + referred to in the action routine. For `ids = LIST1 universe_expr SEP ","`, + `ids` is bound to the list of values returned by `universe_expr`. +* `-> { … }` contains the OCaml action routine, which is executed when the production is recognized + and returns a value +* Semicolons separate adjacent grammatical elements (nonterminals, strings or other constructs) + +Grammatical elements that appear in productions are: + +- nonterminal names - identifiers in the form `[a-zA-Z0-9_]*`. These correspond to variables in + the generated `.ml` code. In some cases a qualified name, such as `Prim.name`, is used. +- `"…"` - a literal string that becomes a keyword and cannot be used as an `ident`. + The string doesn't have to be a valid identifier; frequently the string will contain only + punctuation characters. Generally we try to avoid adding new keywords that are also valid + identifiers--though there is an unresolved debate among the developers about whether having more + such keywords in general is good (e.g. it makes it easier to highlight keywords in GUIs) + or bad (more keywords for the user to avoid and new keywords may require changes to existing + proof files). +- `IDENT "…"` - a literal string that has the form of an `ident` that doesn't become + a keyword +- `OPT element` - optionally include `element` (e.g. a nonterminal, IDENT "…" or "…"). + The value is of type `'a option`. +- `LIST1 element` - a list of one or more `element`s. The value is of type `'a list`. +- `LIST0 element` - an optional list of `element`s +- `LIST1 element SEP sep` - a list of `element`s separated by `sep` +- `LIST0 element SEP sep` - an optional list of `element`s separated by `sep` +- `( elements )` - grouping to represent a series of elements as a unit, + useful within `OPT` and `LIST*`. +- `[ elements1 | elements2 | … ]` - alternatives (either `elements1` or `elements2` or …), + actually nested productions, each of which can have its own action routines + +Nonterminals can also be defined with multiple levels to specify precedence and associativity +of its productions. This is described in the Coq documentation under the `Print Grammar` +command. The first square bracket around a nonterminal definition is for grouping +level definitions, which are separated with `|`, for example: + + ``` + tactic_expr: + [ "5" RIGHTA + [ te = binder_tactic -> { te } ] + | "4" LEFTA + : + ``` + +Grammar extensions can specify what level they are modifying, for example: + + ``` + tactic_expr: LEVEL "1" [ RIGHTA + [ tac = tactic_expr; intros = ssrintros_ne -> { tclintros_expr ~loc tac intros } + ] ]; + ``` + +### Alternate production syntax ### + +Except for `GRAMMAR EXTEND`, the `EXTEND` nodes in the `*.mlg`s use simplified syntax in +productions that's similar to what's used in the `Tactic Notation` and +`Ltac2 Notation` commands. For example: + + ``` + TACTIC EXTEND cc + | [ "congruence" ] -> { congruence_tac 1000 [] } + | [ "congruence" integer(n) ] -> { congruence_tac n [] } + | [ "congruence" "with" ne_constr_list(l) ] -> { congruence_tac 1000 l } + | [ "congruence" integer(n) "with" ne_constr_list(l) ] -> + { congruence_tac n l } + END + ``` + +Nonterminals appearing in the alternate production syntax are accessed through `wit_*` symbols +defined in the OCaml code. Some commonly used symbols are defined in `stdarg.ml`. +Others are defined in the code generated by `ARGUMENT EXTEND` and `VERNAC ARGUMENT EXTEND` +constructs. References to nonterminals that don't have `wit_*` symbols cause +compilation errors. + +The differences are: +* The outer `: [ … ];` is omitted. Each production is enclosed in `| [ … ]`. +* The action routine is outside the square brackets +* Literal strings that are valid identifiers don't become reserved keywords +* No semicolons separating elements of the production +* `integer(n)` is used to bind a nonterminal value to a variable instead of `n = integer` +* Alternate forms of constructs are used: + * `ne_entry_list` for `LIST1 entry` + * `entry_list` for `LIST0 entry` + * `ne_entry_list_sep(var, sep)` for `LIST1 entry SEP sep` where the list is bound to `var` + * `entry_list_sep(var, sep)` for `LIST0 entry SEP sep` where the list is bound to `var` + * `entry_opt` for OPT entry +* There's no way to define `LEVEL`s +* There's no equivalent to `( elements )` or `[ elements1 | elements2 | … ]`, which may + require repeating similar syntax several times. For example, this single production + is equivalent to 8 productions in `TACTIC EXTEND` representing all possible expansions of + three `OPT`s: + + ``` + | IDENT "Add"; IDENT "Parametric"; IDENT "Relation"; LIST0 binder; ":"; constr; constr; + OPT [ IDENT "reflexivity"; IDENT "proved"; IDENT "by"; constr -> { … } ]; + OPT [ IDENT "symmetry"; IDENT "proved"; IDENT "by"; constr -> { … } ]; + OPT [ IDENT "transitivity"; IDENT "proved"; IDENT "by"; constr -> { … } ]; + IDENT "as"; ident -> { … } + ``` + +## Usage notes + +### Other components + +Coq's lexer is in `clexer.ml`. Its 10 token types are defined in `tok.ml`. + +The parser is in `grammar.ml`. The extensive use of GADT (generalized algebraic datatypes) +makes it harder for the uninitiated to understand it. + +When the parser is invoked, the call tells the parser which nonterminal to parse. `vernac_control` +is the start symbol for commands. `tactic_mode` is the start symbol for tactics. +Tactics give syntax errors if Coq is not in proof mode. There are additional details +not mentioned here. + +### Parsing productions + +Some thoughts, not to be taken as identifying all the issues: + +Since the parser examines only the next token to make a parsing decision (and perhaps +because of other potentially fixable limitations), some productions have to be ordered +or structured in a particular way to parse correctly in all cases. + +For example, consider these productions: + + ``` + command: [ [ + | IDENT "Print"; p = printable -> { VernacPrint p } + | IDENT "Print"; qid = smart_global; l = OPT univ_name_list -> { VernacPrint (PrintName (qid,l)) } + | IDENT "Print"; IDENT "Module"; "Type"; qid = global -> + { VernacPrint (PrintModuleType qid) } + | IDENT "Print"; IDENT "Module"; qid = global -> + { VernacPrint (PrintModule qid) } + | IDENT "Print"; IDENT "Namespace" ; ns = dirpath -> + { VernacPrint (PrintNamespace ns) } + : + + printable: + [ [ IDENT "Term"; qid = smart_global; l = OPT univ_name_list -> { PrintName (qid,l) } + | IDENT "All" -> { PrintFullContext } + | IDENT "Section"; s = global -> { PrintSectionContext s } + : + ``` + +Reversing the order of the first two productions in `command` causes the `All` in `Print All` to +be parsed incorrectly as a `smart_global`, making that command unavailable. `Print Namespace nat.` +still works correctly, though. + +Similarly, the production for `Print Module Type` has to appear before `Print Module <global>` +in order to be reachable. + +Internally, the parser generates a tree that represents the possible prefixes for the +productions of a nonterminal as described in +[the Camlp5 documentation](http://camlp5.github.io/doc/htmlc/grammars.html#b:Rules-insertion). + +Here's another example in which the way the productions are written matters. `OPT` at +the beginning of a production doesn't always work well: + + ``` + command: [ [ + | IDENT "Foo"; n = natural -> { VernacBack 1 } + | OPT (IDENT "ZZ"); IDENT "Foo" -> { VernacBack 1 } + : + ``` + +`Foo.` looks like it should be accepted, but it gives a parse error: + + ``` + Unnamed_thm < Foo. + Toplevel input, characters 3-4: + > Foo. + > ^ + Error: + Syntax error: [prim:natural] expected after 'Foo' (in [vernac:command]). + ``` + +Reversing the order of the productions doesn't help, but splitting +the 'OPT' production into 2 productions works: + + ``` + | IDENT "Foo" -> { VernacBack 1 } + | IDENT "ZZ"; IDENT "Foo" -> { VernacBack 1 } + | IDENT "Foo"; n = natural -> { VernacBack 1 } + + ``` + +On the other hand, `OPT` works just fine when the parser has already found the +right production. For example `Back` and `Back <natural>` can be combined using +an `OPT`: + + ``` + | IDENT "Back"; n = OPT natural -> { VernacBack (Option.default 1 n) } + ``` + +### Lookahead + +It's possible to look ahead more than one symbol using OCaml code. Generally we +avoid doing this unless there's a strong reason to do so. For example, this +code defines a new nonterminal `local_test_lpar_id_colon` that checks that +the next 3 tokens are `"("` `ident` and `":"` without consuming any input: + + ``` + let local_test_lpar_id_colon = + let open Pcoq.Lookahead in + to_entry "lpar_id_colon" begin + lk_kw "(" >> lk_ident >> lk_kw ":" + end + ``` + +This one checks that the next 2 tokens are `"["` and `"|"` with no space between. +This is a special case: intropatterns can have sequences like `"[|]"` that are +3 different tokens with empty nonterminals between them. Making `"[|"` a keyword +would break existing code with "[|]": + + ``` + let test_array_opening = + let open Pcoq.Lookahead in + to_entry "test_array_opening" begin + lk_kw "[" >> lk_kw "|" >> check_no_space + end + ``` + +TODO: how to add a tactic or command |
