5 files changed, 202 insertions, 60 deletions
diff --git a/tools/CoqMakefile.in b/tools/CoqMakefile.in
index 49fb88cd8c..1d682218b6 100644
--- a/tools/CoqMakefile.in
+++ b/tools/CoqMakefile.in
@@ -125,6 +125,10 @@ CAMLPKGS ?=
 TIMING?=
 # Option for changing sorting of timing output file
 TIMING_SORT_BY ?= auto
+# Option for setting the fuzz parameter (maximum character-offset drift allowed when matching lines) for per-file timing diffs
+TIMING_FUZZ ?= 0
+# Option for choosing real (1) or user (0) time for timing tables; if empty, each timing script uses its own default
+TIMING_REAL?=
 # Output file names for timed builds
 TIME_OF_BUILD_FILE               ?= time-of-build.log
 TIME_OF_BUILD_BEFORE_FILE        ?= time-of-build-before.log
@@ -335,6 +339,19 @@ all.timing.diff:
 	$(HIDE)$(MAKE) --no-print-directory -f "$(SELF)" post-all
 .PHONY: all.timing.diff
 
+# Translate TIMING_REAL into the command-line flags for the timing scripts:
+#   TIMING_REAL=1  selects --real, for the scripts whose default is user time
+#   TIMING_REAL=0  selects --user, for the scripts whose default is real time
+# Any other value (including empty) passes neither flag.
+TIMING_REAL_ARG :=
+TIMING_USER_ARG :=
+ifeq (1,$(TIMING_REAL))
+TIMING_REAL_ARG := --real
+endif
+ifeq (0,$(TIMING_REAL))
+TIMING_USER_ARG := --user
+endif
+
 make-pretty-timed-before:: TIME_OF_BUILD_FILE=$(TIME_OF_BUILD_BEFORE_FILE)
 make-pretty-timed-after:: TIME_OF_BUILD_FILE=$(TIME_OF_BUILD_AFTER_FILE)
 make-pretty-timed make-pretty-timed-before make-pretty-timed-after::
@@ -342,9 +359,9 @@ make-pretty-timed make-pretty-timed-before make-pretty-timed-after::
 	$(HIDE)($(MAKE) --no-print-directory -f "$(PARENT)" $(TGTS) TIMED=1 2>&1 && touch pretty-timed-success.ok) | tee -a $(TIME_OF_BUILD_FILE)
 	$(HIDE)rm pretty-timed-success.ok # must not be -f; must fail if the touch failed
 print-pretty-timed::
-	$(HIDE)$(COQMAKE_ONE_TIME_FILE) $(TIME_OF_BUILD_FILE) $(TIME_OF_PRETTY_BUILD_FILE) $(TIME_OF_PRETTY_BUILD_EXTRA_FILES)
+	$(HIDE)$(COQMAKE_ONE_TIME_FILE) $(TIMING_REAL_ARG) $(TIME_OF_BUILD_FILE) $(TIME_OF_PRETTY_BUILD_FILE) $(TIME_OF_PRETTY_BUILD_EXTRA_FILES)
 print-pretty-timed-diff::
-	$(HIDE)$(COQMAKE_BOTH_TIME_FILES) --sort-by=$(TIMING_SORT_BY) $(TIME_OF_BUILD_AFTER_FILE) $(TIME_OF_BUILD_BEFORE_FILE) $(TIME_OF_PRETTY_BOTH_BUILD_FILE) $(TIME_OF_PRETTY_BUILD_EXTRA_FILES)
+	$(HIDE)$(COQMAKE_BOTH_TIME_FILES) --sort-by=$(TIMING_SORT_BY) $(TIMING_REAL_ARG) $(TIME_OF_BUILD_AFTER_FILE) $(TIME_OF_BUILD_BEFORE_FILE) $(TIME_OF_PRETTY_BOTH_BUILD_FILE) $(TIME_OF_PRETTY_BUILD_EXTRA_FILES)
 ifeq (,$(BEFORE))
 print-pretty-single-time-diff::
 	@echo 'Error: Usage: $(MAKE) print-pretty-single-time-diff AFTER=path/to/file.v.after-timing BEFORE=path/to/file.v.before-timing'
@@ -356,7 +373,7 @@ print-pretty-single-time-diff::
 	$(HIDE)false
 else
 print-pretty-single-time-diff::
-	$(HIDE)$(COQMAKE_BOTH_SINGLE_TIMING_FILES) --sort-by=$(TIMING_SORT_BY) $(AFTER) $(BEFORE) $(TIME_OF_PRETTY_BUILD_FILE) $(TIME_OF_PRETTY_BUILD_EXTRA_FILES)
+	$(HIDE)$(COQMAKE_BOTH_SINGLE_TIMING_FILES) --fuzz=$(TIMING_FUZZ) --sort-by=$(TIMING_SORT_BY) $(TIMING_USER_ARG) $(AFTER) $(BEFORE) $(TIME_OF_PRETTY_BUILD_FILE) $(TIME_OF_PRETTY_BUILD_EXTRA_FILES)
 endif
 endif
 pretty-timed:
@@ -695,7 +712,7 @@ $(VFILES:.v=.vok): %.vok: %.v
 	$(HIDE)$(TIMER) $(COQC) -vok $(COQDEBUG) $(COQFLAGS) $(COQLIBS) $<
 
 $(addsuffix .timing.diff,$(VFILES)): %.timing.diff : %.before-timing %.after-timing
-	$(SHOW)PYTHON TIMING-DIFF $<
+	$(SHOW)PYTHON TIMING-DIFF $*.{before,after}-timing
 	$(HIDE)$(MAKE) --no-print-directory -f "$(SELF)" print-pretty-single-time-diff BEFORE=$*.before-timing AFTER=$*.after-timing TIME_OF_PRETTY_BUILD_FILE="$@"
 
 $(BEAUTYFILES): %.v.beautified: %.v
diff --git a/tools/TimeFileMaker.py b/tools/TimeFileMaker.py
index 3d07661d56..210901f8a7 100644
--- a/tools/TimeFileMaker.py
+++ b/tools/TimeFileMaker.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 from __future__ import print_function
 import sys
 import re
+import argparse
 from io import open
 
 # This script parses the output of `make TIMED=1` into a dictionary
@@ -14,18 +15,76 @@ STRIP_REG = re.compile('^(coq/|contrib/|)(?:theories/|src/)?')
 STRIP_REP = r'\1'
 INFINITY  = '\u221e'
 
-def parse_args(argv, USAGE, HELP_STRING):
-    sort_by = 'auto'
-    if any(arg.startswith('--sort-by=') for arg in argv[1:]):
-        sort_by = [arg for arg in argv[1:] if arg.startswith('--sort-by=')][-1][len('--sort-by='):]
-    args = [arg for arg in argv if not arg.startswith('--sort-by=')]
-    if len(args) < 3 or '--help' in args[1:] or '-h' in args[1:] or sort_by not in ('auto', 'absolute', 'diff'):
-        print(USAGE)
-        if '--help' in args[1:] or '-h' in args[1:]:
-            print(HELP_STRING)
-            if len(args) == 2: sys.exit(0)
-        sys.exit(1)
-    return sort_by, args
+def nonnegative(arg):
+    value = int(arg)  # argparse type callback; a ValueError here is reported by argparse as an invalid value
+    if value >= 0: return value
+    raise argparse.ArgumentTypeError("%s is an invalid non-negative int value" % arg)
+
+def add_sort_by(parser):
+    return parser.add_argument(
+        '--sort-by', type=str, dest='sort_by', choices=('auto', 'absolute', 'diff'),
+        default='auto',
+        help=('How to sort the table entries.\n' +
+              'The "auto" method sorts by absolute time differences ' +
+              'rounded towards zero to a whole-number of seconds, then ' +
+              'by times in the "after" column, and finally ' +
+              'lexicographically by file name. This will put the ' +
+              'biggest changes in either direction first, and will ' +
+              'prefer sorting by build-time over subsecond changes in ' +
+              'build time (which are frequently noise); lexicographic ' +
+              'sorting forces an order on files which take effectively ' +
+              'no time to compile.\n' +
+              'The "absolute" method sorts by the total time taken.\n' +
+              'The "diff" method sorts by the signed difference in time.'))
+
+def add_fuzz(parser):
+    return parser.add_argument(
+        '--fuzz', dest='fuzz', metavar='N', type=nonnegative, default=0,
+        help=('By default, two lines are only considered the same if ' +
+              'the character offsets and initial code strings match.  '
+              'This option relaxes this constraint by allowing the ' +
+              'character offsets to differ by up to N characters, as long ' +
+              'as the total number of characters and initial code strings ' +
+              'continue to match.  This is useful when there are small changes ' +
+              'to a file, and you want to match later lines that have not ' +
+              'changed even though the character offsets have changed.'))
+
+def add_real(parser, single_timing=False):
+    return parser.add_argument(
+        '--real', action='store_true',
+        help=(r'''Use real times rather than user times.
+
+''' + ('''By default, the input is expected to contain lines in the format:
+FILE_NAME (...user: NUMBER_IN_SECONDS...)
+If --real is passed, then the lines are instead expected in the format:
+FILE_NAME (...real: NUMBER_IN_SECONDS...)''' if not single_timing else
+'''The input is expected to contain lines in the format:
+Chars START - END COMMAND NUMBER secs (NUMBERu...)''')))
+
+def add_user(parser, single_timing=False):
+    return parser.add_argument(
+        '--user', dest='real', action='store_false',
+        help=(r'''Use user times rather than real times.
+
+''' + ('''By default, the input is expected to contain lines in the format:
+FILE_NAME (...real: NUMBER_IN_SECONDS...)
+If --user is passed, then the lines are instead expected in the format:
+FILE_NAME (...user: NUMBER_IN_SECONDS...)''' if not single_timing else
+'''The input is expected to contain lines in the format:
+Chars START - END COMMAND NUMBER secs (NUMBERu...)''')))
+
+# N.B. We need to pass default=None explicitly when using nargs='*'; cf. https://bugs.python.org/issue28609#msg280180
+def add_file_name_gen(parser, prefix='', descr='file containing the build log', stddir='in', defaults=None, **kwargs):
+    '''Add a positional <prefix>FILE_NAME argument to parser; extra keyword arguments go to add_argument.'''
+    # only mention a default in the help text when the caller names one
+    default_note = '' if defaults is None else ' (defaults to %s if no argument is passed)' % defaults
+    help_text = 'The name of the %s (use "-" for std%s)%s.' % (descr, stddir, default_note)
+    return parser.add_argument(prefix + 'FILE_NAME', type=str, help=help_text, **kwargs)
+
+def add_file_name(parser): return add_file_name_gen(parser)
+def add_after_file_name(parser): return add_file_name_gen(parser, 'AFTER_', 'file containing the "after" build log')
+def add_before_file_name(parser): return add_file_name_gen(parser, 'BEFORE_', 'file containing the "before" build log')
+def add_output_file_name(parser): return add_file_name_gen(parser, 'OUTPUT_', 'file to write the output table to', stddir='out', defaults='-', nargs='*', default=None)
 
 
 def reformat_time_string(time):
@@ -45,14 +104,16 @@ def get_file_lines(file_name):
             lines = f.readlines()
     for line in lines:
         try:
-            yield line.decode('utf-8')
+            # Since we read the files in binary mode, we have to
+            # normalize Windows line endings from \r\n to \n
+            yield line.decode('utf-8').replace('\r\n', '\n')
         except UnicodeDecodeError: # invalid utf-8
             pass
 
 def get_file(file_name):
     return ''.join(get_file_lines(file_name))
 
-def get_times(file_name):
+def get_times(file_name, use_real=False):
     '''
     Reads the contents of file_name, which should be the output of
     'make TIMED=1', and parses it to construct a dict mapping file
@@ -60,28 +121,96 @@ def get_times(file_name):
     using STRIP_REG and STRIP_REP.
     '''
     lines = get_file(file_name)
-    reg = re.compile(r'^([^\s]+) \([^\)]*?user: ([0-9\.]+)[^\)]*?\)\s*$', re.MULTILINE)
+    reg_user = re.compile(r'^([^\s]+) \([^\)]*?user: ([0-9\.]+)[^\)]*?\)\s*$', re.MULTILINE)
+    reg_real = re.compile(r'^([^\s]+) \([^\)]*?real: ([0-9\.]+)[^\)]*?\)\s*$', re.MULTILINE)
+    reg = reg_real if use_real else reg_user
     times = reg.findall(lines)
     if all(time in ('0.00', '0.01') for name, time in times):
-        reg = re.compile(r'^([^\s]*) \([^\)]*?real: ([0-9\.]+)[^\)]*?\)\s*$', re.MULTILINE)
+        reg = reg_real
         times = reg.findall(lines)
     if all(STRIP_REG.search(name.strip()) for name, time in times):
         times = tuple((STRIP_REG.sub(STRIP_REP, name.strip()), time) for name, time in times)
     return dict((name, reformat_time_string(time)) for name, time in times)
 
-def get_single_file_times(file_name):
+def get_single_file_times(file_name, use_real=False):
     '''
     Reads the contents of file_name, which should be the output of
     'coqc -time', and parses it to construct a dict mapping lines to
     to compile durations, as strings.
     '''
     lines = get_file(file_name)
-    reg = re.compile(r'^Chars ([0-9]+) - ([0-9]+) ([^ ]+) ([0-9\.]+) secs (.*)$', re.MULTILINE)
+    reg = re.compile(r'^Chars ([0-9]+) - ([0-9]+) ([^ ]+) ([0-9\.]+) secs \(([0-9\.]+)u(.*)\)$', re.MULTILINE)
     times = reg.findall(lines)
     if len(times) == 0: return dict()
-    longest = max(max((len(start), len(stop))) for start, stop, name, time, extra in times)
+    longest = max(max((len(start), len(stop))) for start, stop, name, real, user, extra in times)
     FORMAT = 'Chars %%0%dd - %%0%dd %%s' % (longest, longest)
-    return dict((FORMAT % (int(start), int(stop), name), reformat_time_string(time)) for start, stop, name, time, extra in times)
+    return dict((FORMAT % (int(start), int(stop), name), reformat_time_string(real if use_real else user)) for start, stop, name, real, user, extra in times)
+
+def fuzz_merge(l1, l2, fuzz):
+    '''Takes two sorted iterables of ((start, end, code), time) and a
+    fuzz parameter, and yields pairs (((start, end, code), time1),
+    ((start, end, code), time2)) covering every input entry.
+
+    Both times are given only if (a) the codes are the same, (b) the
+    number of characters (end - start) is the same, and (c) the
+    difference between left and right start offsets is <= fuzz;
+    an unmatched entry is paired with a time of None on the other side.
+
+    We keep a current guess at the overall offset, and prefer drawing
+    from whichever list is earliest after correcting for that offset.
+    Entries left over once either list is exhausted are yielded unmatched.
+    '''
+    assert(fuzz >= 0)
+    cur_fuzz = 0
+    l1 = list(l1)
+    l2 = list(l2)
+    cur1, cur2 = None, None
+    while (len(l1) > 0 or cur1 is not None) and (len(l2) > 0 or cur2 is not None):
+        if cur1 is None: cur1 = l1.pop(0)
+        if cur2 is None: cur2 = l2.pop(0)
+        ((s1, e1, c1), t1), ((s2, e2, c2), t2) = cur1, cur2
+        assert(t1 is not None and t2 is not None)
+        s2_adjusted, e2_adjusted = s2 + cur_fuzz, e2 + cur_fuzz
+        if cur1[0] == cur2[0]:
+            yield (cur1, cur2)
+            cur1, cur2 = None, None
+            cur_fuzz = 0
+        elif c1 == c2 and e1-s1 == e2-s2 and abs(s1 - s2) <= fuzz:
+            yield (((s1, e1, c1), t1), ((s2, e2, c2), t2))
+            cur1, cur2 = None, None
+            cur_fuzz = s1 - s2
+        elif s1 < s2_adjusted or (s1 == s2_adjusted and e1 <= e2_adjusted):
+            yield (((s1, e1, c1), t1), ((s1 - cur_fuzz, e1 - cur_fuzz, c1), None))
+            cur1 = None
+        else:
+            yield (((s2 + cur_fuzz, e2 + cur_fuzz, c2), None), ((s2, e2, c2), t2))
+            cur2 = None
+    # One side is exhausted; flush the other, including an entry already popped into cur1/cur2
+    if cur1 is not None: l1.insert(0, cur1)
+    if cur2 is not None: l2.insert(0, cur2)
+    for i in l1: yield (i, (i[0], None))
+    for i in l2: yield ((i[0], None), i)
+
+def adjust_fuzz(left_dict, right_dict, fuzz):
+    '''Re-key two 'Chars START - END CODE' timing dicts via fuzz_merge; returns the new (left, right) dicts.'''
+    reg = re.compile(r'Chars ([0-9]+) - ([0-9]+) (.*)$')
+    def sorted_entries(times):
+        # parse every key back into an (int start, int end, code) location and sort by it
+        parsed = ((reg.match(key).groups(), val) for key, val in times.items())
+        return sorted(((int(s), int(e), c), val) for (s, e, c), val in parsed)
+    merged = list(fuzz_merge(sorted_entries(left_dict), sorted_entries(right_dict), fuzz))
+    if not merged:
+        # nothing was merged, so both input dicts must be empty
+        assert(not left_dict and not right_dict)
+        return left_dict, right_dict
+    # zero-pad all offsets in the new keys to the width of the widest one
+    longest = max(len(str(n)) for ((s1, e1, _), _), ((s2, e2, _), _) in merged for n in (s1, e1, s2, e2))
+    if fuzz == 0:
+        fmt = 'Chars %%0%dd - %%0%dd %%s' % (longest, longest)
+        return dict((fmt % k, t1) for (k, t1), _ in merged if t1 is not None), dict((fmt % k, t2) for _, (k, t2) in merged if t2 is not None)
+    fmt = 'Chars %%0%dd-%%0%dd ~ %%0%dd-%%0%dd %%s' % (longest, longest, longest, longest)
+    keyed = [(fmt % (s1, e1, s2, e2, c1), t1, t2) for ((s1, e1, c1), t1), ((s2, e2, _), t2) in merged]
+    return dict((k, t1) for k, t1, _ in keyed if t1 is not None), dict((k, t2) for k, _, t2 in keyed if t2 is not None)
 
 def fix_sign_for_sorting(num, descending=True):
     return -num if descending else num
diff --git a/tools/make-both-single-timing-files.py b/tools/make-both-single-timing-files.py
index fddf75f39f..a28da43043 100755
--- a/tools/make-both-single-timing-files.py
+++ b/tools/make-both-single-timing-files.py
@@ -1,12 +1,17 @@
 #!/usr/bin/env python3
-import sys
 from TimeFileMaker import *
 
 if __name__ == '__main__':
-    USAGE = 'Usage: %s [--sort-by=auto|absolute|diff] AFTER_FILE_NAME BEFORE_FILE_NAME [OUTPUT_FILE_NAME ..]' % sys.argv[0]
-    HELP_STRING = r'''Formats timing information from the output of two invocations of `coqc -time` into a sorted table'''
-    sort_by, args = parse_args(sys.argv, USAGE, HELP_STRING)
-    left_dict = get_single_file_times(args[1])
-    right_dict = get_single_file_times(args[2])
-    table = make_diff_table_string(left_dict, right_dict, tag="Code", sort_by=sort_by)
-    print_or_write_table(table, args[3:])
+    parser = argparse.ArgumentParser(description=r'''Formats timing information from the output of two invocations of `coqc -time` into a sorted table''')
+    add_sort_by(parser)
+    add_user(parser, single_timing=True)
+    add_fuzz(parser)
+    add_after_file_name(parser)
+    add_before_file_name(parser)
+    add_output_file_name(parser)
+    args = parser.parse_args()
+    left_dict = get_single_file_times(args.AFTER_FILE_NAME, use_real=args.real)
+    right_dict = get_single_file_times(args.BEFORE_FILE_NAME, use_real=args.real)
+    left_dict, right_dict = adjust_fuzz(left_dict, right_dict, fuzz=args.fuzz)
+    table = make_diff_table_string(left_dict, right_dict, tag="Code", sort_by=args.sort_by)
+    print_or_write_table(table, args.OUTPUT_FILE_NAME)
diff --git a/tools/make-both-time-files.py b/tools/make-both-time-files.py
index 8937d63c2f..5d88548bba 100755
--- a/tools/make-both-time-files.py
+++ b/tools/make-both-time-files.py
@@ -1,16 +1,15 @@
 #!/usr/bin/env python3
-import sys
 from TimeFileMaker import *
 
 if __name__ == '__main__':
-    USAGE = 'Usage: %s [--sort-by=auto|absolute|diff] AFTER_FILE_NAME BEFORE_FILE_NAME [OUTPUT_FILE_NAME ..]' % sys.argv[0]
-    HELP_STRING = r'''Formats timing information from the output of two invocations of `make TIMED=1` into a sorted table.
-
-The input is expected to contain lines in the format:
-FILE_NAME (...user: NUMBER_IN_SECONDS...)
-'''
-    sort_by, args = parse_args(sys.argv, USAGE, HELP_STRING)
-    left_dict = get_times(args[1])
-    right_dict = get_times(args[2])
-    table = make_diff_table_string(left_dict, right_dict, sort_by=sort_by)
-    print_or_write_table(table, args[3:])
+    parser = argparse.ArgumentParser(description=r'''Formats timing information from the output of two invocations of `make TIMED=1` into a sorted table.''')
+    add_sort_by(parser)
+    add_real(parser)
+    add_after_file_name(parser)
+    add_before_file_name(parser)
+    add_output_file_name(parser)
+    args = parser.parse_args()
+    left_dict = get_times(args.AFTER_FILE_NAME, use_real=args.real)
+    right_dict = get_times(args.BEFORE_FILE_NAME, use_real=args.real)
+    table = make_diff_table_string(left_dict, right_dict, sort_by=args.sort_by)
+    print_or_write_table(table, args.OUTPUT_FILE_NAME)
diff --git a/tools/make-one-time-file.py b/tools/make-one-time-file.py
index ad0a04ab07..3df7d7e584 100755
--- a/tools/make-one-time-file.py
+++ b/tools/make-one-time-file.py
@@ -3,19 +3,11 @@ import sys
 from TimeFileMaker import *
 
 if __name__ == '__main__':
-    USAGE = 'Usage: %s FILE_NAME [OUTPUT_FILE_NAME ..]' % sys.argv[0]
-    HELP_STRING = r'''Formats timing information from the output of `make TIMED=1` into a sorted table.
-
-The input is expected to contain lines in the format:
-FILE_NAME (...user: NUMBER_IN_SECONDS...)
-'''
-    if len(sys.argv) < 2 or '--help' in sys.argv[1:] or '-h' in sys.argv[1:]:
-        print(USAGE)
-        if '--help' in sys.argv[1:] or '-h' in sys.argv[1:]:
-            print(HELP_STRING)
-            if len(sys.argv) == 2: sys.exit(0)
-        sys.exit(1)
-    else:
-        times_dict = get_times(sys.argv[1])
-        table = make_table_string(times_dict)
-        print_or_write_table(table, sys.argv[2:])
+    parser = argparse.ArgumentParser(description=r'''Formats timing information from the output of `make TIMED=1` into a sorted table.''')
+    add_real(parser)
+    add_file_name(parser)
+    add_output_file_name(parser)
+    args = parser.parse_args()
+    times_dict = get_times(args.FILE_NAME, use_real=args.real)
+    table = make_table_string(times_dict)
+    print_or_write_table(table, args.OUTPUT_FILE_NAME)