from __future__ import with_statement from __future__ import division from __future__ import unicode_literals from __future__ import print_function import sys import re import argparse from io import open # This script parses the output of `make TIMED=1` into a dictionary # mapping names of compiled files to the number of minutes and seconds # that they took to compile. STRIP_REG = re.compile('^(coq/|contrib/|)(?:theories/|src/)?') STRIP_REP = r'\1' INFINITY = '\u221e' def nonnegative(arg): v = int(arg) if v < 0: raise argparse.ArgumentTypeError("%s is an invalid non-negative int value" % arg) return v def add_sort_by(parser): return parser.add_argument( '--sort-by', type=str, dest='sort_by', choices=('auto', 'absolute', 'diff'), default='auto', help=('How to sort the table entries.\n' + 'The "auto" method sorts by absolute time differences ' + 'rounded towards zero to a whole-number of seconds, then ' + 'by times in the "after" column, and finally ' + 'lexicographically by file name. This will put the ' + 'biggest changes in either direction first, and will ' + 'prefer sorting by build-time over subsecond changes in ' + 'build time (which are frequently noise); lexicographic ' + 'sorting forces an order on files which take effectively ' + 'no time to compile.\n' + 'The "absolute" method sorts by the total time taken.\n' + 'The "diff" method sorts by the signed difference in time.')) def add_fuzz(parser): return parser.add_argument( '--fuzz', dest='fuzz', metavar='N', type=nonnegative, default=0, help=('By default, two lines are only considered the same if ' + 'the character offsets and initial code strings match. ' 'This option relaxes this constraint by allowing the ' + 'character offsets to differ by up to N characters, as long ' + 'as the total number of characters and initial code strings ' + 'continue to match. This is useful when there are small changes ' + 'to a file, and you want to match later lines that have not ' + 'changed even though the character offsets have changed.')) def add_real(parser, single_timing=False): return parser.add_argument( '--real', action='store_true', help=(r'''Use real times rather than user times. ''' + ('''By default, the input is expected to contain lines in the format: FILE_NAME (...user: NUMBER_IN_SECONDS...) If --real is passed, then the lines are instead expected in the format: FILE_NAME (...real: NUMBER_IN_SECONDS...)''' if not single_timing else '''The input is expected to contain lines in the format: Chars START - END COMMAND NUMBER secs (NUMBERu...)'''))) def add_user(parser, single_timing=False): return parser.add_argument( '--user', dest='real', action='store_false', help=(r'''Use user times rather than real times. ''' + ('''By default, the input is expected to contain lines in the format: FILE_NAME (...real: NUMBER_IN_SECONDS...) If --user is passed, then the lines are instead expected in the format: FILE_NAME (...user: NUMBER_IN_SECONDS...)''' if not single_timing else '''The input is expected to contain lines in the format: Chars START - END COMMAND NUMBER secs (NUMBERu...)'''))) # N.B. We need to include default=None for nargs='*', c.f., https://bugs.python.org/issue28609#msg280180 def add_file_name_gen(parser, prefix='', descr='file containing the build log', stddir='in', defaults=None, **kwargs): extra = ('' if defaults is None else ' (defaults to %s if no argument is passed)' % defaults) return parser.add_argument( prefix + 'FILE_NAME', type=str, help=('The name of the %s (use "-" for std%s)%s.' % (descr, stddir, extra)), **kwargs) def add_file_name(parser): return add_file_name_gen(parser) def add_after_file_name(parser): return add_file_name_gen(parser, 'AFTER_', 'file containing the "after" build log') def add_before_file_name(parser): return add_file_name_gen(parser, 'BEFORE_', 'file containing the "before" build log') def add_output_file_name(parser): return add_file_name_gen(parser, 'OUTPUT_', 'file to write the output table to', stddir='out', defaults='-', nargs='*', default=None) def reformat_time_string(time): seconds, milliseconds = time.split('.') seconds = int(seconds) minutes, seconds = divmod(seconds, 60) return '%dm%02d.%ss' % (minutes, seconds, milliseconds) def get_file_lines(file_name): if file_name == '-': if hasattr(sys.stdin, 'buffer'): lines = sys.stdin.buffer.readlines() else: lines = sys.stdin.readlines() else: with open(file_name, 'rb') as f: lines = f.readlines() for line in lines: try: # Since we read the files in binary mode, we have to # normalize Windows line endings from \r\n to \n yield line.decode('utf-8').replace('\r\n', '\n') except UnicodeDecodeError: # invalid utf-8 pass def get_file(file_name): return ''.join(get_file_lines(file_name)) def get_times(file_name, use_real=False): ''' Reads the contents of file_name, which should be the output of 'make TIMED=1', and parses it to construct a dict mapping file names to compile durations, as strings. Removes common prefixes using STRIP_REG and STRIP_REP. ''' lines = get_file(file_name) reg_user = re.compile(r'^([^\s]+) \([^\)]*?user: ([0-9\.]+)[^\)]*?\)\s*$', re.MULTILINE) reg_real = re.compile(r'^([^\s]+) \([^\)]*?real: ([0-9\.]+)[^\)]*?\)\s*$', re.MULTILINE) reg = reg_real if use_real else reg_user times = reg.findall(lines) if all(time in ('0.00', '0.01') for name, time in times): reg = reg_real times = reg.findall(lines) if all(STRIP_REG.search(name.strip()) for name, time in times): times = tuple((STRIP_REG.sub(STRIP_REP, name.strip()), time) for name, time in times) return dict((name, reformat_time_string(time)) for name, time in times) def get_single_file_times(file_name, use_real=False): ''' Reads the contents of file_name, which should be the output of 'coqc -time', and parses it to construct a dict mapping lines to to compile durations, as strings. ''' lines = get_file(file_name) reg = re.compile(r'^Chars ([0-9]+) - ([0-9]+) ([^ ]+) ([0-9\.]+) secs \(([0-9\.]+)u(.*)\)$', re.MULTILINE) times = reg.findall(lines) if len(times) == 0: return dict() longest = max(max((len(start), len(stop))) for start, stop, name, real, user, extra in times) FORMAT = 'Chars %%0%dd - %%0%dd %%s' % (longest, longest) return dict((FORMAT % (int(start), int(stop), name), reformat_time_string(real if use_real else user)) for start, stop, name, real, user, extra in times) def fuzz_merge(l1, l2, fuzz): '''Takes two iterables of ((start, end, code), times) and a fuzz parameter, and yields a single iterable of ((start, stop, code), times1, times2) We only give both left and right if (a) the codes are the same, (b) the number of characters (stop - start) is the same, and (c) the difference between left and right code locations is <= fuzz. We keep a current guess at the overall offset, and prefer drawing from whichever list is earliest after correcting for current offset. ''' assert(fuzz >= 0) cur_fuzz = 0 l1 = list(l1) l2 = list(l2) cur1, cur2 = None, None while (len(l1) > 0 or cur1 is not None) and (len(l2) > 0 or cur2 is not None): if cur1 is None: cur1 = l1.pop(0) if cur2 is None: cur2 = l2.pop(0) ((s1, e1, c1), t1), ((s2, e2, c2), t2) = cur1, cur2 assert(t1 is not None) assert(t2 is not None) s2_adjusted, e2_adjusted = s2 + cur_fuzz, e2 + cur_fuzz if cur1[0] == cur2[0]: yield (cur1, cur2) cur1, cur2 = None, None cur_fuzz = 0 elif c1 == c2 and e1-s1 == e2-s2 and abs(s1 - s2) <= fuzz: yield (((s1, e1, c1), t1), ((s2, e2, c2), t2)) cur1, cur2 = None, None cur_fuzz = s1 - s2 elif s1 < s2_adjusted or (s1 == s2_adjusted and e1 <= e2): yield (((s1, e1, c1), t1), ((s1 - cur_fuzz, e1 - cur_fuzz, c1), None)) cur1 = None else: yield (((s2 + cur_fuzz, e2 + cur_fuzz, c2), None), ((s2, e2, c2), t2)) cur2 = None if len(l1) > 0: for i in l1: yield (i, (i[0], None)) elif len(l2) > 0: for i in l2: yield ((i[0], None), i) def adjust_fuzz(left_dict, right_dict, fuzz): reg = re.compile(r'Chars ([0-9]+) - ([0-9]+) (.*)$') left_dict_list = sorted(((int(s), int(e), c), v) for ((s, e, c), v) in ((reg.match(k).groups(), v) for k, v in left_dict.items())) right_dict_list = sorted(((int(s), int(e), c), v) for ((s, e, c), v) in ((reg.match(k).groups(), v) for k, v in right_dict.items())) merged = list(fuzz_merge(left_dict_list, right_dict_list, fuzz)) if len(merged) == 0: # assert that both left and right dicts are empty assert(not left_dict) assert(not right_dict) return left_dict, right_dict longest = max(max((len(str(start1)), len(str(stop1)), len(str(start2)), len(str(stop2)))) for ((start1, stop1, code1), t1), ((start2, stop2, code2), t2) in merged) FORMAT1 = 'Chars %%0%dd - %%0%dd %%s' % (longest, longest) FORMAT2 = 'Chars %%0%dd-%%0%dd ~ %%0%dd-%%0%dd %%s' % (longest, longest, longest, longest) if fuzz == 0: left_dict = dict((FORMAT1 % k, t1) for (k, t1), _ in merged if t1 is not None) right_dict = dict((FORMAT1 % k, t2) for _, (k, t2) in merged if t2 is not None) else: left_dict = dict((FORMAT2 % (s1, e1, s2, e2, c1), t1) for ((s1, e1, c1), t1), ((s2, e2, c2), t2) in merged if t1 is not None) right_dict = dict((FORMAT2 % (s1, e1, s2, e2, c1), t2) for ((s1, e1, c1), t1), ((s2, e2, c2), t2) in merged if t2 is not None) return left_dict, right_dict def fix_sign_for_sorting(num, descending=True): return -num if descending else num def make_sorting_key(times_dict, descending=True): def get_key(name): minutes, seconds = times_dict[name].replace('s', '').split('m') return (fix_sign_for_sorting(int(minutes), descending=descending), fix_sign_for_sorting(float(seconds), descending=descending), name) return get_key def get_sorted_file_list_from_times_dict(times_dict, descending=True): ''' Takes the output dict of get_times and returns the list of keys, sorted by duration. ''' return sorted(times_dict.keys(), key=make_sorting_key(times_dict, descending=descending)) def to_seconds(time): ''' Converts a string time into a number of seconds. ''' minutes, seconds = time.replace('s', '').split('m') sign = -1 if time[0] == '-' else 1 return sign * (abs(int(minutes)) * 60 + float(seconds)) def from_seconds(seconds, signed=False): ''' Converts a number of seconds into a string time. ''' sign = ('-' if seconds < 0 else '+') if signed else '' seconds = abs(seconds) minutes = int(seconds) // 60 seconds -= minutes * 60 full_seconds = int(seconds) partial_seconds = int(100 * (seconds - full_seconds)) return sign + '%dm%02d.%02ds' % (minutes, full_seconds, partial_seconds) def sum_times(times, signed=False): ''' Takes the values of an output from get_times, parses the time strings, and returns their sum, in the same string format. ''' # sort the times before summing because floating point addition is not associative return from_seconds(sum(sorted(map(to_seconds, times))), signed=signed) def format_percentage(num, signed=True): sign = ('-' if num < 0 else '+') if signed else '' num = abs(num) whole_part = int(num * 100) frac_part = int(100 * (num * 100 - whole_part)) return sign + '%d.%02d%%' % (whole_part, frac_part) def make_diff_table_string(left_times_dict, right_times_dict, sort_by='auto', descending=True, left_tag="After", tag="File Name", right_tag="Before", with_percent=True, change_tag="Change", percent_change_tag="% Change"): # We first get the names of all of the compiled files: all files # that were compiled either before or after. all_names_dict = dict() all_names_dict.update(right_times_dict) all_names_dict.update(left_times_dict) # do the left (after) last, so that we give precedence to those ones if len(all_names_dict.keys()) == 0: return 'No timing data' prediff_times = tuple((name, to_seconds(left_times_dict.get(name,'0m0.0s')), to_seconds(right_times_dict.get(name,'0m0.0s'))) for name in all_names_dict.keys()) diff_times_dict = dict((name, from_seconds(lseconds - rseconds, signed=True)) for name, lseconds, rseconds in prediff_times) percent_diff_times_dict = dict((name, ((format_percentage((lseconds - rseconds) / rseconds)) if rseconds != 0 else (INFINITY if lseconds > 0 else 'N/A'))) for name, lseconds, rseconds in prediff_times) # update to sort by approximate difference, first get_key_abs = make_sorting_key(all_names_dict, descending=descending) get_key_diff_float = (lambda name: fix_sign_for_sorting(to_seconds(diff_times_dict[name]), descending=descending)) get_key_diff_absint = (lambda name: fix_sign_for_sorting(int(abs(to_seconds(diff_times_dict[name]))), descending=descending)) if sort_by == 'absolute': get_key = get_key_abs elif sort_by == 'diff': get_key = get_key_diff_float else: # sort_by == 'auto' get_key = (lambda name: (get_key_diff_absint(name), get_key_abs(name))) names = sorted(all_names_dict.keys(), key=get_key) #names = get_sorted_file_list_from_times_dict(all_names_dict, descending=descending) # set the widths of each of the columns by the longest thing to go in that column left_sum = sum_times(left_times_dict.values()) right_sum = sum_times(right_times_dict.values()) left_sum_float = sum(sorted(map(to_seconds, left_times_dict.values()))) right_sum_float = sum(sorted(map(to_seconds, right_times_dict.values()))) diff_sum = from_seconds(left_sum_float - right_sum_float, signed=True) percent_diff_sum = (format_percentage((left_sum_float - right_sum_float) / right_sum_float) if right_sum_float > 0 else 'N/A') left_width = max(max(map(len, ['N/A'] + list(left_times_dict.values()))), len(left_sum)) right_width = max(max(map(len, ['N/A'] + list(right_times_dict.values()))), len(right_sum)) far_right_width = max(max(map(len, ['N/A', change_tag] + list(diff_times_dict.values()))), len(diff_sum)) far_far_right_width = max(max(map(len, ['N/A', percent_change_tag] + list(percent_diff_times_dict.values()))), len(percent_diff_sum)) middle_width = max(map(len, names + [tag, "Total"])) format_string = ("%%(left)-%ds | %%(middle)-%ds | %%(right)-%ds || %%(far_right)-%ds" % (left_width, middle_width, right_width, far_right_width)) if with_percent: format_string += " | %%(far_far_right)-%ds" % far_far_right_width header = format_string % {'left': left_tag, 'middle': tag, 'right': right_tag, 'far_right': change_tag, 'far_far_right': percent_change_tag} total = format_string % {'left': left_sum, 'middle': "Total", 'right': right_sum, 'far_right': diff_sum, 'far_far_right': percent_diff_sum} # separator to go between headers and body sep = '-' * len(header) # the representation of the default value (0), to get replaced by N/A left_rep, right_rep, far_right_rep, far_far_right_rep = ("%%-%ds | " % left_width) % 0, (" | %%-%ds || " % right_width) % 0, ("|| %%-%ds" % far_right_width) % 0, ("| %%-%ds" % far_far_right_width) % 0 return '\n'.join([header, sep, total, sep] + [format_string % {'left': left_times_dict.get(name, 0), 'middle': name, 'right': right_times_dict.get(name, 0), 'far_right': diff_times_dict.get(name, 0), 'far_far_right': percent_diff_times_dict.get(name, 0)} for name in names]).replace(left_rep, 'N/A'.center(len(left_rep) - 3) + ' | ').replace(right_rep, ' | ' + 'N/A'.center(len(right_rep) - 7) + ' || ').replace(far_right_rep, '|| ' + 'N/A'.center(len(far_right_rep) - 3)).replace(far_far_right_rep, '| ' + 'N/A'.center(len(far_far_right_rep) - 2)) def make_table_string(times_dict, descending=True, tag="Time"): if len(times_dict.keys()) == 0: return 'No timing data' # We first get the names of all of the compiled files, sorted by # duration names = get_sorted_file_list_from_times_dict(times_dict, descending=descending) # compute the widths of the columns times_width = max(max(map(len, times_dict.values())), len(sum_times(times_dict.values()))) names_width = max(map(len, names + ["File Name", "Total"])) format_string = "%%-%ds | %%-%ds" % (times_width, names_width) header = format_string % (tag, "File Name") total = format_string % (sum_times(times_dict.values()), "Total") sep = '-' * len(header) return '\n'.join([header, sep, total, sep] + [format_string % (times_dict[name], name) for name in names]) def print_or_write_table(table, files): if len(files) == 0 or '-' in files: if hasattr(sys.stdout, 'buffer'): sys.stdout.buffer.write(table.encode("utf-8")) else: sys.stdout.write(table.encode("utf-8")) for file_name in files: if file_name != '-': with open(file_name, 'w', encoding="utf-8") as f: f.write(table)