From 4713f5c22bb21d341de32498f2a49d92aa1fc08c Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Thu, 14 Mar 2019 03:03:37 -0400 Subject: [pretty-print py] Don't print sys.stdout; better utf This should fix #9705. I'm kind-of cargo-cult coding here, from things like https://docs.python.org/3/library/sys.html#sys.displayhook and https://github.com/coq/coq/issues/9705#issuecomment-471996313, but hopefully this fixes the issue without breaking anything. (I am really a novice when it comes to the str/bytes distinction in python3.) --- tools/TimeFileMaker.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools/TimeFileMaker.py') diff --git a/tools/TimeFileMaker.py b/tools/TimeFileMaker.py index 854dd25b75..0c5fdb50ac 100644 --- a/tools/TimeFileMaker.py +++ b/tools/TimeFileMaker.py @@ -209,11 +209,10 @@ def make_table_string(times_dict, def print_or_write_table(table, files): if len(files) == 0 or '-' in files: - try: - binary_stdout = sys.stdout.buffer - except AttributeError: - binary_stdout = sys.stdout - print(table.encode("utf-8"), file=binary_stdout) + if hasattr(sys.stdout, 'buffer'): + sys.stdout.buffer.write(table.encode("utf-8")) + else: + sys.stdout.write(table) for file_name in files: if file_name != '-': with open(file_name, 'w', encoding="utf-8") as f: -- cgit v1.2.3 From 2acd04d6d7d608920dd93b0a602e3214ffeb9ae5 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Sun, 31 Mar 2019 10:40:56 -0400 Subject: [pretty-timing scripts] Don't barf on non-utf-8 This fixes #9767 by silently ignoring input lines which are not valid UTF-8. We hereby assume that all file paths are valid UTF-8. We also now actually test both python2 and python3 on the CI. 
--- tools/TimeFileMaker.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'tools/TimeFileMaker.py') diff --git a/tools/TimeFileMaker.py b/tools/TimeFileMaker.py index 0c5fdb50ac..3d07661d56 100644 --- a/tools/TimeFileMaker.py +++ b/tools/TimeFileMaker.py @@ -34,6 +34,24 @@ def reformat_time_string(time): minutes, seconds = divmod(seconds, 60) return '%dm%02d.%ss' % (minutes, seconds, milliseconds) +def get_file_lines(file_name): + if file_name == '-': + if hasattr(sys.stdin, 'buffer'): + lines = sys.stdin.buffer.readlines() + else: + lines = sys.stdin.readlines() + else: + with open(file_name, 'rb') as f: + lines = f.readlines() + for line in lines: + try: + yield line.decode('utf-8') + except UnicodeDecodeError: # invalid utf-8 + pass + +def get_file(file_name): + return ''.join(get_file_lines(file_name)) + def get_times(file_name): ''' Reads the contents of file_name, which should be the output of @@ -41,11 +59,7 @@ def get_times(file_name): names to compile durations, as strings. Removes common prefixes using STRIP_REG and STRIP_REP. ''' - if file_name == '-': - lines = sys.stdin.read() - else: - with open(file_name, 'r', encoding="utf-8") as f: - lines = f.read() + lines = get_file(file_name) reg = re.compile(r'^([^\s]+) \([^\)]*?user: ([0-9\.]+)[^\)]*?\)\s*$', re.MULTILINE) times = reg.findall(lines) if all(time in ('0.00', '0.01') for name, time in times): @@ -61,11 +75,7 @@ def get_single_file_times(file_name): 'coqc -time', and parses it to construct a dict mapping lines to to compile durations, as strings. 
''' - if file_name == '-': - lines = sys.stdin.read() - else: - with open(file_name, 'r', encoding="utf-8") as f: - lines = f.read() + lines = get_file(file_name) reg = re.compile(r'^Chars ([0-9]+) - ([0-9]+) ([^ ]+) ([0-9\.]+) secs (.*)$', re.MULTILINE) times = reg.findall(lines) if len(times) == 0: return dict() @@ -212,7 +222,7 @@ def print_or_write_table(table, files): if hasattr(sys.stdout, 'buffer'): sys.stdout.buffer.write(table.encode("utf-8")) else: - sys.stdout.write(table) + sys.stdout.write(table.encode("utf-8")) for file_name in files: if file_name != '-': with open(file_name, 'w', encoding="utf-8") as f: -- cgit v1.2.3