about | summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author: Jason Gross, 2019-03-31 10:40:56 -0400
committer: Jason Gross, 2019-03-31 14:09:06 -0400
commit: 2acd04d6d7d608920dd93b0a602e3214ffeb9ae5 (patch)
tree: 2cb75c49a849fbd3d0519de8bfe4cdf67c55d615 /tools
parent: 4713f5c22bb21d341de32498f2a49d92aa1fc08c (diff)
[pretty-timing scripts] Don't barf on non-utf-8
This fixes #9767 by silently ignoring input lines which are not valid UTF-8. We hereby assume that all file paths are valid UTF-8. We also now actually test both python2 and python3 on the CI.
Diffstat (limited to 'tools')
-rw-r--r-- tools/TimeFileMaker.py 32
1 files changed, 21 insertions, 11 deletions
diff --git a/tools/TimeFileMaker.py b/tools/TimeFileMaker.py
index 0c5fdb50ac..3d07661d56 100644
--- a/tools/TimeFileMaker.py
+++ b/tools/TimeFileMaker.py
@@ -34,6 +34,24 @@ def reformat_time_string(time):
minutes, seconds = divmod(seconds, 60)
return '%dm%02d.%ss' % (minutes, seconds, milliseconds)
+def get_file_lines(file_name):
+ if file_name == '-':
+ if hasattr(sys.stdin, 'buffer'):
+ lines = sys.stdin.buffer.readlines()
+ else:
+ lines = sys.stdin.readlines()
+ else:
+ with open(file_name, 'rb') as f:
+ lines = f.readlines()
+ for line in lines:
+ try:
+ yield line.decode('utf-8')
+ except UnicodeDecodeError: # invalid utf-8
+ pass
+
+def get_file(file_name):
+ return ''.join(get_file_lines(file_name))
+
def get_times(file_name):
'''
Reads the contents of file_name, which should be the output of
@@ -41,11 +59,7 @@ def get_times(file_name):
names to compile durations, as strings. Removes common prefixes
using STRIP_REG and STRIP_REP.
'''
- if file_name == '-':
- lines = sys.stdin.read()
- else:
- with open(file_name, 'r', encoding="utf-8") as f:
- lines = f.read()
+ lines = get_file(file_name)
reg = re.compile(r'^([^\s]+) \([^\)]*?user: ([0-9\.]+)[^\)]*?\)\s*$', re.MULTILINE)
times = reg.findall(lines)
if all(time in ('0.00', '0.01') for name, time in times):
@@ -61,11 +75,7 @@ def get_single_file_times(file_name):
'coqc -time', and parses it to construct a dict mapping lines to
to compile durations, as strings.
'''
- if file_name == '-':
- lines = sys.stdin.read()
- else:
- with open(file_name, 'r', encoding="utf-8") as f:
- lines = f.read()
+ lines = get_file(file_name)
reg = re.compile(r'^Chars ([0-9]+) - ([0-9]+) ([^ ]+) ([0-9\.]+) secs (.*)$', re.MULTILINE)
times = reg.findall(lines)
if len(times) == 0: return dict()
@@ -212,7 +222,7 @@ def print_or_write_table(table, files):
if hasattr(sys.stdout, 'buffer'):
sys.stdout.buffer.write(table.encode("utf-8"))
else:
- sys.stdout.write(table)
+ sys.stdout.write(table.encode("utf-8"))
for file_name in files:
if file_name != '-':
with open(file_name, 'w', encoding="utf-8") as f: