diff options
| author | Jason Gross | 2019-03-31 10:40:56 -0400 |
|---|---|---|
| committer | Jason Gross | 2019-03-31 14:09:06 -0400 |
| commit | 2acd04d6d7d608920dd93b0a602e3214ffeb9ae5 (patch) | |
| tree | 2cb75c49a849fbd3d0519de8bfe4cdf67c55d615 /tools/TimeFileMaker.py | |
| parent | 4713f5c22bb21d341de32498f2a49d92aa1fc08c (diff) | |
[pretty-timing scripts] Don't barf on non-utf-8
This fixes #9767 by silently ignoring input lines which are not valid
UTF-8. We hereby assume that all file paths are valid UTF-8.
We also now actually test both python2 and python3 on the CI.
Diffstat (limited to 'tools/TimeFileMaker.py')
| -rw-r--r-- | tools/TimeFileMaker.py | 32 |
1 files changed, 21 insertions, 11 deletions
diff --git a/tools/TimeFileMaker.py b/tools/TimeFileMaker.py index 0c5fdb50ac..3d07661d56 100644 --- a/tools/TimeFileMaker.py +++ b/tools/TimeFileMaker.py @@ -34,6 +34,24 @@ def reformat_time_string(time): minutes, seconds = divmod(seconds, 60) return '%dm%02d.%ss' % (minutes, seconds, milliseconds) +def get_file_lines(file_name): + if file_name == '-': + if hasattr(sys.stdin, 'buffer'): + lines = sys.stdin.buffer.readlines() + else: + lines = sys.stdin.readlines() + else: + with open(file_name, 'rb') as f: + lines = f.readlines() + for line in lines: + try: + yield line.decode('utf-8') + except UnicodeDecodeError: # invalid utf-8 + pass + +def get_file(file_name): + return ''.join(get_file_lines(file_name)) + def get_times(file_name): ''' Reads the contents of file_name, which should be the output of @@ -41,11 +59,7 @@ def get_times(file_name): names to compile durations, as strings. Removes common prefixes using STRIP_REG and STRIP_REP. ''' - if file_name == '-': - lines = sys.stdin.read() - else: - with open(file_name, 'r', encoding="utf-8") as f: - lines = f.read() + lines = get_file(file_name) reg = re.compile(r'^([^\s]+) \([^\)]*?user: ([0-9\.]+)[^\)]*?\)\s*$', re.MULTILINE) times = reg.findall(lines) if all(time in ('0.00', '0.01') for name, time in times): @@ -61,11 +75,7 @@ def get_single_file_times(file_name): 'coqc -time', and parses it to construct a dict mapping lines to to compile durations, as strings. ''' - if file_name == '-': - lines = sys.stdin.read() - else: - with open(file_name, 'r', encoding="utf-8") as f: - lines = f.read() + lines = get_file(file_name) reg = re.compile(r'^Chars ([0-9]+) - ([0-9]+) ([^ ]+) ([0-9\.]+) secs (.*)$', re.MULTILINE) times = reg.findall(lines) if len(times) == 0: return dict() @@ -212,7 +222,7 @@ def print_or_write_table(table, files): if hasattr(sys.stdout, 'buffer'): sys.stdout.buffer.write(table.encode("utf-8")) else: - sys.stdout.write(table) + sys.stdout.write(table.encode("utf-8")) for file_name in files: if file_name != '-': with open(file_name, 'w', encoding="utf-8") as f: |
