From bf09c614a0b3d23679848f21fd14cd69b9c13edf Mon Sep 17 00:00:00 2001 From: Siong Chin Date: Wed, 19 May 2021 11:13:09 +0100 Subject: [PATCH 1/5] Auto removal of trailing whitespace to skip binary file and give useful output NO_JIRA --- main/githooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/githooks.py b/main/githooks.py index 3c81e95..922c343 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -377,7 +377,7 @@ def trim_trailing_whitespace_in_file(filename, new_file, dry_run, with open(filename, 'rb') as fileobj: lines = fileobj.read().decode().splitlines(True) except UnicodeDecodeError: - return + return 0 if new_file: line_nums = [f'1-{len(lines)}'] @@ -398,7 +398,7 @@ def trim_trailing_whitespace_in_file(filename, new_file, dry_run, if dry_run: modified_lines.append(str(line_num)) else: - print(f' Fixed line {line_num}') + print(f' Fixed line {filename}:{line_num}') modified_file = True lines[line_num-1] = after From 186b105df78897f0e0fd54f0796ca62929979240 Mon Sep 17 00:00:00 2001 From: Siong Chin Date: Wed, 19 May 2021 16:47:15 +0100 Subject: [PATCH 2/5] Add a test for when we can't decode a text file when checking whitespace NO_JIRA --- main/githooks.py | 11 +++++++++++ test/decode_error.txt | Bin 0 -> 8 bytes 2 files changed, 11 insertions(+) create mode 100644 test/decode_error.txt diff --git a/main/githooks.py b/main/githooks.py index 8b0ae6a..cefb1c7 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -5,8 +5,10 @@ ''' from collections import defaultdict +from io import StringIO from pathlib import Path from tempfile import NamedTemporaryFile +from unittest.mock import patch import os import platform import re @@ -377,6 +379,7 @@ def trim_trailing_whitespace_in_file(filename, new_file, dry_run, with open(filename, 'rb') as fileobj: lines = fileobj.read().decode().splitlines(True) except UnicodeDecodeError: + _skip(filename, 'File is not UTF-8 encoded') return 0 if new_file: @@ -436,6 +439,14 @@ def test_trim_trailing_whitespace(self): retval = trim_trailing_whitespace_in_file(tmp.name, True, True) self.assertEqual(retval, 0) + def test_decodeerror(self): + # A text file that is not utf-8 encoded - report and skip + test_file = '../test/decode_error.txt' + with patch('sys.stdout', new=StringIO()) as tmp_stdout: + retval = trim_trailing_whitespace_in_file(test_file, True, True) + self.assertEqual(retval, 0) + self.assertEqual(tmp_stdout.getvalue().strip(), f'SKIP {test_file}: File is not UTF-8 encoded') + def remove_trailing_white_space(files, new_files=False, dry_run=False): '''Remove trailing white spaces in all new and modified lines diff --git a/test/decode_error.txt b/test/decode_error.txt new file mode 100644 index 0000000000000000000000000000000000000000..104e45f9402433c64b0767c2fc93ede0b5663cd7 GIT binary patch literal 8 NcmezWFO4A|2murD13~}* literal 0 HcmV?d00001 From ab42666d62d6ff541ee1f97d0ae90222584f7981 Mon Sep 17 00:00:00 2001 From: Siong Chin Date: Wed, 19 May 2021 16:51:40 +0100 Subject: [PATCH 3/5] Use absolute path for test file because tests don't run from here NO_JIRA --- main/githooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/githooks.py b/main/githooks.py index cefb1c7..ae1370c 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -441,7 +441,7 @@ def test_trim_trailing_whitespace(self): def test_decodeerror(self): # A text file that is not utf-8 encoded - report and skip - test_file = '../test/decode_error.txt' + test_file = Path(__file__).parent / '../test/decode_error.txt' with patch('sys.stdout', new=StringIO()) as tmp_stdout: retval = trim_trailing_whitespace_in_file(test_file, True, True) self.assertEqual(retval, 0) From c2dea0f1c7f5456230e23d365e24be63a61f4f7e Mon Sep 17 00:00:00 2001 From: Siong Chin Date: Wed, 19 May 2021 16:55:51 +0100 Subject: [PATCH 4/5] Catch another decode() that might raise. NO_JIRA --- main/githooks.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/main/githooks.py b/main/githooks.py index ae1370c..b17b1dc 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -292,8 +292,12 @@ def check_eol(files): def check_do_not_merge_in_file(filename, new_file=False): '''Check for "do not merge" in a filename''' - with open(filename, 'rb') as fileobj: - lines = fileobj.read().decode().splitlines(True) + try: + with open(filename, 'rb') as fileobj: + lines = fileobj.read().decode().splitlines(True) + except UnicodeDecodeError: + _skip(filename, 'File is not UTF-8 encoded') + return 0 if new_file: line_nums = [f'1-{len(lines)}'] From 2ec74c01dfd5a4a0158e22f350a39112b3441638 Mon Sep 17 00:00:00 2001 From: Siong Chin Date: Wed, 19 May 2021 16:59:03 +0100 Subject: [PATCH 5/5] Log to stdout if skipping a file due to decode error NO_JIRA --- main/githooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main/githooks.py b/main/githooks.py index b17b1dc..0f4362b 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -278,6 +278,7 @@ def check_eol(files): with open(filename, 'rb') as fileobj: data = fileobj.read().decode() except UnicodeDecodeError: + _skip(filename, 'File is not UTF-8 encoded') continue # Skip binary file