From 068f38ba74cbfb5592fcd35cd1925c5582dcb2e7 Mon Sep 17 00:00:00 2001 From: Edoardo Bezzeccheri Date: Wed, 7 May 2025 08:44:27 +0000 Subject: [PATCH 1/6] Refactored literals --- pre_commit_hooks/end_of_file_fixer.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pre_commit_hooks/end_of_file_fixer.py b/pre_commit_hooks/end_of_file_fixer.py index a88425c6..fc05d160 100644 --- a/pre_commit_hooks/end_of_file_fixer.py +++ b/pre_commit_hooks/end_of_file_fixer.py @@ -5,6 +5,10 @@ from collections.abc import Sequence from typing import IO +LF = b'\n' +CR = b'\r' +CRLF = b'\r\n' + def fix_file(file_obj: IO[bytes]) -> int: # Test for newline at end of file @@ -15,13 +19,13 @@ def fix_file(file_obj: IO[bytes]) -> int: return 0 last_character = file_obj.read(1) # last_character will be '' for an empty file - if last_character not in {b'\n', b'\r'} and last_character != b'': + if last_character not in {LF, CR} and last_character != b'': # Needs this seek for windows, otherwise IOError file_obj.seek(0, os.SEEK_END) - file_obj.write(b'\n') + file_obj.write(LF) return 1 - while last_character in {b'\n', b'\r'}: + while last_character in {LF, CR}: # Deal with the beginning of the file if file_obj.tell() == 1: # If we've reached the beginning of the file and it is all @@ -38,7 +42,7 @@ def fix_file(file_obj: IO[bytes]) -> int: # newlines. If we find extraneous newlines, then backtrack and trim them. 
position = file_obj.tell() remaining = file_obj.read() - for sequence in (b'\n', b'\r\n', b'\r'): + for sequence in (LF, CRLF, CR): if remaining == sequence: return 0 elif remaining.startswith(sequence): From c21246f34f49dbd0e4b6d5b4e92a9b21e30395bf Mon Sep 17 00:00:00 2001 From: Edoardo Bezzeccheri Date: Wed, 7 May 2025 08:45:10 +0000 Subject: [PATCH 2/6] Added test to reproduce case Case is for file using CRLF endings --- tests/end_of_file_fixer_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/end_of_file_fixer_test.py b/tests/end_of_file_fixer_test.py index 8a5d889e..05fdadee 100644 --- a/tests/end_of_file_fixer_test.py +++ b/tests/end_of_file_fixer_test.py @@ -18,6 +18,7 @@ (b'foo\n\n\n', 1, b'foo\n'), (b'\xe2\x98\x83', 1, b'\xe2\x98\x83\n'), (b'foo\r\n', 0, b'foo\r\n'), + (b'foo\r\nbar', 1, b'foo\r\nbar\r\n'), (b'foo\r\n\r\n\r\n', 1, b'foo\r\n'), (b'foo\r', 0, b'foo\r'), (b'foo\r\r\r\r', 1, b'foo\r'), From c881808ed66692568717b2e929dd5e583e4080a7 Mon Sep 17 00:00:00 2001 From: Edoardo Bezzeccheri Date: Wed, 7 May 2025 08:52:52 +0000 Subject: [PATCH 3/6] Implementation to pass test --- pre_commit_hooks/end_of_file_fixer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pre_commit_hooks/end_of_file_fixer.py b/pre_commit_hooks/end_of_file_fixer.py index fc05d160..2f397eb0 100644 --- a/pre_commit_hooks/end_of_file_fixer.py +++ b/pre_commit_hooks/end_of_file_fixer.py @@ -20,9 +20,13 @@ def fix_file(file_obj: IO[bytes]) -> int: last_character = file_obj.read(1) # last_character will be '' for an empty file if last_character not in {LF, CR} and last_character != b'': + # Check if file uses CRLF endings + file_obj.seek(0, os.SEEK_SET) + content = file_obj.read() + ending = CRLF if CRLF in content else LF # Needs this seek for windows, otherwise IOError file_obj.seek(0, os.SEEK_END) - file_obj.write(LF) + file_obj.write(ending) return 1 while last_character in {LF, CR}: From 203735ecf7d3418d47fc4632b814d0fed7ca7c8f Mon Sep 17 
00:00:00 2001 From: Edoardo Bezzeccheri Date: Wed, 7 May 2025 09:00:23 +0000 Subject: [PATCH 4/6] Edge case with mixed endings Added case for which the file has mixed line endings. In this case, default to using LF for the end-of-file line. --- pre_commit_hooks/end_of_file_fixer.py | 7 +++++-- tests/end_of_file_fixer_test.py | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pre_commit_hooks/end_of_file_fixer.py b/pre_commit_hooks/end_of_file_fixer.py index 2f397eb0..6bb452e3 100644 --- a/pre_commit_hooks/end_of_file_fixer.py +++ b/pre_commit_hooks/end_of_file_fixer.py @@ -20,10 +20,13 @@ def fix_file(file_obj: IO[bytes]) -> int: last_character = file_obj.read(1) # last_character will be '' for an empty file if last_character not in {LF, CR} and last_character != b'': - # Check if file uses CRLF endings + # Check for consistent CRLF usage file_obj.seek(0, os.SEEK_SET) content = file_obj.read() - ending = CRLF if CRLF in content else LF + lf_count = content.count(LF) + crlf_count = content.count(CRLF) + # Use CRLF only if all line endings are CRLF + ending = CRLF if crlf_count > 0 and crlf_count == lf_count else LF # Needs this seek for windows, otherwise IOError file_obj.seek(0, os.SEEK_END) file_obj.write(ending) diff --git a/tests/end_of_file_fixer_test.py b/tests/end_of_file_fixer_test.py index 05fdadee..00544d86 100644 --- a/tests/end_of_file_fixer_test.py +++ b/tests/end_of_file_fixer_test.py @@ -19,6 +19,7 @@ (b'\xe2\x98\x83', 1, b'\xe2\x98\x83\n'), (b'foo\r\n', 0, b'foo\r\n'), (b'foo\r\nbar', 1, b'foo\r\nbar\r\n'), + (b'foo\nbar\r\nbaz', 1, b'foo\nbar\r\nbaz\n'), (b'foo\r\n\r\n\r\n', 1, b'foo\r\n'), (b'foo\r', 0, b'foo\r'), (b'foo\r\r\r\r', 1, b'foo\r'), From 647959c4b4b340d609d17f73e71425b1bb63875a Mon Sep 17 00:00:00 2001 From: Edoardo Bezzeccheri Date: Wed, 7 May 2025 15:58:17 +0000 Subject: [PATCH 5/6] Revert "Edge case with mixed endings" This reverts commit 203735ecf7d3418d47fc4632b814d0fed7ca7c8f. 
--- pre_commit_hooks/end_of_file_fixer.py | 7 ++----- tests/end_of_file_fixer_test.py | 1 - 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pre_commit_hooks/end_of_file_fixer.py b/pre_commit_hooks/end_of_file_fixer.py index 6bb452e3..2f397eb0 100644 --- a/pre_commit_hooks/end_of_file_fixer.py +++ b/pre_commit_hooks/end_of_file_fixer.py @@ -20,13 +20,10 @@ def fix_file(file_obj: IO[bytes]) -> int: last_character = file_obj.read(1) # last_character will be '' for an empty file if last_character not in {LF, CR} and last_character != b'': - # Check for consistent CRLF usage + # Check if file uses CRLF endings file_obj.seek(0, os.SEEK_SET) content = file_obj.read() - lf_count = content.count(LF) - crlf_count = content.count(CRLF) - # Use CRLF only if all line endings are CRLF - ending = CRLF if crlf_count > 0 and crlf_count == lf_count else LF + ending = CRLF if CRLF in content else LF # Needs this seek for windows, otherwise IOError file_obj.seek(0, os.SEEK_END) file_obj.write(ending) diff --git a/tests/end_of_file_fixer_test.py b/tests/end_of_file_fixer_test.py index 00544d86..05fdadee 100644 --- a/tests/end_of_file_fixer_test.py +++ b/tests/end_of_file_fixer_test.py @@ -19,7 +19,6 @@ (b'\xe2\x98\x83', 1, b'\xe2\x98\x83\n'), (b'foo\r\n', 0, b'foo\r\n'), (b'foo\r\nbar', 1, b'foo\r\nbar\r\n'), - (b'foo\nbar\r\nbaz', 1, b'foo\nbar\r\nbaz\n'), (b'foo\r\n\r\n\r\n', 1, b'foo\r\n'), (b'foo\r', 0, b'foo\r'), (b'foo\r\r\r\r', 1, b'foo\r'), From 04de4f8042e6cf7cf81e8b060aeda7f2f04c48ec Mon Sep 17 00:00:00 2001 From: Edoardo Bezzeccheri Date: Wed, 7 May 2025 16:05:09 +0000 Subject: [PATCH 6/6] Using first line to determine EOL --- pre_commit_hooks/end_of_file_fixer.py | 11 ++++++++--- tests/end_of_file_fixer_test.py | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pre_commit_hooks/end_of_file_fixer.py b/pre_commit_hooks/end_of_file_fixer.py index 2f397eb0..0a729766 100644 --- a/pre_commit_hooks/end_of_file_fixer.py +++ 
b/pre_commit_hooks/end_of_file_fixer.py @@ -20,10 +20,15 @@ def fix_file(file_obj: IO[bytes]) -> int: last_character = file_obj.read(1) # last_character will be '' for an empty file if last_character not in {LF, CR} and last_character != b'': - # Check if file uses CRLF endings + # Look at first line to determine line ending file_obj.seek(0, os.SEEK_SET) - content = file_obj.read() - ending = CRLF if CRLF in content else LF + first_line = file_obj.readline() + if CRLF in first_line: + ending = CRLF + elif CR in first_line: + ending = CR + else: + ending = LF # Needs this seek for windows, otherwise IOError file_obj.seek(0, os.SEEK_END) file_obj.write(ending) diff --git a/tests/end_of_file_fixer_test.py b/tests/end_of_file_fixer_test.py index 05fdadee..99f40b7c 100644 --- a/tests/end_of_file_fixer_test.py +++ b/tests/end_of_file_fixer_test.py @@ -19,6 +19,7 @@ (b'\xe2\x98\x83', 1, b'\xe2\x98\x83\n'), (b'foo\r\n', 0, b'foo\r\n'), (b'foo\r\nbar', 1, b'foo\r\nbar\r\n'), + (b'foo\rbar', 1, b'foo\rbar\r'), (b'foo\r\n\r\n\r\n', 1, b'foo\r\n'), (b'foo\r', 0, b'foo\r'), (b'foo\r\r\r\r', 1, b'foo\r'),