@@ -107,13 +107,31 @@ def get_branch():
107
107
return _get_output ('git branch' ).split ()[- 1 ]
108
108
109
109
110
def get_file_content_as_binary(filename):
    '''Get the content of a file, read as bytes and decoded as UTF-8

    Locally (ie. non-github event) we return the content of the staged file,
    not the file in the working directory.  On a github event, or when
    running under pytest, the file in the working directory is read instead.

    :param filename: Path of the file to read.
    :returns: The decoded content, or None if the content is not valid
              UTF-8 (in which case the file is reported through _skip).
    '''
    # Decide the source up front so the try block only wraps the reads.
    read_working_copy = _is_github_event() or 'pytest' in sys.modules
    try:
        if read_working_copy:
            # Read raw bytes and decode explicitly so that a non-UTF-8 file
            # raises UnicodeDecodeError regardless of the locale encoding.
            with open(filename, 'rb') as fileobj:
                data = fileobj.read().decode()
        else:
            # ':<path>' asks git for the staged (index) version of the file.
            # NOTE(review): assumes _get_output returns decoded text and may
            # raise UnicodeDecodeError on non-UTF-8 content -- confirm.
            data = _get_output(f'git show :{filename}')
    except UnicodeDecodeError:
        # Apply the same "skip" policy to both sources so callers only ever
        # need to handle the None return value.
        _skip(filename, 'File is not UTF-8 encoded')
        data = None
    return data
126
+
127
+
110
128
def get_text_file_content (filename ):
111
129
'''Get content of a text file
112
130
113
131
Locally (ie. non-github event) we return the content of the staged file,
114
132
not the file in the working directory.
115
133
'''
116
- if _is_github_event ():
134
+ if _is_github_event () or 'pytest' in sys . modules :
117
135
data = Path (filename ).read_text ()
118
136
else :
119
137
data = _get_output (f'git show :{ filename } ' )
@@ -292,11 +310,8 @@ def check_eol(files):
292
310
# As the client environment is not configured with autocrlf
293
311
# we need to ensure that every text file does not contain CRLF.
294
312
for filename in files :
295
- try :
296
- with open (filename , 'rb' ) as fileobj :
297
- data = fileobj .read ().decode ()
298
- except UnicodeDecodeError :
299
- _skip (filename , 'File is not UTF-8 encoded' )
313
+ data = get_file_content_as_binary (filename )
314
+ if data is None :
300
315
continue
301
316
302
317
# Skip binary file
@@ -311,12 +326,11 @@ def check_eol(files):
311
326
312
327
def check_do_not_merge_in_file (filename , new_file = False ):
313
328
'''Check for "do not merge" in a filename'''
314
- try :
315
- with open (filename , 'rb' ) as fileobj :
316
- lines = fileobj .read ().decode ().splitlines (True )
317
- except UnicodeDecodeError :
318
- _skip (filename , 'File is not UTF-8 encoded' )
329
+ data = get_file_content_as_binary (filename )
330
+ if data is None :
319
331
return 0
332
+ else :
333
+ lines = data .splitlines (True )
320
334
321
335
if new_file :
322
336
line_nums = [f'1-{ len (lines )} ' ]
@@ -398,12 +412,11 @@ def trim_trailing_whitespace_in_file(filename, new_file, dry_run,
398
412
:returns: If dry_run=True, 0 if no trailing whitespace is found, 1 if
399
413
trailing whitespace is found.
400
414
'''
401
- try :
402
- with open (filename , 'rb' ) as fileobj :
403
- lines = fileobj .read ().decode ().splitlines (True )
404
- except UnicodeDecodeError :
405
- _skip (filename , 'File is not UTF-8 encoded' )
415
+ data = get_file_content_as_binary (filename )
416
+ if data is None :
406
417
return 0
418
+ else :
419
+ lines = data .splitlines (True )
407
420
408
421
if new_file :
409
422
line_nums = [f'1-{ len (lines )} ' ]
@@ -732,6 +745,24 @@ def test_match_word_boundaries(self):
732
745
cpp_throw_std_exception_pattern .search ('rethrow exception' ))
733
746
734
747
748
class TestCheckFileContent(unittest.TestCase):
    '''Tests for check_file_content on good and bad inputs'''

    def test_various_files(self):
        '''check_file_content returns 0 only for files expected to be good'''
        # (filename, expected to be good, inline content or None to load the
        # sample from the ../test directory next to this file)
        cases = (
            # The literal is split so this file does not trip the very
            # check it is testing.
            ('do_not_commit.py', False, 'do not ' + 'commit'),
            ('tab.py', False, 'field\t field'),
            ('no_newline.cpp', False, 'No terminating newline'),
            ('good_file.cpp', True, None),
        )
        for name, expect_good, content in cases:
            if content is None:
                sample = Path(__file__).parent / f'../test/{name}'
                content = get_file_content(str(sample))
            status = check_file_content(name, content)
            self.assertEqual(status == 0, expect_good)
764
+
765
+
735
766
def get_file_content (filename ):
736
767
'''Return the content of a file.
737
768
0 commit comments