@@ -77,7 +77,6 @@ def test_it_detects_correct_file_type_for_CFB_and_ZIP_subtypes_detected_by_direc
77
77
(FileType .HEIC , "img/DA-1p.heic" , "image/heic" ),
78
78
(FileType .HTML , "example-10k-1p.html" , "text/html" ),
79
79
(FileType .JPG , "img/example.jpg" , "image/jpeg" ),
80
- (FileType .JSON , "spring-weather.html.json" , "application/json" ),
81
80
(FileType .MD , "README.md" , "text/markdown" ),
82
81
(FileType .ORG , "README.org" , "text/org" ),
83
82
(FileType .PDF , "pdf/layout-parser-paper-fast.pdf" , "application/pdf" ),
@@ -116,7 +115,6 @@ def test_it_detects_correct_file_type_from_file_path_with_correct_asserted_conte
116
115
(FileType .HEIC , "img/DA-1p.heic" , "image/heic" ),
117
116
(FileType .HTML , "example-10k-1p.html" , "text/html" ),
118
117
(FileType .JPG , "img/example.jpg" , "image/jpeg" ),
119
- (FileType .JSON , "spring-weather.html.json" , "application/json" ),
120
118
(FileType .MD , "README.md" , "text/markdown" ),
121
119
(FileType .ORG , "README.org" , "text/org" ),
122
120
(FileType .PDF , "pdf/layout-parser-paper-fast.pdf" , "application/pdf" ),
@@ -154,10 +152,10 @@ def test_it_identifies_NDJSON_for_file_like_object_with_no_name_but_NDJSON_conte
154
152
assert detect_filetype (file = file , content_type = FileType .NDJSON .mime_type ) == FileType .NDJSON
155
153
156
154
157
- # TODO: ideally this test should pass, currently fails
158
- # def test_it_identifies_NDJSON_for_file_with_ndjson_extension_but_JSON_content_type():
159
- # file_path = example_doc_path("simple.ndjson")
160
- # assert detect_filetype(file_path, content_type=FileType.JSON.mime_type) == FileType.NDJSON
155
+ def test_it_identifies_NDJSON_for_file_with_ndjson_extension_but_JSON_content_type ():
156
+ file_path = example_doc_path ( "simple.ndjson" )
157
+ assert detect_filetype ( file_path , content_type = FileType . JSON . mime_type ) == FileType . NDJSON
158
+
161
159
162
160
# ================================================================================================
163
161
# STRATEGY #3 - GUESS MIME-TYPE WITH LIBMAGIC/FILETYPE LIBRARY
@@ -268,7 +266,6 @@ def test_it_detects_most_file_types_using_mime_guessing_when_libmagic_guesses_mi
268
266
(FileType .UNK , "stanley-cups.csv" ),
269
267
(FileType .UNK , "eml/fake-email.eml" ),
270
268
(FileType .UNK , "example-10k-1p.html" ),
271
- (FileType .UNK , "spring-weather.html.json" ),
272
269
(FileType .UNK , "README.md" ),
273
270
(FileType .UNK , "README.org" ),
274
271
(FileType .UNK , "README.rst" ),
@@ -333,6 +330,7 @@ def test_detect_filetype_from_file_warns_when_libmagic_is_not_installed(
333
330
(FileType .TXT , "norwich-city.txt" ),
334
331
(FileType .WAV , "CantinaBand3.wav" ),
335
332
(FileType .XML , "factbook.xml" ),
333
+ (FileType .NDJSON , "simple.ndjson" ),
336
334
],
337
335
)
338
336
def test_it_detects_correct_file_type_from_extension_when_that_maps_to_a_file_type (
@@ -395,6 +393,27 @@ def test_it_detects_HTML_from_guessed_mime_type_ending_with_xml_and_html_extensi
395
393
assert file_type is FileType .HTML
396
394
397
395
396
+ @pytest .mark .parametrize (
397
+ ("expected_value" , "file_name" ),
398
+ [(FileType .NDJSON , "simple.ndjson" ), (FileType .JSON , "spring-weather.html.json" )],
399
+ )
400
+ def test_it_detects_correct_json_type_without_extension (expected_value : FileType , file_name : str ):
401
+ with open (example_doc_path (file_name ), "rb" ) as f :
402
+ file = io .BytesIO (f .read ())
403
+
404
+ filetype = detect_filetype (file = file )
405
+ assert filetype == expected_value
406
+
407
+
408
+ @pytest .mark .parametrize (
409
+ ("expected_value" , "file_name" ),
410
+ [(FileType .NDJSON , "simple.ndjson" ), (FileType .JSON , "spring-weather.html.json" )],
411
+ )
412
+ def test_it_detects_correct_json_type_with_extension (expected_value : FileType , file_name : str ):
413
+ filetype = detect_filetype (file_path = example_doc_path (file_name ))
414
+ assert filetype == expected_value
415
+
416
+
398
417
@pytest .mark .parametrize (
399
418
("mime_type" , "file_name" ),
400
419
[
0 commit comments