Skip to content

Commit

Permalink
Bugfix when passing empty bytes to detector (#29)
Browse files Browse the repository at this point in the history
* Bugfix when passing empty seq to detector

* Add test for empty bytes case

* bump version 1.3.2
  • Loading branch information
Ousret authored Dec 13, 2019
1 parent 6254c79 commit b0e4e94
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 1 deletion.
8 changes: 8 additions & 0 deletions charset_normalizer/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,14 @@ def from_bytes(sequences, steps=10, chunk_size=512, threshold=0.20, cp_isolation
if not explain:
logger.disable('charset_normalizer')

if len(sequences) == 0:
return CharsetNormalizerMatch(
sequences,
'utf-8',
0.,
[]
)

too_small_sequence = len(sequences) < 24

if too_small_sequence is True:
Expand Down
2 changes: 1 addition & 1 deletion charset_normalizer/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
Expose version
"""

- __version__ = "1.3.1"
+ __version__ = "1.3.2"
VERSION = __version__.split('.')
12 changes: 12 additions & 0 deletions test/test_on_byte.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ def test_too_short_none(self):
CnM.from_bytes(b'\xfe\xff').best().first()
)

def test_empty_bytes(self):
r = CnM.from_bytes(b'').best().first()

self.assertIsNotNone(
r
)

self.assertEqual(
'utf-8',
r.encoding
)

def test_bom_detection(self):
with self.subTest('GB18030 UNAVAILABLE SIG'):
self.assertFalse(
Expand Down

0 comments on commit b0e4e94

Please sign in to comment.