diff --git a/HISTORY.md b/HISTORY.md index e5dd53c6..4078e9a2 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,6 +9,11 @@ will remain in Tablib 1.x and will be fixed (reversed) in Tablib 2.0.0 (#453). If you count on the broken behavior, please update your code when you upgrade to Tablib 2.x. +### Improvements + +- Tablib is now able to import CSV content where not all rows have the same + length. Missing columns on any line receive the empty string (#226). + ## 1.0.0 (2020-01-13) ### Breaking changes diff --git a/src/tablib/formats/_csv.py b/src/tablib/formats/_csv.py index 14d7bb27..b0bf4359 100644 --- a/src/tablib/formats/_csv.py +++ b/src/tablib/formats/_csv.py @@ -46,6 +46,8 @@ def import_set(cls, dset, in_stream, headers=True, **kwargs): if (i == 0) and (headers): dset.headers = row elif row: + if i > 0 and len(row) < dset.width: + row += [''] * (dset.width - len(row)) dset.append(row) @classmethod diff --git a/tests/test_tablib.py b/tests/test_tablib.py index b20b6693..3acdd443 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -804,6 +804,25 @@ def test_csv_import_set_with_unicode_str(self): data.csv = csv_text self.assertEqual(data.width, 7) + def test_csv_import_set_ragged(self): + """Import CSV set when not all rows have the same length.""" + csv_text = ( + "H1,H2,H3\n" + "A,B\n" + "C,D,E\n" + "\n" + "F\n" + ) + dataset = tablib.import_set(csv_text, format="csv") + self.assertEqual( + str(dataset), + 'H1|H2|H3\n' + '--|--|--\n' + 'A |B | \n' + 'C |D |E \n' + 'F | | ' + ) + def test_csv_export(self): """Verify exporting dataset object as CSV."""