From ac7cefbf3b275cd7b0abc586d5d3436fe0ce63bf Mon Sep 17 00:00:00 2001 From: Prathamesh Pandit Date: Wed, 20 Aug 2025 21:30:34 -0700 Subject: [PATCH] Allow multiple writes --- src/datacustomcode/scan.py | 4 ---- tests/test_scan.py | 22 ++++++++++++++++------ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/datacustomcode/scan.py b/src/datacustomcode/scan.py index ca64c26..a2ffd9d 100644 --- a/src/datacustomcode/scan.py +++ b/src/datacustomcode/scan.py @@ -54,10 +54,6 @@ class DataAccessLayerCalls(pydantic.BaseModel): def validate_access_layer(self) -> DataAccessLayerCalls: if self.read_dlo and self.read_dmo: raise ValueError("Cannot read from DLO and DMO in the same file.") - if len(self.write_to_dlo) > 1 or len(self.write_to_dmo) > 1: - raise ValueError( - "Cannot write to more than one DLO or DMO in the same file." - ) if not self.read_dlo and not self.read_dmo: raise ValueError("Must read from at least one DLO or DMO.") if self.read_dlo and self.write_to_dmo: diff --git a/tests/test_scan.py b/tests/test_scan.py index e3bc3ce..c922877 100644 --- a/tests/test_scan.py +++ b/tests/test_scan.py @@ -247,7 +247,7 @@ def test_read_dmo_write_dlo_throws_error(self): finally: os.unlink(temp_path) - def test_invalid_multiple_writes(self): + def test_multiple_writes(self): """Test scanning a file with multiple write operations.""" content = textwrap.dedent( """ @@ -258,15 +258,25 @@ def test_invalid_multiple_writes(self): # Read from DLO df = client.read_dlo("input_dlo") - # Write to multiple DLOs - invalid - client.write_to_dlo("output_dlo_1", df, "overwrite") - client.write_to_dlo("output_dlo_2", df, "overwrite") + # Transform data for different outputs + df_filtered = df.filter(df.col > 10) + df_aggregated = df.groupBy("category").agg({"value": "sum"}) + + # Write to multiple DLOs + client.write_to_dlo("output_filtered", df_filtered, "overwrite") + client.write_to_dlo("output_aggregated", df_aggregated, "overwrite") """ ) temp_path = create_test_script(content) try: - with pytest.raises(ValueError, match="Cannot write to more than one DLO"): - scan_file(temp_path) + result = scan_file(temp_path) + assert "input_dlo" in result.read_dlo + assert "output_filtered" in result.write_to_dlo + assert "output_aggregated" in result.write_to_dlo + assert len(result.read_dlo) == 1 + assert len(result.write_to_dlo) == 2 + assert len(result.read_dmo) == 0 + assert len(result.write_to_dmo) == 0 finally: os.unlink(temp_path)