Skip to content

Commit 68165e6

Browse files
committed
test(dcp): update dcp e2e tests with DCPOptimizedS3Reader
- Add dcp_reader_constructor fixture for DCP tests - Update test_e2e_s3_file_system.py to use dcp_reader_constructor fixture - Update test_e2e_s3_storage_reader.py load ordering test to also cover dcp-optimized s3 reader
1 parent a515b4a commit 68165e6

File tree

3 files changed

+43
-17
lines changed

3 files changed

+43
-17
lines changed

s3torchconnector/tst/conftest.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,32 @@
1111
# Shared reader constructors for parametrized tests
1212
# TODO: use this variable in test_distributed_training.py and test_multiprocess_dataloading.py
1313
READER_CONSTRUCTORS = [
14-
S3ReaderConstructor.sequential(), # Sequential Reader
15-
S3ReaderConstructor.range_based(), # Default range-based reader, with buffer
16-
S3ReaderConstructor.range_based(buffer_size=0), # range-based reader, no buffer
14+
("sequential", S3ReaderConstructor.sequential()),
15+
("range_based_with_buffer", S3ReaderConstructor.range_based()),
16+
("range_based_no_buffer", S3ReaderConstructor.range_based(buffer_size=0)),
17+
]
18+
19+
# Include dcp_optimized for DCP tests
20+
DCP_READER_CONSTRUCTORS = READER_CONSTRUCTORS + [
21+
("dcp_optimized", S3ReaderConstructor.dcp_optimized()),
1722
]
1823

1924

2025
@pytest.fixture(
21-
params=READER_CONSTRUCTORS,
22-
ids=["sequential", "range_based_with_buffer", "range_based_no_buffer"],
26+
params=[constructor for _, constructor in READER_CONSTRUCTORS],
27+
ids=[name for name, _ in READER_CONSTRUCTORS],
2328
scope="module",
2429
)
2530
def reader_constructor(request) -> S3ReaderConstructorProtocol:
2631
"""Provide reader constructor (partial(S3Reader)) instances for all supported reader types."""
2732
return request.param
33+
34+
35+
@pytest.fixture(
36+
params=[constructor for _, constructor in DCP_READER_CONSTRUCTORS],
37+
ids=[name for name, _ in DCP_READER_CONSTRUCTORS],
38+
scope="module",
39+
)
40+
def dcp_reader_constructor(request) -> S3ReaderConstructorProtocol:
41+
"""Provide reader constructor instances for DCP tests including dcp_optimized."""
42+
return request.param

s3torchconnector/tst/e2e/dcp/test_e2e_s3_file_system.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ def test_dcp_when_multi_process(
212212
tensor_dimensions,
213213
thread_count,
214214
port_offset,
215-
reader_constructor,
215+
dcp_reader_constructor,
216216
):
217217
multi_process_dcp_save_load(
218218
world_size=3,
@@ -221,7 +221,7 @@ def test_dcp_when_multi_process(
221221
tensor_dimensions=tensor_dimensions,
222222
port_offset=port_offset,
223223
prefix_strategy=None,
224-
reader_constructor=reader_constructor,
224+
reader_constructor=dcp_reader_constructor,
225225
)
226226

227227

s3torchconnector/tst/e2e/dcp/test_e2e_s3_storage_reader.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from s3torchconnector import S3ReaderConstructor
1212
from s3torchconnector.dcp import S3StorageWriter, S3StorageReader
13-
from s3torchconnector.s3reader.sequential import SequentialS3Reader
13+
from s3torchconnector.s3reader import SequentialS3Reader, DCPOptimizedS3Reader
1414

1515

1616
SIMPLE_MODEL = torch.nn.Sequential(
@@ -39,19 +39,34 @@ def __init__(self):
3939

4040

4141
@pytest.mark.parametrize("model", [SIMPLE_MODEL, LARGER_MODEL])
42-
def test_dcp_load_reads_tensors_in_sequential_order(checkpoint_directory, model):
42+
@pytest.mark.parametrize(
43+
"reader_class,reader_constructor",
44+
[
45+
(SequentialS3Reader, S3ReaderConstructor.sequential()),
46+
(DCPOptimizedS3Reader, S3ReaderConstructor.dcp_optimized()),
47+
],
48+
)
49+
def test_dcp_load_reads_tensors_in_sequential_order(
50+
checkpoint_directory, model, reader_class, reader_constructor
51+
):
4352
"""
4453
Test that prepare_local_plan allows dcp.load() to read items in offset order.
4554
4655
This does not prevent backwards seek, since torch.load() would still call
4756
backwards seek operations.
4857
58+
SequentialS3Reader:
4959
pytorch/torch/serialization.py load() function will call _is_zipfile(), which
5060
includes this read() call: f.read(len(local_header_magic_number)). This is
5161
followed by readinto() calls on the actual tensor.
5262
63+
DCPOptimizedS3Reader:
64+
DCPOptimizedS3Reader.seekable() returns false, hence PyTorch would use read()
65+
calls and make it seekable with `seekable = io.BytesIO(transform_from.read(-1))` in
66+
pytorch/torch/distributed/checkpoint/filesystem.py read_data() method.
67+
5368
Hence we can track read() call positions to determine if load ordering is
54-
being applied correctly.
69+
being applied correctly for both cases.
5570
"""
5671
region = checkpoint_directory.region
5772
s3_uri = checkpoint_directory.s3_uri
@@ -61,21 +76,17 @@ def test_dcp_load_reads_tensors_in_sequential_order(checkpoint_directory, model)
6176
dcp.save(state_dict, storage_writer=storage_writer)
6277

6378
read_positions = []
64-
65-
original_read = SequentialS3Reader.read
79+
original_read = reader_class.read
6680

6781
def track_reads(self, size=None):
6882
if not self.key.endswith(".metadata"):
6983
read_positions.append(self._position)
7084
return original_read(self, size)
7185

72-
# Load with position tracking on read() (called at the start of each torch.load())
73-
with patch.object(SequentialS3Reader, "read", track_reads):
86+
with patch.object(reader_class, "read", track_reads):
7487
loaded_state_dict = {k: torch.empty_like(v) for k, v in state_dict.items()}
7588
storage_reader = S3StorageReader(
76-
region=region,
77-
path=s3_uri,
78-
reader_constructor=S3ReaderConstructor.sequential(),
89+
region=region, path=s3_uri, reader_constructor=reader_constructor
7990
)
8091
dcp.load(loaded_state_dict, storage_reader=storage_reader)
8192

0 commit comments

Comments
 (0)