From ee5696fa4461930eb0871e728518488a6190fdcf Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 20 Nov 2025 01:37:15 +0000
Subject: [PATCH 1/4] Initial plan


From 2064de8532a98dccd4575cd69235042c44500c1b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 20 Nov 2025 01:44:20 +0000
Subject: [PATCH 2/4] Add tensor conversion methods to_torch() and
 to_tensorflow()

Co-authored-by: j143 <53068787+j143@users.noreply.github.com>
---
 paper/numpy_api.py              | 107 ++++++++
 tests/test_tensor_conversion.py | 420 ++++++++++++++++++++++++++++++++
 2 files changed, 527 insertions(+)
 create mode 100644 tests/test_tensor_conversion.py

diff --git a/paper/numpy_api.py b/paper/numpy_api.py
index dfc83a5..a812ded 100644
--- a/paper/numpy_api.py
+++ b/paper/numpy_api.py
@@ -165,6 +165,113 @@ def to_numpy(self) -> np.ndarray:
         """
         return self._materialize()
     
+    def to_torch(self, device: Optional[str] = None):
+        """
+        Convert the array to a PyTorch tensor with efficient memory mapping.
+        
+        This method leverages PyTorch's ability to create tensors from NumPy arrays
+        that share the same memory buffer when possible. Zero-copy conversion applies
+        only to writable data on CPU; read-only data and other devices are copied.
+        
+        Args:
+            device: Optional device specification ('cpu', 'cuda', 'cuda:0', etc.).
+                   If None, uses CPU. For GPU devices, data will be copied to device memory.
+        
+        Returns:
+            torch.Tensor: A PyTorch tensor containing the data
+            
+        Raises:
+            ImportError: If PyTorch is not installed
+            
+        Examples:
+            >>> import paper.numpy_api as pnp
+            >>> a = pnp.array([[1, 2], [3, 4]])
+            >>> torch_tensor = a.to_torch()  # CPU tensor
+            >>> cuda_tensor = a.to_torch(device='cuda')  # GPU tensor (if available)
+            
+        Note:
+            - For CPU tensors, memory is shared with the underlying data only when it is writable
+            - For GPU tensors, data must be copied to device memory
+            - Large arrays may require chunked processing for GPU transfers
+        """
+        try:
+            import torch
+        except ImportError:
+            raise ImportError(
+                "PyTorch is not installed. Install it with: pip install torch"
+            )
+        
+        # For lazy arrays, we need to compute first
+        if self._is_lazy:
+            # Compute to a temporary file
+            materialized = self.compute()
+            return materialized.to_torch(device=device)
+        
+        # For materialized arrays, we can use the memory-mapped data
+        # torch.from_numpy warns on non-writable arrays, so read-only memmaps are copied
+        # Writable memmaps are wrapped directly, so the tensor shares their memory
+        if device is None or device == 'cpu':
+            # Check if the memmap is writable
+            if self._matrix.data.flags.writeable:
+                # Zero-copy: Share memory with the memmap (efficient)
+                tensor = torch.from_numpy(np.asarray(self._matrix.data))
+            else:
+                # Read-only memmap: Need to copy to avoid PyTorch warnings
+                # NOTE: np.array(..., copy=True) loads the whole array into memory
+                tensor = torch.from_numpy(np.array(self._matrix.data, copy=True))
+            return tensor
+        else:
+            # For GPU devices, we need to copy data anyway
+            # Use the memmap data and copy to device
+            if self._matrix.data.flags.writeable:
+                cpu_tensor = torch.from_numpy(np.asarray(self._matrix.data))
+            else:
+                cpu_tensor = torch.from_numpy(np.array(self._matrix.data, copy=True))
+            # Transfer to specified device
+            return cpu_tensor.to(device)
+    
+    def to_tensorflow(self):
+        """
+        Convert the array to a TensorFlow tensor.
+        
+        This method creates the tensor from the underlying NumPy data with
+        tf.convert_to_tensor. Lazy arrays are computed before conversion.
+        
+        Returns:
+            tf.Tensor: A TensorFlow tensor containing the data
+            
+        Raises:
+            ImportError: If TensorFlow is not installed
+            
+        Examples:
+            >>> import paper.numpy_api as pnp
+            >>> a = pnp.array([[1, 2], [3, 4]])
+            >>> tf_tensor = a.to_tensorflow()
+            
+        Note:
+            - The tensor is created from the array's NumPy data via tf.convert_to_tensor
+            - The data may be copied into TensorFlow-managed memory; zero-copy is not guaranteed
+            - For large arrays, TensorFlow handles memory management internally
+        """
+        try:
+            import tensorflow as tf
+        except ImportError:
+            raise ImportError(
+                "TensorFlow is not installed. Install it with: pip install tensorflow"
+            )
+        
+        # For lazy arrays, we need to compute first
+        if self._is_lazy:
+            # Compute to a temporary file
+            materialized = self.compute()
+            return materialized.to_tensorflow()
+        
+        # TensorFlow can create tensors from NumPy arrays efficiently
+        # NOTE(review): tf.convert_to_tensor may copy the whole array into
+        # TensorFlow-managed memory; confirm this is acceptable for large arrays
+        tensor = tf.convert_to_tensor(self._matrix.data)
+        return tensor
+    
     def compute(self, output_path: Optional[str] = None, cache_size_tiles: Optional[int] = None):
         """
         Execute the lazy computation plan and return a materialized ndarray.
diff --git a/tests/test_tensor_conversion.py b/tests/test_tensor_conversion.py
new file mode 100644
index 0000000..ed5ac0e
--- /dev/null
+++ b/tests/test_tensor_conversion.py
@@ -0,0 +1,420 @@
+"""
+Unit tests for tensor conversion methods (PyTorch and TensorFlow).
+
+Tests the to_torch() and to_tensorflow() methods to ensure efficient
+conversion of out-of-core arrays to device tensors.
+"""
+
+import unittest
+import os
+import tempfile
+import shutil
+import numpy as np
+import sys
+
+# Add the parent directory to the path so we can import the paper module
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from paper import numpy_api as pnp
+
+
+class TestTorchConversion(unittest.TestCase):
+    """Test cases for PyTorch tensor conversion."""
+    
+    def setUp(self):
+        """Set up test fixtures."""
+        self.test_dir = tempfile.mkdtemp()
+        
+        # Check if PyTorch is available
+        try:
+            import torch
+            self.torch_available = True
+            self.torch = torch
+        except ImportError:
+            self.torch_available = False
+            self.torch = None
+    
+    def tearDown(self):
+        """Clean up after tests."""
+        if os.path.exists(self.test_dir):
+            shutil.rmtree(self.test_dir)
+    
+    def test_to_torch_cpu_materialized(self):
+        """Test converting materialized array to PyTorch CPU tensor."""
+        if not self.torch_available:
+            self.skipTest("PyTorch is not installed")
+        
+        # Create a simple array
+        data = [[1, 2, 3], [4, 5, 6]]
+        arr = pnp.array(data, dtype=np.float32)
+        
+        # Convert to PyTorch tensor
+        tensor = arr.to_torch()
+        
+        # Verify properties
+        self.assertIsInstance(tensor, self.torch.Tensor)
+        self.assertEqual(tensor.shape, (2, 3))
+        self.assertEqual(tensor.dtype, self.torch.float32)
+        self.assertEqual(tensor.device.type, 'cpu')
+        
+        # Verify data correctness
+        expected = self.torch.tensor(data, dtype=self.torch.float32)
+        self.assertTrue(self.torch.allclose(tensor, expected))
+    
+    def test_to_torch_cpu_lazy(self):
+        """Test converting lazy array to PyTorch CPU tensor."""
+        if not self.torch_available:
+            self.skipTest("PyTorch is not installed")
+        
+        # Create lazy computation
+        a = pnp.array([[1, 2], [3, 4]], dtype=np.float32)
+        b = pnp.array([[5, 6], [7, 8]], dtype=np.float32)
+        c = a + b  # Lazy
+        
+        # Convert to PyTorch tensor (should compute first)
+        tensor = c.to_torch()
+        
+        # Verify properties
+        self.assertIsInstance(tensor, self.torch.Tensor)
+        self.assertEqual(tensor.shape, (2, 2))
+        
+        # Verify data correctness
+        expected = self.torch.tensor([[6, 8], [10, 12]], dtype=self.torch.float32)
+        self.assertTrue(self.torch.allclose(tensor, expected))
+    
+    def test_to_torch_explicit_cpu_device(self):
+        """Test converting array to PyTorch tensor with explicit CPU device."""
+        if not self.torch_available:
+            self.skipTest("PyTorch is not installed")
+        
+        arr = pnp.array([[1, 2], [3, 4]], dtype=np.float32)
+        
+        # Convert with explicit CPU device
+        tensor = arr.to_torch(device='cpu')
+        
+        self.assertEqual(tensor.device.type, 'cpu')
+        self.assertEqual(tensor.shape, (2, 2))
+    
+    def test_to_torch_cuda_device(self):
+        """Test converting array to PyTorch CUDA tensor if available."""
+        if not self.torch_available:
+            self.skipTest("PyTorch is not installed")
+        
+        if not self.torch.cuda.is_available():
+            self.skipTest("CUDA is not available")
+        
+        arr = pnp.array([[1, 2], [3, 4]], dtype=np.float32)
+        
+        # Convert to CUDA tensor
+        tensor = arr.to_torch(device='cuda')
+        
+        self.assertEqual(tensor.device.type, 'cuda')
+        self.assertEqual(tensor.shape, (2, 2))
+        
+        # Verify data correctness
+        expected = self.torch.tensor([[1, 2], [3, 4]], dtype=self.torch.float32)
+        self.assertTrue(self.torch.allclose(tensor.cpu(), expected))
+    
+    def test_to_torch_memory_efficiency(self):
+        """Test to_torch on a larger random array (type, shape, and value range)."""
+        if not self.torch_available:
+            self.skipTest("PyTorch is not installed")
+        
+        # Create a larger array to test memory efficiency
+        arr = pnp.random_rand((100, 100), dtype=np.float32)
+        
+        # Convert to PyTorch tensor
+        tensor = arr.to_torch()
+        
+        # Verify the tensor was created successfully
+        self.assertIsInstance(tensor, self.torch.Tensor)
+        self.assertEqual(tensor.shape, (100, 100))
+        
+        # Verify values are in expected range
+        self.assertTrue(self.torch.all(tensor >= 0))
+        self.assertTrue(self.torch.all(tensor < 1))
+    
+    def test_to_torch_with_loaded_file(self):
+        """Test converting loaded file-based array to PyTorch tensor."""
+        if not self.torch_available:
+            self.skipTest("PyTorch is not installed")
+        
+        # Create and save an array
+        test_path = os.path.join(self.test_dir, "test.bin")
+        test_data = np.array([[1, 2], [3, 4]], dtype=np.float32)
+        test_data.tofile(test_path)
+        
+        # Load using Paper API
+        arr = pnp.load(test_path, shape=(2, 2))
+        
+        # Convert to PyTorch tensor
+        tensor = arr.to_torch()
+        
+        # Verify data correctness
+        expected = self.torch.tensor([[1, 2], [3, 4]], dtype=self.torch.float32)
+        self.assertTrue(self.torch.allclose(tensor, expected))
+    
+    def test_to_torch_not_installed(self):
+        """Test error handling when PyTorch is not installed."""
+        # This test will only work if PyTorch is actually not installed
+        # We can't easily simulate this in the test environment
+        # But the method should raise ImportError if torch is not available
+        pass
+
+
+class TestTensorFlowConversion(unittest.TestCase):
+    """Test cases for TensorFlow tensor conversion."""
+    
+    def setUp(self):
+        """Set up test fixtures."""
+        self.test_dir = tempfile.mkdtemp()
+        
+        # Check if TensorFlow is available
+        try:
+            import tensorflow as tf
+            self.tf_available = True
+            self.tf = tf
+        except ImportError:
+            self.tf_available = False
+            self.tf = None
+    
+    def tearDown(self):
+        """Clean up after tests."""
+        if os.path.exists(self.test_dir):
+            shutil.rmtree(self.test_dir)
+    
+    def test_to_tensorflow_materialized(self):
+        """Test converting materialized array to TensorFlow tensor."""
+        if not self.tf_available:
+            self.skipTest("TensorFlow is not installed")
+        
+        # Create a simple array
+        data = [[1, 2, 3], [4, 5, 6]]
+        arr = pnp.array(data, dtype=np.float32)
+        
+        # Convert to TensorFlow tensor
+        tensor = arr.to_tensorflow()
+        
+        # Verify properties
+        self.assertIsInstance(tensor, self.tf.Tensor)
+        self.assertEqual(tensor.shape, (2, 3))
+        self.assertEqual(tensor.dtype, self.tf.float32)
+        
+        # Verify data correctness
+        expected = self.tf.constant(data, dtype=self.tf.float32)
+        self.assertTrue(self.tf.reduce_all(self.tf.equal(tensor, expected)).numpy())
+    
+    def test_to_tensorflow_lazy(self):
+        """Test converting lazy array to TensorFlow tensor."""
+        if not self.tf_available:
+            self.skipTest("TensorFlow is not installed")
+        
+        # Create lazy computation
+        a = pnp.array([[1, 2], [3, 4]], dtype=np.float32)
+        b = pnp.array([[5, 6], [7, 8]], dtype=np.float32)
+        c = a + b  # Lazy
+        
+        # Convert to TensorFlow tensor (should compute first)
+        tensor = c.to_tensorflow()
+        
+        # Verify properties
+        self.assertIsInstance(tensor, self.tf.Tensor)
+        self.assertEqual(tensor.shape, (2, 2))
+        
+        # Verify data correctness
+        expected = self.tf.constant([[6, 8], [10, 12]], dtype=self.tf.float32)
+        self.assertTrue(self.tf.reduce_all(self.tf.equal(tensor, expected)).numpy())
+    
+    def test_to_tensorflow_memory_efficiency(self):
+        """Test to_tensorflow on a larger random array (type, shape, and value range)."""
+        if not self.tf_available:
+            self.skipTest("TensorFlow is not installed")
+        
+        # Create a larger array to test memory efficiency
+        arr = pnp.random_rand((100, 100), dtype=np.float32)
+        
+        # Convert to TensorFlow tensor
+        tensor = arr.to_tensorflow()
+        
+        # Verify the tensor was created successfully
+        self.assertIsInstance(tensor, self.tf.Tensor)
+        self.assertEqual(tensor.shape, (100, 100))
+        
+        # Verify values are in expected range
+        self.assertTrue(self.tf.reduce_all(tensor >= 0).numpy())
+        self.assertTrue(self.tf.reduce_all(tensor < 1).numpy())
+    
+    def test_to_tensorflow_with_loaded_file(self):
+        """Test converting loaded file-based array to TensorFlow tensor."""
+        if not self.tf_available:
+            self.skipTest("TensorFlow is not installed")
+        
+        # Create and save an array
+        test_path = os.path.join(self.test_dir, "test.bin")
+        test_data = np.array([[1, 2], [3, 4]], dtype=np.float32)
+        test_data.tofile(test_path)
+        
+        # Load using Paper API
+        arr = pnp.load(test_path, shape=(2, 2))
+        
+        # Convert to TensorFlow tensor
+        tensor = arr.to_tensorflow()
+        
+        # Verify data correctness
+        expected = self.tf.constant([[1, 2], [3, 4]], dtype=self.tf.float32)
+        self.assertTrue(self.tf.reduce_all(self.tf.equal(tensor, expected)).numpy())
+    
+    def test_to_tensorflow_float64(self):
+        """Test converting float64 array to TensorFlow tensor."""
+        if not self.tf_available:
+            self.skipTest("TensorFlow is not installed")
+        
+        # Create array with float64
+        arr = pnp.array([[1, 2], [3, 4]], dtype=np.float64)
+        
+        # Convert to TensorFlow tensor
+        tensor = arr.to_tensorflow()
+        
+        # Verify dtype is preserved
+        self.assertEqual(tensor.dtype, self.tf.float64)
+    
+    def test_to_tensorflow_not_installed(self):
+        """Test error handling when TensorFlow is not installed."""
+        # This test will only work if TensorFlow is actually not installed
+        # We can't easily simulate this in the test environment
+        # But the method should raise ImportError if tensorflow is not available
+        pass
+
+
+class TestTensorConversionIntegration(unittest.TestCase):
+    """Integration tests for tensor conversion with Paper operations."""
+    
+    def setUp(self):
+        """Set up test fixtures."""
+        self.test_dir = tempfile.mkdtemp()
+        
+        # Check availability
+        try:
+            import torch
+            self.torch_available = True
+            self.torch = torch
+        except ImportError:
+            self.torch_available = False
+            self.torch = None
+        
+        try:
+            import tensorflow as tf
+            self.tf_available = True
+            self.tf = tf
+        except ImportError:
+            self.tf_available = False
+            self.tf = None
+    
+    def tearDown(self):
+        """Clean up after tests."""
+        if os.path.exists(self.test_dir):
+            shutil.rmtree(self.test_dir)
+    
+    def test_api_example_workflow_torch(self):
+        """Test the example workflow from the issue with PyTorch."""
+        if not self.torch_available:
+            self.skipTest("PyTorch is not installed")
+        
+        # Create test data file
+        test_path = os.path.join(self.test_dir, "large_matrix.dat")
+        test_data = np.random.rand(100, 100).astype(np.float32)
+        test_data.tofile(test_path)
+        
+        # Load a large out-of-core array (no data read yet)
+        arr = pnp.load(test_path, shape=(100, 100), dtype=np.float32)
+        
+        # Build computation graph (lazy, nothing loaded)
+        # Using scalar multiplication which is supported
+        c = arr * 2
+        
+        # Execute the computation plan
+        result = c.compute()
+        
+        # Convert result to PyTorch tensor
+        torch_tensor = result.to_torch()
+        
+        # Verify
+        self.assertIsInstance(torch_tensor, self.torch.Tensor)
+        self.assertEqual(torch_tensor.shape, (100, 100))
+        
+        # Verify computation correctness
+        expected = self.torch.from_numpy(test_data * 2)
+        self.assertTrue(self.torch.allclose(torch_tensor, expected, rtol=1e-5))
+    
+    def test_api_example_workflow_tensorflow(self):
+        """Test the example workflow from the issue with TensorFlow."""
+        if not self.tf_available:
+            self.skipTest("TensorFlow is not installed")
+        
+        # Create test data file
+        test_path = os.path.join(self.test_dir, "large_matrix.dat")
+        test_data = np.random.rand(100, 100).astype(np.float32)
+        test_data.tofile(test_path)
+        
+        # Load a large out-of-core array (no data read yet)
+        arr = pnp.load(test_path, shape=(100, 100), dtype=np.float32)
+        
+        # Build computation graph (lazy, nothing loaded)
+        # Using scalar multiplication which is supported
+        c = arr * 2
+        
+        # Execute the computation plan
+        result = c.compute()
+        
+        # Convert result to TensorFlow tensor
+        tf_tensor = result.to_tensorflow()
+        
+        # Verify
+        self.assertIsInstance(tf_tensor, self.tf.Tensor)
+        self.assertEqual(tf_tensor.shape, (100, 100))
+        
+        # Verify computation correctness
+        expected = self.tf.constant(test_data * 2, dtype=self.tf.float32)
+        self.assertTrue(self.tf.reduce_all(
+            self.tf.abs(tf_tensor - expected) < 1e-5
+        ).numpy())
+    
+    def test_chained_operations_to_torch(self):
+        """Test chained operations followed by PyTorch conversion."""
+        if not self.torch_available:
+            self.skipTest("PyTorch is not installed")
+        
+        a = pnp.array([[1, 2], [3, 4]], dtype=np.float32)
+        b = pnp.array([[5, 6], [7, 8]], dtype=np.float32)
+        
+        # Chain operations
+        result = ((a + b) * 2).compute()
+        
+        # Convert to PyTorch
+        tensor = result.to_torch()
+        
+        # Verify
+        expected = self.torch.tensor([[12, 16], [20, 24]], dtype=self.torch.float32)
+        self.assertTrue(self.torch.allclose(tensor, expected))
+    
+    def test_chained_operations_to_tensorflow(self):
+        """Test chained operations followed by TensorFlow conversion."""
+        if not self.tf_available:
+            self.skipTest("TensorFlow is not installed")
+        
+        a = pnp.array([[1, 2], [3, 4]], dtype=np.float32)
+        b = pnp.array([[5, 6], [7, 8]], dtype=np.float32)
+        
+        # Chain operations
+        result = ((a + b) * 2).compute()
+        
+        # Convert to TensorFlow
+        tensor = result.to_tensorflow()
+        
+        # Verify
+        expected = self.tf.constant([[12, 16], [20, 24]], dtype=self.tf.float32)
+        self.assertTrue(self.tf.reduce_all(self.tf.equal(tensor, expected)).numpy())
+
+
+if __name__ == '__main__':
+    unittest.main()

From b76bfc17b486a495a652f85c778f5212a8a5476d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 20 Nov 2025 01:45:33 +0000
Subject: [PATCH 3/4] Add tensor conversion demo example

Co-authored-by: j143 <53068787+j143@users.noreply.github.com>
---
 examples/tensor_conversion_demo.py | 249 +++++++++++++++++++++++++++++
 1 file changed, 249 insertions(+)
 create mode 100644 examples/tensor_conversion_demo.py

diff --git a/examples/tensor_conversion_demo.py b/examples/tensor_conversion_demo.py
new file mode 100644
index 0000000..6f543b3
--- /dev/null
+++ b/examples/tensor_conversion_demo.py
@@ -0,0 +1,249 @@
+"""
+Demonstration of Direct Conversion to Device Tensors
+
+This example demonstrates how to convert Paper's out-of-core arrays
+to PyTorch and TensorFlow tensors with efficient memory handling.
+
+Features demonstrated:
+- Loading large arrays from disk
+- Building lazy computation graphs
+- Converting to PyTorch tensors (CPU and GPU)
+- Converting to TensorFlow tensors
+- Memory-efficient operations
+"""
+
+import numpy as np
+import os
+import sys
+import tempfile
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from paper import numpy_api as pnp
+
+
+def demo_basic_conversion():
+    """Demonstrate basic tensor conversion."""
+    print("=" * 70)
+    print("DEMO 1: Basic Tensor Conversion")
+    print("=" * 70)
+    
+    # Create a simple array
+    print("\n1. Creating a Paper array...")
+    arr = pnp.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
+    print(f"   Array shape: {arr.shape}")
+    print(f"   Array dtype: {arr.dtype}")
+    
+    # Convert to PyTorch (if available)
+    try:
+        import torch
+        print("\n2. Converting to PyTorch tensor...")
+        torch_tensor = arr.to_torch()
+        print(f"   PyTorch tensor shape: {torch_tensor.shape}")
+        print(f"   PyTorch tensor dtype: {torch_tensor.dtype}")
+        print(f"   PyTorch tensor device: {torch_tensor.device}")
+        print(f"   Tensor data:\n{torch_tensor}")
+    except ImportError:
+        print("\n2. PyTorch not available (skipping PyTorch conversion)")
+    
+    # Convert to TensorFlow (if available)
+    try:
+        import tensorflow as tf
+        print("\n3. Converting to TensorFlow tensor...")
+        tf_tensor = arr.to_tensorflow()
+        print(f"   TensorFlow tensor shape: {tf_tensor.shape}")
+        print(f"   TensorFlow tensor dtype: {tf_tensor.dtype}")
+        print(f"   Tensor data:\n{tf_tensor.numpy()}")
+    except ImportError:
+        print("\n3. TensorFlow not available (skipping TensorFlow conversion)")
+
+
+def demo_lazy_computation():
+    """Demonstrate tensor conversion with lazy computation."""
+    print("\n" + "=" * 70)
+    print("DEMO 2: Lazy Computation with Tensor Conversion")
+    print("=" * 70)
+    
+    # Create arrays
+    print("\n1. Creating Paper arrays...")
+    a = pnp.array([[1, 2], [3, 4]], dtype=np.float32)
+    b = pnp.array([[5, 6], [7, 8]], dtype=np.float32)
+    print(f"   Array A shape: {a.shape}")
+    print(f"   Array B shape: {b.shape}")
+    
+    # Build lazy computation
+    print("\n2. Building lazy computation: (A + B) * 2")
+    c = (a + b) * 2
+    print(f"   Result is lazy: {c._is_lazy}")
+    
+    # Execute computation
+    print("\n3. Computing result...")
+    result = c.compute()
+    print(f"   Result computed, is lazy: {result._is_lazy}")
+    
+    # Convert to tensors
+    try:
+        import torch
+        print("\n4. Converting to PyTorch tensor...")
+        torch_tensor = result.to_torch()
+        print(f"   PyTorch result:\n{torch_tensor}")
+    except ImportError:
+        print("\n4. PyTorch not available")
+    
+    try:
+        import tensorflow as tf
+        print("\n5. Converting to TensorFlow tensor...")
+        tf_tensor = result.to_tensorflow()
+        print(f"   TensorFlow result:\n{tf_tensor.numpy()}")
+    except ImportError:
+        print("\n5. TensorFlow not available")
+
+
+def demo_out_of_core_workflow():
+    """Demonstrate the complete out-of-core workflow from the issue."""
+    print("\n" + "=" * 70)
+    print("DEMO 3: Out-of-Core Array to Device Tensor Workflow")
+    print("=" * 70)
+    
+    # Create a temporary directory for our data
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create a larger dataset
+        print("\n1. Creating a large array on disk (1000x1000)...")
+        large_matrix_path = os.path.join(temp_dir, "large_matrix.dat")
+        test_data = np.random.rand(1000, 1000).astype(np.float32)
+        test_data.tofile(large_matrix_path)
+        print(f"   Created file: {large_matrix_path}")
+        print(f"   File size: {os.path.getsize(large_matrix_path) / (1024*1024):.2f} MB")
+        
+        # Load as out-of-core array (memory-mapped)
+        print("\n2. Loading array with memory mapping (no data loaded yet)...")
+        arr = pnp.load(large_matrix_path, shape=(1000, 1000), dtype=np.float32)
+        print(f"   Array shape: {arr.shape}")
+        print(f"   Memory-mapped: Yes")
+        
+        # Build computation graph
+        print("\n3. Building lazy computation graph: arr * 2")
+        c = arr * 2
+        print(f"   Computation graph built (lazy: {c._is_lazy})")
+        
+        # Execute computation
+        print("\n4. Executing computation plan...")
+        result = c.compute()
+        print(f"   Computation complete")
+        
+        # Convert to device tensors
+        try:
+            import torch
+            print("\n5. Converting to PyTorch tensor (efficient conversion)...")
+            torch_tensor = result.to_torch()
+            print(f"   PyTorch tensor created")
+            print(f"   Shape: {torch_tensor.shape}")
+            print(f"   Device: {torch_tensor.device}")
+            print(f"   Sample values (first 3x3):\n{torch_tensor[:3, :3]}")
+            
+            # Verify computation correctness
+            expected_sample = test_data[:3, :3] * 2
+            actual_sample = torch_tensor[:3, :3].numpy()
+            matches = np.allclose(actual_sample, expected_sample)
+            print(f"   Computation verified: {'✓' if matches else '✗'}")
+            
+        except ImportError:
+            print("\n5. PyTorch not available")
+        
+        try:
+            import tensorflow as tf
+            print("\n6. Converting to TensorFlow tensor (efficient conversion)...")
+            tf_tensor = result.to_tensorflow()
+            print(f"   TensorFlow tensor created")
+            print(f"   Shape: {tf_tensor.shape}")
+            print(f"   Sample values (first 3x3):\n{tf_tensor[:3, :3].numpy()}")
+            
+            # Verify computation correctness
+            expected_sample = test_data[:3, :3] * 2
+            actual_sample = tf_tensor[:3, :3].numpy()
+            matches = np.allclose(actual_sample, expected_sample)
+            print(f"   Computation verified: {'✓' if matches else '✗'}")
+            
+        except ImportError:
+            print("\n6. TensorFlow not available")
+
+
+def demo_gpu_conversion():
+    """Demonstrate GPU tensor conversion if CUDA is available."""
+    print("\n" + "=" * 70)
+    print("DEMO 4: GPU Tensor Conversion")
+    print("=" * 70)
+    
+    try:
+        import torch
+        
+        if not torch.cuda.is_available():
+            print("\nCUDA is not available. GPU conversion demo skipped.")
+            return
+        
+        print("\n1. Creating Paper array...")
+        arr = pnp.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
+        
+        print("\n2. Converting to CUDA tensor...")
+        cuda_tensor = arr.to_torch(device='cuda')
+        print(f"   CUDA tensor created")
+        print(f"   Shape: {cuda_tensor.shape}")
+        print(f"   Device: {cuda_tensor.device}")
+        print(f"   Tensor data:\n{cuda_tensor}")
+        
+        print("\n3. Performing GPU computation...")
+        result = cuda_tensor * 3 + 1
+        print(f"   Result on GPU:\n{result}")
+        
+    except ImportError:
+        print("\nPyTorch not available. GPU conversion demo skipped.")
+
+
+def main():
+    """Run all demonstrations."""
+    print("\n" + "=" * 70)
+    print("Paper Framework: Out-of-Core Arrays to Device Tensors")
+    print("=" * 70)
+    
+    # Check which frameworks are available
+    frameworks = []
+    try:
+        import torch
+        frameworks.append(f"PyTorch {torch.__version__}")
+    except ImportError:
+        pass
+    
+    try:
+        import tensorflow as tf
+        frameworks.append(f"TensorFlow {tf.__version__}")
+    except ImportError:
+        pass
+    
+    if frameworks:
+        print(f"\nAvailable frameworks: {', '.join(frameworks)}")
+    else:
+        print("\nNo deep learning frameworks detected.")
+        print("Install PyTorch: pip install torch")
+        print("Install TensorFlow: pip install tensorflow")
+    
+    # Run demonstrations
+    demo_basic_conversion()
+    demo_lazy_computation()
+    demo_out_of_core_workflow()
+    demo_gpu_conversion()
+    
+    print("\n" + "=" * 70)
+    print("Demo Complete!")
+    print("=" * 70)
+    print("\nKey Benefits:")
+    print("  • Efficient memory usage with memory-mapped arrays")
+    print("  • Zero-copy conversion where possible")
+    print("  • Seamless integration with PyTorch and TensorFlow")
+    print("  • Support for both CPU and GPU devices")
+    print("  • Lazy evaluation for optimized computation")
+    print("=" * 70 + "\n")
+
+
+if __name__ == "__main__":
+    main()

From dc05211e65787d8f7fd73df5508ef4c020268928 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 20 Nov 2025 01:47:06 +0000
Subject: [PATCH 4/4] Update README with tensor conversion documentation

Co-authored-by: j143 <53068787+j143@users.noreply.github.com>
---
 README.md | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/README.md b/README.md
index de1e839..56e0638 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,7 @@ print(result.to_numpy())
 - **Automatic Optimization**: Operator fusion and intelligent caching applied automatically
 - **Out-of-Core Support**: Handle datasets larger than memory seamlessly
 - **Matrix Operations**: Support for addition, scalar multiplication, and matrix multiplication (@)
+- **Device Tensor Conversion**: Direct conversion to PyTorch and TensorFlow tensors with efficient memory handling
 
 ### Supported Operations
 
@@ -56,6 +57,51 @@ print(result.to_numpy())
 - `pnp.load(filepath, shape)` - Load array from file
 - `pnp.save(filepath, array)` - Save array to file
 
+**Tensor Conversion:**
+- `array.to_torch(device=None)` - Convert to PyTorch tensor (CPU by default, or a given device such as `'cuda'`)
+- `array.to_tensorflow()` - Convert to TensorFlow tensor
+
+### Device Tensor Conversion
+
+Paper supports efficient conversion of out-of-core arrays to PyTorch and TensorFlow tensors, enabling seamless integration with deep learning frameworks.
+
+#### Quick Example
+
+```python
+from paper import numpy_api as pnp
+import numpy as np
+
+# Load a large out-of-core array (memory-mapped)
+arr = pnp.load("large_matrix.dat", shape=(10000, 10000), dtype=np.float32)
+
+# Build lazy computation graph
+c = arr * 2
+
+# Execute computation
+result = c.compute()
+
+# Convert to PyTorch tensor (zero-copy on CPU for writable data; GPU transfer copies)
+torch_tensor = result.to_torch()      # CPU tensor
+cuda_tensor = result.to_torch(device='cuda')  # GPU tensor (if available)
+
+# Convert to TensorFlow tensor
+tf_tensor = result.to_tensorflow()
+```
+
+#### Key Benefits
+
+- **Memory Efficiency**: Leverages memory-mapped files for minimal RAM usage
+- **Zero-Copy Conversion**: Direct memory sharing where possible (writable arrays)
+- **GPU Support**: Easy transfer to CUDA devices with PyTorch
+- **Lazy Evaluation**: Only computes when needed, then converts efficiently
+- **Framework Agnostic**: Works with both PyTorch and TensorFlow
+
+#### Running the Demo
+
+```bash
+python examples/tensor_conversion_demo.py
+```
+
 ### Examples
 
 See `examples/numpy_api_example.py` for comprehensive examples demonstrating:
@@ -65,9 +111,16 @@ See `examples/numpy_api_example.py` for comprehensive examples demonstrating:
 - File I/O
 - Large array handling (out-of-core)
 
+See `examples/tensor_conversion_demo.py` for tensor conversion examples:
+- Basic tensor conversion (PyTorch and TensorFlow)
+- Lazy computation with tensor conversion
+- Complete out-of-core workflow
+- GPU conversion (when available)
+
 Run the examples:
 ```bash
 python examples/numpy_api_example.py
+python examples/tensor_conversion_demo.py
 ```
 
 ### Architecture