From 011d40825088efe691e4bc17d6052f8e6be74761 Mon Sep 17 00:00:00 2001
From: Yaw Etse <yaw.etse@gmail.com>
Date: Wed, 1 Jan 2025 13:49:26 -0500
Subject: [PATCH 1/3] Adding support for Accelerated PyTorch training on Mac

---
 ctgan/synthesizers/base.py                  | 34 ++++++++++-
 ctgan/synthesizers/ctgan.py                 |  8 +++
 tests/integration/synthesizer/test_ctgan.py | 64 +++++++++++++++++++++
 3 files changed, 103 insertions(+), 3 deletions(-)

diff --git a/ctgan/synthesizers/base.py b/ctgan/synthesizers/base.py
index add0dd7e..2fb49db3 100644
--- a/ctgan/synthesizers/base.py
+++ b/ctgan/synthesizers/base.py
@@ -105,7 +105,13 @@ def __setstate__(self, state):
             state['random_states'] = (current_numpy_state, current_torch_state)
 
         self.__dict__ = state
-        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+        # Prioritize CUDA if available, then MPS, finally CPU
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        elif torch.backends.mps.is_available():
+            device = torch.device('mps')
+        else:
+            device = torch.device('cpu')
         self.set_device(device)
 
     def save(self, path):
@@ -118,11 +124,33 @@ def save(self, path):
     @classmethod
     def load(cls, path):
         """Load the model stored in the passed `path`."""
-        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+        # Prioritize CUDA if available, then MPS, finally CPU
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+        elif torch.backends.mps.is_available():
+            device = torch.device('mps')
+        else:
+            device = torch.device('cpu')
         model = torch.load(path)
         model.set_device(device)
         return model
 
+    def set_device(self, device):
+        """Set the `device` to be used ('GPU' or 'CPU')."""
+        self._device = device
+        if device.type == 'cuda':
+            # For CUDA, move the generator to the appropriate device
+            if self._generator is not None:
+                self._generator.to(self._device)
+        elif device.type == 'mps':
+            # For MPS, move module parameters and buffers to the MPS device
+            if self._generator is not None:
+                self._generator.to(self._device)
+                for parameter in self._generator.parameters():
+                    parameter.data = parameter.data.to(self._device)
+                for buffer in self._generator.buffers():
+                    buffer.data = buffer.data.to(self._device)
+
     def set_random_state(self, random_state):
         """Set the random state.
 
@@ -148,4 +176,4 @@ def set_random_state(self, random_state):
             raise TypeError(
                 f'`random_state` {random_state} expected to be an int or a tuple of '
                 '(`np.random.RandomState`, `torch.Generator`)'
-            )
+            )
\ No newline at end of file
diff --git a/ctgan/synthesizers/ctgan.py b/ctgan/synthesizers/ctgan.py
index 5fdbc269..dfdeab3a 100644
--- a/ctgan/synthesizers/ctgan.py
+++ b/ctgan/synthesizers/ctgan.py
@@ -141,6 +141,10 @@ class CTGAN(BaseSynthesizer):
             Whether to attempt to use cuda for GPU computation.
             If this is False or CUDA is not available, CPU will be used.
             Defaults to ``True``.
+        mps (bool):
+            Whether to attempt to use Apple's MPS backend for GPU computation.
+            If this is False or MPS is not available, ``cuda`` selects CUDA or CPU.
+            Defaults to ``False``.
     """
 
     def __init__(
@@ -159,6 +163,8 @@ def __init__(
         epochs=300,
         pac=10,
         cuda=True,
+        mps=False,
+
     ):
         assert batch_size % 2 == 0
 
@@ -180,6 +186,8 @@ def __init__(
 
         if not cuda or not torch.cuda.is_available():
             device = 'cpu'
+        elif mps and torch.backends.mps.is_available():
+            device = 'mps'
         elif isinstance(cuda, str):
             device = cuda
         else:
diff --git a/tests/integration/synthesizer/test_ctgan.py b/tests/integration/synthesizer/test_ctgan.py
index 5419b094..f6deaee3 100644
--- a/tests/integration/synthesizer/test_ctgan.py
+++ b/tests/integration/synthesizer/test_ctgan.py
@@ -259,3 +259,67 @@ def test_ctgan_save_and_load(tmpdir):
     # Load
     loaded_instance = CTGAN.load(str(model_path))
     loaded_instance.sample(100)
+
+
+def test_ctgan_fit_sample_apple_mps_hardware(tmpdir, train_data, random_state):
+    """Test the CTGAN can fit and sample."""
+    ctgan = CTGAN(cuda=False, epochs=1)
+    ctgan.set_random_state(random_state)
+    ctgan.fit(train_data)
+    sampled = ctgan.sample(1000)
+    assert sampled.shape == (1000, train_data.shape[1])
+
+    # Save and load
+    path = os.path.join(tmpdir, 'test_ctgan.pkl')
+    ctgan.save(path)
+    ctgan = CTGAN.load(path)
+
+    sampled = ctgan.sample(1000)
+    assert sampled.shape == (1000, train_data.shape[1])
+
+
+
+@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available")
+def test_mps_training_apple_mps_hardware(tmpdir, train_data, random_state):
+    """Test CTGAN training on MPS device."""
+    ctgan = CTGAN(cuda=False, mps=True, epochs=1)
+    ctgan.set_random_state(random_state)
+
+    # Check device of model components before training
+    assert ctgan._device.type == 'mps'
+    assert next(ctgan._generator.parameters()).device.type == 'mps'
+
+    ctgan.fit(train_data)
+
+    # Check device of model components after training
+    assert next(ctgan._generator.parameters()).device.type == 'mps'
+
+    sampled = ctgan.sample(100)
+    assert sampled.shape == (100, train_data.shape[1])
+
+
+
+def test_save_load_apple_mps_hardware(tmpdir, train_data, random_state):
+    """Test the CTGAN saves and loads correctly."""
+    ctgan = CTGAN(cuda=False, epochs=1)
+    ctgan.set_random_state(random_state)
+    ctgan.fit(train_data)
+
+    # Save and load
+    path = os.path.join(tmpdir, 'test_ctgan.pkl')
+    ctgan.save(path)
+    ctgan = CTGAN.load(path)
+
+    # Check device type after loading
+    if torch.backends.mps.is_available():
+        assert ctgan._device.type == 'mps'
+        assert next(ctgan._generator.parameters()).device.type == 'mps'
+    elif torch.cuda.is_available():
+        assert ctgan._device.type == 'cuda'
+        assert next(ctgan._generator.parameters()).device.type == 'cuda'
+    else:
+        assert ctgan._device.type == 'cpu'
+        assert next(ctgan._generator.parameters()).device.type == 'cpu'
+
+    sampled = ctgan.sample(1000)
+    assert sampled.shape == (1000, train_data.shape[1])
\ No newline at end of file

From 78a2f757efa0285e8d72ffe770cf68b2bf302d48 Mon Sep 17 00:00:00 2001
From: Yaw Etse <yaw.etse@gmail.com>
Date: Wed, 1 Jan 2025 14:30:12 -0500
Subject: [PATCH 2/3] moved apple hardware test to separate file

---
 tests/integration/synthesizer/test_ctgan.py   | 64 --------------
 .../synthesizer/test_ctgan_apple_mps.py       | 83 +++++++++++++++++++
 2 files changed, 83 insertions(+), 64 deletions(-)
 create mode 100644 tests/integration/synthesizer/test_ctgan_apple_mps.py

diff --git a/tests/integration/synthesizer/test_ctgan.py b/tests/integration/synthesizer/test_ctgan.py
index f6deaee3..5419b094 100644
--- a/tests/integration/synthesizer/test_ctgan.py
+++ b/tests/integration/synthesizer/test_ctgan.py
@@ -259,67 +259,3 @@ def test_ctgan_save_and_load(tmpdir):
     # Load
     loaded_instance = CTGAN.load(str(model_path))
     loaded_instance.sample(100)
-
-
-def test_ctgan_fit_sample_apple_mps_hardware(tmpdir, train_data, random_state):
-    """Test the CTGAN can fit and sample."""
-    ctgan = CTGAN(cuda=False, epochs=1)
-    ctgan.set_random_state(random_state)
-    ctgan.fit(train_data)
-    sampled = ctgan.sample(1000)
-    assert sampled.shape == (1000, train_data.shape[1])
-
-    # Save and load
-    path = os.path.join(tmpdir, 'test_ctgan.pkl')
-    ctgan.save(path)
-    ctgan = CTGAN.load(path)
-
-    sampled = ctgan.sample(1000)
-    assert sampled.shape == (1000, train_data.shape[1])
-
-
-
-@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available")
-def test_mps_training_apple_mps_hardware(tmpdir, train_data, random_state):
-    """Test CTGAN training on MPS device."""
-    ctgan = CTGAN(cuda=False, mps=True, epochs=1)
-    ctgan.set_random_state(random_state)
-
-    # Check device of model components before training
-    assert ctgan._device.type == 'mps'
-    assert next(ctgan._generator.parameters()).device.type == 'mps'
-
-    ctgan.fit(train_data)
-
-    # Check device of model components after training
-    assert next(ctgan._generator.parameters()).device.type == 'mps'
-
-    sampled = ctgan.sample(100)
-    assert sampled.shape == (100, train_data.shape[1])
-
-
-
-def test_save_load_apple_mps_hardware(tmpdir, train_data, random_state):
-    """Test the CTGAN saves and loads correctly."""
-    ctgan = CTGAN(cuda=False, epochs=1)
-    ctgan.set_random_state(random_state)
-    ctgan.fit(train_data)
-
-    # Save and load
-    path = os.path.join(tmpdir, 'test_ctgan.pkl')
-    ctgan.save(path)
-    ctgan = CTGAN.load(path)
-
-    # Check device type after loading
-    if torch.backends.mps.is_available():
-        assert ctgan._device.type == 'mps'
-        assert next(ctgan._generator.parameters()).device.type == 'mps'
-    elif torch.cuda.is_available():
-        assert ctgan._device.type == 'cuda'
-        assert next(ctgan._generator.parameters()).device.type == 'cuda'
-    else:
-        assert ctgan._device.type == 'cpu'
-        assert next(ctgan._generator.parameters()).device.type == 'cpu'
-
-    sampled = ctgan.sample(1000)
-    assert sampled.shape == (1000, train_data.shape[1])
\ No newline at end of file
diff --git a/tests/integration/synthesizer/test_ctgan_apple_mps.py b/tests/integration/synthesizer/test_ctgan_apple_mps.py
new file mode 100644
index 00000000..8b594b0b
--- /dev/null
+++ b/tests/integration/synthesizer/test_ctgan_apple_mps.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Integration tests for ctgan on Apple MPS hardware.
+
+These tests only ensure that the software does not crash and that
+the API works as expected in terms of input and output data formats,
+but correctness of the data values and the internal behavior of the
+model are not checked.
+"""
+
+import tempfile as tf
+
+import numpy as np
+import pandas as pd
+import pytest
+import torch
+
+from ctgan.synthesizers.ctgan import CTGAN
+
+@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available")
+def test_ctgan_fit_sample_apple_mps_hardware(tmpdir, train_data, random_state):
+    """Test the CTGAN can fit and sample."""
+    ctgan = CTGAN(cuda=False, epochs=1)
+    ctgan.set_random_state(random_state)
+    ctgan.fit(train_data)
+    sampled = ctgan.sample(1000)
+    assert sampled.shape == (1000, train_data.shape[1])
+
+    # Save and load
+    path = os.path.join(tmpdir, 'test_ctgan.pkl')
+    ctgan.save(path)
+    ctgan = CTGAN.load(path)
+
+    sampled = ctgan.sample(1000)
+    assert sampled.shape == (1000, train_data.shape[1])
+
+
+
+@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available")
+def test_mps_training_apple_mps_hardware(tmpdir, train_data, random_state):
+    """Test CTGAN training on MPS device."""
+    ctgan = CTGAN(cuda=False, mps=True, epochs=1)
+    ctgan.set_random_state(random_state)
+
+    # Check device of model components before training
+    assert ctgan._device.type == 'mps'
+    assert next(ctgan._generator.parameters()).device.type == 'mps'
+
+    ctgan.fit(train_data)
+
+    # Check device of model components after training
+    assert next(ctgan._generator.parameters()).device.type == 'mps'
+
+    sampled = ctgan.sample(100)
+    assert sampled.shape == (100, train_data.shape[1])
+
+
+@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available")
+def test_save_load_apple_mps_hardware(tmpdir, train_data, random_state):
+    """Test the CTGAN saves and loads correctly."""
+    ctgan = CTGAN(cuda=False, epochs=1)
+    ctgan.set_random_state(random_state)
+    ctgan.fit(train_data)
+
+    # Save and load
+    path = os.path.join(tmpdir, 'test_ctgan.pkl')
+    ctgan.save(path)
+    ctgan = CTGAN.load(path)
+
+    # Check device type after loading
+    if torch.backends.mps.is_available():
+        assert ctgan._device.type == 'mps'
+        assert next(ctgan._generator.parameters()).device.type == 'mps'
+    elif torch.cuda.is_available():
+        assert ctgan._device.type == 'cuda'
+        assert next(ctgan._generator.parameters()).device.type == 'cuda'
+    else:
+        assert ctgan._device.type == 'cpu'
+        assert next(ctgan._generator.parameters()).device.type == 'cpu'
+
+    sampled = ctgan.sample(1000)
+    assert sampled.shape == (1000, train_data.shape[1])
\ No newline at end of file

From f1c532249c81bd445a9e0f64c743f342c297f3fd Mon Sep 17 00:00:00 2001
From: Yaw Etse <yaw.etse@gmail.com>
Date: Wed, 1 Jan 2025 15:36:43 -0500
Subject: [PATCH 3/3] fixed tests for apple hardware

---
 ctgan/synthesizers/ctgan.py                   |  7 +++--
 .../synthesizer/test_ctgan_apple_mps.py       | 29 ++++++++++++++++---
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/ctgan/synthesizers/ctgan.py b/ctgan/synthesizers/ctgan.py
index dfdeab3a..4acebe8e 100644
--- a/ctgan/synthesizers/ctgan.py
+++ b/ctgan/synthesizers/ctgan.py
@@ -164,7 +164,6 @@ def __init__(
         pac=10,
         cuda=True,
         mps=False,
-
     ):
         assert batch_size % 2 == 0
 
@@ -184,14 +183,16 @@ def __init__(
         self._epochs = epochs
         self.pac = pac
 
-        if not cuda or not torch.cuda.is_available():
+        if not cuda and not mps:
             device = 'cpu'
         elif mps and torch.backends.mps.is_available():
             device = 'mps'
+        elif cuda and torch.cuda.is_available():
+            device = 'cuda'
         elif isinstance(cuda, str):
             device = cuda
         else:
-            device = 'cuda'
+            device = 'cpu'
 
         self._device = torch.device(device)
 
diff --git a/tests/integration/synthesizer/test_ctgan_apple_mps.py b/tests/integration/synthesizer/test_ctgan_apple_mps.py
index 8b594b0b..2a3a9a08 100644
--- a/tests/integration/synthesizer/test_ctgan_apple_mps.py
+++ b/tests/integration/synthesizer/test_ctgan_apple_mps.py
@@ -15,15 +15,33 @@
 import pandas as pd
 import pytest
 import torch
+import os
 
 from ctgan.synthesizers.ctgan import CTGAN
 
+@pytest.fixture
+def random_state():
+    return 42
+
+@pytest.fixture
+def train_data():
+    size = 100
+    # Small mixed-type frame: one continuous, one categorical and one binary column
+    df = pd.DataFrame({
+        'continuous': np.random.normal(size=size),
+        'categorical': np.random.choice(['a', 'b', 'c'], size=size),
+        'binary': np.random.choice([0, 1], size=size).astype(int)
+    })
+    return df
+
 @pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available")
 def test_ctgan_fit_sample_apple_mps_hardware(tmpdir, train_data, random_state):
     """Test the CTGAN can fit and sample."""
+    # Specify discrete columns explicitly
+    discrete_columns = ['categorical', 'binary']  # Explicitly specify discrete columns
     ctgan = CTGAN(cuda=False, epochs=1)
     ctgan.set_random_state(random_state)
-    ctgan.fit(train_data)
+    ctgan.fit(train_data, discrete_columns=discrete_columns)
     sampled = ctgan.sample(1000)
     assert sampled.shape == (1000, train_data.shape[1])
 
@@ -42,12 +60,13 @@ def test_mps_training_apple_mps_hardware(tmpdir, train_data, random_state):
     """Test CTGAN training on MPS device."""
     ctgan = CTGAN(cuda=False, mps=True, epochs=1)
     ctgan.set_random_state(random_state)
+    discrete_columns = ['categorical', 'binary']  # Explicitly specify discrete columns
 
     # Check device of model components before training
     assert ctgan._device.type == 'mps'
-    assert next(ctgan._generator.parameters()).device.type == 'mps'
+    # No generator check before fit(): presumably `_generator` is not built yet (TODO confirm)
 
-    ctgan.fit(train_data)
+    ctgan.fit(train_data, discrete_columns=discrete_columns)
 
     # Check device of model components after training
     assert next(ctgan._generator.parameters()).device.type == 'mps'
@@ -61,7 +80,9 @@ def test_save_load_apple_mps_hardware(tmpdir, train_data, random_state):
     """Test the CTGAN saves and loads correctly."""
     ctgan = CTGAN(cuda=False, epochs=1)
     ctgan.set_random_state(random_state)
-    ctgan.fit(train_data)
+    discrete_columns = ['categorical', 'binary']  # Explicitly specify discrete columns
+
+    ctgan.fit(train_data, discrete_columns=discrete_columns)
 
     # Save and load
     path = os.path.join(tmpdir, 'test_ctgan.pkl')