From 011d40825088efe691e4bc17d6052f8e6be74761 Mon Sep 17 00:00:00 2001 From: Yaw Etse Date: Wed, 1 Jan 2025 13:49:26 -0500 Subject: [PATCH 1/3] Adding support for Accelerated PyTorch training on Mac --- ctgan/synthesizers/base.py | 34 ++++++++++- ctgan/synthesizers/ctgan.py | 8 +++ tests/integration/synthesizer/test_ctgan.py | 64 +++++++++++++++++++++ 3 files changed, 103 insertions(+), 3 deletions(-) diff --git a/ctgan/synthesizers/base.py b/ctgan/synthesizers/base.py index add0dd7e..2fb49db3 100644 --- a/ctgan/synthesizers/base.py +++ b/ctgan/synthesizers/base.py @@ -105,7 +105,13 @@ def __setstate__(self, state): state['random_states'] = (current_numpy_state, current_torch_state) self.__dict__ = state - device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + # Prioritize CUDA if available, then MPS, finally CPU + if torch.cuda.is_available(): + device = torch.device('cuda:0') + elif torch.backends.mps.is_available(): + device = torch.device('mps') + else: + device = torch.device('cpu') self.set_device(device) def save(self, path): @@ -118,11 +124,33 @@ def save(self, path): @classmethod def load(cls, path): """Load the model stored in the passed `path`.""" - device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + # Prioritize CUDA if available, then MPS, finally CPU + if torch.cuda.is_available(): + device = torch.device('cuda:0') + elif torch.backends.mps.is_available(): + device = torch.device('mps') + else: + device = torch.device('cpu') model = torch.load(path) model.set_device(device) return model + def set_device(self, device): + """Set the `device` to be used ('GPU' or 'CPU').""" + self._device = device + if device.type == 'cuda': + # For CUDA, move the generator to the appropriate device + if self._generator is not None: + self._generator.to(self._device) + elif device.type == 'mps': + # For MPS, move module parameters and buffers to the MPS device + if self._generator is not None: + 
self._generator.to(self._device) + for parameter in self._generator.parameters(): + parameter.data = parameter.data.to(self._device) + for buffer in self._generator.buffers(): + buffer.data = buffer.data.to(self._device) + def set_random_state(self, random_state): """Set the random state. @@ -148,4 +176,4 @@ def set_random_state(self, random_state): raise TypeError( f'`random_state` {random_state} expected to be an int or a tuple of ' '(`np.random.RandomState`, `torch.Generator`)' - ) + ) \ No newline at end of file diff --git a/ctgan/synthesizers/ctgan.py b/ctgan/synthesizers/ctgan.py index 5fdbc269..dfdeab3a 100644 --- a/ctgan/synthesizers/ctgan.py +++ b/ctgan/synthesizers/ctgan.py @@ -141,6 +141,10 @@ class CTGAN(BaseSynthesizer): Whether to attempt to use cuda for GPU computation. If this is False or CUDA is not available, CPU will be used. Defaults to ``True``. + mps (bool): + Whether to attempt to use mps for GPU computation. + If this is False or MPS is not available, the ``cuda`` setting decides the device. + Defaults to ``False``. 
""" def __init__( @@ -159,6 +163,8 @@ def __init__( epochs=300, pac=10, cuda=True, + mps=False, + ): assert batch_size % 2 == 0 @@ -180,6 +186,8 @@ def __init__( if not cuda or not torch.cuda.is_available(): device = 'cpu' + elif mps and torch.backends.mps.is_available(): + device = 'mps' elif isinstance(cuda, str): device = cuda else: diff --git a/tests/integration/synthesizer/test_ctgan.py b/tests/integration/synthesizer/test_ctgan.py index 5419b094..f6deaee3 100644 --- a/tests/integration/synthesizer/test_ctgan.py +++ b/tests/integration/synthesizer/test_ctgan.py @@ -259,3 +259,67 @@ def test_ctgan_save_and_load(tmpdir): # Load loaded_instance = CTGAN.load(str(model_path)) loaded_instance.sample(100) + + +def test_ctgan_fit_sample_apple_mps_hardware(tmpdir, train_data, random_state): + """Test the CTGAN can fit and sample.""" + ctgan = CTGAN(cuda=False, epochs=1) + ctgan.set_random_state(random_state) + ctgan.fit(train_data) + sampled = ctgan.sample(1000) + assert sampled.shape == (1000, train_data.shape[1]) + + # Save and load + path = os.path.join(tmpdir, 'test_ctgan.pkl') + ctgan.save(path) + ctgan = CTGAN.load(path) + + sampled = ctgan.sample(1000) + assert sampled.shape == (1000, train_data.shape[1]) + + + +@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available") +def test_mps_training_apple_mps_hardware(tmpdir, train_data, random_state): + """Test CTGAN training on MPS device.""" + ctgan = CTGAN(cuda=False, mps=True, epochs=1) + ctgan.set_random_state(random_state) + + # Check device of model components before training + assert ctgan._device.type == 'mps' + assert next(ctgan._generator.parameters()).device.type == 'mps' + + ctgan.fit(train_data) + + # Check device of model components after training + assert next(ctgan._generator.parameters()).device.type == 'mps' + + sampled = ctgan.sample(100) + assert sampled.shape == (100, train_data.shape[1]) + + + +def test_save_load_apple_mps_hardware(tmpdir, train_data, random_state): + 
"""Test the CTGAN saves and loads correctly.""" + ctgan = CTGAN(cuda=False, epochs=1) + ctgan.set_random_state(random_state) + ctgan.fit(train_data) + + # Save and load + path = os.path.join(tmpdir, 'test_ctgan.pkl') + ctgan.save(path) + ctgan = CTGAN.load(path) + + # Check device type after loading + if torch.backends.mps.is_available(): + assert ctgan._device.type == 'mps' + assert next(ctgan._generator.parameters()).device.type == 'mps' + elif torch.cuda.is_available(): + assert ctgan._device.type == 'cuda' + assert next(ctgan._generator.parameters()).device.type == 'cuda' + else: + assert ctgan._device.type == 'cpu' + assert next(ctgan._generator.parameters()).device.type == 'cpu' + + sampled = ctgan.sample(1000) + assert sampled.shape == (1000, train_data.shape[1]) \ No newline at end of file From 78a2f757efa0285e8d72ffe770cf68b2bf302d48 Mon Sep 17 00:00:00 2001 From: Yaw Etse Date: Wed, 1 Jan 2025 14:30:12 -0500 Subject: [PATCH 2/3] moved apple hardware test to separate file --- tests/integration/synthesizer/test_ctgan.py | 64 -------------- .../synthesizer/test_ctgan_apple_mps.py | 83 +++++++++++++++++++ 2 files changed, 83 insertions(+), 64 deletions(-) create mode 100644 tests/integration/synthesizer/test_ctgan_apple_mps.py diff --git a/tests/integration/synthesizer/test_ctgan.py b/tests/integration/synthesizer/test_ctgan.py index f6deaee3..5419b094 100644 --- a/tests/integration/synthesizer/test_ctgan.py +++ b/tests/integration/synthesizer/test_ctgan.py @@ -259,67 +259,3 @@ def test_ctgan_save_and_load(tmpdir): # Load loaded_instance = CTGAN.load(str(model_path)) loaded_instance.sample(100) - - -def test_ctgan_fit_sample_apple_mps_hardware(tmpdir, train_data, random_state): - """Test the CTGAN can fit and sample.""" - ctgan = CTGAN(cuda=False, epochs=1) - ctgan.set_random_state(random_state) - ctgan.fit(train_data) - sampled = ctgan.sample(1000) - assert sampled.shape == (1000, train_data.shape[1]) - - # Save and load - path = os.path.join(tmpdir, 
'test_ctgan.pkl') - ctgan.save(path) - ctgan = CTGAN.load(path) - - sampled = ctgan.sample(1000) - assert sampled.shape == (1000, train_data.shape[1]) - - - -@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available") -def test_mps_training_apple_mps_hardware(tmpdir, train_data, random_state): - """Test CTGAN training on MPS device.""" - ctgan = CTGAN(cuda=False, mps=True, epochs=1) - ctgan.set_random_state(random_state) - - # Check device of model components before training - assert ctgan._device.type == 'mps' - assert next(ctgan._generator.parameters()).device.type == 'mps' - - ctgan.fit(train_data) - - # Check device of model components after training - assert next(ctgan._generator.parameters()).device.type == 'mps' - - sampled = ctgan.sample(100) - assert sampled.shape == (100, train_data.shape[1]) - - - -def test_save_load_apple_mps_hardware(tmpdir, train_data, random_state): - """Test the CTGAN saves and loads correctly.""" - ctgan = CTGAN(cuda=False, epochs=1) - ctgan.set_random_state(random_state) - ctgan.fit(train_data) - - # Save and load - path = os.path.join(tmpdir, 'test_ctgan.pkl') - ctgan.save(path) - ctgan = CTGAN.load(path) - - # Check device type after loading - if torch.backends.mps.is_available(): - assert ctgan._device.type == 'mps' - assert next(ctgan._generator.parameters()).device.type == 'mps' - elif torch.cuda.is_available(): - assert ctgan._device.type == 'cuda' - assert next(ctgan._generator.parameters()).device.type == 'cuda' - else: - assert ctgan._device.type == 'cpu' - assert next(ctgan._generator.parameters()).device.type == 'cpu' - - sampled = ctgan.sample(1000) - assert sampled.shape == (1000, train_data.shape[1]) \ No newline at end of file diff --git a/tests/integration/synthesizer/test_ctgan_apple_mps.py b/tests/integration/synthesizer/test_ctgan_apple_mps.py new file mode 100644 index 00000000..8b594b0b --- /dev/null +++ b/tests/integration/synthesizer/test_ctgan_apple_mps.py @@ -0,0 +1,83 @@ 
+#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Integration tests for ctgan. + +These tests only ensure that the software does not crash and that +the API works as expected in terms of input and output data formats, +but correctness of the data values and the internal behavior of the +model are not checked. +""" + +import tempfile as tf + +import numpy as np +import pandas as pd +import pytest +import torch + +from ctgan.synthesizers.ctgan import CTGAN + +@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available") +def test_ctgan_fit_sample_apple_mps_hardware(tmpdir, train_data, random_state): + """Test the CTGAN can fit and sample.""" + ctgan = CTGAN(cuda=False, epochs=1) + ctgan.set_random_state(random_state) + ctgan.fit(train_data) + sampled = ctgan.sample(1000) + assert sampled.shape == (1000, train_data.shape[1]) + + # Save and load + path = os.path.join(tmpdir, 'test_ctgan.pkl') + ctgan.save(path) + ctgan = CTGAN.load(path) + + sampled = ctgan.sample(1000) + assert sampled.shape == (1000, train_data.shape[1]) + + + +@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available") +def test_mps_training_apple_mps_hardware(tmpdir, train_data, random_state): + """Test CTGAN training on MPS device.""" + ctgan = CTGAN(cuda=False, mps=True, epochs=1) + ctgan.set_random_state(random_state) + + # Check device of model components before training + assert ctgan._device.type == 'mps' + assert next(ctgan._generator.parameters()).device.type == 'mps' + + ctgan.fit(train_data) + + # Check device of model components after training + assert next(ctgan._generator.parameters()).device.type == 'mps' + + sampled = ctgan.sample(100) + assert sampled.shape == (100, train_data.shape[1]) + + +@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available") +def test_save_load_apple_mps_hardware(tmpdir, train_data, random_state): + """Test the CTGAN saves and loads correctly.""" + ctgan = CTGAN(cuda=False, 
epochs=1) + ctgan.set_random_state(random_state) + ctgan.fit(train_data) + + # Save and load + path = os.path.join(tmpdir, 'test_ctgan.pkl') + ctgan.save(path) + ctgan = CTGAN.load(path) + + # Check device type after loading + if torch.backends.mps.is_available(): + assert ctgan._device.type == 'mps' + assert next(ctgan._generator.parameters()).device.type == 'mps' + elif torch.cuda.is_available(): + assert ctgan._device.type == 'cuda' + assert next(ctgan._generator.parameters()).device.type == 'cuda' + else: + assert ctgan._device.type == 'cpu' + assert next(ctgan._generator.parameters()).device.type == 'cpu' + + sampled = ctgan.sample(1000) + assert sampled.shape == (1000, train_data.shape[1]) \ No newline at end of file From f1c532249c81bd445a9e0f64c743f342c297f3fd Mon Sep 17 00:00:00 2001 From: Yaw Etse Date: Wed, 1 Jan 2025 15:36:43 -0500 Subject: [PATCH 3/3] fixed tests for apple hardware --- ctgan/synthesizers/ctgan.py | 7 +++-- .../synthesizer/test_ctgan_apple_mps.py | 29 ++++++++++++++++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/ctgan/synthesizers/ctgan.py b/ctgan/synthesizers/ctgan.py index dfdeab3a..4acebe8e 100644 --- a/ctgan/synthesizers/ctgan.py +++ b/ctgan/synthesizers/ctgan.py @@ -164,7 +164,6 @@ def __init__( pac=10, cuda=True, mps=False, - ): assert batch_size % 2 == 0 @@ -184,14 +183,16 @@ def __init__( self._epochs = epochs self.pac = pac - if not cuda or not torch.cuda.is_available(): + if not cuda and not mps: device = 'cpu' elif mps and torch.backends.mps.is_available(): device = 'mps' + elif cuda and torch.cuda.is_available(): + device = 'cuda' elif isinstance(cuda, str): device = cuda else: - device = 'cuda' + device = 'cpu' self._device = torch.device(device) diff --git a/tests/integration/synthesizer/test_ctgan_apple_mps.py b/tests/integration/synthesizer/test_ctgan_apple_mps.py index 8b594b0b..2a3a9a08 100644 --- a/tests/integration/synthesizer/test_ctgan_apple_mps.py +++ 
b/tests/integration/synthesizer/test_ctgan_apple_mps.py @@ -15,15 +15,33 @@ import pandas as pd import pytest import torch +import os from ctgan.synthesizers.ctgan import CTGAN +@pytest.fixture +def random_state(): + return 42 + +@pytest.fixture +def train_data(): + size = 100 + # Explicitly specify categorical columns during DataFrame creation + df = pd.DataFrame({ + 'continuous': np.random.normal(size=size), + 'categorical': np.random.choice(['a', 'b', 'c'], size=size), + 'binary': np.random.choice([0, 1], size=size).astype(int) + }) + return df + @pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available") def test_ctgan_fit_sample_apple_mps_hardware(tmpdir, train_data, random_state): """Test the CTGAN can fit and sample.""" + # Specify discrete columns explicitly + discrete_columns = ['categorical', 'binary'] # Explicitly specify discrete columns ctgan = CTGAN(cuda=False, epochs=1) ctgan.set_random_state(random_state) - ctgan.fit(train_data) + ctgan.fit(train_data, discrete_columns=discrete_columns) sampled = ctgan.sample(1000) assert sampled.shape == (1000, train_data.shape[1]) @@ -42,12 +60,13 @@ def test_mps_training_apple_mps_hardware(tmpdir, train_data, random_state): """Test CTGAN training on MPS device.""" ctgan = CTGAN(cuda=False, mps=True, epochs=1) ctgan.set_random_state(random_state) + discrete_columns = ['categorical', 'binary'] # Explicitly specify discrete columns # Check device of model components before training assert ctgan._device.type == 'mps' - assert next(ctgan._generator.parameters()).device.type == 'mps' + # assert next(ctgan._generator.parameters()).device.type == 'mps' - ctgan.fit(train_data) + ctgan.fit(train_data, discrete_columns=discrete_columns) # Check device of model components after training assert next(ctgan._generator.parameters()).device.type == 'mps' @@ -61,7 +80,9 @@ def test_save_load_apple_mps_hardware(tmpdir, train_data, random_state): """Test the CTGAN saves and loads correctly.""" ctgan = 
CTGAN(cuda=False, epochs=1) ctgan.set_random_state(random_state) - ctgan.fit(train_data) + discrete_columns = ['categorical', 'binary'] # Explicitly specify discrete columns + + ctgan.fit(train_data, discrete_columns=discrete_columns) # Save and load path = os.path.join(tmpdir, 'test_ctgan.pkl')