diff --git a/README.md b/README.md index fad8f03..f065963 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ num_samples = sr * 5 transforms = [ RandomResizedCrop(n_samples=num_samples), RandomApply([PolarityInversion()], p=0.8), - RandomApply([Noise(min_snr=0.001, max_snr=0.005)], p=0.3), + RandomApply([Noise(min_snr=-60, max_snr=-46)], p=0.3), RandomApply([Gain()], p=0.2), HighLowPass(sample_rate=sr), # this augmentation will always be applied in this aumgentation chain! RandomApply([Delay(sample_rate=sr)], p=0.5), @@ -43,7 +43,7 @@ We can also define a stochastic augmentation on multiple transformations. The fo ```python transforms = [ RandomResizedCrop(n_samples=num_samples), - RandomApply([PolarityInversion(), Noise(min_snr=0.001, max_snr=0.005)], p=0.8), + RandomApply([PolarityInversion(), Noise(min_snr=-60, max_snr=-46)], p=0.8), RandomApply([Gain()], p=0.2), RandomApply([Delay(sample_rate=sr), Reverb(sample_rate=sr)], p=0.5) ] diff --git a/tests/test_augmentations.py b/tests/test_augmentations.py index e15ce09..acc25b2 100644 --- a/tests/test_augmentations.py +++ b/tests/test_augmentations.py @@ -179,7 +179,7 @@ def test_gain(num_channels): def test_noise(num_channels): audio = generate_waveform(sample_rate, num_samples, num_channels) transform = Compose( - [Noise(min_snr=0.5, max_snr=1)], + [Noise(min_snr=-6, max_snr=0)], ) t_audio = transform(audio) diff --git a/tests/test_readme_example.py b/tests/test_readme_example.py index ce71f73..5ef8bdb 100644 --- a/tests/test_readme_example.py +++ b/tests/test_readme_example.py @@ -29,7 +29,7 @@ def test_readme_example(): transforms = [ RandomResizedCrop(n_samples=num_samples), RandomApply([PolarityInversion()], p=0.8), - RandomApply([Noise(min_snr=0.3, max_snr=0.5)], p=0.3), + RandomApply([Noise(min_snr=-10, max_snr=-6)], p=0.3), RandomApply([Gain()], p=0.2), RandomApply([HighLowPass(sample_rate=sr)], p=0.8), RandomApply([Delay(sample_rate=sr)], p=0.5), diff --git 
a/torchaudio_augmentations/augmentations/noise.py b/torchaudio_augmentations/augmentations/noise.py index 668a495..86c7fd1 100644 --- a/torchaudio_augmentations/augmentations/noise.py +++ b/torchaudio_augmentations/augmentations/noise.py @@ -4,10 +4,10 @@ class Noise(torch.nn.Module): - def __init__(self, min_snr=0.0001, max_snr=0.01): + def __init__(self, min_snr=-80, max_snr=-40): """ - :param min_snr: Minimum signal-to-noise ratio - :param max_snr: Maximum signal-to-noise ratio + :param min_snr: Minimum noise level relative to the signal, in dB (20*log10(noise_std/signal_std), i.e. the negated signal-to-noise ratio); negative values mean the noise is quieter than the signal. + :param max_snr: Maximum noise level relative to the signal, in dB (20*log10(noise_std/signal_std), i.e. the negated signal-to-noise ratio); negative values mean the noise is quieter than the signal. """ super().__init__() self.min_snr = min_snr @@ -15,7 +15,8 @@ def __init__(self, min_snr=0.0001, max_snr=0.01): def forward(self, audio): std = torch.std(audio) - noise_std = random.uniform(self.min_snr * std, self.max_snr * std) + snr = random.uniform(self.min_snr, self.max_snr) + noise_std = 10**(snr/20) * std noise = np.random.normal(0.0, noise_std, size=audio.shape).astype(np.float32)