From 97f7c1a6333967693cbc1e1411c5413b4e8ab25f Mon Sep 17 00:00:00 2001 From: V-assim Date: Sun, 9 Jan 2022 13:53:26 +0100 Subject: [PATCH 1/2] Expressing SNR in dB. --- README.md | 4 ++-- tests/test_augmentations.py | 2 +- tests/test_readme_example.py | 2 +- torchaudio_augmentations/augmentations/noise.py | 9 +++++---- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index fad8f03..b6b8f74 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ num_samples = sr * 5 transforms = [ RandomResizedCrop(n_samples=num_samples), RandomApply([PolarityInversion()], p=0.8), - RandomApply([Noise(min_snr=0.001, max_snr=0.005)], p=0.3), + RandomApply([Noise(min_snr=-3, max_snr=-2.3)], p=0.3), RandomApply([Gain()], p=0.2), HighLowPass(sample_rate=sr), # this augmentation will always be applied in this aumgentation chain! RandomApply([Delay(sample_rate=sr)], p=0.5), @@ -43,7 +43,7 @@ We can also define a stochastic augmentation on multiple transformations. The fo ```python transforms = [ RandomResizedCrop(n_samples=num_samples), - RandomApply([PolarityInversion(), Noise(min_snr=0.001, max_snr=0.005)], p=0.8), + RandomApply([PolarityInversion(), Noise(min_snr=-3, max_snr=-2.3)], p=0.8), RandomApply([Gain()], p=0.2), RandomApply([Delay(sample_rate=sr), Reverb(sample_rate=sr)], p=0.5) ] diff --git a/tests/test_augmentations.py b/tests/test_augmentations.py index e15ce09..3af9d3b 100644 --- a/tests/test_augmentations.py +++ b/tests/test_augmentations.py @@ -179,7 +179,7 @@ def test_gain(num_channels): def test_noise(num_channels): audio = generate_waveform(sample_rate, num_samples, num_channels) transform = Compose( - [Noise(min_snr=0.5, max_snr=1)], + [Noise(min_snr=-0.7, max_snr=0)], ) t_audio = transform(audio) diff --git a/tests/test_readme_example.py b/tests/test_readme_example.py index ce71f73..9a93cae 100644 --- a/tests/test_readme_example.py +++ b/tests/test_readme_example.py @@ -29,7 +29,7 @@ def test_readme_example(): transforms = [ RandomResizedCrop(n_samples=num_samples), RandomApply([PolarityInversion()], p=0.8), - RandomApply([Noise(min_snr=0.3, max_snr=0.5)], p=0.3), + RandomApply([Noise(min_snr=-0.5, max_snr=-0.7)], p=0.3), RandomApply([Gain()], p=0.2), RandomApply([HighLowPass(sample_rate=sr)], p=0.8), RandomApply([Delay(sample_rate=sr)], p=0.5), diff --git a/torchaudio_augmentations/augmentations/noise.py b/torchaudio_augmentations/augmentations/noise.py index 668a495..edd2c91 100644 --- a/torchaudio_augmentations/augmentations/noise.py +++ b/torchaudio_augmentations/augmentations/noise.py @@ -4,10 +4,10 @@ class Noise(torch.nn.Module): - def __init__(self, min_snr=0.0001, max_snr=0.01): + def __init__(self, min_snr=-4, max_snr=-2): """ - :param min_snr: Minimum signal-to-noise ratio - :param max_snr: Maximum signal-to-noise ratio + :param min_snr: Minimum signal-to-noise ratio in dB. + :param max_snr: Maximum signal-to-noise ratio in dB. """ super().__init__() self.min_snr = min_snr @@ -15,7 +15,8 @@ def __init__(self, min_snr=0.0001, max_snr=0.01): def forward(self, audio): std = torch.std(audio) - noise_std = random.uniform(self.min_snr * std, self.max_snr * std) + snr = random.uniform(self.min_snr, self.max_snr) + noise_std = 10**(snr/20) * std noise = np.random.normal(0.0, noise_std, size=audio.shape).astype(np.float32) From 6bae117c06dae78cdca15b2bc475b03290e85b17 Mon Sep 17 00:00:00 2001 From: V-assim Date: Sun, 9 Jan 2022 14:18:51 +0100 Subject: [PATCH 2/2] Correcting the values of SNR --- README.md | 4 ++-- tests/test_augmentations.py | 2 +- tests/test_readme_example.py | 2 +- torchaudio_augmentations/augmentations/noise.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b6b8f74..f065963 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ num_samples = sr * 5 transforms = [ RandomResizedCrop(n_samples=num_samples), RandomApply([PolarityInversion()], p=0.8), - RandomApply([Noise(min_snr=-3, max_snr=-2.3)], p=0.3), + RandomApply([Noise(min_snr=-60, max_snr=-46)], p=0.3), RandomApply([Gain()], p=0.2), HighLowPass(sample_rate=sr), # this augmentation will always be applied in this aumgentation chain! RandomApply([Delay(sample_rate=sr)], p=0.5), @@ -43,7 +43,7 @@ We can also define a stochastic augmentation on multiple transformations. The fo ```python transforms = [ RandomResizedCrop(n_samples=num_samples), - RandomApply([PolarityInversion(), Noise(min_snr=-3, max_snr=-2.3)], p=0.8), + RandomApply([PolarityInversion(), Noise(min_snr=-60, max_snr=-46)], p=0.8), RandomApply([Gain()], p=0.2), RandomApply([Delay(sample_rate=sr), Reverb(sample_rate=sr)], p=0.5) ] diff --git a/tests/test_augmentations.py b/tests/test_augmentations.py index 3af9d3b..acc25b2 100644 --- a/tests/test_augmentations.py +++ b/tests/test_augmentations.py @@ -179,7 +179,7 @@ def test_gain(num_channels): def test_noise(num_channels): audio = generate_waveform(sample_rate, num_samples, num_channels) transform = Compose( - [Noise(min_snr=-0.7, max_snr=0)], + [Noise(min_snr=-6, max_snr=0)], ) t_audio = transform(audio) diff --git a/tests/test_readme_example.py b/tests/test_readme_example.py index 9a93cae..5ef8bdb 100644 --- a/tests/test_readme_example.py +++ b/tests/test_readme_example.py @@ -29,7 +29,7 @@ def test_readme_example(): transforms = [ RandomResizedCrop(n_samples=num_samples), RandomApply([PolarityInversion()], p=0.8), - RandomApply([Noise(min_snr=-0.5, max_snr=-0.7)], p=0.3), + RandomApply([Noise(min_snr=-10, max_snr=-6)], p=0.3), RandomApply([Gain()], p=0.2), RandomApply([HighLowPass(sample_rate=sr)], p=0.8), RandomApply([Delay(sample_rate=sr)], p=0.5), diff --git a/torchaudio_augmentations/augmentations/noise.py b/torchaudio_augmentations/augmentations/noise.py index edd2c91..86c7fd1 100644 --- a/torchaudio_augmentations/augmentations/noise.py +++ b/torchaudio_augmentations/augmentations/noise.py @@ -4,7 +4,7 @@ class Noise(torch.nn.Module): - def __init__(self, min_snr=-4, max_snr=-2): + def __init__(self, min_snr=-80, max_snr=-40): """ :param min_snr: Minimum signal-to-noise ratio in dB. :param max_snr: Maximum signal-to-noise ratio in dB.