Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ num_samples = sr * 5
transforms = [
RandomResizedCrop(n_samples=num_samples),
RandomApply([PolarityInversion()], p=0.8),
RandomApply([Noise(min_snr=0.001, max_snr=0.005)], p=0.3),
RandomApply([Noise(min_snr=-60, max_snr=-46)], p=0.3),
RandomApply([Gain()], p=0.2),
HighLowPass(sample_rate=sr), # this augmentation will always be applied in this augmentation chain!
RandomApply([Delay(sample_rate=sr)], p=0.5),
Expand All @@ -43,7 +43,7 @@ We can also define a stochastic augmentation on multiple transformations. The fo
```python
transforms = [
RandomResizedCrop(n_samples=num_samples),
RandomApply([PolarityInversion(), Noise(min_snr=0.001, max_snr=0.005)], p=0.8),
RandomApply([PolarityInversion(), Noise(min_snr=-60, max_snr=-46)], p=0.8),
RandomApply([Gain()], p=0.2),
RandomApply([Delay(sample_rate=sr), Reverb(sample_rate=sr)], p=0.5)
]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_augmentations.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def test_gain(num_channels):
def test_noise(num_channels):
audio = generate_waveform(sample_rate, num_samples, num_channels)
transform = Compose(
[Noise(min_snr=0.5, max_snr=1)],
[Noise(min_snr=-6, max_snr=0)],
)

t_audio = transform(audio)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_readme_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_readme_example():
transforms = [
RandomResizedCrop(n_samples=num_samples),
RandomApply([PolarityInversion()], p=0.8),
RandomApply([Noise(min_snr=0.3, max_snr=0.5)], p=0.3),
RandomApply([Noise(min_snr=-10, max_snr=-6)], p=0.3),
RandomApply([Gain()], p=0.2),
RandomApply([HighLowPass(sample_rate=sr)], p=0.8),
RandomApply([Delay(sample_rate=sr)], p=0.5),
Expand Down
9 changes: 5 additions & 4 deletions torchaudio_augmentations/augmentations/noise.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@


class Noise(torch.nn.Module):
def __init__(self, min_snr=0.0001, max_snr=0.01):
def __init__(self, min_snr=-80, max_snr=-40):
"""
:param min_snr: Minimum signal-to-noise ratio
:param max_snr: Maximum signal-to-noise ratio
:param min_snr: Minimum signal-to-noise ratio in dB.
:param max_snr: Maximum signal-to-noise ratio in dB.
"""
super().__init__()
self.min_snr = min_snr
self.max_snr = max_snr

def forward(self, audio):
std = torch.std(audio)
noise_std = random.uniform(self.min_snr * std, self.max_snr * std)
snr = random.uniform(self.min_snr, self.max_snr)
noise_std = 10**(snr/20) * std

noise = np.random.normal(0.0, noise_std, size=audio.shape).astype(np.float32)

Expand Down