-
Notifications
You must be signed in to change notification settings - Fork 0
/
transform.py
147 lines (121 loc) · 6.17 KB
/
transform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import os, sys
import requests
import argparse
import json
from datetime import datetime
from scipy.io import wavfile
from audiomentations.core.audio_loading_utils import load_sound_file
from audiomentations import AddBackgroundNoise, PolarityInversion
# Command-line interface of the transformation block.
# Every option maps to an attribute of ``args`` (dashes become underscores).
parser = argparse.ArgumentParser(description='Mix noise to audio files with SNR targets')
parser.add_argument('--noise-dir', type=str, required=True,
                    help="Bucket path/prefix with noise files")
parser.add_argument('--audio-dir', type=str, required=True,
                    help="Bucket path/prefix with audio/clean files")
parser.add_argument('--min-snr', type=float, required=True,
                    help="Min SNR when mixing with noise files")
parser.add_argument('--max-snr', type=float, required=True,
                    help="Max SNR when mixing with noise files")
# Optional on purpose: with required=True the "manual" default could never
# take effect, because argparse rejects the command line before applying it.
parser.add_argument('--labelling-method', type=str, required=False, default="manual",
                    help="Labelling method ('auto' will extract label from filename such as <label>.<rest-of-filename>.wav)")
parser.add_argument('--label', type=str, required=False,
                    help="Label for the audio samples")
parser.add_argument('--upload-category', type=str, required=False, default="split",
                    help="Which category to upload data to in Edge Impulse")
parser.add_argument('--synthetic-data-job-id', type=int, required=False,
                    help="If specified, sets the synthetic_data_job_id metadata key")
parser.add_argument('--skip-upload', action="store_true",
                    help="Skip uploading to EI")
parser.add_argument('--out-directory', type=str, required=False, default="output",
                    help="Directory to save files to")

# parse_known_args() tolerates unrecognised options; they end up in ``unknown``.
args, unknown = parser.parse_known_args()
# An API key is only mandatory when the generated files are uploaded.
if not args.skip_upload:
    API_KEY = os.environ.get("EI_PROJECT_API_KEY")
    if not API_KEY:
        print('Missing EI_PROJECT_API_KEY')
        sys.exit(1)

# Resolve the ingestion endpoint from EI_INGESTION_HOST. Reading the
# environment has no side effects, so this is done regardless of --skip-upload.
# Plain HTTP is used for *.test.edgeimpulse.com hosts and for the Docker host
# alias (port 4810); every other host gets HTTPS.
INGESTION_HOST = os.environ.get("EI_INGESTION_HOST", "edgeimpulse.com")
if INGESTION_HOST == 'host.docker.internal':
    INGESTION_URL = "http://" + INGESTION_HOST + ":4810"
elif INGESTION_HOST.endswith('.test.edgeimpulse.com'):
    INGESTION_URL = "http://ingestion." + INGESTION_HOST
else:
    INGESTION_URL = "https://ingestion." + INGESTION_HOST
def _exit_if_missing(directory, description):
    """Exit with status 1 when ``directory`` does not exist.

    Prints ``"<description> directory <directory> not found"`` followed by a
    listing of ``/mnt/s3fs/`` as a troubleshooting hint.

    Args:
        directory: Path that must exist.
        description: Word used at the start of the error message
            (e.g. ``'Noise'``).
    """
    if os.path.exists(directory):
        return
    print(f'{description} directory {directory} not found')
    # The listing is only a hint: if the mount point itself is missing or
    # unreadable, report that instead of crashing with a traceback that hides
    # the message above and the intended exit code.
    try:
        print('Directories under the bucket:', os.listdir('/mnt/s3fs/'))
    except OSError as err:
        print(f'Could not list /mnt/s3fs/: {err}')
    sys.exit(1)


# Create the output directory if needed; exist_ok avoids the race between
# checking for the directory and creating it.
output_folder = args.out_directory
os.makedirs(output_folder, exist_ok=True)

noise_dir = args.noise_dir
audio_dir = args.audio_dir

# Check if directories exist
_exit_if_missing(noise_dir, 'Noise')
_exit_if_missing(audio_dir, 'Audio')
# Check SNR values: the range handed to the noise mixer must not be inverted.
min_snr = args.min_snr
max_snr = args.max_snr
if min_snr > max_snr:
    # Parentheses now close around each value (the original message left the
    # first one open and doubled the last one).
    print(f'min SNR ({min_snr}) cannot be superior to max SNR ({max_snr})')
    sys.exit(1)

# Short aliases for the remaining options used while processing files.
labelling_method = args.labelling_method
upload_category = args.upload_category
synthetic_data_job_id = args.synthetic_data_job_id
# All audio is loaded, mixed and written at this rate (Hz).
# Could be exposed as a command-line parameter.
SAMPLE_RATE = 16000


def _upload_sample(wav_path, label, metadata):
    """Upload one generated WAV file to the Edge Impulse ingestion service.

    Args:
        wav_path: Path of the WAV file to send.
        label: Value sent in the ``x-label`` header.
        metadata: JSON-serialisable dict sent in the ``x-metadata`` header.

    Raises:
        Exception: If the HTTP status is not 200, or if the response body
            reports a failure for the request or for the uploaded file.
    """
    headers = {
        'x-label': label,
        'x-api-key': API_KEY,
        'x-metadata': json.dumps(metadata),
    }
    # Only send the job id header when a job id was supplied, instead of
    # passing a None header value and relying on the HTTP client to drop it.
    if synthetic_data_job_id is not None:
        headers['x-synthetic-data-job-id'] = str(synthetic_data_job_id)

    # The context manager closes the file handle once the request is done;
    # previously one handle per uploaded file was left open.
    with open(wav_path, 'rb') as wav_fh:
        res = requests.post(
            url=INGESTION_URL + '/api/' + upload_category + '/files',
            headers=headers,
            files={'data': (os.path.basename(wav_path), wav_fh, 'audio/wav')},
        )

    if res.status_code != 200:
        raise Exception(f'Failed to upload file to Edge Impulse (status_code={str(res.status_code)}): {res.content.decode("utf-8")}')

    body = json.loads(res.content.decode("utf-8"))
    if body['success'] is not True:
        raise Exception('Failed to upload file to Edge Impulse: ' + body['error'])
    if body['files'][0]['success'] is not True:
        raise Exception('Failed to upload file to Edge Impulse: ' + body['files'][0]['error'])


# Fail before any audio is processed: with manual labelling every upload
# needs --label, so there is no point in generating files first.
if not args.skip_upload and labelling_method != 'auto' and args.label is None:
    print('Labelling method set to manual but --label parameter not set')
    sys.exit(1)

# Audio transformation that mixes background noise taken from noise_dir.
# Built once and reused for every file; the noise file actually used is read
# back from transform.parameters after each call.
# NOTE(review): relies on audiomentations re-randomising parameters on each
# call (they are not frozen here) - confirm against the installed version.
transform = AddBackgroundNoise(
    sounds_path=noise_dir,
    min_snr_db=min_snr,
    max_snr_db=max_snr,
    noise_transform=PolarityInversion(),
    p=1.0
)

# Loop through audio files
for audio_file in os.listdir(audio_dir):
    if not audio_file.endswith('.wav'):
        print(f'Ignoring file {audio_file}, not a .wav file')
        continue

    audio_file_full_path = os.path.join(audio_dir, audio_file)

    print(f'Loading file {audio_file}...')
    # Load the audio file as a mono array at the fixed sample rate.
    audio_samples, _ = load_sound_file(audio_file_full_path, sample_rate=SAMPLE_RATE, mono=True)

    # Mix noise to audio sample
    augmented_samples = transform(audio_samples, sample_rate=SAMPLE_RATE)

    # Report which noise file was mixed in
    noise_file_path = transform.parameters["noise_file_path"]
    print(f'Mixing with noise file {noise_file_path} - min SNR={min_snr}dB, max SNR={max_snr}dB')
    noise_file_noext = os.path.splitext(os.path.basename(noise_file_path))[0]

    # Save to wavfile; the name records the clean file, SNR range and noise file
    audio_file_noext = os.path.splitext(audio_file)[0]
    output_file = f'{audio_file_noext}_snr{min_snr}_{max_snr}_{noise_file_noext}.wav'
    output_file_path = os.path.join(output_folder, output_file)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)
    print(f'Generated {output_file} file.')
    generated_at = str(datetime.now())

    if args.skip_upload:
        continue

    # Assign label: 'auto' takes everything before the first dot of the
    # file name, any other method uses the --label value.
    if labelling_method == 'auto':
        audio_file_label = audio_file.split('.')[0]
    else:
        audio_file_label = args.label

    # Push new wav file to project
    _upload_sample(
        output_file_path,
        audio_file_label,
        {
            'generated_by': 'mix-audio-generator',
            'generated_at': generated_at,
            'min_snr': str(min_snr),
            'max_snr': str(max_snr),
            'noise_file': noise_file_noext,
            'clean_file': audio_file_noext,
        },
    )
    print(f'file {output_file} pushed to Studio')