forked from tuwien-musicir/rp_extract
-
Notifications
You must be signed in to change notification settings - Fork 0
/
audiofile_read.py
315 lines (221 loc) · 11.8 KB
/
audiofile_read.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# 2015-04 by Thomas Lidy
# MP3 READ: mini function to decode mp3 using external program
# as there is no Python library for it, we need to use external tools (mpg123, lame, ffmpeg)
import os # for calling external program for mp3 decoding
import subprocess # for subprocess calls
import tempfile
import uuid
# Reading WAV files
# from scipy.io import wavfile
# scipy.io.wavfile does not support 24 bit Wav files
# therefore we switch to wavio by Warren Weckesser - https://github.com/WarrenWeckesser/wavio - BSD 3-Clause License
import wavio
class DecoderException(Exception):
    '''Raised when an external decoder/resampler program fails or cannot be started.

    :param message: human-readable description of the problem
    :param command: the command line that was attempted (list of program + arguments);
        if omitted, an empty list is stored
    :param orig_error: the underlying exception (e.g. the OSError raised by subprocess), if any
    '''

    def __init__(self, message, command=None, orig_error=None):
        # Call the base class constructor with the parameters it needs
        super(DecoderException, self).__init__(message)
        # use a fresh list per instance: a mutable default argument would be
        # shared (and could be modified) across all instances
        self.command = [] if command is None else command
        self.original_error = orig_error
# Scale integer WAV data to float values in the range (-1,1).
# This is meant for Wav data read with wavio.
# For data read with scipy.io.wavfile, use divisor = np.iinfo(wavedata.dtype).max + 1 instead;
# that variant does not work with 24 bit files because scipy scales 24 bit up to 32 bit.
def normalize_wav(wavedata,samplewidth):
    '''Divide the sample values by half of the value range given by the sample width.

    :param wavedata: sample values to scale (anything supporting division by a float)
    :param samplewidth: sample width in bytes (i.e.: 1 = 8bit, 2 = 16bit, 3 = 24bit, 4 = 32bit)
    :return: wavedata divided by 2**(8*samplewidth)/2
    '''
    half_range = 2 ** (8 * samplewidth) / 2
    return wavedata / float(half_range)
def wav_read(filename,normalize=True,verbose=True,auto_resample=True):
    '''read WAV files

    :param filename: input filename to read from
    :param normalize: normalize the read values (usually signed integers) to range (-1,1)
    :param verbose: output some information during reading
    :param auto_resample: auto-resampling: if the sample rate is not 11025, 22050 or 44100 Hz,
        the file is resampled with resample() - to 22050 Hz if the original rate is below
        22050 Hz, otherwise to 44100 Hz - and the resampled copy is read instead
    :return: tuple of 3 elements: samplerate (e.g. 44100), samplewidth (e.g. 2 for 16 bit) and
        wavedata as returned by wavio.readwav (divided by normalize_wav if normalize is set)
    :raises NameError: if filename does not exist (exception type kept for compatibility with callers)
    '''
    # check if file exists
    if not os.path.exists(filename):
        raise NameError("File does not exist:" + filename)

    samplerate, samplewidth, wavedata = wavio.readwav(filename)

    if auto_resample and samplerate not in (11025, 22050, 44100):
        # print original file info
        if verbose:
            print("%s Hz, %s channel(s), %s samples" % (samplerate, wavedata.shape[1], wavedata.shape[0]))

        to_samplerate = 22050 if samplerate < 22050 else 44100
        resampled_filename = resample(filename, to_samplerate, normalize=True, verbose=verbose)
        try:
            samplerate, samplewidth, wavedata = wavio.readwav(resampled_filename)
        finally:
            # resample() writes to a temp file that nobody else knows about:
            # delete it here, otherwise every resampled read leaks one file
            if os.path.exists(resampled_filename):
                os.remove(resampled_filename)

    if normalize:
        wavedata = normalize_wav(wavedata, samplewidth)

    return (samplerate, samplewidth, wavedata)
def get_temp_filename(suffix=None):
    '''Build a random file path inside the system temp directory.

    Only the path is generated (from a random UUID); the file itself is not created.

    :param suffix: optional text appended to the random name, e.g. '.wav'
    :return: path consisting of tempfile.gettempdir() and the random name (plus suffix)
    '''
    rand_filename = str(uuid.uuid4())
    if suffix is not None:
        rand_filename = "%s%s" % (rand_filename, suffix)
    return os.path.join(tempfile.gettempdir(), rand_filename)
def resample(filename, to_samplerate=44100, normalize=True, verbose=True):
    '''Resample an audio file into a new temporary WAV file by calling ffmpeg.

    :param filename: input audio file to resample
    :param to_samplerate: target sample rate, passed to ffmpeg's -ar option
    :param normalize: not used by this function; kept so existing calls keep working
    :param verbose: print a message before resampling
    :return: path of the temporary WAV file that was written; the caller has to delete it
    :raises DecoderException: if ffmpeg cannot be found or started, or exits with a non-zero return code
    '''
    # named out_filename (not tempfile) to avoid shadowing the tempfile module
    out_filename = get_temp_filename(suffix='.wav')
    cmd = ['ffmpeg','-v','1','-y','-i', filename, '-ar', str(to_samplerate), out_filename]

    if verbose:
        print("Resampling to %s ..." % to_samplerate)

    try:
        return_code = subprocess.call(cmd) # subprocess.call takes a list of command + arguments
    except OSError as e:
        if os.path.exists(out_filename):
            os.remove(out_filename)
        if e.errno == 2: # 2 = No such file or directory: probably ffmpeg binary not found
            try:
                subprocess.call(cmd[0]) # check if we can just call the binary
            except OSError as not_found:
                raise DecoderException("Decoder not found. Please install " + cmd[0], command=cmd, orig_error=not_found)
        raise DecoderException("Unknown problem appeared during resampling.", command=cmd, orig_error=e)

    if return_code != 0:
        # do not leave a partially written output file behind
        if os.path.exists(out_filename):
            os.remove(out_filename)
        raise DecoderException("Problem appeared during resampling.", command=cmd)

    return out_filename
def mp3_decode(in_filename, out_filename=None, verbose=True):
    ''' mp3_decode

    Compatibility wrapper for decoding MP3 files: all work is done by decode(),
    which receives the three parameters unchanged (see there for their meaning).

    :return: whatever decode() returns
    '''
    result = decode(in_filename, out_filename, verbose)
    return result
def decode(in_filename, out_filename=None, verbose=True):
    ''' calls external decoder to convert an MP3, AIF(F) or M4A file to a WAV file

    One of the following decoder programs must be installed on the system:

    ffmpeg: for mp3, aif(f), or m4a
    mpg123: for mp3
    lame: for mp3

    (consider adding their path using os.environ['PATH'] += os.pathsep + path )

    The tools are tried in the order above; a tool that is not installed is skipped.

    :param in_filename: input audio file name to process
    :param out_filename: output filename after conversion; if omitted, the input filename is used, replacing the extension by .wav
    :param verbose: print decoding command line information or not
    :raises DecoderException: if a decoder was started but returned a non-zero exit code,
        or could not be started for a reason other than "not found"
    :raises OSError: if none of the decoders supporting the file extension is available
    '''
    basename, ext = os.path.splitext(in_filename)
    ext = ext.lower()

    if out_filename is None:
        out_filename = basename + '.wav'

    # Each entry: (command + arguments as a list for subprocess.call, supported file types).
    # The types MUST be tuples: with a plain string like '.mp3', "ext in types" is a
    # substring test and would also match '', '.m' or 'p3'.
    mp3_only = ('.mp3',)
    decoders = [
        # -v adjusts log level, -y overwrites the output file if it already exists
        (['ffmpeg','-v','1','-y','-i', in_filename, out_filename], ('.mp3','.aif','.aiff','.m4a')),
        (['mpg123','-q', '-w', out_filename, in_filename], mp3_only),
        (['lame','--quiet','--decode', in_filename, out_filename], mp3_only),
    ]

    for cmd, types in decoders:
        if ext not in types: # only use a command that supports the file type that we are having
            continue
        try:
            return_code = subprocess.call(cmd) # subprocess.call takes a list of command + arguments
        except OSError as e:
            if e.errno != 2: # 2 = No such file or directory (i.e. decoder not found, handled after the loop)
                raise DecoderException("Problem appeared during decoding.", command=cmd, orig_error=e)
            continue # this decoder is not installed: try the next one
        if return_code != 0:
            raise DecoderException("Problem appeared during decoding.", command=cmd)
        if verbose:
            print("Decoded %s with: %s" % (ext, " ".join(cmd)))
        return # decoded successfully, no need to loop further

    commands = ", ".join(cmd[0] for cmd, _ in decoders)
    raise OSError("No appropriate decoder found for " + ext + " file. Check if any of these programs is on your system path: " + commands +
                  ". Otherwise install one of these and/or add them to the path using os.environ['PATH'] += os.pathsep + path.")
# testing decoding to memory instead of file: did NOT bring any speedup!
# Also note: sample rate and number of channels not returned with this method. can be derived with
# ffprobe -v quiet -show_streams -of json <input_file>
# which already converts plain text to json, but then the json needs to be parsed.
def decode_to_memory(in_filename, verbose=True):
    '''Decode an audio file with ffmpeg and return the samples without writing a WAV file.

    :param in_filename: input audio file name to decode
    :param verbose: print the decoding command line or not
    :return: 1-dim. numpy float32 array built from the raw "f32le" bytes ffmpeg writes to stdout
        (sample rate and number of channels are not returned)
    :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero return code
    :raises OSError: if ffmpeg cannot be started
    '''
    import numpy as np

    # -v adjusts log level; "-f f32le" selects raw 32 bit float output;
    # "pipe:1" sends output to std_out (probably Linux only)
    # original: call = [cmd, "-v", "quiet", "-i", infile, "-f", "f32le", "-ar", str(sample_rate), "-ac", "1", "pipe:1"]
    # for Windows: \\.\pipe\from_ffmpeg # http://stackoverflow.com/questions/32157774/ffmpeg-output-pipeing-to-named-windows-pipe
    cmd = ['ffmpeg','-v','1','-y','-i', in_filename, "-f", "f32le", "pipe:1"]

    if verbose:
        # report the actual file type (this used to be a hard-coded empty string)
        ext = os.path.splitext(in_filename)[1].lower()
        print("Decoding %s with: %s" % (ext, " ".join(cmd)))

    decoded_wav = subprocess.check_output(cmd)
    return np.frombuffer(decoded_wav, dtype=np.float32)
def mp3_read(filename,normalize=True,verbose=True):
    ''' mp3_read:
    call decode to convert the file to a temporary wav file, read it with wav_read, then delete the wav file

    :param filename: audio file to decode and read
    :param normalize: passed on to wav_read
    :param verbose: passed on to decode and wav_read
    :return: what wav_read returns: samplerate (e.g. 44100), samplewidth (e.g. 2 for 16 bit) and wavedata
    '''
    # create the name before entering try: otherwise a failure here would make the
    # finally block fail on an unassigned variable and hide the real error
    # (also named temp_wav, not tempfile, to avoid shadowing the tempfile module)
    temp_wav = get_temp_filename(suffix='.wav')
    try:
        decode(filename, temp_wav, verbose)
        samplerate, samplewidth, wavedata = wav_read(temp_wav, normalize, verbose)
    finally: # delete temp file in any case
        if os.path.exists(temp_wav):
            os.remove(temp_wav)

    return (samplerate, samplewidth, wavedata)
def audiofile_read(filename,normalize=True,verbose=True):
    ''' audiofile_read

    generic function capable of reading WAV, MP3 and AIF(F) files

    .wav files are read directly with wav_read; any other file is first converted
    to a temporary WAV file with decode, which is deleted again after reading.

    :param filename: file name path to audio file
    :param normalize: normalize to (-1,1) if True (default), or keep original values (16 bit, 24 bit or 32 bit)
    :param verbose: whether to print a message while decoding files or not
    :return: a tuple with 3 entries: samplerate in Hz (e.g. 44100), samplewidth in bytes (e.g. 2 for 16 bit) and wavedata (simple array for mono, 2-dim. array for stereo)
    :raises NameError: if filename does not exist

    Example:
    >>> samplerate, samplewidth, wavedata = audiofile_read("music/BoxCat_Games_-_10_-_Epic_Song.mp3",verbose=False)
    >>> print samplerate, "Hz,", samplewidth*8, "bit,", wavedata.shape[1], "channels,", wavedata.shape[0], "samples"
    44100 Hz, 16 bit, 2 channels, 2421504 samples
    '''
    # check if file exists
    if not os.path.exists(filename):
        raise NameError("File does not exist:" + filename)

    ext = os.path.splitext(filename)[1].lower()

    if ext == '.wav':
        return wav_read(filename, normalize, verbose)

    # create the name before entering try: otherwise a failure here would make the
    # finally block fail on an unassigned variable and hide the real error
    temp_wav = get_temp_filename(suffix='.wav')
    try: # try to decode
        decode(filename, temp_wav, verbose)
        samplerate, samplewidth, wavedata = wav_read(temp_wav, normalize, verbose)
    finally: # delete temp file in any case
        if os.path.exists(temp_wav):
            os.remove(temp_wav)

    return (samplerate, samplewidth, wavedata)
# self test: checks whether audiofile_read works properly by running its docstring example
def self_test():
    '''Run the example from the docstring of audiofile_read as a doctest.'''
    from doctest import run_docstring_examples
    # alternative: doctest.testmod() would run the examples of the whole module
    run_docstring_examples(audiofile_read, globals())
# main routine: to test if decoding works properly
if __name__ == '__main__':

    # to run self test:
    #self_test()
    #exit()
    # (no output means that everything went fine)

    import sys

    # if your MP3 decoder is not on the system PATH, add it like this:
    # path = '/path/to/ffmpeg/'
    # os.environ['PATH'] += os.pathsep + path

    # test audio file: "Epic Song" by "BoxCat Game" (included in repository)
    # Epic Song by BoxCat Games is licensed under a Creative Commons Attribution License.
    # http://freemusicarchive.org/music/BoxCat_Games/Nameless_the_Hackers_RPG_Soundtrack/BoxCat_Games_-_Nameless-_the_Hackers_RPG_Soundtrack_-_10_Epic_Song

    # the audio file can be given as first command line argument
    # (named audio_filename, not file, to avoid shadowing the builtin)
    if len(sys.argv) > 1:
        audio_filename = sys.argv[1]
    else:
        audio_filename = "music/BoxCat_Games_-_10_-_Epic_Song.mp3"

    # import time
    # start = time.time()

    samplerate, samplewidth, wavedata = audiofile_read(audio_filename)

    # print time.time() - start
    # print wavedata.shape
    #
    # start = time.time()
    # wavedata2 = decode_to_memory(audio_filename)
    # print time.time() - start
    # print wavedata2.shape
    #
    # print "EQUAL" if wavedata == wavedata2 else "NOT EQUAL"

    print("Successfully read audio file:")
    print("%s Hz, %s bit, %s channels, %s samples" % (samplerate, samplewidth*8, wavedata.shape[1], wavedata.shape[0]))