-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAuto_Encoder.py
297 lines (234 loc) · 11.5 KB
/
Auto_Encoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
from mido import MidiFile
import os
import csv
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
# keras specific imports
from keras.models import Sequential, Model
from keras.layers import Activation, Dense, Input
from keras import regularizers
def _collect_note_onsets(messages):
    '''
    Pair every note-on message with the first later release of the same pitch.
    Inputs
    messages: iterable of MIDI messages exposing .type and .time, plus .note and
    .velocity on note messages. .time is treated as the delta since the previous message.
    Output:
    (onsets, num_note_ons) where onsets is a list of
    [note, onset_time, velocity, duration] rows in note-on order and
    num_note_ons counts every note-on with velocity > 0, matched or not.
    '''
    messages = list(messages)
    # Running timestamp per message. The first message is pinned to time 0 and
    # its own delta is ignored, so all onset times are relative to it.
    acc_time = []
    for msg in messages:
        if not acc_time:
            acc_time.append(0)
        else:
            acc_time.append(msg.time + acc_time[-1])
    onsets = []
    num_note_ons = 0
    for j, msg in enumerate(messages):
        if not (msg.type == 'note_on' and msg.velocity > 0):
            continue
        num_note_ons += 1  # for verification by the caller
        # scan forward for the matching release of this pitch
        for k in range(j + 1, len(messages)):
            later = messages[k]
            # Some midi files have no note-off events; a note-on with
            # velocity 0 plays that role instead.
            is_release = later.type == 'note_off' or (later.type == 'note_on' and later.velocity == 0)
            if is_release and later.note == msg.note:
                onsets.append([msg.note, acc_time[j], msg.velocity, acc_time[k] - acc_time[j]])
                break
    return onsets, num_note_ons


def midiToCSV(directoriesIn, directoryOut):
    '''
    Convert every MIDI file in the given directories into a CSV of note onsets.
    Each output row is (note, onset_time, velocity, duration). The CSV is written to
    directoryOut as <file name up to first '.'>_<input directory name>.csv.
    Files whose name contains '.DS_Store' or '._' are skipped. A file in which some
    note-on never gets released is reported on stdout and no CSV is written for it.
    Inputs
    directoriesIn: list of directories to scan; every remaining file is assumed to be MIDI
    directoryOut: directory the CSV files are written to (must already exist)
    '''
    for directory in directoriesIn:
        print('==> Analyzing directory %s'%(directory))
        # grab the files, assume they're all midi
        files = os.listdir(directory)
        # last path component, with or without a trailing separator on the input
        directoryTag = os.path.basename(os.path.normpath(directory))
        for i, fileName in enumerate(files):
            if '.DS_Store' in fileName or '._' in fileName:
                continue
            print('Processing file %g out of %g in directory: %s'%(i, len(files), fileName))
            mid = MidiFile(os.path.join(directory, fileName))
            noteonList, countNote = _collect_note_onsets(mid)
            # Check that every note onset gets released
            if countNote != len(noteonList):
                print("Mismatched number of data points. Something is wrong!", countNote, len(noteonList))
                continue
            # Save list of note onsets to csv file
            name = fileName.split('.')
            outFileName = os.path.join(directoryOut, name[0] + '_' + directoryTag + '.csv')
            # newline='' lets the csv module control line endings (no blank rows on Windows)
            with open(outFileName, 'w', newline='') as csvfile:
                spamwriter = csv.writer(csvfile, delimiter=',',
                                        quotechar='|', quoting=csv.QUOTE_MINIMAL)
                spamwriter.writerows(noteonList)
def createMatrixFromCSV(filepath, onsetOnly, timePerChunk=0.1):
    '''
    Take a CSV file where each row represents a note onset (note, onset_time, velocity, duration)
    and convert it into a matrix of size num_notes (128) x timeChunks x 2. The first element in the
    third dimension is duration and the second is velocity.
    If onsetOnly is true, the duration and velocity only appear in the time chunk of onset.
    If onsetOnly is false, the duration and velocity appear in every time chunk the note is held
    during; chunks past the last onset's chunk are clipped because the matrix ends there.
    Inputs
    filepath: Filepath of the CSV file
    onsetOnly: whether to fill the matrix only at onset, or throughout its duration
    timePerChunk: how much time each column in the matrix represents
    Output:
    a num_notes x timeChunks x 2 (duration and velocity) matrix, where
    timeChunks = floor(latest_onset / timePerChunk) + 1.
    Raises:
    ValueError if the file contains no note rows.
    '''
    numPitches = 128
    # newline='' is what the csv module expects for files it parses
    with open(filepath, 'r', newline='') as csvfile:
        print("==> Reading File: %s"%(filepath))
        # parse everything in one pass; blank lines come back as [] and are skipped
        rows = [
            (int(row[0]), float(row[1]), int(row[2]), float(row[3]))
            for row in csv.reader(csvfile)
            if row
        ]
    if not rows:
        raise ValueError("No note onsets found in %s" % (filepath))
    # the latest onset decides how many columns are needed
    maxTime = max(curTime for _, curTime, _, _ in rows)
    # floor(...) + 1 (rather than ceil) so the latest onset always has a column,
    # even when it falls exactly on a chunk boundary or at time 0
    numChunks = int(np.floor(maxTime / timePerChunk)) + 1
    curMatrix = np.zeros((numPitches, numChunks, 2))
    for note, curTime, velocity, duration in rows:
        startChunk = int(np.floor(curTime / timePerChunk))
        if onsetOnly:
            # a single entry in the chunk of onset
            curMatrix[note, startChunk, :] = [duration, velocity]
        else:
            # every chunk the note sounds in gets turned on
            endChunk = int(np.floor((curTime + duration) / timePerChunk)) + 1
            curMatrix[note, startChunk:endChunk, :] = [duration, velocity]
    return curMatrix
def sampleFromMatrix(matrix_in, sample_rows, sample_cols):
    '''
    Cut a random rectangular window out of a matrix.
    Input: matrix_in - array with at least two dimensions (rows x cols)
    sample_rows, sample_cols - height and width of the window
    Output: a sample_rows x sample_cols slice of matrix_in whose position is drawn
    uniformly among all positions that lie entirely inside matrix_in
    '''
    total_rows, total_cols = matrix_in.shape[0], matrix_in.shape[1]
    # randint's upper bound is exclusive, so "+ 1" makes the last valid offset reachable
    row_offset = np.random.randint(total_rows - sample_rows + 1)
    col_offset = np.random.randint(total_cols - sample_cols + 1)
    row_window = slice(row_offset, row_offset + sample_rows)
    col_window = slice(col_offset, col_offset + sample_cols)
    return matrix_in[row_window, col_window]
def getSamplesFromSong(song_matrix, num_samples, sample_rows, sample_cols):
    '''
    Draw many random windows from a song and stack them, one flattened window per row.
    Input: song_matrix - a 2-d matrix (rows are pitch, cols are time) to sample from
    num_samples - how many windows to draw
    sample_rows - rows in each window
    sample_cols - columns in each window
    Output: a num_samples x (sample_rows * sample_cols) matrix. Each row is one window
    flattened row by row, e.g. the window [[1, 2], [3, 4]] is stored as [1, 2, 3, 4].
    '''
    flat_length = sample_rows * sample_cols
    # uninitialised storage; every row is overwritten in the loop below
    samples = np.empty((num_samples, flat_length))
    for row_index in range(num_samples):
        window = sampleFromMatrix(song_matrix, sample_rows, sample_cols)
        # -1 lets numpy work out the flattened length
        samples[row_index] = np.reshape(window, -1)
    return samples
# ---------------------------------------------------------------------------
# Set some parameters
# ---------------------------------------------------------------------------
# rows (pitches) per sample: min 1, max 128
sample_rows = 128
# columns (time chunks) per sample: min 1, max depends on chunk size when turned into a matrix
sample_cols = 20
num_training_samples = 10000
num_validation_samples = 2000
num_epochs = 50
batch_size = 128
time_per_chunk = 0.1
encoding_dim = 64

# ---------------------------------------------------------------------------
# Preprocessing - getting a binary of note onsets
# ---------------------------------------------------------------------------
# get the CSV files, then create an np array from one CSV file
# (uncomment the next line to regenerate the CSV files from the MIDI sources)
#midiToCSV(['MidiFiles/'], 'CSV_From_Midi/')
song_matrix = createMatrixFromCSV('./CSV_From_Midi/original_MidiFiles.csv', True, timePerChunk=time_per_chunk)
# pull out the velocity (ignore duration, which is at index 0 in the 3rd dimension)
velocity_only = song_matrix[:, :, 1]
# turn into binary based on whether 0 or not
velocity_only_binary = (velocity_only > 0).astype(float)
# visualize your matrix
im = Image.fromarray(velocity_only_binary * 256)
im.show()

# ---------------------------------------------------------------------------
# Create a training and validation dataset
# ---------------------------------------------------------------------------
# NOTE IS USING THE VELOCITY NOT THE BINARY ONE RIGHT NOW
X_train = getSamplesFromSong(velocity_only, num_training_samples, sample_rows, sample_cols)
# note validation may contain some exact copies of training
X_val = getSamplesFromSong(velocity_only, num_validation_samples, sample_rows, sample_cols)

# ---------------------------------------------------------------------------
# Set up our keras model of an autoencoder
# ---------------------------------------------------------------------------
input_img = Input(shape=(sample_rows * sample_cols,))
# create the encoding and decoding layers
encoded = Dense(units=encoding_dim,
                input_dim=sample_rows * sample_cols,
                activation='relu',
                activity_regularizer=regularizers.l1(10e-5))(input_img)
# output the same dim as the original input
decoded = Dense(units=sample_rows * sample_cols, activation='relu')(encoded)
# create your autoencoder model
autoencoder = Model(inputs=input_img, outputs=decoded)
# make a separate encoding and decoding model
encoder = Model(input_img, encoded)
# placeholder for the encoded input
encoded_input = Input(shape=(encoding_dim,))
# retrieve the last layer of the autoencoder
decoder_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoder_layer(encoded_input))
# set the loss and optimizer
autoencoder.compile(loss='mean_squared_error',
                    optimizer='sgd')

# ---------------------------------------------------------------------------
# Train our model
# ---------------------------------------------------------------------------
# fit() returns a History object holding the per-epoch training and validation losses
training_history = autoencoder.fit(X_train, X_train, epochs=num_epochs, batch_size=batch_size,
                                   validation_data=(X_val, X_val))

# ---------------------------------------------------------------------------
# Plot Training Process
# ---------------------------------------------------------------------------
# grab out data from the history returned by fit()
training_loss = training_history.history['loss']
validation_loss = training_history.history['val_loss']
epoch_nums = range(len(validation_loss))
# print a graph of validation and training loss vs. epoch
fig = plt.figure(figsize=(7, 4))
myPlot = fig.add_subplot(111)
myPlot.plot(epoch_nums, training_loss, '-', label="Training")
myPlot.plot(epoch_nums, validation_loss, '-', label="Validation")
myPlot.set_xlabel("Epoch Number")
myPlot.set_ylabel("Loss")
myPlot.set_title("Training and Validation Loss vs. Epoch Number")
myPlot.legend(loc="best", frameon=False)
# Write the figure
fig.savefig('TrainingPlot_AutoEncoder')

# ---------------------------------------------------------------------------
# Analyze the weights of our model
# ---------------------------------------------------------------------------
# get_weights() is flat: [encoder kernel, encoder bias, decoder kernel, decoder bias]
weights = autoencoder.get_weights()
layer_one_weights = weights[0]
layer_two_weights = weights[2]
num_to_display = 1
for i in range(num_to_display):
    # get a single filter: the incoming weights of encoder unit i, laid out like a sample
    curFilter = np.reshape(layer_one_weights[:, i], (sample_rows, sample_cols))
    # min-max scale so the filter fills the whole 0..255 range; weights can be
    # negative or average out to ~0, so dividing by the mean is not reliable
    filter_min = curFilter.min()
    filter_span = curFilter.max() - filter_min
    if filter_span > 0:
        scaledFilter = (curFilter - filter_min) * (255. / filter_span)
    else:
        scaledFilter = np.zeros_like(curFilter)
    # display as an image
    im = Image.fromarray(scaledFilter)
    im.show()