Skip to content

Commit 7221c72

Browse files
committed
Basic line alignment and cleanup
1 parent 9918440 commit 7221c72

7 files changed

+224
-12
lines changed

autosynch/align.py

+216
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,222 @@ def seg_align_eval(dump_dir, tagged_dir, out_file, verbose=False):
355355

356356
f.write('\n')
357357

358+
def line_align(songs, dump_dir, boundary_algorithm='olda', label_algorithm='fmc2d', do_twinnet=True):
    """
    Aligns lyric sections and lines to audio and dumps the result as YAML.

    For each song, vocal syllable onsets (SND) are bucketed into the
    structural sections found by MSAF, the audio label that best matches the
    lyric choruses is picked, audio sections are relabeled as
    'chorus'/'verse'/'other', and lyric sections are matched to audio
    sections with a dynamic-programming alignment over dp_err_matrix. Line
    start times are then interpolated inside each aligned section in
    proportion to syllable counts. One '<artist>_<song>.yml' file (spaces
    removed) is written to dump_dir per song.

    Songs that cannot be aligned (no lyric sections, no audio sections, or
    every section tagged instrumental) are logged and skipped.

    :param songs: Song or list of songs; each is a dict with the keys
                  'path', 'song', 'artist' and 'genre'.
    :type songs: dict | list[dict]
    :param dump_dir: Directory in which the YAML files are written.
    :type dump_dir: str
    :param boundary_algorithm: Passed to msaf.process() as boundaries_id.
    :type boundary_algorithm: str
    :param label_algorithm: Passed to msaf.process() as labels_id.
    :type label_algorithm: str
    :param do_twinnet: Whether to run MaD TwinNet on all song paths first.
                       When False, the '<path>_voice.wav' files are still
                       read, so they presumably must already exist.
    :type do_twinnet: bool
    """

    logging.info('Beginning alignment...')

    if isinstance(songs, dict):
        songs = [songs]

    # Module initializations
    snd = SND(silencedb=-15)
    sc = SyllableCounter()

    # Perform MaD TwinNet in one batch
    if do_twinnet:
        paths = [song['path'] for song in songs]
        twinnet.twinnet_process(paths)
    else:
        logging.info('Skipping MaD TwinNet')

    for song in songs:

        logging.info('Processing {} by {}'.format(song['song'], song['artist']))

        start_time = time.time()

        # Get file names
        mixed_path = song['path']
        voice_path = os.path.splitext(song['path'])[0] + '_voice.wav'

        # Get lyrics from Genius
        lyrics = get_lyrics(song['song'], song['artist'])

        # Get syllable count from lyrics
        formatted_lyrics = sc.build_lyrics(lyrics)
        syl_lyrics = sc.get_syllable_count_lyrics(formatted_lyrics)
        sc_syllables = sc.get_syllable_count_per_section(syl_lyrics)

        # Without lyric sections there is nothing to align (and max() below
        # would raise on an empty sequence).
        if not sc_syllables:
            logging.error('No lyric sections found! Skipping...')
            continue

        # Get syllable count from SND
        snd_syllables = snd.run(voice_path)

        # Structural segmentation analysis on original audio
        sections, labels = msaf.process(mixed_path, boundaries_id=boundary_algorithm, labels_id=label_algorithm)

        if len(labels) == 0:
            logging.error('No audio sections found! Skipping...')
            continue

        # Save instrumental section indices
        # TODO: fix instrumentalization (sections with density <= 0.7 were
        # meant to go here instead of into labels_density; currently nothing
        # is ever tagged instrumental).
        instrumentals = []

        # Get SND counts, densities per label:
        # labels_density[label] == [counts per occurrence, densities per occurrence]
        max_count = 0
        labels_density = {}
        i_s = 0
        for label, sec_start, sec_end in zip(labels, sections[:-1], sections[1:]):
            # Consume every syllable onset that falls before this section's end
            count = 0
            while i_s < len(snd_syllables) and snd_syllables[i_s] < sec_end:
                count += 1
                i_s += 1
            max_count = max(max_count, count)

            # Guard against zero-length sections
            sec_length = sec_end - sec_start
            density = count/sec_length if sec_length > 0 else 0.0

            if label not in labels_density:
                labels_density[label] = [[], []]
            labels_density[label][0].append(count)
            labels_density[label][1].append(density)

        # Normalize SND syllable counts (all counts are 0 when max_count is 0,
        # so any non-zero divisor gives the right answer)
        count_norm = max_count if max_count > 0 else 1
        for label in labels_density:
            labels_density[label][0] = [count/count_norm for count in labels_density[label][0]]

        # Normalize SSA syllable counts
        gt_max_syl = max(section[1] for section in sc_syllables)
        chorus_syls = [section[1] for section in sc_syllables if section[0] == 'chorus']

        # A chorus reference only exists if the lyrics contain a non-empty
        # chorus; otherwise mean() would raise or the distance would divide
        # by zero.
        has_chorus = bool(chorus_syls) and gt_max_syl > 0 and sum(chorus_syls) > 0

        # Find label most similar to chorus
        min_label = labels[0]
        if has_chorus:
            gt_chorus_syl = mean(syl/gt_max_syl for syl in chorus_syls)

            min_distance = float('inf')
            for label in labels_density:
                # stdev() needs at least two occurrences of the label
                if len(labels_density[label][0]) < 2:
                    continue

                # TODO: Fix distance scales
                mean_syl = mean(labels_density[label][0])
                std_den = stdev(labels_density[label][1])
                distance = sqrt(((mean_syl - gt_chorus_syl)/gt_chorus_syl)**2 + std_den**2)

                if distance < min_distance:
                    min_distance = distance
                    min_label = label
        else:
            logging.warning('No chorus found in lyrics; no audio section will be labeled chorus')

        # Relabel every non-instrumental section
        instrumental_ids = set(instrumentals)
        occurrences = defaultdict(int)
        for label in labels:
            occurrences[label] += 1

        relabels = []
        for i, label in enumerate(labels):
            if i in instrumental_ids:
                continue
            if has_chorus and label == min_label:
                relabels.append('chorus')
            elif occurrences[label] > 1:
                relabels.append('verse')
            else:
                relabels.append('other')

        if not relabels:
            logging.error('Whole song tagged as instrumental! Skipping...')
            continue

        # Calculate accumulated error matrix
        # (rows: lyric sections, columns: relabeled audio sections)
        n_lyric, n_audio = len(sc_syllables), len(relabels)
        dp = [[-1 for j in range(n_audio)] for i in range(n_lyric)]
        for i in range(n_lyric):
            for j in range(n_audio):
                dp[i][j] = dp_err_matrix[sc_syllables[i][0]][relabels[j]]
                if i == 0 and j == 0:
                    pass
                elif i == 0:
                    dp[i][j] += dp[i][j-1]
                elif j == 0:
                    dp[i][j] += dp[i-1][j]
                else:
                    dp[i][j] += min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1])

        # Backtrack from the bottom-right corner to (0, 0); ties prefer
        # moving up, then left, then diagonally.
        i, j = n_lyric-1, n_audio-1
        path = []
        while True:
            path.append((i, j))
            if (i, j) == (0, 0):
                break
            elif i == 0:
                j -= 1
            elif j == 0:
                i -= 1
            else:
                min_dir = min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1])
                if dp[i-1][j] == min_dir:
                    i -= 1
                elif dp[i][j-1] == min_dir:
                    j -= 1
                else:
                    i -= 1
                    j -= 1
        path.reverse()

        # Process alignment: alignment[k] lists the lyric section indices
        # mapped to audio section k (or 'instrumental')
        alignment = [[] for _ in range(len(labels))]
        for i in instrumentals:
            alignment[i].append('instrumental')

        section_id = 0
        j_prev = 0
        for (i, j) in path:
            if j != j_prev:
                section_id += 1
                j_prev = j
            # j indexes relabels, which omits instrumental sections
            while 'instrumental' in alignment[section_id]:
                section_id += 1
            alignment[section_id].append(i)

        end_time = time.time()

        align_data = {'song': song['song'],
                      'artist': song['artist'],
                      'genre': song['genre'],
                      'process time': end_time - start_time,
                      'duration': round((sections[-1] - sections[0]).item(), 2),
                      'align': []}

        # Lyric section index of each entry in align_data['align'], kept
        # separately so the YAML schema is unchanged.
        entry_lyric_ids = []

        cur_lyric_section = -1
        for i, section in enumerate(alignment):
            for n, lyric_section in enumerate(section):
                if lyric_section != cur_lyric_section:
                    # Lyric sections sharing one audio section split it evenly
                    break_point = round((sections[i] + n * (sections[i+1]-sections[i])/len(section)).item(), 2)
                    if cur_lyric_section != 'instrumental' and align_data['align']:
                        align_data['align'][-1]['end'] = break_point
                    if lyric_section != 'instrumental':
                        align_data['align'].append({'label': sc_syllables[lyric_section][0],
                                                    'syllables': sc_syllables[lyric_section][1],
                                                    'start': break_point,
                                                    'lines': []})
                        entry_lyric_ids.append(lyric_section)
                    cur_lyric_section = lyric_section

        # The last lyric section runs to the end of the last audio section.
        # (Reusing break_point here would make it end where it starts.)
        if align_data['align'] and 'end' not in align_data['align'][-1]:
            align_data['align'][-1]['end'] = round(sections[-1].item(), 2)

        # Spread each section's lines over its duration by syllable share
        for entry, lyric_id in zip(align_data['align'], entry_lyric_ids):
            duration = entry['end'] - entry['start']
            line_start = entry['start']
            section_syls = entry['syllables']
            for j, line in enumerate(formatted_lyrics[lyric_id][1]):
                line_text = ' '.join(line)
                line_syls = sum(syl_lyrics[lyric_id][1][j])
                line_duration = line_syls/section_syls * duration if section_syls else 0

                entry['lines'].append({'start': line_start,
                                       'text': line_text})

                line_start += line_duration

        file_name = '{}_{}.yml'.format(song['artist'], song['song']).replace(' ', '')
        file_path = os.path.join(dump_dir, file_name)

        with open(file_path, 'w') as f:
            yaml.dump(align_data, f, default_flow_style=False)
572+
573+
358574
def iter_boundary_label_algorithms(songs, dump_dir, tagged_dir, evals_dir, do_twinnet=False, verbose=True):
359575
for b_alg in msaf.get_all_boundary_algorithms():
360576
if b_alg == 'example':

autosynch/syllable_counter.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def dfs(node, path, arcs):
267267

268268
return n_syllables
269269

270-
def _build_lyrics(self, lyrics):
270+
def build_lyrics(self, lyrics):
271271
"""
272272
Constructs segmented lyrics structure by song section, line, and word.
273273
@@ -356,23 +356,21 @@ def get_syllable_count_word(self, word):
356356

357357
return n_syllables
358358

359-
def get_syllable_count_lyrics(self, lyrics):
359+
def get_syllable_count_lyrics(self, formatted_lyrics):
360360
"""
361361
Formats and retrieves syllable counts for each word in lyrics.
362362
363363
Returns a list of tuples representing sections, each of which contains
364364
a list of lists representing lines of lyrics, each of which is a list of
365-
syllable counts of words in that line. See _build_lyrics() for more
365+
syllable counts of words in that line. See build_lyrics() for more
366366
information.
367367
368-
:param lyrics: Lyrics in format of Genius.com.
369-
:type lyrics: str
368+
:param formatted_lyrics: Lyrics output from build_lyrics().
369+
:type formatted_lyrics: list[tuple(str, list[list[str]])]
370370
:return syl_lyrics: Syllable counts for words in segmented format.
371371
:rtype: list[tuple(str, list[list[int]])]
372372
"""
373373

374-
formatted_lyrics = self._build_lyrics(lyrics)
375-
376374
syl_lyrics = []
377375
syl_section = []
378376
for section in formatted_lyrics:
@@ -383,18 +381,17 @@ def get_syllable_count_lyrics(self, lyrics):
383381

384382
return syl_lyrics
385383

386-
def get_syllable_count_per_section(self, lyrics):
384+
def get_syllable_count_per_section(self, syl_lyrics):
387385
"""
388386
Formats and retrieves syllable counts per section in lyrics.
389387
390388
Sums syllable counts from each section in return value of
391389
get_syllable_count_lyrics().
392390
393-
:param lyrics: Lyrics in format of Genius.com.
394-
:type lyrics: str
391+
:param syl_lyrics: Lyrics output from get_syllable_count_lyrics().
392+
:type syl_lyrics: list[tuple(str, list[list[int]])]
395393
:return: Syllable counts for each section in segmented format.
396394
:rtype: list[tuple(str, int)]
397395
"""
398396

399-
syl_lyrics = self.get_syllable_count_lyrics(lyrics)
400397
return [(section[0], sum(sum(line) for line in section[1])) for section in syl_lyrics]
-47.9 MB
Binary file not shown.
Binary file not shown.
-33.5 MB
Binary file not shown.
Binary file not shown.

resources/outputs/.dummy.txt

-1
This file was deleted.

0 commit comments

Comments
 (0)