SwagLyrics
diff --git a/‎autosynch/align.py
+216 b/‎autosynch/align.py
+216
diff --git a/‎autosynch/syllable_counter.py
+8-11 b/‎autosynch/syllable_counter.py
+8-11
diff --git a/‎resources/examples/LizNelson_Rainfall_MIX.wav
-47.9 MB b/‎resources/examples/LizNelson_Rainfall_MIX.wav
-47.9 MB
diff --git a/‎resources/examples/LizNelson_Rainfall_VOCALS.wav
-24 MB b/‎resources/examples/LizNelson_Rainfall_VOCALS.wav
-24 MB
diff --git a/‎resources/examples/MarvinGaye_Grapevine_MIX.wav
-33.5 MB b/‎resources/examples/MarvinGaye_Grapevine_MIX.wav
-33.5 MB
diff --git a/‎resources/examples/MarvinGaye_Grapevine_VOCALS.wav
-32.4 MB b/‎resources/examples/MarvinGaye_Grapevine_VOCALS.wav
-32.4 MB
diff --git a/‎resources/outputs/.dummy.txt
-1 b/‎resources/outputs/.dummy.txt
-1
@@ -355,6 +355,222 @@ def seg_align_eval(dump_dir, tagged_dir, out_file, verbose=False):
 
         f.write('\n')
 
+def line_align(songs, dump_dir, boundary_algorithm='olda', label_algorithm='fmc2d', do_twinnet=True):
+    """
+    Aligns lyric sections and lines to audio and writes one YAML file per song.
+
+    For each song, syllable counts derived from the lyrics are matched against
+    syllable onsets detected in the vocal track and against the structural
+    segmentation of the mixed audio. Segments are relabelled as chorus, verse
+    or other, aligned to the lyric sections with a dynamic-programming pass
+    over dp_err_matrix, and line start times are interpolated inside each
+    aligned section in proportion to the syllable count of each line.
+
+    :param songs: Song or songs to align. Each song is a dict read with the
+                  keys 'path', 'song', 'artist' and 'genre'.
+    :type songs: dict | list[dict]
+    :param dump_dir: Directory the '<artist>_<song>.yml' files are written to.
+    :type dump_dir: str
+    :param boundary_algorithm: Passed to msaf.process() as boundaries_id.
+    :type boundary_algorithm: str
+    :param label_algorithm: Passed to msaf.process() as labels_id.
+    :type label_algorithm: str
+    :param do_twinnet: If True, runs twinnet.twinnet_process() on all song
+                       paths in one batch before alignment.
+    :type do_twinnet: bool
+    """
+
+    logging.info('Beginning alignment...')
+
+    # Accept a single song as well as a list of songs
+    if isinstance(songs, dict):
+        songs = [songs]
+
+    # Module initializations
+    snd = SND(silencedb=-15)
+    sc = SyllableCounter()
+
+    # Perform MaD TwinNet in one batch
+    if do_twinnet:
+        paths = [song['path'] for song in songs]
+        twinnet.twinnet_process(paths)
+    else:
+        logging.info('Skipping MaD TwinNet')
+
+    for song in songs:
+
+        logging.info('Processing {} by {}'.format(song['song'], song['artist']))
+
+        start_time = time.time()
+
+        # Get file names
+        # NOTE(review): the '_voice.wav' file is presumably the TwinNet output;
+        # with do_twinnet=False it must already exist -- confirm against twinnet.
+        mixed_path = song['path']
+        voice_path = os.path.splitext(song['path'])[0] + '_voice.wav'
+
+        # Get lyrics from Genius
+        lyrics = get_lyrics(song['song'], song['artist'])
+
+        # Get syllable count from lyrics
+        formatted_lyrics = sc.build_lyrics(lyrics)
+        syl_lyrics = sc.get_syllable_count_lyrics(formatted_lyrics)
+        sc_syllables = sc.get_syllable_count_per_section(syl_lyrics)
+
+        # Get syllable count from SND
+        snd_syllables = snd.run(voice_path)
+
+        # Structural segmentation analysis on original audio
+        sections, labels = msaf.process(mixed_path, boundaries_id=boundary_algorithm, labels_id=label_algorithm)
+
+        # Save instrumental section indices
+        instrumentals = []
+
+        # Get SND counts, densities per label
+        max_count = 0
+
+        # labels_density[label] = [syllable counts, syllable densities], one
+        # entry per segment carrying that label
+        labels_density = {}
+        i_s = 0
+        for i, section in enumerate(zip(labels, sections[:-1], sections[1:])):
+            # section = (label, segment start, segment end); consume every
+            # detected syllable that falls before the end of this segment
+            count = 0
+            while i_s < len(snd_syllables) and snd_syllables[i_s] < section[2]:
+                count += 1
+                i_s += 1
+            max_count = max(max_count, count)
+
+            density = count/(section[2]-section[1])
+
+            # TODO: fix instrumentalization
+            # if density <= 0.7:
+            #     instrumentals.append(i)
+            # else:
+            #     if section[0] not in labels_density:
+            #         labels_density[section[0]] = [[], []]
+            #     labels_density[section[0]][0].append(count)
+            #     labels_density[section[0]][1].append(density)
+            if section[0] not in labels_density:
+                labels_density[section[0]] = [[], []]
+            labels_density[section[0]][0].append(count)
+            labels_density[section[0]][1].append(density)
+
+        # Normalize SND syllable counts. If no syllable was detected at all
+        # max_count is 0; divide by 1 instead so the counts stay 0 rather than
+        # raising ZeroDivisionError.
+        norm_count = max_count or 1
+        for label in labels_density:
+            labels_density[label][0] = [count/norm_count for count in labels_density[label][0]]
+
+        # Normalize SSA syllable counts
+        gt_max_syl = max(section[1] for section in sc_syllables)
+        gt_chorus_syl = mean(section[1]/gt_max_syl for section in sc_syllables if section[0] == 'chorus')
+
+        # Find label most similar to chorus
+        min_label = labels[0]
+        min_distance = float('inf')
+        for label in labels_density:
+            # stdev() needs at least two samples, and a chorus is expected to repeat
+            if len(labels_density[label][0]) < 2:
+                continue
+
+            # TODO: Fix distance scales
+            mean_syl = mean(labels_density[label][0])
+            std_den  = stdev(labels_density[label][1])
+            distance = sqrt(((mean_syl - gt_chorus_syl)/gt_chorus_syl)**2 + std_den**2)
+
+            if distance < min_distance:
+                min_distance = distance
+                min_label = label
+
+        # Relabel: the closest label becomes 'chorus', other repeated labels
+        # become 'verse', labels that occur once become 'other'
+        relabels = [''] * len(labels)
+
+        temp = defaultdict(list)
+        for i, label in enumerate(labels):
+            temp[label].append(i)
+        for label in temp:
+            for i in temp[label]:
+                if i in instrumentals:
+                    continue
+                elif label == min_label:
+                    relabels[i] = 'chorus'
+                elif len(temp[label]) > 1:
+                    relabels[i] = 'verse'
+                else:
+                    relabels[i] = 'other'
+        del temp
+
+        # Drop the (still empty) entries of instrumental segments
+        relabels = [label for label in relabels if label]
+        if not relabels:
+            logging.error('Whole song tagged as instrumental! Skipping...')
+            continue
+
+        # Calculate accumulated error matrix (rows: lyric sections, columns:
+        # relabelled audio segments)
+        dp = [[-1 for j in range(len(relabels))] for i in range(len(sc_syllables))]
+        for i in range(len(sc_syllables)):
+            for j in range(len(relabels)):
+                dp[i][j] = dp_err_matrix[sc_syllables[i][0]][relabels[j]]
+                if i == 0 and j == 0:
+                    pass
+                elif i == 0:
+                    dp[i][j] += dp[i][j-1]
+                elif j == 0:
+                    dp[i][j] += dp[i-1][j]
+                else:
+                    dp[i][j] += min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1])
+
+        # Backtrack from the last cell to (0, 0) along the cheapest predecessors
+        i, j = len(sc_syllables)-1, len(relabels)-1
+        path = []
+        while True:
+            path.append((i, j))
+            if (i, j) == (0, 0):
+                break
+            elif i == 0:
+                j -= 1
+            elif j == 0:
+                i -= 1
+            else:
+                min_dir = min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1])
+                if dp[i-1][j] == min_dir:
+                    i -= 1
+                elif dp[i][j-1] == min_dir:
+                    j -= 1
+                else:
+                    i -= 1
+                    j -= 1
+        path.reverse()
+
+        # Process alignment and write to file
+        # alignment[k] lists the lyric section indices (or 'instrumental')
+        # assigned to audio segment k
+        alignment = [[] for i in range(len(labels))]
+        for i in instrumentals:
+            alignment[i].append('instrumental')
+
+        section_id = 0
+        j_prev = 0
+        for (i, j) in path:
+            if j != j_prev:
+                section_id += 1
+                j_prev = j
+            # relabels has no entries for instrumentals, so skip over them
+            while 'instrumental' in alignment[section_id]:
+                section_id += 1
+            alignment[section_id].append(i)
+
+        end_time = time.time()
+
+        align_data = {'song': song['song'],
+                      'artist': song['artist'],
+                      'genre': song['genre'],
+                      'process time': end_time - start_time,
+                      'duration': round((sections[-1] - sections[0]).item(), 2),
+                      'align': []}
+
+        cur_lyric_section = -1
+        for i, section in enumerate(alignment):
+            for n, lyric_section in enumerate(section):
+                if lyric_section != cur_lyric_section:
+                    # Lyric sections sharing one audio segment split it evenly
+                    break_point = round((sections[i] + n * (sections[i+1]-sections[i])/len(section)).item(), 2)
+                    if cur_lyric_section != 'instrumental' and align_data['align']:
+                        align_data['align'][-1]['end'] = break_point
+                    if lyric_section != 'instrumental':
+                        align_data['align'].append({'label': sc_syllables[lyric_section][0],
+                                                    'syllables': sc_syllables[lyric_section][1],
+                                                    'start': break_point,
+                                                    'lines': []})
+                    cur_lyric_section = lyric_section
+
+        # The last lyric section is never closed by a following one. Close it
+        # at the final segment boundary; reusing break_point here would set its
+        # end to its own start and collapse all of its lines onto one timestamp.
+        if 'end' not in align_data['align'][-1]:
+            align_data['align'][-1]['end'] = round(sections[-1].item(), 2)
+
+        # Spread line start times over each section proportionally to syllables
+        for i, section in enumerate(align_data['align']):
+            duration = section['end'] - section['start']
+            line_start = section['start']
+            for j, line in enumerate(formatted_lyrics[i][1]):
+                line_text = ' '.join(line)
+                line_syls = sum(syl_lyrics[i][1][j])
+                line_duration = line_syls/align_data['align'][i]['syllables'] * duration
+
+                align_data['align'][i]['lines'].append({'start': line_start,
+                                                        'text': line_text})
+
+                line_start += line_duration
+
+        file_name = '{}_{}.yml'.format(song['artist'], song['song']).replace(' ', '')
+        file_path = os.path.join(dump_dir, file_name)
+
+        with open(file_path, 'w') as f:
+            yaml.dump(align_data, f, default_flow_style=False)
+
+
 def iter_boundary_label_algorithms(songs, dump_dir, tagged_dir, evals_dir, do_twinnet=False, verbose=True):
     for b_alg in msaf.get_all_boundary_algorithms():
         if b_alg == 'example':
 
@@ -267,7 +267,7 @@ def dfs(node, path, arcs):
 
         return n_syllables
 
-    def _build_lyrics(self, lyrics):
+    def build_lyrics(self, lyrics):
         """
         Constructs segmented lyrics structure by song section, line, and word.
 
@@ -356,23 +356,21 @@ def get_syllable_count_word(self, word):
 
         return n_syllables
 
-    def get_syllable_count_lyrics(self, lyrics):
+    def get_syllable_count_lyrics(self, formatted_lyrics):
         """
         Formats and retrieves syllable counts for each word in lyrics.
 
         Returns a list of tuples representing sections, each of which contains
         a list of lists representing lines of lyrics, each of which is a list of
-        syllable counts of words in that line. See _build_lyrics() for more
+        syllable counts of words in that line. See build_lyrics() for more
         information.
 
-        :param lyrics: Lyrics in format of Genius.com.
-        :type lyrics: str
+        :param formatted_lyrics: Lyrics output from build_lyrics().
+        :type formatted_lyrics: list[tuple(str, list[list[str]])]
         :return syl_lyrics: Syllable counts for words in segmented format.
         :rtype: list[tuple(str, list[list[int]])]
         """
 
-        formatted_lyrics = self._build_lyrics(lyrics)
-
         syl_lyrics = []
         syl_section = []
         for section in formatted_lyrics:
@@ -383,18 +381,17 @@ def get_syllable_count_lyrics(self, lyrics):
 
         return syl_lyrics
 
-    def get_syllable_count_per_section(self, lyrics):
+    def get_syllable_count_per_section(self, syl_lyrics):
         """
         Formats and retrieves syllable counts per section in lyrics.
 
         Sums syllable counts from each section in return value of
         get_syllable_count_lyrics().
 
-        :param lyrics: Lyrics in format of Genius.com.
-        :type lyrics: str
+        :param syl_lyrics: Lyrics output from get_syllable_count_lyrics().
+        :type syl_lyrics: list[tuple(str, list[list[int]])]
         :return: Syllable counts for each section in segmented format.
         :rtype: list[tuple(str, int)]
         """
 
-        syl_lyrics = self.get_syllable_count_lyrics(lyrics)
         return [(section[0], sum(sum(line) for line in section[1])) for section in syl_lyrics]