walkthrough code, fixing os path errors
Pavol Bauer committed Mar 22, 2021
1 parent 9ed6b42 commit 9697543
Showing 15 changed files with 144 additions and 61,053 deletions.
Binary file removed examples/video-1-PE-seq.npy
60,942 changes: 0 additions & 60,942 deletions examples/video-1.csv

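Most path-related changes below drop stray empty-string components from os.path.join calls (moving any needed trailing separator to the end) and replace one hard-coded '/' concatenation in load_model with a proper join. A minimal sketch of the os.path.join behaviour involved (plain Python, not part of the commit; the path segments are illustrative):

```python
import os

# Empty components in the middle of a join do not change the result;
# only a trailing "" appends a final separator.
os.path.join("results", "video-1", "", "VAME", "kmeans-15")   # 'results/video-1/VAME/kmeans-15' on POSIX
os.path.join("results", "video-1", "VAME", "kmeans-15", "")   # 'results/video-1/VAME/kmeans-15/'

# Concatenating with '/' hard-codes the separator; os.path.join uses the platform's os.sep,
# which is why load_model() below switches to it for the model checkpoint path.
"project" + "/" + "model/best_model/" + "VAME_demo.pkl"
os.path.join("project", "model", "best_model", "VAME_demo.pkl")
```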

65 changes: 36 additions & 29 deletions vame/analysis/community_analysis.py
@@ -64,7 +64,7 @@ def get_transition_matrix(adjacency_matrix, threshold = 0.0):
def get_labels(cfg, files, model_name, n_cluster):
labels = []
for file in files:
- path_to_file = os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster))
+ path_to_file = os.path.join(cfg['project_path'],"results",file,model_name,'kmeans-'+str(n_cluster),"")
label = np.load(path_to_file+'/'+str(n_cluster)+'_km_label_'+file+'.npy')
labels.append(label)
return labels
@@ -89,6 +89,7 @@ def create_community_bag(files, labels, transition_matrices, cut_tree, n_cluster
if cut_tree != None:
community_bag = traverse_tree_cutline(T,cutline=cut_tree)
communities_all.append(community_bag)
+ draw_tree(T)
else:
draw_tree(T)
plt.pause(0.5)
@@ -138,34 +139,41 @@ def get_community_labels(files, labels, communities_all):
return community_labels_all


- def umap_embedding(cfg, files, model_name, n_cluster):
- embeds = []
+ def umap_embedding(cfg, file, model_name, n_cluster):
reducer = umap.UMAP(n_components=2, min_dist=cfg['min_dist'], n_neighbors=cfg['n_neighbors'],
random_state=cfg['random_state'])

- for i, file in enumerate(files):
- print("UMAP calculation for file %s" %file)
- folder = os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster))
- latent_vector = np.load(os.path.join(folder,"",'latent_vector_'+file+'.npy'))
- embed = reducer.fit_transform(latent_vector[:30000,:])
- embeds.append(embed)
+ print("UMAP calculation for file %s" %file)

+ folder = os.path.join(cfg['project_path'],"results",file,model_name,'kmeans-'+str(n_cluster),"")
+ latent_vector = np.load(os.path.join(folder,'latent_vector_'+file+'.npy'))

+ num_points = cfg['num_points']
+ if num_points > latent_vector.shape[0]:
+ num_points = latent_vector.shape[0]
+ print("Embedding %d data points.." %num_points)

- return embeds
+ embed = reducer.fit_transform(latent_vector[:num_points,:])

+ return embed


- def umap_vis(files, embeds, community_labels_all):
- window_slice = slice(0, 500)
- for idx, animal in enumerate(files):
- num = np.unique(community_labels_all[idx]).shape[0]
- fig = plt.figure(1)
- plt.scatter(embeds[idx][:,0], embeds[idx][:,1], c=community_labels_all[idx][:30000], cmap='Spectral', s=2, alpha=1)
- plt.plot(embeds[idx][window_slice,0], embeds[idx][window_slice,1], 'k', alpha=.7)
- plt.colorbar(boundaries=np.arange(num+1)-0.5).set_ticks(np.arange(num))
- plt.gca().set_aspect('equal', 'datalim')
- plt.grid(False)
+ def umap_vis(cfg, file, embed, community_labels_all):
+ num_points = cfg['num_points']
+ if num_points > community_labels_all.shape[0]:
+ num_points = community_labels_all.shape[0]
+ print("Embedding %d data points.." %num_points)

+ num = np.unique(community_labels_all)

+ fig = plt.figure(1)
+ plt.scatter(embed[:,0], embed[:,1], c=community_labels_all[:num_points], cmap='Spectral', s=2, alpha=1)
+ plt.colorbar(boundaries=np.arange(np.max(num)+2)-0.5).set_ticks(np.arange(np.max(num)+1))
+ plt.gca().set_aspect('equal', 'datalim')
+ plt.grid(False)


- def community(config, umap_vis=False, cut_tree=None):
+ def community(config, show_umap=False, cut_tree=None):
config_file = Path(config).resolve()
cfg = read_config(config_file)
model_name = cfg['model_name']
@@ -197,22 +205,21 @@ def community(config, umap_vis=False, cut_tree=None):
transition_matrices = compute_transition_matrices(files, labels, n_cluster)
communities_all, trees = create_community_bag(files, labels, transition_matrices, cut_tree, n_cluster)
community_labels_all = get_community_labels(files, labels, communities_all)
- embeds = umap_embedding(cfg, files, model_name, n_cluster)

for idx, file in enumerate(files):
- path_to_file=os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster))
+ path_to_file=os.path.join(cfg['project_path'],"results",file,model_name,'kmeans-'+str(n_cluster),"")
if not os.path.exists(os.path.join(path_to_file,"community")):
os.mkdir(os.path.join(path_to_file,"community"))

- np.save(os.path.join(path_to_file,"community","","transition_matrix_"+file+'.npy'),transition_matrices[idx])
- np.save(os.path.join(path_to_file,"community","","community_label_"+file+'.npy'), community_labels_all[idx])
- np.save(os.path.join(path_to_file,"community","","umap_embedding_"+file+'.npy'), embeds[idx])
+ np.save(os.path.join(path_to_file,"community","transition_matrix_"+file+'.npy'),transition_matrices[idx])
+ np.save(os.path.join(path_to_file,"community","community_label_"+file+'.npy'), community_labels_all[idx])

- with open(os.path.join(path_to_file,"community","","hierarchy"+file+".txt"), "wb") as fp: #Pickling
+ with open(os.path.join(path_to_file,"community","hierarchy"+file+".txt"), "wb") as fp: #Pickling
pickle.dump(communities_all[idx], fp)

- if umap_vis == True:
- umap_vis(files, embeds, community_labels_all)
+ if show_umap == True:
+ embed = umap_embedding(cfg, file, model_name, n_cluster)
+ umap_vis(cfg, files, embed, community_labels_all[idx])



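With these changes community() computes the UMAP embedding per file, inside the loop, and only when the visualization is requested. A usage sketch under the assumption that the package re-exports the function at the top level (as the VAME demo script does); the config path and cut_tree value are illustrative:

```python
import vame

config = '/home/user/my-vame-project/config.yaml'  # illustrative path

# cut_tree sets the cutline level for the hierarchical tree; show_umap=True additionally
# computes a per-file UMAP embedding and plots it colored by community label.
vame.community(config, show_umap=True, cut_tree=2)
```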
12 changes: 6 additions & 6 deletions vame/analysis/gif_creator.py
@@ -21,7 +21,7 @@
from vame.util.gif_pose_helper import get_animal_frames


- def create_video(path_to_file, file, embed, clabel, frames, start, length):
+ def create_video(path_to_file, file, embed, clabel, frames, start, length, max_lag):
# set matplotlib colormap
cmap = matplotlib.cm.gray
cmap_reversed = matplotlib.cm.get_cmap('gray_r')
@@ -36,8 +36,8 @@ def create_video(path_to_file, file, embed, clabel, frames, start, length):
ax2.grid(False)
lag = 0
for i in tqdm.tqdm(range(length)):
- if i > 30:
- lag = i - 30
+ if i > max_lag:
+ lag = i - max_lag
ax1.cla()
ax1.axis('off')
ax1.grid(False)
@@ -52,7 +52,7 @@ def create_video(path_to_file, file, embed, clabel, frames, start, length):
fig.savefig(os.path.join(path_to_file,"gif_frames",file+'gif_%d.png') %i)


- def gif(config, pose_ref_index, start=None, length=500, file_format='.mp4', crop_size=(300,300)):
+ def gif(config, pose_ref_index, subtract_background=True, start=None, length=500, max_lag=30, file_format='.mp4', crop_size=(300,300)):
config_file = Path(config).resolve()
cfg = read_config(config_file)
model_name = cfg['model_name']
@@ -94,8 +94,8 @@ def gif(config, pose_ref_index, start=None, length=500, file_format='.mp4', crop
else:
start = start

- frames = get_animal_frames(cfg, file, pose_ref_index, start, length, file_format, crop_size)
- create_video(path_to_file, file, embed, community_label, frames, start, length)
+ frames = get_animal_frames(cfg, file, pose_ref_index, start, length, subtract_background, file_format, crop_size)
+ create_video(path_to_file, file, embed, community_label, frames, start, length, max_lag)



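gif() now exposes the background-subtraction switch and the trailing-window length that create_video() previously hard-coded at 30 frames. A call sketch with the new keywords; the pose_ref_index value is illustrative and should name two stable reference body parts in your pose file:

```python
import vame

config = '/home/user/my-vame-project/config.yaml'  # illustrative path

# max_lag: how many past frames remain visible as a trail in the embedding panel.
# subtract_background=False skips background subtraction for recordings without a static background.
vame.gif(config, pose_ref_index=[0, 5], subtract_background=True, start=None,
         length=500, max_lag=30, file_format='.mp4', crop_size=(300, 300))
```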
42 changes: 24 additions & 18 deletions vame/analysis/pose_segmentation.py
@@ -51,7 +51,7 @@ def load_model(cfg, model_name, legacy):
hidden_size_layer_2, hidden_size_rec, hidden_size_pred, dropout_encoder,
dropout_rec, dropout_pred).cuda()

- model.load_state_dict(torch.load(cfg['project_path']+'/'+'model/best_model/'+model_name+'_'+cfg['Project']+'.pkl'))
+ model.load_state_dict(torch.load(os.path.join(cfg['project_path'],'model','best_model',model_name+'_'+cfg['Project']+'.pkl')))
model.eval()

return model
@@ -140,8 +140,8 @@ def pose_segmentation(config):
ind_param = cfg['individual_parameterization']

for folders in cfg['video_sets']:
- if not os.path.exists(os.path.join(cfg['project_path'],"results",folders,"",model_name)):
- os.mkdir(os.path.join(cfg['project_path'],"results",folders,"",model_name))
+ if not os.path.exists(os.path.join(cfg['project_path'],"results",folders,model_name,"")):
+ os.mkdir(os.path.join(cfg['project_path'],"results",folders,model_name,""))

files = []
if cfg['all_data'] == 'No':
@@ -173,25 +173,28 @@
print("CUDA is not working! Attempting to use the CPU...")
torch.device("cpu")

- folder = os.path.dirname(os.path.join(cfg['project_path'],"results",file,"",model_name,""))
+ folder = os.path.dirname(os.path.join(cfg['project_path'],"results",file,model_name,""))
if not os.listdir(folder):
- print(os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster)))
+ print(os.path.join(cfg['project_path'],"results",file,model_name,""))
model = load_model(cfg, model_name, legacy)
latent_vectors = embedd_latent_vectors(cfg, files, model, legacy)

if ind_param == False:
print("For all animals the same k-Means parameterization of latent vectors is applied for %d cluster" %n_cluster)
labels, cluster_center = same_parameterization(cfg, files, latent_vectors, n_cluster)

- if ind_param == True:
+ else:
print("Individual k-Means parameterization of latent vectors for %d cluster" %n_cluster)
labels, cluster_center = individual_parameterization(cfg, files, latent_vectors, n_cluster)

for idx, file in enumerate(files):
- if not os.path.exists(os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster))):
- os.mkdir(os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster)))
+ print(os.path.join(cfg['project_path'],"results",file,"",model_name,'kmeans-'+str(n_cluster),""))
+ if not os.path.exists(os.path.join(cfg['project_path'],"results",file,model_name,'kmeans-'+str(n_cluster),"")):
+ try:
+ os.mkdir(os.path.join(cfg['project_path'],"results",file,"",model_name,'kmeans-'+str(n_cluster),""))
+ except OSError as error:
+ print(error)

- save_data = os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster))
+ save_data = os.path.join(cfg['project_path'],"results",file,model_name,'kmeans-'+str(n_cluster),"")
np.save(save_data+'/'+str(n_cluster)+'_km_label_'+file, labels[idx])
np.save(save_data+'/cluster_center_'+file, cluster_center[idx])
np.save(save_data+'/'+'latent_vector_'+file, latent_vectors[idx])
@@ -201,33 +204,36 @@
'For model %s a latent vector embedding already exists. \n'
'Parameterization of latent vector with %d k-Means cluster \n' %(model_name, n_cluster))

- if os.path.exists(os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster))):
+ if os.path.exists(os.path.join(cfg['project_path'],"results",file,model_name,'kmeans-'+str(n_cluster),"")):
flag = input('WARNING: A parameterization for the chosen cluster size of the model already exists! \n'
- 'Do you want to continue? The motif numbers will change! (yes/no) ')
+ 'Do you want to continue? A new k-Means assignment will be computed! (yes/no) ')
else:
flag = 'yes'

if flag == 'yes':
path_to_latent_vector = os.listdir(folder)[0]
latent_vectors = []
for file in files:
- latent_vector = np.load(os.path.join(cfg['project_path'],"results",file,"",model_name,"",path_to_latent_vector,"",'latent_vector_'+file+'.npy'))
+ latent_vector = np.load(os.path.join(cfg['project_path'],"results",file,model_name,path_to_latent_vector,'latent_vector_'+file+'.npy'))
latent_vectors.append(latent_vector)

if ind_param == False:
print("For all animals the same k-Means parameterization of latent vectors is applied for %d cluster" %n_cluster)
labels, cluster_center = same_parameterization(cfg, files, latent_vectors, n_cluster)

- if ind_param == True:
+ else:
print("Individual k-Means parameterization of latent vectors for %d cluster" %n_cluster)
labels, cluster_center = individual_parameterization(cfg, files, latent_vectors, n_cluster)


for idx, file in enumerate(files):
- if not os.path.exists(os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster))):
- os.mkdir(os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster)))
+ print(os.path.join(cfg['project_path'],"results",file,"",model_name,'kmeans-'+str(n_cluster),""))
+ if not os.path.exists(os.path.join(cfg['project_path'],"results",file,model_name,'kmeans-'+str(n_cluster),"")):
+ try:
+ os.mkdir(os.path.join(cfg['project_path'],"results",file,"",model_name,'kmeans-'+str(n_cluster),""))
+ except OSError as error:
+ print(error)

- save_data = os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster))
+ save_data = os.path.join(cfg['project_path'],"results",file,model_name,'kmeans-'+str(n_cluster))
np.save(save_data+'/'+str(n_cluster)+'_km_label_'+file, labels[idx])
np.save(save_data+'/cluster_center_'+file, cluster_center[idx])
np.save(save_data+'/'+'latent_vector_'+file, latent_vectors[idx])
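Wrapping os.mkdir in try/except OSError lets the segmentation rerun over an existing results tree instead of crashing on the second pass. A more compact standard-library alternative, shown only as a sketch of the same idea (not what the commit uses), is os.makedirs with exist_ok:

```python
import os

# Illustrative path; builds any missing intermediate directories and is a no-op if it already exists.
save_dir = os.path.join('my-vame-project', 'results', 'video-1', 'VAME', 'kmeans-15')
os.makedirs(save_dir, exist_ok=True)
```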
8 changes: 6 additions & 2 deletions vame/analysis/tree_hierarchy.py
@@ -1,11 +1,15 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
- Created on Fri Jun 19 11:23:43 2020
+ Variational Animal Motion Embedding 1.0-alpha Toolbox
+ © K. Luxem & P. Bauer, Department of Cellular Neuroscience
+ Leibniz Institute for Neurobiology, Magdeburg, Germany
- @author: luxemk
+ https://github.com/LINCellularNeuroscience/VAME
+ Licensed under GNU General Public License v3.0
"""


import numpy as np
import networkx as nx
import random
9 changes: 8 additions & 1 deletion vame/analysis/umap_visualization.py
@@ -83,7 +83,14 @@ def visualization(config, label=None):
random_state=cfg['random_state'])

latent_vector = np.load(os.path.join(path_to_file,"",'latent_vector_'+file+'.npy'))
- embed = reducer.fit_transform(latent_vector[:30000,:])

+ num_points = cfg['num_points']
+ if num_points > latent_vector.shape[0]:
+ num_points = latent_vector.shape[0]
+ print("Embedding %d data points.." %num_points)

+ embed = reducer.fit_transform(latent_vector[:num_points,:])
+ np.save(os.path.join(path_to_file,"community","umap_embedding_"+file+'.npy'), embed)

if label == None:
umap_vis(file, embed)
19 changes: 10 additions & 9 deletions vame/analysis/videowriter.py
@@ -13,17 +13,18 @@
from pathlib import Path
import numpy as np
import cv2 as cv
+ import tqdm

from vame.util.auxiliary import read_config


def get_cluster_vid(cfg, path_to_file, file, n_cluster, videoType, flag):
if flag == "motif":
print("Motif videos getting created for "+file+" ...")
- labels = np.load(os.path.join(path_to_file,"",str(n_cluster)+'_km_label_'+file+'.npy'))
+ labels = np.load(os.path.join(path_to_file,str(n_cluster)+'_km_label_'+file+'.npy'))
if flag == "community":
print("Community videos getting created for "+file+" ...")
- labels = np.load(os.path.join(path_to_file,"","community","",'community_label_'+file+'.npy'))
+ labels = np.load(os.path.join(path_to_file,"community",'community_label_'+file+'.npy'))
capture = cv.VideoCapture(os.path.join(cfg['project_path'],"videos",file+videoType))

if capture.isOpened():
@@ -41,18 +42,18 @@ def get_cluster_vid(cfg, path_to_file, file, n_cluster, videoType, flag):
cluster_lbl = cluster_lbl[0]

if flag == "motif":
- output = os.path.join(path_to_file,"cluster_videos",file+'motif_%d.avi' %cluster)
+ output = os.path.join(path_to_file,"cluster_videos",file+'-motif_%d.avi' %cluster)
if flag == "community":
- output = os.path.join(path_to_file,"community_videos",file+'motif_%d.avi' %cluster)
+ output = os.path.join(path_to_file,"community_videos",file+'-community_%d.avi' %cluster)

video = cv.VideoWriter(output, cv.VideoWriter_fourcc('M','J','P','G'), fps, (int(width), int(height)))

- if len(cluster_lbl) < cfg['lenght_of_motif_video']:
+ if len(cluster_lbl) < cfg['length_of_motif_video']:
vid_length = len(cluster_lbl)
else:
- vid_length = cfg['lenght_of_motif_video']
+ vid_length = cfg['length_of_motif_video']

- for num in range(vid_length):
+ for num in tqdm.tqdm(range(vid_length)):
idx = cluster_lbl[num]
capture.set(1,idx+cluster_start)
ret, frame = capture.read()
@@ -93,7 +94,7 @@ def motif_videos(config, videoType='.mp4'):

print("Cluster size is: %d " %n_cluster)
for file in files:
- path_to_file=os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster))
+ path_to_file=os.path.join(cfg['project_path'],"results",file,model_name,'kmeans-'+str(n_cluster),"")
if not os.path.exists(os.path.join(path_to_file,"cluster_videos")):
os.mkdir(os.path.join(path_to_file,"cluster_videos"))

@@ -133,7 +134,7 @@ def community_videos(config, videoType='.mp4'):

print("Cluster size is: %d " %n_cluster)
for file in files:
- path_to_file=os.path.join(cfg['project_path'],"results",file,"",model_name,"",'kmeans-'+str(n_cluster))
+ path_to_file=os.path.join(cfg['project_path'],"results",file,model_name,'kmeans-'+str(n_cluster),"")
if not os.path.exists(os.path.join(path_to_file,"community_videos")):
os.mkdir(os.path.join(path_to_file,"community_videos"))

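Both writers now read the corrected length_of_motif_video key, wrap the frame loop in a tqdm progress bar, and separate the output names into '-motif_%d' and '-community_%d' files. A usage sketch, again assuming the top-level re-exports used in the VAME demo:

```python
import vame

config = '/home/user/my-vame-project/config.yaml'  # illustrative path

# One .avi clip per k-Means motif, and one per detected community, cut from the original videos.
vame.motif_videos(config, videoType='.mp4')
vame.community_videos(config, videoType='.mp4')
```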
12 changes: 6 additions & 6 deletions vame/initialize_project/new.py
@@ -130,25 +130,25 @@ def init_new_project(project, videos, working_directory=None, videotype='.mp4'):
cfg_file['dropout_rec']=0
cfg_file['hidden_size_pred']=256
cfg_file['dropout_pred']=0
- cfg_file['kl_start']=3
- cfg_file['annealtime']=8
+ cfg_file['kl_start']=2
+ cfg_file['annealtime']=4
cfg_file['mse_reconstruction_reduction']='sum'
cfg_file['mse_prediction_reduction']='sum'
cfg_file['kmeans_loss']=cfg_file['zdims']
cfg_file['kmeans_lambda']=0.1
cfg_file['scheduler']=1
- cfg_file['lenght_of_motif_video'] = 1000
+ cfg_file['length_of_motif_video'] = 1000
cfg_file['noise'] = False
cfg_file['scheduler_step_size'] = 100
cfg_file['legacy'] = False
cfg_file['individual_parameterization'] = False
cfg_file['random_state_kmeans'] = 42
cfg_file['n_init_kmeans'] = 15
- cfg_file['model_name '] = 'VAME'
+ cfg_file['model_name']='VAME'
cfg_file['n_cluster'] = 15
cfg_file['pretrained_weights'] = False
- cfg_file['pretrained_model'] = None
- cfg_file['mind_dist'] = 0.1
+ cfg_file['pretrained_model']='None'
+ cfg_file['min_dist'] = 0.1
cfg_file['n_neighbors'] = 200
cfg_file['random_state'] = 42
cfg_file['num_points'] = 30000
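config.yaml files written by earlier versions of init_new_project still carry the misspelled keys replaced here ('lenght_of_motif_video', 'mind_dist', and 'model_name ' with a trailing space). A hypothetical migration helper for such a project; the key names come from the diff above, but the helper itself is not part of VAME:

```python
import yaml  # PyYAML; note that rewriting this way will not preserve comments in the file

RENAMES = {
    'lenght_of_motif_video': 'length_of_motif_video',
    'mind_dist': 'min_dist',
    'model_name ': 'model_name',  # the old key name ends with a trailing space
}

def migrate_config(path):
    # Load the existing project config, rename legacy keys in place, and write it back.
    with open(path) as f:
        cfg = yaml.safe_load(f)
    for old, new in RENAMES.items():
        if old in cfg and new not in cfg:
            cfg[new] = cfg.pop(old)
    with open(path, 'w') as f:
        yaml.dump(cfg, f, default_flow_style=False)

migrate_config('/home/user/my-vame-project/config.yaml')  # illustrative path
```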
