pearsonlab · chaichontat · Sep 1, 2021 · Sep 1, 2021 · Sep 1, 2021 · Sep 1, 2021
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
@@ -0,0 +1,86 @@
+name: Run w/ Synthetic Data
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  schedule:
+    - cron: "2 5 * * *"
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    env:
+      PATH_NEUROPIXELS: ${{ github.workspace }}/data/neuropixels
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - name: Add conda to system path
+        run: echo $CONDA/bin >> $GITHUB_PATH
+
+      - name: Cache Data
+        uses: actions/cache@v2
+        with:
+          path: |
+            $PATH_NEUROPIXELS/faces.zip
+            $PATH_NEUROPIXELS/data/spks.zip
+          key: ${{ runner.os }}-${{ hashFiles('**/neuropixels/*.zip')}}
+          restore-keys: |
+            ${{ runner.os }}-
+
+      - name: Download Neuropixels Data
+        working-directory: ${{ env.PATH_NEUROPIXELS }}
+        # Neuropixels data see https://figshare.com/articles/dataset/Eight-probe_Neuropixels_recordings_during_spontaneous_behaviors/7739750
+        # Only using data from mouse Waksman
+        run: |
+          wget -O faces.zip -N https://figshare.com/ndownloader/files/14405048
+          wget -O spks.zip  -N https://figshare.com/ndownloader/files/14405057
+          wget -O probeLocations.mat https://figshare.com/ndownloader/files/14570108
+          wget -O probeBorders.mat   https://figshare.com/ndownloader/files/14653298
+          unzip faces.zip -x faces/Robbins_face_proc.mat faces/Krebs_face_proc.mat
+          unzip spks.zip -x spks/Robbins_Feb18.mat spks/Krebs_Feb18.mat
+
+      - name: Set up MATLAB
+        uses: matlab-actions/setup-matlab@v1
+
+      # Result in WaksmanwithFaces_KS2.mat
+      - name: Process raw neuropixels data
+        uses: matlab-actions/run-command@v1
+        with:
+          command: cd('${{ env.PATH_NEUROPIXELS }}'), ephysLoad
+
+      - name: Install dependencies
+        run: |
+          conda config --add channels conda-forge 
+          conda env update --file environment-cpu.yml --name base
+
+      - name: Run Bubblewrap
+        run: |
+          python scripts/dimension_reduction_neuropixels.py ${{ env.PATH_NEUROPIXELS }}/WaksmanwithFaces_KS2.mat
+
+          conda install jupytext
+          pytest datagen.py
+          jupytext --from py --to ipynb scripts/run_bubblewrap.py --execute --run-path .
+          jupyter nbconvert --to html scripts/run_bubblewrap.ipynb
+          mkdir public
+          mv scripts/run_bubblewrap.html public/index.html
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: Synthetic data run
+          path: public/index.html
+
+      - name: Deploy to GitHub Pages
+        uses: peaceiris/actions-gh-pages@v3
+        if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/github_actions'
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: public
diff --git a/data/neuropixels/ephysLoad.m b/data/neuropixels/ephysLoad.m
@@ -0,0 +1,106 @@
+% Neuropixels data
+% Download all the files at https://figshare.com/articles/dataset/Eight-probe_Neuropixels_recordings_during_spontaneous_behaviors/7739750
+% Slightly modified from https://figshare.com/ndownloader/files/14570105
+
+ephysroot = '.'
+% mouse names
+% Original: mstr = {'Krebs','Waksman','Robbins'};
+% doing only Waksman
+mstr = {'Waksman'};
+% start of spontaneous activity in each mouse
+tstart = [3811 3633 3323];
+
+% probe location in brain information
+% borders tells you region based on depth
+% ccfCoords tells you position in microns relative to allen CCF
+load(fullfile(ephysroot, 'probeLocations.mat'));
+
+% possible brain regions (as strings)
+areaLabels = {'FrCtx','FrMoCtx','SomMoCtx','SSCtx','V1','V2','RSP',...
+'CP','LS','LH','HPF','TH','SC','MB'};
+
+%%
+% which mouse
+brainLocAll=[];
+for imouse = [1:length(mstr)]
+  mouse_name = mstr{imouse};
+
+  % load spikes
+  load(fullfile(ephysroot, sprintf('spks/spks%s_Feb18.mat',mouse_name)));
+  % spks(k).st = spike times (in seconds)
+  % spks(k).clu = cluster identity of each spike in st (which neuron does spike belong to)
+  % spks(k).Wheights = height of each cluster on the probe
+
+  % load behavior file
+  beh = load(fullfile(ephysroot, sprintf('faces/%s_face_proc.mat',mouse_name)));
+  % movie of mouse face during recording was dimensionality reduced using SVD
+  % beh.motionSVD = timepoints x components
+  % beh.motionMask = pixels x pixels x components
+  % beh.times = times of behavioral frames (in seconds) in same timeframe as spikes
+  motSVD = beh.motionSVD;
+  tVid = beh.times; % times of movie frames in spike reference frame
+
+  %% extract spike times and create a matrix neurons x time
+  stall = zeros(5e3,5500,'uint8');
+  ij = 0;
+  maxt=0;
+  Wh = [];
+  iprobe=[];
+  brainLoc=[];
+  srate = 30; % sampling rate in Hz (how much to bin matrix)
+  % loop over probes
+  for k = 1:numel(spks)
+    clu = spks(k).clu; % cluster ids
+    st  = spks(k).st; % spike times in seconds
+    st = round(st*srate); % spike times in 30Hz
+
+    % transform spike times into a matrix
+    % any duplicates of spike times are added together
+    S = sparse(st, clu, ones(1, numel(clu)));
+    S = uint8(full(S))';
+    % there might be missing numbers (bad spike clusters)
+    S = S(sort(unique(clu)),:);
+
+    % add these to the big matrix with all probes
+    stall(ij+[1:size(S,1)],1:size(S,2)) = S;
+    ij = ij + size(S,1);
+    maxt = max(maxt, size(S,2));
+
+    % height of clusters on the probe
+    % we will use these to determine the brain area
+    whp = spks(k).Wheights(sort(unique(clu)));
+
+    % borders of brain areas wrt the probe
+    lowerBorder = probeLocations(imouse).probe(k).borders.lowerBorder;
+    upperBorder = probeLocations(imouse).probe(k).borders.upperBorder;
+    acronym     = probeLocations(imouse).probe(k).borders.acronym;
+    loc = zeros(numel(whp),1);
+    % determine brain area for each cluster based on whp
+    for j = 1:numel(acronym)
+      whichArea = find(strcmp(areaLabels, acronym{j}));
+      loc(whp >= lowerBorder(j) & whp < upperBorder(j)) = whichArea;
+    end
+
+    % concatenate for all probes
+    Wh = [Wh; whp];
+    brainLoc = [brainLoc; loc];
+    iprobe=[iprobe; k * ones(size(S,1),1)];
+  end
+  %%
+  stall = stall(1:ij, 1:maxt);
+
+  % only use spontaneous time frames
+  tspont = tstart(imouse)*srate : min(floor(tVid(end)*srate), size(stall,2)-4);
+  stall = stall(:,tspont);
+  tspont = tspont / srate; % put back in seconds
+
+  % to put the behavioral data into the spike frames (no time delays)
+  x = interp1(tVid, motSVD, tspont);
+
+  %%% save the extracted spike matrix and brain locations and faces %%%
+  save(fullfile('.', sprintf('%swithFaces_KS2.mat',mouse_name)), 'stall','Wh','iprobe',...
+  'motSVD','tspont','tVid','srate','brainLoc','areaLabels');
+
+  brainLocAll=[brainLocAll;brainLoc];
+
+end
diff --git a/datagen.py b/datagen.py
@@ -89,7 +89,7 @@ def gen_data_diffeq(f: Callable, projection: Callable, *, t, x0: np.ndarray, dim
     return ivp["t"], y, projed
 
 
-def make_dataset(f, x0, num_trajectories, num_dim, begin, end, noise):
+def make_dataset(f, x0, num_trajectories, num_dim, begin, end, noise, save=True):
     xx = [] # states
     projeds = [] # observations
     rng = np.random.RandomState(39)
@@ -110,8 +110,9 @@ def make_dataset(f, x0, num_trajectories, num_dim, begin, end, noise):
     ys = projeds
     us = np.zeros((xx.shape[0], xx.shape[1], 1))
 
-    filename = f"{f.__name__}_{num_trajectories}trajectories_{num_dim}dim_{begin}to{end}_noise{noise}.npz"
-    np.savez(filename, x = xs, y = ys, u = us)
+    if save:
+        filename = f"{f.__name__}_{num_trajectories}trajectories_{num_dim}dim_{begin}to{end}_noise{noise}.npz"
+        np.savez(filename, x = xs, y = ys, u = us)
 
 
 def generate_lorenz():
@@ -144,6 +145,19 @@ def generate_vdp():
                     make_dataset(vdp, x0=np.array([0.1, 0.1]), num_trajectories=num_trajectory, num_dim=num_dim, begin=begin, end=end,
                                      noise=noise)
 
+def test_make_dataset():
+    params = dict(
+        num_trajectories=1,
+        begin=500,
+        end=20500,
+        noise=0.05,
+        save=True
+    )
+
+    make_dataset(lorenz, num_dim=3, x0=np.array([0., 1., 1.05]), **params)
+    make_dataset(vdp,    num_dim=2, x0=np.array([0.1, 0.1])    , **params)
+
+
 if __name__ == '__main__':
     if len(sys.argv) == 2 and sys.argv[1] == 'lorenz':
         generate_lorenz()

diff --git a/environment-cpu.yml b/environment-cpu.yml
@@ -0,0 +1,15 @@
+name: neuralflow
+channels:
+  - conda-forge
+dependencies:
+  - python=3.9
+  - jax
+  - jupyter
+  - jupyterlab
+  - matplotlib
+  - numpy
+  - pip
+  - pytest
+  - scipy
+  - scikit-learn
+  - tqdm
diff --git a/scripts/dimension_reduction_neuropixels.py b/scripts/dimension_reduction_neuropixels.py
@@ -1,16 +1,23 @@
 #%%
+import os
+import sys
+
+sys.path.append(os.path.abspath(os.path.dirname("__file__")))
+
 import time
+from pathlib import Path
+
 import numpy as np
+import proSVD
 from scipy import io as sio
-
 from sklearn import random_projection as rp
-import proSVD
 
 #%% loading processed matlab data
 # one mouse
-dataloc = './'
-file = 'WaksmanwithFaces_KS2.mat'
-matdict = sio.loadmat(dataloc+file, squeeze_me=True)
+assert len(sys.argv) == 2
+file = Path(sys.argv[1])
+
+matdict = sio.loadmat(file, squeeze_me=True)
 spks = matdict['stall']
 
 # truncate so it doesn't take forever
@@ -44,6 +51,8 @@ def reduce_sparseRP(X, comps=100, eps=0.1, transformer=None):
 t = l1
 transformer = rp.SparseRandomProjection(n_components=rp_dim)
 for i in range(num_iters):
+    if i & 15 == 0:  # Check if multiple of 16
+        print(f"ssSVD Iteration {i}/{num_iters}")
     start_rp = time.time()
     X_rp = reduce_sparseRP(X, transformer=transformer)
     end_rp = time.time()
@@ -75,6 +84,6 @@ def reduce_sparseRP(X, comps=100, eps=0.1, transformer=None):
             projs[i][:, t:t+l] = curr_basis.T @ curr_neural
             t += l
 
-np.savez('neuropixel_reduced.npz', ssSVD10=proj_stream_ssSVD)
+np.savez(file.parent, ssSVD10=proj_stream_ssSVD)
 
 # %%
diff --git a/scripts/run_bubblewrap.py b/scripts/run_bubblewrap.py
@@ -6,7 +6,6 @@
 import numpy as np
 
 import matplotlib
-matplotlib.use('TkAgg')
 import matplotlib.pylab as plt
 from matplotlib.patches import Ellipse
 from mpl_toolkits.mplot3d import Axes3D
@@ -21,7 +20,7 @@
 s = np.load('lorenz_1trajectories_3dim_500to20500_noise0.05.npz')
 data = s['y'][0]
 
-T = data.shape[0]       # should be 20k
+T = data.shape[0] // 10 # should be 20k
 d = data.shape[1]       # should be 2
 
 ## Parameters