diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
new file mode 100644
index 0000000..24781c6
--- /dev/null
+++ b/.github/workflows/python.yaml
@@ -0,0 +1,86 @@
+name: Run w/ Synthetic Data
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  schedule:
+    - cron: "2 5 * * *"
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    env:
+      PATH_NEUROPIXELS: ${{ github.workspace }}/data/neuropixels
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - name: Add conda to system path
+        run: echo $CONDA/bin >> $GITHUB_PATH
+
+      - name: Cache Data
+        uses: actions/cache@v2
+        with:
+          path: |
+            ${{ env.PATH_NEUROPIXELS }}/faces.zip
+            ${{ env.PATH_NEUROPIXELS }}/spks.zip
+          key: ${{ runner.os }}-${{ hashFiles('**/neuropixels/*.zip') }}
+          restore-keys: |
+            ${{ runner.os }}-
+
+      - name: Download Neuropixels Data
+        working-directory: ${{ env.PATH_NEUROPIXELS }}
+        # Neuropixels data: see https://figshare.com/articles/dataset/Eight-probe_Neuropixels_recordings_during_spontaneous_behaviors/7739750
+        # Only using data from mouse Waksman
+        run: |
+          wget -O faces.zip -N https://figshare.com/ndownloader/files/14405048
+          wget -O spks.zip -N https://figshare.com/ndownloader/files/14405057
+          wget -O probeLocations.mat https://figshare.com/ndownloader/files/14570108
+          wget -O probeBorders.mat https://figshare.com/ndownloader/files/14653298
+          unzip faces.zip -x faces/Robbins_face_proc.mat faces/Krebs_face_proc.mat
+          unzip spks.zip -x spks/Robbins_Feb18.mat spks/Krebs_Feb18.mat
+
+      - name: Set up MATLAB
+        uses: matlab-actions/setup-matlab@v1
+
+      # Produces WaksmanwithFaces_KS2.mat
+      - name: Process raw neuropixels data
+        uses: matlab-actions/run-command@v1
+        with:
+          command: cd('${{ env.PATH_NEUROPIXELS }}'), ephysLoad
+
+      - name: Install dependencies
+        run: |
+          conda config --add channels conda-forge
+          conda env update --file environment-cpu.yml --name base
+
+      - name: Run Bubblewrap
+        run: |
+          python scripts/dimension_reduction_neuropixels.py ${{ env.PATH_NEUROPIXELS }}/WaksmanwithFaces_KS2.mat
+
+          conda install -y jupytext
+          pytest datagen.py
+          jupytext --from py --to ipynb scripts/run_bubblewrap.py --execute --run-path .
+          jupyter nbconvert --to html scripts/run_bubblewrap.ipynb
+          mkdir public
+          mv scripts/run_bubblewrap.html public/index.html
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: Synthetic data run
+          path: public/index.html
+
+      - name: Deploy to GitHub Pages
+        uses: peaceiris/actions-gh-pages@v3
+        if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/github_actions'
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: public
diff --git a/data/neuropixels/ephysLoad.m b/data/neuropixels/ephysLoad.m
new file mode 100644
index 0000000..430e8fd
--- /dev/null
+++ b/data/neuropixels/ephysLoad.m
@@ -0,0 +1,106 @@
+% Neuropixels data
+% Download all the files at https://figshare.com/articles/dataset/Eight-probe_Neuropixels_recordings_during_spontaneous_behaviors/7739750
+% Slightly modified from https://figshare.com/ndownloader/files/14570105
+
+ephysroot = '.';
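+
+% expected inputs under ephysroot (downloaded by the workflow's "Download Neuropixels Data" step):
+%   faces/Waksman_face_proc.mat  - motion SVD of the face movie
+%   spks/spksWaksman_Feb18.mat   - spike times and cluster info per probe
+%   probeLocations.mat           - probe borders and brain-area labels
+% output: WaksmanwithFaces_KS2.mat, written to ephysroot
+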
+% mouse names
+% Original: mstr = {'Krebs','Waksman','Robbins'};
+% doing only Waksman
+mstr = {'Waksman'};
+% start of spontaneous activity in each mouse
+% (kept in the original Krebs/Waksman/Robbins order; indexed via iorig below)
+tstart = [3811 3633 3323];
+
+% probe location in brain information
+% borders tells you region based on depth
+% ccfCoords tells you position in microns relative to allen CCF
+load(fullfile(ephysroot, 'probeLocations.mat'));
+
+% possible brain regions (as strings)
+areaLabels = {'FrCtx','FrMoCtx','SomMoCtx','SSCtx','V1','V2','RSP',...
+    'CP','LS','LH','HPF','TH','SC','MB'};
+
+%%
+% which mouse
+brainLocAll=[];
+for imouse = [1:length(mstr)]
+    mouse_name = mstr{imouse};
+    % index of this mouse in the original three-mouse ordering,
+    % which tstart and probeLocations still follow
+    iorig = find(strcmp({'Krebs','Waksman','Robbins'}, mouse_name));
+
+    % load spikes
+    load(fullfile(ephysroot, sprintf('spks/spks%s_Feb18.mat',mouse_name)));
+    % spks(k).st = spike times (in seconds)
+    % spks(k).clu = cluster identity of each spike in st (which neuron does spike belong to)
+    % spks(k).Wheights = height of each cluster on the probe
+
+    % load behavior file
+    beh = load(fullfile(ephysroot, sprintf('faces/%s_face_proc.mat',mouse_name)));
+    % movie of mouse face during recording was dimensionality reduced using SVD
+    % beh.motionSVD = timepoints x components
+    % beh.motionMask = pixels x pixels x components
+    % beh.times = times of behavioral frames (in seconds) in same timeframe as spikes
+    motSVD = beh.motionSVD;
+    tVid = beh.times; % times of movie frames in spike reference frame
+
+    %% extract spike times and create a matrix neurons x time
+    stall = zeros(5e3,5500,'uint8');
+    ij = 0;
+    maxt=0;
+    Wh = [];
+    iprobe=[];
+    brainLoc=[];
+    srate = 30; % sampling rate in Hz (how much to bin matrix)
+    % loop over probes
+    for k = 1:numel(spks)
+        clu = spks(k).clu; % cluster ids
+        st = spks(k).st; % spike times in seconds
+        st = round(st*srate); % spike times in 30Hz
+
+        % transform spike times into a matrix
+        % any duplicates of spike times are added together
+        S = sparse(st, clu, ones(1, numel(clu)));
+        S = uint8(full(S))';
+        % there might be missing numbers (bad spike clusters)
+        S = S(sort(unique(clu)),:);
+
+        % add these to the big matrix with all probes
+        stall(ij+[1:size(S,1)],1:size(S,2)) = S;
+        ij = ij + size(S,1);
+        maxt = max(maxt, size(S,2));
+
+        % height of clusters on the probe
+        % we will use these to determine the brain area
+        whp = spks(k).Wheights(sort(unique(clu)));
+
+        % borders of brain areas wrt the probe
+        lowerBorder = probeLocations(iorig).probe(k).borders.lowerBorder;
+        upperBorder = probeLocations(iorig).probe(k).borders.upperBorder;
+        acronym = probeLocations(iorig).probe(k).borders.acronym;
+        loc = zeros(numel(whp),1);
+        % determine brain area for each cluster based on whp
+        for j = 1:numel(acronym)
+            whichArea = find(strcmp(areaLabels, acronym{j}));
+            loc(whp >= lowerBorder(j) & whp < upperBorder(j)) = whichArea;
+        end
+
+        % concatenate for all probes
+        Wh = [Wh; whp];
+        brainLoc = [brainLoc; loc];
+        iprobe=[iprobe; k * ones(size(S,1),1)];
+    end
+    %%
+    stall = stall(1:ij, 1:maxt);
+
+    % only use spontaneous time frames
+    tspont = tstart(iorig)*srate : min(floor(tVid(end)*srate), size(stall,2)-4);
+    stall = stall(:,tspont);
+    tspont = tspont / srate; % put back in seconds
+
+    % to put the behavioral data into the spike frames (no time delays)
+    x = interp1(tVid, motSVD, tspont);
+
+    %%% save the extracted spike matrix and brain locations and faces %%%
+    save(fullfile('.', sprintf('%swithFaces_KS2.mat',mouse_name)), 'stall','Wh','iprobe',...
+        'motSVD','tspont','tVid','srate','brainLoc','areaLabels');
+
+    brainLocAll=[brainLocAll;brainLoc];
+
+end
\ No newline at end of file
diff --git a/datagen.py b/datagen.py
index 4e10736..53558f2 100644
--- a/datagen.py
+++ b/datagen.py
@@ -89,7 +89,7 @@ def gen_data_diffeq(f: Callable, projection: Callable, *, t, x0: np.ndarray, dim
     return ivp["t"], y, projed


-def make_dataset(f, x0, num_trajectories, num_dim, begin, end, noise):
+def make_dataset(f, x0, num_trajectories, num_dim, begin, end, noise, save=True):
     xx = [] # states
     projeds = [] # observations
     rng = np.random.RandomState(39)
@@ -110,8 +110,9 @@ def make_dataset(f, x0, num_trajectories, num_dim, begin, end, noise):
     ys = projeds
     us = np.zeros((xx.shape[0], xx.shape[1], 1))

-    filename = f"{f.__name__}_{num_trajectories}trajectories_{num_dim}dim_{begin}to{end}_noise{noise}.npz"
-    np.savez(filename, x = xs, y = ys, u = us)
+    if save:
+        filename = f"{f.__name__}_{num_trajectories}trajectories_{num_dim}dim_{begin}to{end}_noise{noise}.npz"
+        np.savez(filename, x = xs, y = ys, u = us)


 def generate_lorenz():
@@ -144,6 +145,19 @@ def generate_vdp():
     make_dataset(vdp, x0=np.array([0.1, 0.1]), num_trajectories=num_trajectory, num_dim=num_dim, begin=begin, end=end, noise=noise)


+def test_make_dataset():
+    params = dict(
+        num_trajectories=1,
+        begin=500,
+        end=20500,
+        noise=0.05,
+        save=True
+    )
+
+    make_dataset(lorenz, num_dim=3, x0=np.array([0., 1., 1.05]), **params)
+    make_dataset(vdp, num_dim=2, x0=np.array([0.1, 0.1]), **params)
+
+
 if __name__ == '__main__':
     if len(sys.argv) == 2 and sys.argv[1] == 'lorenz':
         generate_lorenz()
diff --git a/environment-cpu.yml b/environment-cpu.yml
new file mode 100644
index 0000000..1ba67aa
--- /dev/null
+++ b/environment-cpu.yml
@@ -0,0 +1,15 @@
+name: neuralflow
+channels:
+  - conda-forge
+dependencies:
+  - python=3.9
+  - jax
+  - jupyter
+  - jupyterlab
+  - matplotlib
+  - numpy
+  - pip
+  - pytest
+  - scipy
+  - scikit-learn
+  - tqdm
diff --git a/scripts/dimension_reduction_neuropixels.py b/scripts/dimension_reduction_neuropixels.py
index b868b3b..53d273f 100644
--- a/scripts/dimension_reduction_neuropixels.py
+++ b/scripts/dimension_reduction_neuropixels.py
@@ -1,16 +1,23 @@
 #%%
+import os
+import sys
+
+sys.path.append(os.getcwd())  # make modules in the working directory (the repo root in CI) importable
+
 import time
+from pathlib import Path
+
 import numpy as np
+import proSVD
 from scipy import io as sio
-
 from sklearn import random_projection as rp
-import proSVD

 #%% loading processed matlab data
 # one mouse
-dataloc = './'
-file = 'WaksmanwithFaces_KS2.mat'
-matdict = sio.loadmat(dataloc+file, squeeze_me=True)
+assert len(sys.argv) == 2, "usage: dimension_reduction_neuropixels.py <path to WaksmanwithFaces_KS2.mat>"
+file = Path(sys.argv[1])
+
+matdict = sio.loadmat(file, squeeze_me=True)
 spks = matdict['stall']

 # truncate so it doesn't take forever
@@ -44,6 +51,8 @@ def reduce_sparseRP(X, comps=100, eps=0.1, transformer=None):
 t = l1
 transformer = rp.SparseRandomProjection(n_components=rp_dim)
 for i in range(num_iters):
+    if i % 16 == 0:  # report progress every 16th iteration
+        print(f"ssSVD Iteration {i}/{num_iters}")
     start_rp = time.time()
     X_rp = reduce_sparseRP(X, transformer=transformer)
     end_rp = time.time()
@@ -75,6 +84,6 @@ def reduce_sparseRP(X, comps=100, eps=0.1, transformer=None):
     projs[i][:, t:t+l] = curr_basis.T @ curr_neural
     t += l

-np.savez('neuropixel_reduced.npz', ssSVD10=proj_stream_ssSVD)
+np.savez(file.parent / 'neuropixel_reduced.npz', ssSVD10=proj_stream_ssSVD)  # save next to the input .mat

 # %%
diff --git a/scripts/run_bubblewrap.py b/scripts/run_bubblewrap.py
index 549e06a..9dd21d6 100644
--- a/scripts/run_bubblewrap.py
+++ b/scripts/run_bubblewrap.py
@@ -6,7 +6,6 @@
 import numpy as np
 import matplotlib
-matplotlib.use('TkAgg')
 import matplotlib.pylab as plt
 from matplotlib.patches import Ellipse
 from mpl_toolkits.mplot3d import Axes3D

@@ -21,7 +20,7 @@
 s = np.load('lorenz_1trajectories_3dim_500to20500_noise0.05.npz')
 data = s['y'][0]

-T = data.shape[0] # should be 20k
+T = data.shape[0] // 10 # use only the first tenth of the ~20k samples
 d = data.shape[1] # should be 2

 ## Parameters
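
For reference, a minimal sketch of how to inspect the synthetic dataset that `pytest datagen.py` writes and `scripts/run_bubblewrap.py` loads above. The file name and keys come from `make_dataset` in datagen.py (`np.savez(filename, x=xs, y=ys, u=us)`); the array shapes are assumptions inferred from how the scripts index the data, not verified output.

import numpy as np

# File produced by test_make_dataset() / generate_lorenz() in datagen.py and
# consumed by scripts/run_bubblewrap.py.
s = np.load('lorenz_1trajectories_3dim_500to20500_noise0.05.npz')

x, y, u = s['x'], s['y'], s['u']   # latent states, observations, inputs (all-zero)
print(x.shape, y.shape, u.shape)   # expected roughly: (1, T, 3), (1, T, obs_dim), (1, T, 1)

data = y[0]                        # run_bubblewrap.py uses the first (only) trajectory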