scenic+ temp_dir changed

openproblems-bio · Aug 28, 2024 · 3282935 · 3282935
1 parent d2a82e2
commit 3282935
Show file tree

Hide file tree

Showing 24 changed files with 661 additions and 276 deletions.
diff --git a/hg38_screen_v10_clust.regions_vs_motifs.rankings.feather.aF53FD8D b/hg38_screen_v10_clust.regions_vs_motifs.rankings.feather.aF53FD8D
diff --git a/hg38_screen_v10_clust.regions_vs_motifs.scores.feather.e391A0Ff b/hg38_screen_v10_clust.regions_vs_motifs.scores.feather.e391A0Ff
diff --git a/runs.ipynb b/runs.ipynb
@@ -3432,9 +3432,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "n_iter = 100\n",
-    "# net_corr = net.sample(len(tfs), axis=1)\n",
-    "# net_corr"
+    "n_iter = 100"
    ]
   },
   {
@@ -5226,252 +5224,151 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 59,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[0.6338300913837829,\n",
-       " 0.6306478594813784,\n",
-       " 0.6326987253863688,\n",
-       " 0.6343378657887133,\n",
-       " 0.6326487820571753,\n",
-       " 0.6356536986072162,\n",
-       " 0.6301186542660742,\n",
-       " 0.6333196717157581,\n",
-       " 0.6273994641913987,\n",
-       " 0.6308608591441454,\n",
-       " 0.6289856651506277,\n",
-       " 0.6324828084784677,\n",
-       " 0.6334077063898018,\n",
-       " 0.6330415245102411,\n",
-       " 0.6333819774731821,\n",
-       " 0.6338439982092101,\n",
-       " 0.6319344177161561,\n",
-       " 0.6341557592562631,\n",
-       " 0.6307591889466913,\n",
-       " 0.6322247379885256,\n",
-       " 0.6306792388537548,\n",
-       " 0.6306696924928431,\n",
-       " 0.6285119151639946,\n",
-       " 0.6304831829144517,\n",
-       " 0.6308121613551088,\n",
-       " 0.6327618388683754,\n",
-       " 0.6314231674476822,\n",
-       " 0.6312884282584914,\n",
-       " 0.631142772964502,\n",
-       " 0.6320765893840486,\n",
-       " 0.6325844910761261,\n",
-       " 0.6301334652402374,\n",
-       " 0.6318591760956985,\n",
-       " 0.6320593747906565,\n",
-       " 0.6331810761679495,\n",
-       " 0.6314261332296257,\n",
-       " 0.6319687151358445,\n",
-       " 0.6307678470102952,\n",
-       " 0.6281131512485407,\n",
-       " 0.6348718405843499,\n",
-       " 0.6291553093895569,\n",
-       " 0.6303376046037746,\n",
-       " 0.6330107358097978,\n",
-       " 0.6351402003670711,\n",
-       " 0.6346943470719033,\n",
-       " 0.6286424673972038,\n",
-       " 0.6339761428059248,\n",
-       " 0.6276169663820544,\n",
-       " 0.6286235357673238,\n",
-       " 0.6342169461721089,\n",
-       " 0.6290226581971139,\n",
-       " 0.6314228512414537,\n",
-       " 0.6311366707045039,\n",
-       " 0.6327379499422254,\n",
-       " 0.6300788825982234,\n",
-       " 0.6334970039445497,\n",
-       " 0.6315234639260399,\n",
-       " 0.631137721674292,\n",
-       " 0.6322800527494545,\n",
-       " 0.6310507562501161,\n",
-       " 0.6327209453667971,\n",
-       " 0.627939229431186,\n",
-       " 0.6331771446984572,\n",
-       " 0.6335969281489019,\n",
-       " 0.6311342753756505,\n",
-       " 0.6318286221517255,\n",
-       " 0.6302232282749309,\n",
-       " 0.6289309088132349,\n",
-       " 0.6290424006853144,\n",
-       " 0.6337237311185296,\n",
-       " 0.6277177540589839,\n",
-       " 0.6291169600379366,\n",
-       " 0.6320369605246907,\n",
-       " 0.6280705657169147,\n",
-       " 0.6329689969853456,\n",
-       " 0.6285159998085228,\n",
-       " 0.6293034934160794,\n",
-       " 0.6327535340594785,\n",
-       " 0.6314765267336526,\n",
-       " 0.6355853836785672,\n",
-       " 0.6318860982148191,\n",
-       " 0.6326334172009915,\n",
-       " 0.6318694762265682,\n",
-       " 0.6335425559144152,\n",
-       " 0.6339342326320021,\n",
-       " 0.6297642298354758,\n",
-       " 0.6265141566138808,\n",
-       " 0.6340795309921302,\n",
-       " 0.6315806409233686,\n",
-       " 0.6328343170100053,\n",
-       " 0.6315533175464346,\n",
-       " 0.6314159987840019,\n",
-       " 0.6342163661185687,\n",
-       " 0.6323350759040771,\n",
-       " 0.6303224210995871,\n",
-       " 0.6312488527545446,\n",
-       " 0.6326553474980217,\n",
-       " 0.6336276777942278,\n",
-       " 0.6323158375908737]"
+       "95"
       ]
      },
-     "execution_count": 57,
+     "execution_count": 59,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "\n",
-    "corr_scores"
+    "# Count the entries of corr_scores that fall below pc_score. np.asarray makes the\n",
+    "# comparison elementwise even when corr_scores is a plain list and pc_score a Python float.\n",
+    "(np.asarray(corr_scores) < pc_score).sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Correlation-based GRN from the multiomics RNA data"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 69,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.6346976227062416"
-      ]
-     },
-     "execution_count": 58,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "pc_score"
+    "multiomics_rna = ad.read_h5ad('resources/grn-benchmark/multiomics_rna.h5ad')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 70,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "95"
-      ]
-     },
-     "execution_count": 59,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "(corr_scores<pc_score).sum()"
+    "import scanpy as sc \n",
+    "sc.pp.normalize_total(multiomics_rna)\n",
+    "sc.pp.log1p(multiomics_rna)\n",
+    "sc.pp.scale(multiomics_rna)\n",
+    "X = multiomics_rna.X"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 75,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "Index(['AAGCCACGTCAGGCAT3', 'AAGGATGTCAGGCTAT3', 'AAGTTACGTTAGGCTA3',\n",
-       "       'ACCTGGATCCCTCAGT3', 'ACCTTGTGTACTTCAC3', 'AGACTATGTGCGCGTA3',\n",
-       "       'AGAGAGGAGCTTAACA3', 'AGCTATATCATGTCAA3', 'AGCTTGGTCCTAGTTT3',\n",
-       "       'AGTAGCTTCCTTAGGG3',\n",
-       "       ...\n",
-       "       'CTTACTAGTGCCTCAC3', 'AACTAGCTCTGGCAAT3', 'TCCTCTAAGGCGCTAC3',\n",
-       "       'TCATGTTTCGTTACTT3', 'ACTTGAATCAGCACGC3', 'TAGTGTGGTTGGTTGA3',\n",
-       "       'GTCATGCCACCTCGCT3', 'AGTAAACAGTCACCAG3', 'ACCTCACTCAAGGACA3',\n",
-       "       'CTTAACAAGAGCCGGA3'],\n",
-       "      dtype='object', length=2000)"
-      ]
-     },
-     "execution_count": 60,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Processing groups: 100%|██████████| 4/4 [00:43<00:00, 10.93s/it]\n"
+     ]
     }
    ],
    "source": [
-    "import pandas as pd \n",
-    "pd.read_csv('resources_test/prior/cell_topic.csv', index_col=0).index"
+    "def corr_grn(X: np.ndarray, groups: np.ndarray) -> np.ndarray:\n",
+    "    \"\"\"Average the per-group column-by-column correlation matrices of X.\n",
+    "\n",
+    "    Args:\n",
+    "        X: 2-D array; rows are selected by group label, columns are correlated.\n",
+    "        groups: one label per row of X.\n",
+    "\n",
+    "    Returns:\n",
+    "        Square array (columns x columns): the unweighted mean over groups of\n",
+    "        X_sub.T @ X_sub / n_rows, where X_sub is the group's rows after StandardScaler.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if groups contains no labels.\n",
+    "    \"\"\"\n",
+    "    groups = np.asarray(groups)\n",
+    "    labels = np.unique(groups)\n",
+    "    if labels.size == 0:\n",
+    "        raise ValueError(\"corr_grn needs at least one group\")\n",
+    "    # Keep a running sum rather than a list of matrices: each matrix is\n",
+    "    # columns x columns, so storing one per group multiplies peak memory.\n",
+    "    grn_sum = None\n",
+    "    for group in tqdm(labels, desc=\"Processing groups\"):\n",
+    "        X_sub = StandardScaler().fit_transform(X[groups == group, :])\n",
+    "        grn = np.dot(X_sub.T, X_sub) / X_sub.shape[0]\n",
+    "        if grn_sum is None:\n",
+    "            grn_sum = grn\n",
+    "        else:\n",
+    "            grn_sum += grn\n",
+    "    return grn_sum / len(labels)\n",
+    "groups = multiomics_rna.obs.cell_type\n",
+    "corr_net = corr_grn(X, groups)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 79,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Index(['GTCTTTAGTTAGTGAT3', 'TAATGCATCGAGGAGT3', 'TCTAGCACACTATGGC3',\n",
-       "       'GCTAATATCGTCAAGT3', 'GTTACGTAGGTGCGGA3', 'CATTCCTCATTAAGTC3',\n",
-       "       'GGGTGTTGTTACTTGC3', 'GCAAGCCTCTAAGTCA3', 'CAATCTAAGGCTACAT3',\n",
-       "       'AAGCGAGGTTGTTGCT3',\n",
-       "       ...\n",
-       "       'GTGCTTACATGGCCCA3', 'ACGTTACAGCGAGCGA3', 'TGAGCACGTTGCCTCA3',\n",
-       "       'CTTAATGAGTTTCCTG3', 'AGGTGAATCGAGGAAC3', 'TCATGAGGTTTGTTGC3',\n",
-       "       'AAGTGAAGTCTTACTA3', 'ATACCTGCAGGAACTG3', 'CGCAATAGTGTAATAC3',\n",
-       "       'CTGCAATAGGTCCAAT3'],\n",
-       "      dtype='object', name='obs_id', length=2000)"
-      ]
-     },
-     "execution_count": 61,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "ad.read_h5ad('resources_test/grn-benchmark/multiomics_rna.h5ad').obs.index.str.replace('-','')"
+    "corr_net = pd.DataFrame(corr_net, index=multiomics_rna.var_names, columns=multiomics_rna.var_names)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 99,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep the 1000 columns with the largest total absolute correlation.\n",
+    "# Series.argsort() returns positions but keeps the original index labels, so\n",
+    "# argsort()[::-1][:1000].index only picked the last 1000 columns; nlargest ranks by value.\n",
+    "tfs = corr_net.abs().sum(axis=0).nlargest(1000).index.to_numpy()\n",
+    "corr_net_sub = corr_net[tfs]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Reshape the wide matrix into a long edge list (target, source, weight) and save it.\n",
+    "corr_net_sub = (\n",
+    "    corr_net_sub.reset_index()\n",
+    "    .melt(id_vars='location', var_name='source', value_name='weight')\n",
+    "    .rename(columns={'location': 'target'})\n",
+    ")\n",
+    "corr_net_sub.to_csv('output/causal/grns/corr_net_sub.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 101,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Reading input files\n",
+      "(549, 15215)\n",
+      "Compute metrics for layer: pearson\n",
+      "ex(False)_tf(-1)\n",
+      "\n",
+      "Processing groups:   0%|          | 0/5 [00:00<?, ?it/s]\n",
+      "Processing groups:  20%|██        | 1/5 [00:00<00:01,  2.06it/s]\n",
+      "Processing groups:  40%|████      | 2/5 [00:01<00:01,  1.99it/s]\n",
+      "Processing groups:  60%|██████    | 3/5 [00:01<00:00,  2.38it/s]\n",
+      "Processing groups:  80%|████████  | 4/5 [00:01<00:00,  2.28it/s]\n",
+      "Processing groups: 100%|██████████| 5/5 [00:02<00:00,  2.48it/s]\n",
+      "Processing groups: 100%|██████████| 5/5 [00:02<00:00,  2.35it/s]\n",
+      "/viash_automount/tmp/viash-run-regression_1-Itbpvt.py:48: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
+      "  output = ad.AnnData(\n",
+      "   ex(False)_tf(-1)      Mean\n",
+      "0          0.118948  0.118948\n",
+      "(2,) (2,)\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
-       "2000"
+       "CompletedProcess(args='viash run src/metrics/regression_1/config.vsh.yaml -- --prediction output/causal/grns/corr_net_sub.csv --score output/causal/scores/corr_net_sub.h5ad', returncode=0)"
       ]
      },
-     "execution_count": 62,
+     "execution_count": 101,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "import anndata as ad \n",
-    "ad.read_h5ad('resources_test/grn-benchmark/multiomics_rna.h5ad').obs.index.str.replace('-','').isin(pd.read_csv('resources_test/prior/cell_topic.csv', index_col=0).index).sum()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 63,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cell_topic=pd.read_csv('output/scenicplus/cell_topic.csv', index_col=0)\n",
-    "cell_topic.index = cell_topic.index.str.split('-').str[0]\n",
-    "cell_topic.to_csv('output/scenicplus/cell_topic.csv')"
+    "# Run the regression_1 metric component on the exported edge list.\n",
+    "cmd = \"viash run src/metrics/regression_1/config.vsh.yaml -- --prediction output/causal/grns/corr_net_sub.csv --score output/causal/scores/corr_net_sub.h5ad\"\n",
+    "# check=True raises CalledProcessError on a non-zero exit, so a failed run stops\n",
+    "# the notebook instead of passing silently.\n",
+    "subprocess.run(cmd, shell=True, check=True)"
    ]
   }
  ],