LSI-enabled BBKNN #301

Merged · 7 commits · Sep 4, 2024
99 changes: 99 additions & 0 deletions .github/workflows/integration03-ci.yml
@@ -0,0 +1,99 @@
name: Run integration03

on:
push:
branches:
- main
pull_request:
branches:
- main

env:
debug: 'true'

jobs:
integration:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest"] # , "macos-latest", "windows-latest"
python-version: ["3.10"]

steps:
- uses: actions/checkout@v4

- name: File tree
if: env.debug == 'true'
run: tree

- uses: conda-incubator/setup-miniconda@v3
with:
miniforge-version: latest
auto-activate-base: true
auto-update-conda: true
channels: conda-forge
channel-priority: strict
activate-environment: pipeline_env
environment-file: pipeline_env.yaml
# important: this patch is only to test if multivi integration works
# issues are not related to panpipes https://discourse.scverse.org/t/error-when-training-model-on-m3-max-mps/1896/2
# https://discourse.scverse.org/t/macbook-m1-m2-mps-acceleration-with-scvi/2075/4
- name: Install Panpipes
shell: bash -el {0}
run: |
pip install -e .
conda list

- name: Conda info
if: env.debug == 'true'
shell: bash -el {0}
run: conda info

- name: Conda list
if: env.debug == 'true'
shell: pwsh
run: conda list

# Note: the file is renamed during the download to trim the "subsample_" prefix
- name: Preparing the data
run: |
mkdir -p teaseq/integration && cd teaseq/integration
curl -L -o teaseq.h5mu https://figshare.com/ndownloader/files/44796985

# Note: we run the following to test that the command works
# However, the following task will replace the file anyway
- name: Preparing the configuration file
shell: bash -el {0}
run: |
cd teaseq/integration
panpipes integration config

- name: Edit the submission file
run: |
cd teaseq/integration
curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes/1849a8c65aa67702f423da2c3b2d1d9238adac6d/tests/integration_3/pipeline.yml
- name: Replace template contents in configuration file
run: |
cd teaseq/integration
sed -i 's+/Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env+pipeline_env+g' pipeline.yml

- name: File tree
if: env.debug == 'true'
run: tree teaseq

- name: Review pipeline tasks
shell: bash -el {0}
run: |
cd teaseq/integration
panpipes integration show full --local

- name: Run pipeline tasks
shell: bash -el {0}
run: |
cd teaseq/integration
panpipes integration make full --local

- name: File tree
if: env.debug == 'true'
run: tree teaseq
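The `sed` step in the workflow above swaps the absolute conda-env path baked into the downloaded `pipeline.yml` for the bare environment name. The same substitution can be sketched in Python (the path and environment name mirror the workflow; the helper function itself is illustrative):

```python
# Template value shipped in the test pipeline.yml and its replacement,
# mirroring the sed expression in the workflow step above.
OLD = "/Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env"
NEW = "pipeline_env"

def patch_config(text: str) -> str:
    """Replace every occurrence of the absolute env path with the env name,
    equivalent to: sed -i 's+<OLD>+<NEW>+g' pipeline.yml"""
    return text.replace(OLD, NEW)
```

The `+` delimiter in the `sed` expression avoids having to escape the slashes in the path; `str.replace` needs no such escaping.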
10 changes: 6 additions & 4 deletions README.md
@@ -7,7 +7,9 @@
Panpipes is a set of computational workflows designed to automate multimodal single-cell and spatial transcriptomic analyses by incorporating widely-used Python-based tools to perform quality control, preprocessing, integration, clustering, and reference mapping at scale.
Panpipes allows reliable and customisable analysis and evaluation of individual and integrated modalities, thereby empowering decision-making before downstream investigations.

**See our [documentation](https://panpipes-pipelines.readthedocs.io/en/latest/) and our [preprint](https://www.biorxiv.org/content/10.1101/2023.03.11.532085v2)**
**See our [documentation](https://panpipes-pipelines.readthedocs.io/en/latest/)**

**Panpipes is on [Genome Biology!](https://link.springer.com/article/10.1186/s13059-024-03322-7)**

These workflows make use of [cgat-core](https://github.com/cgat-developers/cgat-core)

@@ -53,9 +55,9 @@ Check the example [submission file](https://github.com/DendrouLab/panpipes/blob/

## Citation

[Panpipes: a pipeline for multiomic single-cell and spatial transcriptomic data analysis
Fabiola Curion, Charlotte Rich-Griffin, Devika Agarwal, Sarah Ouologuem, Tom Thomas, Fabian J. Theis, Calliope A. Dendrou
bioRxiv 2023.03.11.532085; doi: https://doi.org/10.1101/2023.03.11.532085](https://www.biorxiv.org/content/10.1101/2023.03.11.532085v2)
[Curion, F., Rich-Griffin, C., Agarwal, D. et al. Panpipes: a pipeline for multiomic single-cell and spatial transcriptomic data analysis. Genome Biol 25, 181 (2024).
doi: https://doi.org/10.1186/s13059-024-03322-7](https://link.springer.com/article/10.1186/s13059-024-03322-7)


## Contributors

3 changes: 1 addition & 2 deletions panpipes/panpipes/pipeline_integration.py
@@ -545,8 +545,7 @@ def run_bbknn_atac(outfile):
cmd += " --neighbors_within_batch %i" % PARAMS['atac']['bbknn']['neighbors_within_batch']
if PARAMS['atac']['neighbors']['npcs'] is not None:
cmd += " --neighbors_n_pcs %s" % PARAMS['atac']['neighbors']['npcs']
#Forcing bbknn to run on PCA in case of atac
cmd += " --dimred PCA"
#cmd += " --dimred PCA"
cmd += " > logs/3_atac_bbknn.log "
if PARAMS['queues_long'] is not None:
job_kwargs["job_queue"] = PARAMS['queues_long']
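With the forced `--dimred PCA` line commented out above, the dimensionality-reduction flag now comes from the pipeline configuration. A minimal sketch of how the command string is assembled, in the same `%`-formatting style as `pipeline_integration.py` (the parameter keys here are simplified placeholders, not the actual nested `PARAMS` structure):

```python
def build_bbknn_cmd(params: dict) -> str:
    """Assemble a batch_correct_bbknn.py invocation from config values.

    Sketch only: flag names follow the script's argparse interface,
    but the flat params dict is a stand-in for the pipeline's PARAMS."""
    cmd = "python batch_correct_bbknn.py --modality atac"
    if params["neighbors_within_batch"] is not None:
        cmd += " --neighbors_within_batch %i" % params["neighbors_within_batch"]
    if params["npcs"] is not None:
        cmd += " --neighbors_n_pcs %s" % params["npcs"]
    # dimred is no longer forced to PCA; the configured value (PCA or LSI)
    # is passed through so ATAC runs can use an LSI representation
    cmd += " --dimred %s" % params["dimred"]
    cmd += " > logs/3_atac_bbknn.log"
    return cmd
```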
27 changes: 11 additions & 16 deletions panpipes/python_scripts/batch_correct_bbknn.py
@@ -56,32 +56,26 @@
# bbknn can't integrate on 2+ variables, so create a fake column with combined information
columns = [x.strip() for x in args.integration_col.split(",")]

if args.modality =="atac":
if "scaled_counts" in adata.layers.keys():
pass
else:
L.info("To run BBKNN on ATAC, PCA is needed. Computing PCA now.")
L.info("Scaling data and saving scaled counts to .layers['scaled_counts']")
sc.pp.scale(adata)
adata.layers["scaled_counts"] = adata.X.copy()
L.info("Computing PCA")
sc.tl.pca(adata, n_comps=min(50,adata.var.shape[0]-1), svd_solver='arpack', random_state=0)

if "X_pca" not in adata.obsm:
L.warning("X_pca could not be found in adata.obsm. Computing PCA with default parameters.")
if args.dimred == "PCA":
dimred = "X_pca"
elif args.dimred == "LSI":
dimred = "X_lsi"

if dimred not in adata.obsm:
L.warning("Dimred '%s' could not be found in adata.obsm. Computing PCA with default parameters." % dimred)
dimred = "X_pca"
n_pcs = 50
if adata.var.shape[0] < n_pcs:
L.info("You have fewer features than the number of PCs you intend to calculate")
n_pcs = adata.var.shape[0] - 1
L.info("Setting n PCS to %i" % int(n_pcs))
L.info("Scaling data")
L.info("Setting n PCS to %i" % int(n_pcs))
L.info("Scaling data")
sc.pp.scale(adata)
L.info("Computing PCA")
sc.tl.pca(adata, n_comps=n_pcs,
svd_solver='arpack',
random_state=0)


L.info("Preparing for integration")

if len(columns) > 1:
@@ -99,6 +93,7 @@
# run bbknn
L.info("Running BBKNN")
adata = sc.external.pp.bbknn(adata,
use_rep=dimred,
batch_key=args.integration_col,
copy=True,
n_pcs = int(args.neighbors_n_pcs),
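The fallback logic introduced in this file — use the requested representation if it is present in `.obsm`, otherwise recompute PCA with default parameters — can be isolated into a small helper. This is a sketch: the `.obsm` key names (`X_pca`, `X_lsi`) and the cap of `n_vars - 1` PCs follow the script, but the function itself is hypothetical:

```python
def choose_dimred(obsm_keys, requested, n_vars, default_n_pcs=50):
    """Pick the .obsm key to pass as use_rep to BBKNN, falling back to PCA.

    Returns (key, n_pcs_to_compute): n_pcs_to_compute is None when the
    requested representation already exists; otherwise it is the number
    of PCs to calculate, capped at n_vars - 1 as in the script."""
    key = {"PCA": "X_pca", "LSI": "X_lsi"}[requested]
    if key in obsm_keys:
        return key, None
    # Requested representation is missing: fall back to computing PCA,
    # reducing n_comps when there are fewer features than default_n_pcs
    n_pcs = min(default_n_pcs, n_vars - 1)
    return "X_pca", n_pcs
```

The returned key is what ends up as `use_rep=dimred` in the `sc.external.pp.bbknn` call above, which is how an LSI representation reaches BBKNN for ATAC data.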
2 changes: 1 addition & 1 deletion tests/integration_1/pipeline.yml
@@ -154,7 +154,7 @@ atac:
# True or false depending on whether you want to run batch correction
run: True
# which dimensionality reduction to expect, LSI or PCA
dimred: LSI
dimred: PCA
# what method(s) to use to run batch correction, you can specify multiple
# (comma-separated string, no spaces)
# choices: harmony,bbknn,combat