Merge pull request #34 from crichgriffin/different_adt_libs
[QC_MM] Multiple different ADT libraries
deevdevil88 authored Apr 12, 2023
2 parents 92452a3 + aa9c6ba commit 270e8a6
Showing 10 changed files with 45 additions and 52 deletions.
1 change: 0 additions & 1 deletion panpipes/funcs/processing.py
@@ -384,4 +384,3 @@ def mu_get_obs(mdata, features=[],modalities=[], layers=None):
df = concat(out, axis=1)
df.columns = ['-'.join(col).strip() for col in df.columns.values]
return df

1 change: 0 additions & 1 deletion panpipes/panpipes/pipeline_clustering.py
@@ -22,7 +22,6 @@
from panpipes.funcs.processing import extract_parameter_from_fname
PARAMS = P.get_parameters(
["%s/pipeline.yml" % os.path.splitext(__file__)[0],
"../pipeline.yml",
"pipeline.yml"])

PARAMS['py_path'] = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'python_scripts')
1 change: 0 additions & 1 deletion panpipes/panpipes/pipeline_integration.py
@@ -12,7 +12,6 @@

PARAMS = P.get_parameters(
["%s/pipeline.yml" % os.path.splitext(__file__)[0],
"../pipeline.yml",
"pipeline.yml"])


1 change: 0 additions & 1 deletion panpipes/panpipes/pipeline_preprocess.py
@@ -17,7 +17,6 @@

PARAMS = P.get_parameters(
["%s/pipeline.yml" % os.path.splitext(__file__)[0],
"../pipeline.yml",
"pipeline.yml"])


9 changes: 4 additions & 5 deletions panpipes/panpipes/pipeline_qc_mm.py
@@ -14,7 +14,6 @@

PARAMS = P.get_parameters(
["%s/pipeline.yml" % os.path.splitext(__file__)[0],
"../pipeline.yml",
"pipeline.yml"])

PARAMS['py_path'] = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'python_scripts')
@@ -126,10 +125,6 @@ def load_mudatas(rna_path, outfile,
cmd += " --fragments_file %(fragments_file)s"
if peak_annotation_file is not None and pd.notna(peak_annotation_file):
cmd += " --peak_annotation_file %(peak_annotation_file)s"
if PARAMS['protein_metadata_table'] is not None:
cmd += " --protein_var_table %(protein_metadata_table)s"
if PARAMS['index_col_choice'] is not None:
cmd += " --protein_new_index_col %(index_col_choice)s"
# ~ means this tests "is not nan"
if tcr_path is not None and pd.notna(tcr_path):
cmd += " --tcr_filtered_contigs %(tcr_path)s"
@@ -165,6 +160,10 @@ def concat_filtered_mudatas(infiles, outfile):
if PARAMS["barcode_mtd_include"] is True:
cmd += " --barcode_mtd_df %(barcode_mtd_path)s"
cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s"
if PARAMS['protein_metadata_table'] is not None:
cmd += " --protein_var_table %(protein_metadata_table)s"
if PARAMS['index_col_choice'] is not None:
cmd += " --protein_new_index_col %(index_col_choice)s"
cmd += " > logs/concat_filtered_mudatas.log"
job_kwargs["job_threads"] = PARAMS['resources_threads_high']
P.run(cmd, **job_kwargs)
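As an aside for readers unfamiliar with the command strings above: the %(name)s placeholders are filled in by the pipeline runner (cgat-core) from PARAMS and local variables before the command is executed. Below is a rough, hedged sketch of the same conditional-flag pattern in plain Python; it is a simplified stand-in, not the actual cgat-core substitution, and the paths used are hypothetical.

# Illustrative sketch only: simplified stand-in for the pipeline's
# %(placeholder)s substitution; the real interpolation is done by the
# workflow runner from PARAMS and local variables.
PARAMS = {
    "protein_metadata_table": "adt_metadata.tsv",  # hypothetical path
    "index_col_choice": None,
}

cmd = "python concat_adata.py --output_file all.h5mu"  # hypothetical output name
if PARAMS["protein_metadata_table"] is not None:
    cmd += " --protein_var_table %(protein_metadata_table)s"
if PARAMS["index_col_choice"] is not None:
    cmd += " --protein_new_index_col %(index_col_choice)s"

# Placeholder substitution, analogous to what P.run does before executing.
print(cmd % PARAMS)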
1 change: 0 additions & 1 deletion panpipes/panpipes/pipeline_refmap.py
@@ -12,7 +12,6 @@
# __file__="/well/cartography/users/zsj686/non_cart_projects/005-multimodal_scpipelines/src/sc_pipelines_muon_dev/panpipes/pipeline_refmap.py"
PARAMS = P.get_parameters(
["%s/pipeline.yml" % os.path.splitext(__file__)[0],
"../pipeline.yml",
"pipeline.yml"])

job_kwargs = {}
1 change: 0 additions & 1 deletion panpipes/panpipes/pipeline_vis.py
@@ -12,7 +12,6 @@

PARAMS = P.get_parameters(
["%s/pipeline.yml" % os.path.splitext(__file__)[0],
"../pipeline.yml",
"pipeline.yml"])

PARAMS['py_path'] = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'python_scripts')
36 changes: 34 additions & 2 deletions panpipes/python_scripts/concat_adata.py
@@ -52,7 +52,13 @@
help='csv file containing barcode level metadata')
parser.add_argument('--barcode_mtd_metadatacols',
default=None,
help='comma separated strings listing the column you want to keep in barcode_mtd_df')
help='comma separated strings listing the column you want to keep in barcode_mtd_df')
parser.add_argument('--protein_var_table',
default=None,
help='')
parser.add_argument('--protein_new_index_col',
default=None,
help='')

parser.set_defaults(verbose=True)
args, opt = parser.parse_known_args()
@@ -172,7 +178,32 @@

L.debug(mdata.obs.columns)
L.debug(mdata.obs.head())

# update the protein variable to add in extra info like isotype and alternate name for adt
if args.protein_var_table is not None:
try:
df = pd.read_csv(args.protein_var_table, sep='\t', index_col=0)
L.info("merging protein table with var")
# add_var_mtd(mdata['prot'], df)
var_df = mdata['prot'].var.merge(df, left_index=True, right_index=True)
if args.protein_new_index_col is not None:
L.info("updating prot.var index")
# update_var_index(mdata['prot'], args.protein_new_index_col)
var_df = var_df.reset_index().set_index(args.protein_new_index_col)
var_df = var_df.rename(columns={'index':'orig_id'})
var_df.index.name = None
mdata['prot'].var = var_df
mdata.update_var()
mdata.update()
# we might want to split hashing antibodies into a separate modality
# we assume this has been indicated in a "hashing_ab" column in the protein metadata file
if "hashing_ab" in mdata['prot'].var.columns:
# create new modality for hashing
mdata.mod["hashing_ab"]=mdata["prot"][:, mdata["prot"].var["hashing_ab"]]
# subset old modality to remove hashing
mdata.mod['prot'] = mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]]
except FileNotFoundError:
warnings.warn("protein metadata table not found")
mdata.update()
# tidy up metadata
# move sample_id to the front
# cols = mdata.obs.columns.tolist()
@@ -181,6 +212,7 @@
L.debug(mdata.obs.dtypes)

L.info("writing to file {}".format(str(args.output_file)))

mdata.write(args.output_file)

L.info("done")
31 changes: 0 additions & 31 deletions panpipes/python_scripts/make_adata_from_csv.py
@@ -67,12 +67,6 @@
parser.add_argument('--output_file',
default=None,
help='')
parser.add_argument('--protein_var_table',
default=None,
help='')
parser.add_argument('--protein_new_index_col',
default=None,
help='')
parser.add_argument('--per_barcode_metrics_file',
default=None,
help='ATAC/Multiome specific input file from csv')
@@ -123,31 +117,6 @@
intersect_obs_by_mod(mdata, ['rna', 'prot'])
mdata.update()
L.info(mdata['prot'].var.head())
if args.protein_var_table is not None:
try:
df = pd.read_csv(args.protein_var_table, sep='\t', index_col=0)
L.info("merging protein table with var")
# add_var_mtd(mdata['prot'], df)
var_df = mdata['prot'].var.merge(df, left_index=True, right_index=True)
if args.protein_new_index_col is not None:
L.info("updating prot.var index")
# update_var_index(mdata['prot'], args.protein_new_index_col)
var_df =var_df.reset_index().set_index(args.protein_new_index_col)
var_df = var_df.rename(columns={'index':'orig_id'})
var_df.index.name = None
mdata['prot'].var = var_df
mdata.update_var()
mdata.update()
# we might want to split hashing antibodies into a separate modalities
# we assume this has been inidicated in a "hashing_ab" column in the protein metadata file
if "hashing_ab" in mdata['prot'].var.columns:
# create new modality for hashing
mdata.mod["hashing_ab"]=mdata["prot"][:, mdata["prot"].var["hashing_ab"]]
# subset old modality to remove hashing
mdata.mod['prot'] = mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]]
except FileNotFoundError:
warnings.warn("protein metadata table not found")
mdata.update()

if 'atac' in mdata.mod.keys():
mdata['atac'].var_names_make_unique()
15 changes: 7 additions & 8 deletions panpipes/python_scripts/plot_scanpy_markers.py
@@ -8,17 +8,12 @@
import muon as mu
import pandas as pd
import argparse
import os
import re

sc.settings.autoshow = False
from panpipes.funcs.io import read_yaml

import matplotlib
matplotlib.use('agg')


# from panpipes.funcs.processing import check_for_bool
# from panpipes.funcs.io import read_anndata, write_anndata

import sys
import logging
L = logging.getLogger()
@@ -65,7 +60,11 @@ def calc_dendrogram(adata, group_col):
if "X_pca" not in adata.obsm.keys():
sc.pp.pca(adata)
L.info("calculating dendrogram")
sc.tl.dendrogram(adata, groupby=group_col, use_rep="X_pca")
try:
sc.tl.dendrogram(adata, groupby=group_col, use_rep="X_pca")
except ValueError:
L.info("cannot calculate dendrogram")
incl_dendrogram = False
else:
incl_dendrogram = False
return incl_dendrogram
