Skip to content

Commit

Permalink
regression 2 and impute fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Sep 12, 2024
1 parent 5db75e1 commit 4514482
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 10 deletions.
14 changes: 9 additions & 5 deletions scripts/run_grn_evaluation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# reg_type=${1} #GB, ridge
reg_type=ridge

RUN_ID="grn_evaluation_so_all_${reg_type}"
RUN_ID="grn_evaluation_all_${reg_type}"
# resources_dir="s3://openproblems-data/resources/grn"
resources_dir="./resources"
publish_dir="${resources_dir}/results/${RUN_ID}"
Expand All @@ -13,7 +13,7 @@ grn_models_folder="${resources_dir}/grn_models"
subsample=-2
max_workers=10
layer=scgen_pearson
metric_ids="[regression_1]"
metric_ids="[regression_1, regression_2]"

param_file="./params/${RUN_ID}.yaml"

Expand Down Expand Up @@ -74,6 +74,9 @@ HERE
if [ -n "$5" ]; then
echo " metacell: ${5}" >> $param_file
fi
if [ -n "$6" ]; then
echo " impute: ${6}" >> $param_file
fi
}

# #Loop through grn_names and layers
Expand All @@ -86,10 +89,11 @@ HERE
# append_entry_control "positive_control" "False" ""
# append_entry_control "baseline_pearson" "False" "pearson"
# append_entry_control "baseline_dotproduct" "False" "dotproduct"
append_entry_control "baseline_pearson_causal" "True" "pearson"
# append_entry_control "baseline_pearson_causal" "True" "pearson"
append_entry_control "baseline_dotproduct_causal" "True" "dotproduct"
append_entry_control "baseline_dotproduct_causal_cell_type" "True" "dotproduct" "true"
append_entry_control "baseline_dotproduct_causal_metacell" "True" "dotproduct" "false" "true"
# append_entry_control "baseline_dotproduct_causal_cell_type" "True" "dotproduct" "true"
# append_entry_control "baseline_dotproduct_causal_metacell" "True" "dotproduct" "false" "true"
append_entry_control "baseline_dotproduct_causal_impute" "True" "dotproduct" "false" "false" "true"
# append_entry_control "baseline_corr_causal_spearman" "True" "spearman"


Expand Down
8 changes: 7 additions & 1 deletion src/control_methods/baseline_corr/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ functionality:
direction: input
default: false
description: whether to pseudobulk scRNA-seq with metacells
- name: --impute
type: boolean
direction: input
default: false
description: whether to impute scRNA-seq

resources:
- type: python_script
path: script.py
Expand All @@ -44,7 +50,7 @@ platforms:
image: ghcr.io/openproblems-bio/base_python:1.0.4
setup:
- type: python
packages: [ ]
packages: [ magic-impute ]
- type: native
- type: nextflow
directives:
Expand Down
32 changes: 32 additions & 0 deletions src/control_methods/baseline_corr/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,21 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"):
for group in tqdm(np.unique(groups), desc="Processing groups"):
X_sub = X[groups == group, :]
if method == "dotproduct":
print('dotproduct')
net = X_sub.T.dot(X_sub)
elif method == "pearson":
print('pearson')
net = np.corrcoef(X_sub.T)
elif method == "spearman":
net = spearmanr(X_sub).statistic
net = np.nan_to_num(net, nan=0.0, posinf=0.0, neginf=0.0)

net = pd.DataFrame(net, index=gene_names, columns=gene_names)
if par['causal']:
print('causal')
net = net[tf_all]
else:
print('non causal')
net = net.sample(len(tf_all), axis=1, random_state=par['seed'])

net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight')
Expand All @@ -44,7 +48,9 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"):
print('Read data')
multiomics_rna = ad.read_h5ad(par["multiomics_rna"])
# multiomics_rna = multiomics_rna[:,:2000] #TODO: togo

if par['metacell']:
print('metacell')
def create_meta_cells(df, n_cells=15):
meta_x = []
for i in range(0, df.shape[0], n_cells):
Expand All @@ -71,6 +77,32 @@ def create_meta_cells(df, n_cells=15):
sc.pp.log1p(multiomics_rna)
sc.pp.scale(multiomics_rna)

if par['impute']:
    # Optional step: run KNN imputation over the expression matrix before the
    # correlation network is built from `multiomics_rna.X`.
    print("imputing")
    # A MAGIC-based alternative (magic.MAGIC().fit_transform) was tried here
    # and is currently disabled in favour of KNN imputation.
    from scipy import sparse
    from sklearn.impute import KNNImputer

    print("Imputing with KNN")

    # Convert to dense if the matrix is sparse.
    # `issparse` is taken from scipy.sparse: `sc` is the module used for
    # `sc.pp.log1p` / `sc.pp.scale` in this script (presumably scanpy), and
    # calling `sc.sparse.issparse` on it would raise AttributeError.
    if sparse.issparse(multiomics_rna.X):
        multiomics_rna_dense = multiomics_rna.X.toarray()
    else:
        multiomics_rna_dense = multiomics_rna.X

    # Apply KNN imputation.
    # NOTE(review): KNNImputer only fills entries equal to its
    # `missing_values` (NaN by default). If X contains zeros rather than NaN,
    # this call returns the data unchanged -- confirm that is intended.
    knn_imputer = KNNImputer(n_neighbors=5)  # You can adjust the number of neighbors
    multiomics_rna_imputed = knn_imputer.fit_transform(multiomics_rna_dense)

    # Update the AnnData object with the imputed values
    multiomics_rna.X = multiomics_rna_imputed
print('zero ration: ', (multiomics_rna.X==0).sum()/multiomics_rna.size)
print('Create corr net')
net = create_corr_net(multiomics_rna.X, groups, par['corr_method'])

Expand Down
8 changes: 4 additions & 4 deletions src/metrics/regression_2/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,10 +277,10 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:
# Load inferred GRN
print(f'Loading GRN', flush=True)
grn = load_grn(par['prediction'], gene_names)
if 'cell_type' in grn.columns:
print('Non specific')
grn.drop(columns=['cell_type'], inplace=True)
grn = grn.groupby(['source', 'target']).mean().reset_index()
# if 'cell_type' in grn.columns:
# print('Non specific')
# grn.drop(columns=['cell_type'], inplace=True)
# grn = grn.groupby(['source', 'target']).mean().reset_index()

# Load and standardize perturbation data
layer = par['layer']
Expand Down
5 changes: 5 additions & 0 deletions src/workflows/run_grn_evaluation/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ functionality:
required: false
direction: input
default: false
- name: --impute
type: boolean
required: false
direction: input
default: false

- name: Outputs
arguments:
Expand Down
19 changes: 19 additions & 0 deletions src/workflows/run_grn_evaluation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,25 @@ workflow run_wf {
]
}
)
| baseline_corr.run(
runIf: { id, state ->
state.method_id == 'baseline_dotproduct_causal_impute'
},
fromState: [
multiomics_rna: "multiomics_rna",
layer: "layer",
tf_all: "tf_all",
causal: "causal",
corr_method: "corr_method",
metacell: "metacell",
impute: "impute"
],
toState: {id, output, state ->
state + [
prediction: output.prediction
]
}
)
| baseline_corr.run(
runIf: { id, state ->
state.method_id == 'baseline_corr_causal_spearman'
Expand Down

0 comments on commit 4514482

Please sign in to comment.