Skip to content

Commit

Permalink
small changes to regressions
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Aug 20, 2024
1 parent 06a699a commit 8d0e477
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 39 deletions.
28 changes: 14 additions & 14 deletions scripts/run_grn_evaluation_tw.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"

RUN_ID="scgen_pearson_gb_sub549"
RUN_ID="pearson_gb"
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
# grn_models_folder="${resources_dir}/supplementary/grn_models_noised"
grn_models_folder="${resources_dir}/grn_models"
reg_type=GB
subsample=-1
subsample=-2
max_workers=20

param_file="./params/${RUN_ID}.yaml"
Expand All @@ -24,7 +24,7 @@ grn_names=(
)

# layers=("pearson" "lognorm" "scgen_pearson" "scgen_lognorm" "seurat_pearson" "seurat_lognorm")
layers=( "scgen_pearson" )
layers=( "pearson" )

# Start writing to the YAML file
cat > $param_file << HERE
Expand Down Expand Up @@ -52,17 +52,17 @@ for grn_name in "${grn_names[@]}"; do
done
done

# # Append negative control
# grn_name="negative_control"
# for layer in "${layers[@]}"; do
# append_entry "$grn_name" "" "true"
# done
# Append negative control
grn_name="negative_control"
for layer in "${layers[@]}"; do
append_entry "$grn_name" "" "true"
done

# # Append positive controls
# grn_name="positive_control"
# for layer in "${layers[@]}"; do
# append_entry "$grn_name" "true"
# done
# Append positive controls
grn_name="positive_control"
for layer in "${layers[@]}"; do
append_entry "$grn_name" "true"
done

# Append the remaining output_state and publish_dir to the YAML file
cat >> $param_file << HERE
Expand All @@ -84,7 +84,7 @@ HERE
--main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
--workspace 53907369739130 `
--compute-env 6TeIFgV5OY4pJCk8I0bfOh `
--params-file ./params/scgen_pearson_gb_sub549.yaml `
--params-file ./params/scgen_pearson_gb_pcs.yaml `
--config src/common/nextflow_helpers/labels_tw.config


2 changes: 2 additions & 0 deletions src/metrics/regression_1/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ def main(par):
perturbation_data = perturbation_data[mask,:]
else:
perturbation_data = perturbation_data[np.random.choice(perturbation_data.n_obs, subsample, replace=False), :]

print(perturbation_data.shape)

pert_df = pert_df.T # make it gene*sample

Expand Down
49 changes: 25 additions & 24 deletions src/metrics/regression_1/script_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,41 +52,42 @@ def create_negative_control(gene_names) -> np.ndarray:
for ii, layer in enumerate(layers):
par["layer"] = layer
if grn_model=='positive_control':
# print('Inferring GRN')
# net = create_positive_control(perturbation_data.layers[par["layer"]], groups)
print('Inferring positive control')
net = create_positive_control(perturbation_data.layers[par["layer"]], groups)

# net = pd.DataFrame(net, index=gene_names, columns=gene_names)
# net = net.loc[:, net.columns.isin(tf_all)]
net = pd.DataFrame(net, index=gene_names, columns=gene_names)
net = net.loc[:, net.columns.isin(tf_all)]

# pivoted_net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight')
pivoted_net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight')

# pivoted_net = pivoted_net.rename(columns={'index': 'target'})
# pivoted_net = pivoted_net[pivoted_net['weight'] != 0]
pivoted_net = pivoted_net.rename(columns={'index': 'target'})
pivoted_net = pivoted_net[pivoted_net['weight'] != 0]
par['prediction'] = f"{par['temp_dir']}/{layer}_positive_control.csv"
# pivoted_net.to_csv(par['prediction'])
print(par['prediction'])
pivoted_net.to_csv(par['prediction'])
elif grn_model=='negative_control':
# print('Inferring GRN')
# net = create_negative_control(gene_names)
print('Inferring negative control')
net = create_negative_control(gene_names)

# pivoted_net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight')
pivoted_net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight')

# pivoted_net = pivoted_net.rename(columns={'index': 'target'})
# pivoted_net = pivoted_net[pivoted_net['weight'] != 0]
pivoted_net = pivoted_net.rename(columns={'index': 'target'})
pivoted_net = pivoted_net[pivoted_net['weight'] != 0]
par['prediction'] = f"{par['temp_dir']}/negative_control.csv"
# pivoted_net.to_csv(par['prediction'])
pivoted_net.to_csv(par['prediction'])
else:
par['prediction'] = f"resources/grn_models/{grn_model}.csv"
output = main(par)
output.index = [layer]
# output = main(par)
# output.index = [layer]

if ii == 0:
score = output
else:
score = pd.concat([score, output], axis=0)
# if ii == 0:
# score = output
# else:
# score = pd.concat([score, output], axis=0)

print("Write output to file", flush=True)
print(grn_model, layer, score)
# print("Write output to file", flush=True)
# print(grn_model, layer, score)

print("Write output to file", flush=True)
score.to_csv(par['score'])
# print("Write output to file", flush=True)
# score.to_csv(par['score'])

2 changes: 1 addition & 1 deletion src/metrics/regression_2/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def static_approach(grn: np.ndarray, n_features: np.ndarray, X: np.ndarray, grou
    # Cross-validate each gene, using the inferred GRN to select its input features
res = cross_validate(reg_type, gene_names, X, groups, grn, n_features, n_jobs=n_jobs)
mean_r2_scores = np.asarray([res['scores'][j]['avg-r2'] for j in range(len(res['scores']))])
mean_r2_scores = mean_r2_scores[mean_r2_scores>-1]
mean_r2_scores = mean_r2_scores[mean_r2_scores>-10]

return np.mean(mean_r2_scores)

Expand Down

0 comments on commit 8d0e477

Please sign in to comment.