Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@

* `transform/regress_out`: Allow providing 'input' and 'output' layers for scanpy regress_out functionality (PR #863).

* `interpret/atac_marker_peaks`: Added a component to identify marker peaks in ATAC-seq data (PR #868).
*
* Added `transform/tfidf` component: normalize ATAC data with TF-IDF (PR #870).

* Added `dimred/lsi` component (PR #552).
Expand Down
84 changes: 84 additions & 0 deletions src/interpret/atac_marker_peaks/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
name: atac_marker_peaks
namespace: "interpret"
description: |
Rank differentially accessible peaks for each group of cells.

authors:
- __merge__: /src/authors/vladimir_shitov.yaml
roles: [ author ]
argument_groups:
- name: Inputs
arguments:
- name: "--input"
type: file
description: Input h5mu file
direction: input
required: true
example: input.h5mu
- name: "--modality"
type: string
default: "atac"
required: false
- name: Outputs
arguments:
- name: "--output"
type: file
description: Output h5mu file.
direction: output
example: output.h5mu
- name: "--output_compression"
type: string
description: The compression format to be used on the output h5mu object.
choices: ["gzip", "lzf"]
required: false
example: "gzip"
- name: "--output_marker_peaks"
type: file
description: Output tsv file with marker peaks.
direction: output
example: marker_peaks.tsv
- name: Arguments
arguments:
- name: "--groupby"
type: string
description: The group to use for ranking peaks.
required: true
example: "leiden"
- name: "--method"
type: string
description: The method to use for ranking peaks.
choices: ["t-test", "wilcoxon", "logreg", "t-test_overestim_var"]
required: false
default: "t-test"
resources:
- type: python_script
path: script.py
- path: /src/utils/setup_logger.py
test_resources:
- type: python_script
path: test.py
- path: /resources_test/cellranger_atac_tiny_bcl/counts/
engines:
- type: docker
image: python:3.11-slim
setup:
- type: apt
packages:
- procps
- pkg-config # Otherwise h5py installation fails, which is required for scanpy
- libhdf5-dev
- gcc
- type: python
__merge__: [/src/base/requirements/anndata_mudata.yaml, .]
__merge__: [ /src/base/requirements/scanpy.yaml, .]
packages:
- muon~=0.1.5
- pysam~=0.22.0
test_setup:
- type: python
__merge__: [ /src/base/requirements/viashpy.yaml, .]
runners:
- type: executable
- type: nextflow
directives:
label: [singlecpu, midmem]
42 changes: 42 additions & 0 deletions src/interpret/atac_marker_peaks/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import mudata
import muon as mu
import pandas as pd

### VIASH START
par = {
"input": "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
"output": "foo.h5mu",
"output_marker_peaks": "marker_peaks.tsv",
"output_compression": "gzip",
"modality": "atac",
"groupby": "leiden",
"method": "t-test",
}
### VIASH END

def main():
mdata = mudata.read(par["input"].strip())
mod = mdata.mod[par["modality"]]

if "peak_annotation" not in mod.uns["atac"]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you create an argument for the location of the peaks?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the default value from muon: https://muon.readthedocs.io/en/latest/api/generated/muon.atac.tl.add_peak_annotation.html

Other muon functions check exactly this location, so if we store it somewhere else, the errors will appear

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think what we could do is add an argument here (with a default value peak_annotation) and also add this argument to other components where needed?

raise ValueError("Peak annotation not found. Please run `muon.atac.tl.add_peak_annotation` first.")

mu.atac.tl.rank_peaks_groups(mod, par["groupby"], method=par["method"])

result = mod.uns["rank_genes_groups"]
groups = result["names"].dtype.names

marker_peaks_df = pd.DataFrame({
group + "_" + key: result[key][group]
for group in groups
for key in ["names", "genes", "pvals_adj"]
})

mdata.mod[par["modality"]].uns["rank_genes_groups"] = mod.uns["rank_genes_groups"]

mdata.write_h5mu(par["output"].strip(), compression=par["output_compression"])

marker_peaks_df.to_csv(par["output_marker_peaks"].strip(), sep="\t", index=False)

if __name__ == "__main__":
main()
68 changes: 68 additions & 0 deletions src/interpret/atac_marker_peaks/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import sys
from pathlib import Path
import os
import pytest

import mudata as md
import muon as mu
import numpy as np
import pandas as pd

## VIASH START
meta = {
'executable': './target/docker/qc/calculate_atac_qc_metrics/calculate_atac_qc_metrics',
'resources_dir': "./resources_test/cellranger_atac_tiny_bcl/counts/",
'config': './src/qc/calculate_atac_qc_metrics/config.vsh.yaml',
'cpus': 2
}
## VIASH END

@pytest.fixture
def tiny_atac_mudata(tmp_path):
resources_dir = Path(meta["resources_dir"])
mdata = mu.read_10x_h5(resources_dir / "counts" / "filtered_peak_bc_matrix.h5")
mu.atac.tl.locate_fragments(mdata, fragments=str(resources_dir / "counts" / "fragments.tsv.gz"))
assert "files" in mdata.mod["atac"].uns.keys()
assert "fragments" in mdata.mod["atac"].uns["files"].keys()

# Read features annotation and save it to uns
peak_annotation = pd.read_csv(resources_dir / "counts" / "peak_annotation.tsv", sep="\t")
peak_annotation["gene"] = peak_annotation["gene"].astype(str) # Fixes saving error
mu.atac.tl.add_peak_annotation(mdata.mod["atac"], peak_annotation)

# Simulate clustering to not install leiden dependencies
mdata.mod["atac"].obs["leiden"] = pd.Categorical(np.random.choice(np.arange(5), size=mdata.n_obs))

mdata_path = tmp_path / "tiny_atac.h5mu"
mdata.write(mdata_path)

return mdata_path

@pytest.mark.parametrize("mudata", ["tiny_atac_mudata"])
def test_marker_peaks(run_component, request, mudata, tmp_path):
input_path = request.getfixturevalue(mudata)
output_path = tmp_path / "foo.h5mu"

args = [
"--input", str(input_path),
"--output", str(output_path),
"--output_marker_peaks", str(tmp_path / "marker_peaks.tsv"),
"--modality", "atac",
"--groupby", "leiden"
]

run_component(args)
assert output_path.is_file()
assert "marker_peaks.tsv" in os.listdir(output_path.parent)

data_with_markers = md.read(output_path)

assert "rank_genes_groups" in data_with_markers.mod["atac"].uns

marker_peaks = pd.read_csv(tmp_path / "marker_peaks.tsv", sep="\t")
assert marker_peaks.shape[0] > 0
assert "0_pvals_adj" in marker_peaks.columns


if __name__ == "__main__":
sys.exit(pytest.main([__file__]))
Loading