diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5978b32..7a49240 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,22 +18,26 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 + with: + fetch-depth: 2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + - name: Install uv + uses: astral-sh/setup-uv@v6 with: - python-version: ${{ matrix.python-version }} + enable-cache: true + cache-dependency-glob: "uv.lock" - - name: Install package with pip - run: | - pipx install hatch - hatch env create dev + - name: Set up Python + run: uv python install + + - name: Install dependencies + run: uv sync --all-extras --dev - name: Run tests run: | - hatch run dev:pytest + uv run pytest tests - name: Run fermo_core on test dataset run: | - hatch run dev:fermo_core --parameters tests/test_data/test.parameters.json \ No newline at end of file + uv run fermo_core --parameters tests/test_data/test.parameters.json \ No newline at end of file diff --git a/.gitignore b/.gitignore index 28f1dbb..0ebbf9d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ results/ data_working/ .idea/ fermo_analysis/ +uv.lock # ms2query files fermo_core/libraries/ms2query/results diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c02a5b3..96f03fd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: hooks: - id: pytest name: pytest - entry: hatch run dev:pytest tests/ + entry: uv run pytest tests/ language: system types: [ file, python ] - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/CHANGELOG.md b/CHANGELOG.md index 06adffe..6a83d4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.7.0] 29-01-2026 + +## Added + +- Additional ion adducts to annotate multicharged ions where the [M+H]+ is not visible (e.g. often the case with peptides) +- Added Cytoscape-compatible ion identity network for additional output for adduct annotation +- Quantitative phenotype: Added Spearman correlation, optionally with permutation test +- Qualitative phenotype: Added optional testing (Welsh, Wilcoxon) to reduce spurious hits at low fold-changes (disabled by setting p_value_theshold to 0 or specifying "None"). + +## Changed + +- Removed z-transformation from Pearson correlation in quantitative phenotype score calculation +- For qualitative phenotype assigning, set p-value to "N/A" if not calculated (was 1 before) + ## [0.6.4] 13-05-2025 ## Changed diff --git a/README.md b/README.md index 9b823be..94ffd71 100644 --- a/README.md +++ b/README.md @@ -173,12 +173,12 @@ Instructions for setting up a development environment. #### Package Installation -*Assumes that `hatch` is installed* +*Assumes that `uv` is installed* ```commandline -hatch env create dev -hatch run dev:pre-commit install -hatch run dev:pytest --run_slow +uv sync --extra dev +uv run pre-commit install +uv run pytest --run_slow ``` ### Documentation diff --git a/fermo_core/config/schema.json b/fermo_core/config/schema.json index 4ac4107..8ba7977 100644 --- a/fermo_core/config/schema.json +++ b/fermo_core/config/schema.json @@ -253,6 +253,17 @@ "height", "area" ] + }, + "p_val_cutoff": { + "$ref": "#/$defs/r_perc" + }, + "test": { + "type": "string", + "enum": [ + "None", + "Welsh", + "Wilcoxon" + ] } } }, @@ -290,7 +301,7 @@ }, "algorithm": { "type": "string", - "enum": ["pearson"] + "enum": ["pearson", "spearman", "spearman_permutation"] }, "fdr_corr": { "type": "string", diff --git a/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py b/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py index 786cf48..6172550 100644 --- a/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py +++ b/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py @@ -25,6 +25,7 @@ import logging from typing import Self +import networkx as nx from pydantic import BaseModel from fermo_core.config.class_default_settings import DefaultMasses as Mass @@ -32,9 +33,10 @@ Adduct, Annotations, Feature, + SimNetworks, ) from fermo_core.data_processing.class_repository import Repository -from fermo_core.data_processing.class_stats import Stats +from fermo_core.data_processing.class_stats import SpecSimNet, Stats from fermo_core.input_output.class_parameter_manager import ParameterManager from fermo_core.utils.utility_method_manager import UtilityMethodManager @@ -59,13 +61,13 @@ class AdductAnnotator(BaseModel): features: Repository samples: Repository - def return_features(self: Self) -> Repository: + def return_features(self: Self) -> tuple[Repository, Stats]: """Returns modified attributes from AdductAnnotator to the calling function Returns: - Modified Feature Repository objects. + Modified Feature Repository objects and Stats Object """ - return self.features + return self.features, self.stats def run_analysis(self: Self): """Organizes calling of data analysis steps.""" @@ -85,6 +87,74 @@ def run_analysis(self: Self): self.annotate_adducts_neg(s_name) self.dereplicate_adducts() + self.create_network() + + def create_network(self: Self): + """Creates ion identity networks + + add_node() in NetworkX is idempotent — if the node already exists, it is ignored. + """ + logger.info("'AnnotationManager/AdductAnnotator': started ion identity network") + + g = nx.Graph() + + for f_id in self.stats.active_features: + feature = self.features.get(f_id) + + if ( + not feature.Annotations + or not feature.Annotations.adducts + or len(feature.Annotations.adducts) == 0 + ): + g.add_node(f_id) + continue + + g.add_node(f_id) + + for adduct in feature.Annotations.adducts: + g.add_node(adduct.partner_id) + g.add_edge( + f_id, + adduct.partner_id, + ppm=adduct.diff_ppm, + adduct_type=adduct.adduct_type, + partner_adduct=adduct.partner_adduct, + ) + + subnetworks = {} + for i, component in enumerate(nx.connected_components(g)): + subnetworks[i] = g.subgraph(component).copy() + subnetworks[i].graph["name"] = i + + summary = {} + for sub in subnetworks: + ids = {int(node) for node in subnetworks[sub].nodes} + summary[sub] = ids + + if not self.stats.networks: + self.stats.networks = {} + self.stats.networks["ion_identity"] = SpecSimNet( + algorithm="ion_identity", + network=g, + subnetworks=subnetworks, + summary=summary, + ) + + for f_id in self.stats.active_features: + feature = self.features.get(f_id) + if feature.networks is None: + feature.networks = {} + + for cluster_id in summary: + if f_id in summary[cluster_id]: + feature.networks["ion_identity"] = SimNetworks( + algorithm="ion_identity", network_id=cluster_id + ) + self.features.modify(f_id, feature) + + logger.info( + "'AnnotationManager/AdductAnnotator': completed ion identity network" + ) @staticmethod def add_adduct_info(feature: Feature) -> Feature: @@ -281,6 +351,18 @@ def annotate_adducts_pos(self: Self, s_name: str | int): feat2.f_id, feat1.f_id, s_name ): continue + elif self.double_quadruple( + feat1.f_id, feat2.f_id, s_name + ) or self.double_quadruple(feat2.f_id, feat1.f_id, s_name): + continue + elif self.double_triple( + feat1.f_id, feat2.f_id, s_name + ) or self.double_triple(feat2.f_id, feat1.f_id, s_name): + continue + elif self.quadruple_triple( + feat1.f_id, feat2.f_id, s_name + ) or self.quadruple_triple(feat2.f_id, feat1.f_id, s_name): + continue def sodium_adduct(self: Self, feat1: int, feat2: int, s_name: str) -> bool: """Determination of [M+Na]+ adduct, add information @@ -1411,3 +1493,147 @@ def acetate_adduct(self: Self, feat1: int, feat2: int, s_name: str) -> bool: return True else: return False + + def double_quadruple(self: Self, feat1: int, feat2: int, s_name: str) -> bool: + """Determination of relationship between [M+2H]2+ and [M+4H]4+ + + Arguments: + feat1: feature 1 identifier + feat2: feature 2 identifier + s_name: the sample identifier + + Returns: + A bool indicating the outcome + """ + double = self.features.get(feat1) + quadruple = self.features.get(feat2) + + ppm = UtilityMethodManager.mass_deviation( + (double.mz * 2) - (2 * Mass().H), + (quadruple.mz * 4) - (4 * Mass().H), + quadruple.f_id, + ) + if ppm < self.params.AdductAnnotationParameters.mass_dev_ppm: + double = self.add_adduct_info(double) + quadruple = self.add_adduct_info(quadruple) + double.Annotations.adducts.append( + Adduct( + adduct_type="[M+2H]2+", + partner_adduct="[M+4H]4+", + partner_id=quadruple.f_id, + partner_mz=quadruple.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + quadruple.Annotations.adducts.append( + Adduct( + adduct_type="[M+4H]4+", + partner_adduct="[M+2H]2+", + partner_id=double.f_id, + partner_mz=double.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + self.features.modify(feat1, double) + self.features.modify(feat2, quadruple) + return True + else: + return False + + def double_triple(self: Self, feat1: int, feat2: int, s_name: str) -> bool: + """Determination of relationship between [M+2H]2+ and [M+3H]3+ + + Arguments: + feat1: feature 1 identifier + feat2: feature 2 identifier + s_name: the sample identifier + + Returns: + A bool indicating the outcome + """ + double = self.features.get(feat1) + triple = self.features.get(feat2) + + ppm = UtilityMethodManager.mass_deviation( + (double.mz * 2) - (2 * Mass().H), + (triple.mz * 3) - (3 * Mass().H), + triple.f_id, + ) + if ppm < self.params.AdductAnnotationParameters.mass_dev_ppm: + double = self.add_adduct_info(double) + triple = self.add_adduct_info(triple) + double.Annotations.adducts.append( + Adduct( + adduct_type="[M+2H]2+", + partner_adduct="[M+3H]3+", + partner_id=triple.f_id, + partner_mz=triple.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + triple.Annotations.adducts.append( + Adduct( + adduct_type="[M+3H]3+", + partner_adduct="[M+2H]2+", + partner_id=double.f_id, + partner_mz=double.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + self.features.modify(feat1, double) + self.features.modify(feat2, triple) + return True + else: + return False + + def quadruple_triple(self: Self, feat1: int, feat2: int, s_name: str) -> bool: + """Determination of relationship between [M+4H]4+ and [M+3H]3+ + + Arguments: + feat1: feature 1 identifier + feat2: feature 2 identifier + s_name: the sample identifier + + Returns: + A bool indicating the outcome + """ + quadruple = self.features.get(feat1) + triple = self.features.get(feat2) + + ppm = UtilityMethodManager.mass_deviation( + (quadruple.mz * 4) - (4 * Mass().H), + (triple.mz * 3) - (3 * Mass().H), + triple.f_id, + ) + if ppm < self.params.AdductAnnotationParameters.mass_dev_ppm: + quadruple = self.add_adduct_info(quadruple) + triple = self.add_adduct_info(triple) + quadruple.Annotations.adducts.append( + Adduct( + adduct_type="[M+4H]4+", + partner_adduct="[M+3H]3+", + partner_id=triple.f_id, + partner_mz=triple.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + triple.Annotations.adducts.append( + Adduct( + adduct_type="[M+3H]3+", + partner_adduct="[M+4H]4+", + partner_id=quadruple.f_id, + partner_mz=quadruple.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + self.features.modify(feat1, quadruple) + self.features.modify(feat2, triple) + return True + else: + return False diff --git a/fermo_core/data_analysis/annotation_manager/class_annotation_manager.py b/fermo_core/data_analysis/annotation_manager/class_annotation_manager.py index 347ecac..f1fe2b5 100644 --- a/fermo_core/data_analysis/annotation_manager/class_annotation_manager.py +++ b/fermo_core/data_analysis/annotation_manager/class_annotation_manager.py @@ -260,7 +260,7 @@ def run_feature_adduct_annotation(self: Self): samples=self.samples, ) adduct_annotator.run_analysis() - self.features = adduct_annotator.return_features() + self.features, self.stats = adduct_annotator.return_features() self.params.AdductAnnotationParameters.module_passed = True except Exception as e: logger.error(str(e)) diff --git a/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py b/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py index 9e9ae64..99bcfa5 100644 --- a/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py +++ b/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py @@ -25,7 +25,9 @@ from statistics import mean, median from typing import Optional, Self +import numpy as np from pydantic import BaseModel +from scipy.stats import ranksums, ttest_ind from fermo_core.data_processing.builder_feature.dataclass_feature import ( Annotations, @@ -143,6 +145,48 @@ def get_value(self: Self, f_id: int, sample_ids: set) -> list: else: return values_s_ids + def determine_active( + self, feature: Feature, fct: float, act: list, inact: list + ) -> Feature: + """Determine active based on fold-change and test""" + if fct < self.params.PhenoQualAssgnParams.factor: + return feature + + phenotype = Phenotype( + score=1.0, + format="qualitative", + descr=f"Fold-difference: '{round(fct, 2)}'", + ) + + if ( + len(act) > 3 + and len(inact) > 3 + and self.params.PhenoQualAssgnParams.p_val_cutoff != 0 + ): + log_act = np.log10(act) + log_inact = np.log10(inact) + + match self.params.PhenoQualAssgnParams.test: + case "Welsh": + stat, pval = ttest_ind(log_act, log_inact, equal_var=False) + if pval < self.params.PhenoQualAssgnParams.p_val_cutoff: + phenotype.descr = f"Fold-difference: '{round(fct, 2)}', Welsh's t-test statistic: '{round(stat, 2)}'" + phenotype.p_value = pval + case "Wilcoxon": + stat, pval = ranksums(log_act, log_inact) + if pval < self.params.PhenoQualAssgnParams.p_val_cutoff: + phenotype.descr = f"Fold-difference: '{round(fct, 2)}', Wilcoxon rank-sum test statistic: '{round(stat, 2)}'" + phenotype.p_value = pval + case _: + logger.warning( + f"PhenQualAssigner: test '{self.params.PhenoQualAssgnParams.test}' not recognized - SKIP" + ) + + feature = self.add_annotation_attribute(feature=feature) + feature.Annotations.phenotypes.append(phenotype) + self.stats.phenotypes[0].f_ids_positive.add(feature.f_id) + return feature + def bin_intersection(self: Self): """Bin the intersection between positive and negative f_ids based on factor @@ -164,41 +208,26 @@ def bin_intersection(self: Self): match self.params.PhenoQualAssgnParams.algorithm: case "minmax": - factor = min(vals_act) / max(vals_inact) - if factor >= self.params.PhenoQualAssgnParams.factor: - feature = self.add_annotation_attribute(feature=feature) - feature.Annotations.phenotypes.append( - Phenotype( - score=1.0, - format="qualitative", - descr=f"Fold-difference: '{round(factor, 2)}'", - ) - ) - self.stats.phenotypes[0].f_ids_positive.add(f_id) + feature = self.determine_active( + feature=feature, + fct=min(vals_act) / max(vals_inact), + act=vals_act, + inact=vals_inact, + ) case "mean": - factor = mean(vals_act) / mean(vals_inact) - if factor >= self.params.PhenoQualAssgnParams.factor: - feature = self.add_annotation_attribute(feature=feature) - feature.Annotations.phenotypes.append( - Phenotype( - score=1.0, - format="qualitative", - descr=f"Fold-difference: '{round(factor, 2)}'", - ) - ) - self.stats.phenotypes[0].f_ids_positive.add(f_id) + feature = self.determine_active( + feature=feature, + fct=mean(vals_act) / mean(vals_inact), + act=vals_act, + inact=vals_inact, + ) case "median": - factor = median(vals_act) / median(vals_inact) - if factor >= self.params.PhenoQualAssgnParams.factor: - feature = self.add_annotation_attribute(feature=feature) - feature.Annotations.phenotypes.append( - Phenotype( - score=1.0, - format="qualitative", - descr=f"Fold-difference: '{round(factor, 2)}'", - ) - ) - self.stats.phenotypes[0].f_ids_positive.add(f_id) + feature = self.determine_active( + feature=feature, + fct=median(vals_act) / median(vals_inact), + act=vals_act, + inact=vals_inact, + ) case _: raise RuntimeError("'PhenQualAssigner': Unsupported algorithm.") diff --git a/fermo_core/data_analysis/phenotype_manager/class_phen_quant_ass.py b/fermo_core/data_analysis/phenotype_manager/class_phen_quant_ass.py index 402fc99..044c3ec 100644 --- a/fermo_core/data_analysis/phenotype_manager/class_phen_quant_ass.py +++ b/fermo_core/data_analysis/phenotype_manager/class_phen_quant_ass.py @@ -22,7 +22,7 @@ import numpy as np from pydantic import BaseModel -from scipy.stats import pearsonr, zscore +from scipy import stats from statsmodels.stats.multitest import multipletests from fermo_core.data_processing.builder_feature.dataclass_feature import ( @@ -44,6 +44,7 @@ class PhenQuantAss(BaseModel): p_val_cutoff: the corrected p-value cutoff fdr_corr: the type of FDR correction mode: the type of quantitative data + alg: the algorithm used for correlation stats: Stats object, holds stats on molecular features and samples features: Repository object, holds "General Feature" objects samples: Repository object holding Sample objects @@ -55,6 +56,7 @@ class PhenQuantAss(BaseModel): p_val_cutoff: float fdr_corr: str mode: str + alg: str stats: Stats features: Repository samples: Repository @@ -146,31 +148,72 @@ def valid_constant(values: list) -> bool: return True @staticmethod - def pearson_percentage(areas: list, activs: list) -> tuple[float, float]: + def pearson(areas: list, activs: list, mode: str) -> tuple[float, float]: """Calculate regular pearson coefficient Args: areas: the feature areas per sample activs: the measured activities per sample + mode: percentage or concentration Returns: - the pearson score and the p-value + the Pearson score and the p-value """ - return pearsonr(zscore(areas), zscore(activs)) + if mode == "concentration": + activs = [(1 / val) if val != 0 else 0 for val in activs] + + return stats.pearsonr(areas, activs) @staticmethod - def pearson_concentration(areas: list, activs: list) -> tuple[float, float]: - """Calculate regular pearson coefficient + def spearman(areas: list, activs: list, mode: str) -> tuple[float, float]: + """Calculate Spearman rank correlation + + Args: + areas: the feature areas per sample + activs: the measured activities per sample + mode: percentage or concentration + + Returns: + the Spearman score and the p-value + """ + if mode == "concentration": + activs = [(1 / val) if val != 0 else 0 for val in activs] + + r_s = stats.spearmanr(areas, activs).statistic + p = stats.spearmanr(areas, activs).pvalue + return r_s, p + + @staticmethod + def spearman_permutation( + areas: list, activs: list, mode: str + ) -> tuple[float, float]: + """Calculate Spearman rank correlation with two-sided permutation test Args: areas: the feature areas per sample activs: the measured activities per sample + mode: percentage or concentration Returns: - the pearson score and the p-value + the Spearman score and the p-value """ - activs = [(1 / val) if val != 0 else 0 for val in activs] - return pearsonr(zscore(areas), zscore(activs)) + if mode == "concentration": + activs = [(1 / val) if val != 0 else 0 for val in activs] + + r_s = stats.spearmanr(areas, activs).statistic + + def statistic(x): + return stats.spearmanr(x, activs).statistic + + res = stats.permutation_test( + (areas,), + statistic, + permutation_type="pairings", + n_resamples=10000, + alternative="two-sided", + ) + + return r_s, res.pvalue def calculate_correlation(self: Self): """Collect data and prepare calculation @@ -200,22 +243,24 @@ def calculate_correlation(self: Self): if not self.valid_constant(areas) or not self.valid_constant(activs): continue - if self.mode == "percentage": - pearson_s, p_val = self.pearson_percentage( - areas=areas, activs=activs + if self.alg == "pearson": + corr, pval = self.pearson( + areas=areas, activs=activs, mode=self.mode ) - elif self.mode == "concentration": - pearson_s, p_val = self.pearson_concentration( - areas=areas, activs=activs + elif self.alg == "spearman": + corr, pval = self.spearman( + areas=areas, activs=activs, mode=self.mode ) - else: - raise KeyError( - f"Unsupported type of input data '{self.mode}' - SKIP" + elif self.alg == "spearman_permutation": + corr, pval = self.spearman_permutation( + areas=areas, activs=activs, mode=self.mode ) + else: + raise KeyError(f"Unsupported algorithm '{self.alg}' - SKIP") self.assays[assay.category][f_id] = {} - self.assays[assay.category][f_id]["corr"] = pearson_s - self.assays[assay.category][f_id]["raw_p"] = p_val + self.assays[assay.category][f_id]["corr"] = corr + self.assays[assay.category][f_id]["raw_p"] = pval self.assays[assay.category][f_id]["datatype"] = assay.datatype def correct_p_val(self) -> None: diff --git a/fermo_core/data_analysis/phenotype_manager/class_phenotype_manager.py b/fermo_core/data_analysis/phenotype_manager/class_phenotype_manager.py index 576701b..5eeb4d3 100644 --- a/fermo_core/data_analysis/phenotype_manager/class_phenotype_manager.py +++ b/fermo_core/data_analysis/phenotype_manager/class_phenotype_manager.py @@ -131,6 +131,7 @@ def run_assigner_quant_percentage(self: Self): p_val_cutoff=self.params.PhenoQuantPercentAssgnParams.p_val_cutoff, fdr_corr=self.params.PhenoQuantPercentAssgnParams.fdr_corr, mode="percentage", + alg=self.params.PhenoQuantPercentAssgnParams.algorithm, features=self.features, stats=self.stats, samples=self.samples, @@ -170,6 +171,7 @@ def run_assigner_quant_concentration(self: Self): coeff_cutoff=self.params.PhenoQuantConcAssgnParams.coeff_cutoff, p_val_cutoff=self.params.PhenoQuantConcAssgnParams.p_val_cutoff, fdr_corr=self.params.PhenoQuantConcAssgnParams.fdr_corr, + alg=self.params.PhenoQuantConcAssgnParams.algorithm, mode="percentage", features=self.features, stats=self.stats, diff --git a/fermo_core/data_analysis/sim_networks_manager/class_sim_networks_manager.py b/fermo_core/data_analysis/sim_networks_manager/class_sim_networks_manager.py index e85c3d8..0bf83f8 100644 --- a/fermo_core/data_analysis/sim_networks_manager/class_sim_networks_manager.py +++ b/fermo_core/data_analysis/sim_networks_manager/class_sim_networks_manager.py @@ -54,7 +54,7 @@ class SimNetworksManager(BaseModel): samples: Repository object, holds "Sample" objects Notes: - `UtilityMethodManager` baseclass gives additional utility methods. + `UtilityMethodManager` class gives additional utility methods. """ params: ParameterManager diff --git a/fermo_core/data_processing/builder_feature/dataclass_feature.py b/fermo_core/data_processing/builder_feature/dataclass_feature.py index 9cddf68..2398efb 100644 --- a/fermo_core/data_processing/builder_feature/dataclass_feature.py +++ b/fermo_core/data_processing/builder_feature/dataclass_feature.py @@ -180,9 +180,9 @@ def to_json(self: Self) -> dict: "category": self.category if self.category is not None else "N/A", "descr": self.descr if self.descr is not None else "N/A", "score": round(self.score, 6), - "p_value": round(self.p_value, 10) if self.p_value is not None else 1.0, + "p_value": round(self.p_value, 10) if self.p_value is not None else "N/A", "p_value_corr": ( - round(self.p_value_corr, 10) if self.p_value_corr is not None else 1.0 + round(self.p_value_corr, 10) if self.p_value_corr is not None else "N/A" ), } diff --git a/fermo_core/input_output/param_handlers.py b/fermo_core/input_output/param_handlers.py index be723ac..fab7ffb 100644 --- a/fermo_core/input_output/param_handlers.py +++ b/fermo_core/input_output/param_handlers.py @@ -625,6 +625,8 @@ class PhenoQualAssgnParams(BaseModel): factor: An integer fold-change to differentiate phenotype-assoc. features. algorithm: the algorithm to summarize values of active vs inactive samples. value: the type of value to use for determination + p_val_cutoff: minimum significance threshold derived by test; 0 to disable test + test: type of test performed module_passed: indicates that the module ran without errors """ @@ -632,12 +634,16 @@ class PhenoQualAssgnParams(BaseModel): factor: PositiveInt algorithm: str value: str + p_val_cutoff: float = 0.0 + test: str = "None" module_passed: bool = False @model_validator(mode="after") def val(self): ValidationManager.validate_allowed(self.algorithm, ["mean", "median", "minmax"]) ValidationManager.validate_allowed(self.value, ["height", "area"]) + ValidationManager.validate_allowed(self.test, ["None", "Welsh", "Wilcoxon"]) + ValidationManager.validate_float_zero_one(self.p_val_cutoff) return self def to_json(self: Self) -> dict: @@ -680,7 +686,9 @@ class PhenoQuantPercentAssgnParams(BaseModel): def val(self): ValidationManager.validate_allowed(self.sample_avg, ["mean", "median"]) ValidationManager.validate_allowed(self.value, ["area"]) - ValidationManager.validate_allowed(self.algorithm, ["pearson"]) + ValidationManager.validate_allowed( + self.algorithm, ["pearson", "spearman", "spearman_permutation"] + ) ValidationManager.validate_allowed( self.fdr_corr, [ @@ -743,7 +751,9 @@ class PhenoQuantConcAssgnParams(BaseModel): def val(self): ValidationManager.validate_allowed(self.sample_avg, ["mean", "median"]) ValidationManager.validate_allowed(self.value, ["area"]) - ValidationManager.validate_allowed(self.algorithm, ["pearson"]) + ValidationManager.validate_allowed( + self.algorithm, ["pearson", "spearman", "spearman_permutation"] + ) ValidationManager.validate_allowed( self.fdr_corr, [ diff --git a/pyproject.toml b/pyproject.toml index 5bc8ce4..647eb71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fermo_core" -version = "0.6.4" +version = "0.7.0" description = "Data processing/analysis functionality of metabolomics dashboard FERMO" readme = "README.md" requires-python = ">=3.11,<3.12" @@ -62,30 +62,6 @@ fermo_core = "fermo_core.main:main_cli" requires = ["hatchling"] build-backend = "hatchling.build" -[tool.hatch.envs.default] -installer = "uv" - -[tool.hatch.envs.dev] -installer = "uv" -features = [ - "dev" -] - -[tool.hatch.envs.doc] -installer = "uv" -features = [ - "doc" -] - - -[tool.hatch.envs.test] -dependencies = [ - "pytest~=7.4.2", -] - -[[tool.hatch.envs.test.matrix]] -python = ["3.11"] - [tool.hatch.build.targets.sdist] exclude = [ diff --git a/tests/test_data/test.parameters.json b/tests/test_data/test.parameters.json index 5d4ebed..c782a2c 100644 --- a/tests/test_data/test.parameters.json +++ b/tests/test_data/test.parameters.json @@ -76,7 +76,8 @@ "activate_module": true, "factor": 5, "algorithm": "minmax", - "value": "area" + "value": "area", + "p_val_cutoff": 0.05 }, "PhenoQuantPercentAssgnParameters": { "activate_module": false, diff --git a/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py b/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py index a260345..95809bd 100644 --- a/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py +++ b/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py @@ -1,9 +1,14 @@ +import networkx as nx import pytest from fermo_core.data_analysis.annotation_manager.class_adduct_annotator import ( AdductAnnotator, ) -from fermo_core.data_processing.builder_feature.dataclass_feature import Adduct, Feature +from fermo_core.data_processing.builder_feature.dataclass_feature import ( + Adduct, + Feature, + SimNetworks, +) from fermo_core.data_processing.class_repository import Repository from fermo_core.data_processing.class_stats import Stats from fermo_core.input_output.class_parameter_manager import ParameterManager @@ -46,10 +51,73 @@ def test_return_features_valid(adduct_annotator_min): def test_run_analysis_valid(adduct_annotator): adduct_annotator.run_analysis() - features = adduct_annotator.return_features() + features, stats = adduct_annotator.return_features() assert features.entries[131].Annotations.adducts[0] is not None +def test_create_network_singletons_valid(adduct_annotator_min): + """Condiction: feature have no adducts - singletons in graph""" + adduct_annotator_min.create_network() + assert ( + len( + list( + nx.connected_components( + adduct_annotator_min.stats.networks["ion_identity"].network + ) + ) + ) + == 2 + ) + + +def test_create_network_subgraph_valid(adduct_annotator_min): + """Condition: features are related and in same subgraph""" + adduct_annotator_min.features.entries[1].mz = 415.2098 + adduct_annotator_min.features.entries[2].mz = 437.1912 + adduct_annotator_min.sodium_adduct(1, 2, "sample1") + adduct_annotator_min.create_network() + assert ( + len( + list( + nx.connected_components( + adduct_annotator_min.stats.networks["ion_identity"].network + ) + ) + ) + == 1 + ) + + +def test_create_network_two_subgraphs(adduct_annotator_min): + """Condition: two subgraphs of two features each""" + adduct_annotator_min.features.entries[1].mz = 415.2098 + adduct_annotator_min.features.entries[2].mz = 437.1912 + adduct_annotator_min.sodium_adduct(1, 2, "sample1") + + adduct_annotator_min.features.add(3, Feature(f_id=3, mz=1648.4547)) + adduct_annotator_min.features.add(4, Feature(f_id=4, mz=1649.4578)) + adduct_annotator_min.stats.active_features.update([3, 4]) + adduct_annotator_min.plus1_isotope(3, 4, "sample1") + adduct_annotator_min.create_network() + assert ( + len( + list( + nx.connected_components( + adduct_annotator_min.stats.networks["ion_identity"].network + ) + ) + ) + == 2 + ) + assert adduct_annotator_min.stats.networks["ion_identity"].summary == { + 0: {1, 2}, + 1: {3, 4}, + } + assert isinstance( + adduct_annotator_min.features.entries[1].networks["ion_identity"], SimNetworks + ) + + def test_add_adduct_info_valid(adduct_annotator_min): feature = adduct_annotator_min.add_adduct_info(Feature()) assert feature.Annotations is not None @@ -57,7 +125,7 @@ def test_add_adduct_info_valid(adduct_annotator_min): def test_annotate_spec_features_valid(adduct_annotator): adduct_annotator.annotate_adducts_pos("5458_5457_mod.mzXML") - features = adduct_annotator.return_features() + features, stats = adduct_annotator.return_features() assert features.entries[131].Annotations.adducts[0] is not None @@ -76,7 +144,7 @@ def test_dereplicate_adducts_valid(adduct_annotator_min): ) ) adduct_annotator_min.dereplicate_adducts() - features = adduct_annotator_min.return_features() + features, stats = adduct_annotator_min.return_features() adduct_dict = features.entries[1].Annotations.adducts[0].to_json() assert len(adduct_dict["samples"]) == 2 @@ -223,3 +291,21 @@ def test_acetate_adduct_valid(adduct_annotator_min): adduct_annotator_min.features.entries[1].mz = 852.323614 adduct_annotator_min.features.entries[2].mz = 912.344741 assert adduct_annotator_min.acetate_adduct(1, 2, "sample1") + + +def test_double_quadruple_valid(adduct_annotator_min): + adduct_annotator_min.features.entries[1].mz = 1405.58893 + adduct_annotator_min.features.entries[2].mz = 703.29907 + assert adduct_annotator_min.double_quadruple(1, 2, "sample1") + + +def test_double_triple_valid(adduct_annotator_min): + adduct_annotator_min.features.entries[1].mz = 1405.58893 + adduct_annotator_min.features.entries[2].mz = 937.39628 + assert adduct_annotator_min.double_triple(1, 2, "sample1") + + +def test_quadruple_triple_valid(adduct_annotator_min): + adduct_annotator_min.features.entries[1].mz = 703.29907 + adduct_annotator_min.features.entries[2].mz = 937.39628 + assert adduct_annotator_min.quadruple_triple(1, 2, "sample1") diff --git a/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py b/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py index b1c2c22..5d54b0a 100644 --- a/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py +++ b/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py @@ -81,7 +81,13 @@ def phen_qual(): ) ] phen_qual.params.PhenoQualAssgnParams = PhenoQualAssgnParams( - **{"activate_module": True, "factor": 5, "algorithm": "minmax", "value": "area"} + **{ + "activate_module": True, + "factor": 5, + "algorithm": "minmax", + "value": "area", + "p_val_cutoff": 0.0, + } ) phen_qual.params.PhenotypeParameters = PhenotypeParameters( **{ @@ -133,3 +139,15 @@ def test_bin_intersection_negative(phen_qual): ] phen_qual.bin_intersection() assert len(phen_qual.stats.phenotypes[0].f_ids_positive) == 0 + + +def test_determine_active(phen_qual): + phen_qual.params.PhenoQualAssgnParams.p_val_cutoff = 0.05 + phen_qual.params.PhenoQualAssgnParams.test = "Wilcoxon" + feature = phen_qual.determine_active( + feature=phen_qual.features.get(1), + fct=10, + act=[66, 77, 88, 99, 111], + inact=[6, 7, 8, 9], + ) + assert feature.Annotations.phenotypes[0].p_value is not None diff --git a/tests/test_data_analysis/test_phenotype_manager/test_class_phen_quant_assigner.py b/tests/test_data_analysis/test_phenotype_manager/test_class_phen_quant_assigner.py index 3ee06c4..88f84f9 100644 --- a/tests/test_data_analysis/test_phenotype_manager/test_class_phen_quant_assigner.py +++ b/tests/test_data_analysis/test_phenotype_manager/test_class_phen_quant_assigner.py @@ -17,6 +17,7 @@ def phen_quant_perc(): coeff_cutoff=0.7, p_val_cutoff=0.05, fdr_corr="bonferroni", + alg="pearson", mode="percentage", stats=Stats(), features=Repository(), @@ -85,6 +86,7 @@ def phen_quant_conc(): coeff_cutoff=0.7, p_val_cutoff=0.05, fdr_corr="bonferroni", + alg="pearson", mode="concentration", stats=Stats(), features=Repository(), @@ -227,20 +229,38 @@ def test_valid_constant_invalid(phen_quant_conc): def test_pearson_percentage_valid(phen_quant_perc): - x, y = phen_quant_perc.pearson_percentage([1, 2, 3, 4, 5], [1, 2, 3, 4, 5]) + x, y = phen_quant_perc.pearson([1, 2, 3, 4, 5], [1, 2, 3, 4, 5], mode="percentage") assert x == 1 -def test_pearson_percentage_invalid(phen_quant_perc): - with pytest.raises(ValueError): - phen_quant_perc.pearson_percentage([1, 1, 1, 1, 1], [1, 2, 3, 4, 5]) +def test_pearson_concentration_valid(phen_quant_conc): + x, y = phen_quant_conc.pearson( + [1, 2, 3, 4, 5], [5, 4, 3, 2, 1], mode="concentration" + ) + assert round(x, 2) == 0.9 -def test_pearson_concentration_valid(phen_quant_conc): - x, y = phen_quant_conc.pearson_concentration([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]) +def test_spearman_percentage_valid(phen_quant_perc): + x, y = phen_quant_perc.pearson([1, 2, 3, 4, 5], [1, 2, 3, 4, 5], mode="percentage") + assert x == 1 + + +def test_spearman_concentration_valid(phen_quant_conc): + x, y = phen_quant_conc.pearson( + [1, 2, 3, 4, 5], [5, 4, 3, 2, 1], mode="concentration" + ) assert round(x, 2) == 0.9 -def test_pearson_concentration_invalid(phen_quant_conc): - with pytest.raises(ValueError): - phen_quant_conc.pearson_concentration([1, 1, 1, 1, 1], [5, 4, 3, 2, 1]) +def test_spearman_perm_percentage_valid(phen_quant_perc): + x, y = phen_quant_perc.spearman_permutation( + [1, 2, 3, 4, 5], [1, 2, 3, 4, 5], mode="percentage" + ) + assert round(x, 2) == 1 + + +def test_spearman_perm_concentration_valid(phen_quant_conc): + x, y = phen_quant_conc.spearman_permutation( + [1, 2, 3, 4, 5], [5, 4, 3, 2, 1], mode="concentration" + ) + assert round(x, 2) == 1.0