From fda96efddeabf1f10b36a55f0ab467303c74c045 Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Fri, 24 Oct 2025 10:42:32 +0200 Subject: [PATCH 01/16] Update metadata --- CHANGELOG.md | 8 ++++++++ pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06adffe..e324e62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.6.5] UNRELEASED + +## Added + +- Additional ion adducts to annotate multicharged ions where the [M+H]+ is not visible (e.g. often the case with peptides) + + + ## [0.6.4] 13-05-2025 ## Changed diff --git a/pyproject.toml b/pyproject.toml index 5bc8ce4..d13a7aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fermo_core" -version = "0.6.4" +version = "0.6.5" description = "Data processing/analysis functionality of metabolomics dashboard FERMO" readme = "README.md" requires-python = ">=3.11,<3.12" From e7a4229d284a0183c1436dbe9cfaa0f00282b5e5 Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Fri, 24 Oct 2025 10:56:34 +0200 Subject: [PATCH 02/16] installer update --- .gitignore | 1 + .pre-commit-config.yaml | 2 +- CHANGELOG.md | 2 -- README.md | 8 ++++---- pyproject.toml | 24 ------------------------ 5 files changed, 6 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index 28f1dbb..0ebbf9d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ results/ data_working/ .idea/ fermo_analysis/ +uv.lock # ms2query files fermo_core/libraries/ms2query/results diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c02a5b3..96f03fd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: hooks: - id: pytest name: pytest - entry: hatch run dev:pytest tests/ + entry: uv run pytest tests/ language: system types: [ file, python ] - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/CHANGELOG.md b/CHANGELOG.md index e324e62..fea3214 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Additional ion adducts to annotate multicharged ions where the [M+H]+ is not visible (e.g. often the case with peptides) - - ## [0.6.4] 13-05-2025 ## Changed diff --git a/README.md b/README.md index 9b823be..94ffd71 100644 --- a/README.md +++ b/README.md @@ -173,12 +173,12 @@ Instructions for setting up a development environment. #### Package Installation -*Assumes that `hatch` is installed* +*Assumes that `uv` is installed* ```commandline -hatch env create dev -hatch run dev:pre-commit install -hatch run dev:pytest --run_slow +uv sync --extra dev +uv run pre-commit install +uv run pytest --run_slow ``` ### Documentation diff --git a/pyproject.toml b/pyproject.toml index d13a7aa..215edaa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,30 +62,6 @@ fermo_core = "fermo_core.main:main_cli" requires = ["hatchling"] build-backend = "hatchling.build" -[tool.hatch.envs.default] -installer = "uv" - -[tool.hatch.envs.dev] -installer = "uv" -features = [ - "dev" -] - -[tool.hatch.envs.doc] -installer = "uv" -features = [ - "doc" -] - - -[tool.hatch.envs.test] -dependencies = [ - "pytest~=7.4.2", -] - -[[tool.hatch.envs.test.matrix]] -python = ["3.11"] - [tool.hatch.build.targets.sdist] exclude = [ From f3042b416ce21ea3d51c8441a461e6169579c964 Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Fri, 24 Oct 2025 17:27:57 +0200 Subject: [PATCH 03/16] added multicharged adduct support --- .../class_adduct_annotator.py | 144 ++++++++++++++++++ .../test_class_adduct_annotator.py | 18 +++ 2 files changed, 162 insertions(+) diff --git a/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py b/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py index 786cf48..8b70b06 100644 --- a/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py +++ b/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py @@ -1411,3 +1411,147 @@ def acetate_adduct(self: Self, feat1: int, feat2: int, s_name: str) -> bool: return True else: return False + + def double_quadruple(self: Self, feat1: int, feat2: int, s_name: str) -> bool: + """Determination of relationship between [M+2H]2+ and [M+4H]4+ + + Arguments: + feat1: feature 1 identifier + feat2: feature 2 identifier + s_name: the sample identifier + + Returns: + A bool indicating the outcome + """ + double = self.features.get(feat1) + quadruple = self.features.get(feat2) + + ppm = UtilityMethodManager.mass_deviation( + (double.mz * 2) - (2 * Mass().H), + (quadruple.mz * 4) - (4 * Mass().H), + quadruple.f_id, + ) + if ppm < self.params.AdductAnnotationParameters.mass_dev_ppm: + double = self.add_adduct_info(double) + quadruple = self.add_adduct_info(quadruple) + double.Annotations.adducts.append( + Adduct( + adduct_type="[M+2H]2+", + partner_adduct="[M+4H]4+", + partner_id=quadruple.f_id, + partner_mz=quadruple.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + quadruple.Annotations.adducts.append( + Adduct( + adduct_type="[M+4H]4+", + partner_adduct="[M+2H]2+", + partner_id=double.f_id, + partner_mz=double.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + self.features.modify(feat1, double) + self.features.modify(feat2, quadruple) + return True + else: + return False + + def double_triple(self: Self, feat1: int, feat2: int, s_name: str) -> bool: + """Determination of relationship between [M+2H]2+ and [M+3H]3+ + + Arguments: + feat1: feature 1 identifier + feat2: feature 2 identifier + s_name: the sample identifier + + Returns: + A bool indicating the outcome + """ + double = self.features.get(feat1) + triple = self.features.get(feat2) + + ppm = UtilityMethodManager.mass_deviation( + (double.mz * 2) - (2 * Mass().H), + (triple.mz * 3) - (3 * Mass().H), + triple.f_id, + ) + if ppm < self.params.AdductAnnotationParameters.mass_dev_ppm: + double = self.add_adduct_info(double) + triple = self.add_adduct_info(triple) + double.Annotations.adducts.append( + Adduct( + adduct_type="[M+2H]2+", + partner_adduct="[M+3H]3+", + partner_id=triple.f_id, + partner_mz=triple.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + triple.Annotations.adducts.append( + Adduct( + adduct_type="[M+3H]3+", + partner_adduct="[M+2H]2+", + partner_id=double.f_id, + partner_mz=double.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + self.features.modify(feat1, double) + self.features.modify(feat2, triple) + return True + else: + return False + + def quadruple_triple(self: Self, feat1: int, feat2: int, s_name: str) -> bool: + """Determination of relationship between [M+4H]4+ and [M+3H]3+ + + Arguments: + feat1: feature 1 identifier + feat2: feature 2 identifier + s_name: the sample identifier + + Returns: + A bool indicating the outcome + """ + quadruple = self.features.get(feat1) + triple = self.features.get(feat2) + + ppm = UtilityMethodManager.mass_deviation( + (quadruple.mz * 4) - (4 * Mass().H), + (triple.mz * 3) - (3 * Mass().H), + triple.f_id, + ) + if ppm < self.params.AdductAnnotationParameters.mass_dev_ppm: + quadruple = self.add_adduct_info(quadruple) + triple = self.add_adduct_info(triple) + quadruple.Annotations.adducts.append( + Adduct( + adduct_type="[M+4H]4+", + partner_adduct="[M+3H]3+", + partner_id=triple.f_id, + partner_mz=triple.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + triple.Annotations.adducts.append( + Adduct( + adduct_type="[M+3H]3+", + partner_adduct="[M+4H]4+", + partner_id=quadruple.f_id, + partner_mz=quadruple.mz, + diff_ppm=ppm, + sample=s_name, + ) + ) + self.features.modify(feat1, quadruple) + self.features.modify(feat2, triple) + return True + else: + return False diff --git a/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py b/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py index a260345..e10c770 100644 --- a/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py +++ b/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py @@ -223,3 +223,21 @@ def test_acetate_adduct_valid(adduct_annotator_min): adduct_annotator_min.features.entries[1].mz = 852.323614 adduct_annotator_min.features.entries[2].mz = 912.344741 assert adduct_annotator_min.acetate_adduct(1, 2, "sample1") + + +def test_double_quadruple_valid(adduct_annotator_min): + adduct_annotator_min.features.entries[1].mz = 1405.58893 + adduct_annotator_min.features.entries[2].mz = 703.29907 + assert adduct_annotator_min.double_quadruple(1, 2, "sample1") + + +def test_double_triple_valid(adduct_annotator_min): + adduct_annotator_min.features.entries[1].mz = 1405.58893 + adduct_annotator_min.features.entries[2].mz = 937.39628 + assert adduct_annotator_min.double_triple(1, 2, "sample1") + + +def test_quadruple_triple_valid(adduct_annotator_min): + adduct_annotator_min.features.entries[1].mz = 703.29907 + adduct_annotator_min.features.entries[2].mz = 937.39628 + assert adduct_annotator_min.quadruple_triple(1, 2, "sample1") From 4c5c28a955cb804546fab5c5c5cdeb583bcd7bda Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Fri, 24 Oct 2025 17:47:59 +0200 Subject: [PATCH 04/16] implemented quadruple --- .../annotation_manager/class_adduct_annotator.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py b/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py index 8b70b06..95cee26 100644 --- a/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py +++ b/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py @@ -281,6 +281,18 @@ def annotate_adducts_pos(self: Self, s_name: str | int): feat2.f_id, feat1.f_id, s_name ): continue + elif self.double_quadruple( + feat1.f_id, feat2.f_id, s_name + ) or self.double_quadruple(feat2.f_id, feat1.f_id, s_name): + continue + elif self.double_triple( + feat1.f_id, feat2.f_id, s_name + ) or self.double_triple(feat2.f_id, feat1.f_id, s_name): + continue + elif self.quadruple_triple( + feat1.f_id, feat2.f_id, s_name + ) or self.quadruple_triple(feat2.f_id, feat1.f_id, s_name): + continue def sodium_adduct(self: Self, feat1: int, feat2: int, s_name: str) -> bool: """Determination of [M+Na]+ adduct, add information From 9a874fd0802652c5d894dc7b6bfe65518033b51d Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Sat, 25 Oct 2025 10:45:30 +0200 Subject: [PATCH 05/16] Implemented ion identity network --- .../class_adduct_annotator.py | 78 ++++++++++++++++++- .../class_annotation_manager.py | 2 +- .../class_sim_networks_manager.py | 2 +- .../test_class_adduct_annotator.py | 76 +++++++++++++++++- 4 files changed, 148 insertions(+), 10 deletions(-) diff --git a/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py b/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py index 95cee26..6172550 100644 --- a/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py +++ b/fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py @@ -25,6 +25,7 @@ import logging from typing import Self +import networkx as nx from pydantic import BaseModel from fermo_core.config.class_default_settings import DefaultMasses as Mass @@ -32,9 +33,10 @@ Adduct, Annotations, Feature, + SimNetworks, ) from fermo_core.data_processing.class_repository import Repository -from fermo_core.data_processing.class_stats import Stats +from fermo_core.data_processing.class_stats import SpecSimNet, Stats from fermo_core.input_output.class_parameter_manager import ParameterManager from fermo_core.utils.utility_method_manager import UtilityMethodManager @@ -59,13 +61,13 @@ class AdductAnnotator(BaseModel): features: Repository samples: Repository - def return_features(self: Self) -> Repository: + def return_features(self: Self) -> tuple[Repository, Stats]: """Returns modified attributes from AdductAnnotator to the calling function Returns: - Modified Feature Repository objects. + Modified Feature Repository objects and Stats Object """ - return self.features + return self.features, self.stats def run_analysis(self: Self): """Organizes calling of data analysis steps.""" @@ -85,6 +87,74 @@ def run_analysis(self: Self): self.annotate_adducts_neg(s_name) self.dereplicate_adducts() + self.create_network() + + def create_network(self: Self): + """Creates ion identity networks + + add_node() in NetworkX is idempotent — if the node already exists, it is ignored. + """ + logger.info("'AnnotationManager/AdductAnnotator': started ion identity network") + + g = nx.Graph() + + for f_id in self.stats.active_features: + feature = self.features.get(f_id) + + if ( + not feature.Annotations + or not feature.Annotations.adducts + or len(feature.Annotations.adducts) == 0 + ): + g.add_node(f_id) + continue + + g.add_node(f_id) + + for adduct in feature.Annotations.adducts: + g.add_node(adduct.partner_id) + g.add_edge( + f_id, + adduct.partner_id, + ppm=adduct.diff_ppm, + adduct_type=adduct.adduct_type, + partner_adduct=adduct.partner_adduct, + ) + + subnetworks = {} + for i, component in enumerate(nx.connected_components(g)): + subnetworks[i] = g.subgraph(component).copy() + subnetworks[i].graph["name"] = i + + summary = {} + for sub in subnetworks: + ids = {int(node) for node in subnetworks[sub].nodes} + summary[sub] = ids + + if not self.stats.networks: + self.stats.networks = {} + self.stats.networks["ion_identity"] = SpecSimNet( + algorithm="ion_identity", + network=g, + subnetworks=subnetworks, + summary=summary, + ) + + for f_id in self.stats.active_features: + feature = self.features.get(f_id) + if feature.networks is None: + feature.networks = {} + + for cluster_id in summary: + if f_id in summary[cluster_id]: + feature.networks["ion_identity"] = SimNetworks( + algorithm="ion_identity", network_id=cluster_id + ) + self.features.modify(f_id, feature) + + logger.info( + "'AnnotationManager/AdductAnnotator': completed ion identity network" + ) @staticmethod def add_adduct_info(feature: Feature) -> Feature: diff --git a/fermo_core/data_analysis/annotation_manager/class_annotation_manager.py b/fermo_core/data_analysis/annotation_manager/class_annotation_manager.py index 347ecac..f1fe2b5 100644 --- a/fermo_core/data_analysis/annotation_manager/class_annotation_manager.py +++ b/fermo_core/data_analysis/annotation_manager/class_annotation_manager.py @@ -260,7 +260,7 @@ def run_feature_adduct_annotation(self: Self): samples=self.samples, ) adduct_annotator.run_analysis() - self.features = adduct_annotator.return_features() + self.features, self.stats = adduct_annotator.return_features() self.params.AdductAnnotationParameters.module_passed = True except Exception as e: logger.error(str(e)) diff --git a/fermo_core/data_analysis/sim_networks_manager/class_sim_networks_manager.py b/fermo_core/data_analysis/sim_networks_manager/class_sim_networks_manager.py index e85c3d8..0bf83f8 100644 --- a/fermo_core/data_analysis/sim_networks_manager/class_sim_networks_manager.py +++ b/fermo_core/data_analysis/sim_networks_manager/class_sim_networks_manager.py @@ -54,7 +54,7 @@ class SimNetworksManager(BaseModel): samples: Repository object, holds "Sample" objects Notes: - `UtilityMethodManager` baseclass gives additional utility methods. + `UtilityMethodManager` class gives additional utility methods. """ params: ParameterManager diff --git a/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py b/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py index e10c770..95809bd 100644 --- a/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py +++ b/tests/test_data_analysis/test_annotation_manager/test_class_adduct_annotator.py @@ -1,9 +1,14 @@ +import networkx as nx import pytest from fermo_core.data_analysis.annotation_manager.class_adduct_annotator import ( AdductAnnotator, ) -from fermo_core.data_processing.builder_feature.dataclass_feature import Adduct, Feature +from fermo_core.data_processing.builder_feature.dataclass_feature import ( + Adduct, + Feature, + SimNetworks, +) from fermo_core.data_processing.class_repository import Repository from fermo_core.data_processing.class_stats import Stats from fermo_core.input_output.class_parameter_manager import ParameterManager @@ -46,10 +51,73 @@ def test_return_features_valid(adduct_annotator_min): def test_run_analysis_valid(adduct_annotator): adduct_annotator.run_analysis() - features = adduct_annotator.return_features() + features, stats = adduct_annotator.return_features() assert features.entries[131].Annotations.adducts[0] is not None +def test_create_network_singletons_valid(adduct_annotator_min): + """Condiction: feature have no adducts - singletons in graph""" + adduct_annotator_min.create_network() + assert ( + len( + list( + nx.connected_components( + adduct_annotator_min.stats.networks["ion_identity"].network + ) + ) + ) + == 2 + ) + + +def test_create_network_subgraph_valid(adduct_annotator_min): + """Condition: features are related and in same subgraph""" + adduct_annotator_min.features.entries[1].mz = 415.2098 + adduct_annotator_min.features.entries[2].mz = 437.1912 + adduct_annotator_min.sodium_adduct(1, 2, "sample1") + adduct_annotator_min.create_network() + assert ( + len( + list( + nx.connected_components( + adduct_annotator_min.stats.networks["ion_identity"].network + ) + ) + ) + == 1 + ) + + +def test_create_network_two_subgraphs(adduct_annotator_min): + """Condition: two subgraphs of two features each""" + adduct_annotator_min.features.entries[1].mz = 415.2098 + adduct_annotator_min.features.entries[2].mz = 437.1912 + adduct_annotator_min.sodium_adduct(1, 2, "sample1") + + adduct_annotator_min.features.add(3, Feature(f_id=3, mz=1648.4547)) + adduct_annotator_min.features.add(4, Feature(f_id=4, mz=1649.4578)) + adduct_annotator_min.stats.active_features.update([3, 4]) + adduct_annotator_min.plus1_isotope(3, 4, "sample1") + adduct_annotator_min.create_network() + assert ( + len( + list( + nx.connected_components( + adduct_annotator_min.stats.networks["ion_identity"].network + ) + ) + ) + == 2 + ) + assert adduct_annotator_min.stats.networks["ion_identity"].summary == { + 0: {1, 2}, + 1: {3, 4}, + } + assert isinstance( + adduct_annotator_min.features.entries[1].networks["ion_identity"], SimNetworks + ) + + def test_add_adduct_info_valid(adduct_annotator_min): feature = adduct_annotator_min.add_adduct_info(Feature()) assert feature.Annotations is not None @@ -57,7 +125,7 @@ def test_add_adduct_info_valid(adduct_annotator_min): def test_annotate_spec_features_valid(adduct_annotator): adduct_annotator.annotate_adducts_pos("5458_5457_mod.mzXML") - features = adduct_annotator.return_features() + features, stats = adduct_annotator.return_features() assert features.entries[131].Annotations.adducts[0] is not None @@ -76,7 +144,7 @@ def test_dereplicate_adducts_valid(adduct_annotator_min): ) ) adduct_annotator_min.dereplicate_adducts() - features = adduct_annotator_min.return_features() + features, stats = adduct_annotator_min.return_features() adduct_dict = features.entries[1].Annotations.adducts[0].to_json() assert len(adduct_dict["samples"]) == 2 From 8dc41deeac8a98b868214d44baedeed1bf6eac8e Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Wed, 21 Jan 2026 15:26:04 +0100 Subject: [PATCH 06/16] implemented spearman correlation --- CHANGELOG.md | 7 +- fermo_core/config/schema.json | 2 +- .../phenotype_manager/class_phen_quant_ass.py | 85 ++++++++++++++----- .../class_phenotype_manager.py | 2 + fermo_core/input_output/param_handlers.py | 6 +- pyproject.toml | 2 +- .../test_class_phen_quant_assigner.py | 38 +++++++-- 7 files changed, 108 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fea3214..171374b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,11 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.6.5] UNRELEASED +## [0.7.0] UNRELEASED ## Added - Additional ion adducts to annotate multicharged ions where the [M+H]+ is not visible (e.g. often the case with peptides) +- Added Spearman correlation, optionally with permutation test, for phenotype correlation + +## Changed + +- Removed z-transformation from Pearson correlation in quantitative phenotype score calculation ## [0.6.4] 13-05-2025 diff --git a/fermo_core/config/schema.json b/fermo_core/config/schema.json index 4ac4107..9f80bdb 100644 --- a/fermo_core/config/schema.json +++ b/fermo_core/config/schema.json @@ -290,7 +290,7 @@ }, "algorithm": { "type": "string", - "enum": ["pearson"] + "enum": ["pearson", "spearman", "spearman_permutation"] }, "fdr_corr": { "type": "string", diff --git a/fermo_core/data_analysis/phenotype_manager/class_phen_quant_ass.py b/fermo_core/data_analysis/phenotype_manager/class_phen_quant_ass.py index 402fc99..044c3ec 100644 --- a/fermo_core/data_analysis/phenotype_manager/class_phen_quant_ass.py +++ b/fermo_core/data_analysis/phenotype_manager/class_phen_quant_ass.py @@ -22,7 +22,7 @@ import numpy as np from pydantic import BaseModel -from scipy.stats import pearsonr, zscore +from scipy import stats from statsmodels.stats.multitest import multipletests from fermo_core.data_processing.builder_feature.dataclass_feature import ( @@ -44,6 +44,7 @@ class PhenQuantAss(BaseModel): p_val_cutoff: the corrected p-value cutoff fdr_corr: the type of FDR correction mode: the type of quantitative data + alg: the algorithm used for correlation stats: Stats object, holds stats on molecular features and samples features: Repository object, holds "General Feature" objects samples: Repository object holding Sample objects @@ -55,6 +56,7 @@ class PhenQuantAss(BaseModel): p_val_cutoff: float fdr_corr: str mode: str + alg: str stats: Stats features: Repository samples: Repository @@ -146,31 +148,72 @@ def valid_constant(values: list) -> bool: return True @staticmethod - def pearson_percentage(areas: list, activs: list) -> tuple[float, float]: + def pearson(areas: list, activs: list, mode: str) -> tuple[float, float]: """Calculate regular pearson coefficient Args: areas: the feature areas per sample activs: the measured activities per sample + mode: percentage or concentration Returns: - the pearson score and the p-value + the Pearson score and the p-value """ - return pearsonr(zscore(areas), zscore(activs)) + if mode == "concentration": + activs = [(1 / val) if val != 0 else 0 for val in activs] + + return stats.pearsonr(areas, activs) @staticmethod - def pearson_concentration(areas: list, activs: list) -> tuple[float, float]: - """Calculate regular pearson coefficient + def spearman(areas: list, activs: list, mode: str) -> tuple[float, float]: + """Calculate Spearman rank correlation + + Args: + areas: the feature areas per sample + activs: the measured activities per sample + mode: percentage or concentration + + Returns: + the Spearman score and the p-value + """ + if mode == "concentration": + activs = [(1 / val) if val != 0 else 0 for val in activs] + + r_s = stats.spearmanr(areas, activs).statistic + p = stats.spearmanr(areas, activs).pvalue + return r_s, p + + @staticmethod + def spearman_permutation( + areas: list, activs: list, mode: str + ) -> tuple[float, float]: + """Calculate Spearman rank correlation with two-sided permutation test Args: areas: the feature areas per sample activs: the measured activities per sample + mode: percentage or concentration Returns: - the pearson score and the p-value + the Spearman score and the p-value """ - activs = [(1 / val) if val != 0 else 0 for val in activs] - return pearsonr(zscore(areas), zscore(activs)) + if mode == "concentration": + activs = [(1 / val) if val != 0 else 0 for val in activs] + + r_s = stats.spearmanr(areas, activs).statistic + + def statistic(x): + return stats.spearmanr(x, activs).statistic + + res = stats.permutation_test( + (areas,), + statistic, + permutation_type="pairings", + n_resamples=10000, + alternative="two-sided", + ) + + return r_s, res.pvalue def calculate_correlation(self: Self): """Collect data and prepare calculation @@ -200,22 +243,24 @@ def calculate_correlation(self: Self): if not self.valid_constant(areas) or not self.valid_constant(activs): continue - if self.mode == "percentage": - pearson_s, p_val = self.pearson_percentage( - areas=areas, activs=activs + if self.alg == "pearson": + corr, pval = self.pearson( + areas=areas, activs=activs, mode=self.mode ) - elif self.mode == "concentration": - pearson_s, p_val = self.pearson_concentration( - areas=areas, activs=activs + elif self.alg == "spearman": + corr, pval = self.spearman( + areas=areas, activs=activs, mode=self.mode ) - else: - raise KeyError( - f"Unsupported type of input data '{self.mode}' - SKIP" + elif self.alg == "spearman_permutation": + corr, pval = self.spearman_permutation( + areas=areas, activs=activs, mode=self.mode ) + else: + raise KeyError(f"Unsupported algorithm '{self.alg}' - SKIP") self.assays[assay.category][f_id] = {} - self.assays[assay.category][f_id]["corr"] = pearson_s - self.assays[assay.category][f_id]["raw_p"] = p_val + self.assays[assay.category][f_id]["corr"] = corr + self.assays[assay.category][f_id]["raw_p"] = pval self.assays[assay.category][f_id]["datatype"] = assay.datatype def correct_p_val(self) -> None: diff --git a/fermo_core/data_analysis/phenotype_manager/class_phenotype_manager.py b/fermo_core/data_analysis/phenotype_manager/class_phenotype_manager.py index 576701b..5eeb4d3 100644 --- a/fermo_core/data_analysis/phenotype_manager/class_phenotype_manager.py +++ b/fermo_core/data_analysis/phenotype_manager/class_phenotype_manager.py @@ -131,6 +131,7 @@ def run_assigner_quant_percentage(self: Self): p_val_cutoff=self.params.PhenoQuantPercentAssgnParams.p_val_cutoff, fdr_corr=self.params.PhenoQuantPercentAssgnParams.fdr_corr, mode="percentage", + alg=self.params.PhenoQuantPercentAssgnParams.algorithm, features=self.features, stats=self.stats, samples=self.samples, @@ -170,6 +171,7 @@ def run_assigner_quant_concentration(self: Self): coeff_cutoff=self.params.PhenoQuantConcAssgnParams.coeff_cutoff, p_val_cutoff=self.params.PhenoQuantConcAssgnParams.p_val_cutoff, fdr_corr=self.params.PhenoQuantConcAssgnParams.fdr_corr, + alg=self.params.PhenoQuantConcAssgnParams.algorithm, mode="percentage", features=self.features, stats=self.stats, diff --git a/fermo_core/input_output/param_handlers.py b/fermo_core/input_output/param_handlers.py index be723ac..f241665 100644 --- a/fermo_core/input_output/param_handlers.py +++ b/fermo_core/input_output/param_handlers.py @@ -680,7 +680,7 @@ class PhenoQuantPercentAssgnParams(BaseModel): def val(self): ValidationManager.validate_allowed(self.sample_avg, ["mean", "median"]) ValidationManager.validate_allowed(self.value, ["area"]) - ValidationManager.validate_allowed(self.algorithm, ["pearson"]) + ValidationManager.validate_allowed(self.algorithm, ["pearson", "spearman"]) ValidationManager.validate_allowed( self.fdr_corr, [ @@ -743,7 +743,9 @@ class PhenoQuantConcAssgnParams(BaseModel): def val(self): ValidationManager.validate_allowed(self.sample_avg, ["mean", "median"]) ValidationManager.validate_allowed(self.value, ["area"]) - ValidationManager.validate_allowed(self.algorithm, ["pearson"]) + ValidationManager.validate_allowed( + self.algorithm, ["pearson", "spearman", "spearman_permutation"] + ) ValidationManager.validate_allowed( self.fdr_corr, [ diff --git a/pyproject.toml b/pyproject.toml index 215edaa..647eb71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fermo_core" -version = "0.6.5" +version = "0.7.0" description = "Data processing/analysis functionality of metabolomics dashboard FERMO" readme = "README.md" requires-python = ">=3.11,<3.12" diff --git a/tests/test_data_analysis/test_phenotype_manager/test_class_phen_quant_assigner.py b/tests/test_data_analysis/test_phenotype_manager/test_class_phen_quant_assigner.py index 3ee06c4..88f84f9 100644 --- a/tests/test_data_analysis/test_phenotype_manager/test_class_phen_quant_assigner.py +++ b/tests/test_data_analysis/test_phenotype_manager/test_class_phen_quant_assigner.py @@ -17,6 +17,7 @@ def phen_quant_perc(): coeff_cutoff=0.7, p_val_cutoff=0.05, fdr_corr="bonferroni", + alg="pearson", mode="percentage", stats=Stats(), features=Repository(), @@ -85,6 +86,7 @@ def phen_quant_conc(): coeff_cutoff=0.7, p_val_cutoff=0.05, fdr_corr="bonferroni", + alg="pearson", mode="concentration", stats=Stats(), features=Repository(), @@ -227,20 +229,38 @@ def test_valid_constant_invalid(phen_quant_conc): def test_pearson_percentage_valid(phen_quant_perc): - x, y = phen_quant_perc.pearson_percentage([1, 2, 3, 4, 5], [1, 2, 3, 4, 5]) + x, y = phen_quant_perc.pearson([1, 2, 3, 4, 5], [1, 2, 3, 4, 5], mode="percentage") assert x == 1 -def test_pearson_percentage_invalid(phen_quant_perc): - with pytest.raises(ValueError): - phen_quant_perc.pearson_percentage([1, 1, 1, 1, 1], [1, 2, 3, 4, 5]) +def test_pearson_concentration_valid(phen_quant_conc): + x, y = phen_quant_conc.pearson( + [1, 2, 3, 4, 5], [5, 4, 3, 2, 1], mode="concentration" + ) + assert round(x, 2) == 0.9 -def test_pearson_concentration_valid(phen_quant_conc): - x, y = phen_quant_conc.pearson_concentration([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]) +def test_spearman_percentage_valid(phen_quant_perc): + x, y = phen_quant_perc.pearson([1, 2, 3, 4, 5], [1, 2, 3, 4, 5], mode="percentage") + assert x == 1 + + +def test_spearman_concentration_valid(phen_quant_conc): + x, y = phen_quant_conc.pearson( + [1, 2, 3, 4, 5], [5, 4, 3, 2, 1], mode="concentration" + ) assert round(x, 2) == 0.9 -def test_pearson_concentration_invalid(phen_quant_conc): - with pytest.raises(ValueError): - phen_quant_conc.pearson_concentration([1, 1, 1, 1, 1], [5, 4, 3, 2, 1]) +def test_spearman_perm_percentage_valid(phen_quant_perc): + x, y = phen_quant_perc.spearman_permutation( + [1, 2, 3, 4, 5], [1, 2, 3, 4, 5], mode="percentage" + ) + assert round(x, 2) == 1 + + +def test_spearman_perm_concentration_valid(phen_quant_conc): + x, y = phen_quant_conc.spearman_permutation( + [1, 2, 3, 4, 5], [5, 4, 3, 2, 1], mode="concentration" + ) + assert round(x, 2) == 1.0 From 8ec640e7a9e99b3d1adaf25d842b28b5c27b034f Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Sun, 25 Jan 2026 19:46:58 +0100 Subject: [PATCH 07/16] implemented t-test for qualitative bioactivity --- CHANGELOG.md | 3 +- fermo_core/config/schema.json | 3 + .../class_phen_qual_assigner.py | 77 +++++++++++-------- fermo_core/input_output/param_handlers.py | 3 + tests/test_data/test.parameters.json | 3 +- .../test_class_phen_qual_assigner.py | 19 ++++- 6 files changed, 72 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 171374b..0dc31d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## Added - Additional ion adducts to annotate multicharged ions where the [M+H]+ is not visible (e.g. often the case with peptides) -- Added Spearman correlation, optionally with permutation test, for phenotype correlation +- Quantitative phenotype: Added Spearman correlation, optionally with permutation test +- Qualitative phenotype: Added optional t-test, can be disabled by setting p_value_theshold to 0 ## Changed diff --git a/fermo_core/config/schema.json b/fermo_core/config/schema.json index 9f80bdb..6081ae0 100644 --- a/fermo_core/config/schema.json +++ b/fermo_core/config/schema.json @@ -253,6 +253,9 @@ "height", "area" ] + }, + "p_val_cutoff": { + "$ref": "#/$defs/r_perc" } } }, diff --git a/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py b/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py index 9e9ae64..ca80e72 100644 --- a/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py +++ b/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py @@ -25,7 +25,9 @@ from statistics import mean, median from typing import Optional, Self +import numpy as np from pydantic import BaseModel +from scipy.stats import ttest_ind from fermo_core.data_processing.builder_feature.dataclass_feature import ( Annotations, @@ -143,6 +145,30 @@ def get_value(self: Self, f_id: int, sample_ids: set) -> list: else: return values_s_ids + def determine_active( + self, feature: Feature, fct: float, act: list, inact: list + ) -> Feature: + """Determine active based on fold-change and t-test""" + if fct < self.params.PhenoQualAssgnParams.factor: + return feature + + phenotype = Phenotype( + score=1.0, + format="qualitative", + descr=f"Fold-difference: '{round(fct, 2)}'", + ) + + if len(act) > 3 and len(inact): + stat, pval = ttest_ind(np.log10(act), np.log10(inact), equal_var=False) + if pval <= self.params.PhenoQualAssgnParams.p_val_cutoff: + phenotype.descr = f"Fold-difference: '{round(fct, 2)}', Welsh's t-test statistic: '{round(stat, 2)}'" + phenotype.p_value = pval + + feature = self.add_annotation_attribute(feature=feature) + feature.Annotations.phenotypes.append(phenotype) + self.stats.phenotypes[0].f_ids_positive.add(feature.f_id) + return feature + def bin_intersection(self: Self): """Bin the intersection between positive and negative f_ids based on factor @@ -164,41 +190,26 @@ def bin_intersection(self: Self): match self.params.PhenoQualAssgnParams.algorithm: case "minmax": - factor = min(vals_act) / max(vals_inact) - if factor >= self.params.PhenoQualAssgnParams.factor: - feature = self.add_annotation_attribute(feature=feature) - feature.Annotations.phenotypes.append( - Phenotype( - score=1.0, - format="qualitative", - descr=f"Fold-difference: '{round(factor, 2)}'", - ) - ) - self.stats.phenotypes[0].f_ids_positive.add(f_id) + feature = self.determine_active( + feature=feature, + fct=min(vals_act) / max(vals_inact), + act=vals_act, + inact=vals_inact, + ) case "mean": - factor = mean(vals_act) / mean(vals_inact) - if factor >= self.params.PhenoQualAssgnParams.factor: - feature = self.add_annotation_attribute(feature=feature) - feature.Annotations.phenotypes.append( - Phenotype( - score=1.0, - format="qualitative", - descr=f"Fold-difference: '{round(factor, 2)}'", - ) - ) - self.stats.phenotypes[0].f_ids_positive.add(f_id) + feature = self.determine_active( + feature=feature, + fct=mean(vals_act) / mean(vals_inact), + act=vals_act, + inact=vals_inact, + ) case "median": - factor = median(vals_act) / median(vals_inact) - if factor >= self.params.PhenoQualAssgnParams.factor: - feature = self.add_annotation_attribute(feature=feature) - feature.Annotations.phenotypes.append( - Phenotype( - score=1.0, - format="qualitative", - descr=f"Fold-difference: '{round(factor, 2)}'", - ) - ) - self.stats.phenotypes[0].f_ids_positive.add(f_id) + feature = self.determine_active( + feature=feature, + fct=median(vals_act) / median(vals_inact), + act=vals_act, + inact=vals_inact, + ) case _: raise RuntimeError("'PhenQualAssigner': Unsupported algorithm.") diff --git a/fermo_core/input_output/param_handlers.py b/fermo_core/input_output/param_handlers.py index f241665..b156e33 100644 --- a/fermo_core/input_output/param_handlers.py +++ b/fermo_core/input_output/param_handlers.py @@ -625,6 +625,7 @@ class PhenoQualAssgnParams(BaseModel): factor: An integer fold-change to differentiate phenotype-assoc. features. algorithm: the algorithm to summarize values of active vs inactive samples. value: the type of value to use for determination + p_val_cutoff: minimum significance threshold derived by t-test; 0 to disable t-test module_passed: indicates that the module ran without errors """ @@ -632,12 +633,14 @@ class PhenoQualAssgnParams(BaseModel): factor: PositiveInt algorithm: str value: str + p_val_cutoff: float = 0.0 module_passed: bool = False @model_validator(mode="after") def val(self): ValidationManager.validate_allowed(self.algorithm, ["mean", "median", "minmax"]) ValidationManager.validate_allowed(self.value, ["height", "area"]) + ValidationManager.validate_float_zero_one(self.p_val_cutoff) return self def to_json(self: Self) -> dict: diff --git a/tests/test_data/test.parameters.json b/tests/test_data/test.parameters.json index 5d4ebed..c782a2c 100644 --- a/tests/test_data/test.parameters.json +++ b/tests/test_data/test.parameters.json @@ -76,7 +76,8 @@ "activate_module": true, "factor": 5, "algorithm": "minmax", - "value": "area" + "value": "area", + "p_val_cutoff": 0.05 }, "PhenoQuantPercentAssgnParameters": { "activate_module": false, diff --git a/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py b/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py index b1c2c22..4c5e5ae 100644 --- a/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py +++ b/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py @@ -81,7 +81,13 @@ def phen_qual(): ) ] phen_qual.params.PhenoQualAssgnParams = PhenoQualAssgnParams( - **{"activate_module": True, "factor": 5, "algorithm": "minmax", "value": "area"} + **{ + "activate_module": True, + "factor": 5, + "algorithm": "minmax", + "value": "area", + "p_val_cutoff": 0.0, + } ) phen_qual.params.PhenotypeParameters = PhenotypeParameters( **{ @@ -133,3 +139,14 @@ def test_bin_intersection_negative(phen_qual): ] phen_qual.bin_intersection() assert len(phen_qual.stats.phenotypes[0].f_ids_positive) == 0 + + +def test_determine_active(phen_qual): + phen_qual.params.PhenoQualAssgnParams.p_val_cutoff = 0.05 + feature = phen_qual.determine_active( + feature=phen_qual.features.get(1), + fct=10, + act=[66, 77, 88, 99, 111], + inact=[6, 7, 8, 9], + ) + assert feature.Annotations.phenotypes[0].p_value is not None From 2f390e2729b54973b97ae75f886f36bb509f3170 Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Tue, 27 Jan 2026 14:37:36 +0100 Subject: [PATCH 08/16] Added spearman-permutation as option to quant phenotype --- fermo_core/input_output/param_handlers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fermo_core/input_output/param_handlers.py b/fermo_core/input_output/param_handlers.py index b156e33..8d52df2 100644 --- a/fermo_core/input_output/param_handlers.py +++ b/fermo_core/input_output/param_handlers.py @@ -683,7 +683,9 @@ class PhenoQuantPercentAssgnParams(BaseModel): def val(self): ValidationManager.validate_allowed(self.sample_avg, ["mean", "median"]) ValidationManager.validate_allowed(self.value, ["area"]) - ValidationManager.validate_allowed(self.algorithm, ["pearson", "spearman"]) + ValidationManager.validate_allowed( + self.algorithm, ["pearson", "spearman", "spearman_permutation"] + ) ValidationManager.validate_allowed( self.fdr_corr, [ From 933400b3b45200a025e6ef18dbb6ca47558ced88 Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Thu, 29 Jan 2026 08:14:58 +0100 Subject: [PATCH 09/16] implemented wilcoxon for qual phenotype data --- fermo_core/config/schema.json | 8 +++++ .../class_phen_qual_assigner.py | 34 +++++++++++++++---- fermo_core/input_output/param_handlers.py | 5 ++- .../test_class_phen_qual_assigner.py | 1 + 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/fermo_core/config/schema.json b/fermo_core/config/schema.json index 6081ae0..33923a6 100644 --- a/fermo_core/config/schema.json +++ b/fermo_core/config/schema.json @@ -256,6 +256,14 @@ }, "p_val_cutoff": { "$ref": "#/$defs/r_perc" + }, + "test": { + "type": "string", + "enum": [ + "No test", + "Welsh", + "Wilcoxon" + ] } } }, diff --git a/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py b/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py index ca80e72..2dc3f23 100644 --- a/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py +++ b/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py @@ -27,7 +27,7 @@ import numpy as np from pydantic import BaseModel -from scipy.stats import ttest_ind +from scipy.stats import ranksums, ttest_ind from fermo_core.data_processing.builder_feature.dataclass_feature import ( Annotations, @@ -148,7 +148,7 @@ def get_value(self: Self, f_id: int, sample_ids: set) -> list: def determine_active( self, feature: Feature, fct: float, act: list, inact: list ) -> Feature: - """Determine active based on fold-change and t-test""" + """Determine active based on fold-change and test""" if fct < self.params.PhenoQualAssgnParams.factor: return feature @@ -158,11 +158,31 @@ def determine_active( descr=f"Fold-difference: '{round(fct, 2)}'", ) - if len(act) > 3 and len(inact): - stat, pval = ttest_ind(np.log10(act), np.log10(inact), equal_var=False) - if pval <= self.params.PhenoQualAssgnParams.p_val_cutoff: - phenotype.descr = f"Fold-difference: '{round(fct, 2)}', Welsh's t-test statistic: '{round(stat, 2)}'" - phenotype.p_value = pval + if len(act) > 3 and len(inact) > 3: + log_act = np.log10(act) + log_inact = np.log10(inact) + + match self.params.PhenoQualAssgnParams.test: + case "Welsh": + stat, pval = ttest_ind(log_act, log_inact, equal_var=False) + if ( + pval < self.params.PhenoQualAssgnParams.p_val_cutoff + or self.params.PhenoQualAssgnParams.p_val_cutoff == 0 + ): + phenotype.descr = f"Fold-difference: '{round(fct, 2)}', Welsh's t-test statistic: '{round(stat, 2)}'" + phenotype.p_value = pval + case "Wilcoxon": + stat, pval = ranksums(log_act, log_inact) + if ( + pval < self.params.PhenoQualAssgnParams.p_val_cutoff + or self.params.PhenoQualAssgnParams.p_val_cutoff == 0 + ): + phenotype.descr = f"Fold-difference: '{round(fct, 2)}', Wilcoxon rank-sum test statistic: '{round(stat, 2)}'" + phenotype.p_value = pval + case _: + logger.warning( + f"PhenQualAssigner: test '{self.params.PhenoQualAssgnParams.test}' not recognized - SKIP" + ) feature = self.add_annotation_attribute(feature=feature) feature.Annotations.phenotypes.append(phenotype) diff --git a/fermo_core/input_output/param_handlers.py b/fermo_core/input_output/param_handlers.py index 8d52df2..520e3c5 100644 --- a/fermo_core/input_output/param_handlers.py +++ b/fermo_core/input_output/param_handlers.py @@ -625,7 +625,8 @@ class PhenoQualAssgnParams(BaseModel): factor: An integer fold-change to differentiate phenotype-assoc. features. algorithm: the algorithm to summarize values of active vs inactive samples. value: the type of value to use for determination - p_val_cutoff: minimum significance threshold derived by t-test; 0 to disable t-test + p_val_cutoff: minimum significance threshold derived by test; 0 to disable test + test: type of test performed module_passed: indicates that the module ran without errors """ @@ -634,12 +635,14 @@ class PhenoQualAssgnParams(BaseModel): algorithm: str value: str p_val_cutoff: float = 0.0 + test: str = "No test" module_passed: bool = False @model_validator(mode="after") def val(self): ValidationManager.validate_allowed(self.algorithm, ["mean", "median", "minmax"]) ValidationManager.validate_allowed(self.value, ["height", "area"]) + ValidationManager.validate_allowed(self.test, ["No test", "Welsh", "Wilcoxon"]) ValidationManager.validate_float_zero_one(self.p_val_cutoff) return self diff --git a/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py b/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py index 4c5e5ae..5d54b0a 100644 --- a/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py +++ b/tests/test_data_analysis/test_phenotype_manager/test_class_phen_qual_assigner.py @@ -143,6 +143,7 @@ def test_bin_intersection_negative(phen_qual): def test_determine_active(phen_qual): phen_qual.params.PhenoQualAssgnParams.p_val_cutoff = 0.05 + phen_qual.params.PhenoQualAssgnParams.test = "Wilcoxon" feature = phen_qual.determine_active( feature=phen_qual.features.get(1), fct=10, From a71eabf654a4038f490a96e2a8a8546170fe4c2f Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Thu, 29 Jan 2026 08:17:12 +0100 Subject: [PATCH 10/16] updated changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0dc31d8..b783de6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,13 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.7.0] UNRELEASED +## [0.7.0] 29-01-2026 ## Added - Additional ion adducts to annotate multicharged ions where the [M+H]+ is not visible (e.g. often the case with peptides) - Quantitative phenotype: Added Spearman correlation, optionally with permutation test -- Qualitative phenotype: Added optional t-test, can be disabled by setting p_value_theshold to 0 +- Qualitative phenotype: Added optional testing (Welsh, Wilcoxon) to reduce spurious hits at low fold-changes. Can be disabled by setting p_value_theshold to 0 or selecting "no test" ## Changed From 4ef8920e4c8cfee651b21af636575a9cc01b8bee Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Thu, 29 Jan 2026 19:40:19 +0100 Subject: [PATCH 11/16] Added Wilcoxon test for qualitative data --- CHANGELOG.md | 3 ++- fermo_core/config/schema.json | 2 +- .../data_processing/builder_feature/dataclass_feature.py | 4 ++-- fermo_core/input_output/param_handlers.py | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b783de6..e834146 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,11 +11,12 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Additional ion adducts to annotate multicharged ions where the [M+H]+ is not visible (e.g. often the case with peptides) - Quantitative phenotype: Added Spearman correlation, optionally with permutation test -- Qualitative phenotype: Added optional testing (Welsh, Wilcoxon) to reduce spurious hits at low fold-changes. Can be disabled by setting p_value_theshold to 0 or selecting "no test" +- Qualitative phenotype: Added optional testing (Welsh, Wilcoxon) to reduce spurious hits at low fold-changes (disabled by setting p_value_theshold to 0 or specifying "None"). ## Changed - Removed z-transformation from Pearson correlation in quantitative phenotype score calculation +- For qualitative phenotype assigning, set p-value to "N/A" if not calculated (was 1 before) ## [0.6.4] 13-05-2025 diff --git a/fermo_core/config/schema.json b/fermo_core/config/schema.json index 33923a6..8ba7977 100644 --- a/fermo_core/config/schema.json +++ b/fermo_core/config/schema.json @@ -260,7 +260,7 @@ "test": { "type": "string", "enum": [ - "No test", + "None", "Welsh", "Wilcoxon" ] diff --git a/fermo_core/data_processing/builder_feature/dataclass_feature.py b/fermo_core/data_processing/builder_feature/dataclass_feature.py index 9cddf68..2398efb 100644 --- a/fermo_core/data_processing/builder_feature/dataclass_feature.py +++ b/fermo_core/data_processing/builder_feature/dataclass_feature.py @@ -180,9 +180,9 @@ def to_json(self: Self) -> dict: "category": self.category if self.category is not None else "N/A", "descr": self.descr if self.descr is not None else "N/A", "score": round(self.score, 6), - "p_value": round(self.p_value, 10) if self.p_value is not None else 1.0, + "p_value": round(self.p_value, 10) if self.p_value is not None else "N/A", "p_value_corr": ( - round(self.p_value_corr, 10) if self.p_value_corr is not None else 1.0 + round(self.p_value_corr, 10) if self.p_value_corr is not None else "N/A" ), } diff --git a/fermo_core/input_output/param_handlers.py b/fermo_core/input_output/param_handlers.py index 520e3c5..fab7ffb 100644 --- a/fermo_core/input_output/param_handlers.py +++ b/fermo_core/input_output/param_handlers.py @@ -635,14 +635,14 @@ class PhenoQualAssgnParams(BaseModel): algorithm: str value: str p_val_cutoff: float = 0.0 - test: str = "No test" + test: str = "None" module_passed: bool = False @model_validator(mode="after") def val(self): ValidationManager.validate_allowed(self.algorithm, ["mean", "median", "minmax"]) ValidationManager.validate_allowed(self.value, ["height", "area"]) - ValidationManager.validate_allowed(self.test, ["No test", "Welsh", "Wilcoxon"]) + ValidationManager.validate_allowed(self.test, ["None", "Welsh", "Wilcoxon"]) ValidationManager.validate_float_zero_one(self.p_val_cutoff) return self From 14f081155a761bdee701408f4b6bacf4ba990224 Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Thu, 29 Jan 2026 19:44:30 +0100 Subject: [PATCH 12/16] Updated changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e834146..6a83d4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## Added - Additional ion adducts to annotate multicharged ions where the [M+H]+ is not visible (e.g. often the case with peptides) +- Added Cytoscape-compatible ion identity network for additional output for adduct annotation - Quantitative phenotype: Added Spearman correlation, optionally with permutation test - Qualitative phenotype: Added optional testing (Welsh, Wilcoxon) to reduce spurious hits at low fold-changes (disabled by setting p_value_theshold to 0 or specifying "None"). From 6863e64cc8aec09041029b0125fbaa4697d8d4e9 Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Thu, 29 Jan 2026 19:48:18 +0100 Subject: [PATCH 13/16] Fixed logic when p_value_threshold set to 0 (disabled) --- .../class_phen_qual_assigner.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py b/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py index 2dc3f23..99bcfa5 100644 --- a/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py +++ b/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py @@ -158,25 +158,23 @@ def determine_active( descr=f"Fold-difference: '{round(fct, 2)}'", ) - if len(act) > 3 and len(inact) > 3: + if ( + len(act) > 3 + and len(inact) > 3 + and self.params.PhenoQualAssgnParams.p_val_cutoff != 0 + ): log_act = np.log10(act) log_inact = np.log10(inact) match self.params.PhenoQualAssgnParams.test: case "Welsh": stat, pval = ttest_ind(log_act, log_inact, equal_var=False) - if ( - pval < self.params.PhenoQualAssgnParams.p_val_cutoff - or self.params.PhenoQualAssgnParams.p_val_cutoff == 0 - ): + if pval < self.params.PhenoQualAssgnParams.p_val_cutoff: phenotype.descr = f"Fold-difference: '{round(fct, 2)}', Welsh's t-test statistic: '{round(stat, 2)}'" phenotype.p_value = pval case "Wilcoxon": stat, pval = ranksums(log_act, log_inact) - if ( - pval < self.params.PhenoQualAssgnParams.p_val_cutoff - or self.params.PhenoQualAssgnParams.p_val_cutoff == 0 - ): + if pval < self.params.PhenoQualAssgnParams.p_val_cutoff: phenotype.descr = f"Fold-difference: '{round(fct, 2)}', Wilcoxon rank-sum test statistic: '{round(stat, 2)}'" phenotype.p_value = pval case _: From 3bd63c219392dec7be85ea914be6a93e18e02d85 Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Thu, 29 Jan 2026 19:53:42 +0100 Subject: [PATCH 14/16] updated cicd to uv --- .github/workflows/ci.yml | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5978b32..6d85c5d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,22 +18,26 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 + with: + fetch-depth: 2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + - name: Install uv + uses: astral-sh/setup-uv@v6 with: - python-version: ${{ matrix.python-version }} + enable-cache: true + cache-dependency-glob: "uv.lock" - - name: Install package with pip - run: | - pipx install hatch - hatch env create dev + - name: Set up Python + run: uv python install + + - name: Install dependencies + run: uv sync --all-groups - name: Run tests run: | - hatch run dev:pytest + uv run pytest - name: Run fermo_core on test dataset run: | - hatch run dev:fermo_core --parameters tests/test_data/test.parameters.json \ No newline at end of file + uv run fermo_core --parameters tests/test_data/test.parameters.json \ No newline at end of file From f7b099af1cfc6bb6c3e9f797f315bcdd996f48ff Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Thu, 29 Jan 2026 19:59:29 +0100 Subject: [PATCH 15/16] fixing ci --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d85c5d..1e4fed0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: - name: Run tests run: | - uv run pytest + uv run pytest tests - name: Run fermo_core on test dataset run: | From e2c2c0fae8bb84bd36e0b8fa7cb7c68102fd4b84 Mon Sep 17 00:00:00 2001 From: "Mitja M. Zdouc" Date: Thu, 29 Jan 2026 20:00:20 +0100 Subject: [PATCH 16/16] fixing ci --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1e4fed0..7a49240 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: run: uv python install - name: Install dependencies - run: uv sync --all-groups + run: uv sync --all-extras --dev - name: Run tests run: |