Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,26 @@ jobs:

steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v5
with:
fetch-depth: 2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
python-version: ${{ matrix.python-version }}
enable-cache: true
cache-dependency-glob: "uv.lock"

- name: Install package with pip
run: |
pipx install hatch
hatch env create dev
- name: Set up Python
run: uv python install

- name: Install dependencies
run: uv sync --all-extras --dev

- name: Run tests
run: |
hatch run dev:pytest
uv run pytest tests

- name: Run fermo_core on test dataset
run: |
hatch run dev:fermo_core --parameters tests/test_data/test.parameters.json
uv run fermo_core --parameters tests/test_data/test.parameters.json
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ results/
data_working/
.idea/
fermo_analysis/
uv.lock

# ms2query files
fermo_core/libraries/ms2query/results
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ repos:
hooks:
- id: pytest
name: pytest
entry: hatch run dev:pytest tests/
entry: uv run pytest tests/
language: system
types: [ file, python ]
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.7.0] 29-01-2026

## Added

- Additional ion adducts to annotate multicharged ions where the [M+H]+ is not visible (e.g. often the case with peptides)
- Added a Cytoscape-compatible ion identity network as additional output of the adduct annotation
- Quantitative phenotype: Added Spearman correlation, optionally with permutation test
- Qualitative phenotype: Added optional testing (Welsh, Wilcoxon) to reduce spurious hits at low fold-changes (disabled by setting p_value_threshold to 0 or specifying "None").

## Changed

- Removed z-transformation from Pearson correlation in quantitative phenotype score calculation
- For qualitative phenotype assigning, set p-value to "N/A" if not calculated (was 1 before)

## [0.6.4] 13-05-2025

## Changed
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,12 @@ Instructions for setting up a development environment.

#### Package Installation

*Assumes that `hatch` is installed*
*Assumes that `uv` is installed*

```commandline
hatch env create dev
hatch run dev:pre-commit install
hatch run dev:pytest --run_slow
uv sync --extra dev
uv run pre-commit install
uv run pytest --run_slow
```

### Documentation
Expand Down
13 changes: 12 additions & 1 deletion fermo_core/config/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,17 @@
"height",
"area"
]
},
"p_val_cutoff": {
"$ref": "#/$defs/r_perc"
},
"test": {
"type": "string",
"enum": [
"None",
"Welsh",
"Wilcoxon"
]
}
}
},
Expand Down Expand Up @@ -290,7 +301,7 @@
},
"algorithm": {
"type": "string",
"enum": ["pearson"]
"enum": ["pearson", "spearman", "spearman_permutation"]
},
"fdr_corr": {
"type": "string",
Expand Down
234 changes: 230 additions & 4 deletions fermo_core/data_analysis/annotation_manager/class_adduct_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,18 @@
import logging
from typing import Self

import networkx as nx
from pydantic import BaseModel

from fermo_core.config.class_default_settings import DefaultMasses as Mass
from fermo_core.data_processing.builder_feature.dataclass_feature import (
Adduct,
Annotations,
Feature,
SimNetworks,
)
from fermo_core.data_processing.class_repository import Repository
from fermo_core.data_processing.class_stats import Stats
from fermo_core.data_processing.class_stats import SpecSimNet, Stats
from fermo_core.input_output.class_parameter_manager import ParameterManager
from fermo_core.utils.utility_method_manager import UtilityMethodManager

Expand All @@ -59,13 +61,13 @@ class AdductAnnotator(BaseModel):
features: Repository
samples: Repository

def return_features(self: Self) -> Repository:
def return_features(self: Self) -> tuple[Repository, Stats]:
"""Returns modified attributes from AdductAnnotator to the calling function

Returns:
Modified Feature Repository objects.
Modified Feature Repository objects and Stats Object
"""
return self.features
return self.features, self.stats

def run_analysis(self: Self):
"""Organizes calling of data analysis steps."""
Expand All @@ -85,6 +87,74 @@ def run_analysis(self: Self):
self.annotate_adducts_neg(s_name)

self.dereplicate_adducts()
self.create_network()

def create_network(self: Self):
    """Creates the ion identity network from the annotated adduct relationships

    Every active feature becomes a node; each adduct annotation of a feature adds
    an undirected edge between the feature and its partner feature. The graph is
    split into its connected components, which are stored together with the full
    graph under ``self.stats.networks["ion_identity"]``. Each active feature is
    then tagged with the identifier of the component it belongs to.

    NOTE: ``nx.Graph`` holds a single edge per node pair. Adding the same pair
    again (e.g. from the partner's perspective) updates the attributes of the
    existing edge, so the annotation processed last determines the stored
    ``ppm``, ``adduct_type`` and ``partner_adduct`` values.
    """
    logger.info("'AnnotationManager/AdductAnnotator': started ion identity network")

    g = nx.Graph()

    for f_id in self.stats.active_features:
        feature = self.features.get(f_id)

        # Features without adduct annotation are kept as singleton nodes.
        g.add_node(f_id)

        annotations = feature.Annotations
        if not annotations or not annotations.adducts:
            continue

        for adduct in annotations.adducts:
            # add_edge() creates the partner node if it does not exist yet.
            g.add_edge(
                f_id,
                adduct.partner_id,
                ppm=adduct.diff_ppm,
                adduct_type=adduct.adduct_type,
                partner_adduct=adduct.partner_adduct,
            )

    subnetworks = {}
    summary = {}
    for cluster_id, component in enumerate(nx.connected_components(g)):
        subgraph = g.subgraph(component).copy()
        subgraph.graph["name"] = cluster_id
        subnetworks[cluster_id] = subgraph
        summary[cluster_id] = {int(node) for node in subgraph.nodes}

    if not self.stats.networks:
        self.stats.networks = {}
    self.stats.networks["ion_identity"] = SpecSimNet(
        algorithm="ion_identity",
        network=g,
        subnetworks=subnetworks,
        summary=summary,
    )

    # Reverse lookup (feature id -> cluster id), built once so that the cluster
    # of a feature is found without scanning every cluster for every feature.
    cluster_of = {
        member: cluster_id
        for cluster_id, members in summary.items()
        for member in members
    }

    for f_id in self.stats.active_features:
        feature = self.features.get(f_id)
        if feature.networks is None:
            feature.networks = {}

        # Cluster id 0 is valid, hence the membership test instead of truthiness.
        if f_id in cluster_of:
            feature.networks["ion_identity"] = SimNetworks(
                algorithm="ion_identity", network_id=cluster_of[f_id]
            )
        self.features.modify(f_id, feature)

    logger.info(
        "'AnnotationManager/AdductAnnotator': completed ion identity network"
    )

@staticmethod
def add_adduct_info(feature: Feature) -> Feature:
Expand Down Expand Up @@ -281,6 +351,18 @@ def annotate_adducts_pos(self: Self, s_name: str | int):
feat2.f_id, feat1.f_id, s_name
):
continue
elif self.double_quadruple(
feat1.f_id, feat2.f_id, s_name
) or self.double_quadruple(feat2.f_id, feat1.f_id, s_name):
continue
elif self.double_triple(
feat1.f_id, feat2.f_id, s_name
) or self.double_triple(feat2.f_id, feat1.f_id, s_name):
continue
elif self.quadruple_triple(
feat1.f_id, feat2.f_id, s_name
) or self.quadruple_triple(feat2.f_id, feat1.f_id, s_name):
continue

def sodium_adduct(self: Self, feat1: int, feat2: int, s_name: str) -> bool:
"""Determination of [M+Na]+ adduct, add information
Expand Down Expand Up @@ -1411,3 +1493,147 @@ def acetate_adduct(self: Self, feat1: int, feat2: int, s_name: str) -> bool:
return True
else:
return False

def double_quadruple(self: Self, feat1: int, feat2: int, s_name: str) -> bool:
    """Tests if two features relate as [M+2H]2+ (feat1) and [M+4H]4+ (feat2)

    For both features, the m/z is multiplied by the assumed charge and the same
    multiple of ``Mass().H`` is subtracted. If the deviation between the two
    resulting masses is below the configured ppm threshold, reciprocal adduct
    annotations are added to both features and written back to the repository.

    Arguments:
        feat1: identifier of the putative [M+2H]2+ feature
        feat2: identifier of the putative [M+4H]4+ feature
        s_name: the sample identifier

    Returns:
        True if the pair was annotated, else False
    """
    ion_2plus = self.features.get(feat1)
    ion_4plus = self.features.get(feat2)
    h_mass = Mass().H

    ppm = UtilityMethodManager.mass_deviation(
        (ion_2plus.mz * 2) - (2 * h_mass),
        (ion_4plus.mz * 4) - (4 * h_mass),
        ion_4plus.f_id,
    )
    tolerance = self.params.AdductAnnotationParameters.mass_dev_ppm
    if not ppm < tolerance:
        return False

    ion_2plus = self.add_adduct_info(ion_2plus)
    ion_4plus = self.add_adduct_info(ion_4plus)

    # Each feature records the relationship from its own perspective.
    annotation_2plus = Adduct(
        adduct_type="[M+2H]2+",
        partner_adduct="[M+4H]4+",
        partner_id=ion_4plus.f_id,
        partner_mz=ion_4plus.mz,
        diff_ppm=ppm,
        sample=s_name,
    )
    ion_2plus.Annotations.adducts.append(annotation_2plus)

    annotation_4plus = Adduct(
        adduct_type="[M+4H]4+",
        partner_adduct="[M+2H]2+",
        partner_id=ion_2plus.f_id,
        partner_mz=ion_2plus.mz,
        diff_ppm=ppm,
        sample=s_name,
    )
    ion_4plus.Annotations.adducts.append(annotation_4plus)

    self.features.modify(feat1, ion_2plus)
    self.features.modify(feat2, ion_4plus)
    return True

def double_triple(self: Self, feat1: int, feat2: int, s_name: str) -> bool:
    """Tests if two features relate as [M+2H]2+ (feat1) and [M+3H]3+ (feat2)

    For both features, the m/z is multiplied by the assumed charge and the same
    multiple of ``Mass().H`` is subtracted. If the deviation between the two
    resulting masses is below the configured ppm threshold, reciprocal adduct
    annotations are added to both features and written back to the repository.

    Arguments:
        feat1: identifier of the putative [M+2H]2+ feature
        feat2: identifier of the putative [M+3H]3+ feature
        s_name: the sample identifier

    Returns:
        True if the pair was annotated, else False
    """
    ion_2plus = self.features.get(feat1)
    ion_3plus = self.features.get(feat2)
    h_mass = Mass().H

    ppm = UtilityMethodManager.mass_deviation(
        (ion_2plus.mz * 2) - (2 * h_mass),
        (ion_3plus.mz * 3) - (3 * h_mass),
        ion_3plus.f_id,
    )
    tolerance = self.params.AdductAnnotationParameters.mass_dev_ppm
    if not ppm < tolerance:
        return False

    ion_2plus = self.add_adduct_info(ion_2plus)
    ion_3plus = self.add_adduct_info(ion_3plus)

    # Each feature records the relationship from its own perspective.
    annotation_2plus = Adduct(
        adduct_type="[M+2H]2+",
        partner_adduct="[M+3H]3+",
        partner_id=ion_3plus.f_id,
        partner_mz=ion_3plus.mz,
        diff_ppm=ppm,
        sample=s_name,
    )
    ion_2plus.Annotations.adducts.append(annotation_2plus)

    annotation_3plus = Adduct(
        adduct_type="[M+3H]3+",
        partner_adduct="[M+2H]2+",
        partner_id=ion_2plus.f_id,
        partner_mz=ion_2plus.mz,
        diff_ppm=ppm,
        sample=s_name,
    )
    ion_3plus.Annotations.adducts.append(annotation_3plus)

    self.features.modify(feat1, ion_2plus)
    self.features.modify(feat2, ion_3plus)
    return True

def quadruple_triple(self: Self, feat1: int, feat2: int, s_name: str) -> bool:
    """Tests if two features relate as [M+4H]4+ (feat1) and [M+3H]3+ (feat2)

    For both features, the m/z is multiplied by the assumed charge and the same
    multiple of ``Mass().H`` is subtracted. If the deviation between the two
    resulting masses is below the configured ppm threshold, reciprocal adduct
    annotations are added to both features and written back to the repository.

    Arguments:
        feat1: identifier of the putative [M+4H]4+ feature
        feat2: identifier of the putative [M+3H]3+ feature
        s_name: the sample identifier

    Returns:
        True if the pair was annotated, else False
    """
    ion_4plus = self.features.get(feat1)
    ion_3plus = self.features.get(feat2)
    h_mass = Mass().H

    ppm = UtilityMethodManager.mass_deviation(
        (ion_4plus.mz * 4) - (4 * h_mass),
        (ion_3plus.mz * 3) - (3 * h_mass),
        ion_3plus.f_id,
    )
    tolerance = self.params.AdductAnnotationParameters.mass_dev_ppm
    if not ppm < tolerance:
        return False

    ion_4plus = self.add_adduct_info(ion_4plus)
    ion_3plus = self.add_adduct_info(ion_3plus)

    # Each feature records the relationship from its own perspective.
    annotation_4plus = Adduct(
        adduct_type="[M+4H]4+",
        partner_adduct="[M+3H]3+",
        partner_id=ion_3plus.f_id,
        partner_mz=ion_3plus.mz,
        diff_ppm=ppm,
        sample=s_name,
    )
    ion_4plus.Annotations.adducts.append(annotation_4plus)

    annotation_3plus = Adduct(
        adduct_type="[M+3H]3+",
        partner_adduct="[M+4H]4+",
        partner_id=ion_4plus.f_id,
        partner_mz=ion_4plus.mz,
        diff_ppm=ppm,
        sample=s_name,
    )
    ion_3plus.Annotations.adducts.append(annotation_3plus)

    self.features.modify(feat1, ion_4plus)
    self.features.modify(feat2, ion_3plus)
    return True
Loading
Loading