Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 66 additions & 62 deletions src/flatprot/core/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,79 +146,83 @@ def add_secondary_structure(

@property
def secondary_structure(self) -> list[ResidueRange]:
"""Get all secondary structure elements, filling gaps with coils.
"""Get all secondary structure elements, preserving original segmentation.

Handles discontinuities in residue indexing by creating separate ranges
for contiguous segments.
Returns the originally defined secondary structure ranges without merging
adjacent segments of the same type. This preserves the segmentation from
DSSP/CIF parsing to prevent visual artifacts in helix rendering.

If no secondary structure is defined, provides COIL coverage for the entire
chain to ensure annotations and other systems can function properly.

Returns:
list[ResidueRange]: A complete list of secondary structure elements
covering all residues in the chain.
list[ResidueRange]: A list of secondary structure elements
preserving the original segmentation boundaries.
"""
# Get all residue indices present in the chain, sorted
residue_indices = sorted(self.__chain_coordinates.keys())
# If no secondary structure is defined, provide COIL coverage using chain ranges
if not self.__secondary_structure:
if not self.__chain_coordinates:
return []

# Use to_ranges() to properly handle discontinuous chains
coil_ranges = []
for range_obj in self.to_ranges():
coil_ranges.append(
ResidueRange(
range_obj.chain_id,
range_obj.start,
range_obj.end,
range_obj.coordinates_start_index,
SecondaryStructureType.COIL,
)
)
return coil_ranges

if not residue_indices:
return []
# Return the originally defined secondary structure ranges without merging
# This preserves the segmentation from the DSSP/CIF parsing
complete_ss: List[ResidueRange] = []

# Create a map of residue index to defined secondary structure type
# Sorting __secondary_structure ensures predictable behavior in case of overlaps (though overlaps shouldn't occur)
defined_ss_map: Dict[int, SecondaryStructureType] = {}
for ss_element in sorted(self.__secondary_structure):
# Only consider residues actually present in the chain for this element
for idx in range(ss_element.start, ss_element.end + 1):
if idx in self.__chain_coordinates:
defined_ss_map[idx] = ss_element.secondary_structure_type
# Sort by start position to ensure consistent ordering
sorted_ss = sorted(self.__secondary_structure, key=lambda x: x.start)

complete_ss: List[ResidueRange] = []
segment_start_idx = residue_indices[0]
segment_type = defined_ss_map.get(
segment_start_idx, SecondaryStructureType.COIL
)
segment_contig_start = self.__chain_coordinates[
segment_start_idx
].coordinate_index

# Iterate through residues to identify segments of continuous type and index
for i in range(1, len(residue_indices)):
current_idx = residue_indices[i]
prev_idx = residue_indices[i - 1]
current_res_type = defined_ss_map.get(
current_idx, SecondaryStructureType.COIL
)
# Deduplicate ranges to avoid duplicate scene elements
seen_ranges = set()

# Check for discontinuity in residue index or change in SS type
if current_idx != prev_idx + 1 or current_res_type != segment_type:
# End the previous segment
segment_end_idx = prev_idx
complete_ss.append(
ResidueRange(
self.id,
segment_start_idx,
segment_end_idx,
segment_contig_start,
segment_type,
)
# Add each originally defined range as-is, avoiding duplicates
for ss_element in sorted_ss:
# Ensure the range has valid coordinates in this chain
valid_start = None
valid_end = None

# Find the first valid residue in the range
for idx in range(ss_element.start, ss_element.end + 1):
if idx in self.__chain_coordinates:
if valid_start is None:
valid_start = idx
valid_end = idx

# Only add ranges that have at least one valid residue and aren't duplicates
if valid_start is not None and valid_end is not None:
# Create a unique key for this range
range_key = (
ss_element.secondary_structure_type,
valid_start,
valid_end,
)

# Start a new segment
segment_start_idx = current_idx
segment_type = current_res_type
segment_contig_start = self.__chain_coordinates[
segment_start_idx
].coordinate_index
if range_key not in seen_ranges:
seen_ranges.add(range_key)
start_coord = self.__chain_coordinates[valid_start]
complete_ss.append(
ResidueRange(
self.id,
valid_start,
valid_end,
start_coord.coordinate_index,
ss_element.secondary_structure_type,
)
)

# Add the final segment
segment_end_idx = residue_indices[-1]
complete_ss.append(
ResidueRange(
self.id,
segment_start_idx,
segment_end_idx,
segment_contig_start,
segment_type,
)
)
return complete_ss

def __str__(self) -> str:
Expand Down
24 changes: 9 additions & 15 deletions tests/core/test_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def test_chain_to_ranges_empty() -> None:


def test_chain_secondary_structure_default_coil(sample_chain_a: Chain) -> None:
"""Test that secondary_structure defaults to COIL for all residues if none are added."""
"""Test that secondary_structure provides COIL coverage when no secondary structure is defined."""
ss = sample_chain_a.secondary_structure
assert len(ss) == 1
expected_range = ResidueRange("A", 10, 14, 0, SecondaryStructureType.COIL)
Expand All @@ -160,11 +160,9 @@ def test_chain_add_secondary_structure_success(sample_chain_a: Chain) -> None:
sample_chain_a.add_secondary_structure(SecondaryStructureType.HELIX, 11, 13)

ss = sample_chain_a.secondary_structure
# Expected: Coil(10-10), Helix(11-13), Coil(14-14)
# Expected: Only the explicitly defined Helix(11-13), no gap filling
expected_ss = [
ResidueRange("A", 10, 10, 0, SecondaryStructureType.COIL),
ResidueRange("A", 11, 13, 1, SecondaryStructureType.HELIX),
ResidueRange("A", 14, 14, 4, SecondaryStructureType.COIL),
]
assert ss == expected_ss

Expand All @@ -184,23 +182,19 @@ def test_chain_add_secondary_structure_invalid_index(sample_chain_a: Chain) -> N
def test_chain_secondary_structure_add_overlapping(sample_chain_a: Chain) -> None:
"""Test behavior when adding overlapping secondary structures.

The expectation is that the definitions from the structure added *later* alphabetically
(based on range sorting within the property) will take precedence in the overlapping region.
The new behavior preserves the original ranges without merging,
returning both overlapping ranges as-is.
"""
# Add Helix first (start=10), then Sheet (start=11)
sample_chain_a.add_secondary_structure(SecondaryStructureType.HELIX, 10, 12)
sample_chain_a.add_secondary_structure(SecondaryStructureType.SHEET, 11, 13)

ss = sample_chain_a.secondary_structure

# Based on internal sorting (Helix 10-12 comes before Sheet 11-13):
# The map becomes {10: H, 11: H, 12: H}, then {11: S, 12: S, 13: S}
# Resulting map: {10: H, 11: S, 12: S, 13: S}
# Expected segments: H(10-10), S(11-13), Coil(14-14)
# Expected: Both original ranges preserved as-is, no merging or gap filling
expected_ss = [
ResidueRange("A", 10, 10, 0, SecondaryStructureType.HELIX),
ResidueRange("A", 10, 12, 0, SecondaryStructureType.HELIX),
ResidueRange("A", 11, 13, 1, SecondaryStructureType.SHEET),
ResidueRange("A", 14, 14, 4, SecondaryStructureType.COIL),
]
assert ss == expected_ss

Expand All @@ -222,7 +216,7 @@ def test_chain_secondary_structure_discontinuous_chain(
) -> None:
"""Test secondary structure generation on a discontinuous chain.

Add structure before the gap and verify the output segments correctly.
Add structure before the gap and verify only explicitly defined structures are returned.
"""
# Chain B has indices 5, 6, 9. Add Helix to 5-6.
sample_chain_b_discontinuous.add_secondary_structure(
Expand All @@ -231,10 +225,10 @@ def test_chain_secondary_structure_discontinuous_chain(

ss = sample_chain_b_discontinuous.secondary_structure

# Expected: Helix segment for 5-6, separate Coil segment for 9.
# Expected: Only the explicitly defined Helix segment for 5-6, no automatic gap filling.
# The implementation preserves segmentation boundaries when secondary structure is defined.
expected_ss = [
ResidueRange("B", 5, 6, 0, SecondaryStructureType.HELIX),
ResidueRange("B", 9, 9, 2, SecondaryStructureType.COIL),
]
assert ss == expected_ss

Expand Down
15 changes: 6 additions & 9 deletions tests/io/test_structure_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,18 @@
# Based on manual inspection of tests/data/test.cif and DSSP logic
# Note: Residue indices are 1-based as in PDB/CIF/DSSP
EXPECTED_SS = [
ResidueRange("A", 1, 1, 0, SecondaryStructureType.COIL),
ResidueRange("A", 2, 6, 1, SecondaryStructureType.SHEET),
# ResidueRange("A", 6, 6, 5, SecondaryStructureType.SHEET), # Note: DSSP has 6 as a sheet, but it is merged with 5 due to the algorithm prioritizing longer streches
ResidueRange("A", 7, 13, 6, SecondaryStructureType.COIL),
# Preserved segmentation: explicitly defined ranges without merging
# This preserves the original boundaries from DSSP/CIF files
ResidueRange("A", 2, 5, 1, SecondaryStructureType.SHEET),
ResidueRange(
"A", 6, 6, 5, SecondaryStructureType.SHEET
), # Separate sheet segment preserved
ResidueRange("A", 14, 17, 13, SecondaryStructureType.SHEET),
ResidueRange("A", 18, 19, 17, SecondaryStructureType.HELIX), # 3-10 Helix in CIF
ResidueRange("A", 20, 23, 19, SecondaryStructureType.COIL),
ResidueRange("A", 24, 30, 23, SecondaryStructureType.SHEET),
ResidueRange("A", 31, 37, 30, SecondaryStructureType.COIL),
ResidueRange("A", 38, 44, 37, SecondaryStructureType.SHEET),
ResidueRange("A", 45, 45, 44, SecondaryStructureType.COIL),
ResidueRange("A", 46, 54, 45, SecondaryStructureType.HELIX), # Alpha Helix
ResidueRange("A", 55, 59, 54, SecondaryStructureType.COIL),
ResidueRange("A", 60, 65, 59, SecondaryStructureType.SHEET),
ResidueRange("A", 66, 72, 65, SecondaryStructureType.COIL),
]


Expand Down