diff --git a/src/flatprot/core/structure.py b/src/flatprot/core/structure.py index 146c1c4..04c8bf5 100644 --- a/src/flatprot/core/structure.py +++ b/src/flatprot/core/structure.py @@ -146,79 +146,83 @@ def add_secondary_structure( @property def secondary_structure(self) -> list[ResidueRange]: - """Get all secondary structure elements, filling gaps with coils. + """Get all secondary structure elements, preserving original segmentation. - Handles discontinuities in residue indexing by creating separate ranges - for contiguous segments. + Returns the originally defined secondary structure ranges without merging + adjacent segments of the same type. This preserves the segmentation from + DSSP/CIF parsing to prevent visual artifacts in helix rendering. + + If no secondary structure is defined, provides COIL coverage for the entire + chain to ensure annotations and other systems can function properly. Returns: - list[ResidueRange]: A complete list of secondary structure elements - covering all residues in the chain. + list[ResidueRange]: A list of secondary structure elements + preserving the original segmentation boundaries. """ - # Get all residue indices present in the chain, sorted - residue_indices = sorted(self.__chain_coordinates.keys()) + # If no secondary structure is defined, provide COIL coverage using chain ranges + if not self.__secondary_structure: + if not self.__chain_coordinates: + return [] + + # Use to_ranges() to properly handle discontinuous chains + coil_ranges = [] + for range_obj in self.to_ranges(): + coil_ranges.append( + ResidueRange( + range_obj.chain_id, + range_obj.start, + range_obj.end, + range_obj.coordinates_start_index, + SecondaryStructureType.COIL, + ) + ) + return coil_ranges - if not residue_indices: - return [] + # Return the originally defined secondary structure ranges without merging + # This preserves the segmentation from the DSSP/CIF parsing + complete_ss: List[ResidueRange] = [] - # Create a map of residue index to defined secondary structure type - # Sorting __secondary_structure ensures predictable behavior in case of overlaps (though overlaps shouldn't occur) - defined_ss_map: Dict[int, SecondaryStructureType] = {} - for ss_element in sorted(self.__secondary_structure): - # Only consider residues actually present in the chain for this element - for idx in range(ss_element.start, ss_element.end + 1): - if idx in self.__chain_coordinates: - defined_ss_map[idx] = ss_element.secondary_structure_type + # Sort by start position to ensure consistent ordering + sorted_ss = sorted(self.__secondary_structure, key=lambda x: x.start) - complete_ss: List[ResidueRange] = [] - segment_start_idx = residue_indices[0] - segment_type = defined_ss_map.get( - segment_start_idx, SecondaryStructureType.COIL - ) - segment_contig_start = self.__chain_coordinates[ - segment_start_idx - ].coordinate_index - - # Iterate through residues to identify segments of continuous type and index - for i in range(1, len(residue_indices)): - current_idx = residue_indices[i] - prev_idx = residue_indices[i - 1] - current_res_type = defined_ss_map.get( - current_idx, SecondaryStructureType.COIL - ) + # Deduplicate ranges to avoid duplicate scene elements + seen_ranges = set() - # Check for discontinuity in residue index or change in SS type - if current_idx != prev_idx + 1 or current_res_type != segment_type: - # End the previous segment - segment_end_idx = prev_idx - complete_ss.append( - ResidueRange( - self.id, - segment_start_idx, - segment_end_idx, - segment_contig_start, - segment_type, - ) + # Add each originally defined range as-is, avoiding duplicates + for ss_element in sorted_ss: + # Ensure the range has valid coordinates in this chain + valid_start = None + valid_end = None + + # Find the first valid residue in the range + for idx in range(ss_element.start, ss_element.end + 1): + if idx in self.__chain_coordinates: + if valid_start is None: + valid_start = idx + valid_end = idx + + # Only add ranges that have at least one valid residue and aren't duplicates + if valid_start is not None and valid_end is not None: + # Create a unique key for this range + range_key = ( + ss_element.secondary_structure_type, + valid_start, + valid_end, ) - # Start a new segment - segment_start_idx = current_idx - segment_type = current_res_type - segment_contig_start = self.__chain_coordinates[ - segment_start_idx - ].coordinate_index + if range_key not in seen_ranges: + seen_ranges.add(range_key) + start_coord = self.__chain_coordinates[valid_start] + complete_ss.append( + ResidueRange( + self.id, + valid_start, + valid_end, + start_coord.coordinate_index, + ss_element.secondary_structure_type, + ) + ) - # Add the final segment - segment_end_idx = residue_indices[-1] - complete_ss.append( - ResidueRange( - self.id, - segment_start_idx, - segment_end_idx, - segment_contig_start, - segment_type, - ) - ) return complete_ss def __str__(self) -> str: diff --git a/tests/core/test_structure.py b/tests/core/test_structure.py index 4bfaa72..adca63f 100644 --- a/tests/core/test_structure.py +++ b/tests/core/test_structure.py @@ -148,7 +148,7 @@ def test_chain_to_ranges_empty() -> None: def test_chain_secondary_structure_default_coil(sample_chain_a: Chain) -> None: - """Test that secondary_structure defaults to COIL for all residues if none are added.""" + """Test that secondary_structure provides COIL coverage when no secondary structure is defined.""" ss = sample_chain_a.secondary_structure assert len(ss) == 1 expected_range = ResidueRange("A", 10, 14, 0, SecondaryStructureType.COIL) @@ -160,11 +160,9 @@ def test_chain_add_secondary_structure_success(sample_chain_a: Chain) -> None: sample_chain_a.add_secondary_structure(SecondaryStructureType.HELIX, 11, 13) ss = sample_chain_a.secondary_structure - # Expected: Coil(10-10), Helix(11-13), Coil(14-14) + # Expected: Only the explicitly defined Helix(11-13), no gap filling expected_ss = [ - ResidueRange("A", 10, 10, 0, SecondaryStructureType.COIL), ResidueRange("A", 11, 13, 1, SecondaryStructureType.HELIX), - ResidueRange("A", 14, 14, 4, SecondaryStructureType.COIL), ] assert ss == expected_ss @@ -184,8 +182,8 @@ def test_chain_add_secondary_structure_invalid_index(sample_chain_a: Chain) -> N def test_chain_secondary_structure_add_overlapping(sample_chain_a: Chain) -> None: """Test behavior when adding overlapping secondary structures. - The expectation is that the definitions from the structure added *later* alphabetically - (based on range sorting within the property) will take precedence in the overlapping region. + The new behavior preserves the original ranges without merging, + returning both overlapping ranges as-is. """ # Add Helix first (start=10), then Sheet (start=11) sample_chain_a.add_secondary_structure(SecondaryStructureType.HELIX, 10, 12) @@ -193,14 +191,10 @@ def test_chain_secondary_structure_add_overlapping(sample_chain_a: Chain) -> Non ss = sample_chain_a.secondary_structure - # Based on internal sorting (Helix 10-12 comes before Sheet 11-13): - # The map becomes {10: H, 11: H, 12: H}, then {11: S, 12: S, 13: S} - # Resulting map: {10: H, 11: S, 12: S, 13: S} - # Expected segments: H(10-10), S(11-13), Coil(14-14) + # Expected: Both original ranges preserved as-is, no merging or gap filling expected_ss = [ - ResidueRange("A", 10, 10, 0, SecondaryStructureType.HELIX), + ResidueRange("A", 10, 12, 0, SecondaryStructureType.HELIX), ResidueRange("A", 11, 13, 1, SecondaryStructureType.SHEET), - ResidueRange("A", 14, 14, 4, SecondaryStructureType.COIL), ] assert ss == expected_ss @@ -222,7 +216,7 @@ def test_chain_secondary_structure_discontinuous_chain( ) -> None: """Test secondary structure generation on a discontinuous chain. - Add structure before the gap and verify the output segments correctly. + Add structure before the gap and verify only explicitly defined structures are returned. """ # Chain B has indices 5, 6, 9. Add Helix to 5-6. sample_chain_b_discontinuous.add_secondary_structure( @@ -231,10 +225,10 @@ def test_chain_secondary_structure_discontinuous_chain( ss = sample_chain_b_discontinuous.secondary_structure - # Expected: Helix segment for 5-6, separate Coil segment for 9. + # Expected: Only the explicitly defined Helix segment for 5-6, no automatic gap filling. + # The implementation preserves segmentation boundaries when secondary structure is defined. expected_ss = [ ResidueRange("B", 5, 6, 0, SecondaryStructureType.HELIX), - ResidueRange("B", 9, 9, 2, SecondaryStructureType.COIL), ] assert ss == expected_ss diff --git a/tests/io/test_structure_parser.py b/tests/io/test_structure_parser.py index beedf3e..567e1af 100644 --- a/tests/io/test_structure_parser.py +++ b/tests/io/test_structure_parser.py @@ -16,21 +16,18 @@ # Based on manual inspection of tests/data/test.cif and DSSP logic # Note: Residue indices are 1-based as in PDB/CIF/DSSP EXPECTED_SS = [ - ResidueRange("A", 1, 1, 0, SecondaryStructureType.COIL), - ResidueRange("A", 2, 6, 1, SecondaryStructureType.SHEET), - # ResidueRange("A", 6, 6, 5, SecondaryStructureType.SHEET), # Note: DSSP has 6 as a sheet, but it is merged with 5 due to the algorithm prioritizing longer streches - ResidueRange("A", 7, 13, 6, SecondaryStructureType.COIL), + # Preserved segmentation: explicitly defined ranges without merging + # This preserves the original boundaries from DSSP/CIF files + ResidueRange("A", 2, 5, 1, SecondaryStructureType.SHEET), + ResidueRange( + "A", 6, 6, 5, SecondaryStructureType.SHEET + ), # Separate sheet segment preserved ResidueRange("A", 14, 17, 13, SecondaryStructureType.SHEET), ResidueRange("A", 18, 19, 17, SecondaryStructureType.HELIX), # 3-10 Helix in CIF - ResidueRange("A", 20, 23, 19, SecondaryStructureType.COIL), ResidueRange("A", 24, 30, 23, SecondaryStructureType.SHEET), - ResidueRange("A", 31, 37, 30, SecondaryStructureType.COIL), ResidueRange("A", 38, 44, 37, SecondaryStructureType.SHEET), - ResidueRange("A", 45, 45, 44, SecondaryStructureType.COIL), ResidueRange("A", 46, 54, 45, SecondaryStructureType.HELIX), # Alpha Helix - ResidueRange("A", 55, 59, 54, SecondaryStructureType.COIL), ResidueRange("A", 60, 65, 59, SecondaryStructureType.SHEET), - ResidueRange("A", 66, 72, 65, SecondaryStructureType.COIL), ]