From 2d700b9f3293eb8a51d3eac2241c251f52cfb515 Mon Sep 17 00:00:00 2001 From: Tobias O Date: Wed, 2 Jul 2025 11:46:14 +0200 Subject: [PATCH 1/3] improvement: add gradient exp script --- scripts/hydrophobic_gradient_viz.py | 744 ++++++++++++++++++++++++++++ 1 file changed, 744 insertions(+) create mode 100755 scripts/hydrophobic_gradient_viz.py diff --git a/scripts/hydrophobic_gradient_viz.py b/scripts/hydrophobic_gradient_viz.py new file mode 100755 index 0000000..080a9e2 --- /dev/null +++ b/scripts/hydrophobic_gradient_viz.py @@ -0,0 +1,744 @@ +#!/usr/bin/env python3 + +import sys +import subprocess +import tempfile +import argparse +from pathlib import Path +import re + + +def extract_sequence_from_cif(cif_path: str) -> str: + """Extract protein sequence from CIF file.""" + try: + with open(cif_path, "r") as f: + content = f.read() + + # Look for the sequence line + sequence_match = re.search( + r"_entity_poly\.pdbx_seq_one_letter_code\s+([A-Z\s]+)", content + ) + if sequence_match: + # Remove whitespace and return sequence + sequence = re.sub(r"\s+", "", sequence_match.group(1)) + return sequence + + # Fallback: try to extract from ATOM records + lines = content.split("\n") + residues = {} + for line in lines: + if line.startswith("ATOM"): + parts = line.split() + if len(parts) >= 6: + try: + res_num = int(parts[8]) + res_name = parts[5] + # Convert three-letter to one-letter codes + aa_map = { + "ALA": "A", + "ARG": "R", + "ASN": "N", + "ASP": "D", + "CYS": "C", + "GLN": "Q", + "GLU": "E", + "GLY": "G", + "HIS": "H", + "ILE": "I", + "LEU": "L", + "LYS": "K", + "MET": "M", + "PHE": "F", + "PRO": "P", + "SER": "S", + "THR": "T", + "TRP": "W", + "TYR": "Y", + "VAL": "V", + } + if res_name in aa_map: + residues[res_num] = aa_map[res_name] + except (ValueError, IndexError): + continue + + if residues: + # Sort by residue number and join + sorted_residues = sorted(residues.items()) + return "".join([aa for _, aa in sorted_residues]) + + except Exception as e: + print(f"Warning: Could not extract sequence from {cif_path}: {e}") + + return "" + + +def create_hydrophobic_visualization( + structure_path: str, + output_path: str, + canvas_width: int = 800, + canvas_height: int = 600, + show_positions: str = "minimal", +) -> None: + """Create a complete hydrophobic gradient visualization.""" + + structure_path = Path(structure_path) + output_path = Path(output_path) + + if not structure_path.exists(): + raise FileNotFoundError(f"Structure file not found: {structure_path}") + + # Create output directory if needed + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Generate base SVG with flatprot + with tempfile.NamedTemporaryFile(suffix=".svg", delete=False) as tmp_svg: + tmp_svg_path = tmp_svg.name + + try: + # Run flatprot project + cmd = [ + "uv", + "run", + "flatprot", + "project", + str(structure_path), + tmp_svg_path, + "--canvas-width", + str(canvas_width), + "--canvas-height", + str(canvas_height), + "--show-positions", + show_positions, + "--quiet", + ] + + print(f"šŸš€ Generating base SVG from {structure_path.name}...") + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode != 0: + raise RuntimeError(f"FlatProt failed: {result.stderr}") + + # Extract sequence + sequence = extract_sequence_from_cif(structure_path) + if not sequence: + print("Warning: Could not extract sequence, using default values") + sequence = "A" * 100 # Fallback + + # Read the SVG content + with open(tmp_svg_path, "r") as f: + svg_content = f.read() + + # Create the complete visualization with hydrophobic gradients + html_content = create_hydrophobic_html( + svg_content, sequence, structure_path.stem, canvas_width, canvas_height + ) + + # Write the final HTML file + with open(output_path, "w") as f: + f.write(html_content) + + print(f"āœ… Hydrophobic gradient visualization created: {output_path}") + print(f"🧬 Sequence length: {len(sequence)} residues") + + finally: + # Clean up temporary file + Path(tmp_svg_path).unlink(missing_ok=True) + + +def create_hydrophobic_html( + svg_content: str, + sequence: str, + structure_name: str, + canvas_width: int, + canvas_height: int, +) -> str: + """Create complete HTML with hydrophobic gradients and legend.""" + + # Hydrophobicity scale (Kyte-Doolittle normalized to 0-1) + hydrophobicity_raw = { + "A": 1.8, + "R": -4.5, + "N": -3.5, + "D": -3.5, + "C": 2.5, + "Q": -3.5, + "E": -3.5, + "G": -0.4, + "H": -3.2, + "I": 4.5, + "L": 3.8, + "K": -3.9, + "M": 1.9, + "F": 2.8, + "P": -1.6, + "S": -0.8, + "T": -0.7, + "W": -0.9, + "Y": -1.3, + "V": 4.2, + } + + # Normalize to 0-1 range + min_val = min(hydrophobicity_raw.values()) + max_val = max(hydrophobicity_raw.values()) + hydrophobicity = { + aa: (val - min_val) / (max_val - min_val) + for aa, val in hydrophobicity_raw.items() + } + + html_template = f""" + + + Hydrophobic Gradient: {structure_name} + + + +
+
+

🧬 Hydrophobic Gradient Visualization

+

Structure: {structure_name} | Sequence length: {len(sequence)} residues

+
+ +
+
+
+ {svg_content} +
+ +
+ + + +
+ +
+ Status: Ready to apply hydrophobic gradients +
+
+ +
+
+

šŸ’§ Hydrophobicity Scale

+ +
+
+ Hydrophilic + Hydrophobic +
+ +
+
+
+ Hydrophilic: R, K, D, E, N, Q +
+
+
+ Neutral: G, S, T, H, P, Y +
+
+
+ Hydrophobic: I, L, V, F, W, M, A, C +
+
+
+ +
+

šŸ“Š Visualization Details

+
+

Method: SVG mask-based gradients

+

Scale: Kyte-Doolittle hydrophobicity

+

Resolution: Per-residue color stops

+

Range: Blue (hydrophilic) to Red (hydrophobic)

+
+
+
+
+
+ + + +""" + + return html_template + + +def main(): + parser = argparse.ArgumentParser( + description="Create hydrophobic gradient visualization from protein structure", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python hydrophobic_gradient_viz.py structure.cif output.html + python hydrophobic_gradient_viz.py structure.pdb output.html --width 1000 --height 800 + python hydrophobic_gradient_viz.py structure.cif output.html --positions none + """, + ) + + parser.add_argument("structure", help="Input structure file (CIF, PDB, etc.)") + parser.add_argument("output", help="Output HTML file path") + parser.add_argument( + "--width", type=int, default=800, help="Canvas width in pixels (default: 800)" + ) + parser.add_argument( + "--height", type=int, default=600, help="Canvas height in pixels (default: 600)" + ) + parser.add_argument( + "--positions", + choices=["major", "minor", "none"], + default="minimal", + help="Position annotation level (default: major)", + ) + + args = parser.parse_args() + + try: + create_hydrophobic_visualization( + structure_path=args.structure, + output_path=args.output, + canvas_width=args.width, + canvas_height=args.height, + show_positions=args.positions, + ) + + print("\nšŸŽ‰ Success! Open the visualization:") + print(f" {Path(args.output).absolute()}") + + except Exception as e: + print(f"āŒ Error: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() From 0cd6b9c52df79eb87dc8323d376cec4e38f5ca84 Mon Sep 17 00:00:00 2001 From: Tobias O Date: Wed, 2 Jul 2025 15:17:23 +0200 Subject: [PATCH 2/3] fix: coils correctly displayed again --- src/flatprot/core/structure.py | 100 ++++++++++++++++++++++++++++++--- tests/core/test_structure.py | 11 ++-- 2 files changed, 99 insertions(+), 12 deletions(-) diff --git a/src/flatprot/core/structure.py b/src/flatprot/core/structure.py index 04c8bf5..a9c950b 100644 --- a/src/flatprot/core/structure.py +++ b/src/flatprot/core/structure.py @@ -178,17 +178,26 @@ def secondary_structure(self) -> list[ResidueRange]: ) return coil_ranges - # Return the originally defined secondary structure ranges without merging - # This preserves the segmentation from the DSSP/CIF parsing - complete_ss: List[ResidueRange] = [] + # Get all residue indices present in the chain, sorted + residue_indices = sorted(self.__chain_coordinates.keys()) - # Sort by start position to ensure consistent ordering - sorted_ss = sorted(self.__secondary_structure, key=lambda x: x.start) + if not residue_indices: + return [] - # Deduplicate ranges to avoid duplicate scene elements + # Create a map of residue index to defined secondary structure type + # This preserves original segmentation by not merging adjacent elements + defined_ss_map: Dict[int, SecondaryStructureType] = {} + for ss_element in sorted(self.__secondary_structure, key=lambda x: x.start): + # Only consider residues actually present in the chain for this element + for idx in range(ss_element.start, ss_element.end + 1): + if idx in self.__chain_coordinates: + defined_ss_map[idx] = ss_element.secondary_structure_type + + # First, add all originally defined secondary structure ranges to preserve segmentation + complete_ss: List[ResidueRange] = [] + sorted_ss = sorted(self.__secondary_structure, key=lambda x: x.start) seen_ranges = set() - # Add each originally defined range as-is, avoiding duplicates for ss_element in sorted_ss: # Ensure the range has valid coordinates in this chain valid_start = None @@ -223,7 +232,82 @@ def secondary_structure(self) -> list[ResidueRange]: ) ) - return complete_ss + # Now fill gaps with coil segments to restore coil rendering + # Sort by start position for gap detection + complete_ss.sort(key=lambda x: x.start) + + filled_ss: List[ResidueRange] = [] + + # Add coil at the beginning if needed + if complete_ss and residue_indices[0] < complete_ss[0].start: + self._add_coil_segments( + filled_ss, residue_indices[0], complete_ss[0].start - 1 + ) + + # Process each element and add coil gaps between them + for i, ss_element in enumerate(complete_ss): + filled_ss.append(ss_element) + + # Check if there's a gap after this element + if i < len(complete_ss) - 1: + next_element = complete_ss[i + 1] + if next_element.start > ss_element.end + 1: + self._add_coil_segments( + filled_ss, ss_element.end + 1, next_element.start - 1 + ) + + # Add coil at the end if needed + if complete_ss and residue_indices[-1] > complete_ss[-1].end: + self._add_coil_segments( + filled_ss, complete_ss[-1].end + 1, residue_indices[-1] + ) + + # If no secondary structure was defined, complete_ss will be empty + # In that case, filled_ss will also be empty, so the original fallback applies + if not filled_ss: + return complete_ss + + # Sort the final result by start position + filled_ss.sort(key=lambda x: x.start) + return filled_ss + + def _add_coil_segments( + self, result_list: List[ResidueRange], start: int, end: int + ) -> None: + """Helper method to add coil segments for a given range, handling discontinuities.""" + coil_start = None + for idx in range(start, end + 1): + if idx in self.__chain_coordinates: + if coil_start is None: + coil_start = idx + coil_end = idx + else: + # Discontinuity: end current coil segment if any + if coil_start is not None: + start_coord = self.__chain_coordinates[coil_start] + result_list.append( + ResidueRange( + self.id, + coil_start, + coil_end, + start_coord.coordinate_index, + SecondaryStructureType.COIL, + ) + ) + coil_start = None + + # Add final coil segment if any + if coil_start is not None: + start_coord = self.__chain_coordinates[coil_start] + result_list.append( + ResidueRange( + self.id, + coil_start, + coil_end, + start_coord.coordinate_index, + SecondaryStructureType.COIL, + ) + ) def __str__(self) -> str: return f"Chain {self.id}" diff --git a/tests/core/test_structure.py b/tests/core/test_structure.py index adca63f..4fc82ba 100644 --- a/tests/core/test_structure.py +++ b/tests/core/test_structure.py @@ -160,9 +160,11 @@ def test_chain_add_secondary_structure_success(sample_chain_a: Chain) -> None: sample_chain_a.add_secondary_structure(SecondaryStructureType.HELIX, 11, 13) ss = sample_chain_a.secondary_structure - # Expected: Only the explicitly defined Helix(11-13), no gap filling + # Expected: Coil(10-10), Helix(11-13), Coil(14-14) with gap filling restored expected_ss = [ + ResidueRange("A", 10, 10, 0, SecondaryStructureType.COIL), ResidueRange("A", 11, 13, 1, SecondaryStructureType.HELIX), + ResidueRange("A", 14, 14, 4, SecondaryStructureType.COIL), ] assert ss == expected_ss @@ -191,10 +193,11 @@ def test_chain_secondary_structure_add_overlapping(sample_chain_a: Chain) -> Non ss = sample_chain_a.secondary_structure - # Expected: Both original ranges preserved as-is, no merging or gap filling + # Expected: Both original ranges preserved as-is, with coil gap filling at end expected_ss = [ ResidueRange("A", 10, 12, 0, SecondaryStructureType.HELIX), ResidueRange("A", 11, 13, 1, SecondaryStructureType.SHEET), + ResidueRange("A", 14, 14, 4, SecondaryStructureType.COIL), ] assert ss == expected_ss @@ -225,10 +228,10 @@ def test_chain_secondary_structure_discontinuous_chain( ss = sample_chain_b_discontinuous.secondary_structure - # Expected: Only the explicitly defined Helix segment for 5-6, no automatic gap filling. - # The implementation preserves segmentation boundaries when secondary structure is defined. + # Expected: Helix segment for 5-6, separate Coil segment for 9 (with gap filling restored). expected_ss = [ ResidueRange("B", 5, 6, 0, SecondaryStructureType.HELIX), + ResidueRange("B", 9, 9, 2, SecondaryStructureType.COIL), ] assert ss == expected_ss From 22e112768f87c8b822b998e457b36ab2fcee96ce Mon Sep 17 00:00:00 2001 From: Tobias O Date: Wed, 2 Jul 2025 15:21:00 +0200 Subject: [PATCH 3/3] fix: tests working --- src/flatprot/core/structure.py | 11 +++++++---- tests/io/test_structure_parser.py | 16 +++++++++++++++- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/flatprot/core/structure.py b/src/flatprot/core/structure.py index a9c950b..d11ab71 100644 --- a/src/flatprot/core/structure.py +++ b/src/flatprot/core/structure.py @@ -146,17 +146,20 @@ def add_secondary_structure( @property def secondary_structure(self) -> list[ResidueRange]: - """Get all secondary structure elements, preserving original segmentation. + """Get all secondary structure elements with coil gap filling and preserved segmentation. Returns the originally defined secondary structure ranges without merging - adjacent segments of the same type. This preserves the segmentation from - DSSP/CIF parsing to prevent visual artifacts in helix rendering. + adjacent segments of the same type, while filling gaps between structured + elements with coil regions. This preserves the segmentation from DSSP/CIF + parsing to prevent visual artifacts in helix rendering, while ensuring + complete coil coverage for proper visualization. If no secondary structure is defined, provides COIL coverage for the entire chain to ensure annotations and other systems can function properly. Returns: - list[ResidueRange]: A list of secondary structure elements + list[ResidueRange]: A complete list of secondary structure elements + including both originally defined structures and coil gap regions, preserving the original segmentation boundaries. """ # If no secondary structure is defined, provide COIL coverage using chain ranges diff --git a/tests/io/test_structure_parser.py b/tests/io/test_structure_parser.py index 567e1af..35658af 100644 --- a/tests/io/test_structure_parser.py +++ b/tests/io/test_structure_parser.py @@ -15,19 +15,33 @@ # --- Expected Secondary Structure Definitions --- # Based on manual inspection of tests/data/test.cif and DSSP logic # Note: Residue indices are 1-based as in PDB/CIF/DSSP +# Includes coil gap filling between structured elements EXPECTED_SS = [ + # Coil before first structured element + ResidueRange("A", 1, 1, 0, SecondaryStructureType.COIL), # Preserved segmentation: explicitly defined ranges without merging - # This preserves the original boundaries from DSSP/CIF files ResidueRange("A", 2, 5, 1, SecondaryStructureType.SHEET), ResidueRange( "A", 6, 6, 5, SecondaryStructureType.SHEET ), # Separate sheet segment preserved + # Coil gap between sheets + ResidueRange("A", 7, 13, 6, SecondaryStructureType.COIL), ResidueRange("A", 14, 17, 13, SecondaryStructureType.SHEET), ResidueRange("A", 18, 19, 17, SecondaryStructureType.HELIX), # 3-10 Helix in CIF + # Coil gap between helix and sheet + ResidueRange("A", 20, 23, 19, SecondaryStructureType.COIL), ResidueRange("A", 24, 30, 23, SecondaryStructureType.SHEET), + # Coil gap between sheets + ResidueRange("A", 31, 37, 30, SecondaryStructureType.COIL), ResidueRange("A", 38, 44, 37, SecondaryStructureType.SHEET), + # Coil gap between sheet and helix + ResidueRange("A", 45, 45, 44, SecondaryStructureType.COIL), ResidueRange("A", 46, 54, 45, SecondaryStructureType.HELIX), # Alpha Helix + # Coil gap between helix and sheet + ResidueRange("A", 55, 59, 54, SecondaryStructureType.COIL), ResidueRange("A", 60, 65, 59, SecondaryStructureType.SHEET), + # Coil after last structured element + ResidueRange("A", 66, 72, 65, SecondaryStructureType.COIL), ]