Hydrophobicity Scale
+ + +Visualization Details
+Method: SVG mask-based gradients
+Scale: Kyte-Doolittle hydrophobicity
+Resolution: Per-residue color stops
+Range: Blue (hydrophilic) to Red (hydrophobic)
+diff --git a/scripts/hydrophobic_gradient_viz.py b/scripts/hydrophobic_gradient_viz.py new file mode 100755 index 0000000..080a9e2 --- /dev/null +++ b/scripts/hydrophobic_gradient_viz.py @@ -0,0 +1,744 @@ +#!/usr/bin/env python3 + +import sys +import subprocess +import tempfile +import argparse +from pathlib import Path +import re + + +def extract_sequence_from_cif(cif_path: str) -> str: + """Extract protein sequence from CIF file.""" + try: + with open(cif_path, "r") as f: + content = f.read() + + # Look for the sequence line + sequence_match = re.search( + r"_entity_poly\.pdbx_seq_one_letter_code\s+([A-Z\s]+)", content + ) + if sequence_match: + # Remove whitespace and return sequence + sequence = re.sub(r"\s+", "", sequence_match.group(1)) + return sequence + + # Fallback: try to extract from ATOM records + lines = content.split("\n") + residues = {} + for line in lines: + if line.startswith("ATOM"): + parts = line.split() + if len(parts) >= 6: + try: + res_num = int(parts[8]) + res_name = parts[5] + # Convert three-letter to one-letter codes + aa_map = { + "ALA": "A", + "ARG": "R", + "ASN": "N", + "ASP": "D", + "CYS": "C", + "GLN": "Q", + "GLU": "E", + "GLY": "G", + "HIS": "H", + "ILE": "I", + "LEU": "L", + "LYS": "K", + "MET": "M", + "PHE": "F", + "PRO": "P", + "SER": "S", + "THR": "T", + "TRP": "W", + "TYR": "Y", + "VAL": "V", + } + if res_name in aa_map: + residues[res_num] = aa_map[res_name] + except (ValueError, IndexError): + continue + + if residues: + # Sort by residue number and join + sorted_residues = sorted(residues.items()) + return "".join([aa for _, aa in sorted_residues]) + + except Exception as e: + print(f"Warning: Could not extract sequence from {cif_path}: {e}") + + return "" + + +def create_hydrophobic_visualization( + structure_path: str, + output_path: str, + canvas_width: int = 800, + canvas_height: int = 600, + show_positions: str = "minimal", +) -> None: + """Create a complete hydrophobic gradient visualization.""" + + structure_path 
= Path(structure_path) + output_path = Path(output_path) + + if not structure_path.exists(): + raise FileNotFoundError(f"Structure file not found: {structure_path}") + + # Create output directory if needed + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Generate base SVG with flatprot + with tempfile.NamedTemporaryFile(suffix=".svg", delete=False) as tmp_svg: + tmp_svg_path = tmp_svg.name + + try: + # Run flatprot project + cmd = [ + "uv", + "run", + "flatprot", + "project", + str(structure_path), + tmp_svg_path, + "--canvas-width", + str(canvas_width), + "--canvas-height", + str(canvas_height), + "--show-positions", + show_positions, + "--quiet", + ] + + print(f"š Generating base SVG from {structure_path.name}...") + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode != 0: + raise RuntimeError(f"FlatProt failed: {result.stderr}") + + # Extract sequence + sequence = extract_sequence_from_cif(structure_path) + if not sequence: + print("Warning: Could not extract sequence, using default values") + sequence = "A" * 100 # Fallback + + # Read the SVG content + with open(tmp_svg_path, "r") as f: + svg_content = f.read() + + # Create the complete visualization with hydrophobic gradients + html_content = create_hydrophobic_html( + svg_content, sequence, structure_path.stem, canvas_width, canvas_height + ) + + # Write the final HTML file + with open(output_path, "w") as f: + f.write(html_content) + + print(f"ā Hydrophobic gradient visualization created: {output_path}") + print(f"𧬠Sequence length: {len(sequence)} residues") + + finally: + # Clean up temporary file + Path(tmp_svg_path).unlink(missing_ok=True) + + +def create_hydrophobic_html( + svg_content: str, + sequence: str, + structure_name: str, + canvas_width: int, + canvas_height: int, +) -> str: + """Create complete HTML with hydrophobic gradients and legend.""" + + # Hydrophobicity scale (Kyte-Doolittle normalized to 0-1) + hydrophobicity_raw = { + "A": 1.8, + "R": 
-4.5, + "N": -3.5, + "D": -3.5, + "C": 2.5, + "Q": -3.5, + "E": -3.5, + "G": -0.4, + "H": -3.2, + "I": 4.5, + "L": 3.8, + "K": -3.9, + "M": 1.9, + "F": 2.8, + "P": -1.6, + "S": -0.8, + "T": -0.7, + "W": -0.9, + "Y": -1.3, + "V": 4.2, + } + + # Normalize to 0-1 range + min_val = min(hydrophobicity_raw.values()) + max_val = max(hydrophobicity_raw.values()) + hydrophobicity = { + aa: (val - min_val) / (max_val - min_val) + for aa, val in hydrophobicity_raw.items() + } + + html_template = f""" + +
+Structure: {structure_name} | Sequence length: {len(sequence)} residues
+Method: SVG mask-based gradients
+Scale: Kyte-Doolittle hydrophobicity
+Resolution: Per-residue color stops
+Range: Blue (hydrophilic) to Red (hydrophobic)
+