diff --git a/Cargo.lock b/Cargo.lock index 4c94519..73c6043 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1666,6 +1666,7 @@ dependencies = [ "ratatui", "ratatui-image", "reqwest", + "tempfile", "tokio", ] @@ -2312,9 +2313,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.26.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", "getrandom 0.4.2", diff --git a/Cargo.toml b/Cargo.toml index 4ebd3ec..be0f638 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,3 +34,6 @@ fetch = ["dep:reqwest", "dep:tokio"] lto = true strip = true codegen-units = 1 + +[dev-dependencies] +tempfile = "3.27.0" diff --git a/src/main.rs b/src/main.rs index 315c7d9..40be800 100644 --- a/src/main.rs +++ b/src/main.rs @@ -34,7 +34,7 @@ macro_rules! log { #[derive(Parser)] #[command(name = "proteinview", version, about = "TUI protein structure viewer")] struct Cli { - /// Path to PDB or mmCIF file + /// Path to PDB, mmCIF, or XYZ file file: Option, /// Use HD rendering (HalfBlock over SSH, FullHD locally) @@ -79,8 +79,14 @@ fn main() -> Result<()> { std::process::exit(1); }; - // Load protein structure - let protein = parser::pdb::load_structure(&file_path)?; + // Load protein structure (dispatch by file extension) + let lower = file_path.to_lowercase(); + let is_xyz = lower.ends_with(".xyz"); + let protein = if is_xyz { + parser::xyz::load_xyz(&file_path)? + } else { + parser::pdb::load_structure(&file_path)? + }; eprintln!("Loaded: {} ({} chains, {} residues, {} atoms{})", protein.name, protein.chains.len(), @@ -178,6 +184,23 @@ fn main() -> Result<()> { } }; + // XYZ files default to Element coloring + Wireframe mode unless overridden + let (color_override, viz_mode) = if is_xyz { + let color = if color_override.is_none() && cli.color == "structure" { + Some(render::color::ColorSchemeType::Element) + } else { + color_override + }; + let viz = if cli.mode == "cartoon" { + VizMode::Wireframe + } else { + viz_mode + }; + (color, viz) + } else { + (color_override, viz_mode) + }; + // Create app with actual terminal dimensions for dynamic zoom let mut app = App::new(protein, render_mode, term_cols, term_rows, picker, color_override, viz_mode); log!(logfile, "app created: render_mode={:?} chains={} zoom={:.2}", app.render_mode, app.protein.chains.len(), app.camera.zoom); diff --git a/src/model/protein.rs b/src/model/protein.rs index 62455ea..6e7f318 100644 --- a/src/model/protein.rs +++ b/src/model/protein.rs @@ -139,12 +139,11 @@ impl Protein { pub fn bounding_radius(&self) -> f64 { let chain_atoms = self.chains.iter() .flat_map(|c| &c.residues) - .flat_map(|r| &r.atoms) - .filter(|a| a.is_backbone); + .flat_map(|r| &r.atoms); let ligand_atoms = self.ligands.iter() .flat_map(|l| &l.atoms); - chain_atoms.map(|a| (a.x * a.x + a.y * a.y + a.z * a.z).sqrt()) - .chain(ligand_atoms.map(|a| (a.x * a.x + a.y * a.y + a.z * a.z).sqrt())) + chain_atoms.chain(ligand_atoms) + .map(|a| (a.x * a.x + a.y * a.y + a.z * a.z).sqrt()) .fold(0.0f64, f64::max) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 331e512..3633d48 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,2 +1,3 @@ pub mod pdb; pub mod fetch; +pub mod xyz; diff --git a/src/parser/xyz.rs b/src/parser/xyz.rs new file mode 100644 index 0000000..5e50a3a --- /dev/null +++ b/src/parser/xyz.rs @@ -0,0 +1,138 @@ +use anyhow::{bail, Context, Result}; +use crate::model::protein::{Protein, Chain, MoleculeType, Residue, Atom, SecondaryStructure}; + +/// Load a molecular structure from an XYZ file. +/// +/// XYZ format: +/// Line 1: atom count (integer) +/// Line 2: comment / title +/// Lines 3+: Element x y z +pub fn load_xyz(path: &str) -> Result { + let content = std::fs::read_to_string(path) + .with_context(|| format!("Failed to read XYZ file: {}", path))?; + + let mut lines = content.lines(); + + // Line 1: atom count + let count_line = lines.next().context("XYZ file is empty")?; + let atom_count: usize = count_line.trim().parse() + .with_context(|| format!("First line must be an atom count, got: '{}'", count_line.trim()))?; + + // Line 2: comment / title + let name = lines.next().unwrap_or("").trim().to_string(); + let name = if name.is_empty() { + std::path::Path::new(path) + .file_stem() + .map(|s| s.to_string_lossy().into_owned()) + .unwrap_or_else(|| "Unknown".to_string()) + } else { + name + }; + + // Atom lines + let mut atoms = Vec::with_capacity(atom_count); + for (i, line) in lines.enumerate() { + let line = line.trim(); + if line.is_empty() { + continue; + } + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() < 4 { + bail!("Line {}: expected 'Element x y z', got: '{}'", i + 3, line); + } + let element = parts[0].to_string(); + let x: f64 = parts[1].parse() + .with_context(|| format!("Line {}: invalid x coordinate '{}'", i + 3, parts[1]))?; + let y: f64 = parts[2].parse() + .with_context(|| format!("Line {}: invalid y coordinate '{}'", i + 3, parts[2]))?; + let z: f64 = parts[3].parse() + .with_context(|| format!("Line {}: invalid z coordinate '{}'", i + 3, parts[3]))?; + + atoms.push(Atom { + name: element.clone(), + element, + x, + y, + z, + b_factor: 0.0, + is_backbone: false, + is_hetero: false, + }); + } + + if atoms.len() != atom_count { + bail!( + "XYZ header declares {} atoms but file contains {}", + atom_count, + atoms.len() + ); + } + + let residue = Residue { + name: "MOL".to_string(), + seq_num: 1, + atoms, + secondary_structure: SecondaryStructure::Coil, + }; + + let chain = Chain { + id: "A".to_string(), + residues: vec![residue], + molecule_type: MoleculeType::SmallMolecule, + }; + + Ok(Protein { + name, + chains: vec![chain], + ligands: vec![], + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + fn write_temp_xyz(content: &str) -> tempfile::NamedTempFile { + let mut f = tempfile::NamedTempFile::new().unwrap(); + f.write_all(content.as_bytes()).unwrap(); + f + } + + #[test] + fn test_load_water() { + let f = write_temp_xyz( + "3\nWater molecule\nO 0.000 0.000 0.117\nH 0.000 0.757 -0.469\nH 0.000 -0.757 -0.469\n", + ); + let protein = load_xyz(f.path().to_str().unwrap()).unwrap(); + assert_eq!(protein.name, "Water molecule"); + assert_eq!(protein.chains.len(), 1); + assert_eq!(protein.chains[0].molecule_type, MoleculeType::SmallMolecule); + assert_eq!(protein.chains[0].residues.len(), 1); + assert_eq!(protein.chains[0].residues[0].atoms.len(), 3); + assert_eq!(protein.chains[0].residues[0].atoms[0].element, "O"); + } + + #[test] + fn test_wrong_atom_count() { + let f = write_temp_xyz("5\nBad count\nO 0 0 0\nH 1 0 0\n"); + let result = load_xyz(f.path().to_str().unwrap()); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("declares 5 atoms but file contains 2")); + } + + #[test] + fn test_bad_coordinate() { + let f = write_temp_xyz("1\nBad\nO 0 abc 0\n"); + let result = load_xyz(f.path().to_str().unwrap()); + assert!(result.is_err()); + } + + #[test] + fn test_empty_comment_uses_filename() { + let f = write_temp_xyz("1\n\nC 0 0 0\n"); + let protein = load_xyz(f.path().to_str().unwrap()).unwrap(); + // Name should be derived from temp file name, not empty + assert!(!protein.name.is_empty()); + } +} diff --git a/src/render/bond.rs b/src/render/bond.rs new file mode 100644 index 0000000..2e44547 --- /dev/null +++ b/src/render/bond.rs @@ -0,0 +1,184 @@ +/// Covalent-radii-based bond detection. +/// +/// Two atoms are considered bonded when their distance is less than +/// `1.3 × (r₁ + r₂)` where r₁, r₂ are the single-bond covalent radii +/// (Cordero et al., Dalton Trans. 2008). The 1.3 scale factor matches +/// the convention used by Open Babel, ASE, and other molecular toolkits. + +const BOND_SCALE: f64 = 1.3; + +/// Look up the single-bond covalent radius (Å) for a given element symbol. +/// Falls back to 1.5 Å for unknown elements (reasonable for transition metals). +pub fn covalent_radius(element: &str) -> f64 { + let e = element.trim(); + match e { + // Period 1 + "H" | "h" => 0.31, + "He" | "HE" | "he" => 0.28, + // Period 2 + "Li" | "LI" | "li" => 1.28, + "Be" | "BE" | "be" => 0.96, + "B" | "b" => 0.84, + "C" | "c" => 0.76, + "N" | "n" => 0.71, + "O" | "o" => 0.66, + "F" | "f" => 0.57, + "Ne" | "NE" | "ne" => 0.58, + // Period 3 + "Na" | "NA" | "na" => 1.66, + "Mg" | "MG" | "mg" => 1.41, + "Al" | "AL" | "al" => 1.21, + "Si" | "SI" | "si" => 1.11, + "P" | "p" => 1.07, + "S" | "s" => 1.05, + "Cl" | "CL" | "cl" => 1.02, + "Ar" | "AR" | "ar" => 1.06, + // Period 4 + "K" | "k" => 2.03, + "Ca" | "CA" | "ca" => 1.76, + "Sc" | "SC" | "sc" => 1.70, + "Ti" | "TI" | "ti" => 1.60, + "V" | "v" => 1.53, + "Cr" | "CR" | "cr" => 1.39, + "Mn" | "MN" | "mn" => 1.39, + "Fe" | "FE" | "fe" => 1.32, + "Co" | "CO" | "co" => 1.26, + "Ni" | "NI" | "ni" => 1.24, + "Cu" | "CU" | "cu" => 1.32, + "Zn" | "ZN" | "zn" => 1.22, + "Ga" | "GA" | "ga" => 1.22, + "Ge" | "GE" | "ge" => 1.20, + "As" | "AS" | "as" => 1.19, + "Se" | "SE" | "se" => 1.20, + "Br" | "BR" | "br" => 1.20, + "Kr" | "KR" | "kr" => 1.16, + // Period 5 + "Rb" | "RB" | "rb" => 2.20, + "Sr" | "SR" | "sr" => 1.95, + "Y" | "y" => 1.90, + "Zr" | "ZR" | "zr" => 1.75, + "Nb" | "NB" | "nb" => 1.64, + "Mo" | "MO" | "mo" => 1.54, + "Ru" | "RU" | "ru" => 1.46, + "Rh" | "RH" | "rh" => 1.42, + "Pd" | "PD" | "pd" => 1.39, + "Ag" | "AG" | "ag" => 1.45, + "Cd" | "CD" | "cd" => 1.44, + "In" | "IN" | "in" => 1.42, + "Sn" | "SN" | "sn" => 1.39, + "Sb" | "SB" | "sb" => 1.39, + "Te" | "TE" | "te" => 1.38, + "I" | "i" => 1.39, + "Xe" | "XE" | "xe" => 1.40, + // Period 6 (common) + "Cs" | "CS" | "cs" => 2.44, + "Ba" | "BA" | "ba" => 2.15, + "W" | "w" => 1.62, + "Re" | "RE" | "re" => 1.51, + "Os" | "OS" | "os" => 1.44, + "Ir" | "IR" | "ir" => 1.41, + "Pt" | "PT" | "pt" => 1.36, + "Au" | "AU" | "au" => 1.36, + "Hg" | "HG" | "hg" => 1.32, + "Tl" | "TL" | "tl" => 1.45, + "Pb" | "PB" | "pb" => 1.46, + "Bi" | "BI" | "bi" => 1.48, + // Lanthanides (representative) + "La" | "LA" | "la" => 2.07, + "Ce" | "CE" | "ce" => 2.04, + "Eu" | "EU" | "eu" => 1.98, + "Gd" | "GD" | "gd" => 1.96, + "Lu" | "LU" | "lu" => 1.87, + // Actinides (representative) + "U" | "u" => 1.96, + // Fallback + _ => 1.50, + } +} + +/// Check whether two atoms are likely bonded based on covalent radii. +/// +/// Returns true when distance < BOND_SCALE × (r₁ + r₂). +pub fn atoms_bonded( + elem1: &str, x1: f64, y1: f64, z1: f64, + elem2: &str, x2: f64, y2: f64, z2: f64, +) -> bool { + let dx = x2 - x1; + let dy = y2 - y1; + let dz = z2 - z1; + let dist_sq = dx * dx + dy * dy + dz * dz; + let r_sum = covalent_radius(elem1) + covalent_radius(elem2); + let cutoff = BOND_SCALE * r_sum; + dist_sq <= cutoff * cutoff +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_carbon_carbon_single_bond() { + // Typical C-C single bond: ~1.54 Å + // Cutoff: 1.3 * (0.76 + 0.76) = 1.976 Å + assert!(atoms_bonded("C", 0.0, 0.0, 0.0, "C", 1.54, 0.0, 0.0)); + } + + #[test] + fn test_carbon_carbon_too_far() { + assert!(!atoms_bonded("C", 0.0, 0.0, 0.0, "C", 2.5, 0.0, 0.0)); + } + + #[test] + fn test_carbon_hydrogen_bond() { + // Typical C-H: ~1.09 Å, cutoff: 1.3 * (0.76 + 0.31) = 1.391 Å + assert!(atoms_bonded("C", 0.0, 0.0, 0.0, "H", 1.09, 0.0, 0.0)); + } + + #[test] + fn test_hydrogen_hydrogen_not_bonded() { + // Cutoff: 1.3 * (0.31 + 0.31) = 0.806 Å + assert!(!atoms_bonded("H", 0.0, 0.0, 0.0, "H", 1.0, 0.0, 0.0)); + } + + #[test] + fn test_metal_ligand_bond() { + // Fe-N bond in heme: ~2.0 Å + // Cutoff: 1.3 * (1.32 + 0.71) = 2.639 Å + assert!(atoms_bonded("Fe", 0.0, 0.0, 0.0, "N", 2.0, 0.0, 0.0)); + } + + #[test] + fn test_metal_ligand_not_bonded() { + assert!(!atoms_bonded("Fe", 0.0, 0.0, 0.0, "N", 3.0, 0.0, 0.0)); + } + + #[test] + fn test_case_insensitive() { + assert!(atoms_bonded("c", 0.0, 0.0, 0.0, "C", 1.54, 0.0, 0.0)); + assert!(atoms_bonded("FE", 0.0, 0.0, 0.0, "n", 2.0, 0.0, 0.0)); + } + + #[test] + fn test_unknown_element_uses_fallback() { + // Cutoff: 1.3 * (1.5 + 1.5) = 3.9 Å + assert!(atoms_bonded("Xx", 0.0, 0.0, 0.0, "Yy", 3.5, 0.0, 0.0)); + assert!(!atoms_bonded("Xx", 0.0, 0.0, 0.0, "Yy", 4.5, 0.0, 0.0)); + } + + #[test] + fn test_old_cutoff_missed_fe_n() { + // The old fixed 1.9 Å cutoff would miss this Fe-N bond at 2.0 Å + let old_cutoff_sq = 1.9_f64 * 1.9; + let dist_sq = 2.0_f64 * 2.0; + assert!(dist_sq > old_cutoff_sq, "old cutoff would miss this bond"); + assert!(atoms_bonded("Fe", 0.0, 0.0, 0.0, "N", 2.0, 0.0, 0.0)); + } + + #[test] + fn test_water_bonds() { + // O-H bond: ~0.96 Å, cutoff: 1.3 * (0.66 + 0.31) = 1.261 Å — bonded + assert!(atoms_bonded("O", 0.0, 0.0, 0.117, "H", 0.0, 0.757, -0.469)); + // H-H in water: ~1.51 Å, cutoff: 0.806 Å — NOT bonded + assert!(!atoms_bonded("H", 0.0, 0.757, -0.469, "H", 0.0, -0.757, -0.469)); + } +} diff --git a/src/render/braille.rs b/src/render/braille.rs index bb7ef69..9356e07 100644 --- a/src/render/braille.rs +++ b/src/render/braille.rs @@ -4,6 +4,7 @@ use ratatui::widgets::canvas::{Canvas, Context, Line}; use crate::app::VizMode; use crate::model::interface::{Interaction, InteractionType}; use crate::model::protein::{LigandType, MoleculeType, Protein}; +use crate::render::bond::atoms_bonded; use crate::render::camera::Camera; use crate::render::color::ColorScheme; @@ -41,20 +42,6 @@ fn draw_thick_line( } } -/// Check whether two atoms are bonded in 3D space. -/// Returns true if they are in the same residue and within 1.9 Angstroms, -/// or if they form a peptide bond (C of residue i to N of residue i+1 in same chain). -fn atoms_bonded_3d( - a1_x: f64, a1_y: f64, a1_z: f64, - a2_x: f64, a2_y: f64, a2_z: f64, -) -> bool { - let dx = a2_x - a1_x; - let dy = a2_y - a1_y; - let dz = a2_z - a1_z; - let dist_sq = dx * dx + dy * dy + dz * dz; - dist_sq <= 1.9 * 1.9 -} - /// Render protein on a ratatui Canvas with the Braille marker. /// Behavior depends on VizMode: /// - Backbone/Cartoon: Connect C-alpha atoms with thick lines (5 offsets) @@ -148,7 +135,7 @@ fn render_wireframe( for j in (i + 1)..atom_count { let (a1, p1) = &projected[i]; let (a2, p2) = &projected[j]; - if atoms_bonded_3d(a1.x, a1.y, a1.z, a2.x, a2.y, a2.z) { + if atoms_bonded(&a1.element, a1.x, a1.y, a1.z, &a2.element, a2.x, a2.y, a2.z) { let color = color_scheme.atom_color(a1, residue, chain); draw_thick_line(ctx, p1.x, p1.y, p2.x, p2.y, color, &offsets); } @@ -222,7 +209,7 @@ fn render_ligands( for j in (i + 1)..projected.len() { let (a1, p1, color) = &projected[i]; let (a2, p2, _) = &projected[j]; - if atoms_bonded_3d(a1.x, a1.y, a1.z, a2.x, a2.y, a2.z) { + if atoms_bonded(&a1.element, a1.x, a1.y, a1.z, &a2.element, a2.x, a2.y, a2.z) { draw_thick_line(ctx, p1.x, p1.y, p2.x, p2.y, *color, &offsets); } } diff --git a/src/render/hd.rs b/src/render/hd.rs index 253ade9..c4c30f5 100644 --- a/src/render/hd.rs +++ b/src/render/hd.rs @@ -1,6 +1,7 @@ use crate::app::VizMode; use crate::model::interface::{Interaction, InteractionType}; use crate::model::protein::{LigandType, MoleculeType, Protein}; +use crate::render::bond::atoms_bonded; use crate::render::camera::Camera; use crate::render::color::{color_to_rgb, ColorScheme}; use crate::render::framebuffer::{default_light_dir, Framebuffer, Triangle}; @@ -138,20 +139,6 @@ fn render_backbone_fb( } } -fn atoms_bonded_3d( - a1_x: f64, - a1_y: f64, - a1_z: f64, - a2_x: f64, - a2_y: f64, - a2_z: f64, -) -> bool { - let dx = a2_x - a1_x; - let dy = a2_y - a1_y; - let dz = a2_z - a1_z; - dx * dx + dy * dy + dz * dz <= 1.9 * 1.9 -} - /// Render wireframe mode to framebuffer. /// /// All atoms are always rendered (the integer underflow fix in `draw_circle_z` @@ -191,7 +178,7 @@ fn render_wireframe_fb( for j in (i + 1)..projected.len() { let (a1, p1, c1) = &projected[i]; let (a2, p2, _) = &projected[j]; - if atoms_bonded_3d(a1.x, a1.y, a1.z, a2.x, a2.y, a2.z) { + if atoms_bonded(&a1.element, a1.x, a1.y, a1.z, &a2.element, a2.x, a2.y, a2.z) { fb.draw_thick_line_3d(*p1, *p2, *c1, 1.5 * ts); } } @@ -277,12 +264,12 @@ fn render_ligands_fb( fb.draw_circle_z(px[0], px[1], px[2], radius, *color); } - // Draw bonds between atoms within 1.9 A + // Draw bonds between nearby atoms for i in 0..projected.len() { for j in (i + 1)..projected.len() { let (a1, p1, c1) = &projected[i]; let (a2, p2, _) = &projected[j]; - if atoms_bonded_3d(a1.x, a1.y, a1.z, a2.x, a2.y, a2.z) { + if atoms_bonded(&a1.element, a1.x, a1.y, a1.z, &a2.element, a2.x, a2.y, a2.z) { fb.draw_thick_line_3d(*p1, *p2, *c1, 1.5 * ts); } } diff --git a/src/render/mod.rs b/src/render/mod.rs index 95a975f..fa1129c 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -1,3 +1,4 @@ +pub mod bond; pub mod camera; pub mod braille; pub mod hd;