Skip to content

Commit 78199bb

Browse files
AmirMohammad Cheraghali
authored and committed
feat(alignment): add RMSD calculation and FASTA export
1 parent 7dae376 commit 78199bb

3 files changed

Lines changed: 93 additions & 38 deletions

File tree

src/components/SequenceAlignmentModal.tsx

Lines changed: 74 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import React, { useMemo, useState } from 'react';
2-
import { X, GitCommitVertical, AlertTriangle, FileText, BarChart2, Hash, Percent } from 'lucide-react';
2+
import { X, GitCommitVertical, AlertTriangle, FileText, BarChart2, Hash, Percent, Download, Activity, Ruler } from 'lucide-react';
33
import type { ChainInfo, SuperposedStructure } from '../types';
44
import clsx from 'clsx';
55

@@ -20,30 +20,26 @@ interface AlignedResult {
2020
identity: number; // %
2121
similarity: number; // %
2222
gaps: number; // count
23-
length: number; // total alignment length
24-
score: number; // raw score
23+
length: number; // length
24+
score: number;
25+
rmsd?: number; // Angstrom
2526
};
2627
alignment: {
2728
seq1: string; // Primary
2829
seq2: string; // Target
29-
matchStr: string; // Match string ( | : . )
30+
matchStr: string;
3031
};
3132
}[];
3233
}
3334

34-
// Scientific Residue Coloring (Clustal-inspired simplified)
35+
// Scientific Residue Coloring
3536
/**
 * Lookup table from a one-letter residue code (or the gap symbol '-') to the
 * Tailwind text-colour class used when rendering an aligned sequence.
 * Codes that are absent from the table are left to the caller's fallback.
 */
const RESIDUE_COLORS: Record<string, string> = (() => {
    const palette: Record<string, string> = {};

    // Gives every one-letter code in `residues` the same colour class.
    const paint = (residues: string, colorClass: string): void => {
        residues.split('').forEach(code => {
            palette[code] = colorClass;
        });
    };

    paint('AVLIMFWP', 'text-blue-400');   // Hydrophobic (blue)
    paint('GST', 'text-green-400');       // Polar (green)
    paint('C', 'text-yellow-400');        // Cysteine gets its own colour (yellow)
    paint('NQY', 'text-green-400');       // Polar (green), continued
    paint('KRH', 'text-red-400');         // Positive (red)
    paint('DE', 'text-fuchsia-400');      // Negative (magenta)
    paint('-', 'text-neutral-700');       // Gap

    return palette;
})();
4945

@@ -70,17 +66,21 @@ const CONSERVATIVE_GROUPS = [
7066
/**
 * Classifies one alignment column and returns its match-line symbol.
 *
 * @param a residue (or '-') from the first aligned sequence
 * @param b residue (or '-') from the second aligned sequence
 * @returns ' ' when either side is a gap, '|' for identical residues,
 *          ':' when both residues occur in the same CONSERVATIVE_GROUPS entry,
 *          '.' for any other pair
 */
const getMatchChar = (a: string, b: string): string => {
    const touchesGap = a === '-' || b === '-';
    if (touchesGap) {
        return ' ';
    }

    if (a === b) {
        return '|';
    }

    // A substitution is conservative when a single group contains both residues.
    const conservative = CONSERVATIVE_GROUPS.some(
        members => members.includes(a) && members.includes(b)
    );
    return conservative ? ':' : '.';
};
7974

80-
const alignSequences = (seq1: string, seq2: string) => {
75+
const alignSequences = (chain1: ChainInfo, chain2: ChainInfo) => {
76+
const seq1 = chain1.sequence;
77+
const seq2 = chain2.sequence;
78+
const coords1 = chain1.coords;
79+
const coords2 = chain2.coords;
80+
8181
const match = 10;
8282
const mismatch = -2;
83-
const gap = -5; // Penalty
83+
const gap = -5;
8484

8585
const n = seq1.length;
8686
const m = seq2.length;
@@ -108,18 +108,36 @@ const alignSequences = (seq1: string, seq2: string) => {
108108
let similarityCount = 0;
109109
let gapCount = 0;
110110

111+
// RMSD Calc
112+
let sumSqDist = 0;
113+
let atomPairs = 0;
114+
111115
while (i > 0 || j > 0) {
112116
if (i > 0 && j > 0 && scoreMatrix[i][j] === scoreMatrix[i - 1][j - 1] + (seq1[i - 1] === seq2[j - 1] ? match : mismatch)) {
113117
const c1 = seq1[i - 1];
114118
const c2 = seq2[j - 1];
115119
align1 = c1 + align1;
116120
align2 = c2 + align2;
121+
122+
// Stats
117123
if (c1 === c2) {
118124
identityCount++;
119125
similarityCount++;
120126
} else if (getMatchChar(c1, c2) === ':') {
121127
similarityCount++;
122128
}
129+
130+
// RMSD: Calculate dist between coords1[i-1] and coords2[j-1]
131+
if (coords1 && coords2 && coords1[i - 1] && coords2[j - 1]) {
132+
const p1 = coords1[i - 1];
133+
const p2 = coords2[j - 1];
134+
const dx = p1.x - p2.x;
135+
const dy = p1.y - p2.y;
136+
const dz = p1.z - p2.z;
137+
sumSqDist += dx * dx + dy * dy + dz * dz;
138+
atomPairs++;
139+
}
140+
123141
i--; j--;
124142
} else if (i > 0 && scoreMatrix[i][j] === scoreMatrix[i - 1][j] + gap) {
125143
align1 = seq1[i - 1] + align1;
@@ -137,6 +155,9 @@ const alignSequences = (seq1: string, seq2: string) => {
137155
const length = align1.length;
138156
const matchStr = align1.split('').map((c, k) => getMatchChar(c, align2[k])).join('');
139157

158+
// RMSD Final
159+
const rmsd = atomPairs > 0 ? Math.sqrt(sumSqDist / atomPairs) : undefined;
160+
140161
return {
141162
seq1: align1,
142163
seq2: align2,
@@ -146,7 +167,8 @@ const alignSequences = (seq1: string, seq2: string) => {
146167
similarity: (similarityCount / length) * 100,
147168
gaps: gapCount,
148169
length,
149-
score: scoreMatrix[n][m]
170+
score: scoreMatrix[n][m],
171+
rmsd
150172
}
151173
};
152174
};
@@ -168,12 +190,10 @@ export const SequenceAlignmentModal: React.FC<SequenceAlignmentModalProps> = ({
168190
const chainMatches: AlignedResult['chainMatches'] = [];
169191

170192
primaryStructure.forEach(pChain => {
171-
// Try to find same chain name, else index?
172-
// For exact matches we prefer name.
173193
const targetChain = ov.chains?.find(c => c.name === pChain.name) || ov.chains?.[0];
174194

175195
if (targetChain) {
176-
const result = alignSequences(pChain.sequence, targetChain.sequence);
196+
const result = alignSequences(pChain, targetChain);
177197
chainMatches.push({
178198
primaryChain: pChain.name,
179199
targetChain: targetChain.name,
@@ -202,6 +222,21 @@ export const SequenceAlignmentModal: React.FC<SequenceAlignmentModalProps> = ({
202222
if (!selectedChain && availableChains.length > 0) setSelectedChain(availableChains[0]);
203223
}, [availableChains]);
204224

225+
/**
 * Downloads the pairwise alignment of one chain match as a two-record FASTA file.
 *
 * The first record is the primary chain (gapped `seq1`), the second the overlay
 * chain (gapped `seq2`). The file is delivered through a temporary blob URL and a
 * programmatic click on a hidden anchor element.
 *
 * @param result alignment result of the overlay; supplies the overlay name
 * @param match  the chain match whose aligned sequences are exported
 */
const handleExport = (result: AlignedResult, match: AlignedResult['chainMatches'][0]) => {
    const overlayName = String(result.overlayName);

    // A line break inside the name would split the FASTA header into a bogus
    // sequence line, so headers are kept on a single line.
    const headerName = overlayName.replace(/[\r\n]+/g, ' ');
    const text = [
        `>Primary | Chain ${match.primaryChain}`,
        match.alignment.seq1,
        `>Overlay: ${headerName} | Chain ${match.targetChain}`,
        match.alignment.seq2,
        '',
    ].join('\n');

    // Path separators, reserved characters and control characters are replaced so
    // the suggested file name is accepted on every platform.
    const fileName = `alignment_primary_${match.primaryChain}_vs_${overlayName}_${match.targetChain}.fasta`
        .replace(/[\\/:*?"<>|\u0000-\u001f]/g, '_');

    const blob = new Blob([text], { type: 'text/plain' });
    const url = URL.createObjectURL(blob);

    const a = document.createElement('a');
    a.href = url;
    a.download = fileName;
    a.style.display = 'none';

    // Some browsers ignore click() on an anchor that is not attached to the document.
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);

    // Revoking synchronously can invalidate the URL before the browser has started
    // reading the blob; release it once the download has been handed off.
    setTimeout(() => URL.revokeObjectURL(url), 1000);
};
239+
205240
if (!isOpen) return null;
206241

207242
return (
@@ -250,7 +285,7 @@ export const SequenceAlignmentModal: React.FC<SequenceAlignmentModalProps> = ({
250285
return (
251286
<div key={result.overlayId} className="bg-black/40 border border-white/10 rounded-xl overflow-hidden">
252287
{/* Stats Dashboard */}
253-
<div className="grid grid-cols-2 md:grid-cols-4 divide-x divide-white/10 border-b border-white/10 bg-white/5">
288+
<div className="grid grid-cols-2 md:grid-cols-5 divide-x divide-white/10 border-b border-white/10 bg-white/5">
254289
<StatBox
255290
label="Identity"
256291
value={`${match.stats.identity.toFixed(1)}%`}
@@ -263,6 +298,13 @@ export const SequenceAlignmentModal: React.FC<SequenceAlignmentModalProps> = ({
263298
icon={<Hash size={14} />}
264299
color="text-blue-400"
265300
/>
301+
{/* RMSD tile. The value is compared against undefined rather than tested for
    truthiness: an RMSD of exactly 0 Å (identical coordinates) is a valid result
    and must not be reported as "N/A". */}
<StatBox
    label="RMSD"
    value={match.stats.rmsd !== undefined ? `${match.stats.rmsd.toFixed(2)} Å` : "N/A"}
    icon={<Activity size={14} />}
    color="text-cyan-400"
    subtext={match.stats.rmsd !== undefined ? "Cα Atoms" : "No Coordinates"}
/>
266308
<StatBox
267309
label="Gaps"
268310
value={match.stats.gaps.toString()}
@@ -280,9 +322,18 @@ export const SequenceAlignmentModal: React.FC<SequenceAlignmentModalProps> = ({
280322

281323
<div className="p-6">
282324
<div className="flex items-baseline justify-between mb-4">
283-
<h3 className="text-lg font-bold text-neutral-200">
284-
{result.overlayName} <span className="text-neutral-500 text-sm font-normal">(Chain {match.targetChain})</span>
285-
</h3>
325+
<div className="flex items-center gap-3">
326+
<h3 className="text-lg font-bold text-neutral-200">
327+
{result.overlayName} <span className="text-neutral-500 text-sm font-normal">(Chain {match.targetChain})</span>
328+
</h3>
329+
<button
330+
onClick={() => handleExport(result, match)}
331+
className="flex items-center gap-1.5 px-3 py-1 bg-white/5 hover:bg-cyan-500/20 text-xs font-bold text-cyan-400 rounded-md transition-colors border border-white/10 hover:border-cyan-500/50"
332+
>
333+
<Download size={12} />
334+
Export FASTA
335+
</button>
336+
</div>
286337
<div className="flex gap-4">
287338
{RESIDUE_GROUPS.map(g => (
288339
<div key={g.name} className="flex items-center gap-2" title={g.desc}>
@@ -301,14 +352,12 @@ export const SequenceAlignmentModal: React.FC<SequenceAlignmentModalProps> = ({
301352
<div className="w-24 shrink-0" />
302353
<div className="flex relative h-4 w-full">
303354
{Array.from({ length: Math.ceil(match.stats.length / 10) }).map((_, i) => (
304-
<span key={i} className="absolute text-[10px] text-neutral-500 border-l border-neutral-700 pl-1 h-3" style={{ left: `${i * 10 * 12}px` }}> {/* Approx 12px per char? No, char width varies. Monospace char width is fixed but usually 1ch ~8-10px depending on font size. We need flex basis. Better to use simple indices in a flex row */}
355+
<span key={i} className="absolute text-[10px] text-neutral-500 border-l border-neutral-700 pl-1 h-3" style={{ left: `${i * 10 * 12}px` }}>
305356
{i * 10 + 1}
306357
</span>
307358
))}
308359
</div>
309360
</div>
310-
{/* Revisit Ruler: Absolute positioning on text is hard without fixed width font metrics.
311-
Let's just put markers every 10 chars in the flex flow. */}
312361

313362
<div className="space-y-1">
314363
{/* Primary Sequence */}
@@ -368,20 +417,9 @@ const SequenceRow = ({ label, sequence, compareSeq }: { label: string, sequence:
368417
<span className="w-24 shrink-0 text-xs font-bold text-neutral-500 uppercase tracking-wider select-none pl-2 group-hover:text-neutral-300 transition-colors">
369418
{label}
370419
</span>
371-
<div className="flex font-mono text-sm tracking-widest"> {/* tracking-widest ~ 0.1em. need fixed width */}
420+
<div className="flex font-mono text-sm tracking-widest">
372421
{sequence.split('').map((char, i) => {
373422
let colorClass = RESIDUE_COLORS[char] || 'text-neutral-300';
374-
if (compareSeq) {
375-
// If it's the target seq, maybe dim non-mismatches?
376-
// Or just use standard colors. Standard colors look "scientific".
377-
// If difference:
378-
const c1 = compareSeq[i];
379-
if (c1 !== '-' && char !== '-' && c1 !== char) {
380-
// Mismatch implies importance? Or just typical color?
381-
// Keep typical color but maybe background highlight?
382-
// Let's stick to text color for clean look.
383-
}
384-
}
385423
return (
386424
<span key={i} className={`w-[1ch] inline-block text-center ${colorClass}`}>
387425
{char}

src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ export interface ChainInfo {
4646
type?: 'protein' | 'nucleic' | 'unknown'; // Added: To distinguish polymer type
4747
atoms?: AtomInfo[]; // Added: For small chemicals, list atoms directly
4848
bFactors?: number[]; // Added: Per-residue B-factor for coloring sync
49+
coords?: { x: number; y: number; z: number }[]; // Added: C-alpha/P coordinates for RMSD
4950
}
5051

5152
export interface StructureInfo {

src/utils/pdbUtils.ts

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ export const extractChainsFromComponent = (component: any): { chains: ChainInfo[
142142

143143
const resMap: number[] = [];
144144
const bFactors: number[] = [];
145+
const coords: { x: number, y: number, z: number }[] = [];
145146

146147
try {
147148
c.eachResidue((r: any) => {
@@ -158,16 +159,30 @@ export const extractChainsFromComponent = (component: any): { chains: ChainInfo[
158159
resMap.push((maxSeq > -Infinity ? maxSeq : 0) + 1);
159160
}
160161

161-
// B-Factor Extraction
162+
// B-Factor & Coordinate Extraction
// Accumulates the residue's mean B-factor and picks one representative position:
// the atom named 'CA' or 'P' when present, otherwise the first atom visited.
let bSum = 0;
let bCount = 0;

// Positions are copied out inside the callback instead of keeping a reference to
// the atom object. NOTE(review): if `a` is a proxy that the library re-points at
// each atom in turn (NGL's AtomProxy behaves this way), a stored reference would
// read the LAST atom's x/y/z once iteration has finished — confirm against the
// structure library in use. Copying is correct in either case.
const picked: {
    trace?: { x: number; y: number; z: number };
    first?: { x: number; y: number; z: number };
} = {};

r.eachAtom((a: any) => {
    bSum += a.bfactor;
    bCount++;

    if (a.atomname === 'CA' || a.atomname === 'P') picked.trace = { x: a.x, y: a.y, z: a.z };
    if (!picked.first) picked.first = { x: a.x, y: a.y, z: a.z };
});
const avgB = bCount > 0 ? bSum / bCount : 0;
bFactors.push(avgB);

// Store coords (CA/P or fallback to first atom). Exactly one entry is pushed per
// residue so that coords stays index-aligned with bFactors; a residue without any
// atoms (should theoretically not happen) gets an origin placeholder.
coords.push(picked.trace ?? picked.first ?? { x: 0, y: 0, z: 0 });
185+
171186
// Determine Type
172187
if (r.isNucleic()) nucleicCount++;
173188
else if (r.isProtein()) proteinCount++;
@@ -224,7 +239,8 @@ export const extractChainsFromComponent = (component: any): { chains: ChainInfo[
224239
residueMap: resMap,
225240
type: chainType,
226241
atoms: atomList.length > 0 ? atomList : undefined,
227-
bFactors: bFactors
242+
bFactors: bFactors,
243+
coords: coords // Added field
228244
});
229245
});
230246

0 commit comments

Comments
 (0)