Skip to content

Commit 85fdba5

Browse files
mikikot113mikiko.takahashi
andauthored
Add sdf format exporter (#709)
* Add sdf format exporter * fix tag line("_" change to " ") * minor * change to use stringBuilder --------- Co-authored-by: mikiko.takahashi <mikiko.takahashi@riken.jp>
1 parent 84f8894 commit 85fdba5

15 files changed

Lines changed: 276 additions & 6 deletions

src/Common/CommonStandard/Enum/CommonEnums.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public enum RiCompoundType { Alkanes, Fames }
3939
public enum AlignmentIndexType { RT, RI }
4040
public enum TargetOmics { Metabolomics, Lipidomics, Proteomics }
4141
public enum Ionization { ESI, EI }
42-
public enum ExportSpectraFileFormat { mgf, msp, txt, mat, ms }
42+
public enum ExportSpectraFileFormat { mgf, msp, txt, mat, ms, sdf }
4343
public enum ExportspectraType { profile, centroid, deconvoluted }
4444
public enum IonAbundanceUnit {
4545
Intensity, Height, Area, pmol, fmol, ng, pg,
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
using CompMs.MsdialCore.DataObj;
2+
using CompMs.MsdialCore.MSDec;
3+
using System.IO;
4+
5+
namespace CompMs.MsdialCore.Export;
6+
7+
/// <summary>
/// Alignment spectra exporter that emits one SDF record per alignment spot.
/// The record itself is produced by <c>SpectraExport.SaveSpectraTableAsSdfFormat</c>;
/// this class only carries the two export options and forwards the call.
/// </summary>
public sealed class AlignmentSdfExporter : IAlignmentSpectraExporter
{
    private readonly bool _exportNoMs2Molecule;
    private readonly bool _set2dCoordinates;

    /// <summary>
    /// Creates an exporter with both options switched on.
    /// </summary>
    public AlignmentSdfExporter() : this(exportNoMs2Molecule: true, set2dCoordinates: true) { }

    /// <summary>
    /// Creates an exporter with explicit options.
    /// </summary>
    /// <param name="exportNoMs2Molecule">Forwarded as the <c>exportNoMs2Molecule</c> argument of the SDF writer.</param>
    /// <param name="set2dCoordinates">Forwarded as the <c>Set2dCoordinates</c> argument of the SDF writer.</param>
    public AlignmentSdfExporter(bool exportNoMs2Molecule, bool set2dCoordinates)
    {
        _set2dCoordinates = set2dCoordinates;
        _exportNoMs2Molecule = exportNoMs2Molecule;
    }

    /// <summary>
    /// Writes the SDF record for <paramref name="spot"/> to <paramref name="stream"/>,
    /// using the spectrum held by <paramref name="msdecResult"/>.
    /// </summary>
    void IAlignmentSpectraExporter.Export(Stream stream, AlignmentSpotProperty spot, MSDecResult msdecResult)
        => SpectraExport.SaveSpectraTableAsSdfFormat(stream, spot, msdecResult.Spectrum, _exportNoMs2Molecule, _set2dCoordinates);
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
using CompMs.MsdialCore.DataObj;
2+
using CompMs.MsdialCore.Parser;
3+
using System;
4+
using System.IO;
5+
6+
namespace CompMs.MsdialCore.Export
{
    /// <summary>
    /// Analysis-file exporter that writes every peak of a
    /// <see cref="ChromatogramPeakFeatureCollection"/> as an SDF record.
    /// The spectrum of each peak is obtained from a loader created per analysis file.
    /// </summary>
    public sealed class AnalysisSdfExporter : IAnalysisExporter<ChromatogramPeakFeatureCollection>
    {
        // Both options are fixed to true for this exporter.
        private readonly bool _exportNoMs2Molecule = true;
        private readonly bool _set2dCoordinates = true;
        private readonly Func<AnalysisFileBean, IMsScanPropertyLoader<ChromatogramPeakFeature>> _loaderFactory;

        /// <summary>
        /// Creates the exporter.
        /// </summary>
        /// <param name="loaderFuctory">Factory producing the spectrum loader for a given analysis file.</param>
        /// <exception cref="ArgumentNullException"><paramref name="loaderFuctory"/> is null.</exception>
        public AnalysisSdfExporter(Func<AnalysisFileBean, IMsScanPropertyLoader<ChromatogramPeakFeature>> loaderFuctory) {
            if (loaderFuctory is null) {
                throw new ArgumentNullException(nameof(loaderFuctory));
            }
            _loaderFactory = loaderFuctory;
        }

        /// <summary>
        /// Writes one SDF record per item of <paramref name="peakFeatureCollection"/> to <paramref name="stream"/>.
        /// <paramref name="exportStyle"/> is not used by this exporter.
        /// </summary>
        void IAnalysisExporter<ChromatogramPeakFeatureCollection>.Export(Stream stream, AnalysisFileBean analysisFile, ChromatogramPeakFeatureCollection peakFeatureCollection, ExportStyle exportStyle) {
            var scanLoader = _loaderFactory(analysisFile);
            foreach (var feature in peakFeatureCollection.Items) {
                var scan = scanLoader.Load(feature);
                SpectraExport.SaveSpectraTableAsSdfFormat(stream, feature, scan.Spectrum, _exportNoMs2Molecule, _set2dCoordinates);
            }
        }
    }
}

src/MSDIAL5/MsdialCore/Export/SpectraExport.cs

Lines changed: 159 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,13 @@
99
using CompMs.MsdialCore.DataObj;
1010
using CompMs.MsdialCore.Parameter;
1111
using CompMs.MsdialCore.Utility;
12+
using NCDK;
13+
using NCDK.IO;
14+
using NCDK.Layout;
15+
using NCDK.Smiles;
1216
using System;
1317
using System.Collections.Generic;
18+
using System.Globalization;
1419
using System.IO;
1520
using System.Linq;
1621
using System.Text;
@@ -34,6 +39,9 @@ public static void SaveSpectraTable(
3439
case ExportSpectraFileFormat.mgf:
3540
SaveSpectraTableAsMgfFormat(exportStream, chromPeakFeature, scan.Spectrum);
3641
break;
42+
case ExportSpectraFileFormat.sdf:
43+
SaveSpectraTableAsSdfFormat(exportStream, chromPeakFeature, scan.Spectrum, true, true);
44+
break;
3745
case ExportSpectraFileFormat.mat:
3846
SaveSpectraTableAsMatFormat(exportStream, chromPeakFeature, scan.Spectrum, spectrumList, mapper, parameter);
3947
break;
@@ -53,14 +61,18 @@ public static void SaveSpectraTable(
5361
IMSScanProperty scan,
5462
DataBaseMapper mapper,
5563
ParameterBase parameter,
56-
AlignmentSpotProperty isotopeTrackedLastSpot = null) {
64+
AlignmentSpotProperty isotopeTrackedLastSpot = null)
65+
{
5766
switch (spectraFormat) {
5867
case ExportSpectraFileFormat.msp:
5968
SaveSpectraTableAsNistFormat(exportStream, spotProperty, scan.Spectrum, mapper, parameter);
6069
break;
6170
case ExportSpectraFileFormat.mgf:
6271
SaveSpectraTableAsMgfFormat(exportStream, spotProperty, scan.Spectrum);
6372
break;
73+
case ExportSpectraFileFormat.sdf:
74+
SaveSpectraTableAsSdfFormat(exportStream, spotProperty, scan.Spectrum, true, true);
75+
break;
6476
case ExportSpectraFileFormat.mat:
6577
SaveSpectraTableAsMatFormat(exportStream, spotProperty, scan.Spectrum, mapper, parameter, isotopeTrackedLastSpot);
6678
break;
@@ -308,6 +320,152 @@ private static void WriteChromXFieldAsMGF(
308320
}
309321
#endregion
310322

323+
#region sdf
324+
/// <summary>
/// Writes one SDF record for an alignment spot: a mol block, the data items
/// produced by <c>WriteChromPeakFeatureInfoAsSdf</c>, and the <c>$$$$</c> record delimiter.
/// </summary>
/// <param name="stream">Destination stream; the record is written as ASCII bytes.</param>
/// <param name="spotProperty">Alignment spot to export.</param>
/// <param name="spectrum">Spectrum peaks passed on to the data-item writer.</param>
/// <param name="exportNoMs2Molecule">When false, spots without assigned MS/MS are skipped entirely.</param>
/// <param name="Set2dCoordinates">When true, 2D coordinates are generated for the structure.</param>
public static void SaveSpectraTableAsSdfFormat(
    Stream stream,
    AlignmentSpotProperty spotProperty,
    IEnumerable<ISpectrumPeak> spectrum,
    bool exportNoMs2Molecule,
    bool Set2dCoordinates
    )
{
    if (!exportNoMs2Molecule && !spotProperty.IsMsmsAssigned)
    {
        return;
    }

    var sb = new StringBuilder(8 * 1024);
    // A structure can only be built from a non-blank SMILES. A spot may have
    // MS/MS assigned and still carry no SMILES, in which case an empty mol block
    // is written instead of handing a blank string to the SMILES parser.
    if (spotProperty.IsMsmsAssigned && !string.IsNullOrWhiteSpace(spotProperty.SMILES))
    {
        MolBlockFromSmiles(sb, spotProperty.SMILES, Set2dCoordinates);
    }
    else
    {
        EmptyMolBlock(sb);
    }
    WriteChromPeakFeatureInfoAsSdf(sb, spotProperty, spectrum);
    sb.AppendLine("$$$$");
    // NOTE(review): this blank line follows the record delimiter and therefore precedes
    // the next record's header block; confirm target SDF readers tolerate it.
    sb.AppendLine();
    var bytes = Encoding.ASCII.GetBytes(sb.ToString());
    stream.Write(bytes, 0, bytes.Length);
}
351+
/// <summary>
/// Writes one SDF record for a chromatogram peak feature: a mol block, the data items
/// produced by <c>WriteChromPeakFeatureInfoAsSdf</c>, and the <c>$$$$</c> record delimiter.
/// </summary>
/// <param name="stream">Destination stream; the record is written as ASCII bytes.</param>
/// <param name="chromPeakFeature">Peak feature to export.</param>
/// <param name="spectrum">Spectrum peaks passed on to the data-item writer.</param>
/// <param name="exportNoMs2Molecule">When false, peaks that contain no MS/MS are skipped entirely.</param>
/// <param name="Set2dCoordinates">When true, 2D coordinates are generated for the structure.</param>
public static void SaveSpectraTableAsSdfFormat(
    Stream stream,
    ChromatogramPeakFeature chromPeakFeature,
    IEnumerable<ISpectrumPeak> spectrum,
    bool exportNoMs2Molecule,
    bool Set2dCoordinates
    )
{
    if (!exportNoMs2Molecule && !chromPeakFeature.IsMsmsContained)
    {
        return;
    }

    var sb = new StringBuilder(8 * 1024);
    // A structure can only be built from a non-blank SMILES. A peak may contain
    // MS/MS and still carry no SMILES, in which case an empty mol block is written
    // instead of handing a blank string to the SMILES parser.
    if (chromPeakFeature.IsMsmsContained && !string.IsNullOrWhiteSpace(chromPeakFeature.SMILES))
    {
        MolBlockFromSmiles(sb, chromPeakFeature.SMILES, Set2dCoordinates);
    }
    else
    {
        EmptyMolBlock(sb);
    }
    WriteChromPeakFeatureInfoAsSdf(sb, chromPeakFeature, spectrum);
    sb.AppendLine("$$$$");
    // NOTE(review): this blank line follows the record delimiter and therefore precedes
    // the next record's header block; confirm target SDF readers tolerate it.
    sb.AppendLine();
    var bytes = Encoding.ASCII.GetBytes(sb.ToString());
    stream.Write(bytes, 0, bytes.Length);
}
378+
/// <summary>
/// Appends one SDfile data item: the <c>&gt; &lt;fieldName&gt;</c> header line, the value
/// (an empty line when <paramref name="value"/> is null), and the blank line that
/// terminates the item.
/// </summary>
/// <param name="sb">Buffer receiving the text.</param>
/// <param name="fieldName">Tag name written between the angle brackets.</param>
/// <param name="value">Field value; may span several lines.</param>
private static void WriteSdfDataItem(StringBuilder sb, string fieldName, string value)
{
    sb.AppendLine("> <" + fieldName + ">");
    sb.AppendLine(value ?? string.Empty);
    // The SDfile format ends every data item with a blank line; without it a reader
    // cannot tell where this value stops and the next header starts.
    sb.AppendLine();
}
383+
/// <summary>
/// Appends a V2000 mol block that contains no atoms and no bonds:
/// three header lines, the counts line, and the <c>M  END</c> terminator.
/// </summary>
/// <param name="sb">Buffer receiving the text.</param>
private static void EmptyMolBlock(StringBuilder sb)
{
    sb.AppendLine();                // header line 1: molecule name (left empty)
    sb.AppendLine(" MS-DIAL");      // header line 2: program line
    sb.AppendLine();                // header line 3: comment (left empty)
    // The counts line is column-based: ten 3-character counters, then "999 V2000".
    sb.AppendLine("  0  0  0  0  0  0  0  0  0  0999 V2000");
    // The terminator is "M", two spaces, "END".
    sb.AppendLine("M  END");
}
392+
/// <summary>
/// Parses <paramref name="smiles"/> and appends the resulting molecule to
/// <paramref name="sb"/> as a V2000 mol block. A null or blank SMILES produces
/// an empty mol block instead of being passed to the parser.
/// </summary>
/// <param name="sb">Buffer receiving the mol block.</param>
/// <param name="smiles">SMILES string of the structure.</param>
/// <param name="Set2dCoordinates">When true, 2D coordinates are generated before writing.</param>
private static void MolBlockFromSmiles(StringBuilder sb, string smiles, bool Set2dCoordinates)
{
    if (string.IsNullOrWhiteSpace(smiles))
    {
        // Nothing to parse: keep the record well-formed with a structure-less block.
        EmptyMolBlock(sb);
        return;
    }

    var sp = new SmilesParser();
    IAtomContainer mol = sp.ParseSmiles(smiles);
    var sdg = new StructureDiagramGenerator
    {
        Molecule = mol
    };
    if (Set2dCoordinates)
    {
        sdg.GenerateCoordinates();
    }
    // Read the molecule back from the generator so that any generated layout is
    // what gets written (presumably the generator may hold its own copy - confirm).
    mol = sdg.Molecule;
    using (var tw = new StringWriter(sb, CultureInfo.InvariantCulture))
    using (var w = new MDLV2000Writer(tw))
    {
        w.Write(mol);
    }
}
411+
/// <summary>
/// Appends the SDF data items describing an alignment spot. NAME, SCANS,
/// PRECURSOR MZ and ION MODE are always written; annotation fields and the
/// peak list are written only when the spot has MS/MS assigned.
/// </summary>
/// <param name="sb">Buffer receiving the data items.</param>
/// <param name="spotProperty">Alignment spot being described.</param>
/// <param name="spectrum">Spectrum peaks; only peaks with intensity above zero are exported.</param>
private static void WriteChromPeakFeatureInfoAsSdf(
    StringBuilder sb,
    AlignmentSpotProperty spotProperty,
    IEnumerable<ISpectrumPeak> spectrum)
{
    // Numbers are formatted culture-independently so the decimal separator is
    // always '.' regardless of the machine's regional settings.
    var invariant = CultureInfo.InvariantCulture;

    WriteSdfDataItem(sb, "NAME", string.IsNullOrWhiteSpace(spotProperty.Name) ? "Unknown" : spotProperty.Name);
    WriteSdfDataItem(sb, "SCANS", spotProperty.MasterAlignmentID.ToString());
    WriteSdfDataItem(sb, "PRECURSOR MZ", Math.Round(spotProperty.MassCenter, 5).ToString(invariant));
    WriteSdfDataItem(sb, "ION MODE", spotProperty.IonMode.ToString());

    if (!spotProperty.IsMsmsAssigned)
    {
        return;
    }

    if (spotProperty.AdductType != null)
    {
        WriteSdfDataItem(sb, "PRECURSOR TYPE", spotProperty.AdductType.AdductIonName);
    }
    var formula = spotProperty.Formula?.FormulaString;
    if (!string.IsNullOrWhiteSpace(formula))
    {
        WriteSdfDataItem(sb, "FORMULA", formula);
    }
    // Each identifier gets its own tag; previously all three were emitted as FORMULA.
    if (!string.IsNullOrWhiteSpace(spotProperty.InChIKey))
    {
        WriteSdfDataItem(sb, "INCHIKEY", spotProperty.InChIKey);
    }
    if (!string.IsNullOrWhiteSpace(spotProperty.SMILES))
    {
        WriteSdfDataItem(sb, "SMILES", spotProperty.SMILES);
    }
    WriteSdfDataItem(sb, "MS LEVEL", "MS2");

    // The same filtered list feeds both NUM PEAKS and the peak table so the
    // declared count always matches the number of lines written.
    var peaks = (spectrum ?? Enumerable.Empty<ISpectrumPeak>())
        .Where(spec => spec.Intensity > 0)
        .ToList();
    WriteSdfDataItem(sb, "NUM PEAKS", peaks.Count.ToString(invariant));
    var peaksText = string.Join(
        "\n",
        peaks.Select(p =>
            string.Format(invariant, "{0} {1}", Math.Round(p.Mass, 5), Math.Round(p.Intensity, 0))
        )
    );
    WriteSdfDataItem(sb, "MASS SPECTRAL PEAKS", peaksText);
}
439+
/// <summary>
/// Appends the SDF data items describing a chromatogram peak feature. NAME, SCANS,
/// PRECURSOR MZ and ION MODE are always written; annotation fields and the
/// peak list are written only when the peak contains MS/MS.
/// </summary>
/// <param name="sb">Buffer receiving the data items.</param>
/// <param name="spotProperty">Peak feature being described.</param>
/// <param name="spectrum">Spectrum peaks; only peaks with intensity above zero are exported.</param>
private static void WriteChromPeakFeatureInfoAsSdf(
    StringBuilder sb,
    ChromatogramPeakFeature spotProperty,
    IEnumerable<ISpectrumPeak> spectrum)
{
    // Numbers are formatted culture-independently so the decimal separator is
    // always '.' regardless of the machine's regional settings.
    var invariant = CultureInfo.InvariantCulture;

    WriteSdfDataItem(sb, "NAME", string.IsNullOrWhiteSpace(spotProperty.Name) ? "Unknown" : spotProperty.Name);
    WriteSdfDataItem(sb, "SCANS", spotProperty.PeakID.ToString());
    WriteSdfDataItem(sb, "PRECURSOR MZ", Math.Round(spotProperty.PrecursorMz, 5).ToString(invariant));
    WriteSdfDataItem(sb, "ION MODE", spotProperty.IonMode.ToString());

    if (!spotProperty.IsMsmsContained)
    {
        return;
    }

    if (spotProperty.AdductType != null)
    {
        WriteSdfDataItem(sb, "PRECURSOR TYPE", spotProperty.AdductType.AdductIonName);
    }
    var formula = spotProperty.Formula?.FormulaString;
    if (!string.IsNullOrWhiteSpace(formula))
    {
        WriteSdfDataItem(sb, "FORMULA", formula);
    }
    // Each identifier gets its own tag; previously all three were emitted as FORMULA.
    if (!string.IsNullOrWhiteSpace(spotProperty.InChIKey))
    {
        WriteSdfDataItem(sb, "INCHIKEY", spotProperty.InChIKey);
    }
    if (!string.IsNullOrWhiteSpace(spotProperty.SMILES))
    {
        WriteSdfDataItem(sb, "SMILES", spotProperty.SMILES);
    }
    WriteSdfDataItem(sb, "MS LEVEL", "MS2");

    // The same filtered list feeds both NUM PEAKS and the peak table so the
    // declared count always matches the number of lines written.
    var peaks = (spectrum ?? Enumerable.Empty<ISpectrumPeak>())
        .Where(spec => spec.Intensity > 0)
        .ToList();
    WriteSdfDataItem(sb, "NUM PEAKS", peaks.Count.ToString(invariant));
    var peaksText = string.Join(
        "\n",
        peaks.Select(p =>
            string.Format(invariant, "{0} {1}", Math.Round(p.Mass, 5), Math.Round(p.Intensity, 0))
        )
    );
    WriteSdfDataItem(sb, "MASS SPECTRAL PEAKS", peaksText);
}
467+
#endregion
468+
311469
#region mat
312470
private static void SaveSpectraTableAsMatFormat(
313471
Stream stream,

src/MSDIAL5/MsdialGuiApp/Model/Dims/DimsMethodModel.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ public DimsMethodModel(
9999
peakSpotSupplyer,
100100
new AlignmentSpectraExportFormat("Msp", "msp", new AlignmentMspExporter(storage.DataBaseMapper, storage.Parameter)),
101101
new AlignmentSpectraExportFormat("Mgf", "mgf", new AlignmentMgfExporter()),
102+
new AlignmentSpectraExportFormat("Sdf", "sdf", new AlignmentSdfExporter()),
102103
new AlignmentSpectraExportFormat("Mat", "mat", new AlignmentMatExporter(storage.DataBaseMapper, storage.Parameter)));
103104
var gnps = new AlignmentGnpsExportModel("GNPS", quantTypes, new GnpsMetadataAccessor(storage.DataBaseMapper, storage.Parameter), peakMeta.GetAccessor(), fileMeta.GetAccessor(), analysisFileBeanModelCollection);
104105
var spectraAndReference = new AlignmentMatchedSpectraExportModel(peakSpotSupplyer, storage.DataBaseMapper, analysisFileBeanModelCollection.IncludedAnalysisFiles, CompoundSearcherCollection.BuildSearchers(storage.DataBases, storage.DataBaseMapper));

src/MSDIAL5/MsdialGuiApp/Model/Gcms/GcmsMethodModel.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ public GcmsMethodModel(AnalysisFileBeanModelCollection analysisFileBeanModelColl
112112
peakSpotSupplyer,
113113
new AlignmentSpectraExportFormat("Msp", "msp", new AlignmentMspExporter(storage.DataBaseMapper, storage.Parameter)),
114114
new AlignmentSpectraExportFormat("Mgf", "mgf", new AlignmentMgfExporter()),
115+
new AlignmentSpectraExportFormat("Sdf", "sdf", new AlignmentSdfExporter()),
115116
new AlignmentSpectraExportFormat("Mat", "mat", new AlignmentMatExporter(storage.DataBaseMapper, storage.Parameter)));
116117
var gnps = new AlignmentGnpsExportModel("GNPS", quantTypes, new GnpsMetadataAccessor(storage.DataBaseMapper, storage.Parameter), peakMeta.GetAccessor(), fileMeta.GetAccessor(), analysisFileBeanModelCollection);
117118
var exportGroups = new List<IAlignmentResultExportModel> { peakGroup, spectraGroup, gnps, };

src/MSDIAL5/MsdialGuiApp/Model/ImagingImms/ImagingImmsMethodModel.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
using System;
1616
using System.Collections.Generic;
1717
using System.Collections.ObjectModel;
18+
using System.ComponentModel;
1819
using System.Diagnostics;
1920
using System.Linq;
2021
using System.Threading;
@@ -150,6 +151,15 @@ public AnalysisResultExportModel CreateExportAnalysisModel() {
150151
FileSuffix = "mgf",
151152
Label = "MASCOT format (*.mgf)"
152153
},
154+
new SpectraTypeSelectableMsdialAnalysisExportModel(new Dictionary<ExportspectraType, IAnalysisExporter<ChromatogramPeakFeatureCollection>> {
155+
[ExportspectraType.deconvoluted] = new AnalysisSdfExporter(file => new MSDecLoader(file.DeconvolutionFilePath, file.DeconvolutionFilePathList)),
156+
[ExportspectraType.centroid] = new AnalysisSdfExporter(file => new CentroidMsScanPropertyLoader(_storage.Parameter.ProviderFactoryParameter.Create().Create(file.LoadRawMeasurement(true, true, 5, 5000)), _storage.Parameter.MS2DataType)),
157+
})
158+
{
159+
FilePrefix = "Sdf",
160+
FileSuffix = "sdf",
161+
Label = "MDL SDfile (*.sdf)"
162+
},
153163
new MsdialAnalysisMassBankRecordExportModel(_storage.Parameter.ProjectParam, StudyContext),
154164
};
155165

src/MSDIAL5/MsdialGuiApp/Model/Imms/ImmsMethodModel.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ public ImmsMethodModel(AnalysisFileBeanModelCollection analysisFileBeanModelColl
104104
peakSpotSupplyer,
105105
new AlignmentSpectraExportFormat("Msp", "msp", new AlignmentMspExporter(storage.DataBaseMapper, storage.Parameter)),
106106
new AlignmentSpectraExportFormat("Mgf", "mgf", new AlignmentMgfExporter()),
107+
new AlignmentSpectraExportFormat("Sdf", "sdf", new AlignmentSdfExporter()),
107108
new AlignmentSpectraExportFormat("Mat", "mat", new AlignmentMatExporter(storage.DataBaseMapper, storage.Parameter)));
108109
var gnps = new AlignmentGnpsExportModel("GNPS", quantTypes, new GnpsMetadataAccessor(storage.DataBaseMapper, storage.Parameter), peakMeta.GetAccessor(), fileMeta.GetAccessor(), analysisFileBeanModelCollection);
109110
var spectraAndReference = new AlignmentMatchedSpectraExportModel(peakSpotSupplyer, storage.DataBaseMapper, analysisFileBeanModelCollection.IncludedAnalysisFiles, CompoundSearcherCollection.BuildSearchers(storage.DataBases, storage.DataBaseMapper));
@@ -317,6 +318,15 @@ public AnalysisResultExportModel CreateExportAnalysisResult() {
317318
FileSuffix = "mgf",
318319
Label = "MASCOT format (*.mgf)"
319320
},
321+
new SpectraTypeSelectableMsdialAnalysisExportModel(new Dictionary<ExportspectraType, IAnalysisExporter<ChromatogramPeakFeatureCollection>> {
322+
[ExportspectraType.deconvoluted] = new AnalysisSdfExporter(file => new MSDecLoader(file.DeconvolutionFilePath, file.DeconvolutionFilePathList)),
323+
[ExportspectraType.centroid] = new AnalysisSdfExporter(file => new CentroidMsScanPropertyLoader(ProviderFactory.Create(file), _storage.Parameter.MS2DataType)),
324+
})
325+
{
326+
FilePrefix = "Sdf",
327+
FileSuffix = "sdf",
328+
Label = "MDL SDfile (*.sdf)"
329+
},
320330
new MsdialAnalysisMassBankRecordExportModel(_storage.Parameter.ProjectParam, StudyContext),
321331
};
322332
return new AnalysisResultExportModel(AnalysisFileModelCollection, _storage.Parameter.ProjectParam.ProjectFolderPath, _broker, models);

src/MSDIAL5/MsdialGuiApp/Model/Lcimms/LcimmsMethodModel.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
using Reactive.Bindings.Notifiers;
2727
using System;
2828
using System.Collections.Generic;
29+
using System.ComponentModel;
2930
using System.Diagnostics;
3031
using System.Linq;
3132
using System.Reactive.Linq;
@@ -100,6 +101,7 @@ public LcimmsMethodModel(AnalysisFileBeanModelCollection analysisFileBeanModelCo
100101
peakSpotSupplyer,
101102
new AlignmentSpectraExportFormat("Msp", "msp", new AlignmentMspExporter(storage.DataBaseMapper, storage.Parameter)),
102103
new AlignmentSpectraExportFormat("Mgf", "mgf", new AlignmentMgfExporter()),
104+
new AlignmentSpectraExportFormat("Sdf", "sdf", new AlignmentSdfExporter()),
103105
new AlignmentSpectraExportFormat("Mat", "mat", new AlignmentMatExporter(storage.DataBaseMapper, storage.Parameter)));
104106
var gnps = new AlignmentGnpsExportModel("GNPS", quantTypes, new GnpsMetadataAccessor(storage.DataBaseMapper, storage.Parameter), peakMeta.GetAccessor(), fileMeta.GetAccessor(), analysisFileBeanModelCollection);
105107
var spectraAndReference = new AlignmentMatchedSpectraExportModel(peakSpotSupplyer, storage.DataBaseMapper, analysisFileBeanModelCollection.IncludedAnalysisFiles, CompoundSearcherCollection.BuildSearchers(storage.DataBases, storage.DataBaseMapper));
@@ -319,6 +321,15 @@ static RawMeasurement map(AnalysisFileBean file) {
319321
FileSuffix = "mgf",
320322
Label = "MASCOT format (*.mgf)"
321323
},
324+
new SpectraTypeSelectableMsdialAnalysisExportModel(new Dictionary<ExportspectraType, IAnalysisExporter<ChromatogramPeakFeatureCollection>> {
325+
[ExportspectraType.deconvoluted] = new AnalysisSdfExporter(file => new MSDecLoader(file.DeconvolutionFilePath, file.DeconvolutionFilePathList)),
326+
[ExportspectraType.centroid] = new AnalysisSdfExporter(file => new CentroidMsScanPropertyLoader(factory.Create(file), Storage.Parameter.MS2DataType)),
327+
})
328+
{
329+
FilePrefix = "Sdf",
330+
FileSuffix = "sdf",
331+
Label = "MDL SDfile (*.sdf)"
332+
},
322333
new MsdialAnalysisMassBankRecordExportModel(Storage.Parameter.ProjectParam, _studyContext),
323334
};
324335
return new AnalysisResultExportModel(AnalysisFileModelCollection, Storage.Parameter.ProjectParam.ProjectFolderPath, _broker, models);

0 commit comments

Comments
 (0)