@@ -171,18 +171,40 @@ private static List<LibSourceFile> getLibSources(SpecLibReader libReader, ISpect
171171 {
172172 throw UnexpectedException .wrap (e , "Error reading source files from library file " + libFilePath .toString ());
173173 }
174- if (sourceFiles != null && sourceFiles .stream ().anyMatch (LibSourceFile ::isMaxQuantSearch ))
174+
175+ if (sourceFiles == null ) return null ;
176+
177+ if (sourceFiles .stream ().anyMatch (LibSourceFile ::isMaxQuantSearch ))
175178 {
176179 // For libraries built with MaxQuant search results we need to add additional files that are required for library building
177180 Set <String > idFileNames = sourceFiles .stream ().filter (LibSourceFile ::hasIdFile ).map (LibSourceFile ::getIdFile ).collect (Collectors .toSet ());
178- for (String file : LibSourceFile .MAX_QUANT_ID_FILES )
181+ for (String file : LibSourceFile .MAX_QUANT_ID_FILES )
179182 {
180183 if (!idFileNames .contains (file ))
181184 {
182185 sourceFiles .add (new LibSourceFile (null , file , null ));
183186 }
184187 }
185188 }
189+ else if (sourceFiles .stream ().anyMatch (LibSourceFile ::isDiannSearch ))
190+ {
191+ // Building a library with DIA-NN results in Skyline requires a .speclib file and a report TSV file.
192+ // The .blib file includes the name of .speclib but not the name of the report TSV file.
193+ // Building a library without the TSV gives this error message in Skyline:
194+ // "...the TSV report is required to read speclib files and must be in the same directory as the speclib
195+ // and share some leading characters (e.g. somedata-tsv.speclib and somedata-report.tsv)..."
196+
197+ // At some point Skyline may start including the names of all source files in the .blib SQLite file,
198+ // so first check if any TSV files were listed as sources in the .blib
199+ boolean hasTsvFiles = sourceFiles .stream ()
200+ .anyMatch (file -> file .hasIdFile () && file .getIdFile ().toLowerCase ().endsWith (".tsv" ));
201+ if (!hasTsvFiles )
202+ {
203+ // If there is no TSV source listed in the .blib, then add a placeholder for the DIA-NN report file.
204+ sourceFiles .add (new LibSourceFile (null , LibSourceFile .DIANN_REPORT_TSV_PLACEHOLDER , null ));
205+ }
206+ }
207+
186208 return sourceFiles ;
187209 }
188210
@@ -241,12 +263,29 @@ private void validateLibrarySources(List<LibSourceFile> sources, FileContentServ
241263 String idFile = source .getIdFile ();
242264 if (source .hasIdFile () && !checkedFiles .contains (idFile ))
243265 {
266+ if (LibSourceFile .DIANN_REPORT_TSV_PLACEHOLDER .equals (idFile )) continue ; // We will look for this when we come to the .speclib file
267+
244268 checkedFiles .add (idFile );
245269 Path path = getPath (idFile , rawFilesDirPaths , false , fcs );
246270 SpecLibSourceFile sourceFile = new SpecLibSourceFile (idFile , PEPTIDE_ID );
247271 sourceFile .setSpecLibValidationId (getId ());
248272 sourceFile .setPath (path != null ? path .toString () : DataFile .NOT_FOUND );
249273 idFiles .add (sourceFile );
274+
275+ if (source .isDiannSearch ())
276+ {
277+ // If this is a DIA-NN .speclib file, check for the required report TSV file.
278+ // We are doing this because the .blib does not include the name of the report TSV file.
279+ // We only know that: "the TSV report is required to read speclib files and must be in the
280+ // same directory as the speclib and share some leading characters
281+ // (e.g. somedata-tsv.speclib and somedata-report.tsv)"
282+ Path reportFilePath = sourceFile .found () ? getDiannReportFilePath (path ) : null ;
283+ SpecLibSourceFile diannReportSourceFile = new SpecLibSourceFile (LibSourceFile .DIANN_REPORT_TSV_PLACEHOLDER , PEPTIDE_ID );
284+ diannReportSourceFile .setSpecLibValidationId (getId ());
285+ diannReportSourceFile .setPath (reportFilePath != null ? reportFilePath .toString () : DataFile .NOT_FOUND );
286+ idFiles .add (diannReportSourceFile );
287+ checkedFiles .add (idFile );
288+ }
250289 }
251290 }
252291 setSpectrumFiles (spectrumFiles );
@@ -266,6 +305,77 @@ private Path getPath(String name, Set<Path> rawFilesDirPaths, boolean isMaxquant
266305 return null ;
267306 }
268307
308+ private static Path getDiannReportFilePath (Path speclibFilePath )
309+ {
310+ Path specLibFileDir = speclibFilePath .getParent ();
311+ try (Stream <Path > paths = Files .list (specLibFileDir ))
312+ {
313+ List <Path > files = paths .filter (path -> Files .isRegularFile (path )).collect (Collectors .toList ());
314+ return getDiannReportFilePath (speclibFilePath .getFileName ().toString (), files );
315+ }
316+ catch (IOException e )
317+ {
318+ throw UnexpectedException .wrap (e , "Error looking for DIA-NN report TSV file in " + specLibFileDir );
319+ }
320+ }
321+
322+ private static Path getDiannReportFilePath (String specLibFileName , List <Path > candidateFiles )
323+ {
324+ Map <Path , Integer > prefixLengthMap = getCommonPrefixLengthsForTsvFiles (candidateFiles , specLibFileName );
325+
326+ // Find the TSV file with the longest common prefix that also has the expected column headers in the first line
327+ return prefixLengthMap .entrySet ().stream ()
328+ .sorted ((entry1 , entry2 ) -> Integer .compare (entry2 .getValue (), entry1 .getValue ())) // Sort descending by matching prefix length
329+ .map (Map .Entry ::getKey ) // File paths
330+ .filter (file -> hasRequiredHeaders (file )) // First line should have expected header columns
331+ .findFirst () // Get the first file that meets the conditions
332+ .orElse (null );
333+ }
334+
335+ private static Map <Path , Integer > getCommonPrefixLengthsForTsvFiles (List <Path > files , String specLibFileName )
336+ {
337+ String specLibFileBaseName = FileUtil .getBaseName (specLibFileName ); // Remove file extension
338+ Map <Path , Integer > prefixLengthMap = new HashMap <>();
339+ files .stream ()
340+ .filter (file -> file .getFileName ().toString ().toLowerCase ().endsWith (".tsv" )) // Ensure it's a TSV file
341+ .forEach (file -> {
342+ // Get the longest common prefix length
343+ int commonPrefixLength = commonPrefixLength (specLibFileBaseName , FileUtil .getBaseName (file .getFileName ().toString ()));
344+
345+ if (commonPrefixLength > 0 )
346+ {
347+ prefixLengthMap .put (file , commonPrefixLength );
348+ }
349+ });
350+ return prefixLengthMap ;
351+ }
352+
353+ private static int commonPrefixLength (String s1 , String s2 )
354+ {
355+ int maxLength = Math .min (s1 .length (), s2 .length ());
356+ int index = 0 ;
357+ while (index < maxLength && s1 .charAt (index ) == s2 .charAt (index ))
358+ {
359+ index ++;
360+ }
361+ return index ;
362+ }
363+
364+ private static boolean hasRequiredHeaders (Path diannReportTsv )
365+ {
366+ try
367+ {
368+ // Read the first line of the file
369+ String firstLine = Files .lines (diannReportTsv ).findFirst ().orElse ("" );
370+ // Check if the first line has the expected header columns names
371+ return List .of (firstLine .trim ().split ("\t " )).containsAll (LibSourceFile .DIANN_REPORT_EXPECTED_HEADERS );
372+ }
373+ catch (IOException e )
374+ {
375+ throw UnexpectedException .wrap (e , "Error reading the first line of TSV file " + diannReportTsv );
376+ }
377+ }
378+
269379 private Path findInDirectoryTree (java .nio .file .Path rawFilesDirPath , String fileName , boolean allowBaseName )
270380 {
271381 try
@@ -459,6 +569,116 @@ public void testAccept()
459569 assertTrue (accept ("170428_DBS_cal_7a.d" , "170428_DBS_cal_7a.d.zip" ));
460570 }
461571
572+ @ Test
573+ public void testCommonPrefixLength () throws IOException
574+ {
575+ Path testDataDir = getDiannTestFilesPath ();
576+
577+ // The spec lib file name to compare against
578+ String specLibFileName = "report-lib.parquet.skyline-for-test.speclib" ;
579+
580+ Path tsvFile1 = testDataDir .resolve ("report-lib.tsv" );
581+ Path tsvFile2 = testDataDir .resolve ("report-lib-for-test.tsv" );
582+ Path tsvFile3 = testDataDir .resolve ("report-lib.parquet.tsv" );
583+ Path tsvFile4 = testDataDir .resolve ("report-lib.parquet-test.tsv" );
584+ Path tsvFile5 = testDataDir .resolve ("no-prefix-match-report.tsv" );
585+ Path nonTsvFile1 = testDataDir .resolve ("report-lib.parquet.skyline-for-test.txt" );
586+ Path nonTsvFile2 = testDataDir .resolve ("report.txt" );
587+ Path nonTsvFile3 = testDataDir .resolve (specLibFileName );
588+
589+ List <Path > files = List .of (tsvFile1 , tsvFile2 , tsvFile3 , tsvFile4 , tsvFile5 , nonTsvFile1 , nonTsvFile2 , nonTsvFile3 );
590+
591+ Map <Path , Integer > prefixLengthMap = SpecLibValidator .getCommonPrefixLengthsForTsvFiles (files , specLibFileName );
592+ // Expect 4 TSV files in the list; files without a prefix match, and non-TSV files should be ignored.
593+ assertEquals ("Unexpected size of prefixLengthMap" , 4 , prefixLengthMap .size ());
594+
595+ // File report-lib.tsv should have a common prefix "report-lib"
596+ assertTrue (prefixLengthMap .containsKey (tsvFile1 ));
597+ assertEquals ("report-lib" .length (), prefixLengthMap .get (tsvFile1 ).intValue ());
598+
599+ // File report-lib-test.tsv should have a common prefix "report-lib"
600+ assertTrue (prefixLengthMap .containsKey (tsvFile2 ));
601+ assertEquals ("report-lib" .length (), prefixLengthMap .get (tsvFile2 ).intValue ());
602+
603+ // File report-lib.parquet.tsv should have a common prefix "report-lib.parquet"
604+ assertTrue (prefixLengthMap .containsKey (tsvFile3 ));
605+ assertEquals ("report-lib.parquet" .length (), prefixLengthMap .get (tsvFile3 ).intValue ());
606+
607+ // File report-lib.parquet-test.tsv should have a common prefix "report-lib.parquet"
608+ assertTrue (prefixLengthMap .containsKey (tsvFile4 ));
609+ assertEquals ("report-lib.parquet" .length (), prefixLengthMap .get (tsvFile4 ).intValue ());
610+
611+ // File no-prefix-match-report.tsv should not have a common prefix
612+ assertFalse (tsvFile5 + " does not share a prefix with " + specLibFileName , prefixLengthMap .containsKey (tsvFile5 ));
613+
614+ assertFalse (prefixLengthMap .containsKey (nonTsvFile1 ));
615+ assertFalse (prefixLengthMap .containsKey (nonTsvFile2 ));
616+ assertFalse (prefixLengthMap .containsKey (nonTsvFile3 ));
617+
618+ // List of files that do not share a common prefix with the speclib file
619+ files = List .of (testDataDir .resolve ("abcd.tsv" ), testDataDir .resolve ("1234.tsv" ), testDataDir .resolve ("lib.parquet.skyline.tsv" ));
620+ prefixLengthMap = SpecLibValidator .getCommonPrefixLengthsForTsvFiles (files , specLibFileName );
621+ assertEquals (0 , prefixLengthMap .size ());
622+
623+ prefixLengthMap = SpecLibValidator .getCommonPrefixLengthsForTsvFiles (files , specLibFileName );
624+ assertEquals (0 , prefixLengthMap .size ());
625+ }
626+
627+ @ Test
628+ public void testGetDiannReportFilePath () throws IOException
629+ {
630+ Path testDataDir = getDiannTestFilesPath ();
631+ String specLibFileName = "report-lib.parquet.skyline-for-test.speclib" ;
632+
633+ Path reportTsvFile = SpecLibValidator .getDiannReportFilePath (specLibFileName , Collections .emptyList ());
634+ assertNull ("Unexpected report TSV file path returned. Input file list is empty." , reportTsvFile );
635+
636+ // TSV Files in the test directory
637+ Path tsvFile1 = testDataDir .resolve ("report.tsv" );
638+ Path tsvFile2 = testDataDir .resolve ("report-lib-for-test.tsv" );
639+ Path tsvFile3 = testDataDir .resolve ("no-prefix-match-report-for-test.tsv" );
640+ Path tsvFile4 = testDataDir .resolve ("report-lib.parquet-missing-headers.txt" );
641+ // Non-TSV files in the test directory
642+ Path nonTsvFile1 = testDataDir .resolve ("report.txt" );
643+ Path nonTsvFile2 = testDataDir .resolve ("report-lib.parquet.skyline-for-test.txt" );
644+ Path nonTsvFile3 = testDataDir .resolve (specLibFileName );
645+ Path nonTsvFile4 = testDataDir .resolve ("test_diann_library.blib" );
646+
647+ List <Path > candidateFiles = new ArrayList <>();
648+ candidateFiles .add (nonTsvFile1 );
649+ candidateFiles .add (nonTsvFile2 );
650+ candidateFiles .add (nonTsvFile3 );
651+ candidateFiles .add (nonTsvFile4 );
652+
653+ assertNull ("Unexpected report TSV file path returned. Input list does not have any TSV files" ,
654+ SpecLibValidator .getDiannReportFilePath (specLibFileName , candidateFiles ));
655+
656+ candidateFiles .add (tsvFile3 ); // TSV file does not share a prefix with the speclib file
657+ assertNull ("Unexpected report TSV file path returned. Input list does not have any TSV files that share a prefix with the speclib file" ,
658+ SpecLibValidator .getDiannReportFilePath (specLibFileName , candidateFiles ));
659+
660+ candidateFiles .add (tsvFile4 ); // TSV file does not have the required column headers
661+ assertNull ("Unexpected report TSV file path returned. Input list does not have any TSV files that share a prefix with the speclib file" +
662+ " and have the required column headers" ,
663+ SpecLibValidator .getDiannReportFilePath (specLibFileName , candidateFiles ));
664+
665+ candidateFiles .add (tsvFile1 ); // Shares a prefix and has the required column headers
666+ reportTsvFile = SpecLibValidator .getDiannReportFilePath (specLibFileName , candidateFiles );
667+ assertNotNull (reportTsvFile );
668+ assertEquals (tsvFile1 , reportTsvFile );
669+
670+ candidateFiles .add (tsvFile2 ); // Shares a longer prefix with the speclib file
671+ reportTsvFile = SpecLibValidator .getDiannReportFilePath (specLibFileName , candidateFiles );
672+ assertNotNull (reportTsvFile );
673+ assertEquals (tsvFile2 , reportTsvFile );
674+ }
675+
676+ private static Path getDiannTestFilesPath () throws IOException
677+ {
678+ return JunitUtil .getSampleData (ModuleLoader .getInstance ().getModule (PanoramaPublicModule .class ),
679+ "TargetedMS/panoramapublic/LibraryTest-DiaNN" ).toPath ();
680+ }
681+
462682 private ISpectrumLibrary createLibrary (Path path )
463683 {
464684 return new ISpectrumLibrary ()
0 commit comments