Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
000292f
Limit seq variable scope
eaasna Jun 26, 2024
0361b81
Object generator for stellar thread options
eaasna Jun 26, 2024
9bca02d
Constructor for shared_query_record
eaasna Jun 26, 2024
a75753e
Alphabet adaptor boilerplate
eaasna Jun 27, 2024
958fbf6
Construct stellar matcher
eaasna Jun 28, 2024
a0a083b
Replace stellar index
eaasna Jul 3, 2024
3113d9b
Read query sequences of adaptor alphabet in addition to native seqan …
eaasna Jul 12, 2024
4101365
Split and prefilter adapted sequences without stellar search
eaasna Jul 31, 2024
99b1aaa
Fixup
eaasna Aug 2, 2024
6704aea
Add use cases A and B
eaasna Aug 2, 2024
cae7a7c
Insert adapted sequences into queue
eaasna Aug 2, 2024
1850b96
Check unique IDs
eaasna Aug 2, 2024
294362f
Adapted database sequence input
eaasna Aug 4, 2024
1e3d1ee
Fill reverse databases
eaasna Aug 5, 2024
d3f7448
Copy stellar segment and launcher headers
eaasna Aug 5, 2024
45de638
Try to make finder
eaasna Aug 5, 2024
d52c91e
Fix
eaasna Aug 6, 2024
0a44843
Add copyright notices to each file
eaasna Aug 6, 2024
0b1ab06
Replace seqan2 containers with std::vector up to find callback
eaasna Aug 6, 2024
8930a2e
Fix segment range and add kmer shape to index
eaasna Aug 7, 2024
667c5fd
Pass abundanceCut in arguments object
eaasna Aug 8, 2024
336f9ef
Get query IDs from ID map
eaasna Aug 8, 2024
a0c41e4
Refactor database ID mapping and segments
eaasna Aug 12, 2024
681b596
Try to make stellar finder
eaasna Aug 13, 2024
b7f8641
Make finder over spans
eaasna Aug 15, 2024
f46a5bf
Fixup
eaasna Aug 15, 2024
9c0cdd6
Use local copy of stellar library
eaasna Aug 15, 2024
6794753
Remove stellar3 submodule
eaasna Aug 15, 2024
4000beb
Try to verify seqan2 segments of adapted alphabet type
eaasna Aug 16, 2024
c5e7bf4
const what can be const
eaasna Aug 20, 2024
58379ad
Fix match vector size
eaasna Aug 20, 2024
5e89739
Fix finder callback
eaasna Aug 20, 2024
0032913
Try to pass query segment parameters in shopping carts
eaasna Aug 21, 2024
b0a2d70
Make shared query records over spans
eaasna Aug 22, 2024
6830eed
Expose delta parameter
eaasna Aug 22, 2024
30e185c
Fixup: still cannot equality compare std::span
eaasna Aug 23, 2024
81d38eb
Replace seqan2::StringSet and String with std::vector in bestExtension
eaasna Aug 30, 2024
51c9199
Fixup
eaasna Sep 2, 2024
e9ae4f0
Remove faulty global alignment
eaasna Sep 3, 2024
b7ea28d
Pass reference to std::span to make sure that object survives
eaasna Sep 6, 2024
1efd8d7
Debug info
eaasna Sep 9, 2024
b8de2a1
Fix: check for index out of range
eaasna Sep 9, 2024
1115648
Copy Segment sequence
eaasna Sep 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
[submodule "lib/raptor_data_simulation"]
path = lib/raptor_data_simulation
url = git@github.com:eaasna/raptor_data_simulation.git
[submodule "lib/stellar3"]
path = lib/stellar3
url = git@github.com:seqan/stellar3.git
[submodule "lib/seqan"]
path = lib/seqan
url = git@github.com:seqan/seqan.git
Expand Down
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set (CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")

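# For debugging only. NOTE: the active set() below replaces CMAKE_CXX_FLAGS wholesale and forces -g -O0 with several warnings silenced.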
#set (CMAKE_CXX_FLAGS "-ftemplate-backtrace-limit=0")
#set (CMAKE_CXX_FLAGS "-fsanitize=address -g -O0")
set (CMAKE_CXX_FLAGS "-g -O0 -Wno-unused-parameter -Wno-unused-value -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unused-local-typedefs")


# Messages
string (ASCII 27 Esc)
set (FontBold "${Esc}[1m")
Expand Down
22 changes: 22 additions & 0 deletions include/dream_stellar/LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// ==========================================================================
// STELLAR - SwifT Exact LocaL AligneR
// http://www.seqan.de/projects/stellar/
// ==========================================================================
// Copyright (C) 2010-2012 by Birte Kehr
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ==========================================================================
// Author: Birte Kehr <birte.kehr@fu-berlin.de>
// ==========================================================================
60 changes: 60 additions & 0 deletions include/dream_stellar/diagnostics/print.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#pragma once

#include <dream_stellar/stellar_types.hpp>
#include <dream_stellar/stellar_output.hpp>

namespace dream_stellar
{

// Declarations of the diagnostic printing helpers. The template definitions
// are provided in dream_stellar/diagnostics/print.tpp.

///////////////////////////////////////////////////////////////////////////////
// Calculates parameters from parameters in options object and writes them to outStr
// Sets options.qGram if not set by user input
// (this is why `options` is taken by non-const reference).
template <typename TStream>
void _writeCalculatedParams(StellarOptions & options, TStream & outStr);

///////////////////////////////////////////////////////////////////////////////
// Writes user specified parameters from options object to outStr
// Optional parameters are only printed when they differ from their
// "unset"/default values.
template <typename TStream>
void _writeSpecifiedParams(StellarOptions const & options, TStream & outStr);

///////////////////////////////////////////////////////////////////////////////
// Writes the input/output file names, alphabet and output format from the
// options object to outStr
template <typename TStream>
void _writeFileNames(StellarOptions const & options, TStream & outStr);

///////////////////////////////////////////////////////////////////////////////
// Calculates parameters from parameters in options object and from sequences and writes them to outStr
// Only produces output when options.qgramAbundanceCut != 1.
// `queries` is used for its total sequence length (sum of size()).
template <typename sequence_t, typename TStream>
void _writeMoreCalculatedParams(StellarOptions const & options,
uint64_t const & refLen,
std::vector<sequence_t> const & queries,
TStream & outStr);

// Non-template overload without an output stream argument.
// NOTE(review): no definition of this overload is visible next to the template
// definitions — confirm where it is implemented and which stream it uses.
void _writeOutputStatistics(StellarOutputStatistics const & statistics, bool const verbose, bool const writeDisabledQueriesFile);

// Writes the number of SWIFT hits, the longest hit and the average hit length
// to outStr. Writes nothing when there are no SWIFT hits.
template <typename TStream>
void _printStellarKernelStatistics(StellarComputeStatistics const & statistics, TStream & outStr);

// Writes one database ID (suffixed with ", complement" when databaseStrand is
// false) followed, if verbose, by the kernel statistics for that database.
template <typename TStream>
void _printDatabaseIdAndStellarKernelStatistics(
bool const verbose,
bool const databaseStrand,
CharString const & databaseID,
StellarComputeStatistics const & statistics,
TStream & outStr);

// Writes the ID and kernel statistics of every database sequence to outStr.
// databaseIDs[i] is paired with computeStatistics[i].
template <typename TStream>
void _printStellarStatistics(
bool const verbose,
bool const databaseStrand,
StringSet<CharString> const & databaseIDs,
StellarComputeStatisticsCollection const & computeStatistics,
TStream & outStr);

// Writes the number of eps-matches to outStr; in verbose mode additionally
// writes the longest/average match length (if there are matches) and, if
// writeDisabledQueriesFile is set, the number of disabled queries.
template <typename TStream>
void _writeOutputStatistics(StellarOutputStatistics const & statistics,
bool const verbose,
bool const writeDisabledQueriesFile,
TStream & outStr);

} // namespace dream_stellar
182 changes: 182 additions & 0 deletions include/dream_stellar/diagnostics/print.tpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
#pragma once

#include <cmath>

#include <dream_stellar/diagnostics/print.hpp>

namespace dream_stellar
{

using namespace seqan2;

///////////////////////////////////////////////////////////////////////////////
// Prints the parameters the user supplied (taken from `options`) to `outStr`.
// Settings that are optional are listed only when they differ from the
// "unset" sentinel or default value checked below.
template <typename TStream>
void _writeSpecifiedParams(StellarOptions const & options, TStream & outStr)
{
    //IOREV _notio_
    // Sentinel that marks a size_t option as "not provided".
    constexpr size_t notProvided = std::numeric_limits<size_t>::max();

    char const * const forwardText = options.forward ? "yes" : "no";
    char const * const reverseText = options.reverse ? "yes" : "no";

    // Core search parameters.
    outStr << "User specified parameters:" << std::endl
           << " minimal match length : " << options.minLength << std::endl
           << " maximal error rate (epsilon) : " << options.epsilon << std::endl
           << " maximal x-drop : " << options.xDrop << std::endl;

    if (options.qGram != notProvided)
    {
        outStr << " k-mer (q-gram) length : " << options.qGram << std::endl;
    }

    outStr << " search forward strand : " << forwardText << std::endl
           << " search reverse complement : " << reverseText << std::endl
           << std::endl;

    // Verification and match handling.
    outStr << " verification strategy : " << to_string(options.verificationMethod) << std::endl;

    if (options.disableThresh != notProvided)
    {
        outStr << " disable queries with more than : " << options.disableThresh << " matches" << std::endl;
    }

    outStr << " maximal number of matches : " << options.numMatches << std::endl
           << " duplicate removal every : " << options.compactThresh << std::endl;

    // Repeat filter settings are shown only if either one was changed.
    bool const repeatFilterChanged = options.maxRepeatPeriod != 1 || options.minRepeatLength != 1000;
    if (repeatFilterChanged)
    {
        outStr << " max low complexity repeat period : " << options.maxRepeatPeriod << std::endl
               << " min low complexity repeat length : " << options.minRepeatLength << std::endl;
    }

    if (options.qgramAbundanceCut != 1)
    {
        outStr << " q-gram abundance cut ratio : " << options.qgramAbundanceCut << std::endl;
    }

    outStr << std::endl;
}

///////////////////////////////////////////////////////////////////////////////
// Derives the filter parameters from `options` (via StellarStatistics) and
// prints them to `outStr`.
// Side effect: when the k-mer length was computed rather than given,
// options.qGram is overwritten with the computed value.
template <typename TStream>
void _writeCalculatedParams(StellarOptions & options, TStream & outStr)
{
    //IOREV _notio_
    StellarStatistics derived{options};

    outStr << "Calculated parameters:" << std::endl;

    if (derived.kMerComputed)
    {
        // Store the computed k-mer length back into the options object.
        options.qGram = static_cast<unsigned>(derived.kMerLength);
        outStr << " k-mer length : " << derived.kMerLength << std::endl;
    }

    outStr << " s^min : " << derived.smin << std::endl
           << " threshold : " << derived.threshold << std::endl
           << " distance cut : " << derived.distanceCut << std::endl
           << " delta : " << derived.delta << std::endl
           << " overlap : " << derived.overlap << std::endl
           << std::endl;
}

///////////////////////////////////////////////////////////////////////////////
// Prints the I/O related settings (input files, alphabet, output file and
// format) from `options` to `outStr`. The disabled-queries file is listed
// only when a disable threshold has been set.
template <typename TStream>
void _writeFileNames(StellarOptions const & options, TStream & outStr)
{
    //IOREV _notio_
    bool const hasDisableThreshold = options.disableThresh != std::numeric_limits<size_t>::max();

    outStr << "I/O options:" << std::endl
           << " database file : " << options.databaseFile << std::endl
           << " query file : " << options.queryFile << std::endl
           << " alphabet : " << options.alphabet << std::endl
           << " output file : " << options.outputFile << std::endl
           << " output format : " << options.outputFormat << std::endl;

    if (hasDisableThreshold)
        outStr << " disabled queries: " << options.disabledQueriesFile << std::endl;

    outStr << std::endl;
}

///////////////////////////////////////////////////////////////////////////////
// Prints the q-gram abundance figures that depend on the query sequences.
//
// Nothing is written unless options.qgramAbundanceCut != 1.
//
//   options  supplies qGram (k-mer length) and qgramAbundanceCut
//   refLen   currently unused; kept so existing callers keep compiling
//   queries  only the summed size() of all queries is used
//   outStr   destination stream
template <typename sequence_t, typename TStream>
void _writeMoreCalculatedParams(StellarOptions const & options,
                                uint64_t const & refLen,
                                std::vector<sequence_t> const & queries,
                                TStream & outStr)
{
    static_cast<void>(refLen); // not needed for the values printed here

    // Without an abundance cut there is nothing to report.
    if (options.qgramAbundanceCut == 1)
        return;

    uint64_t queryLength{0};
    for (auto const & query : queries)
        queryLength += query.size();

    // Number of distinct q-grams, 4^q == 2^(2q), computed in floating point.
    // The previous `(long)1 << (qGram << 1)` overflowed the signed shift for
    // large q (undefined behaviour); std::ldexp yields the same exact power of
    // two for small q and stays well defined for large q.
    double const qGramCount = std::ldexp(1.0, static_cast<int>(2 * options.qGram));

    outStr << "Calculated parameters:" << std::endl;
    outStr << " q-gram expected abundance : ";
    outStr << queryLength / qGramCount << std::endl;
    outStr << " q-gram abundance threshold: ";
    outStr << _max(100, static_cast<int>(queryLength * options.qgramAbundanceCut)) << std::endl;
    outStr << std::endl;
}

// Prints the SWIFT filter statistics (hit count, longest hit, average hit
// length) to `outStr`. Each entry starts on a new line; no trailing newline is
// written. Nothing is printed when there were no SWIFT hits, which also keeps
// the average from dividing by zero.
template <typename TStream>
void _printStellarKernelStatistics(StellarComputeStatistics const & statistics, TStream & outStr)
{
    if (statistics.numSwiftHits != 0)
    {
        outStr << std::endl << " # SWIFT hits : " << statistics.numSwiftHits
               << std::endl << " Longest hit : " << statistics.maxLength
               << std::endl << " Avg hit length : " << statistics.totalLength / statistics.numSwiftHits;
    }
}

// Prints a single database entry: its ID, a ", complement" marker when
// `databaseStrand` is false, and (in verbose mode) the kernel statistics for
// that database. The line is flushed after the ID and terminated at the end.
template <typename TStream>
void _printDatabaseIdAndStellarKernelStatistics(
    bool const verbose,
    bool const databaseStrand,
    CharString const & databaseID,
    StellarComputeStatistics const & statistics,
    TStream & outStr)
{
    outStr << " " << databaseID;

    bool const isComplement = !databaseStrand;
    if (isComplement)
    {
        outStr << ", complement";
    }
    outStr << std::flush;

    if (verbose)
        _printStellarKernelStatistics(statistics, outStr);

    outStr << std::endl;
}

template <typename TStream>
void _printStellarStatistics(
bool const verbose,
bool const databaseStrand,
StringSet<CharString> const & databaseIDs,
StellarComputeStatisticsCollection const & computeStatistics,
TStream & outStr)
{
std::cerr << std::endl; // swift filter output is on same line
for (size_t i = 0; i < length(databaseIDs); ++i)
{
CharString const & databaseID = databaseIDs[i];
StellarComputeStatistics const & statistics = computeStatistics[i];
_printDatabaseIdAndStellarKernelStatistics(verbose, databaseStrand, databaseID, statistics, outStr);
}
}

// Prints the summary of the reported matches to `outStr`.
// The match count is always written. In verbose mode the longest and average
// match length follow (only if there is at least one match, so the average is
// never a division by zero), and the number of disabled queries is added when
// `writeDisabledQueriesFile` is set.
template <typename TStream>
void _writeOutputStatistics(StellarOutputStatistics const & statistics,
                            bool const verbose,
                            bool const writeDisabledQueriesFile,
                            TStream & outStr)
{
    outStr << "# Eps-matches : " << statistics.numMatches << std::endl;

    if (!verbose)
        return;

    if (statistics.numMatches > 0)
    {
        outStr << "Longest eps-match : " << statistics.maxLength << std::endl
               << "Avg match length : " << statistics.totalLength / statistics.numMatches << std::endl;
    }

    if (writeDisabledQueriesFile)
    {
        outStr << "# Disabled queries: " << statistics.numDisabled << std::endl;
    }
}

} // namespace dream_stellar
Loading