10
10
#include < iostream>
11
11
#include < unordered_map>
12
12
#include < string>
13
+ #include < set>
13
14
#include " ColoredCDBG.hpp"
14
15
15
16
// --aa option helper functions
@@ -231,6 +232,18 @@ std::pair<size_t,size_t> KmerIndex::getECInfo() const {
231
232
return std::make_pair (max_ec_len, cardinality_zero_encounters);
232
233
}
233
234
235
+ // Begin Shading
236
// Splits a target name around the first occurrence of the shade marker.
//
// name: the target name to inspect; it is only read, never modified.
//
// Returns {text before the marker, text after the marker} when the marker
// occurs in name. When the marker is absent, both elements of the returned
// pair are the sentinel string " ", which callers compare against to detect
// "not a shade".
std::pair<std::string,std::string> shadedTargetName(std::string& name) {
  const std::string marker = " _shade_";
  const std::string::size_type marker_pos = name.find(marker);

  if (marker_pos == std::string::npos) {
    return std::make_pair(" ", " "); // Not a shade
  }

  // Everything before the marker is the target name; everything after it is
  // the associated shade (variant). Only the first marker is used as the split
  // point, so any later marker stays inside the variant.
  std::string target = name.substr(0, marker_pos);
  std::string shade = name.substr(marker_pos + marker.length());
  return std::make_pair(target, shade);
}
245
+ // End Shading
246
+
234
247
void KmerIndex::BuildTranscripts (const ProgramOptions& opt, std::ofstream& out) {
235
248
// read input
236
249
u_set_<std::string> unique_names;
@@ -354,6 +367,18 @@ void KmerIndex::BuildTranscripts(const ProgramOptions& opt, std::ofstream& out)
354
367
}
355
368
unique_names.insert (name);
356
369
target_names_.push_back (name);
370
+
371
+ // Begin Shading
372
+ auto shade_info = shadedTargetName (name);
373
+ if (shade_info.first != " " ) {
374
+ std::string tname = shade_info.first ;
375
+ std::string variant = shade_info.second ;
376
+ auto it = std::find (target_names_.begin (), target_names_.end (), tname);
377
+ if (it != target_names_.end ()) {
378
+ shadeToColorTranscriptMap[target_names_.size ()-1 ] = std::distance (target_names_.begin (), it);
379
+ }
380
+ }
381
+ // End Shading
357
382
}
358
383
}
359
384
@@ -399,6 +424,9 @@ void KmerIndex::BuildDistinguishingGraph(const ProgramOptions& opt, std::ofstrea
399
424
size_t num_seqs = 0 ;
400
425
int max_color = 0 ;
401
426
u_set_<int > external_input_names;
427
+ // Begin Shading
428
+ std::set<std::string> variants_set; // Ordered set to keep track of variants (i.e. colors with shades)
429
+ // End Shading
402
430
for (int i = 0 ; i < opt.transfasta .size (); i++) { // Currently, this should only be one file
403
431
auto fasta = opt.transfasta [i];
404
432
fp = opt.transfasta .size () == 1 && opt.transfasta [0 ] == " -" ? gzdopen (fileno (stdin), " r" ) : gzopen (fasta.c_str (), " r" );
@@ -414,9 +442,23 @@ void KmerIndex::BuildDistinguishingGraph(const ProgramOptions& opt, std::ofstrea
414
442
continue ;
415
443
}
416
444
int color = std::atoi (strname.c_str ());
445
+ // Begin Shading
446
+ std::string variant;
447
+ auto shade_info = shadedTargetName (strname);
448
+ if (shade_info.first != " " ) {
449
+ std::string tname = shade_info.first ;
450
+ variant = shade_info.second ;
451
+ color = std::atoi (tname.c_str ());
452
+ variants_set.insert (std::to_string (color) + " _shade_" + variant);
453
+ }
454
+ // End Shading
417
455
external_input_names.insert (color);
418
456
if (color > max_color) max_color = color;
419
- of << " >" << std::to_string (color) << " \n " << str << std::endl;
457
+ of << " >" << std::to_string (color);
458
+ // Begin Shading
459
+ if (!variant.empty ()) of << " _shade_" << variant;
460
+ // End Shading
461
+ of << " \n " << str << std::endl;
420
462
num_seqs++;
421
463
}
422
464
gzclose (fp);
@@ -437,6 +479,16 @@ void KmerIndex::BuildDistinguishingGraph(const ProgramOptions& opt, std::ofstrea
437
479
target_names_.push_back (std::to_string (i));
438
480
target_lens_.push_back (k); // dummy length (k-mer size)
439
481
}
482
+ // Begin Shading
483
+ for (const auto & v : variants_set) {
484
+ num_trans++; // Each color-shade duo counts as an additional target
485
+ target_names_.push_back (v);
486
+ target_lens_.push_back (k); // dummy length (k-mer size)
487
+ }
488
+ if (num_trans != ncolors) {
489
+ std::cerr << " [build] Detected " << std::to_string (num_trans-ncolors) << " shades" << std::endl;
490
+ }
491
+ // End Shading
440
492
441
493
std::cerr << " [build] Building graph from k-mers" << std::endl;
442
494
BuildDeBruijnGraph (opt, tmp_file2, out);
@@ -449,6 +501,9 @@ void KmerIndex::BuildDistinguishingGraph(const ProgramOptions& opt, std::ofstrea
449
501
std::vector<std::vector<TRInfo> > trinfos (dbg.size ());
450
502
std::ifstream infile_a (tmp_file2);
451
503
int current_color = 0 ;
504
+ // Begin Shading
505
+ std::string current_variant;
506
+ // End Shading
452
507
std::string line;
453
508
while (std::getline (infile_a, line)) {
454
509
if (line.length () == 0 ) {
@@ -458,6 +513,16 @@ void KmerIndex::BuildDistinguishingGraph(const ProgramOptions& opt, std::ofstrea
458
513
current_color = onlist_sequences.cardinality ();
459
514
} else {
460
515
current_color = std::atoi (line.c_str ()+1 );
516
+ // Begin Shading
517
+ current_variant = " " ;
518
+ std::string name = line.substr (1 );
519
+ auto shade_info = shadedTargetName (name);
520
+ if (shade_info.first != " " ) {
521
+ std::string tname = shade_info.first ;
522
+ current_variant = shade_info.second ;
523
+ current_color = std::atoi (tname.c_str ());
524
+ }
525
+ // End Shading
461
526
}
462
527
continue ;
463
528
}
@@ -481,8 +546,16 @@ void KmerIndex::BuildDistinguishingGraph(const ProgramOptions& opt, std::ofstrea
481
546
tr.pos = (proc-um.len ) | (!um.strand ? sense : missense);
482
547
tr.start = um.dist ;
483
548
tr.stop = um.dist + um.len ;
484
-
485
549
trinfos[n->id ].push_back (tr);
550
+
551
+ // Begin Shading
552
+ if (!current_variant.empty ()) {
553
+ auto it = variants_set.find (std::to_string (current_color) + " _shade_" + current_variant);
554
+ assert (it != variants_set.end ());
555
+ tr.trid = ncolors + std::distance (variants_set.begin (), it);
556
+ trinfos[n->id ].push_back (tr);
557
+ }
558
+ // End Shading
486
559
}
487
560
}
488
561
infile_a.close ();
@@ -995,6 +1068,13 @@ void KmerIndex::BuildEquivalenceClasses(const ProgramOptions& opt, const std::st
995
1068
tr.stop = um.dist + um.len ;
996
1069
997
1070
trinfos[n->id ].push_back (tr);
1071
+ // Begin Shading
1072
+ auto it = shadeToColorTranscriptMap.find (tr.trid );
1073
+ if (it != shadeToColorTranscriptMap.end ()) {
1074
+ tr.trid = shadeToColorTranscriptMap[tr.trid ];
1075
+ trinfos[n->id ].push_back (tr); // Add the color of the original transcript as well
1076
+ }
1077
+ // End Shading
998
1078
}
999
1079
j++;
1000
1080
}
@@ -1020,6 +1100,11 @@ void KmerIndex::BuildEquivalenceClasses(const ProgramOptions& opt, const std::st
1020
1100
1021
1101
std::cerr << " [build] target de Bruijn graph has k-mer length " << dbg.getK () << " and minimizer length " << dbg.getG () << std::endl;
1022
1102
std::cerr << " [build] target de Bruijn graph has " << dbg.size () << " contigs and contains " << dbg.nbKmers () << " k-mers " << std::endl;
1103
+ // Begin Shading
1104
+ if (shadeToColorTranscriptMap.size () != 0 ) {
1105
+ std::cerr << " [build] number of shades: " << std::to_string (shadeToColorTranscriptMap.size ()) << std::endl;
1106
+ }
1107
+ // End Shading
1023
1108
}
1024
1109
1025
1110
void KmerIndex::PopulateMosaicECs (std::vector<std::vector<TRInfo> >& trinfos) {
@@ -1418,6 +1503,19 @@ void KmerIndex::load(ProgramOptions& opt, bool loadKmerTable, bool loadDlist) {
1418
1503
in.read (buffer, tmp_size);
1419
1504
1420
1505
target_names_.push_back (std::string (buffer));
1506
+ // Begin Shading
1507
+ auto shade_info = shadedTargetName (target_names_[target_names_.size ()-1 ]);
1508
+ if (shade_info.first != " " ) {
1509
+ std::string tname = shade_info.first ;
1510
+ std::string variant = shade_info.second ;
1511
+ auto it = std::find (target_names_.begin (), target_names_.end (), tname);
1512
+ if (it != target_names_.end ()) {
1513
+ shadeToColorTranscriptMap[i] = std::distance (target_names_.begin (), it);
1514
+ }
1515
+ use_shade = true ;
1516
+ shade_sequences.add (i);
1517
+ }
1518
+ // End Shading
1421
1519
}
1422
1520
delete[] buffer;
1423
1521
@@ -1438,6 +1536,11 @@ void KmerIndex::load(ProgramOptions& opt, bool loadKmerTable, bool loadDlist) {
1438
1536
if (num_trans-onlist_sequences.cardinality () > 0 ) {
1439
1537
std::cerr << " [index] number of D-list k-mers: " << pretty_num (static_cast <size_t >(num_trans-onlist_sequences.cardinality ())) << std::endl;
1440
1538
}
1539
+ // Begin Shading
1540
+ if (shadeToColorTranscriptMap.size () != 0 ) {
1541
+ std::cerr << " [build] number of shades: " << std::to_string (shadeToColorTranscriptMap.size ()) << std::endl;
1542
+ }
1543
+ // End Shading
1441
1544
1442
1545
in.close ();
1443
1546
@@ -1594,6 +1697,11 @@ int KmerIndex::mapPair(const char *s1, int l1, const char *s2, int l2) const {
1594
1697
// post: v contains all equiv classes for the k-mers in s
1595
1698
void KmerIndex::match (const char *s, int l, std::vector<std::pair<const_UnitigMap<Node>, int >>& v, bool partial, bool cfc) const {
1596
1699
const Node* n;
1700
+
1701
+ // Begin Shading
1702
+ if (use_shade) partial = false ;
1703
+ // End Shading
1704
+ if (do_union) partial = false ;
1597
1705
1598
1706
// TODO:
1599
1707
// Rework KmerIndex::match() such that it uses the following type of logic
@@ -1664,6 +1772,8 @@ void KmerIndex::match(const char *s, int l, std::vector<std::pair<const_UnitigMa
1664
1772
}
1665
1773
1666
1774
v.push_back ({um, kit->second });
1775
+
1776
+ if (no_jump) continue ;
1667
1777
1668
1778
// Find start and end of O.G. kallisto contig w.r.t. the bifrost-kallisto
1669
1779
// unitig
0 commit comments