4141#include " ratioTest.cpp"
4242#include " panmanUtils.hpp"
4343
// Maximum length, in bytes (1 MiB), of each piece produced when a Newick string
// is split for serialization; splitNewick() uses it both as the substring
// length and to estimate how many pieces to reserve.
// NOTE(review): presumably chosen to keep each serialized text field well under
// the message format's per-field size limit — confirm against the schema.
44+ constexpr size_t NEWICK_CHUNK_SIZE = 1 * 1024 * 1024 ;
45+
4446char panmanUtils::getNucleotideFromCode (int code) {
4547 switch (code) {
4648 case 1 :
@@ -1646,9 +1648,21 @@ int doPreOrderLoop(panmanUtils::Node* node){
16461648 return c;
16471649}
16481650
1651+ std::string reconstructNewick (const panman::Tree::Reader& tree) {
1652+ std::string result;
1653+ auto newickList = tree.getNewick ();
1654+
1655+ for (auto chunk : newickList) {
1656+ result.append (chunk.cStr ());
1657+ }
1658+
1659+ return result;
1660+ }
1661+
16491662void panmanUtils::Tree::protoMATToTree (const panman::Tree::Reader& mainTree) {
16501663 // Create tree
1651- root = createTreeFromNewickString (mainTree.getNewick ().cStr ());
1664+ std::string newickString = reconstructNewick (mainTree);
1665+ root = createTreeFromNewickString (newickString);
16521666 // std::cout << "Size of nodes: " << allNodes.size() << std::endl;
16531667 // std::cout << doPreOrderLoop(root) << std::endl;
16541668
@@ -2616,6 +2630,19 @@ void panmanUtils::Tree::extractPanMATIndex(std::ostream& fout, int64_t start, in
26162630 return ;
26172631}
26182632
2633+ std::vector<std::string> splitNewick (const std::string& newick) {
2634+ std::vector<std::string> chunks;
2635+ chunks.reserve ((newick.size () / NEWICK_CHUNK_SIZE) + 1 );
2636+
2637+ for (size_t i = 0 ; i < newick.size (); i += NEWICK_CHUNK_SIZE) {
2638+ chunks.emplace_back (
2639+ newick.substr (i, NEWICK_CHUNK_SIZE)
2640+ );
2641+ }
2642+
2643+ return chunks;
2644+ }
2645+
26192646void panmanUtils::Tree::extractPanMATSegment (kj::std::StdOutputStream& fout, int64_t start, int64_t end) {
26202647 sequence_t rootSequence;
26212648 blockExists_t rootBlockExists;
@@ -2773,8 +2800,12 @@ void panmanUtils::Tree::extractPanMATSegment(kj::std::StdOutputStream& fout, int
27732800
27742801 std::string newick = getNewickString (newRoot);
27752802 std::string newick2 = getNewickString (root);
2776-
2777- treeToWrite.setNewick (newick);
2803+
2804+ auto chunks = splitNewick (newick);
2805+ auto newickList = treeToWrite.initNewick (chunks.size ());
2806+ for (size_t i = 0 ; i < chunks.size (); ++i) {
2807+ newickList.set (i, chunks[i]);
2808+ }
27782809
27792810 std::map< std::vector< uint32_t >, std::vector< std::pair< int64_t , bool > > >
27802811 consensusSeqToBlockIds;
@@ -2941,8 +2972,12 @@ void panmanUtils::Tree::writeToFile(kj::std::StdOutputStream& fout, panmanUtils:
29412972 assert (nodeIndex==allNodes.size ());
29422973
29432974 std::string newick = getNewickString (node);
2944-
2945- treeToWrite.setNewick (newick);
2975+
2976+ auto chunks = splitNewick (newick);
2977+ auto newickList = treeToWrite.initNewick (chunks.size ());
2978+ for (size_t i = 0 ; i < chunks.size (); ++i) {
2979+ newickList.set (i, chunks[i]);
2980+ }
29462981
29472982 std::map< std::vector< uint32_t >, std::vector< std::pair< int64_t , bool > > > consensusSeqToBlockIds;
29482983
@@ -6988,6 +7023,7 @@ void panmanUtils::TreeGroup::printFASTA(std::ofstream& fout, bool rootSeq ) {
69887023 }
69897024}
69907025
7026+
69917027void panmanUtils::TreeGroup::writeToFile (kj::std::StdOutputStream& fout) {
69927028 capnp::MallocMessageBuilder message;
69937029 panman::TreeGroup::Builder treeGroupToWrite = message.initRoot <panman::TreeGroup>();
@@ -7009,7 +7045,11 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) {
70097045 assert (nodeIndex == tree.allNodes .size ());
70107046
70117047 std::string newick = tree.getNewickString (node);
7012- treeToWrite.setNewick (newick);
7048+ auto chunks = splitNewick (newick);
7049+ auto newickList = treeToWrite.initNewick (chunks.size ());
7050+ for (size_t i = 0 ; i < chunks.size (); ++i) {
7051+ newickList.set (i, chunks[i]);
7052+ }
70137053 std::map< std::vector< uint32_t >, std::vector< std::pair< int64_t , bool > > >
70147054 consensusSeqToBlockIds;
70157055
0 commit comments