From 41ea182090b3a025cd387d26ccebe4b2b040b731 Mon Sep 17 00:00:00 2001 From: "Eric T. Dawson" Date: Tue, 11 Jun 2019 12:30:09 -0400 Subject: [PATCH 1/6] Revert to old intervaltree that predates a major refactor. This appears to fix the build process. --- intervaltree | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intervaltree b/intervaltree index 17ddd578..8fc4be91 160000 --- a/intervaltree +++ b/intervaltree @@ -1 +1 @@ -Subproject commit 17ddd578a455fe6c10e3ecde1c67121d4412bcec +Subproject commit 8fc4be91866237995e1105689d84bece619f4663 From 71cf50a2fb5bec2829773d4c7015ca5366d6c9e2 Mon Sep 17 00:00:00 2001 From: "Eric T. Dawson" Date: Tue, 11 Jun 2019 14:47:44 -0400 Subject: [PATCH 2/6] Harden against int overflows by forcing SV fields to be int64_t or uint64_t rather than long --- src/Variant.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Variant.cpp b/src/Variant.cpp index e676e560..90fc7e2a 100644 --- a/src/Variant.cpp +++ b/src/Variant.cpp @@ -388,10 +388,10 @@ bool Variant::canonicalize(FastaReference& fasta_reference, vectorinfo.count("END") && !this->info.at("END").empty(); // Where is the end, or where should it be? - long info_end = 0; + uint64_t info_end = 0; if (has_end) { // Get the END from the tag - info_end = stol(this->info.at("END")[0]); + info_end = stoull(this->info.at("END")[0]); } else if(ref_valid && !place_seq) { // Get the END from the reference sequence, which is ready. @@ -421,13 +421,13 @@ bool Variant::canonicalize(FastaReference& fasta_reference, vectorinfo.at("SVLEN")[0])); + info_len = abs(stoll(this->info.at("SVLEN")[0])); } else if ((svtype == "INS" || svtype == "DEL") && has_span){ - info_len = abs(stol(this->info.at("SPAN")[0])); + info_len = abs(stoll(this->info.at("SPAN")[0])); } else if (svtype == "DEL"){ // We always have the end by now @@ -462,7 +462,7 @@ bool Variant::canonicalize(FastaReference& fasta_reference, vectorinfo.at("SVLEN")[0])); From e98378aeb9836158b657eabcc1b0f838d970901c Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 11 Jun 2019 15:44:55 -0400 Subject: [PATCH 3/6] Change CXXFLAGS to include debugging and c++11 rather than c++0x --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 348ace53..ce37ffc0 100644 --- a/Makefile +++ b/Makefile @@ -147,7 +147,7 @@ scriptToBin: $(BINS) GIT_VERSION := $(shell git describe --abbrev=4 --dirty --always) -CXXFLAGS = -O3 -D_FILE_OFFSET_BITS=64 -std=c++0x +CXXFLAGS = -O3 -D_FILE_OFFSET_BITS=64 -std=c++11 -ggdb #CXXFLAGS = -O2 #CXXFLAGS = -pedantic -Wall -Wshadow -Wpointer-arith -Wcast-qual From 7b14ba8383e985c41a6ecc5e9d5999ca950c4f65 Mon Sep 17 00:00:00 2001 From: "Eric T. Dawson" Date: Tue, 11 Jun 2019 16:15:04 -0400 Subject: [PATCH 4/6] Use int64_t, rather than uint64_t, for SV span/len/end --- src/Variant.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Variant.cpp b/src/Variant.cpp index 90fc7e2a..1d2bb221 100644 --- a/src/Variant.cpp +++ b/src/Variant.cpp @@ -388,7 +388,7 @@ bool Variant::canonicalize(FastaReference& fasta_reference, vectorinfo.count("END") && !this->info.at("END").empty(); // Where is the end, or where should it be? - uint64_t info_end = 0; + int64_t info_end = 0; if (has_end) { // Get the END from the tag info_end = stoull(this->info.at("END")[0]); @@ -421,7 +421,7 @@ bool Variant::canonicalize(FastaReference& fasta_reference, vectorinfo.at("SVLEN")[0])); @@ -462,7 +462,7 @@ bool Variant::canonicalize(FastaReference& fasta_reference, vectorinfo.at("SVLEN")[0])); From fb1a3e02c52299c789f22f86239b339dd9fcbbc8 Mon Sep 17 00:00:00 2001 From: "Eric T. Dawson" Date: Thu, 13 Jun 2019 13:05:26 -0400 Subject: [PATCH 5/6] Add more warning texts for variant SV canonicalization --- src/Variant.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Variant.cpp b/src/Variant.cpp index 90fc7e2a..8088d22b 100644 --- a/src/Variant.cpp +++ b/src/Variant.cpp @@ -314,6 +314,7 @@ bool Variant::canonicalizable(){ if (svtype.empty()){ // We have no SV type, so we can't interpret things. + cerr << "No SV type information. Variant cannot be canonicalized: " << *this << endl; return false; } From 13f001dac3af3a62e4f55735262dafebb07b15b0 Mon Sep 17 00:00:00 2001 From: "Eric T. Dawson" Date: Thu, 13 Jun 2019 13:05:44 -0400 Subject: [PATCH 6/6] Check if a variant is canonicalizable before canonicalizing it --- src/vcfnormalizesvs.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/vcfnormalizesvs.cpp b/src/vcfnormalizesvs.cpp index 63a6d591..b70c342e 100644 --- a/src/vcfnormalizesvs.cpp +++ b/src/vcfnormalizesvs.cpp @@ -92,7 +92,10 @@ int main(int argc, char** argv) { Variant var; while (variantFile.getNextVariant(var)) { - bool valid = var.canonicalize(ref, insertions, replace_sequences, min_size); + bool valid = false; + if (var.canonicalizable()){ + valid = var.canonicalize(ref, insertions, replace_sequences, min_size); + } if (!valid){ cerr << "Variant could not be normalized" << var << endl; }