diff --git a/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantAggregatedVcfFactory.java b/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantAggregatedVcfFactory.java index 55ffb129..ebd7da27 100644 --- a/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantAggregatedVcfFactory.java +++ b/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantAggregatedVcfFactory.java @@ -18,7 +18,6 @@ import uk.ac.ebi.eva.commons.core.models.VariantStatistics; import uk.ac.ebi.eva.commons.core.models.factories.exception.IncompleteInformationException; -import uk.ac.ebi.eva.commons.core.models.factories.exception.NonVariantException; import uk.ac.ebi.eva.commons.core.models.genotype.Genotype; import uk.ac.ebi.eva.commons.core.models.pipeline.Variant; import uk.ac.ebi.eva.commons.core.models.pipeline.VariantSourceEntry; @@ -395,18 +394,23 @@ protected void addGenotypeWithGTS(Variant variant, VariantSourceEntry sourceEntr } @Override - protected void checkVariantInformation(Variant variant, String fileId, String studyId) - throws NonVariantException, IncompleteInformationException { - super.checkVariantInformation(variant, fileId, studyId); + protected boolean checkVariantInformation(Variant variant, String fileId, String studyId) + throws IncompleteInformationException { + if (!super.checkVariantInformation(variant, fileId, studyId)) { + return false; + } if (requireEvidence) { VariantSourceEntry variantSourceEntry = variant.getSourceEntry(fileId, studyId); if (!canAlleleFrequenciesBeCalculated(variantSourceEntry)) { throw new IncompleteInformationException(variant); } else if (variantFrequencyIsZero(variantSourceEntry)) { - throw new NonVariantException("The variant " + variant + " has allele frequency or counts '0'"); + logger.warn("The variant {} has allele frequency or counts '0' and will be discarded as a non-variant", + variant); + return false; } } + return true; } protected boolean canAlleleFrequenciesBeCalculated(VariantSourceEntry variantSourceEntry) { diff --git a/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantGenotypedVcfFactory.java b/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantGenotypedVcfFactory.java index 2ecade6b..4dd2d95e 100644 --- a/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantGenotypedVcfFactory.java +++ b/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantGenotypedVcfFactory.java @@ -16,7 +16,6 @@ package uk.ac.ebi.eva.commons.core.models.factories; import uk.ac.ebi.eva.commons.core.models.factories.exception.IncompleteInformationException; -import uk.ac.ebi.eva.commons.core.models.factories.exception.NonVariantException; import uk.ac.ebi.eva.commons.core.models.genotype.Genotype; import uk.ac.ebi.eva.commons.core.models.pipeline.Variant; import uk.ac.ebi.eva.commons.core.models.pipeline.VariantSourceEntry; @@ -109,13 +108,18 @@ private String processGenotypeField(int alternateAlleleIdx, String genotype) { } @Override - protected void checkVariantInformation(Variant variant, String fileId, String studyId) - throws NonVariantException, IncompleteInformationException { - super.checkVariantInformation(variant, fileId, studyId); + protected boolean checkVariantInformation(Variant variant, String fileId, String studyId) + throws IncompleteInformationException { + if (!super.checkVariantInformation(variant, fileId, studyId)) { + return false; + } VariantSourceEntry variantSourceEntry = variant.getSourceEntry(fileId, studyId); if (!hasAlternateAlleleCalls(variantSourceEntry)) { - throw new NonVariantException("The variant " + variant + " has no alternate allele genotype calls"); + logger.warn("The variant {} has no alternate allele genotype calls and will be discarded as a non-variant", + variant); + return false; } + return true; } private boolean hasAlternateAlleleCalls(VariantSourceEntry variantSourceEntry) { diff --git a/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantVcfFactory.java b/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantVcfFactory.java index 377a8231..238d967d 100644 --- a/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantVcfFactory.java +++ b/variation-commons-core/src/main/java/uk/ac/ebi/eva/commons/core/models/factories/VariantVcfFactory.java @@ -38,7 +38,8 @@ * Abstract class to parse the basic fields in VCF lines */ public abstract class VariantVcfFactory { - private final static Logger logger = LoggerFactory.getLogger(VariantVcfFactory.class); + + protected final static Logger logger = LoggerFactory.getLogger(VariantVcfFactory.class); public static final String ALLELE_FREQUENCY = "AF"; @@ -100,16 +101,13 @@ public List create(String fileId, String studyId, String line) // Fill the rest of fields (after samples because INFO depends on them) setOtherFields(variant, fileId, studyId, ids, quality, filter, info, altAlleleIdx, alternateAlleles, line); - try { - checkVariantInformation(variant, fileId, studyId); + if (checkVariantInformation(variant, fileId, studyId)) { variants.add(variant); - } catch (NonVariantException nve) { - logger.warn("The variant {} will be discarded as it is a non-variant: {}", variant, nve); } } if (variants.isEmpty()) { - throw new NonVariantException("No valid variants could be found"); + logger.warn("No valid variants could be found"); } return variants; @@ -312,13 +310,19 @@ protected static int mapToMultiallelicIndex(int parsedAllele, int numAllele) { return correctedAllele; } - protected void checkVariantInformation(Variant variant, String fileId, String studyId) - throws NonVariantException, IncompleteInformationException { + protected boolean checkVariantInformation(Variant variant, String fileId, String studyId) + throws IncompleteInformationException { if (variant.getAlternate().equalsIgnoreCase(variant.getReference())) { - throw new NonVariantException("The variant " + variant + " reference and alternate alleles are the same"); + logger.warn( + "The variant {} reference and alternate alleles are the same and will be discarded as a " + + "non-variant", + variant); + return false; } if (variant.getAlternate().equals(".")) { - throw new NonVariantException("The variant " + variant + " has no alternate allele"); + logger.warn("The variant {} has no alternate allele and will be discarded as a non-variant", variant); + return false; } + return true; } } diff --git a/variation-commons-core/src/test/java/uk/ac/ebi/eva/commons/core/models/factories/VariantAggregatedVcfFactoryTest.java b/variation-commons-core/src/test/java/uk/ac/ebi/eva/commons/core/models/factories/VariantAggregatedVcfFactoryTest.java index 210b4655..96ede577 100644 --- a/variation-commons-core/src/test/java/uk/ac/ebi/eva/commons/core/models/factories/VariantAggregatedVcfFactoryTest.java +++ b/variation-commons-core/src/test/java/uk/ac/ebi/eva/commons/core/models/factories/VariantAggregatedVcfFactoryTest.java @@ -15,12 +15,18 @@ */ package uk.ac.ebi.eva.commons.core.models.factories; +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.Logger; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.read.ListAppender; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.slf4j.LoggerFactory; + import uk.ac.ebi.eva.commons.core.models.VariantStatistics; import uk.ac.ebi.eva.commons.core.models.factories.exception.IncompleteInformationException; -import uk.ac.ebi.eva.commons.core.models.factories.exception.NonVariantException; import uk.ac.ebi.eva.commons.core.models.genotype.Genotype; import uk.ac.ebi.eva.commons.core.models.pipeline.Variant; @@ -46,6 +52,17 @@ public class VariantAggregatedVcfFactoryTest { @Rule public ExpectedException thrown = ExpectedException.none(); + private ListAppender listAppender; + + @Before + public void setUp() { + // Set up appender to capture log messages + Logger factoryLogger = (Logger) LoggerFactory.getLogger(VariantVcfFactory.class); + listAppender = new ListAppender<>(); + listAppender.start(); + factoryLogger.addAppender(listAppender); + } + @Test public void parseAC_AN() { String line = "1\t54722\t.\tTTC\tT,TCTC\t999\tPASS\tDP4=3122,3282,891,558;DP=22582;INDEL;IS=3,0.272727;VQSLOD=6.76;AN=3854;AC=889,61;TYPE=del,ins;HWE=0;ICF=-0.155251"; // structure like uk10k @@ -137,8 +154,8 @@ public void getGenotype() { public void variantWithAlleleFrequencyZero() { String line = "1\t10040\trs123\tT\tC\t.\t.\tAF=0"; - thrown.expect(NonVariantException.class); factory.create(FILE_ID, STUDY_ID, line); + assertNonVariantLogged(); } @Test @@ -155,8 +172,8 @@ public void multiallelicWithAlleleFrequencyZero() { public void variantWithAlleleCountZero() { String line = "1\t10040\trs123\tT\tC\t.\t.\tAN=5;AC=0"; - thrown.expect(NonVariantException.class); factory.create(FILE_ID, STUDY_ID, line); + assertNonVariantLogged(); } @Test @@ -173,8 +190,8 @@ public void multiAlleleWithOneAlleleCountZero() { public void variantWithAlleleTotalNumberZero() { String line = "1\t10040\trs123\tT\tC\t.\t.\tAN=0;AC=5"; - thrown.expect(NonVariantException.class); factory.create(FILE_ID, STUDY_ID, line); + assertNonVariantLogged(); } @Test @@ -220,4 +237,10 @@ public void testMultiallelicVariantWithNoAlleleCountsOrFrequency() { thrown.expect(IncompleteInformationException.class); factory.create(FILE_ID, STUDY_ID, line); } + + private void assertNonVariantLogged() { + List logsList = listAppender.list; + assertTrue(logsList.get(0).getMessage().contains("non-variant")); + assertEquals(Level.WARN, logsList.get(0).getLevel()); + } } diff --git a/variation-commons-core/src/test/java/uk/ac/ebi/eva/commons/core/models/factories/VariantGenotypedVcfFactoryTest.java b/variation-commons-core/src/test/java/uk/ac/ebi/eva/commons/core/models/factories/VariantGenotypedVcfFactoryTest.java index a981c360..ba24ec4b 100644 --- a/variation-commons-core/src/test/java/uk/ac/ebi/eva/commons/core/models/factories/VariantGenotypedVcfFactoryTest.java +++ b/variation-commons-core/src/test/java/uk/ac/ebi/eva/commons/core/models/factories/VariantGenotypedVcfFactoryTest.java @@ -15,10 +15,16 @@ */ package uk.ac.ebi.eva.commons.core.models.factories; +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.Logger; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.read.ListAppender; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; -import uk.ac.ebi.eva.commons.core.models.factories.exception.NonVariantException; +import org.slf4j.LoggerFactory; + import uk.ac.ebi.eva.commons.core.models.pipeline.Variant; import uk.ac.ebi.eva.commons.core.models.pipeline.VariantSourceEntry; @@ -42,6 +48,17 @@ public class VariantGenotypedVcfFactoryTest { @Rule public ExpectedException thrown = ExpectedException.none(); + private ListAppender listAppender; + + @Before + public void setUp() { + // Set up appender to capture log messages + Logger factoryLogger = (Logger) LoggerFactory.getLogger(VariantVcfFactory.class); + listAppender = new ListAppender<>(); + listAppender.start(); + factoryLogger.addAppender(listAppender); + } + @Test public void testCreateVariant_Samples() { String line = "1\t10040\trs123\tT\tC\t.\t.\t.\tGT\t0/0\t0/1\t0/.\t./1\t1/1"; // 5 samples @@ -433,40 +450,40 @@ public void testMultiallelicVariantWitnNoSampleInformation() { public void variantWithNoAltGenotypes() { String line = "1\t10040\trs123\tT\tC\t.\t.\t.\tGT\t0/0\t0|0\t./.\t0|0\t./."; - thrown.expect(NonVariantException.class); factory.create(FILE_ID, STUDY_ID, line); + assertNonVariantLogged(); } @Test public void haploidVariantWithNoAltGenotypes() { String line = "Y\t10040\trs123\tT\tC\t.\t.\t.\tGT\t.\t0\t0\t."; - thrown.expect(NonVariantException.class); factory.create(FILE_ID, STUDY_ID, line); + assertNonVariantLogged(); } @Test public void triploidVariantWithNoAltGenotypes() { String line = "1\t10040\trs123\tT\tC\t.\t.\t.\tGT\t0/0/0\t0|0|0\t././.\t0|0|0\t././."; - thrown.expect(NonVariantException.class); factory.create(FILE_ID, STUDY_ID, line); + assertNonVariantLogged(); } @Test public void variantWhereAllGenotypesAreMissingValues() { String line = "1\t10040\trs123\tT\tC\t.\t.\t.\tGT\t./.\t./.\t./.\t./.\t./."; - thrown.expect(NonVariantException.class); factory.create(FILE_ID, STUDY_ID, line); + assertNonVariantLogged(); } @Test public void variantWhereAllGenotypesAreReference() { String line = "1\t10040\trs123\tT\tC\t.\t.\t.\tGT\t0/0\t0|0\t0/0\t0|0\t0/0"; - thrown.expect(NonVariantException.class); factory.create(FILE_ID, STUDY_ID, line); + assertNonVariantLogged(); } @Test @@ -491,4 +508,10 @@ public void infoAttributesWontBeValidated() { assertEquals(expResult, result); } + private void assertNonVariantLogged() { + List logsList = listAppender.list; + assertTrue(logsList.get(0).getMessage().contains("non-variant")); + assertEquals(Level.WARN, logsList.get(0).getLevel()); + } + }