Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import uk.ac.ebi.eva.commons.core.models.VariantStatistics;
import uk.ac.ebi.eva.commons.core.models.factories.exception.IncompleteInformationException;
import uk.ac.ebi.eva.commons.core.models.factories.exception.NonVariantException;
import uk.ac.ebi.eva.commons.core.models.genotype.Genotype;
import uk.ac.ebi.eva.commons.core.models.pipeline.Variant;
import uk.ac.ebi.eva.commons.core.models.pipeline.VariantSourceEntry;
Expand Down Expand Up @@ -395,18 +394,23 @@ protected void addGenotypeWithGTS(Variant variant, VariantSourceEntry sourceEntr
}

@Override
protected void checkVariantInformation(Variant variant, String fileId, String studyId)
throws NonVariantException, IncompleteInformationException {
super.checkVariantInformation(variant, fileId, studyId);
protected boolean checkVariantInformation(Variant variant, String fileId, String studyId)
throws IncompleteInformationException {
if (!super.checkVariantInformation(variant, fileId, studyId)) {
return false;
}

if (requireEvidence) {
VariantSourceEntry variantSourceEntry = variant.getSourceEntry(fileId, studyId);
if (!canAlleleFrequenciesBeCalculated(variantSourceEntry)) {
throw new IncompleteInformationException(variant);
} else if (variantFrequencyIsZero(variantSourceEntry)) {
throw new NonVariantException("The variant " + variant + " has allele frequency or counts '0'");
logger.warn("The variant {} has allele frequency or counts '0' and will be discarded as a non-variant",
variant);
return false;
}
}
return true;
}

protected boolean canAlleleFrequenciesBeCalculated(VariantSourceEntry variantSourceEntry) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package uk.ac.ebi.eva.commons.core.models.factories;

import uk.ac.ebi.eva.commons.core.models.factories.exception.IncompleteInformationException;
import uk.ac.ebi.eva.commons.core.models.factories.exception.NonVariantException;
import uk.ac.ebi.eva.commons.core.models.genotype.Genotype;
import uk.ac.ebi.eva.commons.core.models.pipeline.Variant;
import uk.ac.ebi.eva.commons.core.models.pipeline.VariantSourceEntry;
Expand Down Expand Up @@ -109,13 +108,18 @@ private String processGenotypeField(int alternateAlleleIdx, String genotype) {
}

@Override
protected void checkVariantInformation(Variant variant, String fileId, String studyId)
throws NonVariantException, IncompleteInformationException {
super.checkVariantInformation(variant, fileId, studyId);
protected boolean checkVariantInformation(Variant variant, String fileId, String studyId)
throws IncompleteInformationException {
if (!super.checkVariantInformation(variant, fileId, studyId)) {
return false;
}
VariantSourceEntry variantSourceEntry = variant.getSourceEntry(fileId, studyId);
if (!hasAlternateAlleleCalls(variantSourceEntry)) {
throw new NonVariantException("The variant " + variant + " has no alternate allele genotype calls");
logger.warn("The variant {} has no alternate allele genotype calls and will be discarded as a non-variant",
variant);
return false;
}
return true;
}

private boolean hasAlternateAlleleCalls(VariantSourceEntry variantSourceEntry) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
* Abstract class to parse the basic fields in VCF lines
*/
public abstract class VariantVcfFactory {
private final static Logger logger = LoggerFactory.getLogger(VariantVcfFactory.class);

protected final static Logger logger = LoggerFactory.getLogger(VariantVcfFactory.class);

public static final String ALLELE_FREQUENCY = "AF";

Expand Down Expand Up @@ -100,16 +101,13 @@ public List<Variant> create(String fileId, String studyId, String line)
// Fill the rest of fields (after samples because INFO depends on them)
setOtherFields(variant, fileId, studyId, ids, quality, filter, info, altAlleleIdx, alternateAlleles, line);

try {
checkVariantInformation(variant, fileId, studyId);
if (checkVariantInformation(variant, fileId, studyId)) {
variants.add(variant);
} catch (NonVariantException nve) {
logger.warn("The variant {} will be discarded as it is a non-variant: {}", variant, nve);
}
}

if (variants.isEmpty()) {
throw new NonVariantException("No valid variants could be found");
logger.warn("No valid variants could be found");
}

return variants;
Expand Down Expand Up @@ -312,13 +310,19 @@ protected static int mapToMultiallelicIndex(int parsedAllele, int numAllele) {
return correctedAllele;
}

protected void checkVariantInformation(Variant variant, String fileId, String studyId)
throws NonVariantException, IncompleteInformationException {
protected boolean checkVariantInformation(Variant variant, String fileId, String studyId)
throws IncompleteInformationException {
if (variant.getAlternate().equalsIgnoreCase(variant.getReference())) {
throw new NonVariantException("The variant " + variant + " reference and alternate alleles are the same");
logger.warn(
"The variant {} reference and alternate alleles are the same and will be discarded as a " +
"non-variant",
variant);
return false;
}
if (variant.getAlternate().equals(".")) {
throw new NonVariantException("The variant " + variant + " has no alternate allele");
logger.warn("The variant {} has no alternate allele and will be discarded as a non-variant", variant);
return false;
}
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,18 @@
*/
package uk.ac.ebi.eva.commons.core.models.factories;

import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import ch.qos.logback.classic.spi.ILoggingEvent;
import ch.qos.logback.core.read.ListAppender;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.slf4j.LoggerFactory;

import uk.ac.ebi.eva.commons.core.models.VariantStatistics;
import uk.ac.ebi.eva.commons.core.models.factories.exception.IncompleteInformationException;
import uk.ac.ebi.eva.commons.core.models.factories.exception.NonVariantException;
import uk.ac.ebi.eva.commons.core.models.genotype.Genotype;
import uk.ac.ebi.eva.commons.core.models.pipeline.Variant;

Expand All @@ -46,6 +52,17 @@ public class VariantAggregatedVcfFactoryTest {
@Rule
public ExpectedException thrown = ExpectedException.none();

private ListAppender<ILoggingEvent> listAppender;

@Before
public void setUp() {
// Set up appender to capture log messages
Logger factoryLogger = (Logger) LoggerFactory.getLogger(VariantVcfFactory.class);
listAppender = new ListAppender<>();
listAppender.start();
factoryLogger.addAppender(listAppender);
}

@Test
public void parseAC_AN() {
String line = "1\t54722\t.\tTTC\tT,TCTC\t999\tPASS\tDP4=3122,3282,891,558;DP=22582;INDEL;IS=3,0.272727;VQSLOD=6.76;AN=3854;AC=889,61;TYPE=del,ins;HWE=0;ICF=-0.155251"; // structure like uk10k
Expand Down Expand Up @@ -137,8 +154,8 @@ public void getGenotype() {
public void variantWithAlleleFrequencyZero() {
String line = "1\t10040\trs123\tT\tC\t.\t.\tAF=0";

thrown.expect(NonVariantException.class);
factory.create(FILE_ID, STUDY_ID, line);
assertNonVariantLogged();
}

@Test
Expand All @@ -155,8 +172,8 @@ public void multiallelicWithAlleleFrequencyZero() {
public void variantWithAlleleCountZero() {
String line = "1\t10040\trs123\tT\tC\t.\t.\tAN=5;AC=0";

thrown.expect(NonVariantException.class);
factory.create(FILE_ID, STUDY_ID, line);
assertNonVariantLogged();
}

@Test
Expand All @@ -173,8 +190,8 @@ public void multiAlleleWithOneAlleleCountZero() {
public void variantWithAlleleTotalNumberZero() {
String line = "1\t10040\trs123\tT\tC\t.\t.\tAN=0;AC=5";

thrown.expect(NonVariantException.class);
factory.create(FILE_ID, STUDY_ID, line);
assertNonVariantLogged();
}

@Test
Expand Down Expand Up @@ -220,4 +237,10 @@ public void testMultiallelicVariantWithNoAlleleCountsOrFrequency() {
thrown.expect(IncompleteInformationException.class);
factory.create(FILE_ID, STUDY_ID, line);
}

private void assertNonVariantLogged() {
List<ILoggingEvent> logsList = listAppender.list;
assertTrue(logsList.get(0).getMessage().contains("non-variant"));
assertEquals(Level.WARN, logsList.get(0).getLevel());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,16 @@
*/
package uk.ac.ebi.eva.commons.core.models.factories;

import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import ch.qos.logback.classic.spi.ILoggingEvent;
import ch.qos.logback.core.read.ListAppender;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import uk.ac.ebi.eva.commons.core.models.factories.exception.NonVariantException;
import org.slf4j.LoggerFactory;

import uk.ac.ebi.eva.commons.core.models.pipeline.Variant;
import uk.ac.ebi.eva.commons.core.models.pipeline.VariantSourceEntry;

Expand All @@ -42,6 +48,17 @@ public class VariantGenotypedVcfFactoryTest {
@Rule
public ExpectedException thrown = ExpectedException.none();

private ListAppender<ILoggingEvent> listAppender;

@Before
public void setUp() {
// Set up appender to capture log messages
Logger factoryLogger = (Logger) LoggerFactory.getLogger(VariantVcfFactory.class);
listAppender = new ListAppender<>();
listAppender.start();
factoryLogger.addAppender(listAppender);
}

@Test
public void testCreateVariant_Samples() {
String line = "1\t10040\trs123\tT\tC\t.\t.\t.\tGT\t0/0\t0/1\t0/.\t./1\t1/1"; // 5 samples
Expand Down Expand Up @@ -433,40 +450,40 @@ public void testMultiallelicVariantWitnNoSampleInformation() {
public void variantWithNoAltGenotypes() {
String line = "1\t10040\trs123\tT\tC\t.\t.\t.\tGT\t0/0\t0|0\t./.\t0|0\t./.";

thrown.expect(NonVariantException.class);
factory.create(FILE_ID, STUDY_ID, line);
assertNonVariantLogged();
}

@Test
public void haploidVariantWithNoAltGenotypes() {
String line = "Y\t10040\trs123\tT\tC\t.\t.\t.\tGT\t.\t0\t0\t.";

thrown.expect(NonVariantException.class);
factory.create(FILE_ID, STUDY_ID, line);
assertNonVariantLogged();
}

@Test
public void triploidVariantWithNoAltGenotypes() {
String line = "1\t10040\trs123\tT\tC\t.\t.\t.\tGT\t0/0/0\t0|0|0\t././.\t0|0|0\t././.";

thrown.expect(NonVariantException.class);
factory.create(FILE_ID, STUDY_ID, line);
assertNonVariantLogged();
}

@Test
public void variantWhereAllGenotypesAreMissingValues() {
String line = "1\t10040\trs123\tT\tC\t.\t.\t.\tGT\t./.\t./.\t./.\t./.\t./.";

thrown.expect(NonVariantException.class);
factory.create(FILE_ID, STUDY_ID, line);
assertNonVariantLogged();
}

@Test
public void variantWhereAllGenotypesAreReference() {
String line = "1\t10040\trs123\tT\tC\t.\t.\t.\tGT\t0/0\t0|0\t0/0\t0|0\t0/0";

thrown.expect(NonVariantException.class);
factory.create(FILE_ID, STUDY_ID, line);
assertNonVariantLogged();
}

@Test
Expand All @@ -491,4 +508,10 @@ public void infoAttributesWontBeValidated() {
assertEquals(expResult, result);
}

private void assertNonVariantLogged() {
List<ILoggingEvent> logsList = listAppender.list;
assertTrue(logsList.get(0).getMessage().contains("non-variant"));
assertEquals(Level.WARN, logsList.get(0).getLevel());
}

}
Loading