Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
import gsrs.entityProcessor.EntityProcessorConfig;
import gsrs.imports.ImportAdapterFactoryConfig;
import gsrs.imports.MatchableCalculationConfig;
import gsrs.imports.indexers.ValidationMessageSubstitution;
import gsrs.validator.ValidatorConfig;
import ix.core.util.EntityUtils;
import ix.core.validator.ValidationMessage;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.context.properties.ConfigurationProperties;
Expand All @@ -31,6 +33,8 @@ public class GsrsFactoryConfiguration {

private Map<String, Map<String, Map<String, Map<String, Object>>>> matchableCalculators;

private Map<String, Map<String, Map<String, Map<String, Object>>>> validationIvmSubstitutions;

private Map<String, Map<String, Object>> search;

private Map<String, Map<String, EntityProcessorConfig>> entityProcessors;
Expand All @@ -49,6 +53,8 @@ public class GsrsFactoryConfiguration {

private Map<String, Boolean> sortExportOutput;

private Map<String, List<? extends MatchableCalculationConfig>> matchableConfigsByContext = new LinkedHashMap<>();

public Optional<Map<String, Object>> getSearchSettingsFor(String context) {
if (search == null) return Optional.empty();
return Optional.ofNullable(search.get(context));
Expand All @@ -71,9 +77,9 @@ public List<EntityProcessorConfig> getEntityProcessors() {
System.out.println(reportTag + " found before filtering: " + configs.size());
configs = configs.stream().filter(c->!c.isDisabled()).sorted(Comparator.comparing(c->c.getOrder(),nullsFirst(naturalOrder()))).collect(Collectors.toList());
System.out.println(reportTag + " active after filtering: " + configs.size());
System.out.printf("%s|%s|%s|%s|%s\n", reportTag, "class", "parentKey", "order", "isDisabled");
System.out.printf("%s|%s|%s|%s|%s%n", reportTag, "class", "parentKey", "order", "isDisabled");
for (EntityProcessorConfig config : configs) {
System.out.printf("%s|%s|%s|%s|%s\n", reportTag, config.getProcessor(), config.getParentKey(), config.getOrder(), config.isDisabled());
System.out.printf("%s|%s|%s|%s|%s%n", reportTag, config.getProcessor(), config.getParentKey(), config.getOrder(), config.isDisabled());
}
return configs;
}
Expand Down Expand Up @@ -149,8 +155,6 @@ public List<? extends ImportAdapterFactoryConfig> getImportAdapterFactories(Stri
System.out.printf("%s|%s|%s|%s|%s\n", reportTag, config.getImportAdapterFactoryClass(), config.getParentKey(), config.getOrder(), config.isDisabled());
}

//log.trace("list (after):");
//configs.forEach(c-> log.trace("name: {}; desc: {}; ext: {}", c.getAdapterName(), c.getDescription(), c.getSupportedFileExtensions()));
return configs;
} catch (Exception t) {
log.error("Error fetching import factory config");
Expand All @@ -159,33 +163,61 @@ public List<? extends ImportAdapterFactoryConfig> getImportAdapterFactories(Stri
}

/**
 * Returns the active matchable-calculation configurations for a context.
 *
 * <p>The result is computed once per context and then served from
 * {@code matchableConfigsByContext}. Disabled entries are dropped and the rest are
 * sorted by their order value, with null orders first.
 *
 * @param context name of the entity context whose configuration is requested
 * @return the enabled configurations in order; an empty list when nothing is
 *         configured for the context
 */
public List<? extends MatchableCalculationConfig> getMatchableCalculationConfig(String context) {
    log.trace("in getMatchableCalculationConfig");
    // NOTE(review): the cache is a plain LinkedHashMap; if this method can be reached from
    // several threads the field should become a ConcurrentHashMap -- confirm with callers.
    return this.matchableConfigsByContext.computeIfAbsent(context, ctxt -> {
        String reportTag = "MatchableCalculationConfig";
        if (matchableCalculators == null) {
            return Collections.emptyList();
        }
        // A context without any entry is treated like an empty "list" section instead of
        // failing with a NullPointerException.
        Map<String, Map<String, Map<String, Object>>> contextSettings = matchableCalculators.get(ctxt);
        Map<String, Map<String, Object>> map = contextSettings == null ? null : contextSettings.get("list");
        if (map == null || map.isEmpty()) {
            log.warn("no matchable calculation configuration info found!");
            return Collections.emptyList();
        }
        // Copy the key into the Object for quality control and maybe as a way to access by key from the list
        for (Map.Entry<String, Map<String, Object>> entry : map.entrySet()) {
            entry.getValue().put("parentKey", entry.getKey());
        }
        List<Object> list = map.values().stream().collect(Collectors.toList());
        List<? extends MatchableCalculationConfig> configs = EntityUtils.convertClean(list, new TypeReference<List<? extends MatchableCalculationConfig>>() {
        });
        System.out.println(reportTag + " found before filtering: " + configs.size());
        configs = configs.stream().filter(c -> !c.isDisabled()).sorted(Comparator.comparing(c -> c.getOrder(), nullsFirst(naturalOrder()))).collect(Collectors.toList());
        System.out.println(reportTag + " active after filtering: " + configs.size());
        System.out.printf("%s|%s|%s|%s|%s\n", reportTag, "class", "parentKey", "order", "isDisabled");
        for (MatchableCalculationConfig config : configs) {
            System.out.printf("%s|%s|%s|%s|%s\n", reportTag, config.getMatchableCalculationClass(), config.getParentKey(), config.getOrder(), config.isDisabled());
        }
        return configs;
    });
}

public List<ValidationMessageSubstitution> getValidationMessageSubstitutions(String context) {
List<ValidationMessageSubstitution> validationMessageSubstitutions = new ArrayList<>();

try {
Map<String, Map<String, Object>> map = matchableCalculators.get(context).get("list");
if (map == null || map.isEmpty()) {
log.warn("no matchable calculation configuration info found!");
Map<String, Map<String, Object>> map = validationIvmSubstitutions.get(context).get("list");
if( validationIvmSubstitutions == null || validationIvmSubstitutions.isEmpty()){
return Collections.emptyList();
}
// Copy the key into the Object for quality control and maybe as a way to access by key from the list
for (String k: map.keySet()) {
map.get(k).put("parentKey", k);
if (map == null || map.isEmpty()) {
log.warn("no validation IVM substitutions configuration info found!");
return Collections.emptyList();
}
List<Object> list = map.values().stream().collect(Collectors.toList());
List<? extends MatchableCalculationConfig> configs = EntityUtils.convertClean(list, new TypeReference<List<? extends MatchableCalculationConfig>>() { });
System.out.println(reportTag + " found before filtering: " + configs.size());
configs = configs.stream().filter(c->!c.isDisabled()).sorted(Comparator.comparing(c->c.getOrder(),nullsFirst(naturalOrder()))).collect(Collectors.toList());
System.out.println(reportTag + " active after filtering: " + configs.size());
System.out.printf("%s|%s|%s|%s|%s\n", reportTag, "class", "parentKey", "order", "isDisabled");
for (MatchableCalculationConfig config : configs) {
System.out.printf("%s|%s|%s|%s|%s\n", reportTag, config.getMatchableCalculationClass(), config.getParentKey(), config.getOrder(), config.isDisabled());
for(Map.Entry<String, Map<String, Object>> entry : map.entrySet()) {
String toMatch = (String) entry.getValue().get("toMatch");
String replacement = (String) entry.getValue().get("replacement");
validationMessageSubstitutions.add( ValidationMessageSubstitution.of(toMatch, replacement));
}
return configs;
return validationMessageSubstitutions;
} catch (Throwable t) {
throw t;
log.error("Error parsing validation message substitutions config: ", t);
return Collections.emptyList();
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package gsrs.imports.indexers;

import gsrs.GsrsFactoryConfiguration;
import gsrs.config.EntityContextLookup;
import gsrs.imports.GsrsImportAdapterFactoryFactory;
import gsrs.stagingarea.model.ImportMetadata;
Expand All @@ -13,25 +14,65 @@
import org.springframework.beans.factory.annotation.Autowired;

import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.List;
import java.util.function.Consumer;

@Slf4j
public class MetadataValidationIndexValueMaker implements IndexValueMaker<ImportMetadata> {
public final static String IMPORT_METADATA_VALIDATION_TYPE_FACET="Validation Type";
public final static String IMPORT_METADATA_VALIDATION_MESSAGE_FACET="Validation Message";
public static final String IMPORT_METADATA_VALIDATION_TYPE_FACET="Validation Type";
public static final String IMPORT_METADATA_VALIDATION_MESSAGE_FACET="Validation Message";

//@Autowired
StagingAreaService stagingAreaService;

@Autowired
private GsrsImportAdapterFactoryFactory gsrsImportAdapterFactoryFactory;

@Autowired
private GsrsFactoryConfiguration config;

@Override
public Class<ImportMetadata> getIndexedEntityClass() {
return ImportMetadata.class;
}

private List<ValidationMessageSubstitution> substitutions;

/**
 * Loads the message substitutions for a context into {@code substitutions}.
 *
 * <p>Configured substitutions are used when present; otherwise the hard-coded
 * defaults from {@link #getDefaultSubstitutions()} are used. The field is assigned
 * exactly once, after the final list has been chosen.
 *
 * @param context name of the entity context to read substitutions for
 */
private void init(String context) {
    // config is field-injected; tolerate an instance that was created without injection.
    List<ValidationMessageSubstitution> resolved =
            config == null ? null : config.getValidationMessageSubstitutions(context);
    log.trace("substitutions from config: {}", resolved == null ? "null" : resolved.size());
    if (resolved == null || resolved.isEmpty()) {
        log.warn("configured substitutions empty; used hard-coded values");
        resolved = getDefaultSubstitutions();
    }
    // NOTE(review): the list is cached per instance, so the context of the first call
    // decides which substitutions are used afterwards -- confirm this is intended.
    substitutions = resolved;

    log.trace("initialized substitution list with {} items", resolved.size());
}

/**
 * Supplies the built-in substitutions used when none are configured.
 *
 * @return a fixed-size list of substitutions, in the order they are tried
 */
private List<ValidationMessageSubstitution> getDefaultSubstitutions() {
    // Each row is { regular expression to find, replacement text }.
    final String[][] defaults = {
            {"Substance .* appears to be a full duplicate",
                    "Substance appears to have a full duplicate"},
            {"Record .* is a potential duplicate",
                    "Record has a potential duplicate"},
            {"Name .* minimally standardized to .*",
                    "Name was minimally standardized"},
            {"Substances should have exactly one \\(1\\) display name.*",
                    "Display name was selected automatically"},
            {"Each fragment should be present as a separate record in the database. Please register:.*",
                    "Substance contains a fragment that has not been registered as an individual record"},
            {"Substance .* is a possible duplicate",
                    "Record has a possible duplicate"},
            {"This fragment is present as a separate record in the database but in a different form. Please register: .* as an individual substance",
                    "Substance contains a fragment that has not been registered as an individual record in its current form"},
            {"Name .* collides \\(possible duplicate\\) with existing name for substance.*",
                    "Duplicate name"},
            {"Structure is not charged balanced, net charge of:.*",
                    "Structure is not charged balanced"},
            {"Substance may be represented as protein as well. Sequence:.*",
                    "Substance may be represented as protein as well"},
            {"Substance has no UUID, will generate uuid:.*",
                    "Generated UUID because none was supplied"},
            {"Valence Error on .*",
                    "Valence error on one or more atoms"}
    };

    ValidationMessageSubstitution[] compiled = new ValidationMessageSubstitution[defaults.length];
    for (int row = 0; row < defaults.length; row++) {
        compiled[row] = ValidationMessageSubstitution.of(defaults[row][0], defaults[row][1]);
    }
    return Arrays.asList(compiled);
}

@Override
public void createIndexableValues(ImportMetadata importMetadata, Consumer<IndexableValue> consumer) {
log.trace("In createIndexableValues");
Expand All @@ -44,7 +85,11 @@ public void createIndexableValues(ImportMetadata importMetadata, Consumer<Indexa
throw new RuntimeException(e);
}
}
if( importMetadata.getInstanceId()==null) {
if( substitutions == null || substitutions.isEmpty()) {
String contextName = EntityContextLookup.getContextFromEntityClass( importMetadata.getEntityClassName());
init(contextName);
}
if( importMetadata.getInstanceId()==null) {
log.warn("importMetadata.getInstanceId() null! ");
return;
}
Expand All @@ -57,15 +102,26 @@ public void createIndexableValues(ImportMetadata importMetadata, Consumer<Indexa
consumer.accept(IndexableValue.simpleFacetStringValue(IMPORT_METADATA_VALIDATION_TYPE_FACET,
String.valueOf(((ValidationMessage) vm).getMessageType())));
consumer.accept(IndexableValue.simpleFacetStringValue(IMPORT_METADATA_VALIDATION_MESSAGE_FACET,
((ValidationMessage) vm).getMessage()));
cleanValidationMessage(((ValidationMessage) vm).getMessage())));
});
return;
}
validations.forEach(v->{
consumer.accept (IndexableValue.simpleFacetStringValue(IMPORT_METADATA_VALIDATION_TYPE_FACET,
String.valueOf((v.getValidationType()))));

consumer.accept (IndexableValue.simpleFacetStringValue(IMPORT_METADATA_VALIDATION_MESSAGE_FACET,
(v.getValidationMessage())));
cleanValidationMessage((v.getValidationMessage()))));
});
}

/**
 * Replaces a detailed validation message with its generic form.
 *
 * <p>The substitutions are tried in order and the replacement of the first one whose
 * pattern is found anywhere in the message is returned. When this instance has no
 * substitutions loaded yet, the built-in defaults are used for the lookup.
 *
 * @param inputMessage the raw validation message; may be {@code null}
 * @return the replacement text of the first matching substitution, or
 *         {@code inputMessage} itself when it is {@code null} or nothing matches
 */
public String cleanValidationMessage(String inputMessage){
    if (inputMessage == null) {
        // Nothing to match against; Pattern.matcher(null) would throw.
        return null;
    }
    // The field is only filled lazily by createIndexableValues, so a direct call
    // (for example from a unit test) may see it unset. The field is deliberately not
    // assigned here so that the configured values can still be loaded later.
    List<ValidationMessageSubstitution> active =
            (substitutions == null || substitutions.isEmpty()) ? getDefaultSubstitutions() : substitutions;
    for (ValidationMessageSubstitution substitution : active) {
        if (substitution.getToMatch().matcher(inputMessage).find()) {
            return substitution.getReplacement();
        }
    }
    log.trace("cleanValidationMessage found no match for {}", inputMessage);
    return inputMessage;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package gsrs.imports.indexers;

import lombok.AllArgsConstructor;
import lombok.Data;

import java.util.regex.Pattern;

/**
 * Pairs a compiled regular expression with the text that replaces any validation
 * message the expression is found in.
 */
@AllArgsConstructor
@Data
public class ValidationMessageSubstitution {
    /** Expression searched for in a validation message. */
    private Pattern toMatch;
    /** Text used in place of a message that matches. */
    private String replacement;

    /**
     * Creates a substitution from an uncompiled regular expression.
     *
     * @param patternInfo     regular expression source, compiled with {@link Pattern#compile(String)}
     * @param replacementInfo text to use instead of a matching message
     * @return a substitution holding the compiled pattern and the replacement
     */
    public static ValidationMessageSubstitution of(String patternInfo, String replacementInfo) {
        Pattern compiled = Pattern.compile(patternInfo);
        return new ValidationMessageSubstitution(compiled, replacementInfo);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,16 @@
import ix.core.validator.ValidationResponse;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.UUID;
import java.util.stream.Stream;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
Expand Down Expand Up @@ -62,4 +66,44 @@ public String getMessageId() {
Assertions.assertTrue(indexedValues.stream().anyMatch(i->i.name().equals(MetadataValidationIndexValueMaker.IMPORT_METADATA_VALIDATION_MESSAGE_FACET)
&& i.value().equals(tooManyAtomsMessage)));
}

// A full-duplicate message carrying a substance name and ID is reduced to its generic form.
@Test
void testStringCleanup() {
    String duplicateMessage = "Substance PILOCARPINE HYDROCHLORIDE (ID: 2f2b3e85-2cca-41dd-921d-c8eaf6feb376) appears to be a full duplicate";
    MetadataValidationIndexValueMaker maker = new MetadataValidationIndexValueMaker();

    String cleaned = maker.cleanValidationMessage(duplicateMessage);

    Assertions.assertEquals("Substance appears to have a full duplicate", cleaned);
}

// For each row of inputData: cleaning the input yields the expected text exactly when willMatch is true.
@ParameterizedTest
@MethodSource("inputData")
void testStringCleanups(String input, String expected, boolean willMatch) {
    String cleaned = new MetadataValidationIndexValueMaker().cleanValidationMessage(input);
    boolean producedExpected = expected.equals(cleaned);
    Assertions.assertEquals(willMatch, producedExpected);
}

// Rows for testStringCleanups: raw message, candidate cleaned text, whether the candidate is the real result.
private static Stream<Arguments> inputData() {
    // Messages that are checked against both a correct and an incorrect candidate.
    String potentialDuplicate = "Record PILOCARPINE HYDROCHLORIDE is a potential duplicate";
    String standardizedName = "Name (4<I>R</I>)-4-[(3-methylimidazol-4-yl)methyl]oxolan-2-one minimally standardized to (4<i>R</i>)-4-[(3-methylimidazol-4-yl)methyl]oxolan-2-one";
    String displayName = "Substances should have exactly one (1) display name, Default to using:3-ethyl-4-[(3-methyl-1H-imidazol-3-ium-4-yl)methyl]tetrahydrofuran-2-one";
    String unregisteredFragment = "Each fragment should be present as a separate record in the database. Please register: [#6][#6]1[#6]*([#6][#6][#6]1)[#6][#6][#6]#[#6][#6]1[#6][#6][#6]([#6][#6]1)[#6]#[#6][#6][#6]*1[#6][#6]([#6][#6][#6]1)[#6]";
    String duplicateName = "Duplicate name";

    return Stream.of(
            Arguments.of(potentialDuplicate, "Record has a potential duplicate", true),
            Arguments.of(potentialDuplicate, "Record might have a potential duplicate", false),
            Arguments.of(standardizedName, "Name was minimally standardized", true),
            Arguments.of(standardizedName, "Name got standardized", false),
            Arguments.of(displayName, "Display name was selected automatically", true),
            Arguments.of(displayName, "name selected automatically", false),
            Arguments.of(unregisteredFragment,
                    "Substance contains a fragment that has not been registered as an individual record", true),
            Arguments.of("Name '2-[2-(2-Methoxyethoxy)ethoxy]ethyl 3-methylbutanoate' collides (possible duplicate) with existing name for substance: ",
                    duplicateName, true),
            Arguments.of("Name '3-(2-Methoxyethyl) 5-(1-methylethyl) 1,4-dihydro-2,6-dimethyl-4-(4-nitrophenyl)-3,5-pyridinedicarboxylate' collides (possible duplicate) with existing name for substance: ",
                    duplicateName, true),
            Arguments.of("Name '(Z)-2-Methoxyethyl 2-[(3-nitrophenyl)methylene]-3-oxobutanoate' collides (possible duplicate) with existing name for substance: ",
                    duplicateName, true)
    );
}
}
Loading