Skip to content

Commit

Permalink
Merge branch 'dh2-i18n-rc1' of https://github.com/cidgoh/DataHarmonizer
Browse files Browse the repository at this point in the history
… into dh2-i18n-rc1
  • Loading branch information
kennethbruskiewicz committed Nov 12, 2024
2 parents 1628809 + d1803cb commit 9013cb9
Show file tree
Hide file tree
Showing 72 changed files with 194,454 additions and 43,688 deletions.
180 changes: 119 additions & 61 deletions lib/DataHarmonizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -1586,7 +1586,7 @@ class DataHarmonizer {
} else {
col.type = 'key-value-list';
if (
!field.sources.includes('null value menu') ||
!field.sources.includes('NullValueMenu') ||
field.sources.length > 1
) {
col.trimDropdown = false; // Allow expansion of pulldown past field width
Expand Down Expand Up @@ -1919,7 +1919,7 @@ class DataHarmonizer {
let sources = [];
for (const [, item] of Object.entries(field.sources)) {
// List null value menu items directly
if (item === 'null value menu') {
if (item === 'NullValueMenu') {
let null_values = Object.keys(
this.schema.enums[item].permissible_values
);
Expand Down Expand Up @@ -2080,11 +2080,24 @@ class DataHarmonizer {
* @return {Dictionary<Integer>} Dictionary of all fields.
*/
getFieldNameMap(fields) {
const titleMap = {};
const map = {};
for (const [fieldIndex, field] of fields.entries()) {
titleMap[field.name] = fieldIndex;
map[field.name] = fieldIndex;
}
return titleMap;
return map;
}

/**
* Get a dictionary of source field TITLES pointing to column index
* @param {Object} fields A flat version of data.js.
* @return {Dictionary<Integer>} Dictionary of all fields.
*/
getFieldTitleMap(fields) {
const map = {};
for (const [fieldIndex, field] of fields.entries()) {
map[field.title] = fieldIndex;
}
return map;
}

/**
Expand Down Expand Up @@ -2230,24 +2243,114 @@ class DataHarmonizer {

// if sources exist, fetch transformed Value
if (field.sources) {
mappedCell.push(
self.getTransformedField(headerName, mappedCellVal, field, prefix)
);
} else if (field.multivalued === true) {
// ISSUE: relying on semicolon delimiter in input

for (let cellVal of mappedCellVal.split(';')) {
mappedCell.push(
self.getTransformedField(headerName, cellVal.trim(), field, prefix)
);
if (field.multivalued === true) { //Map list of semicolon-delimited choices
// ISSUE: relying on semicolon delimiter in input
for (let cellVal of mappedCellVal.split(';')) {
mappedCell.push( self.getTransformedField(headerName, cellVal.trim(), field, prefix));
}
}
} else {
else { // Map single choice
mappedCell.push(self.getTransformedField(headerName, mappedCellVal, field, prefix));
}
}
else { // No mapping.
mappedCell.push(mappedCellVal);
}
}

return mappedCell.join(delimiter);
}

/**
* Given a table row, output a value based on the following conditional:
* ```
* if (value in headerNameToCheck == valToMatch) {
* return value in headerNameToOutput;
* } else {
* return "";
* }
* ```
* TODO is there any need for additional complexities in getMappedField?
* i.e., transforming field
* @param {string} headerNameToCheck Field name of user-inputted vals to check
* against `valToMatch`.
* @param {string} valToMatch Value to match user-inputted vals against during
* conditional.
* @param {string} headerNameToOutput Field name of user-inputted vals to
* return if conditional is satisfied.
* @param {string[]} inputRow Table row.
* @param {Object<string, number>} sourceFieldNameMap `getFieldNameMap` return
* val.
* @return {string} `valToMatch` if condition is satisfied; empty str
* otherwise.
*/
getIfThenField(
headerNameToCheck,
valToMatch,
headerNameToOutput,
inputRow,
sourceFieldNameMap
) {
const valToCheck = inputRow[sourceFieldNameMap[headerNameToCheck]];
const valToOutput = inputRow[sourceFieldNameMap[headerNameToOutput]];
return valToCheck === valToMatch ? valToOutput : '';
}

/**
* Given a table row, find the intersection of user-inputted values in
* `headerNameToCheck` and vals in `matchedValsSet`.
* @param {string} headerNameToCheck Field name of user-inputted vals to
* intersect against `matchedValsSet`.
* @param {Set<string>} matchedValsSet Set of values that user-inputted vals
* are intersected against.
* @param {string[]} inputRow Table row.
* @param {Object<string, number>} sourceFieldNameMap `getFieldNameMap` return
* val.
* @return {string} Intersection of user-inputted values in
* `headerNameToCheck` and vals in `matchedValsSet`.
*/
getMatchedValsField(
headerNameToCheck,
matchedValsSet,
inputRow,
sourceFieldNameMap
) {
const valsToCheckStr = inputRow[sourceFieldNameMap[headerNameToCheck]];
if (!valsToCheckStr) return '';

const valsToCheckArray = valsToCheckStr.split('; ');
const valsToOutputArray = valsToCheckArray.filter((e) =>
matchedValsSet.has(e)
);
return valsToOutputArray.join('; ');
}

/**
* Given a table row, and an ordered collection of field names, return the
* first non-null field val.
* @param {string[]} headerNamesToCheck Field names of user-inputted vals to
* check for non-null vals, in 0-indexed order.
* @param {string[]} inputRow Table row.
* @param {Object<string, number>} sourceFieldNameMap `getFieldNameMap` return
* val.
* @return {string} First non-null val in `headerNamesToCheck`.
*/
getFirstNonNullField(headerNamesToCheck, inputRow, sourceFieldNameMap) {
const nullValsSet = new Set(
Object.keys(this.schema.enums.NullValueMenu.permissible_values).concat([
'',
null,
])
);
const valsToCheck = headerNamesToCheck.map((headerName) => {
const valToCheck = inputRow[sourceFieldNameMap[headerName]];
// TODO trim because copy pasting from excel == '\r\n'; wider issue?
return typeof valToCheck === 'string' ? valToCheck.trim() : valToCheck;
});
const firstNonNullVal = valsToCheck.find((e) => !nullValsSet.has(e));
return firstNonNullVal ? firstNonNullVal : '';
}

/**
* Some enumeration values get mapped over to export format values.
*
Expand Down Expand Up @@ -2285,51 +2388,6 @@ class DataHarmonizer {
return value;
}

/**
* Get a dictionary of empty arrays for each ExportHeader field
* FUTURE: enable it to work with hierarchic vocabulary lists
*
* @param {Array<String>} sourceRow array of values to be exported.
* @param {Array<String>} sourceFields list of source fields to examine for mappings.
* @param {Array<Array>} RuleDB list of export fields modified by rules.
* @param {Array<Array>} fields list of export fields modified by rules.
* @param {Array<Integer>} titleMap map of field names to column index.
* @param {String} prefix of export format to examine.
* @return {Array<Object>} fields Dictionary of all fields.
*/

getRowMap(sourceRow, sourceFields, RuleDB, fields, titleMap, prefix) {
for (const title of sourceFields) {
const sourceIndex = titleMap[title];
let value = sourceRow[sourceIndex]; // All text values.
// Sets source field to data value so that rules can reference it easily.
RuleDB[title] = value;
// Check to see if value is in vocabulary of given select field, and if it
// has a mapping for export to a GRDI target field above, then set target
// to value.
if (value && value.length > 0) {
const vocab_list = fields[sourceIndex]['schema:ItemList'];
if (value in vocab_list) {
const term = vocab_list[value];
// Looking for term.exportField['GRDI'] for example:
if ('exportField' in term && prefix in term.exportField) {
for (let mapping of term.exportField[prefix]) {
// Here mapping involves a value substitution
if ('value' in mapping) {
value = mapping.value;
// Changed on a copy of data, not handsongrid table
sourceRow[sourceIndex] = value;
}
if ('field' in mapping && mapping['field'] in RuleDB) {
RuleDB[mapping['field']] = value;
}
}
}
}
}
}
}

/**
* Return first and last items of a delimited string
* @param {String} value A string of values separated by delimiter.
Expand Down
59 changes: 51 additions & 8 deletions lib/Validator.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,21 @@ class Validator {
);
}

// LinkML does not yet have support for non-numeric minimum_value and maximum_value. In the
// meantime, DataHarmonizer has a convention of putting these values in todos with specific
// prefixes.
/* LinkML does not yet have support for non-numeric minimum_value and
maximum_value. In the meantime, DataHarmonizer has a convention of
putting these values in todos with specific prefixes. If a slot has
any todos, process them here.
This has to be defined at runtime since variables like {today} exist
in the todos array.
*/
const processTodos = (slotDefinition, todos) => {
if (!todos || !todos.length) {
return;
}
const slotType = this.#schema.types?.[slotDefinition.range];

const slotType = this.getSlotType(slotDefinition);

if (slotType?.uri === 'xsd:date') {
for (const todo of todos) {
if (todo.substring(0, 2) === '>=') {
Expand All @@ -62,6 +69,7 @@ class Validator {
}
}
}

for (const def of slotDefinition.any_of || []) {
processTodos(def, todos);
}
Expand All @@ -75,13 +83,15 @@ class Validator {
processTodos(def, todos);
}
};

for (const slotDefinition of Object.values(this.#targetClassInducedSlots)) {
processTodos(slotDefinition, slotDefinition.todos);
}

// DataHarmonizer has a convention for using todos to specify that for a given row the value
// of one column is the min/max value of another column (e.g. ">={other slot name}"). Index
// info about that here.
/* DataHarmonizer has a convention for using todos to specify that for a
given row the value of one column is the min/max value of another column
(e.g. ">={other slot name}"). Index info about that here.
*/
this.#dependantMinimumValuesMap = new Map();
this.#dependantMaximumValuesMap = new Map();
for (const slotDefinition of Object.values(this.#targetClassInducedSlots)) {
Expand Down Expand Up @@ -130,6 +140,32 @@ class Validator {
this.#valueValidatorMap = new Map();
}


/* This returns a single primitve data type for a slot - a decimal, date,
string etc. or possibly an enumeration. Enumerations are handled
separately however (by const slotEnum = ...). Slots either use "range"
attribute, OR they use any_of or exactly_one_of etc. attribute expression
where an array of [range: x, range: y ...] is given. This call returns the
schema.types[] lookup for the FIRST range in the list in that case, which
may be undefined if that is a menu too.
*/
getSlotType(slotDefinition) {

var slotType = this.#schema.types?.[slotDefinition.range];

if (slotType === undefined) {
const extended_range = ['any_of', 'all_of', 'exactly_one_of', 'none_of'];
for (let def of extended_range) {
if (def in slotDefinition) {
slotType = this.#schema.types?.[slotDefinition[def][0]['range']];
break;
}
}
}

return slotType
}

getValidatorForSlot(slot, options = {}) {
const { cacheKey, inheritedRange } = options;
if (typeof cacheKey === 'string' && this.#valueValidatorMap.has(cacheKey)) {
Expand All @@ -147,12 +183,19 @@ class Validator {
slotDefinition.range = inheritedRange;
}

const slotType = this.#schema.types?.[slotDefinition.range];
const slotType = this.getSlotType(slotDefinition);

const slotEnum = this.#schema.enums?.[slotDefinition.range];
const slotPermissibleValues = Object.values(
slotEnum?.permissible_values ?? {}
).map((pv) => pv.text);

// Issue: if any_of lists a NullValueList enumeration as its 2nd range
// while the first range is a date, there is no menu control, and a
// valid value such as "Missing" is still not validated as ok.
// TEST CASE:
// if (slotDefinition.name == "sample_collection_date")
// console.log("any_of", DEBUG INFO)
const anyOfValidators = (slotDefinition.any_of ?? []).map((subSlot) =>
this.getValidatorForSlot(subSlot, {
inheritedRange: slotDefinition.range,
Expand Down
Loading

0 comments on commit 9013cb9

Please sign in to comment.