Skip to content

Commit

Permalink
Merge branch 'development' into 7460-image-not-found-error
Browse files Browse the repository at this point in the history
  • Loading branch information
konzz authored Dec 12, 2024
2 parents a2a8c26 + 70dbc9f commit 4b1549b
Show file tree
Hide file tree
Showing 38 changed files with 247 additions and 209 deletions.
2 changes: 1 addition & 1 deletion app/api/activitylog/helpers.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';
import { typeParsers } from 'api/activitylog/migrationsParser';
import templates from 'api/templates/templates';
import entities from 'api/entities/entities';
Expand Down
2 changes: 1 addition & 1 deletion app/api/csv/importThesauri.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { createError } from 'api/utils';
import csvtojson from 'csvtojson';
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';
import { ensure } from 'shared/tsUtils';
import { LanguageSchema } from 'shared/types/commonTypes';
import { ThesaurusValueSchema } from 'shared/types/thesaurusType';
Expand Down
2 changes: 1 addition & 1 deletion app/api/entities.v2/types/EntityInputDataSchema.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ISO6391Codes } from 'shared/languagesList';
import { ISO6391Codes } from 'shared/language';

const linkSchema = {
type: 'object',
Expand Down
2 changes: 1 addition & 1 deletion app/api/i18n/defaultTranslations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { readFile, readdir } from 'fs/promises';

import { CSVLoader } from 'api/csv';
import { objectIndex } from 'shared/data_utils/objectIndex';
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';

const availableLanguagesByKey = objectIndex(
availableLanguages,
Expand Down
2 changes: 1 addition & 1 deletion app/api/i18n/specs/routes.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import settings from 'api/settings';
import { getFixturesFactory } from 'api/utils/fixturesFactory';
import { testingEnvironment } from 'api/utils/testingEnvironment';
import { TestEmitSources, iosocket, setUpApp } from 'api/utils/testingRoutes';
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';
import { LanguageSchema } from 'shared/types/commonTypes';
import { UserRole } from 'shared/types/userSchema';
import { DefaultTranslations } from '../defaultTranslations';
Expand Down
2 changes: 1 addition & 1 deletion app/api/i18n/translations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { TranslationContext, TranslationType, TranslationValue } from 'shared/tr
// eslint-disable-next-line node/no-restricted-import
import { createWriteStream } from 'fs';
import { ObjectId } from 'mongodb';
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';
import { ContextType } from 'shared/translationSchema';
import { LanguageISO6391 } from 'shared/types/commonTypes';
import { pipeline } from 'stream/promises';
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* eslint-disable no-await-in-loop */
import languages from 'shared/languages';
import languages from './languages';

const getDefaultLanguage = async db => {
const settings = await db.collection('settings').find().toArray();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
 * One language record in the lookup table below.
 *
 * - `franc`: three-letter code; in every entry it equals the table key
 *   (presumably the code reported by the `franc` detector — confirm).
 * - `elastic`: language name used on the search side (e.g. `english`, `sorani`);
 *   presumably an Elasticsearch analyzer name — confirm against the mapping.
 * - `ISO639_1`: two-letter code, or `null` when the entry has none.
 */
interface ElasticLanguage {
  franc: string;
  elastic: string;
  ISO639_1: string | null;
}

/**
 * Language table keyed by three-letter code.
 *
 * Notes visible in the data itself:
 * - `cjk` and `ckb` carry no two-letter code (`ISO639_1: null`).
 * - `nno` and `nob` both resolve to the same `elastic` value, `norwegian`.
 */
const elasticLanguages: Record<string, ElasticLanguage> = {
  arb: { franc: 'arb', elastic: 'arabic', ISO639_1: 'ar' },
  bul: { franc: 'bul', elastic: 'bulgarian', ISO639_1: 'bg' },
  cat: { franc: 'cat', elastic: 'catalan', ISO639_1: 'ca' },
  cjk: { franc: 'cjk', elastic: 'cjk', ISO639_1: null },
  ckb: { franc: 'ckb', elastic: 'sorani', ISO639_1: null },
  ces: { franc: 'ces', elastic: 'czech', ISO639_1: 'cs' },
  dan: { franc: 'dan', elastic: 'danish', ISO639_1: 'da' },
  deu: { franc: 'deu', elastic: 'german', ISO639_1: 'de' },
  ell: { franc: 'ell', elastic: 'greek', ISO639_1: 'el' },
  eng: { franc: 'eng', elastic: 'english', ISO639_1: 'en' },
  eus: { franc: 'eus', elastic: 'basque', ISO639_1: 'eu' },
  fas: { franc: 'fas', elastic: 'persian', ISO639_1: 'fa' },
  fin: { franc: 'fin', elastic: 'finnish', ISO639_1: 'fi' },
  fra: { franc: 'fra', elastic: 'french', ISO639_1: 'fr' },
  gle: { franc: 'gle', elastic: 'irish', ISO639_1: 'ga' },
  glg: { franc: 'glg', elastic: 'galician', ISO639_1: 'gl' },
  hin: { franc: 'hin', elastic: 'hindi', ISO639_1: 'hi' },
  hun: { franc: 'hun', elastic: 'hungarian', ISO639_1: 'hu' },
  hye: { franc: 'hye', elastic: 'armenian', ISO639_1: 'hy' },
  ind: { franc: 'ind', elastic: 'indonesian', ISO639_1: 'id' },
  ita: { franc: 'ita', elastic: 'italian', ISO639_1: 'it' },
  lav: { franc: 'lav', elastic: 'latvian', ISO639_1: 'lv' },
  lit: { franc: 'lit', elastic: 'lithuanian', ISO639_1: 'lt' },
  nld: { franc: 'nld', elastic: 'dutch', ISO639_1: 'nl' },
  nno: { franc: 'nno', elastic: 'norwegian', ISO639_1: 'nn' },
  nob: { franc: 'nob', elastic: 'norwegian', ISO639_1: 'nb' },
  por: { franc: 'por', elastic: 'portuguese', ISO639_1: 'pt' },
  ron: { franc: 'ron', elastic: 'romanian', ISO639_1: 'ro' },
  rus: { franc: 'rus', elastic: 'russian', ISO639_1: 'ru' },
  spa: { franc: 'spa', elastic: 'spanish', ISO639_1: 'es' },
  swe: { franc: 'swe', elastic: 'swedish', ISO639_1: 'sv' },
  tha: { franc: 'tha', elastic: 'thai', ISO639_1: 'th' },
  tur: { franc: 'tur', elastic: 'turkish', ISO639_1: 'tr' },
};

export { elasticLanguages };
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import { elasticLanguages } from './languageList';

// Flatten the keyed language table into a plain array of its records,
// in the same order as the table's keys.
const data = Object.keys(elasticLanguages).map(code => elasticLanguages[code]);

export default { data };
4 changes: 2 additions & 2 deletions app/api/search/entitiesIndex.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { detectLanguage } from 'shared/detectLanguage';
import { language as languages } from 'shared/languagesList';
import entities from 'api/entities';
import { legacyLogger } from 'api/log';
import { entityDefaultDocument } from 'shared/entityDefaultDocument';
Expand All @@ -8,6 +7,7 @@ import { ElasticEntityMapper } from 'api/entities.v2/database/ElasticEntityMappe
import { MongoTemplatesDataSource } from 'api/templates.v2/database/MongoTemplatesDataSource';
import { getConnection } from 'api/common.v2/database/getConnectionForCurrentTenant';
import { MongoSettingsDataSource } from 'api/settings.v2/database/MongoSettingsDataSource';
import { LanguageUtils } from 'shared/language';
import { DefaultTransactionManager } from 'api/common.v2/database/data_source_defaults';
import elasticMapping from '../../../database/elastic_mapping/elastic_mapping';
import elasticMapFactory from '../../../database/elastic_mapping/elasticMapFactory';
Expand Down Expand Up @@ -50,7 +50,7 @@ function setFullTextSettings(defaultDocument, id, body, doc) {
language = detectLanguage(fullText);
}
if (defaultDocument.language) {
language = languages(defaultDocument.language);
language = LanguageUtils.fromISO639_3(defaultDocument.language).elastic;
}
const fullTextObject = {
[`fullText_${language}`]: fullText,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import settings from 'api/settings/settings';
import templatesModel from 'api/templates/templates';
import dictionatiesModel from 'api/thesauri/dictionariesModel';
import request from 'shared/JSONRequest';
import languages from 'shared/languages';
import { EntitySchema } from 'shared/types/entityType';
import { ExtractedMetadataSchema, ObjectIdSchema, PropertySchema } from 'shared/types/commonTypes';
import { ModelStatus } from 'shared/types/IXModelSchema';
Expand All @@ -36,6 +35,7 @@ import {
} from 'api/services/informationextraction/getFiles';
import { Suggestions } from 'api/suggestions/suggestions';
import { IXExtractorType } from 'shared/types/extractorType';
import { LanguageUtils } from 'shared/language';
import { IXModelType } from 'shared/types/IXModelType';
import { ParagraphSchema } from 'shared/types/segmentationType';
import ixmodels from './ixmodels';
Expand Down Expand Up @@ -174,7 +174,8 @@ class InformationExtraction {
file: FileWithAggregation,
_data: CommonMaterialsData
): MaterialsData => {
const languageIso = languages.get(file.language!, 'ISO639_1') || defaultTrainingLanguage;
const languageIso =
LanguageUtils.fromISO639_3(file.language!, false)?.ISO639_1 || defaultTrainingLanguage;

let data: MaterialsData = { ..._data, language_iso: languageIso };

Expand Down Expand Up @@ -257,7 +258,7 @@ class InformationExtraction {
_getEntityFromFile = async (file: EnforcedWithId<FileType> | FileWithAggregation) => {
let [entity] = await entities.getUnrestricted({
sharedId: file.entity,
language: languages.get(file.language!, 'ISO639_1'),
language: LanguageUtils.fromISO639_3(file.language!)?.ISO639_1,
});

if (!entity) {
Expand Down Expand Up @@ -346,7 +347,7 @@ class InformationExtraction {
...existingSuggestions,
entityId: entity.sharedId!,
fileId: file._id,
language: languages.get(file.language, 'ISO639_1') || 'other',
language: LanguageUtils.fromISO639_3(file.language)?.ISO639_1 || 'other',
extractorId: extractor._id,
propertyName: extractor.property,
status: 'processing',
Expand Down
13 changes: 9 additions & 4 deletions app/api/services/informationextraction/getFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ import { objectIndex } from 'shared/data_utils/objectIndex';
import settings from 'api/settings/settings';
import templatesModel from 'api/templates/templates';
import { propertyTypes } from 'shared/propertyTypes';
import languages from 'shared/languages';
import { ensure } from 'shared/tsUtils';
import { LanguageUtils } from 'shared/language';

const BATCH_SIZE = 50;
const MAX_TRAINING_FILES_NUMBER = 2000;
Expand Down Expand Up @@ -126,13 +126,18 @@ async function anyFilesLabeled(
return !!count;
}

async function anyFilesSegmented(property: string, propertyType: string) {
async function anyFilesSegmented(
property: string,
propertyType: string,
entitiesFromTrainingTemplatesIds: string[]
) {
const needsExtractedMetadata = !propertyTypeIsWithoutExtractedMetadata(propertyType);
const segmentedFilesCount = await filesModel.count({
type: 'document',
filename: { $exists: true },
language: { $exists: true },
_id: { $in: await getSegmentedFilesIds() },
entity: { $in: entitiesFromTrainingTemplatesIds },
...(needsExtractedMetadata ? { 'extractedMetadata.name': property } : {}),
});
return !!segmentedFilesCount;
Expand Down Expand Up @@ -183,7 +188,7 @@ async function getFilesForTraining(templates: ObjectIdSchema[], property: string
throw new NoLabeledFiles();
}

if (!(await anyFilesSegmented(property, propertyType))) {
if (!(await anyFilesSegmented(property, propertyType, entitiesFromTrainingTemplatesIds))) {
throw new NoSegmentedFiles();
}

Expand All @@ -202,7 +207,7 @@ async function getFilesForTraining(templates: ObjectIdSchema[], property: string
const defaultLang = (await settings.getDefaultLanguage())?.key;

const filesWithEntityValue = files.map(file => {
const fileLang = languages.get(file.language, 'ISO639_1') || defaultLang;
const fileLang = LanguageUtils.fromISO639_3(file.language, false)?.ISO639_1 || defaultLang;
const entity = indexedEntities[file.entity + fileLang];
if (!entity?.metadata || !entity?.metadata[property]?.length) {
return { ...file, propertyType };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,18 @@ describe('InformationExtraction', () => {
);
expect(result).toMatchObject(expectedError);
});

it('should return error status (No segmented files) and stop finding suggestions, when there are no segmented files (select/multiselect/relationship)', async () => {
const expectedError = {
status: 'error',
message: 'There are no documents segmented yet, please try again later',
};

const result = await informationExtraction.trainModel(
factory.id('selectExtractorWithoutSegmentations')
);
expect(result).toMatchObject(expectedError);
});
});

describe('when model is trained', () => {
Expand Down
13 changes: 11 additions & 2 deletions app/api/services/informationextraction/specs/fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ const fixtures: DBFixture = {
'templateToSegmentF',
]),
factory.ixExtractor('extractorWithoutSegmentations', 'title', ['templateWithoutSegmentations']),
factory.ixExtractor('selectExtractorWithoutSegmentations', 'property_select', [
'templateWithoutSegmentations',
]),
],
entities: [
factory.entity('P1', 'relationshipPartnerTemplate', {}, { sharedId: 'P1sharedId' }),
Expand Down Expand Up @@ -139,7 +142,9 @@ const fixtures: DBFixture = {
property_empty_relationship: [],
property_relationship_to_any: [],
}),
factory.entity('entityWithoutSegmentation', 'templateWithoutSegmentations', {}),
factory.entity('entityWithoutSegmentation', 'templateWithoutSegmentations', {
property_select: [{ value: 'B', label: 'B' }],
}),
],
files: [
factory.fileDeprecated('F1', 'A1', 'document', fixturesPdfNameA, 'other', '', [
Expand Down Expand Up @@ -781,7 +786,11 @@ const fixtures: DBFixture = {
relationType: factory.idString('relatedToAny'),
}),
]),
factory.template('templateWithoutSegmentations'),
factory.template('templateWithoutSegmentations', [
factory.property('property_select', 'select', {
content: factory.id('thesauri1').toString(),
}),
]),
],
dictionaries: [factory.nestedThesauri('thesauri1', ['A', 'B', 'C', { 1: ['1A', '1B'] }])],
};
Expand Down
6 changes: 3 additions & 3 deletions app/api/services/ocr/OcrManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import settings from 'api/settings/settings';
import { emitToTenant } from 'api/socketio/setupSockets';
import { tenants } from 'api/tenants/tenantContext';
import createError from 'api/utils/Error';
import { LanguageUtils } from 'shared/language';
import { handleError } from 'api/utils/handleError';
// eslint-disable-next-line node/no-restricted-import
import { createReadStream, createWriteStream } from 'fs';
import request from 'shared/JSONRequest';
import { language as getLanguage } from 'shared/languagesList';
import { FileType } from 'shared/types/fileType';
import { Readable } from 'stream';
import { pipeline } from 'stream/promises';
Expand Down Expand Up @@ -146,7 +146,7 @@ const processResults = async (message: ResultsMessage): Promise<void> => {
const validateLanguage = async (language: string, ocrSettings?: { url: string }) => {
const _ocrSettings = ocrSettings || (await getSettings());
const supportedLanguages = await fetchSupportedLanguages(_ocrSettings);
return supportedLanguages.includes(getLanguage(language, 'ISO639_1')!);
return supportedLanguages.includes(LanguageUtils.fromISO639_3(language)?.ISO639_1!);
};

const getStatus = async (file: EnforcedWithId<FileType>) => {
Expand Down Expand Up @@ -219,7 +219,7 @@ class OcrManager {
tenant: tenant.name,
params: {
filename: file.filename,
language: getLanguage(file.language!, 'ISO639_1'),
language: LanguageUtils.fromISO639_3(file.language!)?.ISO639_1,
},
});

Expand Down
4 changes: 2 additions & 2 deletions app/api/suggestions/blankSuggestions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ import { files } from 'api/files';
import { EnforcedWithId } from 'api/odm';
import settings from 'api/settings';
import { propertyTypeIsMultiValued } from 'api/services/informationextraction/getFiles';
import languages from 'shared/languages';
import { ObjectIdSchema } from 'shared/types/commonTypes';
import { IXExtractorType } from 'shared/types/extractorType';
import { FileType } from 'shared/types/fileType';
import { IXSuggestionType } from 'shared/types/suggestionType';
import { Suggestions } from './suggestions';
import templates from 'api/templates';
import { LanguageUtils } from 'shared/language';

const fetchEntitiesBatch = async (query: any, limit: number = 100) =>
entitiesModel.db.find(query).select('sharedId').limit(limit).sort({ _id: 1 }).lean();
Expand Down Expand Up @@ -49,7 +49,7 @@ export const getBlankSuggestion = (
defaultLanguage: string
) => ({
language: file.language
? languages.get(file.language, 'ISO639_1') || defaultLanguage
? LanguageUtils.fromISO639_3(file.language, false)?.ISO639_1 || defaultLanguage
: defaultLanguage,
fileId: file._id,
entityId: file.entity!,
Expand Down
4 changes: 2 additions & 2 deletions app/react/App/Root.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import PropTypes from 'prop-types';
import React, { Component } from 'react';
import serialize from 'serialize-javascript';

import { availableLanguages as languagesList } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';

const determineHotAssets = query => ({
JS: [
Expand Down Expand Up @@ -87,7 +87,7 @@ class Root extends Component {
const isHotReload = process.env.HOT;
const { head, language, assets, reduxData, content } = this.props;

const languageData = languagesList.find(l => l.key === language);
const languageData = availableLanguages.find(l => l.key === language);
const query = languageData && languageData.rtl ? '?rtl=true' : '';

const { JS, CSS } = isHotReload
Expand Down
8 changes: 4 additions & 4 deletions app/react/Attachments/components/AttachmentForm.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@ import { connect } from 'react-redux';
import { Form, Field } from 'react-redux-form';

import { FormGroup, Select } from 'app/ReactReduxForms';
import { elasticLanguages } from 'shared/languagesList';
import { elasticLanguages } from 'shared/language';
import t from 'app/I18N/t';
import ShowIf from 'app/App/ShowIf';

export class AttachmentForm extends Component {
render() {
const { model } = this.props;
const validators = { originalname: { required: val => !!val && val.trim() !== '' } };
const languageOptions = Object.keys(elasticLanguages).map(key => ({
value: elasticLanguages[key].franc,
label: elasticLanguages[key].elastic,
const languageOptions = elasticLanguages.map(language => ({
value: language.ISO639_3,
label: language.elastic,
}));
languageOptions.push({ value: 'other', label: 'other' });

Expand Down
6 changes: 4 additions & 2 deletions app/react/Attachments/components/File.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { wrapDispatch } from 'app/Multireducer';
import { TocGeneratedLabel } from 'app/ToggledFeatures/tocGeneration';
import { NeedAuthorization } from 'app/Auth';
import { LocalForm } from 'app/Forms/Form';
import { availableLanguages, getLanguageSchema } from 'shared/languagesList';
import { availableLanguages, LanguageUtils } from 'shared/language';
import { isBlobFile } from 'shared/tsUtils';
import { EntitySchema } from 'shared/types/entityType';
import { FileType } from 'shared/types/fileType';
Expand Down Expand Up @@ -105,7 +105,9 @@ class File extends Component<FileOwnProps, FileState> {
<div>
<div>
<span className="badge">
<Translate>{language ? getLanguageSchema(language)?.label || '' : ''}</Translate>
<Translate>
{language ? LanguageUtils.fromISO639_3(language)?.label || '' : ''}
</Translate>
</span>
<TocGeneratedLabel file={this.props.file}>
<Translate>ML TOC</Translate>
Expand Down
Loading

0 comments on commit 4b1549b

Please sign in to comment.