From 0b5ec75873a2b373dbe2e961f17847cf828f1add Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafael=20P=C3=B3lit?= Date: Wed, 11 Dec 2024 13:32:11 -0500 Subject: [PATCH 1/8] Added match settings for manual procedures (#7539) * Added match settings for manual procedures * Bump version --- app/api/suggestions/specs/fixtures.ts | 22 ++++++++++++++++++- app/api/suggestions/specs/suggestions.spec.ts | 8 ++++--- app/api/suggestions/suggestions.ts | 6 +++-- package.json | 2 +- 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/app/api/suggestions/specs/fixtures.ts b/app/api/suggestions/specs/fixtures.ts index 79dc3fc101..8c4c7a8329 100644 --- a/app/api/suggestions/specs/fixtures.ts +++ b/app/api/suggestions/specs/fixtures.ts @@ -117,6 +117,16 @@ const fixtures: DBFixture = { date: 5, page: 2, status: 'ready', + state: { + labeled: true, + obsolete: false, + match: true, + withValue: false, + withSuggestion: false, + processing: false, + error: false, + hasContext: false, + }, error: '', }, { @@ -130,6 +140,16 @@ const fixtures: DBFixture = { date: 5, page: 2, status: 'ready', + state: { + labeled: true, + obsolete: false, + match: false, + withValue: false, + withSuggestion: false, + processing: false, + error: false, + hasContext: false, + }, error: '', }, { @@ -1323,9 +1343,9 @@ const stateFilterFixtures: DBFixture = { factory.ixSuggestion({ extractorId: factory.id('unused_extractor'), state: { - labeled: true, match: true, obsolete: true, + labeled: true, error: true, }, }), diff --git a/app/api/suggestions/specs/suggestions.spec.ts b/app/api/suggestions/specs/suggestions.spec.ts index 09ac72ccce..b88b8954e4 100644 --- a/app/api/suggestions/specs/suggestions.spec.ts +++ b/app/api/suggestions/specs/suggestions.spec.ts @@ -1541,7 +1541,7 @@ describe('suggestions', () => { const query = { entityId: 'shared1' }; await Suggestions.setObsolete(query); const obsoletes = await db.mongodb?.collection('ixsuggestions').find(query).toArray(); - expect(obsoletes?.every(s => s.state.obsolete)).toBe(true); + expect(obsoletes?.every(s => s.state.obsolete && s.state.match === null)).toBe(true); expect(obsoletes?.length).toBe(4); }); }); @@ -1555,7 +1555,7 @@ describe('suggestions', () => { const query = { entityId: 'shared1' }; await Suggestions.markSuggestionsWithoutSegmentation(query); const notSegmented = await db.mongodb?.collection('ixsuggestions').find(query).toArray(); - expect(notSegmented?.every(s => s.state.error)).toBe(true); + expect(notSegmented?.every(s => s.state.error && s.state.match === null)).toBe(true); }); it('should not mark suggestions when segmentations are correct', async () => { @@ -1572,7 +1572,7 @@ describe('suggestions', () => { expect(segmented?.length).toBe(1); expect(segmented?.every(s => s.state?.error)).toBe(false); expect(notSegmented?.length).toBe(1); - expect(notSegmented?.every(s => s.state.error)).toBe(true); + expect(notSegmented?.every(s => s.state.error && s.state.match === null)).toBe(true); }); }); @@ -1612,6 +1612,7 @@ describe('suggestions', () => { ...newErroringSuggestion, state: { error: true, + match: null, }, }); expect(await findOneSuggestion({ entityId: newProcessingSuggestion.entityId })).toMatchObject( @@ -1619,6 +1620,7 @@ describe('suggestions', () => { ...newProcessingSuggestion, state: { processing: true, + match: null, }, } ); diff --git a/app/api/suggestions/suggestions.ts b/app/api/suggestions/suggestions.ts index 3f57f55fd7..087ffac480 100644 --- a/app/api/suggestions/suggestions.ts +++ b/app/api/suggestions/suggestions.ts @@ -277,7 +277,9 @@ const Suggestions = { updateStates, setObsolete: async (query: any) => - IXSuggestionsModel.updateMany(query, { $set: { 'state.obsolete': true } }), + IXSuggestionsModel.updateMany(query, { + $set: { 'state.obsolete': true, 'state.match': null }, + }), markSuggestionsWithoutSegmentation: async (query: any) => { const segmentedFilesIds = await getSegmentedFilesIds(); @@ -286,7 +288,7 @@ const Suggestions = { ...query, fileId: { $nin: segmentedFilesIds }, }, - { $set: { 'state.error': true } } + { $set: { 'state.error': true, 'state.match': null } } ); }, diff --git a/package.json b/package.json index 7145f134ec..979cd38d56 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "uwazi", - "version": "1.193.2", + "version": "1.193.3", "description": "Uwazi is a free, open-source solution for organising, analysing and publishing your documents.", "keywords": [ "react" From 8e9220eca313d3a2d42f5b23e4734ae5b45c793c Mon Sep 17 00:00:00 2001 From: Joan Gallego Girona Date: Thu, 12 Dec 2024 10:08:16 +0100 Subject: [PATCH 2/8] Properly support non labeled properties to report missing segmentation (#7541) select/multiselect/relationship need a different approach when calculating segmented / labeled status --- app/api/services/informationextraction/getFiles.ts | 9 +++++++-- .../specs/InformationExtraction.spec.ts | 12 ++++++++++++ .../informationextraction/specs/fixtures.ts | 13 +++++++++++-- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/app/api/services/informationextraction/getFiles.ts b/app/api/services/informationextraction/getFiles.ts index 01041ac43d..30ef763851 100644 --- a/app/api/services/informationextraction/getFiles.ts +++ b/app/api/services/informationextraction/getFiles.ts @@ -126,13 +126,18 @@ async function anyFilesLabeled( return !!count; } -async function anyFilesSegmented(property: string, propertyType: string) { +async function anyFilesSegmented( + property: string, + propertyType: string, + entitiesFromTrainingTemplatesIds: string[] +) { const needsExtractedMetadata = !propertyTypeIsWithoutExtractedMetadata(propertyType); const segmentedFilesCount = await filesModel.count({ type: 'document', filename: { $exists: true }, language: { $exists: true }, _id: { $in: await getSegmentedFilesIds() }, + entity: { $in: entitiesFromTrainingTemplatesIds }, ...(needsExtractedMetadata ? { 'extractedMetadata.name': property } : {}), }); return !!segmentedFilesCount; @@ -183,7 +188,7 @@ async function getFilesForTraining(templates: ObjectIdSchema[], property: string throw new NoLabeledFiles(); } - if (!(await anyFilesSegmented(property, propertyType))) { + if (!(await anyFilesSegmented(property, propertyType, entitiesFromTrainingTemplatesIds))) { throw new NoSegmentedFiles(); } diff --git a/app/api/services/informationextraction/specs/InformationExtraction.spec.ts b/app/api/services/informationextraction/specs/InformationExtraction.spec.ts index 3ed80d908c..6b3b416e2c 100644 --- a/app/api/services/informationextraction/specs/InformationExtraction.spec.ts +++ b/app/api/services/informationextraction/specs/InformationExtraction.spec.ts @@ -498,6 +498,18 @@ describe('InformationExtraction', () => { ); expect(result).toMatchObject(expectedError); }); + + it('should return error status (No segmented files) and stop finding suggestions, when there are no segmented files (select/multiselect/relationship)', async () => { + const expectedError = { + status: 'error', + message: 'There are no documents segmented yet, please try again later', + }; + + const result = await informationExtraction.trainModel( + factory.id('selectExtractorWithoutSegmentations') + ); + expect(result).toMatchObject(expectedError); + }); }); describe('when model is trained', () => { diff --git a/app/api/services/informationextraction/specs/fixtures.ts b/app/api/services/informationextraction/specs/fixtures.ts index c6bc2904b3..5fe581ba82 100644 --- a/app/api/services/informationextraction/specs/fixtures.ts +++ b/app/api/services/informationextraction/specs/fixtures.ts @@ -56,6 +56,9 @@ const fixtures: DBFixture = { 'templateToSegmentF', ]), factory.ixExtractor('extractorWithoutSegmentations', 'title', ['templateWithoutSegmentations']), + factory.ixExtractor('selectExtractorWithoutSegmentations', 'property_select', [ + 'templateWithoutSegmentations', + ]), ], entities: [ factory.entity('P1', 'relationshipPartnerTemplate', {}, { sharedId: 'P1sharedId' }), @@ -139,7 +142,9 @@ const fixtures: DBFixture = { property_empty_relationship: [], property_relationship_to_any: [], }), - factory.entity('entityWithoutSegmentation', 'templateWithoutSegmentations', {}), + factory.entity('entityWithoutSegmentation', 'templateWithoutSegmentations', { + property_select: [{ value: 'B', label: 'B' }], + }), ], files: [ factory.fileDeprecated('F1', 'A1', 'document', fixturesPdfNameA, 'other', '', [ @@ -781,7 +786,11 @@ const fixtures: DBFixture = { relationType: factory.idString('relatedToAny'), }), ]), - factory.template('templateWithoutSegmentations'), + factory.template('templateWithoutSegmentations', [ + factory.property('property_select', 'select', { + content: factory.id('thesauri1').toString(), + }), + ]), ], dictionaries: [factory.nestedThesauri('thesauri1', ['A', 'B', 'C', { 1: ['1A', '1B'] }])], }; From bde6bec394153f196bcb2dc50ce48c4493f3e62d Mon Sep 17 00:00:00 2001 From: Daneryl Date: Thu, 12 Dec 2024 10:08:22 +0100 Subject: [PATCH 3/8] Bump version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 979cd38d56..c2e9601163 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "uwazi", - "version": "1.193.3", + "version": "1.193.4", "description": "Uwazi is a free, open-source solution for organising, analysing and publishing your documents.", "keywords": [ "react" From 99d8eb2c62ea4cad58af45eb285b1e9a5f4e05a0 Mon Sep 17 00:00:00 2001 From: Daneryl Date: Thu, 12 Dec 2024 10:08:29 +0100 Subject: [PATCH 4/8] Merge back from production and Bump rc version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 552303ed9c..8e4d12b580 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "uwazi", - "version": "1.194.0-rc4", + "version": "1.194.0-rc5", "description": "Uwazi is a free, open-source solution for organising, analysing and publishing your documents.", "keywords": [ "react" From 15353704a1d19338ed828e7d40a07ef8fd5734e3 Mon Sep 17 00:00:00 2001 From: "Joao Victor G. Rodrigues" Date: Thu, 12 Dec 2024 08:18:27 -0400 Subject: [PATCH 5/8] refactor language list (#7516) * refactor language list * remove unused test suits * remove franc from LanguageCode * fix flaky test * improve code quality * apply pull request changes * replace LanguageMapper for LanguageUtils * fix test * fix check types --------- Co-authored-by: Joan Gallego Girona --- app/api/activitylog/helpers.js | 2 +- app/api/csv/importThesauri.ts | 2 +- .../types/EntityInputDataSchema.ts | 2 +- app/api/i18n/defaultTranslations.ts | 2 +- app/api/i18n/specs/routes.spec.ts | 2 +- app/api/i18n/translations.ts | 2 +- .../32-add-file-field-to-connections/index.js | 2 +- .../languageList.ts | 39 ++++++++ .../languages.ts | 5 + app/api/search/entitiesIndex.js | 4 +- .../InformationExtraction.ts | 9 +- .../informationextraction/getFiles.ts | 4 +- app/api/services/ocr/OcrManager.ts | 6 +- app/api/suggestions/blankSuggestions.ts | 4 +- app/react/App/Root.js | 4 +- .../Attachments/components/AttachmentForm.js | 8 +- app/react/Attachments/components/File.tsx | 6 +- app/react/Attachments/components/FileList.tsx | 4 +- .../components/specs/FileList.spec.tsx | 6 +- app/react/Layout/DocumentLanguage.js | 6 +- app/react/UI/Icon/Icon.js | 4 +- app/react/V2/Components/UI/Sidepanel.tsx | 2 +- .../Translations/EditTranslations.tsx | 2 +- app/react/Viewer/utils/determineDirection.js | 9 +- app/shared/detectLanguage.ts | 5 +- app/shared/entityDefaultDocument.ts | 8 +- .../availableLanguages.ts} | 92 +------------------ app/shared/language/index.ts | 12 +++ app/shared/language/languageUtils.ts | 52 +++++++++++ app/shared/languages.js | 14 --- app/shared/specs/languages.spec.js | 84 +++++++++-------- app/shared/types/commonSchemas.ts | 1 - app/shared/types/commonTypes.d.ts | 2 - database/elastic_mapping/elastic_mapping.js | 4 +- 34 files changed, 211 insertions(+), 199 deletions(-) create mode 100644 app/api/migrations/migrations/32-add-file-field-to-connections/languageList.ts create mode 100644 app/api/migrations/migrations/32-add-file-field-to-connections/languages.ts rename app/shared/{languagesList.ts => language/availableLanguages.ts} (93%) create mode 100644 app/shared/language/index.ts create mode 100644 app/shared/language/languageUtils.ts delete mode 100644 app/shared/languages.js diff --git a/app/api/activitylog/helpers.js b/app/api/activitylog/helpers.js index 2c0a9d0880..4a5c069650 100644 --- a/app/api/activitylog/helpers.js +++ b/app/api/activitylog/helpers.js @@ -1,4 +1,4 @@ -import { availableLanguages } from 'shared/languagesList'; +import { availableLanguages } from 'shared/language'; import { typeParsers } from 'api/activitylog/migrationsParser'; import templates from 'api/templates/templates'; import entities from 'api/entities/entities'; diff --git a/app/api/csv/importThesauri.ts b/app/api/csv/importThesauri.ts index 3ce63749b0..8632b1559a 100644 --- a/app/api/csv/importThesauri.ts +++ b/app/api/csv/importThesauri.ts @@ -1,6 +1,6 @@ import { createError } from 'api/utils'; import csvtojson from 'csvtojson'; -import { availableLanguages } from 'shared/languagesList'; +import { availableLanguages } from 'shared/language'; import { ensure } from 'shared/tsUtils'; import { LanguageSchema } from 'shared/types/commonTypes'; import { ThesaurusValueSchema } from 'shared/types/thesaurusType'; diff --git a/app/api/entities.v2/types/EntityInputDataSchema.ts b/app/api/entities.v2/types/EntityInputDataSchema.ts index 9423e9e7d0..a5ea1f766a 100644 --- a/app/api/entities.v2/types/EntityInputDataSchema.ts +++ b/app/api/entities.v2/types/EntityInputDataSchema.ts @@ -1,4 +1,4 @@ -import { ISO6391Codes } from 'shared/languagesList'; +import { ISO6391Codes } from 'shared/language'; const linkSchema = { type: 'object', diff --git a/app/api/i18n/defaultTranslations.ts b/app/api/i18n/defaultTranslations.ts index 6b0eee9ada..1ac3e7005a 100644 --- a/app/api/i18n/defaultTranslations.ts +++ b/app/api/i18n/defaultTranslations.ts @@ -4,7 +4,7 @@ import { readFile, readdir } from 'fs/promises'; import { CSVLoader } from 'api/csv'; import { objectIndex } from 'shared/data_utils/objectIndex'; -import { availableLanguages } from 'shared/languagesList'; +import { availableLanguages } from 'shared/language'; const availableLanguagesByKey = objectIndex( availableLanguages, diff --git a/app/api/i18n/specs/routes.spec.ts b/app/api/i18n/specs/routes.spec.ts index f885c0c03b..6b39b4697e 100644 --- a/app/api/i18n/specs/routes.spec.ts +++ b/app/api/i18n/specs/routes.spec.ts @@ -9,7 +9,7 @@ import settings from 'api/settings'; import { getFixturesFactory } from 'api/utils/fixturesFactory'; import { testingEnvironment } from 'api/utils/testingEnvironment'; import { TestEmitSources, iosocket, setUpApp } from 'api/utils/testingRoutes'; -import { availableLanguages } from 'shared/languagesList'; +import { availableLanguages } from 'shared/language'; import { LanguageSchema } from 'shared/types/commonTypes'; import { UserRole } from 'shared/types/userSchema'; import { DefaultTranslations } from '../defaultTranslations'; diff --git a/app/api/i18n/translations.ts b/app/api/i18n/translations.ts index d2c14dd09a..d77c894553 100644 --- a/app/api/i18n/translations.ts +++ b/app/api/i18n/translations.ts @@ -13,7 +13,7 @@ import { TranslationContext, TranslationType, TranslationValue } from 'shared/tr // eslint-disable-next-line node/no-restricted-import import { createWriteStream } from 'fs'; import { ObjectId } from 'mongodb'; -import { availableLanguages } from 'shared/languagesList'; +import { availableLanguages } from 'shared/language'; import { ContextType } from 'shared/translationSchema'; import { LanguageISO6391 } from 'shared/types/commonTypes'; import { pipeline } from 'stream/promises'; diff --git a/app/api/migrations/migrations/32-add-file-field-to-connections/index.js b/app/api/migrations/migrations/32-add-file-field-to-connections/index.js index 3b931b07f1..59c51577fe 100644 --- a/app/api/migrations/migrations/32-add-file-field-to-connections/index.js +++ b/app/api/migrations/migrations/32-add-file-field-to-connections/index.js @@ -1,5 +1,5 @@ /* eslint-disable no-await-in-loop */ -import languages from 'shared/languages'; +import languages from './languages'; const getDefaultLanguage = async db => { const settings = await db.collection('settings').find().toArray(); diff --git a/app/api/migrations/migrations/32-add-file-field-to-connections/languageList.ts b/app/api/migrations/migrations/32-add-file-field-to-connections/languageList.ts new file mode 100644 index 0000000000..87e3ebb73b --- /dev/null +++ b/app/api/migrations/migrations/32-add-file-field-to-connections/languageList.ts @@ -0,0 +1,39 @@ +const elasticLanguages: { + [index: string]: { franc: string; elastic: string; ISO639_1: string | null }; +} = { + arb: { franc: 'arb', elastic: 'arabic', ISO639_1: 'ar' }, + bul: { franc: 'bul', elastic: 'bulgarian', ISO639_1: 'bg' }, + cat: { franc: 'cat', elastic: 'catalan', ISO639_1: 'ca' }, + cjk: { franc: 'cjk', elastic: 'cjk', ISO639_1: null }, + ckb: { franc: 'ckb', elastic: 'sorani', ISO639_1: null }, + ces: { franc: 'ces', elastic: 'czech', ISO639_1: 'cs' }, + dan: { franc: 'dan', elastic: 'danish', ISO639_1: 'da' }, + deu: { franc: 'deu', elastic: 'german', ISO639_1: 'de' }, + ell: { franc: 'ell', elastic: 'greek', ISO639_1: 'el' }, + eng: { franc: 'eng', elastic: 'english', ISO639_1: 'en' }, + eus: { franc: 'eus', elastic: 'basque', ISO639_1: 'eu' }, + fas: { franc: 'fas', elastic: 'persian', ISO639_1: 'fa' }, + fin: { franc: 'fin', elastic: 'finnish', ISO639_1: 'fi' }, + fra: { franc: 'fra', elastic: 'french', ISO639_1: 'fr' }, + gle: { franc: 'gle', elastic: 'irish', ISO639_1: 'ga' }, + glg: { franc: 'glg', elastic: 'galician', ISO639_1: 'gl' }, + hin: { franc: 'hin', elastic: 'hindi', ISO639_1: 'hi' }, + hun: { franc: 'hun', elastic: 'hungarian', ISO639_1: 'hu' }, + hye: { franc: 'hye', elastic: 'armenian', ISO639_1: 'hy' }, + ind: { franc: 'ind', elastic: 'indonesian', ISO639_1: 'id' }, + ita: { franc: 'ita', elastic: 'italian', ISO639_1: 'it' }, + lav: { franc: 'lav', elastic: 'latvian', ISO639_1: 'lv' }, + lit: { franc: 'lit', elastic: 'lithuanian', ISO639_1: 'lt' }, + nld: { franc: 'nld', elastic: 'dutch', ISO639_1: 'nl' }, + nno: { franc: 'nno', elastic: 'norwegian', ISO639_1: 'nn' }, + nob: { franc: 'nob', elastic: 'norwegian', ISO639_1: 'nb' }, + por: { franc: 'por', elastic: 'portuguese', ISO639_1: 'pt' }, + ron: { franc: 'ron', elastic: 'romanian', ISO639_1: 'ro' }, + rus: { franc: 'rus', elastic: 'russian', ISO639_1: 'ru' }, + spa: { franc: 'spa', elastic: 'spanish', ISO639_1: 'es' }, + swe: { franc: 'swe', elastic: 'swedish', ISO639_1: 'sv' }, + tha: { franc: 'tha', elastic: 'thai', ISO639_1: 'th' }, + tur: { franc: 'tur', elastic: 'turkish', ISO639_1: 'tr' }, +}; + +export { elasticLanguages }; diff --git a/app/api/migrations/migrations/32-add-file-field-to-connections/languages.ts b/app/api/migrations/migrations/32-add-file-field-to-connections/languages.ts new file mode 100644 index 0000000000..30c15eda27 --- /dev/null +++ b/app/api/migrations/migrations/32-add-file-field-to-connections/languages.ts @@ -0,0 +1,5 @@ +import { elasticLanguages } from './languageList'; + +export default { + data: Object.keys(elasticLanguages).map(k => elasticLanguages[k]), +}; diff --git a/app/api/search/entitiesIndex.js b/app/api/search/entitiesIndex.js index 3a76cb0642..08afee47e1 100644 --- a/app/api/search/entitiesIndex.js +++ b/app/api/search/entitiesIndex.js @@ -1,5 +1,4 @@ import { detectLanguage } from 'shared/detectLanguage'; -import { language as languages } from 'shared/languagesList'; import entities from 'api/entities'; import { legacyLogger } from 'api/log'; import { entityDefaultDocument } from 'shared/entityDefaultDocument'; @@ -8,6 +7,7 @@ import { ElasticEntityMapper } from 'api/entities.v2/database/ElasticEntityMappe import { MongoTemplatesDataSource } from 'api/templates.v2/database/MongoTemplatesDataSource'; import { getConnection } from 'api/common.v2/database/getConnectionForCurrentTenant'; import { MongoSettingsDataSource } from 'api/settings.v2/database/MongoSettingsDataSource'; +import { LanguageUtils } from 'shared/language'; import { DefaultTransactionManager } from 'api/common.v2/database/data_source_defaults'; import elasticMapping from '../../../database/elastic_mapping/elastic_mapping'; import elasticMapFactory from '../../../database/elastic_mapping/elasticMapFactory'; @@ -50,7 +50,7 @@ function setFullTextSettings(defaultDocument, id, body, doc) { language = detectLanguage(fullText); } if (defaultDocument.language) { - language = languages(defaultDocument.language); + language = LanguageUtils.fromISO639_3(defaultDocument.language).elastic; } const fullTextObject = { [`fullText_${language}`]: fullText, diff --git a/app/api/services/informationextraction/InformationExtraction.ts b/app/api/services/informationextraction/InformationExtraction.ts index 4f24a1ca66..15d554559c 100644 --- a/app/api/services/informationextraction/InformationExtraction.ts +++ b/app/api/services/informationextraction/InformationExtraction.ts @@ -19,7 +19,6 @@ import settings from 'api/settings/settings'; import templatesModel from 'api/templates/templates'; import dictionatiesModel from 'api/thesauri/dictionariesModel'; import request from 'shared/JSONRequest'; -import languages from 'shared/languages'; import { EntitySchema } from 'shared/types/entityType'; import { ExtractedMetadataSchema, ObjectIdSchema, PropertySchema } from 'shared/types/commonTypes'; import { ModelStatus } from 'shared/types/IXModelSchema'; @@ -36,6 +35,7 @@ import { } from 'api/services/informationextraction/getFiles'; import { Suggestions } from 'api/suggestions/suggestions'; import { IXExtractorType } from 'shared/types/extractorType'; +import { LanguageUtils } from 'shared/language'; import { IXModelType } from 'shared/types/IXModelType'; import { ParagraphSchema } from 'shared/types/segmentationType'; import ixmodels from './ixmodels'; @@ -174,7 +174,8 @@ class InformationExtraction { file: FileWithAggregation, _data: CommonMaterialsData ): MaterialsData => { - const languageIso = languages.get(file.language!, 'ISO639_1') || defaultTrainingLanguage; + const languageIso = + LanguageUtils.fromISO639_3(file.language!, false)?.ISO639_1 || defaultTrainingLanguage; let data: MaterialsData = { ..._data, language_iso: languageIso }; @@ -257,7 +258,7 @@ class InformationExtraction { _getEntityFromFile = async (file: EnforcedWithId | FileWithAggregation) => { let [entity] = await entities.getUnrestricted({ sharedId: file.entity, - language: languages.get(file.language!, 'ISO639_1'), + language: LanguageUtils.fromISO639_3(file.language!)?.ISO639_1, }); if (!entity) { @@ -346,7 +347,7 @@ class InformationExtraction { ...existingSuggestions, entityId: entity.sharedId!, fileId: file._id, - language: languages.get(file.language, 'ISO639_1') || 'other', + language: LanguageUtils.fromISO639_3(file.language)?.ISO639_1 || 'other', extractorId: extractor._id, propertyName: extractor.property, status: 'processing', diff --git a/app/api/services/informationextraction/getFiles.ts b/app/api/services/informationextraction/getFiles.ts index 30ef763851..638fc6cfa0 100644 --- a/app/api/services/informationextraction/getFiles.ts +++ b/app/api/services/informationextraction/getFiles.ts @@ -17,8 +17,8 @@ import { objectIndex } from 'shared/data_utils/objectIndex'; import settings from 'api/settings/settings'; import templatesModel from 'api/templates/templates'; import { propertyTypes } from 'shared/propertyTypes'; -import languages from 'shared/languages'; import { ensure } from 'shared/tsUtils'; +import { LanguageUtils } from 'shared/language'; const BATCH_SIZE = 50; const MAX_TRAINING_FILES_NUMBER = 2000; @@ -207,7 +207,7 @@ async function getFilesForTraining(templates: ObjectIdSchema[], property: string const defaultLang = (await settings.getDefaultLanguage())?.key; const filesWithEntityValue = files.map(file => { - const fileLang = languages.get(file.language, 'ISO639_1') || defaultLang; + const fileLang = LanguageUtils.fromISO639_3(file.language, false)?.ISO639_1 || defaultLang; const entity = indexedEntities[file.entity + fileLang]; if (!entity?.metadata || !entity?.metadata[property]?.length) { return { ...file, propertyType }; diff --git a/app/api/services/ocr/OcrManager.ts b/app/api/services/ocr/OcrManager.ts index 02eea9d0d5..a843f64219 100644 --- a/app/api/services/ocr/OcrManager.ts +++ b/app/api/services/ocr/OcrManager.ts @@ -7,11 +7,11 @@ import settings from 'api/settings/settings'; import { emitToTenant } from 'api/socketio/setupSockets'; import { tenants } from 'api/tenants/tenantContext'; import createError from 'api/utils/Error'; +import { LanguageUtils } from 'shared/language'; import { handleError } from 'api/utils/handleError'; // eslint-disable-next-line node/no-restricted-import import { createReadStream, createWriteStream } from 'fs'; import request from 'shared/JSONRequest'; -import { language as getLanguage } from 'shared/languagesList'; import { FileType } from 'shared/types/fileType'; import { Readable } from 'stream'; import { pipeline } from 'stream/promises'; @@ -146,7 +146,7 @@ const processResults = async (message: ResultsMessage): Promise => { const validateLanguage = async (language: string, ocrSettings?: { url: string }) => { const _ocrSettings = ocrSettings || (await getSettings()); const supportedLanguages = await fetchSupportedLanguages(_ocrSettings); - return supportedLanguages.includes(getLanguage(language, 'ISO639_1')!); + return supportedLanguages.includes(LanguageUtils.fromISO639_3(language)?.ISO639_1!); }; const getStatus = async (file: EnforcedWithId) => { @@ -219,7 +219,7 @@ class OcrManager { tenant: tenant.name, params: { filename: file.filename, - language: getLanguage(file.language!, 'ISO639_1'), + language: LanguageUtils.fromISO639_3(file.language!)?.ISO639_1, }, }); diff --git a/app/api/suggestions/blankSuggestions.ts b/app/api/suggestions/blankSuggestions.ts index 50d069fe58..c5d551dd80 100644 --- a/app/api/suggestions/blankSuggestions.ts +++ b/app/api/suggestions/blankSuggestions.ts @@ -3,13 +3,13 @@ import { files } from 'api/files'; import { EnforcedWithId } from 'api/odm'; import settings from 'api/settings'; import { propertyTypeIsMultiValued } from 'api/services/informationextraction/getFiles'; -import languages from 'shared/languages'; import { ObjectIdSchema } from 'shared/types/commonTypes'; import { IXExtractorType } from 'shared/types/extractorType'; import { FileType } from 'shared/types/fileType'; import { IXSuggestionType } from 'shared/types/suggestionType'; import { Suggestions } from './suggestions'; import templates from 'api/templates'; +import { LanguageUtils } from 'shared/language'; const fetchEntitiesBatch = async (query: any, limit: number = 100) => entitiesModel.db.find(query).select('sharedId').limit(limit).sort({ _id: 1 }).lean(); @@ -49,7 +49,7 @@ export const getBlankSuggestion = ( defaultLanguage: string ) => ({ language: file.language - ? languages.get(file.language, 'ISO639_1') || defaultLanguage + ? LanguageUtils.fromISO639_3(file.language, false)?.ISO639_1 || defaultLanguage : defaultLanguage, fileId: file._id, entityId: file.entity!, diff --git a/app/react/App/Root.js b/app/react/App/Root.js index e35c2c2afa..2af53c64c7 100644 --- a/app/react/App/Root.js +++ b/app/react/App/Root.js @@ -3,7 +3,7 @@ import PropTypes from 'prop-types'; import React, { Component } from 'react'; import serialize from 'serialize-javascript'; -import { availableLanguages as languagesList } from 'shared/languagesList'; +import { availableLanguages } from 'shared/language'; const determineHotAssets = query => ({ JS: [ @@ -87,7 +87,7 @@ class Root extends Component { const isHotReload = process.env.HOT; const { head, language, assets, reduxData, content } = this.props; - const languageData = languagesList.find(l => l.key === language); + const languageData = availableLanguages.find(l => l.key === language); const query = languageData && languageData.rtl ? '?rtl=true' : ''; const { JS, CSS } = isHotReload diff --git a/app/react/Attachments/components/AttachmentForm.js b/app/react/Attachments/components/AttachmentForm.js index ef4ec3193f..9b38b393cd 100644 --- a/app/react/Attachments/components/AttachmentForm.js +++ b/app/react/Attachments/components/AttachmentForm.js @@ -4,7 +4,7 @@ import { connect } from 'react-redux'; import { Form, Field } from 'react-redux-form'; import { FormGroup, Select } from 'app/ReactReduxForms'; -import { elasticLanguages } from 'shared/languagesList'; +import { elasticLanguages } from 'shared/language'; import t from 'app/I18N/t'; import ShowIf from 'app/App/ShowIf'; @@ -12,9 +12,9 @@ export class AttachmentForm extends Component { render() { const { model } = this.props; const validators = { originalname: { required: val => !!val && val.trim() !== '' } }; - const languageOptions = Object.keys(elasticLanguages).map(key => ({ - value: elasticLanguages[key].franc, - label: elasticLanguages[key].elastic, + const languageOptions = elasticLanguages.map(language => ({ + value: language.ISO639_3, + label: language.elastic, })); languageOptions.push({ value: 'other', label: 'other' }); diff --git a/app/react/Attachments/components/File.tsx b/app/react/Attachments/components/File.tsx index 25b8a4fb2c..8e7664393b 100644 --- a/app/react/Attachments/components/File.tsx +++ b/app/react/Attachments/components/File.tsx @@ -12,7 +12,7 @@ import { wrapDispatch } from 'app/Multireducer'; import { TocGeneratedLabel } from 'app/ToggledFeatures/tocGeneration'; import { NeedAuthorization } from 'app/Auth'; import { LocalForm } from 'app/Forms/Form'; -import { availableLanguages, getLanguageSchema } from 'shared/languagesList'; +import { availableLanguages, LanguageUtils } from 'shared/language'; import { isBlobFile } from 'shared/tsUtils'; import { EntitySchema } from 'shared/types/entityType'; import { FileType } from 'shared/types/fileType'; @@ -105,7 +105,9 @@ class File extends Component {
- {language ? getLanguageSchema(language)?.label || '' : ''} + + {language ? LanguageUtils.fromISO639_3(language)?.label || '' : ''} + ML TOC diff --git a/app/react/Attachments/components/FileList.tsx b/app/react/Attachments/components/FileList.tsx index 88c7914ee2..aaf8d681c1 100644 --- a/app/react/Attachments/components/FileList.tsx +++ b/app/react/Attachments/components/FileList.tsx @@ -5,8 +5,8 @@ import { Translate } from 'app/I18N'; import { FileType } from 'shared/types/fileType'; import { EntitySchema } from 'shared/types/entityType'; import UploadButton from 'app/Metadata/components/UploadButton'; +import { LanguageUtils } from 'shared/language'; import { NeedAuthorization } from 'app/Auth'; -import languageLib from 'shared/languages'; import { ConnectedFile as File } from './File'; import './scss/filelist.scss'; @@ -28,7 +28,7 @@ export type FileListProps = { const orderFilesByLanguage = (files: FileType[], systemLanguage: string) => { const orderedFiles = [...files]; const fileIndex = orderedFiles.findIndex(file => { - const language = languageLib.get(file.language as string, 'ISO639_1'); + const language = LanguageUtils.fromISO639_3(file.language as string)?.ISO639_1; return language === systemLanguage; }); if (fileIndex > -1) { diff --git a/app/react/Attachments/components/specs/FileList.spec.tsx b/app/react/Attachments/components/specs/FileList.spec.tsx index 83278c25a1..cbf57823bb 100644 --- a/app/react/Attachments/components/specs/FileList.spec.tsx +++ b/app/react/Attachments/components/specs/FileList.spec.tsx @@ -2,8 +2,8 @@ import React from 'react'; import { shallow, ShallowWrapper } from 'enzyme'; import { FileType } from 'shared/types/fileType'; import UploadButton from 'app/Metadata/components/UploadButton'; +import { LanguageUtils } from 'shared/language'; import { EntitySchema } from 'shared/types/entityType'; -import languageLib from 'shared/languages'; import { ConnectedFile as File } from '../File'; import { FileList, FileListProps } from '../FileList'; @@ -49,7 +49,7 @@ describe('FileList', () => { expect(renderedFiles.at(0).props().file).toBe(file); expect(renderedFiles.at(1).props().file).toBe(file2); const firstFile = renderedFiles.at(0).props().file; - const language = languageLib.get(firstFile.language as string, 'ISO639_1'); + const language = LanguageUtils.fromISO639_3(firstFile.language as string)?.ISO639_1; expect(entity.language).toEqual(language); }); @@ -58,7 +58,7 @@ describe('FileList', () => { render(); const renderedFiles = component.find(File); const firstFile = renderedFiles.at(0).props().file; - const language = languageLib.get(firstFile.language as string, 'ISO639_1'); + const language = LanguageUtils.fromISO639_3(firstFile.language as string)?.ISO639_1; expect(entity.language).toEqual(language); }); diff --git a/app/react/Layout/DocumentLanguage.js b/app/react/Layout/DocumentLanguage.js index a509c2a957..de5e20ae3a 100644 --- a/app/react/Layout/DocumentLanguage.js +++ b/app/react/Layout/DocumentLanguage.js @@ -2,7 +2,7 @@ import PropTypes from 'prop-types'; import React, { Component } from 'react'; import { connect } from 'react-redux'; -import { language as getLanguage } from 'shared/languagesList'; +import { LanguageUtils } from 'shared/language'; import t from '../I18N/t'; export class DocumentLanguage extends Component { @@ -16,13 +16,13 @@ export class DocumentLanguage extends Component { if (doc.get('file')) { const fileLanguage = doc.getIn(['file', 'language']); if (fileLanguage && fileLanguage !== 'other') { - if (this.props.locale === getLanguage(fileLanguage, 'ISO639_1')) { + if (this.props.locale === LanguageUtils.fromISO639_3(fileLanguage, false)?.ISO639_1) { return null; } return ( - {getLanguage(fileLanguage, 'ISO639_1') || fileLanguage} + {LanguageUtils.fromISO639_3(fileLanguage, false)?.ISO639_1 || fileLanguage} ); } diff --git a/app/react/UI/Icon/Icon.js b/app/react/UI/Icon/Icon.js index 60548988f3..1ed62e2ec6 100644 --- a/app/react/UI/Icon/Icon.js +++ b/app/react/UI/Icon/Icon.js @@ -3,13 +3,13 @@ import React from 'react'; import { connect } from 'react-redux'; import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; -import { availableLanguages as languagesList } from 'shared/languagesList'; +import { availableLanguages } from 'shared/language'; import { loadIcons } from './library'; loadIcons(); const Icon = ({ locale = '', ...ownProps }) => { - const languageData = languagesList.find(l => l.key === locale); + const languageData = availableLanguages.find(l => l.key === locale); return ( ); diff --git a/app/react/V2/Components/UI/Sidepanel.tsx b/app/react/V2/Components/UI/Sidepanel.tsx index 603685dc34..8c997388d8 100644 --- a/app/react/V2/Components/UI/Sidepanel.tsx +++ b/app/react/V2/Components/UI/Sidepanel.tsx @@ -3,7 +3,7 @@ import React from 'react'; import { Transition } from '@headlessui/react'; import { useParams } from 'react-router-dom'; import { XMarkIcon } from '@heroicons/react/20/solid'; -import { availableLanguages } from 'shared/languagesList'; +import { availableLanguages } from 'shared/language'; import { Translate } from 'app/I18N'; interface SidePanelProps { diff --git a/app/react/V2/Routes/Settings/Translations/EditTranslations.tsx b/app/react/V2/Routes/Settings/Translations/EditTranslations.tsx index 83c3f01e9b..69dc388a81 100644 --- a/app/react/V2/Routes/Settings/Translations/EditTranslations.tsx +++ b/app/react/V2/Routes/Settings/Translations/EditTranslations.tsx @@ -24,7 +24,7 @@ import { Button, ToggleButton } from 'V2/Components/UI'; import * as translationsAPI from 'V2/api/translations'; import * as settingsAPI from 'V2/api/settings'; import { notificationAtom } from 'V2/atoms'; -import { availableLanguages } from 'shared/languagesList'; +import { availableLanguages } from 'shared/language'; import { Settings } from 'shared/types/settingsType'; import { FetchResponseError } from 'shared/JSONRequest'; import { LanguagePill } from './components/LanguagePill'; diff --git a/app/react/Viewer/utils/determineDirection.js b/app/react/Viewer/utils/determineDirection.js index c0eb474106..b6db82472c 100644 --- a/app/react/Viewer/utils/determineDirection.js +++ b/app/react/Viewer/utils/determineDirection.js @@ -1,10 +1,7 @@ -import { - language as francLanguages, - availableLanguages as languagesList, -} from 'shared/languagesList'; +import { LanguageUtils, availableLanguages } from 'shared/language'; export default ({ language }) => { - const languageKey = francLanguages(language, 'ISO639_1'); - const laguageData = languagesList.find(l => l.key === languageKey) || {}; + const languageKey = LanguageUtils.fromISO639_3(language).ISO639_1; + const laguageData = availableLanguages.find(l => l.key === languageKey) || {}; return `force-${laguageData.rtl ? 'rtl' : 'ltr'}`; }; diff --git a/app/shared/detectLanguage.ts b/app/shared/detectLanguage.ts index 025bc08084..4a7bf78649 100644 --- a/app/shared/detectLanguage.ts +++ b/app/shared/detectLanguage.ts @@ -1,6 +1,7 @@ import franc from 'franc'; -import { language, LanguageCode } from 'shared/languagesList'; +import { LanguageCode, LanguageUtils } from 'shared/language'; const detectLanguage = (text: string, purpose: LanguageCode = 'elastic') => - language(franc(text), purpose); + LanguageUtils.fromISO639_3(franc(text))?.[purpose]; + export { detectLanguage }; diff --git a/app/shared/entityDefaultDocument.ts b/app/shared/entityDefaultDocument.ts index ca7473148c..4d212782c9 100644 --- a/app/shared/entityDefaultDocument.ts +++ b/app/shared/entityDefaultDocument.ts @@ -1,5 +1,5 @@ import { FileType } from 'shared/types/fileType'; -import { language } from 'shared/languagesList'; +import { LanguageUtils } from 'shared/language'; export const entityDefaultDocument = ( entityDocuments: Array, @@ -9,12 +9,14 @@ export const entityDefaultDocument = ( const documents = entityDocuments || []; const documentMatchingEntity = documents.find( (document: FileType) => - document.language && language(document.language, 'ISO639_1') === entityLanguage + document.language && + LanguageUtils.fromISO639_3(document.language)?.ISO639_1 === entityLanguage ); const documentMatchingDefault = documents.find( (document: FileType) => - document.language && language(document.language, 'ISO639_1') === defaultLanguage + document.language && + LanguageUtils.fromISO639_3(document.language)?.ISO639_1 === defaultLanguage ); return documentMatchingEntity || documentMatchingDefault || documents[0]; diff --git a/app/shared/languagesList.ts b/app/shared/language/availableLanguages.ts similarity index 93% rename from app/shared/languagesList.ts rename to app/shared/language/availableLanguages.ts index aa6f1804d7..ecde594d38 100644 --- a/app/shared/languagesList.ts +++ b/app/shared/language/availableLanguages.ts @@ -3,23 +3,12 @@ import { LanguageSchema } from 'shared/types/commonTypes'; type LanguageCode = 'elastic' | 'ISO639_3' | 'ISO639_1'; -type LegacyElasticObject = Record< - string, - { franc: string; elastic: string; ISO639_1: string | null } ->; - -type Language = { - label: string; - value: string; -}; - -const undefinedLanguage: LanguageSchema = { +const otherLanguageSchema: LanguageSchema = { label: 'Other', key: 'other' as any, ISO639_1: 'other', ISO639_3: 'other', elastic: 'other', - franc: 'other', localized_label: 'Other', translationAvailable: false, }; @@ -78,7 +67,6 @@ const availableLanguages: LanguageSchema[] = [ key: 'ar', rtl: true, ISO639_3: 'arb', - franc: 'arb', elastic: 'arabic', ISO639_1: 'ar', localized_label: 'العربية', @@ -96,7 +84,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Armenian', key: 'hy', ISO639_3: 'hye', - franc: 'hye', elastic: 'armenian', ISO639_1: 'hy', localized_label: 'Հայերեն', @@ -162,7 +149,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Basque', key: 'eu', ISO639_3: 'eus', - franc: 'eus', elastic: 'basque', ISO639_1: 'eu', localized_label: 'Euskara', @@ -220,7 +206,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Bulgarian', key: 'bg', ISO639_3: 'bul', - franc: 'bul', elastic: 'bulgarian', ISO639_1: 'bg', localized_label: 'Български', @@ -238,7 +223,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Catalan', key: 'ca', ISO639_3: 'cat', - franc: 'cat', elastic: 'catalan', ISO639_1: 'ca', localized_label: 'Català', @@ -272,7 +256,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Chinese', key: 'zh', ISO639_3: 'zho', - franc: 'cjk', elastic: 'cjk', ISO639_1: 'zh', localized_label: '中文', @@ -282,7 +265,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Chinese (Simplified)', key: 'zh-Hans', ISO639_3: 'zho-Hans', - franc: 'cjk', elastic: 'cjk', ISO639_1: 'zh-Hans', localized_label: '简体中文', @@ -292,7 +274,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Chinese (Traditional)', key: 'zh-Hant', ISO639_3: 'zho-Hant', - franc: 'cjk', elastic: 'cjk', ISO639_1: 'zh-Hant', localized_label: '繁體中文', @@ -342,7 +323,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Czech', key: 'cs', ISO639_3: 'ces', - franc: 'ces', elastic: 'czech', ISO639_1: 'cs', localized_label: 'Čeština', @@ -352,7 +332,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Danish', key: 'da', ISO639_3: 'dan', - franc: 'dan', elastic: 'danish', ISO639_1: 'da', localized_label: 'Dansk', @@ -371,7 +350,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Dutch', key: 'nl', ISO639_3: 'nld', - franc: 'nld', elastic: 'dutch', ISO639_1: 'nl', localized_label: 'Nederlands', @@ -389,7 +367,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'English', key: 'en', ISO639_3: 'eng', - franc: 'eng', elastic: 'english', ISO639_1: 'en', localized_label: 'English', @@ -439,7 +416,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Finnish', key: 'fi', ISO639_3: 'fin', - franc: 'fin', elastic: 'finnish', ISO639_1: 'fi', localized_label: 'Suomi', @@ -449,7 +425,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'French', key: 'fr', ISO639_3: 'fra', - franc: 'fra', elastic: 'french', ISO639_1: 'fr', localized_label: 'Français', @@ -467,7 +442,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Galician', key: 'gl', ISO639_3: 'glg', - franc: 'glg', elastic: 'galician', ISO639_1: 'gl', localized_label: 'Galego', @@ -501,7 +475,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'German', key: 'de', ISO639_3: 'deu', - franc: 'deu', elastic: 'german', ISO639_1: 'de', localized_label: 'Deutsch', @@ -511,7 +484,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Greek', key: 'el', ISO639_3: 'ell', - franc: 'ell', elastic: 'greek', ISO639_1: 'el', localized_label: 'Ελληνικά', @@ -571,7 +543,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Hindi', key: 'hi', ISO639_3: 'hin', - franc: 'hin', elastic: 'hindi', ISO639_1: 'hi', localized_label: 'हिन्दी', @@ -589,7 +560,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Hungarian', key: 'hu', ISO639_3: 'hun', - franc: 'hun', elastic: 'hungarian', ISO639_1: 'hu', localized_label: 'Magyar', @@ -623,7 +593,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Indonesian', key: 'in', ISO639_3: 'ind', - franc: 'ind', elastic: 'indonesian', ISO639_1: 'in', localized_label: 'Indonesia', @@ -665,7 +634,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Irish', key: 'ga', ISO639_3: 'gle', - franc: 'gle', elastic: 'irish', ISO639_1: 'ga', localized_label: 'Gaeilge', @@ -675,7 +643,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Italian', key: 'it', ISO639_3: 'ita', - franc: 'ita', elastic: 'italian', ISO639_1: 'it', localized_label: 'Italiano', @@ -685,7 +652,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Japanese', key: 'ja', ISO639_3: 'jpn', - franc: 'cjk', elastic: 'cjk', ISO639_1: 'ja', localized_label: '日本語', @@ -800,7 +766,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Korean', key: 'ko', ISO639_3: 'kor', - franc: 'cjk', elastic: 'cjk', ISO639_1: 'ko', localized_label: '한국어', @@ -811,7 +776,6 @@ const availableLanguages: LanguageSchema[] = [ key: 'ku', rtl: true, ISO639_3: 'kur', - franc: 'ckb', elastic: 'sorani', ISO639_1: 'ku', localized_label: 'Kurdî', @@ -845,7 +809,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Latvian (Lettish)', key: 'lv', ISO639_3: 'lav', - franc: 'lav', elastic: 'latvian', ISO639_1: 'lv', localized_label: 'Latviešu', @@ -871,7 +834,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Lithuanian', key: 'lt', ISO639_3: 'lit', - franc: 'lit', elastic: 'lithuanian', ISO639_1: 'lt', localized_label: 'Lietuvių', @@ -1025,7 +987,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Norwegian bokmål', key: 'nb', ISO639_3: 'nob', - franc: 'nob', elastic: 'norwegian', ISO639_1: 'nb', localized_label: 'Norsk bokmål', @@ -1035,7 +996,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Norwegian nynorsk', key: 'nn', ISO639_3: 'nno', - franc: 'nno', elastic: 'norwegian', ISO639_1: 'nn', localized_label: 'Norsk nynorsk', @@ -1111,7 +1071,6 @@ const availableLanguages: LanguageSchema[] = [ key: 'fa', rtl: true, ISO639_3: 'fas', - franc: 'fas', elastic: 'persian', ISO639_1: 'fa', localized_label: 'فارسی', @@ -1129,7 +1088,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Portuguese', key: 'pt', ISO639_3: 'por', - franc: 'por', elastic: 'portuguese', ISO639_1: 'pt', localized_label: 'Português', @@ -1163,7 +1121,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Romanian/Moldavian', key: 'ro', ISO639_3: 'ron', - franc: 'ron', elastic: 'romanian', ISO639_1: 'ro', localized_label: 'Română', @@ -1173,7 +1130,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Russian', key: 'ru', ISO639_3: 'rus', - franc: 'rus', elastic: 'russian', ISO639_1: 'ru', localized_label: 'Русский', @@ -1319,7 +1275,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Spanish', key: 'es', ISO639_3: 'spa', - franc: 'spa', elastic: 'spanish', ISO639_1: 'es', localized_label: 'Español', @@ -1345,7 +1300,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Swedish', key: 'sv', ISO639_3: 'swe', - franc: 'swe', elastic: 'swedish', ISO639_1: 'sv', localized_label: 'Svenska', @@ -1403,7 +1357,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Thai', key: 'th', ISO639_3: 'tha', - franc: 'tha', elastic: 'thai', ISO639_1: 'th', localized_label: 'ไทย', @@ -1445,7 +1398,6 @@ const availableLanguages: LanguageSchema[] = [ label: 'Turkish', key: 'tr', ISO639_3: 'tur', - franc: 'tur', elastic: 'turkish', ISO639_1: 'tr', localized_label: 'Türkçe', @@ -1599,46 +1551,6 @@ const availableLanguages: LanguageSchema[] = [ }, ]; -const languageMapper = (ISO639_3: string, to: LanguageCode = 'elastic') => { - const language = availableLanguages.find(item => item.ISO639_3 === ISO639_3); - const defaultValue = to !== 'ISO639_1' ? undefinedLanguage.ISO639_3 : null; - - return language?.[to] || defaultValue; -}; - -const getLanguagesByCode = (code: LanguageCode = 'elastic'): Language[] => - availableLanguages - .filter(item => Boolean(item[code])) - .map(item => ({ value: item[code] as string, label: item.label })); - -const getLanguageCodes = (languages: Language[]): string[] => languages.map(item => item.value); - -const getLanguageSchema = (ISO639_3: string) => - availableLanguages.find(item => item.ISO639_3 === ISO639_3) || undefinedLanguage; - -const ISO6391Languages = getLanguagesByCode('ISO639_1'); -const ISO6391Codes = getLanguageCodes(ISO6391Languages); - -const elasticLanguages: LegacyElasticObject = availableLanguages - .filter(item => Boolean(item?.elastic && item?.franc)) - .reduce( - (prev, next) => ({ - ...prev, - [next.franc!]: { - franc: next.franc!, - elastic: next.elastic!, - ISO639_1: ['cjk', 'ckb'].includes(next.franc!) ? null : next?.ISO639_1 || null, - }, - }), - {} as LegacyElasticObject - ); - export type { LanguageCode }; -export { - elasticLanguages, - availableLanguages, - ISO6391Codes, - languageMapper as language, - getLanguageSchema, -}; +export { otherLanguageSchema, availableLanguages }; diff --git a/app/shared/language/index.ts b/app/shared/language/index.ts new file mode 100644 index 0000000000..3fe6d946ea --- /dev/null +++ b/app/shared/language/index.ts @@ -0,0 +1,12 @@ +import { LanguageUtils } from './languageUtils'; + +const ISO6391Languages = LanguageUtils.getByCode('ISO639_1'); +const ISO6391Codes = LanguageUtils.getCodes(ISO6391Languages, 'ISO639_1'); + +const elasticLanguages = LanguageUtils.getByCode('elastic'); +const elasticLanguageCodes = LanguageUtils.getCodes(elasticLanguages, 'elastic'); + +export { elasticLanguages, elasticLanguageCodes, ISO6391Codes, LanguageUtils }; +export { availableLanguages } from './availableLanguages'; + +export type { LanguageCode } from './availableLanguages'; diff --git a/app/shared/language/languageUtils.ts b/app/shared/language/languageUtils.ts new file mode 100644 index 0000000000..da55ba6d48 --- /dev/null +++ b/app/shared/language/languageUtils.ts @@ -0,0 +1,52 @@ +import { LanguageSchema } from 'shared/types/commonTypes'; +import { LanguageCode, availableLanguages, otherLanguageSchema } from './availableLanguages'; + +class LanguageUtils { + private static createLanguageIndex(code: LanguageCode) { + return availableLanguages.reduce( + (prev, next) => { + if (!next[code]) return prev; + + return { + ...prev, + [next[code]]: { ...next }, + }; + }, + {} as Record + ); + } + + private static ISO639_3Index = this.createLanguageIndex('ISO639_3'); + + private static ISO639_1Index = this.createLanguageIndex('ISO639_1'); + + private static elasticIndex = this.createLanguageIndex('elastic'); + + private static uniqueValues(array: string[]) { + return Array.from(new Set(array)); + } + + static getByCode(code: LanguageCode = 'elastic'): LanguageSchema[] { + return availableLanguages.filter(item => Boolean(item[code])); + } + + static getCodes(languages: LanguageSchema[], languageCode: LanguageCode): string[] { + return this.uniqueValues(languages.map(item => item[languageCode]) as string[]); + } + + static fromISO639_3(ISO639_3: string, enableFallback = true): LanguageSchema { + const fallback = enableFallback ? otherLanguageSchema : null; + + return this.ISO639_3Index[ISO639_3] || fallback; + } + + static fromISO639_1(ISO639_1: string): LanguageSchema | null { + return this.ISO639_1Index[ISO639_1] || null; + } + + static fromElastic(elastic: string): LanguageSchema { + return this.elasticIndex[elastic] || otherLanguageSchema; + } +} + +export { LanguageUtils }; diff --git a/app/shared/languages.js b/app/shared/languages.js deleted file mode 100644 index ef5f275ca1..0000000000 --- a/app/shared/languages.js +++ /dev/null @@ -1,14 +0,0 @@ -import { language, elasticLanguages } from 'shared/languagesList'; - -export default { - get: language, - data: Object.keys(elasticLanguages).map(k => elasticLanguages[k]), - getAll: (purpose = 'elastic') => { - const unique = (v, i, a) => a.indexOf(v) === i; - const notNull = v => Boolean(v); - return Object.keys(elasticLanguages) - .map(k => elasticLanguages[k][purpose]) - .filter(unique) - .filter(notNull); - }, -}; diff --git a/app/shared/specs/languages.spec.js b/app/shared/specs/languages.spec.js index 3f37835923..ef7fc6e4a9 100644 --- a/app/shared/specs/languages.spec.js +++ b/app/shared/specs/languages.spec.js @@ -1,42 +1,8 @@ -import languages from '../languages'; -import { detectLanguage } from '../detectLanguage'; +import { detectLanguage } from 'shared/detectLanguage'; +import { availableLanguages, LanguageUtils } from 'shared/language'; +import { otherLanguageSchema } from 'shared/language/availableLanguages'; describe('languages', () => { - describe('getAll', () => { - it('should return a list of all languages for the default purpose', () => { - expect(languages.getAll().length).toBe(32); - expect(languages.getAll()[0]).toBe('arabic'); - }); - - it('should return a list of all languages for the passed purpose', () => { - expect(languages.getAll('ISO639_1').length).toBe(31); - expect(languages.getAll('franc').length).toBe(33); - - expect(languages.getAll('ISO639_1')[5]).toBe(languages.data[6].ISO639_1); - expect(languages.getAll('franc')[5]).toBe(languages.data[5].franc); - }); - }); - - describe('get', () => { - it('should return a match for the key for the default purpose', () => { - expect(languages.get('glg')).toBe('galician'); - expect(languages.get('lav')).toBe('latvian'); - }); - - it('should return a match for the key for the passed purpose', () => { - expect(languages.get('glg', 'ISO639_1')).toBe('gl'); - expect(languages.get('lav', 'ISO639_1')).toBe('lv'); - }); - - it('should return other for a key in a non supported lang', () => { - expect(languages.get('und')).toBe('other'); - }); - - it('should return null for a key in a non supported lang when asking for ISO639_1', () => { - expect(languages.get('und', 'ISO639_1')).toBe(null); - }); - }); - describe('detectLanguage', () => { it('should return the text language (for elasticsearch by default)', () => { expect(detectLanguage('de que color es el caballo blanco de santiago')).toBe('spanish'); @@ -50,8 +16,10 @@ describe('languages', () => { expect(detectLanguage('what is the colour of the white horse of santiago', 'ISO639_1')).toBe( 'en' ); - expect(detectLanguage('de que color es el caballo blanco de santiago', 'franc')).toBe('spa'); - expect(detectLanguage('what is the colour of the white horse of santiago', 'franc')).toBe( + expect(detectLanguage('de que color es el caballo blanco de santiago', 'ISO639_3')).toBe( + 'spa' + ); + expect(detectLanguage('what is the colour of the white horse of santiago', 'ISO639_3')).toBe( 'eng' ); @@ -65,4 +33,42 @@ describe('languages', () => { ); }); }); + + describe('Language Utils', () => { + it('should return language schema for the given a ISO639_3 language code', () => { + const input = availableLanguages[0]; + + expect(LanguageUtils.fromISO639_3(input.ISO639_3)).toEqual(input); + }); + + it('should return default language schema if given a ISO639_3 language code that does not exist on available languages', () => { + const input = 'language_code_that_does_not_exist'; + + expect(LanguageUtils.fromISO639_3(input)).toEqual(otherLanguageSchema); + }); + + it('should return language schema for the given a elastic language code', () => { + const input = availableLanguages.find(language => Boolean(language.elastic)); + + expect(LanguageUtils.fromElastic(input.elastic)).toEqual(input); + }); + + it('should return default language schema if given a elastic language code that does not exist on available languages', () => { + const input = 'language_code_that_does_not_exist'; + + expect(LanguageUtils.fromElastic(input)).toEqual(otherLanguageSchema); + }); + + it('should return language schema for the given a ISO639_1 language code', () => { + const input = availableLanguages[0]; + + expect(LanguageUtils.fromISO639_1(input.ISO639_1)).toEqual(input); + }); + + it('should return null if given a ISO639_1 language code that does not exist on available languages', () => { + const input = 'language_code_that_does_not_exist'; + + expect(LanguageUtils.fromISO639_1(input)).toBe(null); + }); + }); }); diff --git a/app/shared/types/commonSchemas.ts b/app/shared/types/commonSchemas.ts index 17fccc396e..05203af95c 100644 --- a/app/shared/types/commonSchemas.ts +++ b/app/shared/types/commonSchemas.ts @@ -251,7 +251,6 @@ export const languageSchema = { rtl: { type: 'boolean' }, default: { type: 'boolean' }, ISO639_3: { type: 'string' }, - franc: { type: 'string' }, elastic: { type: 'string' }, ISO639_1: { type: 'string' }, localized_label: { type: 'string' }, diff --git a/app/shared/types/commonTypes.d.ts b/app/shared/types/commonTypes.d.ts index d0edf3b932..6d220f3b97 100644 --- a/app/shared/types/commonTypes.d.ts +++ b/app/shared/types/commonTypes.d.ts @@ -244,7 +244,6 @@ export interface LanguageSchema { rtl?: boolean; default?: boolean; ISO639_3?: string; - franc?: string; elastic?: string; ISO639_1?: string; localized_label?: string; @@ -258,7 +257,6 @@ export type LanguagesListSchema = { rtl?: boolean; default?: boolean; ISO639_3?: string; - franc?: string; elastic?: string; ISO639_1?: string; localized_label?: string; diff --git a/database/elastic_mapping/elastic_mapping.js b/database/elastic_mapping/elastic_mapping.js index e58a778b3d..924e626e03 100644 --- a/database/elastic_mapping/elastic_mapping.js +++ b/database/elastic_mapping/elastic_mapping.js @@ -1,4 +1,4 @@ -import languages from '../../app/shared/languages'; +import { elasticLanguageCodes } from 'shared/language'; import baseProperties from './base_properties'; import settings from './settings'; import dynamicTemplates from './dynamic_templates'; @@ -11,7 +11,7 @@ const config = { }, }; -languages.getAll().forEach(language => { +elasticLanguageCodes.forEach(language => { config.settings.analysis.filter[`${language}_stop`] = { type: 'stop', stopwords: `_${language}_`, From 7d20d66bb131c9119ff5e7aa4541de6612833e63 Mon Sep 17 00:00:00 2001 From: Joao Date: Thu, 12 Dec 2024 08:18:40 -0400 Subject: [PATCH 6/8] Bump version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index c2e9601163..b09fd06ee0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "uwazi", - "version": "1.193.4", + "version": "1.193.5", "description": "Uwazi is a free, open-source solution for organising, analysing and publishing your documents.", "keywords": [ "react" From 879e8fa4d7784196ff3a279ca187a82e584cc8e5 Mon Sep 17 00:00:00 2001 From: Joao Date: Thu, 12 Dec 2024 08:23:30 -0400 Subject: [PATCH 7/8] Merge back from production and Bump rc version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 8e4d12b580..4b64e967b7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "uwazi", - "version": "1.194.0-rc5", + "version": "1.194.0-rc6", "description": "Uwazi is a free, open-source solution for organising, analysing and publishing your documents.", "keywords": [ "react" From 70dbc9f1f01a979f15e84b697f33c53505af8cc7 Mon Sep 17 00:00:00 2001 From: A happy cat Date: Thu, 12 Dec 2024 15:52:51 +0100 Subject: [PATCH 8/8] downgraded monaco editor --- package.json | 2 +- yarn.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/package.json b/package.json index 4b64e967b7..eea136130f 100644 --- a/package.json +++ b/package.json @@ -173,7 +173,7 @@ "mime-types": "^2.1.35", "moment": "^2.30.1", "moment-timezone": "0.5.46", - "monaco-editor": "^0.52.0", + "monaco-editor": "0.50.0", "monaco-editor-webpack-plugin": "^7.1.0", "mongodb": "6.3.0", "mongoose": "8.1.2", diff --git a/yarn.lock b/yarn.lock index 44c7cc77b8..a9c2c490f5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -12190,10 +12190,10 @@ monaco-editor-webpack-plugin@^7.1.0: dependencies: loader-utils "^2.0.2" -monaco-editor@^0.52.0: - version "0.52.0" - resolved "https://registry.yarnpkg.com/monaco-editor/-/monaco-editor-0.52.0.tgz#d47c02b191eae208d68878d679b3ee7456031be7" - integrity sha512-OeWhNpABLCeTqubfqLMXGsqf6OmPU6pHM85kF3dhy6kq5hnhuVS1p3VrEW/XhWHc71P2tHyS5JFySD8mgs1crw== +monaco-editor@0.50.0: + version "0.50.0" + resolved "https://registry.yarnpkg.com/monaco-editor/-/monaco-editor-0.50.0.tgz#44e62b124c8aed224e1d310bbbe6ffd6d5122413" + integrity sha512-8CclLCmrRRh+sul7C08BmPBP3P8wVWfBHomsTcndxg5NRCEPfu/mc2AGU8k37ajjDVXcXFc12ORAMUkmk+lkFA== mongodb-connection-string-url@^3.0.0: version "3.0.0"