diff --git a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/FundingCallQueryGenerator.cs b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/FundingCallQueryGenerator.cs index acb043c..d60392a 100644 --- a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/FundingCallQueryGenerator.cs +++ b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/FundingCallQueryGenerator.cs @@ -131,4 +131,11 @@ private static IEnumerable, QueryCont return filters; } + + protected override Func, IPromise>> GenerateSortForSearch(FundingCallSearchParameters parameters) + { + // Sort funding calls + return sortDescriptor => sortDescriptor + .Field(f => f.CallProgrammeDueDate, SortOrder.Ascending); + } } \ No newline at end of file diff --git a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/FundingDecisionQueryGenerator.cs b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/FundingDecisionQueryGenerator.cs index 78090cb..344870f 100644 --- a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/FundingDecisionQueryGenerator.cs +++ b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/FundingDecisionQueryGenerator.cs @@ -174,4 +174,11 @@ private static IEnumerable, Query return filters; } + + protected override Func, IPromise>> GenerateSortForSearch(FundingDecisionSearchParameters parameters) + { + // Sort funding decisions + return sortDescriptor => sortDescriptor + .Field(f => f.FundingStartDate, SortOrder.Descending); + } } \ No newline at end of file diff --git a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/InfrastructureQueryGenerator.cs b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/InfrastructureQueryGenerator.cs index 8e88034..595ce12 100644 --- a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/InfrastructureQueryGenerator.cs +++ b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/InfrastructureQueryGenerator.cs @@ -38,4 +38,10 @@ protected override Func, 
QueryContainer { throw new NotImplementedException(); } + + protected override Func, IPromise>> GenerateSortForSearch(InfrastructureSearchParameters parameters) + { + // Sort infrastructures + return sortDescriptor => sortDescriptor; + } } \ No newline at end of file diff --git a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/OrganizationQueryGenerator.cs b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/OrganizationQueryGenerator.cs index 959f9b8..e81c741 100644 --- a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/OrganizationQueryGenerator.cs +++ b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/OrganizationQueryGenerator.cs @@ -38,4 +38,10 @@ protected override Func, QueryContainer> { throw new NotImplementedException(); } + + protected override Func, IPromise>> GenerateSortForSearch(OrganizationSearchParameters parameters) + { + // Sort organizations + return sortDescriptor => sortDescriptor; + } } \ No newline at end of file diff --git a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/PublicationQueryGenerator.cs b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/PublicationQueryGenerator.cs index be9516b..faa2362 100644 --- a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/PublicationQueryGenerator.cs +++ b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/PublicationQueryGenerator.cs @@ -30,7 +30,7 @@ private static IEnumerable, QueryCont if (!string.IsNullOrWhiteSpace(parameters.Name)) { subQueries.Add(t => - t.Match(query => query.Field(f => f.Name) + t.MatchPhrase(query => query.Field(f => f.Name) .Query(parameters.Name))); } @@ -89,6 +89,18 @@ private static IEnumerable, QueryCont .Field(f => f.Authors.Suffix(nameof(Author.LastName))).Query(parameters.AuthorLastName))))))); } + if (!string.IsNullOrWhiteSpace(parameters.AuthorOrcId)) + { + subQueries.Add( + q => q.Nested( + query => query + .Path(p => p.Authors) + .Query( + q => q.Match(m => m + .Field(f => 
f.Authors.Suffix(nameof(Author.Orcid))) + .Query(parameters.AuthorOrcId))))); + } + if (!string.IsNullOrWhiteSpace(parameters.ConferenceName)) { subQueries.Add(t => @@ -224,25 +236,22 @@ private static IEnumerable, QueryCont .Value(parameters.OrganizationUnitId))); } - if (parameters.AuthorOrcId is not null) + // Searching with type code requires exact match. + if (!string.IsNullOrWhiteSpace(parameters.TypeCode)) { - filters.Add(t => - t.Term(s => s.Field(f => f.Authors.Suffix(nameof(Author.Orcid))) - .Value(parameters.AuthorOrcId))); + filters.Add(t => t.Term(term => term + .Field(f => f.Type!.Code) + .Value(parameters.TypeCode) + )); } - - if (parameters.TypeCode is not null) - { - filters.Add(t => - t.Term(s => s.Field(f => f.Type) - .Value(parameters.TypeCode))); - } - - if (parameters.PublisherOpenAccess is not null) + + // Searching with publisher open access code requires exact match. + if (!string.IsNullOrWhiteSpace(parameters.PublisherOpenAccess)) { - filters.Add(t => - t.Term(s => s.Field(f => f.PublisherOpenAccess) - .Value(parameters.PublisherOpenAccess))); + filters.Add(t => t.Term(term => term + .Field(f => f.PublisherOpenAccess!.Code) + .Value(parameters.PublisherOpenAccess) + )); } if (parameters.Issn is not null) @@ -274,11 +283,13 @@ private static IEnumerable, QueryCont .Value(parameters.Doi))); } - if (parameters.Status is not null) + // Searching with status requires exact match. 
+ if (!string.IsNullOrWhiteSpace(parameters.Status)) { - filters.Add(t => - t.Term(s => s.Field(f => f.Status) - .Value(parameters.Status))); + filters.Add(t => t.Term(term => term + .Field(f => f.Status!.Code) + .Value(parameters.Status) + )); } return filters; @@ -288,4 +299,11 @@ protected override Func, QueryContainer> G { return queryContainerDescriptor => queryContainerDescriptor.Term(query => query.Field(f => f.Id).Value(id)); } + + protected override Func, IPromise>> GenerateSortForSearch(PublicationSearchParameters parameters) + { + // Sort publications + return sortDescriptor => sortDescriptor + .Field(f => f.PublicationYear, SortOrder.Descending); + } } \ No newline at end of file diff --git a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/QueryGeneratorBase.cs b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/QueryGeneratorBase.cs index 2d3fb57..8afc1a8 100644 --- a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/QueryGeneratorBase.cs +++ b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/QueryGeneratorBase.cs @@ -18,6 +18,7 @@ public Func, ISearchRequest> GenerateQuery(TIn searchPara .Index(indexName) .Skip((pageNumber - 1) * pageSize) .Take(pageSize) + .Sort(GenerateSortForSearch(searchParameters)) .Query(GenerateQueryForSearch(searchParameters)); } @@ -30,6 +31,8 @@ public Func, ISearchRequest> GenerateSingleQuery(string i } protected abstract Func, QueryContainer> GenerateQueryForSearch(TIn parameters); - + protected abstract Func,QueryContainer> GenerateQueryForSingle(string id); + + protected abstract Func, IPromise>> GenerateSortForSearch(TIn parameters); } \ No newline at end of file diff --git a/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/ResearchDatasetQueryGenerator.cs b/aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/ResearchDatasetQueryGenerator.cs index 6fb8767..1329002 100644 --- 
using CSC.PublicApi.DatabaseContext;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;

namespace CSC.PublicApi.Indexer;

/// <summary>
/// Checks, before indexing starts, that the database tables read by the indexer contain data.
/// </summary>
public class DatabasePreflightCheck
{
    // Kept as a compile-time constant so that every "LogPrefix + literal" message template below
    // is itself a constant expression (avoids analyzer rule CA2254).
    private const string LogPrefix = "Database preflight check: ";

    // Most of publications should have author information linked via fact_contribution.
    // Exact ratio cannot be determined, 80% is used as a baseline.
    private const double MinimumPublicationCoverage = 0.8;

    private readonly ApiDbContext? _context;
    private readonly ILogger<DatabasePreflightCheck>? _logger;

    /// <summary>
    /// Creates a check that queries <paramref name="context"/> and reports through <paramref name="logger"/>.
    /// </summary>
    /// <param name="context">Database context whose tables are counted.</param>
    /// <param name="logger">Logger receiving the counts and any failures.</param>
    public DatabasePreflightCheck(ApiDbContext context, ILogger<DatabasePreflightCheck> logger)
    {
        _context = context;
        _logger = logger;
    }

    /// <summary>
    /// Constructor without dependencies for unit testing.
    /// An instance created this way can only evaluate the pure rule methods; <see cref="IsGood"/> performs no checks.
    /// </summary>
    public DatabasePreflightCheck()
    {
    }

    /// <summary>
    /// Tells whether enough publications are referenced from fact_contribution.
    /// </summary>
    /// <param name="dimPublicationCount">Number of rows in dim_publication.</param>
    /// <param name="factContributionDistinctReferencesToDimPublicationCount">Number of distinct dim_publication ids referenced in fact_contribution.</param>
    /// <returns><c>true</c> when the reference count is at least 80% of the publication count (the boundary is inclusive).</returns>
    public bool FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(int dimPublicationCount, int factContributionDistinctReferencesToDimPublicationCount)
    {
        return factContributionDistinctReferencesToDimPublicationCount >= dimPublicationCount * MinimumPublicationCoverage;
    }

    /// <summary>
    /// Runs all preflight checks and logs the outcome of each one.
    /// </summary>
    /// <returns>
    /// <c>false</c> when any checked table is empty or the publication coverage rule fails; otherwise <c>true</c>.
    /// Also <c>true</c> when the instance was created without dependencies, because nothing can be checked then.
    /// </returns>
    public bool IsGood()
    {
        // Dependency-free instance (unit-test constructor): nothing to query, keep the original "good" result.
        if (_context is null || _logger is null)
        {
            return true;
        }

        _logger.LogInformation(LogPrefix + "Check that required database tables contain data for indexing");

        // Every check is always executed so that one run reports all problems, not only the first one.
        bool isGood = true;

        // Publication count
        int dimPublicationCount = _context.DimPublications.AsNoTracking().Count(dp => dp.Id > 0);
        _logger.LogInformation(LogPrefix + "publications: dim_publication count = {DimPublicationCount}", dimPublicationCount);
        if (dimPublicationCount == 0)
        {
            _logger.LogError(LogPrefix + "publications: Table dim_publication is empty");
            isGood = false;
        }

        // Funding call count (dim_call_programme in database)
        int dimCallProgrammeCount = _context.DimCallProgrammes.AsNoTracking().Count(dcp => dcp.Id > 0);
        _logger.LogInformation(LogPrefix + "funding calls: dim_call_programme count = {DimCallProgramme}", dimCallProgrammeCount);
        if (dimCallProgrammeCount == 0)
        {
            _logger.LogError(LogPrefix + "funding calls: Table dim_call_programme is empty");
            isGood = false;
        }

        // Funding decision count
        int dimFundingDecisionCount = _context.DimFundingDecisions.AsNoTracking().Count(dfd => dfd.Id > 0);
        _logger.LogInformation(LogPrefix + "funding decisions: dim_funding_decision count = {DimFundingDecision}", dimFundingDecisionCount);
        if (dimFundingDecisionCount == 0)
        {
            _logger.LogError(LogPrefix + "funding decisions: Table dim_funding_decision is empty");
            isGood = false;
        }

        // Research dataset count
        int dimResearchDatasetCount = _context.DimResearchDatasets.AsNoTracking().Count(drd => drd.Id > 0);
        _logger.LogInformation(LogPrefix + "research datasets: dim_research_dataset count = {DimResearchDataset}", dimResearchDatasetCount);
        if (dimResearchDatasetCount == 0)
        {
            _logger.LogError(LogPrefix + "research datasets: Table dim_research_dataset is empty");
            isGood = false;
        }

        // Publication related fact_contribution count.
        // Count distinct dim_publication references in fact_contribution.
        int distinctPublicationReferenceCount = _context.FactContributions
            .AsNoTracking()
            .Where(fc => fc.DimPublicationId > 0)
            .Select(fc => fc.DimPublicationId)
            .Distinct()
            .Count();
        _logger.LogInformation(LogPrefix + "publications: Number of distinct dim_publication references in fact_contribution = {DistinctDimPublicationReferencesInFactContributionCount}", distinctPublicationReferenceCount);
        if (!FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(dimPublicationCount, distinctPublicationReferenceCount))
        {
            _logger.LogError(LogPrefix + "publications: Possibly too few of dim_publication references in fact_contribution");
            isGood = false;
        }

        if (isGood)
        {
            _logger.LogInformation(LogPrefix + "status OK");
        }
        else
        {
            _logger.LogError(LogPrefix + "indexing aborted");
        }

        return isGood;
    }
}
var indexer = consoleHost.Services.GetRequiredService(); @@ -51,6 +58,9 @@ private static IHostBuilder CreateHostBuilder(string[] args) => Host // Register the "Main" service. services.AddTransient(); + // Register the database checker service. + services.AddTransient(); + // Register settings. services.AddSettings(hostContext.Configuration); diff --git a/aspnetcore/src/Interface/Maps/PublicationProfile.cs b/aspnetcore/src/Interface/Maps/PublicationProfile.cs index 1094cb0..812760f 100644 --- a/aspnetcore/src/Interface/Maps/PublicationProfile.cs +++ b/aspnetcore/src/Interface/Maps/PublicationProfile.cs @@ -17,7 +17,7 @@ public PublicationProfile() AllowNullDestinationValues = true; CreateMap() - .ForMember(dst => dst.TypeCode, opt => opt.MapFrom(src => src.Type!.ToLower())); + .ForMember(dst => dst.TypeCode, opt => opt.MapFrom(src => src.Type!)); CreateMap() .ForMember(dst => dst.PublicationYear, opt => opt.MapFrom(src => src.PublicationYear.HasValue ? src.PublicationYear.Value.ToString(DateTimeYearFormat) : null)) diff --git a/aspnetcore/src/Service.Models/ReferenceData.cs b/aspnetcore/src/Service.Models/ReferenceData.cs index 5f6f23c..85207b6 100644 --- a/aspnetcore/src/Service.Models/ReferenceData.cs +++ b/aspnetcore/src/Service.Models/ReferenceData.cs @@ -6,7 +6,6 @@ public class ReferenceData { [Keyword] public string? Code { get; set; } - public string? NameFi { get; set; } public string? NameSv { get; set; } public string? 
using FluentAssertions;
using Xunit;

namespace CSC.PublicApi.Indexer.Tests.Preflight;

/// <summary>
/// Unit tests for the publication coverage rule of <see cref="DatabasePreflightCheck"/>.
/// The rule is evaluated on plain integers, so the dependency-free constructor is used throughout.
/// </summary>
public class DatabasePreflightCheckTest
{
    /// <summary>7 referenced publications out of 10 is below the baseline and must be rejected.</summary>
    [Fact]
    public void FactContributionNumberOfDistinctReferencesToDimPublicationIsGood_01()
    {
        // Arrange
        var databasePreflightCheck = new DatabasePreflightCheck();

        // Act
        bool actualResult = databasePreflightCheck.FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(
            dimPublicationCount: 10,
            factContributionDistinctReferencesToDimPublicationCount: 7);

        // Assert
        Assert.False(actualResult);
    }

    /// <summary>9 referenced publications out of 10 is above the baseline and must be accepted.</summary>
    [Fact]
    public void FactContributionNumberOfDistinctReferencesToDimPublicationIsGood_02()
    {
        // Arrange
        var databasePreflightCheck = new DatabasePreflightCheck();

        // Act
        bool actualResult = databasePreflightCheck.FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(
            dimPublicationCount: 10,
            factContributionDistinctReferencesToDimPublicationCount: 9);

        // Assert
        Assert.True(actualResult);
    }

    /// <summary>
    /// Pins the behavior around the baseline itself: the comparison is inclusive,
    /// and an empty dim_publication table satisfies the ratio rule on its own.
    /// </summary>
    [Theory]
    [InlineData(10, 8, true)]   // exactly at the baseline
    [InlineData(10, 10, true)]  // every publication referenced
    [InlineData(5, 4, true)]    // exactly at the baseline, smaller table
    [InlineData(5, 3, false)]   // one reference short of the baseline
    [InlineData(0, 0, true)]    // nothing to reference
    public void FactContributionNumberOfDistinctReferencesToDimPublicationIsGood_Boundaries(
        int dimPublicationCount,
        int factContributionDistinctReferencesToDimPublicationCount,
        bool expectedResult)
    {
        // Arrange
        var databasePreflightCheck = new DatabasePreflightCheck();

        // Act
        bool actualResult = databasePreflightCheck.FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(
            dimPublicationCount,
            factContributionDistinctReferencesToDimPublicationCount);

        // Assert
        Assert.Equal(expectedResult, actualResult);
    }
}