Skip to content

Commit 93e62c8

Browse files
authored
Merge pull request #133 from CSCfi/qa
Database status check. Result ordering. Publication bug fixes.
2 parents c97a1df + 2de9935 commit 93e62c8

13 files changed

+237
-24
lines changed

aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/FundingCallQueryGenerator.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,4 +131,11 @@ private static IEnumerable<Func<QueryContainerDescriptor<FundingCall>, QueryCont
131131

132132
return filters;
133133
}
134+
135+
/// <summary>
/// Builds the sort applied to funding call searches: ascending by
/// <c>CallProgrammeDueDate</c>.
/// </summary>
/// <param name="parameters">Search parameters; not consulted when building the sort.</param>
/// <returns>A function that configures the sort descriptor of the search request.</returns>
protected override Func<SortDescriptor<FundingCall>, IPromise<IList<ISort>>> GenerateSortForSearch(FundingCallSearchParameters parameters) =>
    sort => sort.Field(call => call.CallProgrammeDueDate, SortOrder.Ascending);
134141
}

aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/FundingDecisionQueryGenerator.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,4 +174,11 @@ private static IEnumerable<Func<QueryContainerDescriptor<FundingDecision>, Query
174174

175175
return filters;
176176
}
177+
178+
/// <summary>
/// Builds the sort applied to funding decision searches: descending by
/// <c>FundingStartDate</c>.
/// </summary>
/// <param name="parameters">Search parameters; not consulted when building the sort.</param>
/// <returns>A function that configures the sort descriptor of the search request.</returns>
protected override Func<SortDescriptor<FundingDecision>, IPromise<IList<ISort>>> GenerateSortForSearch(FundingDecisionSearchParameters parameters) =>
    sort => sort.Field(decision => decision.FundingStartDate, SortOrder.Descending);
177184
}

aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/InfrastructureQueryGenerator.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,10 @@ protected override Func<QueryContainerDescriptor<Infrastructure>, QueryContainer
3838
{
3939
throw new NotImplementedException();
4040
}
41+
42+
/// <summary>
/// Builds the sort applied to infrastructure searches. No sort field is added;
/// the descriptor is returned exactly as it was received.
/// </summary>
/// <param name="parameters">Search parameters; not consulted when building the sort.</param>
/// <returns>A function that returns the sort descriptor unchanged.</returns>
protected override Func<SortDescriptor<Infrastructure>, IPromise<IList<ISort>>> GenerateSortForSearch(InfrastructureSearchParameters parameters) =>
    sort => sort;
4147
}

aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/OrganizationQueryGenerator.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,10 @@ protected override Func<QueryContainerDescriptor<Organization>, QueryContainer>
3838
{
3939
throw new NotImplementedException();
4040
}
41+
42+
/// <summary>
/// Builds the sort applied to organization searches. No sort field is added;
/// the descriptor is returned exactly as it was received.
/// </summary>
/// <param name="parameters">Search parameters; not consulted when building the sort.</param>
/// <returns>A function that returns the sort descriptor unchanged.</returns>
protected override Func<SortDescriptor<Organization>, IPromise<IList<ISort>>> GenerateSortForSearch(OrganizationSearchParameters parameters) =>
    sort => sort;
4147
}

aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/PublicationQueryGenerator.cs

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ private static IEnumerable<Func<QueryContainerDescriptor<Publication>, QueryCont
3030
if (!string.IsNullOrWhiteSpace(parameters.Name))
3131
{
3232
subQueries.Add(t =>
33-
t.Match(query => query.Field(f => f.Name)
33+
t.MatchPhrase(query => query.Field(f => f.Name)
3434
.Query(parameters.Name)));
3535
}
3636

@@ -89,6 +89,18 @@ private static IEnumerable<Func<QueryContainerDescriptor<Publication>, QueryCont
8989
.Field(f => f.Authors.Suffix(nameof(Author.LastName))).Query(parameters.AuthorLastName)))))));
9090
}
9191

92+
if (!string.IsNullOrWhiteSpace(parameters.AuthorOrcId))
93+
{
94+
subQueries.Add(
95+
q => q.Nested(
96+
query => query
97+
.Path(p => p.Authors)
98+
.Query(
99+
q => q.Match(m => m
100+
.Field(f => f.Authors.Suffix(nameof(Author.Orcid)))
101+
.Query(parameters.AuthorOrcId)))));
102+
}
103+
92104
if (!string.IsNullOrWhiteSpace(parameters.ConferenceName))
93105
{
94106
subQueries.Add(t =>
@@ -224,25 +236,22 @@ private static IEnumerable<Func<QueryContainerDescriptor<Publication>, QueryCont
224236
.Value(parameters.OrganizationUnitId)));
225237
}
226238

227-
if (parameters.AuthorOrcId is not null)
239+
// Searching with type code requires exact match.
240+
if (!string.IsNullOrWhiteSpace(parameters.TypeCode))
228241
{
229-
filters.Add(t =>
230-
t.Term(s => s.Field(f => f.Authors.Suffix(nameof(Author.Orcid)))
231-
.Value(parameters.AuthorOrcId)));
242+
filters.Add(t => t.Term(term => term
243+
.Field(f => f.Type!.Code)
244+
.Value(parameters.TypeCode)
245+
));
232246
}
233-
234-
if (parameters.TypeCode is not null)
235-
{
236-
filters.Add(t =>
237-
t.Term(s => s.Field(f => f.Type)
238-
.Value(parameters.TypeCode)));
239-
}
240-
241-
if (parameters.PublisherOpenAccess is not null)
247+
248+
// Searching with publisher open access code requires exact match.
249+
if (!string.IsNullOrWhiteSpace(parameters.PublisherOpenAccess))
242250
{
243-
filters.Add(t =>
244-
t.Term(s => s.Field(f => f.PublisherOpenAccess)
245-
.Value(parameters.PublisherOpenAccess)));
251+
filters.Add(t => t.Term(term => term
252+
.Field(f => f.PublisherOpenAccess!.Code)
253+
.Value(parameters.PublisherOpenAccess)
254+
));
246255
}
247256

248257
if (parameters.Issn is not null)
@@ -274,11 +283,13 @@ private static IEnumerable<Func<QueryContainerDescriptor<Publication>, QueryCont
274283
.Value(parameters.Doi)));
275284
}
276285

277-
if (parameters.Status is not null)
286+
// Searching with status requires exact match.
287+
if (!string.IsNullOrWhiteSpace(parameters.Status))
278288
{
279-
filters.Add(t =>
280-
t.Term(s => s.Field(f => f.Status)
281-
.Value(parameters.Status)));
289+
filters.Add(t => t.Term(term => term
290+
.Field(f => f.Status!.Code)
291+
.Value(parameters.Status)
292+
));
282293
}
283294

284295
return filters;
@@ -288,4 +299,11 @@ protected override Func<QueryContainerDescriptor<Publication>, QueryContainer> G
288299
{
289300
return queryContainerDescriptor => queryContainerDescriptor.Term(query => query.Field(f => f.Id).Value(id));
290301
}
302+
303+
/// <summary>
/// Builds the sort applied to publication searches: descending by
/// <c>PublicationYear</c>.
/// </summary>
/// <param name="parameters">Search parameters; not consulted when building the sort.</param>
/// <returns>A function that configures the sort descriptor of the search request.</returns>
// NOTE(review): PublicationYear is the only sort key, so the relative order of
// publications sharing a year is not pinned by this sort - confirm whether paged
// results need a unique tiebreaker field.
protected override Func<SortDescriptor<Publication>, IPromise<IList<ISort>>> GenerateSortForSearch(PublicationSearchParameters parameters) =>
    sort => sort.Field(publication => publication.PublicationYear, SortOrder.Descending);
291309
}

aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/QueryGeneratorBase.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ public Func<SearchDescriptor<TOut>, ISearchRequest> GenerateQuery(TIn searchPara
1818
.Index(indexName)
1919
.Skip((pageNumber - 1) * pageSize)
2020
.Take(pageSize)
21+
.Sort(GenerateSortForSearch(searchParameters))
2122
.Query(GenerateQueryForSearch(searchParameters));
2223
}
2324

@@ -30,6 +31,8 @@ public Func<SearchDescriptor<TOut>, ISearchRequest> GenerateSingleQuery(string i
3031
}
3132

3233
/// <summary>
/// Builds the query part of a search request from the given search parameters.
/// The result is passed to <c>.Query(...)</c> when the search request is generated.
/// </summary>
/// <param name="parameters">Search parameters the query is built from.</param>
/// <returns>A function that configures the query container of the search request.</returns>
protected abstract Func<QueryContainerDescriptor<TOut>, QueryContainer> GenerateQueryForSearch(TIn parameters);
33-
34+
3435
/// <summary>
/// Builds the query for a single document identified by <paramref name="id"/>.
/// </summary>
/// <param name="id">Identifier the query is built for.</param>
/// <returns>A function that configures the query container.</returns>
// NOTE(review): presumably consumed by GenerateSingleQuery - confirm against its body.
protected abstract Func<QueryContainerDescriptor<TOut>,QueryContainer> GenerateQueryForSingle(string id);
36+
37+
/// <summary>
/// Builds the sort part of a search request from the given search parameters.
/// The result is passed to <c>.Sort(...)</c> when the search request is generated.
/// </summary>
/// <param name="parameters">Search parameters available to the sort builder.</param>
/// <returns>A function that configures the sort descriptor of the search request.</returns>
protected abstract Func<SortDescriptor<TOut>, IPromise<IList<ISort>>> GenerateSortForSearch(TIn parameters);
3538
}

aspnetcore/src/ElasticService/ElasticSearchQueryGenerators/ResearchDatasetQueryGenerator.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,4 +202,11 @@ private static IEnumerable<Func<QueryContainerDescriptor<ResearchDataset>, Query
202202

203203
return filters;
204204
}
205+
206+
protected override Func<SortDescriptor<ResearchDataset>, IPromise<IList<ISort>>> GenerateSortForSearch(ResearchDatasetSearchParameters parameters)
207+
{
208+
// Sort research datasets
209+
return sortDescriptor => sortDescriptor
210+
.Field(f => f.Created, SortOrder.Descending);
211+
}
205212
}
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
using CSC.PublicApi.DatabaseContext;
2+
using Microsoft.EntityFrameworkCore;
3+
using Microsoft.Extensions.Logging;
4+
5+
namespace CSC.PublicApi.Indexer;
6+
7+
/// <summary>
/// Checks, before indexing starts, that the database tables used as indexing
/// sources contain data. Every failed check is logged as an error.
/// </summary>
public class DatabasePreflightCheck
{
    // Compile-time constant so that "LogPrefix + literal" stays a constant message
    // template; structured logging then sees one stable template per call site.
    private const string LogPrefix = "Database preflight check: ";

    // Most publications should have author information linked via fact_contribution.
    // The exact ratio cannot be determined; 80% is used as a baseline.
    private const double MinimumPublicationReferenceRatio = 0.8;

    private readonly ApiDbContext? _context;
    private readonly ILogger<DatabasePreflightCheck>? _logger;

    /// <summary>
    /// Creates a check that queries the given database context and logs its findings.
    /// </summary>
    /// <param name="context">Database context the table counts are read from.</param>
    /// <param name="logger">Logger receiving the progress and error messages.</param>
    public DatabasePreflightCheck(ApiDbContext context, ILogger<DatabasePreflightCheck> logger)
    {
        _context = context;
        _logger = logger;
    }

    /// <summary>
    /// Constructor without dependencies for unit testing. An instance created this
    /// way has no database context or logger, so <see cref="IsGood"/> runs no checks.
    /// </summary>
    public DatabasePreflightCheck()
    {
    }

    /// <summary>
    /// Tells whether enough distinct publications are referenced from fact_contribution.
    /// </summary>
    /// <param name="dimPublicationCount">Number of rows in dim_publication.</param>
    /// <param name="factContributionDistinctReferencesToDimPublicationCount">
    /// Number of distinct dim_publication references found in fact_contribution.
    /// </param>
    /// <returns>
    /// <c>true</c> when the reference count is at least 80% of the publication count;
    /// otherwise <c>false</c>.
    /// </returns>
    public bool FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(int dimPublicationCount, int factContributionDistinctReferencesToDimPublicationCount)
    {
        return factContributionDistinctReferencesToDimPublicationCount
            >= dimPublicationCount * MinimumPublicationReferenceRatio;
    }

    /// <summary>
    /// Runs all preflight checks against the database.
    /// </summary>
    /// <returns>
    /// <c>true</c> when every check passes, and also when the instance was created
    /// without a database context or logger (no checks are run in that case);
    /// <c>false</c> when at least one check fails.
    /// </returns>
    public bool IsGood()
    {
        // Without dependencies (parameterless constructor) there is nothing to check.
        if (_context is null || _logger is null)
        {
            return true;
        }

        bool isGood = true;
        _logger.LogInformation(LogPrefix + "Check that required database tables contain data for indexing");

        // Publication count
        int dimPublicationCount = _context.DimPublications.AsNoTracking().Count(dp => dp.Id > 0);
        _logger.LogInformation(LogPrefix + "publications: dim_publication count = {DimPublicationCount}", dimPublicationCount);
        if (dimPublicationCount == 0)
        {
            _logger.LogError(LogPrefix + "publications: Table dim_publication is empty");
            isGood = false;
        }

        // Funding call count (dim_call_programme in database)
        int dimCallProgrammeCount = _context.DimCallProgrammes.AsNoTracking().Count(dcp => dcp.Id > 0);
        _logger.LogInformation(LogPrefix + "funding calls: dim_call_programme count = {DimCallProgramme}", dimCallProgrammeCount);
        if (dimCallProgrammeCount == 0)
        {
            _logger.LogError(LogPrefix + "funding calls: Table dim_call_programme is empty");
            isGood = false;
        }

        // Funding decision count
        int dimFundingDecisionCount = _context.DimFundingDecisions.AsNoTracking().Count(dfd => dfd.Id > 0);
        _logger.LogInformation(LogPrefix + "funding decisions: dim_funding_decision count = {DimFundingDecision}", dimFundingDecisionCount);
        if (dimFundingDecisionCount == 0)
        {
            _logger.LogError(LogPrefix + "funding decisions: Table dim_funding_decision is empty");
            isGood = false;
        }

        // Research dataset count
        int dimResearchDatasetCount = _context.DimResearchDatasets.AsNoTracking().Count(drd => drd.Id > 0);
        _logger.LogInformation(LogPrefix + "research datasets: dim_research_dataset count = {DimResearchDataset}", dimResearchDatasetCount);
        if (dimResearchDatasetCount == 0)
        {
            _logger.LogError(LogPrefix + "research datasets: Table dim_research_dataset is empty");
            isGood = false;
        }

        // Publication related fact_contribution count.
        // Count distinct dim_publication references in fact_contribution.
        int distinctPublicationReferenceCount = _context.FactContributions
            .AsNoTracking()
            .Where(fc => fc.DimPublicationId > 0)
            .Select(fc => fc.DimPublicationId)
            .Distinct()
            .Count();
        _logger.LogInformation(LogPrefix + "publications: Number of distinct dim_publication references in fact_contribution = {DistinctDimPublicationReferencesInFactContributionCount}", distinctPublicationReferenceCount);
        if (!FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(dimPublicationCount, distinctPublicationReferenceCount))
        {
            _logger.LogError(LogPrefix + "publications: Possibly too few of dim_publication references in fact_contribution");
            isGood = false;
        }

        if (isGood)
        {
            _logger.LogInformation(LogPrefix + "status OK");
        }
        else
        {
            _logger.LogError(LogPrefix + "indexing aborted");
        }

        return isGood;
    }
}

aspnetcore/src/Indexer/Program.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ public static async Task Main(string[] args)
3333
// Create and configure the host to support dependency injection, configuration, etc.
3434
var consoleHost = CreateHostBuilder(args).Build();
3535

36+
// Check if the database is ready for indexing.
37+
var databasePreflightCheck = consoleHost.Services.GetRequiredService<DatabasePreflightCheck>();
38+
if (!databasePreflightCheck.IsGood())
39+
{
40+
return;
41+
}
42+
3643
// Get the "Main" service which handles the indexing.
3744
var indexer = consoleHost.Services.GetRequiredService<Indexer>();
3845

@@ -51,6 +58,9 @@ private static IHostBuilder CreateHostBuilder(string[] args) => Host
5158
// Register the "Main" service.
5259
services.AddTransient<Indexer>();
5360

61+
// Register the database checker service.
62+
services.AddTransient<DatabasePreflightCheck>();
63+
5464
// Register settings.
5565
services.AddSettings(hostContext.Configuration);
5666

aspnetcore/src/Interface/Maps/PublicationProfile.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public PublicationProfile()
1717
AllowNullDestinationValues = true;
1818

1919
CreateMap<GetPublicationsQueryParameters, PublicationSearchParameters>()
20-
.ForMember(dst => dst.TypeCode, opt => opt.MapFrom(src => src.Type!.ToLower()));
20+
.ForMember(dst => dst.TypeCode, opt => opt.MapFrom(src => src.Type!));
2121

2222
CreateMap<Service.Models.Publication.Publication, Publication>()
2323
.ForMember(dst => dst.PublicationYear, opt => opt.MapFrom(src => src.PublicationYear.HasValue ? src.PublicationYear.Value.ToString(DateTimeYearFormat) : null))

0 commit comments

Comments
 (0)