Skip to content

Commit

Permalink
Merge pull request #4457 from inception-project/feature/4456-FTS-scor…
Browse files Browse the repository at this point in the history
…e-accessible-in-entity-linking-ranker

#4456 - FTS score accessible in entity linking ranker
  • Loading branch information
reckart authored Jan 25, 2024
2 parents 78c698f + f5d8186 commit 92371e3
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import de.tudarmstadt.ukp.inception.conceptlinking.feature.CasingFeatureGenerator;
import de.tudarmstadt.ukp.inception.conceptlinking.feature.EntityRankingFeatureGenerator;
import de.tudarmstadt.ukp.inception.conceptlinking.feature.FrequencyFeatureGenerator;
import de.tudarmstadt.ukp.inception.conceptlinking.feature.FtsScoreFeatureGenerator;
import de.tudarmstadt.ukp.inception.conceptlinking.feature.LevenshteinFeatureGenerator;
import de.tudarmstadt.ukp.inception.conceptlinking.feature.SemanticSignatureFeatureGenerator;
import de.tudarmstadt.ukp.inception.conceptlinking.feature.WikidataIdRankFeatureGenerator;
Expand All @@ -54,7 +55,6 @@
public class EntityLinkingServiceAutoConfiguration
{
@Bean
@Autowired
public ConceptLinkingService conceptLinkingService(KnowledgeBaseService aKbService,
EntityLinkingPropertiesImpl aProperties, RepositoryProperties aRepoProperties,
@Lazy @Autowired(required = false) List<EntityRankingFeatureGenerator> aFeatureGenerators)
Expand Down Expand Up @@ -82,31 +82,33 @@ public LevenshteinFeatureGenerator levenshteinFeatureGenerator()
}

/**
 * Creates the {@link WikidataIdRankFeatureGenerator} bean.
 * <p>
 * No {@code @Autowired} is needed here: Spring resolves the parameters of a {@code @Bean}
 * factory method from the application context on its own.
 * </p>
 *
 * @param aKbService
 *            the knowledge base service passed to the generator's constructor.
 * @return a new {@link WikidataIdRankFeatureGenerator}.
 */
@Bean
public WikidataIdRankFeatureGenerator wikidataIdRankFeatureGenerator(
        KnowledgeBaseService aKbService)
{
    return new WikidataIdRankFeatureGenerator(aKbService);
}

/**
 * Creates the {@link FtsScoreFeatureGenerator} bean.
 *
 * @return a new {@link FtsScoreFeatureGenerator}; the generator takes no constructor arguments.
 */
@Bean
public FtsScoreFeatureGenerator ftsScoreFeatureGenerator()
{
    FtsScoreFeatureGenerator generator = new FtsScoreFeatureGenerator();
    return generator;
}

/**
 * Creates the {@link NamedEntityLinkerFactory} bean. The bean is conditional on a
 * {@link RecommendationService} bean being available.
 * <p>
 * No {@code @Autowired} is needed here: Spring resolves the parameters of a {@code @Bean}
 * factory method from the application context on its own.
 * </p>
 *
 * @param aKbService
 *            the knowledge base service passed to the factory.
 * @param aClService
 *            the concept linking service passed to the factory.
 * @param aFsRegistry
 *            the feature support registry passed to the factory.
 * @return a new {@link NamedEntityLinkerFactory}.
 */
@ConditionalOnBean(RecommendationService.class)
@Bean
public NamedEntityLinkerFactory namedEntityLinkerFactory(KnowledgeBaseService aKbService,
        ConceptLinkingService aClService, FeatureSupportRegistry aFsRegistry)
{
    return new NamedEntityLinkerFactory(aKbService, aClService, aFsRegistry);
}

// @Bean
/**
 * Builds a {@link FrequencyFeatureGenerator} from the given repository properties.
 * <p>
 * The {@code @Bean} annotation on this method is commented out, so this method does not
 * register the generator as a bean.
 * </p>
 *
 * @param aRepoProperties
 *            the repository properties passed to the generator's constructor.
 * @return a new {@link FrequencyFeatureGenerator}.
 */
public FrequencyFeatureGenerator frequencyFeatureGenerator(RepositoryProperties aRepoProperties)
{
    FrequencyFeatureGenerator generator = new FrequencyFeatureGenerator(aRepoProperties);
    return generator;
}

// @Bean
// @Autowired
public SemanticSignatureFeatureGenerator semanticSignatureFeatureGenerator(
KnowledgeBaseService aKbService, RepositoryProperties aRepoProperties,
EntityLinkingProperties aProperties)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.conceptlinking.feature;

import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingServiceAutoConfiguration;
import de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity;

/**
 * Ranking feature generator that copies the score of a candidate's handle into the candidate's
 * {@link CandidateEntity#KEY_FTS_SCORE} feature.
 * <p>
 * This class is exposed as a Spring Component via
 * {@link EntityLinkingServiceAutoConfiguration#ftsScoreFeatureGenerator()}.
 * </p>
 */
public class FtsScoreFeatureGenerator
    implements EntityRankingFeatureGenerator
{
    /**
     * Reads the score from the candidate's handle and stores it on the candidate under
     * {@link CandidateEntity#KEY_FTS_SCORE}.
     *
     * @param aCandidate
     *            the candidate to annotate with its FTS score.
     */
    @Override
    public void apply(CandidateEntity aCandidate)
    {
        var ftsScore = aCandidate.getHandle().getScore();
        aCandidate.put(CandidateEntity.KEY_FTS_SCORE, ftsScore);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ public class CandidateEntity
*/
public static final Key<Double> KEY_ID_RANK = new Key<>("idRank", 0.0d);

/**
 * FTS score - score assigned by the KB FTS (if any). FTS presumably stands for full-text
 * search - TODO confirm. The key is named {@code "ftsScore"} and is created with
 * {@code 0.0d} as its second argument (presumably the default value, as for the other keys).
 */
public static final Key<Double> KEY_FTS_SCORE = new Key<>("ftsScore", 0.0d);

/**
* in-link count of wikipedia article of IRI
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,9 @@ public String toString()
if (range != null) {
builder.append("range", range);
}
if (score != 0.0) {
builder.append("score", score);
}
return builder.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.QueryEvaluationException;
import org.eclipse.rdf4j.query.TupleQuery;
import org.eclipse.rdf4j.query.TupleQueryResult;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.sparqlbuilder.constraint.Expression;
import org.eclipse.rdf4j.sparqlbuilder.constraint.Expressions;
Expand Down Expand Up @@ -870,10 +869,12 @@ private GraphPattern withLabelMatchingExactlyAnyOf_RDF4J_FTS(String[] aValues)
continue;
}

projections.add(VAR_SCORE);

valuePatterns.add(VAR_SUBJECT
.has(FTS_LUCENE,
bNode(LUCENE_QUERY, literalOf(sanitizedValue)).andHas(LUCENE_PROPERTY,
VAR_MATCH_TERM_PROPERTY))
.has(FTS_LUCENE, bNode(LUCENE_QUERY, literalOf(sanitizedValue)) //
.andHas(LUCENE_PROPERTY, VAR_MATCH_TERM_PROPERTY) //
.andHas(LUCENE_SCORE, VAR_SCORE))
.andHas(VAR_MATCH_TERM_PROPERTY, VAR_MATCH_TERM)
.filter(equalsPattern(VAR_MATCH_TERM, value, kb)));
}
Expand Down Expand Up @@ -903,6 +904,8 @@ private GraphPattern withLabelMatchingExactlyAnyOf_Fuseki_FTS(String[] aValues)
continue;
}

projections.add(VAR_SCORE);

valuePatterns.add(new FusekiFtsQuery(VAR_SUBJECT, VAR_SCORE, VAR_MATCH_TERM,
VAR_MATCH_TERM_PROPERTY, sanitizedValue) //
.withLimit(getLimit()) //
Expand Down Expand Up @@ -1140,6 +1143,8 @@ private GraphPattern withLabelMatchingAnyOf_Fuseki_FTS(String[] aValues)
continue;
}

projections.add(VAR_SCORE);

valuePatterns.add(new FusekiFtsQuery(VAR_SUBJECT, VAR_SCORE, VAR_MATCH_TERM,
VAR_MATCH_TERM_PROPERTY, fuzzyQuery).withLimit(getLimit()));
}
Expand Down Expand Up @@ -1167,6 +1172,8 @@ private GraphPattern withLabelMatchingAnyOf_RDF4J_FTS(String[] aValues)
labelFilterExpressions.add(Expressions.equals(str(var("label")), str(VAR_MATCH_TERM)));
labelFilterExpressions.add(matchKbLanguage(VAR_MATCH_TERM));

projections.add(VAR_SCORE);

// If a KB item has multiple labels, we want to return only the ones which actually
// match the query term such that the user is not confused that the results contain
// items that don't match the query (even though they do through a label that is not
Expand All @@ -1175,7 +1182,8 @@ private GraphPattern withLabelMatchingAnyOf_RDF4J_FTS(String[] aValues)
// out as part of the query.
valuePatterns.add(VAR_SUBJECT //
.has(FTS_LUCENE, bNode(LUCENE_QUERY, literalOf(fuzzyQuery)) //
.andHas(LUCENE_PROPERTY, VAR_MATCH_TERM_PROPERTY)
.andHas(LUCENE_PROPERTY, VAR_MATCH_TERM_PROPERTY) //
.andHas(LUCENE_SCORE, VAR_SCORE) //
.andHas(LUCENE_SNIPPET, var("snippet")))
.and(bind(
function(REPLACE,
Expand Down Expand Up @@ -1267,10 +1275,12 @@ private GraphPattern withLabelContainingAnyOf_RDF4J_FTS(String[] aValues)
continue;
}

projections.add(VAR_SCORE);

valuePatterns.add(VAR_SUBJECT
.has(FTS_LUCENE,
bNode(LUCENE_QUERY, literalOf(sanitizedValue + "*"))
.andHas(LUCENE_PROPERTY, VAR_MATCH_TERM_PROPERTY))
.has(FTS_LUCENE, bNode(LUCENE_QUERY, literalOf(sanitizedValue + "*")) //
.andHas(LUCENE_PROPERTY, VAR_MATCH_TERM_PROPERTY) //
.andHas(LUCENE_SCORE, VAR_SCORE))
.andHas(VAR_MATCH_TERM_PROPERTY, VAR_MATCH_TERM)
.filter(containsPattern(VAR_MATCH_TERM, value)));
}
Expand Down Expand Up @@ -1299,6 +1309,8 @@ private GraphPattern withLabelContainingAnyOf_Fuseki_FTS(String[] aValues)
continue;
}

projections.add(VAR_SCORE);

valuePatterns.add(new FusekiFtsQuery(VAR_SUBJECT, VAR_SCORE, VAR_MATCH_TERM,
VAR_MATCH_TERM_PROPERTY, sanitizedValue) //
.withLimit(getLimit()) //
Expand Down Expand Up @@ -1560,14 +1572,15 @@ private GraphPattern withLabelStartingWith_RDF4J_FTS(String aPrefixQuery)
queryString += "*";
}

projections.add(VAR_SCORE);

// Locate all entries where the label contains the prefix (using the FTS) and then
// filter them by those which actually start with the prefix.
return and( //
bindMatchTermProperties(VAR_MATCH_TERM_PROPERTY), //
VAR_SUBJECT
.has(FTS_LUCENE,
bNode(LUCENE_QUERY, literalOf(queryString)).andHas(LUCENE_PROPERTY,
VAR_MATCH_TERM_PROPERTY))
VAR_SUBJECT.has(FTS_LUCENE, bNode(LUCENE_QUERY, literalOf(queryString)) //
.andHas(LUCENE_SCORE, VAR_SCORE)
.andHas(LUCENE_PROPERTY, VAR_MATCH_TERM_PROPERTY))
.andHas(VAR_MATCH_TERM_PROPERTY, VAR_MATCH_TERM)
.filter(startsWithPattern(VAR_MATCH_TERM, aPrefixQuery)));
}
Expand Down Expand Up @@ -1597,6 +1610,8 @@ private GraphPattern withLabelStartingWith_Fuseki_FTS(String aPrefixQuery)
queryString += "*";
}

projections.add(VAR_SCORE);

// Locate all entries where the label contains the prefix (using the FTS) and then
// filter them by those which actually start with the prefix.
return and( //
Expand Down Expand Up @@ -2101,28 +2116,29 @@ public Optional<KBHandle> asHandle(RepositoryConnection aConnection, boolean aAl
private List<KBHandle> evaluateListQuery(TupleQuery tupleQuery, boolean aAll)
throws QueryEvaluationException
{
try (TupleQueryResult result = tupleQuery.evaluate()) {
List<KBHandle> handles = new ArrayList<>();
try (var result = tupleQuery.evaluate()) {
var handles = new ArrayList<KBHandle>();
while (result.hasNext()) {
BindingSet bindings = result.next();
var bindings = result.next();
if (bindings.size() == 0) {
continue;
}

// LOG.trace("[{}] Bindings: {}", toHexString(hashCode()), bindings);

String id = bindings.getBinding(VAR_SUBJECT_NAME).getValue().stringValue();
var id = bindings.getBinding(VAR_SUBJECT_NAME).getValue().stringValue();
if (!id.contains(":") || (!aAll && hasImplicitNamespace(kb, id))) {
continue;
}

KBHandle handle = new KBHandle(id);
var handle = new KBHandle(id);
handle.setKB(kb);

extractLabel(handle, bindings);
extractDescription(handle, bindings);
extractRange(handle, bindings);
extractDomain(handle, bindings);
extractScore(handle, bindings);

handles.add(handle);
}
Expand Down Expand Up @@ -2243,6 +2259,14 @@ private void extractRange(KBHandle aTargetHandle, BindingSet aSourceBindings)
}
}

/**
 * Transfers the score from the query result onto the handle.
 * <p>
 * If the binding set has no binding named {@code VAR_SCORE_NAME}, the handle is left
 * untouched. Otherwise the string form of the bound value is parsed as a double;
 * {@link Double#valueOf(String)} throws a {@link NumberFormatException} if it is not parseable.
 * </p>
 *
 * @param aTargetHandle
 *            the handle receiving the score.
 * @param aSourceBindings
 *            the bindings of the current result row.
 */
private void extractScore(KBHandle aTargetHandle, BindingSet aSourceBindings)
{
    var scoreBinding = aSourceBindings.getBinding(VAR_SCORE_NAME);
    if (scoreBinding == null) {
        // Nothing bound under the score variable for this row
        return;
    }

    var scoreText = scoreBinding.getValue().stringValue();
    aTargetHandle.setScore(Double.valueOf(scoreText));
}

/**
* Removes leading and trailing space and single quote characters which could cause the query
* string to escape its quotes in the SPARQL query.
Expand Down

0 comments on commit 92371e3

Please sign in to comment.