Skip to content

Commit

Permalink
#5291 - Improve curation and auto-merging of link features
Browse files Browse the repository at this point in the history
- Better support for selecting suitable link hosts and link targets
- More trace logging
- Fix a couple of NPEs
- Bit of cleaning up
  • Loading branch information
reckart committed Feb 16, 2025
1 parent 425f965 commit 1f66216
Show file tree
Hide file tree
Showing 24 changed files with 552 additions and 180 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static de.tudarmstadt.ukp.clarin.webanno.model.LinkMode.WITH_ROLE;
import static de.tudarmstadt.ukp.clarin.webanno.model.MultiValueMode.ARRAY;
import static de.tudarmstadt.ukp.inception.schema.api.feature.MaterializedLink.toMaterializedLink;
import static java.util.Collections.emptyList;
import static org.apache.commons.collections4.CollectionUtils.disjunction;
import static org.apache.uima.cas.CAS.TYPE_NAME_FS_ARRAY;
import static org.apache.uima.cas.CAS.TYPE_NAME_STRING;
Expand All @@ -28,7 +29,6 @@
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.apache.commons.text.WordUtils;
Expand Down Expand Up @@ -210,14 +210,18 @@ public void generateFeature(TypeSystemDescription aTSD, TypeDescription aTD,
/**
 * Reads the link feature described by {@code aFeature} from {@code aFS} and returns it in
 * wrapped form.
 *
 * @param aFeature
 *            the annotation feature whose name is used to look up the feature on the type of
 *            {@code aFS}.
 * @param aFS
 *            the feature structure to read the value from.
 * @return the result of {@code wrapFeatureValue} for the raw feature value, or an empty list if
 *         the type of {@code aFS} does not declare a feature with that base name.
 */
public List<LinkWithRoleModel> getFeatureValue(AnnotationFeature aFeature, FeatureStructure aFS)
{
    var featureName = aFeature.getName();
    var linkFeature = aFS.getType().getFeatureByBaseName(featureName);

    // Only read and wrap the value if the type actually declares the feature
    if (linkFeature != null) {
        var cas = aFS.getCAS();
        var rawValue = aFS.getFeatureValue(linkFeature);
        return wrapFeatureValue(aFeature, cas, rawValue);
    }

    // Feature not present on the type: nothing to return
    return emptyList();
}

@SuppressWarnings("unchecked")
@Override
public <V> V getDefaultFeatureValue(AnnotationFeature aFeature, FeatureStructure aFS)
{
return (V) Collections.emptyList();
return (V) emptyList();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import static de.tudarmstadt.ukp.inception.annotation.feature.link.LinkFeatureDiffMode.DEFAULT_LINK_DIFF_MODE;
import static de.tudarmstadt.ukp.inception.annotation.feature.link.LinkFeatureMultiplicityMode.DEFAULT_LINK_MULTIPLICITY_MODE;
import static de.tudarmstadt.ukp.inception.annotation.feature.link.LinkFeatureMultiplicityMode.MULTIPLE_TARGETS_ONE_ROLE;

import java.io.Serializable;
import java.util.ArrayList;
Expand Down Expand Up @@ -75,6 +76,10 @@ public void setEnableRoleLabels(boolean aEnableRoleLabels)

/**
 * @return {@code MULTIPLE_TARGETS_ONE_ROLE} while role labels are disabled; otherwise the value
 *         of the {@code compareMode} field.
 */
public LinkFeatureMultiplicityMode getMultiplicityMode()
{
    // The stored compare mode is only consulted when role labels are enabled
    return enableRoleLabels ? compareMode : MULTIPLE_TARGETS_ONE_ROLE;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import static de.tudarmstadt.ukp.inception.annotation.feature.link.LinkFeatureDiffMode.DEFAULT_LINK_DIFF_MODE;
import static de.tudarmstadt.ukp.inception.annotation.feature.link.LinkFeatureMultiplicityMode.DEFAULT_LINK_MULTIPLICITY_MODE;
import static de.tudarmstadt.ukp.inception.annotation.feature.link.LinkFeatureMultiplicityMode.ONE_TARGET_MULTIPLE_ROLES;
import static de.tudarmstadt.ukp.inception.annotation.feature.link.LinkFeatureMultiplicityMode.MULTIPLE_TARGETS_ONE_ROLE;
import static de.tudarmstadt.ukp.inception.support.lambda.LambdaBehavior.visibleWhen;
import static java.util.Arrays.asList;

Expand Down Expand Up @@ -198,7 +198,7 @@ private void writeTraits()
t.setMultiplicityMode(traits.getObject().getCompareMode());
}
else {
t.setMultiplicityMode(ONE_TARGET_MULTIPLE_ROLES);
t.setMultiplicityMode(MULTIPLE_TARGETS_ONE_ROLE);
}

getFeatureSupport().writeTraits(feature.getObject(), t);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,19 +72,19 @@ a| image::images/LinkFeatureDiffMode_EXCLUDE.png[]

Determines how links are compared to each other, e.g. when calculating agreement or when merging annotations during curation.

Use *Target should be linked in only one role* if you expect that a link target should only appear in a single role with respect to the same source span - but the same role may be assigned to different targets.
Use *Each target should be linked in only one role* if you expect that a link target should only appear in a single role with respect to the same source span - but the same role may be assigned to different targets.
In this mode, if an annotator links the same target in multiple roles, the links will be considered stacked and not be auto-merged by curation or used for agreement calculation.

.When the `slot1` link is merged, the `slot2` link to the same target disappears because each target can only be assigned a single role. The second `slot1` link remains available because it has a different target.
video::images/MULTIPLE_TARGETS_ONE_ROLE.mp4[]

Use *Target can be linked in multiple different roles* if a link target can appear in multiple roles with respect to the same source span - but a role may only be used once.
Use *Each target can be linked in multiple different roles* if a link target can appear in multiple roles with respect to the same source span - but a role may only be used once.
In this mode, if an annotator links multiple targets using the same role, the links will be considered stacked and not be auto-merged by curation or used for agreement calculation.

.When the first link labelled `slot1` is linked, the second `slot1` link disappears because each role can only be assigned to a single target. The `slot2` link remains available because it has a different role.
video::images/ONE_TARGET_MULTIPLE_ROLES.mp4[]

Use *Target can be linked in multiple roles (same or different)* if you expect that a link target should be linked multiple times with different roles as well as different targets can be linked with the same role.
Use *Each target can be linked in multiple roles (same or different)* if you expect that a link target can be linked multiple times with different roles and that different targets can be linked with the same role.
In this mode, there is no stacking.
Note that the order in which you merge the links and whether you create stacked annotations or not can affect the final result.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ private void addCas(String aCasGroupId, CAS aCas, String aType)
}

if (LOG.isDebugEnabled()) {
LOG.debug("Processing CAS group [{}]", aCasGroupId);
LOG.debug("Analyzing CAS group [{}]", aCasGroupId);

String collectionId = null;
String documentId = null;
Expand Down Expand Up @@ -261,6 +261,8 @@ private void addCas(String aCasGroupId, CAS aCas, String aType)
positions.addAll(adapter.generateSubPositions(ann));

for (var pos : positions) {
LOG.trace("Analyzing {}", pos);

var configSet = configSets.get(pos);
if (configSet == null) {
configSet = new ConfigurationSet(pos);
Expand All @@ -282,7 +284,7 @@ private void addCas(String aCasGroupId, CAS aCas, String aType)
}
}

private void addConfiguration(ConfigurationSet aSet, String aCasGroupId, FeatureStructure aFS)
private void addConfiguration(ConfigurationSet aSet, String aCasGroupId, AnnotationBase aFS)
{
if (aFS instanceof SofaFS) {
return;
Expand All @@ -298,10 +300,10 @@ private void addConfiguration(ConfigurationSet aSet, String aCasGroupId, Feature
}

private void addLinkConfiguration(ConfigurationSet aSet, String aCasGroupId,
FeatureStructure aFS)
AnnotationBase aHost)
{
var position = aSet.getPosition();
var feat = aFS.getType().getFeatureByBaseName(position.getLinkFeature());
var feat = aHost.getType().getFeatureByBaseName(position.getLinkFeature());

// If the CAS has not been upgraded yet to include the feature, then there are no
// configurations for it.
Expand All @@ -311,19 +313,22 @@ private void addLinkConfiguration(ConfigurationSet aSet, String aCasGroupId,

// For each slot at the given position in the FS-to-be-added, we need find a
// corresponding configuration
var links = getFeature(aFS, feat, ArrayFS.class);
var links = getFeature(aHost, feat, ArrayFS.class);
linkLoop: for (var i = 0; i < links.size(); i++) {
var link = links.get(i);
var adapter = getAdapter(aFS.getType().getName());
var adapter = getAdapter(aHost.getType().getName());
var decl = adapter.getLinkFeature(position.getLinkFeature());

LOG.trace("`-> link {}", decl);

// Check if this configuration is already present
Configuration configuration = null;
switch (position.getLinkFeatureMultiplicityMode()) {
case ONE_TARGET_MULTIPLE_ROLES: {
var role = link
.getStringValue(link.getType().getFeatureByBaseName(decl.getRoleFeature()));
if (!Objects.equals(role, position.getLinkRole())) {
LOG.trace(" `-> role mismatch", decl);
continue linkLoop;
}

Expand All @@ -340,8 +345,10 @@ private void addLinkConfiguration(ConfigurationSet aSet, String aCasGroupId,
repLink.getType().getFeatureByBaseName(decl.getTargetFeature()));

// Compare targets
if (samePosition(repTarget, target)) {
if (samePosition(repTarget, target) && equalsFS(aHost, repFS)) {
configuration = cfg;
LOG.trace(" `-> target position match");
LOG.trace(" `-> host match");
break cfgLoop;
}
}
Expand All @@ -352,6 +359,7 @@ private void addLinkConfiguration(ConfigurationSet aSet, String aCasGroupId,
link.getType().getFeatureByBaseName(decl.getTargetFeature()));
if (!(target.getBegin() == position.getLinkTargetBegin()
&& target.getEnd() == position.getLinkTargetEnd())) {
LOG.trace(" `-> target offset mismatch", decl);
continue linkLoop;
}

Expand All @@ -368,8 +376,10 @@ private void addLinkConfiguration(ConfigurationSet aSet, String aCasGroupId,
repLink.getType().getFeatureByBaseName(decl.getRoleFeature()));

// Compare roles
if (Objects.equals(role, linkRole)) {
if (Objects.equals(role, linkRole) && equalsFS(aHost, repFS)) {
configuration = cfg;
LOG.trace(" `-> role match: [{}]", linkRole);
LOG.trace(" `-> host match");
break cfgLoop;
}
}
Expand All @@ -380,12 +390,14 @@ private void addLinkConfiguration(ConfigurationSet aSet, String aCasGroupId,
link.getType().getFeatureByBaseName(decl.getTargetFeature()));
if (!(target.getBegin() == position.getLinkTargetBegin()
&& target.getEnd() == position.getLinkTargetEnd())) {
LOG.trace(" `-> target offset mismatch", decl);
continue linkLoop;
}

var role = link
.getStringValue(link.getType().getFeatureByBaseName(decl.getRoleFeature()));
if (!Objects.equals(role, position.getLinkRole())) {
LOG.trace(" `-> role mismatch", decl);
continue linkLoop;
}

Expand All @@ -401,8 +413,12 @@ private void addLinkConfiguration(ConfigurationSet aSet, String aCasGroupId,
repLink.getType().getFeatureByBaseName(decl.getTargetFeature()));

// Compare role and target
if (Objects.equals(role, linkRole) && samePosition(repTarget, target)) {
if (Objects.equals(role, linkRole) && samePosition(repTarget, target)
&& equalsFS(aHost, repFS)) {
configuration = cfg;
LOG.trace(" `-> role match: [{}]", linkRole);
LOG.trace(" `-> target position match");
LOG.trace(" `-> host match");
break cfgLoop;
}
}
Expand All @@ -417,9 +433,13 @@ private void addLinkConfiguration(ConfigurationSet aSet, String aCasGroupId,
if (configuration == null) {
configuration = new Configuration(position);
aSet.addConfiguration(configuration);
LOG.trace(" `-> Link configuration created: {}", configuration);
}
else {
LOG.trace(" `-> Link configuration found : {}", configuration);
}

configuration.add(aCasGroupId, aFS, position.getLinkFeature(), i);
configuration.add(aCasGroupId, aHost, position.getLinkFeature(), i);
aSet.addCasGroupId(aCasGroupId);
}
}
Expand All @@ -440,6 +460,10 @@ private void addBaseConfiguration(ConfigurationSet aSet, String aCasGroupId,
if (configuration == null) {
configuration = new Configuration(aSet.getPosition());
aSet.addConfiguration(configuration);
LOG.trace("`-> Base configuration created: {}", configuration);
}
else {
LOG.trace("`-> Base configuration found : {}", configuration);
}

configuration.add(aCasGroupId, aFS);
Expand Down Expand Up @@ -669,7 +693,7 @@ private Boolean equalsPrimitiveOrMultiValueFeature(FeatureStructure aFS1, Featur
return null;
}

private boolean samePosition(Annotation aFS1, Annotation aFS2)
private boolean samePosition(AnnotationBase aFS1, AnnotationBase aFS2)
{
// Null check
if (aFS1 == null || aFS2 == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.jcas.cas.AnnotationBase;

import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position;
import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.internal.AID;
Expand Down Expand Up @@ -109,10 +110,10 @@ public AID getRepresentativeAID()
return e.getValue();
}

public FeatureStructure getRepresentative(Map<String, CAS> aCasMap)
public AnnotationBase getRepresentative(Map<String, CAS> aCasMap)
{
var e = fsAddresses.entrySet().iterator().next();
return selectFsByAddr(aCasMap.get(e.getKey()), e.getValue().addr);
return (AnnotationBase) selectFsByAddr(aCasMap.get(e.getKey()), e.getValue().addr);
}

public AID getAID(String aCasGroupId)
Expand Down Expand Up @@ -202,28 +203,40 @@ public List<FeatureStructure> getFses(String aCasGroupId, Map<String, CAS> aCasM
public String toString()
{
var sb = new StringBuilder();
sb.append('[');
for (var e : fsAddresses.entrySet()) {
if (sb.length() > 1) {
sb.append(", ");
}
sb.append(e.getKey());
sb.append(": ");
sb.append(e.getValue());
if (duplicates != null) {
sb.append(" (duplicates: ");
for (var entries : duplicates.entrySet()) {
sb.append(" {");
sb.append(entries.getKey());
sb.append(entries.getValue().stream().map(String::valueOf)
.collect(joining(", ")));
sb.append("} ");
if (!fsAddresses.isEmpty()) {
for (var e : fsAddresses.entrySet()) {
if (sb.length() > 1) {
sb.append(", ");
}

sb.append(e.getKey());
sb.append(": ");
sb.append(e.getValue());
if (duplicates != null) {
sb.append(" (duplicates: ");
for (var entries : duplicates.entrySet()) {
sb.append(" {");
sb.append(entries.getKey());
sb.append(entries.getValue().stream() //
.map(String::valueOf) //
.collect(joining(", ")));
sb.append("} ");
}
sb.append(")");
}
sb.append(")");
}

sb.insert(0, " ~= [");
sb.insert(0, getRepresentativeAID());
sb.append("]");
}
sb.append("] -> ");
sb.append(getRepresentativeAID());
else {
sb.append("empty");
}

sb.insert(0, ": ");
sb.insert(0, position);

return sb.toString();
}
}
Loading

0 comments on commit 1f66216

Please sign in to comment.