Skip to content

Commit

Permalink
Add the capacity to negate attributes in a node, rather than requirin…
Browse files Browse the repository at this point in the history
…g negative lookahead regex
  • Loading branch information
AngledLuffa committed Feb 22, 2025
1 parent 81290ba commit 7399e9b
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 80 deletions.
19 changes: 19 additions & 0 deletions src/edu/stanford/nlp/semgraph/semgrex/Attribute.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package edu.stanford.nlp.semgraph.semgrex;

import java.io.Serializable;

public class Attribute implements Serializable {
final String key;
final Object cased;
final Object caseless;
final boolean negated;

Attribute(String key, Object cased, Object caseless, boolean negated) {
this.key = key;
this.cased = cased;
this.caseless = caseless;
this.negated = negated;
}

private static final long serialVersionUID = 973567614155612487L;
}
25 changes: 17 additions & 8 deletions src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package edu.stanford.nlp.semgraph.semgrex;

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.util.Triple;

/**
* Stores attributes for a Semgrex NodePattern.
Expand All @@ -18,12 +22,14 @@
public class NodeAttributes {
private boolean root;
private boolean empty;
private Map<String, String> attributes;
private List<Triple<String, String, Boolean>> attributes;
private Set<String> positiveAttributes;

public NodeAttributes() {
root = false;
empty = false;
attributes = new LinkedHashMap<>();
attributes = new ArrayList<>();
positiveAttributes = new HashSet<>();
}

public void setRoot(boolean root) {
Expand All @@ -42,14 +48,17 @@ public boolean empty() {
return empty;
}

public void setAttribute(String key, String value) {
if (attributes.containsKey(key)) {
public void setAttribute(String key, String value, boolean negated) {
if (positiveAttributes.contains(key)) {
throw new SemgrexParseException("Duplicate attribute " + key + " found in semgrex expression");
}
attributes.put(key, value);
if (!negated) {
positiveAttributes.add(key);
}
attributes.add(new Triple(key, value, negated));
}

public Map<String, String> attributes() {
public List<Triple<String, String, Boolean>> attributes() {
return attributes;
}
}
62 changes: 28 additions & 34 deletions src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
Expand All @@ -13,6 +12,7 @@
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;

public class NodePattern extends SemgrexPattern {
Expand All @@ -31,7 +31,7 @@ public class NodePattern extends SemgrexPattern {
* value.
* Otherwise, the type will be a Pattern, and you must use Pattern.matches().
*/
private final Map<String, Pair<Object, Object>> attributes;
private final List<Attribute> attributes;
private final boolean isRoot;
private final boolean isLink;
private final boolean isEmpty;
Expand All @@ -43,33 +43,34 @@ public class NodePattern extends SemgrexPattern {
private List<Pair<Integer, String>> variableGroups;

public NodePattern(GraphRelation r, boolean negDesc,
Map<String, String> attrs,
List<Triple<String, String, Boolean>> attrs,
boolean root, boolean empty, boolean isLink, String name) {
this(r, negDesc, attrs, root, empty, isLink, name,
new ArrayList<>(0));
}

// TODO: there is no capacity for named variable groups in the parser right now
public NodePattern(GraphRelation r, boolean negDesc,
Map<String, String> attrs,
List<Triple<String, String, Boolean>> attrs,
boolean root, boolean empty, boolean isLink, String name,
List<Pair<Integer, String>> variableGroups) {
this.reln = r;
this.negDesc = negDesc;
this.isLink = isLink;
// order the attributes so that the pattern stays the same when
// printing a compiled pattern
attributes = new LinkedHashMap<>();
attributes = new ArrayList<>();
descString = "{";
for (Map.Entry<String, String> entry : attrs.entrySet()) {
for (Triple<String, String, Boolean> entry : attrs) {
if (!descString.equals("{"))
descString += ";";
String key = entry.getKey();
String value = entry.getValue();
String key = entry.first();
String value = entry.second();
boolean negated = entry.third();

// Add the attributes for this key
if (value.equals("__")) {
attributes.put(key, Pair.makePair(true, true));
attributes.add(new Attribute(key, true, true, negated));
} else if (value.matches("/.*/")) {
boolean isRegexp = false;
for (int i = 1; i < value.length() - 1; ++i) {
Expand All @@ -81,34 +82,24 @@ public NodePattern(GraphRelation r, boolean negDesc,
}
String patternContent = value.substring(1, value.length() - 1);
if (isRegexp) {
attributes.put(key, Pair.makePair(
Pattern.compile(patternContent),
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE))
);
attributes.add(new Attribute(key,
Pattern.compile(patternContent),
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE),
negated));
} else {
attributes.put(key, Pair.makePair(patternContent, patternContent));
attributes.add(new Attribute(key, patternContent, patternContent, negated));
}
} else { // raw description
attributes.put(key, Pair.makePair(value, value));
attributes.add(new Attribute(key, value, value, negated));
}



// if (value.equals("__")) {
// attributes.put(key, Pair.makePair(Pattern.compile(".*"), Pattern.compile(".*", Pattern.CASE_INSENSITIVE)));
// } else if (value.matches("/.*/")) {
// attributes.put(key, Pair.makePair(
// Pattern.compile(value.substring(1, value.length() - 1)),
// Pattern.compile(value.substring(1, value.length() - 1), Pattern.CASE_INSENSITIVE))
// );
// } else { // raw description
// attributes.put(key, Pair.makePair(
// Pattern.compile("^(" + value + ")$"),
// Pattern.compile("^(" + value + ")$", Pattern.CASE_INSENSITIVE))
// );
// }
descString += (key + ':' + value);
if (negated) {
descString += (key + "!:" + value);
} else {
descString += (key + ':' + value);
}
}

if (root) {
if (!descString.equals("{"))
descString += ";";
Expand Down Expand Up @@ -145,8 +136,8 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
return (negDesc ? !node.equals(IndexedWord.NO_WORD) : node.equals(IndexedWord.NO_WORD));

// log.info("Attributes are: " + attributes);
for (Map.Entry<String, Pair<Object, Object>> attr : attributes.entrySet()) {
String key = attr.getKey();
for (Attribute attr : attributes) {
String key = attr.key;
// System.out.println(key);
String nodeValue;
// if (key.equals("idx"))
Expand All @@ -167,7 +158,7 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
return negDesc;

// Get the node pattern
Object toMatch = ignoreCase ? attr.getValue().second : attr.getValue().first;
Object toMatch = ignoreCase ? attr.caseless : attr.cased;
boolean matches;
if (toMatch instanceof Boolean) {
matches = ((Boolean) toMatch);
Expand All @@ -182,6 +173,9 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
} else {
throw new IllegalStateException("Unknown matcher type: " + toMatch + " (of class + " + toMatch.getClass() + ")");
}
if (attr.negated) {
matches = !matches;
}

if (!matches) {
// System.out.println("doesn't match");
Expand Down
Loading

0 comments on commit 7399e9b

Please sign in to comment.