Skip to content

Commit

Permalink
Add a negative containment to semgrex to match the containment option
Browse files Browse the repository at this point in the history
  • Loading branch information
AngledLuffa committed Mar 2, 2025
1 parent fe6ed18 commit 451af86
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 56 deletions.
11 changes: 6 additions & 5 deletions src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.util.Quadruple;
import edu.stanford.nlp.util.Triple;

/**
Expand All @@ -31,8 +32,8 @@ public class NodeAttributes {
// for individual elements of that map rather than turn the map into a string
// and search on its contents that way. This is especially true since there
// is no guarantee the map will be in a consistent order.
// String, String, String: node attribute for a map (such as CoNLLUFeats), key in that map, value to match
private List<Triple<String, String, String>> contains;
// String, String, String, Boolean: node attribute for a map (such as CoNLLUFeats), key in that map, value to match, negated?
private List<Quadruple<String, String, String, Boolean>> contains;

public NodeAttributes() {
root = false;
Expand Down Expand Up @@ -68,15 +69,15 @@ public void setAttribute(String key, String value, boolean negated) {
attributes.add(new Triple(key, value, negated));
}

public void addContains(String annotation, String key, String value) {
contains.add(new Triple(annotation, key, value));
public void addContains(String annotation, String key, String value, Boolean negated) {
contains.add(new Quadruple(annotation, key, value, negated));
}

/**
 * Returns a read-only view of the attribute entries recorded on this object.
 * Each entry is a (key, value, negated) triple, in the form added by
 * {@code setAttribute(String key, String value, boolean negated)}.
 *
 * @return the attribute list wrapped with {@link Collections#unmodifiableList}
 */
public List<Triple<String, String, Boolean>> attributes() {
return Collections.unmodifiableList(attributes);
}

public List<Triple<String, String, String>> contains() {
public List<Quadruple<String, String, String, Boolean>> contains() {
return Collections.unmodifiableList(contains);
}
}
28 changes: 16 additions & 12 deletions src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Quadruple;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;

Expand Down Expand Up @@ -91,10 +92,11 @@ public NodePattern(GraphRelation r, boolean negDesc,
}
}

for (Triple<String, String, String> entry : attrs.contains()) {
for (Quadruple<String, String, String, Boolean> entry : attrs.contains()) {
String annotation = entry.first();
String key = entry.second();
String value = entry.third();
boolean negated = entry.fourth();

Class<?> clazz = AnnotationLookup.getValueType(AnnotationLookup.toCoreKey(annotation));
boolean isMap = clazz != null && Map.class.isAssignableFrom(clazz);
Expand All @@ -105,11 +107,11 @@ public NodePattern(GraphRelation r, boolean negDesc,
final Attribute attr;
// Add the attributes for this key
if (value.equals("__")) {
attr = new Attribute(key, true, true, false);
attr = new Attribute(key, true, true, negated);
} else if (value.matches("/.*/")) {
attr = buildRegexAttribute(key, value, false);
attr = buildRegexAttribute(key, value, negated);
} else { // raw description
attr = new Attribute(key, value, value, false);
attr = new Attribute(key, value, value, negated);
}
partialAttributes.add(new Pair<>(annotation, attr));

Expand Down Expand Up @@ -239,17 +241,19 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i

Class clazz = Env.lookupAnnotationKey(env, annotation);
Object rawmap = node.get(clazz);
// if the map is null, it can't possibly match...
final String nodeValue;
if (rawmap == null) {
return negDesc;
nodeValue = null;
} else {
if (!(rawmap instanceof Map))
throw new RuntimeException("Can only use partial attributes with Maps... this should have been checked at creation time!");
Map<String, ?> map = (Map) rawmap;

// TODO: allow for regex match on the keys?
Object value = map.get(attr.key);
nodeValue = (value == null) ? null : value.toString();
}
if (!(rawmap instanceof Map))
throw new RuntimeException("Can only use partial attributes with Maps... this should have been checked at creation time!");
Map<String, ?> map = (Map) rawmap;

// TODO: allow for regex match on the keys?
Object value = map.get(attr.key);
final String nodeValue = (value == null) ? null : value.toString();
boolean matches = checkMatch(attr, ignoreCase, nodeValue);
if (!matches) {
return negDesc;
Expand Down
75 changes: 45 additions & 30 deletions src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
case 11:
case 15:
case 17:
case 23:{
case 24:{
node = SubNode(GraphRelation.ROOT);
children.add(node);
label_1:
Expand Down Expand Up @@ -135,7 +135,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
}
case 15:
case 17:
case 23:{
case 24:{
result = ModNode(r);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case RELATION:
Expand Down Expand Up @@ -397,7 +397,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 15:
case 17:
case 23:{
case 24:{
node = ModNode(reln);
break;
}
Expand Down Expand Up @@ -454,7 +454,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
case 14:
case 15:
case 17:
case 23:{
case 24:{
;
break;
}
Expand Down Expand Up @@ -485,7 +485,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
boolean startUnderNeg;
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 17:
case 23:{
case 24:{
child = Child(r);
break;
}
Expand All @@ -512,7 +512,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
child = NodeDisj(r);
break;
}
case 23:{
case 24:{
child = Description(r);
break;
}
Expand Down Expand Up @@ -569,8 +569,22 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
}
break;
}
case ALIGNRELN:{
attrType = jj_consume_token(ALIGNRELN);
case ALIGNRELN:
case 23:{
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case ALIGNRELN:{
attrType = jj_consume_token(ALIGNRELN);
break;
}
case 23:{
attrType = jj_consume_token(23);
break;
}
default:
jj_la1[25] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
key = jj_consume_token(IDENTIFIER);
jj_consume_token(21);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
Expand All @@ -583,19 +597,20 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
break;
}
default:
jj_la1[25] = jj_gen;
jj_la1[26] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
if (attr == null || key == null || value == null) {
{if (true) throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
" key=" + key + " value=" + value);}
}
attributes.addContains(attr.image, key.image, value.image);
boolean negated = attrType.image.equals("!@");
attributes.addContains(attr.image, key.image, value.image, negated);
break;
}
default:
jj_la1[26] = jj_gen;
jj_la1[27] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
Expand All @@ -612,7 +627,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
break;
}
default:
jj_la1[27] = jj_gen;
jj_la1[28] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
Expand All @@ -622,7 +637,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
boolean link = false;
NodeAttributes attributes = new NodeAttributes();
NodePattern pat;
jj_consume_token(23);
jj_consume_token(24);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case IDENTIFIER:
case EMPTY:
Expand All @@ -631,24 +646,24 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
label_6:
while (true) {
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 24:{
case 25:{
;
break;
}
default:
jj_la1[28] = jj_gen;
jj_la1[29] = jj_gen;
break label_6;
}
jj_consume_token(24);
jj_consume_token(25);
AddAttribute(attributes);
}
break;
}
default:
jj_la1[29] = jj_gen;
jj_la1[30] = jj_gen;
;
}
jj_consume_token(25);
jj_consume_token(26);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 21:{
jj_consume_token(21);
Expand All @@ -665,7 +680,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
break;
}
default:
jj_la1[30] = jj_gen;
jj_la1[31] = jj_gen;
;
}
pat = new NodePattern(r, underNodeNegation, attributes, link, name != null ? name.image : null);
Expand All @@ -682,13 +697,13 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
public Token jj_nt;
private int jj_ntk;
private int jj_gen;
final private int[] jj_la1 = new int[31];
final private int[] jj_la1 = new int[32];
static private int[] jj_la1_0;
static {
jj_la1_init_0();
}
private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x400,0x828808,0x3801c,0x3801c,0x828800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x828800,0x2000,0x82c000,0x4000,0x828000,0x820000,0x400400,0x110,0x110,0x400408,0xd0,0x1000000,0xd0,0x200000,};
jj_la1_0 = new int[] {0x400,0x1028808,0x3801c,0x3801c,0x1028800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x1028800,0x2000,0x102c000,0x4000,0x1028000,0x1020000,0x400400,0x110,0x800008,0x110,0xc00408,0xd0,0x2000000,0xd0,0x200000,};
}

/** Constructor with InputStream. */
Expand All @@ -702,7 +717,7 @@ public SemgrexParser(java.io.InputStream stream, String encoding) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
}

/** Reinitialise. */
Expand All @@ -716,7 +731,7 @@ public void ReInit(java.io.InputStream stream, String encoding) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
}

/** Constructor. */
Expand All @@ -726,7 +741,7 @@ public SemgrexParser(java.io.Reader stream) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
}

/** Reinitialise. */
Expand All @@ -744,7 +759,7 @@ public void ReInit(java.io.Reader stream) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
}

/** Constructor with generated Token Manager. */
Expand All @@ -753,7 +768,7 @@ public SemgrexParser(SemgrexParserTokenManager tm) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
}

/** Reinitialise. */
Expand All @@ -762,7 +777,7 @@ public void ReInit(SemgrexParserTokenManager tm) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
}

private Token jj_consume_token(int kind) throws ParseException {
Expand Down Expand Up @@ -813,12 +828,12 @@ private int jj_ntk_f() {
/** Generate ParseException. */
public ParseException generateParseException() {
jj_expentries.clear();
boolean[] la1tokens = new boolean[26];
boolean[] la1tokens = new boolean[27];
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
}
for (int i = 0; i < 31; i++) {
for (int i = 0; i < 32; i++) {
if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1<<j)) != 0) {
Expand All @@ -827,7 +842,7 @@ public ParseException generateParseException() {
}
}
}
for (int i = 0; i < 26; i++) {
for (int i = 0; i < 27; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;
Expand Down
5 changes: 3 additions & 2 deletions src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -283,13 +283,14 @@ void AddAttribute(NodeAttributes attributes) : {
}
})
|
(attrType = "@") (key = <IDENTIFIER>) "=" (value = <IDENTIFIER> | value = <REGEX>)
(attrType = "@" | attrType = "!@") (key = <IDENTIFIER>) "=" (value = <IDENTIFIER> | value = <REGEX>)
{
if (attr == null || key == null || value == null) {
throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
" key=" + key + " value=" + value);
}
attributes.addContains(attr.image, key.image, value.image);
boolean negated = attrType.image.equals("!@");
attributes.addContains(attr.image, key.image, value.image, negated);
})
)
|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ interface SemgrexParserConstants {
"\"~\"",
"\"=\"",
"\"!:\"",
"\"!@\"",
"\"{\"",
"\";\"",
"\"}\"",
Expand Down
Loading

0 comments on commit 451af86

Please sign in to comment.