Skip to content

Commit

Permalink
Parse a node containment option... need to process it in the NodePatt…
Browse files Browse the repository at this point in the history
…ern still

Keep track of the partial attributes.  Still need to actually check them...
  • Loading branch information
AngledLuffa committed Feb 27, 2025
1 parent 0de1865 commit b5d5792
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 51 deletions.
16 changes: 16 additions & 0 deletions src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,20 @@ public class NodeAttributes {
// String, String, Boolean: key, value, negated
private List<Triple<String, String, Boolean>> attributes;
private Set<String> positiveAttributes;
// Some annotations, especially morpho features (CoreAnnotations.CoNLLUFeats)
// are represented by Maps. In some cases it will be easier to search
// for individual elements of that map rather than turn the map into a string
// and search on its contents that way. This is especially true since there
// is no guarantee the map will be in a consistent order.
// String, String, String: node attribute for a map (such as CoNLLUFeats), key in that map, value to match
private List<Triple<String, String, String>> contains;

/**
 * Creates a holder with the root and empty flags set to false and with
 * empty collections for the attributes, the positive attribute names,
 * and the map-containment constraints.
 */
public NodeAttributes() {
  this.attributes = new ArrayList<>();
  this.positiveAttributes = new HashSet<>();
  this.contains = new ArrayList<>();
  this.root = false;
  this.empty = false;
}

public void setRoot(boolean root) {
Expand Down Expand Up @@ -60,7 +68,15 @@ public void setAttribute(String key, String value, boolean negated) {
attributes.add(new Triple(key, value, negated));
}

/**
 * Records a containment constraint on a map-valued node annotation.
 *
 * @param annotation the node attribute which holds a map (such as CoNLLUFeats)
 * @param key the key to look up inside that map
 * @param value the value the entry for {@code key} is expected to match
 */
public void addContains(String annotation, String key, String value) {
  // use the diamond rather than a raw Triple so the element type is checked
  // against List<Triple<String, String, String>> at compile time
  contains.add(new Triple<>(annotation, key, value));
}

/**
 * Returns the recorded attributes as (key, value, negated) triples.
 *
 * @return an unmodifiable view of the attribute list
 */
public List<Triple<String, String, Boolean>> attributes() {
  final List<Triple<String, String, Boolean>> readOnlyView = Collections.unmodifiableList(attributes);
  return readOnlyView;
}

/**
 * Returns the recorded containment constraints as
 * (annotation, key in that annotation's map, value to match) triples.
 *
 * @return an unmodifiable view of the containment list
 */
public List<Triple<String, String, String>> contains() {
  final List<Triple<String, String, String>> readOnlyView = Collections.unmodifiableList(contains);
  return readOnlyView;
}
}
77 changes: 60 additions & 17 deletions src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ public class NodePattern extends SemgrexPattern {
* Otherwise, the type will be a Pattern, and you must use Pattern.matches().
*/
private final List<Attribute> attributes;
/**
* Attributes which represent Maps (eg CoNLLUFeats)
* and only partial matches are necessary
*/
private final List<Pair<String, Attribute>> partialAttributes;
private final boolean isRoot;
private final boolean isLink;
private final boolean isEmpty;
Expand All @@ -58,6 +63,9 @@ public NodePattern(GraphRelation r, boolean negDesc,
// order the attributes so that the pattern stays the same when
// printing a compiled pattern
this.attributes = new ArrayList<>();
// same with partial attributes
this.partialAttributes = new ArrayList<>();

descString = "{";
for (Triple<String, String, Boolean> entry : attrs.attributes()) {
if (!descString.equals("{"))
Expand All @@ -70,23 +78,7 @@ public NodePattern(GraphRelation r, boolean negDesc,
if (value.equals("__")) {
attributes.add(new Attribute(key, true, true, negated));
} else if (value.matches("/.*/")) {
boolean isRegexp = false;
for (int i = 1; i < value.length() - 1; ++i) {
char chr = value.charAt(i);
if ( !( (chr >= 'A' && chr <= 'Z') || (chr >= 'a' && chr <= 'z') || (chr >= '0' && chr <= '9') ) ) {
isRegexp = true;
break;
}
}
String patternContent = value.substring(1, value.length() - 1);
if (isRegexp) {
attributes.add(new Attribute(key,
Pattern.compile(patternContent),
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE),
negated));
} else {
attributes.add(new Attribute(key, patternContent, patternContent, negated));
}
attributes.add(buildRegexAttribute(key, value, negated));
} else { // raw description
attributes.add(new Attribute(key, value, value, negated));
}
Expand All @@ -98,6 +90,27 @@ public NodePattern(GraphRelation r, boolean negDesc,
}
}

for (Triple<String, String, String> entry : attrs.contains()) {
String annotation = entry.first();
String key = entry.second();
String value = entry.third();

final Attribute attr;
// Add the attributes for this key
if (value.equals("__")) {
attr = new Attribute(key, true, true, false);
} else if (value.matches("/.*/")) {
attr = buildRegexAttribute(key, value, false);
} else { // raw description
attr = new Attribute(key, value, value, false);
}
partialAttributes.add(new Pair<>(annotation, attr));

if (!descString.equals("{"))
descString += ";";
descString += (annotation + "@" + key + "=" + value);
}

if (attrs.root()) {
if (!descString.equals("{"))
descString += ";";
Expand All @@ -118,6 +131,30 @@ public NodePattern(GraphRelation r, boolean negDesc,
this.variableGroups = Collections.unmodifiableList(variableGroups);
}

/**
 * Builds the Attribute for a value written with regex delimiters,
 * meaning the first and last characters are stripped off before use.
 * <br>
 * If every remaining character is an ASCII letter or digit, the text
 * is handed to the Attribute as a plain String.  Otherwise it is
 * compiled twice: once as written and once ignoring case.
 *
 * @param key the attribute name to attach the matcher to
 * @param value the delimited expression, including its delimiters
 * @param negated whether the resulting Attribute is negated
 * @return an Attribute which matches this expression
 */
private Attribute buildRegexAttribute(String key, String value, boolean negated) {
  final String patternContent = value.substring(1, value.length() - 1);

  boolean plainText = true;
  for (char chr : patternContent.toCharArray()) {
    final boolean upper = chr >= 'A' && chr <= 'Z';
    final boolean lower = chr >= 'a' && chr <= 'z';
    final boolean digit = chr >= '0' && chr <= '9';
    if (!upper && !lower && !digit) {
      // anything outside [A-Za-z0-9] may be a regex metacharacter
      plainText = false;
      break;
    }
  }

  if (plainText) {
    return new Attribute(key, patternContent, patternContent, negated);
  }

  final Pattern cased = Pattern.compile(patternContent);
  final Pattern caseless = Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE);
  return new Attribute(key, cased, caseless, negated);
}

private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue) {
if (nodeValue == null) {
// treat non-existent attributes as having matched a negated expression
Expand Down Expand Up @@ -189,6 +226,12 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
return negDesc;
}
}
for (Pair<String, Attribute> partialAttribute : partialAttributes) {
String annotation = partialAttribute.first();
Attribute attr = partialAttribute.second();
// TODO
}

// System.out.println("matches");
// System.out.println("");
return !negDesc;
Expand Down
102 changes: 70 additions & 32 deletions src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -526,40 +526,77 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
}

final public void AddAttribute(NodeAttributes attributes) throws ParseException {Token attr = null;
Token key = null;
Token value = null;
Token attrType = null;
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case ALIGNRELN:
case IDENTIFIER:{
attr = jj_consume_token(IDENTIFIER);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 10:{
attrType = jj_consume_token(10);
break;
case IDENTIFIER:{
attr = jj_consume_token(IDENTIFIER);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 10:{
attrType = jj_consume_token(10);
break;
}
case 22:{
attrType = jj_consume_token(22);
break;
}
default:
jj_la1[23] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
case 22:{
attrType = jj_consume_token(22);
break;
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case IDENTIFIER:{
value = jj_consume_token(IDENTIFIER);
break;
}
case REGEX:{
value = jj_consume_token(REGEX);
break;
}
default:
jj_la1[24] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
default:
jj_la1[23] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case IDENTIFIER:{
value = jj_consume_token(IDENTIFIER);
break;
}
case REGEX:{
value = jj_consume_token(REGEX);
case ALIGNRELN:{
attrType = jj_consume_token(ALIGNRELN);
key = jj_consume_token(IDENTIFIER);
jj_consume_token(21);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case IDENTIFIER:{
value = jj_consume_token(IDENTIFIER);
break;
}
case REGEX:{
value = jj_consume_token(REGEX);
break;
}
default:
jj_la1[25] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
break;
}
default:
jj_la1[24] = jj_gen;
jj_la1[26] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
if (attr != null && value != null) {
if (attrType.image.equals("@")) {
if (attr == null || key == null || value == null) {
{if (true) throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
" key=" + key + " value=" + value);}
}
attributes.addContains(attr.image, key.image, value.image);
} else if (attr != null && value != null) {
boolean negated = attrType.image.equals("!:");
attributes.setAttribute(attr.image, value.image, negated);
}
Expand All @@ -576,7 +613,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
break;
}
default:
jj_la1[25] = jj_gen;
jj_la1[27] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
Expand All @@ -588,6 +625,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
NodePattern pat;
jj_consume_token(23);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case ALIGNRELN:
case IDENTIFIER:
case EMPTY:
case ROOT:{
Expand All @@ -600,7 +638,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
break;
}
default:
jj_la1[26] = jj_gen;
jj_la1[28] = jj_gen;
break label_6;
}
jj_consume_token(24);
Expand All @@ -609,7 +647,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
break;
}
default:
jj_la1[27] = jj_gen;
jj_la1[29] = jj_gen;
;
}
jj_consume_token(25);
Expand All @@ -629,7 +667,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
break;
}
default:
jj_la1[28] = jj_gen;
jj_la1[30] = jj_gen;
;
}
pat = new NodePattern(r, underNodeNegation, attributes, link, name != null ? name.image : null);
Expand All @@ -646,13 +684,13 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
public Token jj_nt;
private int jj_ntk;
private int jj_gen;
final private int[] jj_la1 = new int[29];
final private int[] jj_la1 = new int[31];
static private int[] jj_la1_0;
static {
jj_la1_init_0();
}
private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x400,0x828808,0x3801c,0x3801c,0x828800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x828800,0x2000,0x82c000,0x4000,0x828000,0x820000,0x400400,0x110,0xd0,0x1000000,0xd0,0x200000,};
jj_la1_0 = new int[] {0x400,0x828808,0x3801c,0x3801c,0x828800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x828800,0x2000,0x82c000,0x4000,0x828000,0x820000,0x400400,0x110,0x110,0x18,0xd8,0x1000000,0xd8,0x200000,};
}

/** Constructor with InputStream. */
Expand All @@ -666,7 +704,7 @@ public SemgrexParser(java.io.InputStream stream, String encoding) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
}

/** Reinitialise. */
Expand All @@ -680,7 +718,7 @@ public void ReInit(java.io.InputStream stream, String encoding) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
}

/** Constructor. */
Expand All @@ -690,7 +728,7 @@ public SemgrexParser(java.io.Reader stream) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
}

/** Reinitialise. */
Expand All @@ -708,7 +746,7 @@ public void ReInit(java.io.Reader stream) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
}

/** Constructor with generated Token Manager. */
Expand All @@ -717,7 +755,7 @@ public SemgrexParser(SemgrexParserTokenManager tm) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
}

/** Reinitialise. */
Expand All @@ -726,7 +764,7 @@ public void ReInit(SemgrexParserTokenManager tm) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
}

private Token jj_consume_token(int kind) throws ParseException {
Expand Down Expand Up @@ -782,7 +820,7 @@ public ParseException generateParseException() {
la1tokens[jj_kind] = true;
jj_kind = -1;
}
for (int i = 0; i < 29; i++) {
for (int i = 0; i < 31; i++) {
if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1<<j)) != 0) {
Expand Down
13 changes: 11 additions & 2 deletions src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -271,12 +271,21 @@ SemgrexPattern Child(GraphRelation r) : {

void AddAttribute(NodeAttributes attributes) : {
Token attr = null;
Token key = null;
Token value = null;
Token attrType = null;
} {
((attr = <IDENTIFIER> (attrType = ":" | attrType = "!:") (value = <IDENTIFIER> | value = <REGEX>) )
((attr = <IDENTIFIER>
( (attrType = ":" | attrType = "!:") (value = <IDENTIFIER> | value = <REGEX>) ) |
( (attrType = "@") (key = <IDENTIFIER>) "=" (value = <IDENTIFIER> | value = <REGEX>) ) )
{
if (attr != null && value != null) {
if (attrType.image.equals("@")) {
if (attr == null || key == null || value == null) {
throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
" key=" + key + " value=" + value);
}
attributes.addContains(attr.image, key.image, value.image);
} else if (attr != null && value != null) {
boolean negated = attrType.image.equals("!:");
attributes.setAttribute(attr.image, value.image, negated);
}
Expand Down

0 comments on commit b5d5792

Please sign in to comment.