From f3f19fdf3f5bf41dfeebb8e124fd1c8b4f47532c Mon Sep 17 00:00:00 2001 From: Jesse Wright <63333554+jeswr@users.noreply.github.com> Date: Sun, 7 May 2023 22:09:03 +0200 Subject: [PATCH] fix: fix relative IRI and context issues (#31) --- __tests__/valid/relative.shaclc | 3 + __tests__/valid/relative.ttl | 8 ++ __tests__/valid/relativeHash.shaclc | 3 + __tests__/valid/relativeHash.ttl | 8 ++ __tests__/valid/relativePrefix.shaclc | 4 + __tests__/valid/relativePrefix.ttl | 9 ++ __tests__/valid/relativePrefixFragment.shaclc | 4 + __tests__/valid/relativePrefixFragment.ttl | 9 ++ .../valid/relativePrefixFragmentThing.shaclc | 4 + .../valid/relativePrefixFragmentThing.ttl | 9 ++ lib/index.js | 21 +++ lib/shaclc.jison | 120 +++++------------- 12 files changed, 115 insertions(+), 87 deletions(-) create mode 100644 __tests__/valid/relative.shaclc create mode 100644 __tests__/valid/relative.ttl create mode 100644 __tests__/valid/relativeHash.shaclc create mode 100644 __tests__/valid/relativeHash.ttl create mode 100644 __tests__/valid/relativePrefix.shaclc create mode 100644 __tests__/valid/relativePrefix.ttl create mode 100644 __tests__/valid/relativePrefixFragment.shaclc create mode 100644 __tests__/valid/relativePrefixFragment.ttl create mode 100644 __tests__/valid/relativePrefixFragmentThing.shaclc create mode 100644 __tests__/valid/relativePrefixFragmentThing.ttl diff --git a/__tests__/valid/relative.shaclc b/__tests__/valid/relative.shaclc new file mode 100644 index 0000000..0ef115f --- /dev/null +++ b/__tests__/valid/relative.shaclc @@ -0,0 +1,3 @@ +BASE + +shape -> {} diff --git a/__tests__/valid/relative.ttl b/__tests__/valid/relative.ttl new file mode 100644 index 0000000..f1ecccd --- /dev/null +++ b/__tests__/valid/relative.ttl @@ -0,0 +1,8 @@ +@base . +@prefix owl: . +@prefix sh: . + +<> a owl:Ontology . + + a sh:NodeShape ; + sh:targetClass . diff --git a/__tests__/valid/relativeHash.shaclc b/__tests__/valid/relativeHash.shaclc new file mode 100644 index 0000000..de8813a --- /dev/null +++ b/__tests__/valid/relativeHash.shaclc @@ -0,0 +1,3 @@ +BASE + +shape -> {} diff --git a/__tests__/valid/relativeHash.ttl b/__tests__/valid/relativeHash.ttl new file mode 100644 index 0000000..f2020c7 --- /dev/null +++ b/__tests__/valid/relativeHash.ttl @@ -0,0 +1,8 @@ +@base . +@prefix owl: . +@prefix sh: . + +<> a owl:Ontology . + + a sh:NodeShape ; + sh:targetClass . diff --git a/__tests__/valid/relativePrefix.shaclc b/__tests__/valid/relativePrefix.shaclc new file mode 100644 index 0000000..a5fea17 --- /dev/null +++ b/__tests__/valid/relativePrefix.shaclc @@ -0,0 +1,4 @@ +BASE +PREFIX ex: <> + +shape ex:TestShape -> ex:TestClass {} diff --git a/__tests__/valid/relativePrefix.ttl b/__tests__/valid/relativePrefix.ttl new file mode 100644 index 0000000..5d4617d --- /dev/null +++ b/__tests__/valid/relativePrefix.ttl @@ -0,0 +1,9 @@ +@base . +@prefix owl: . +@prefix sh: . +@prefix ex: <> . + +<> a owl:Ontology . + +ex:TestShape a sh:NodeShape ; + sh:targetClass ex:TestClass . diff --git a/__tests__/valid/relativePrefixFragment.shaclc b/__tests__/valid/relativePrefixFragment.shaclc new file mode 100644 index 0000000..23334bd --- /dev/null +++ b/__tests__/valid/relativePrefixFragment.shaclc @@ -0,0 +1,4 @@ +BASE +PREFIX ex: <#> + +shape ex:TestShape -> ex:TestClass {} diff --git a/__tests__/valid/relativePrefixFragment.ttl b/__tests__/valid/relativePrefixFragment.ttl new file mode 100644 index 0000000..bc5e136 --- /dev/null +++ b/__tests__/valid/relativePrefixFragment.ttl @@ -0,0 +1,9 @@ +@base . +@prefix owl: . +@prefix sh: . +@prefix ex: <#> . + +<> a owl:Ontology . + +ex:TestShape a sh:NodeShape ; + sh:targetClass ex:TestClass . diff --git a/__tests__/valid/relativePrefixFragmentThing.shaclc b/__tests__/valid/relativePrefixFragmentThing.shaclc new file mode 100644 index 0000000..226e118 --- /dev/null +++ b/__tests__/valid/relativePrefixFragmentThing.shaclc @@ -0,0 +1,4 @@ +BASE +PREFIX ex: <#> + +shape ex:TestShape -> ex:TestClass {} diff --git a/__tests__/valid/relativePrefixFragmentThing.ttl b/__tests__/valid/relativePrefixFragmentThing.ttl new file mode 100644 index 0000000..88e9a4b --- /dev/null +++ b/__tests__/valid/relativePrefixFragmentThing.ttl @@ -0,0 +1,9 @@ +@base . +@prefix owl: . +@prefix sh: . +@prefix ex: <#> . + +<> a owl:Ontology . + +ex:TestShape a sh:NodeShape ; + sh:targetClass ex:TestClass . diff --git a/lib/index.js b/lib/index.js index 8cb2c9a..3d3b00f 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,12 +1,28 @@ const ShaclcParser = require('./ShaclcParser').Parser; const N3 = require('n3'); +// const arr = []; + +// this._parser.Parser = { +// factory: N3.DataFactory, +// base: N3.DataFactory.namedNode('urn:x-base:default'), +// prefixes: { +// rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', +// rdfs: 'http://www.w3.org/2000/01/rdf-schema#', +// sh: 'http://www.w3.org/ns/shacl#', +// xsd: 'http://www.w3.org/2001/XMLSchema#' +// }, +// onQuad: (quad) => { arr.push(quad) }, +// extended: extendedSyntax === true +// } + class Parser { constructor() { } parse(str, { extendedSyntax } = {}) { this._parser = new ShaclcParser(); + this._parser.Parser.factory = N3.DataFactory; this._parser.Parser.base = N3.DataFactory.namedNode('urn:x-base:default'); this._parser.Parser.extended = extendedSyntax === true; @@ -16,6 +32,11 @@ class Parser { sh: 'http://www.w3.org/ns/shacl#', xsd: 'http://www.w3.org/2001/XMLSchema#' } + this._parser.Parser.currentNodeShape = undefined; + this._parser.Parser.currentPropertyNode = undefined; + this._parser.Parser.nodeShapeStack = []; + this._parser.Parser.tempCurrentNodeShape = undefined; + this._parser.Parser.n3Parser = new N3.Parser({ baseIRI: 'urn:x-base:default' }); const arr = [] this._parser.Parser.onQuad = (quad) => { arr.push(quad) }; diff --git a/lib/shaclc.jison b/lib/shaclc.jison index 0272f0f..f1b27eb 100644 --- a/lib/shaclc.jison +++ b/lib/shaclc.jison @@ -1,6 +1,3 @@ -// TODO: Work out why the alternativePath - - %{ /* Grammar specification for a SHACL compact @@ -22,14 +19,6 @@ SH = 'http://www.w3.org/ns/shacl#', OWL = 'http://www.w3.org/2002/07/owl#', RDFS = 'http://www.w3.org/2000/01/rdf-schema#'; - var base = Parser.base = '', basePath = '', baseRoot = '', currentNodeShape, currentPropertyNode, nodeShapeStack = [], tempCurrentNodeShape; - - Parser.prefixes = { - rdf: RDF, - rdfs: RDFS, - sh: SH, - xsd: XSD - } // TODO: Make sure all SPARQL supported datatypes are here const datatypes = { @@ -83,32 +72,8 @@ // TODO: Port over any updates to this from SPARLQL.js // Resolves an IRI against a base path function resolveIRI(iri) { - // Strip off possible angular brackets - if (iri[0] === '<') - iri = iri.substring(1, iri.length - 1); - // Return absolute IRIs unmodified - if (/^[a-z]+:/.test(iri)) - return iri; - if (!Parser.base) - throw new Error('Cannot resolve relative IRI ' + iri + ' because no base IRI was set.'); - - switch (iri[0]) { - // An empty relative IRI indicates the base IRI - case undefined: - return base.value; - // Resolve relative fragment IRIs against the base IRI - case '#': - return base.value + iri; - // Resolve relative query string IRIs by replacing the query string - case '?': - return base.value.replace(/(?:\?.*)?$/, iri); - // Resolve root relative IRIs at the root of the base IRI - case '/': - return base.value.replace(/[^\/:]*$/, '') + iri; - // Resolve all other IRIs at the base IRI's path - default: - return base.value.match(/^(?:[a-z]+:\/*)?[^\/]*/)[0] + iri; - } + // Strip off possible angular brackets and resolve the IRI + return Parser.n3Parser._resolveIRI(iri[0] === '<' ? iri.substring(1, iri.length - 1) : iri) } function expandPrefix(iri) { @@ -153,30 +118,7 @@ fromCharCode = String.fromCharCode; // Translates escape codes in the string into their textual equivalent function unescapeString(string, trimLength) { - string = string.substring(trimLength, string.length - trimLength); - try { - string = string.replace(escapeSequence, function (sequence, unicode4, unicode8, escapedChar) { - var charCode; - if (unicode4) { - charCode = parseInt(unicode4, 16); - if (isNaN(charCode)) throw new Error(); // can never happen (regex), but helps performance - return fromCharCode(charCode); - } - else if (unicode8) { - charCode = parseInt(unicode8, 16); - if (isNaN(charCode)) throw new Error(); // can never happen (regex), but helps performance - if (charCode < 0xFFFF) return fromCharCode(charCode); - return fromCharCode(0xD800 + ((charCode -= 0x10000) >> 10), 0xDC00 + (charCode & 0x3FF)); - } - else { - var replacement = escapeReplacements[escapedChar]; - if (!replacement) throw new Error(); - return replacement; - } - }); - } - catch (error) { return ''; } - return string; + return Parser.n3Parser._lexer._unescape(string.substring(trimLength, string.length - trimLength)); } function emit(s, p, o) { @@ -187,7 +129,7 @@ } function emitProperty(p, o) { - emit(currentPropertyNode, Parser.factory.namedNode(SH + p), o) + emit(Parser.currentPropertyNode, Parser.factory.namedNode(SH + p), o) } function chainProperty(name, p, o) { @@ -318,12 +260,16 @@ PARAM 'deactivated' | 'severity' | 'message' | 'class' | 'data %% // TODO: Work out why this occurs multiple times when the empty file is called with other things (the base from the previous file is somehow getting leaked thorugh) -shaclDoc : directive* (nodeShape|shapeClass)* ttlSection EOF -> emit(Parser.base, Parser.factory.namedNode(RDF_TYPE), Parser.factory.namedNode(OWL + 'Ontology')) +shaclDoc : directive* (nodeShape|shapeClass)* ttlSection EOF -> emit(Parser.factory.namedNode(resolveIRI('')), Parser.factory.namedNode(RDF_TYPE), Parser.factory.namedNode(OWL + 'Ontology')) ; directive : baseDecl | importsDecl | prefixDecl ; // TODO: Remove the duplicate declaration of base -baseDecl : KW_BASE IRIREF -> base = Parser.base = Parser.factory.namedNode($2.slice(1, -1)) +baseDecl : KW_BASE IRIREF + { + Parser.base = Parser.factory.namedNode($2.slice(1, -1)); + Parser.n3Parser._setBase(Parser.base.value); + } ; // TODO: See if this should be resolveIRI($2) @@ -335,15 +281,15 @@ prefixDecl : KW_PREFIX PNAME_NS IRIREF -> Parser.prefixes[$2.substr(0, nodeShapeIri : iri { - nodeShapeStack = false - emit(currentNodeShape = $1, Parser.factory.namedNode(RDF_TYPE), Parser.factory.namedNode(SH + 'NodeShape')) + Parser.nodeShapeStack = false + emit(Parser.currentNodeShape = $1, Parser.factory.namedNode(RDF_TYPE), Parser.factory.namedNode(SH + 'NodeShape')) } ; nodeShape : KW_SHAPE nodeShapeIri targetClass? turtleAnnotation? nodeShapeBody ; -shapeClass : KW_SHAPE_CLASS nodeShapeIri turtleAnnotation? nodeShapeBody -> emit(currentNodeShape, Parser.factory.namedNode(RDF_TYPE), Parser.factory.namedNode(RDFS + 'Class')) +shapeClass : KW_SHAPE_CLASS nodeShapeIri turtleAnnotation? nodeShapeBody -> emit(Parser.currentNodeShape, Parser.factory.namedNode(RDF_TYPE), Parser.factory.namedNode(RDFS + 'Class')) ; turtleAnnotation : ';' turtleAnnotation2 -> ensureExtended() @@ -352,7 +298,7 @@ turtleAnnotation : ';' turtleAnnotation2 -> ensureExtended() turtleAnnotation2 : predicate turtleAnnotation? ; -predicate : iri objectList -> $2.forEach(e => emit(currentNodeShape, $1, e)) +predicate : iri objectList -> $2.forEach(e => emit(Parser.currentNodeShape, $1, e)) ; objectList : object objectTail* -> [$1, ...$2] @@ -371,14 +317,14 @@ objectTail : ',' object -> $2 LB : '[' { - tempCurrentNodeShape = currentNodeShape; - $$ = currentNodeShape = blank(); + Parser.tempCurrentNodeShape = Parser.currentNodeShape; + $$ = Parser.currentNodeShape = blank(); } ; RB : ']' { - currentNodeShape = tempCurrentNodeShape; + Parser.currentNodeShape = Parser.tempCurrentNodeShape; } ; @@ -387,14 +333,14 @@ blankNodeSection : LB turtleAnnotation2 RB -> $1 LP : "%" { - tempCurrentNodeShape = currentNodeShape; - currentNodeShape = currentPropertyNode; + Parser.tempCurrentNodeShape = Parser.currentNodeShape; + Parser.currentNodeShape = Parser.currentPropertyNode; } ; RP : "%" { - currentNodeShape = tempCurrentNodeShape + Parser.currentNodeShape = Parser.tempCurrentNodeShape } ; @@ -403,7 +349,7 @@ pcSection : LP turtleAnnotation2 RP iriHead : iri { - currentNodeShape = $1 + Parser.currentNodeShape = $1 } ; @@ -415,26 +361,26 @@ ttlSection : ttlStatement* startNodeShape : '{' { - if (!nodeShapeStack) { - nodeShapeStack = []; + if (!Parser.nodeShapeStack) { + Parser.nodeShapeStack = []; } else { - nodeShapeStack.push(currentNodeShape); + Parser.nodeShapeStack.push(Parser.currentNodeShape); emit( // In the grammar a path signals the start of a new property declaration - currentPropertyNode, + Parser.currentPropertyNode, Parser.factory.namedNode(SH + 'node'), - currentNodeShape = blank(), + Parser.currentNodeShape = blank(), ) } - $$ = currentNodeShape; + $$ = Parser.currentNodeShape; } ; endNodeShape : '}' { - if (nodeShapeStack.length > 0) { - currentNodeShape = nodeShapeStack.pop(); + if (Parser.nodeShapeStack.length > 0) { + Parser.currentNodeShape = Parser.nodeShapeStack.pop(); } } ; @@ -442,7 +388,7 @@ endNodeShape : '}' nodeShapeBody : startNodeShape constraint* endNodeShape -> $1 ; -targetClass : '->' iri+ -> $2.forEach(node => { emit(currentNodeShape, Parser.factory.namedNode(SH + 'targetClass'), node) }) +targetClass : '->' iri+ -> $2.forEach(node => { emit(Parser.currentNodeShape, Parser.factory.namedNode(SH + 'targetClass'), node) }) ; constraint : ( nodeOrEmit+ | propertyShape ) pcSection? '.' @@ -451,7 +397,7 @@ constraint : ( nodeOrEmit+ | propertyShape ) pcSection? '.' orNotComponent : '|' nodeNot -> $2 ; -nodeOrEmit : nodeOr -> emit(currentNodeShape, Parser.factory.namedNode(SH + $1[0]), $1[1]) +nodeOrEmit : nodeOr -> emit(Parser.currentNodeShape, Parser.factory.namedNode(SH + $1[0]), $1[1]) ; nodeOr : nodeNot @@ -529,9 +475,9 @@ path : pathAlternative { emit( // In the grammar a path signals the start of a new property declaration - currentNodeShape, + Parser.currentNodeShape, Parser.factory.namedNode(SH + 'property'), - currentPropertyNode = blank(), + Parser.currentPropertyNode = blank(), ) emitProperty('path', $1)