Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for parsing reified triples and triple terms #486

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- run: npm ci --ignore-scripts
- run: npm run build
- run: npm run test

lint:
runs-on: ubuntu-latest
strategy:
Expand All @@ -34,7 +34,7 @@ jobs:
- run: npm ci --ignore-scripts
- run: npm run build
- run: npm run lint

spec:
runs-on: ubuntu-latest
strategy:
Expand All @@ -48,10 +48,14 @@ jobs:
node-version: ${{ matrix.node-version }}
- uses: actions/checkout@v4
- run: npm ci
- run: npm run spec-turtle
- run: npm run spec-ntriples
- run: npm run spec-nquads
- run: npm run spec-trig
- run: npm run spec-1-1-turtle
- run: npm run spec-1-1-ntriples
- run: npm run spec-1-1-nquads
- run: npm run spec-1-1-trig
- run: npm run spec-1-2-turtle
- run: npm run spec-1-2-ntriples
- run: npm run spec-1-2-nquads
- run: npm run spec-1-2-trig

docs:
runs-on: ubuntu-latest
Expand Down
31 changes: 21 additions & 10 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,27 @@
"test": "jest",
"lint": "eslint src perf test spec",
"prepare": "npm run build",
"spec": "npm run spec-turtle && npm run spec-ntriples && npm run spec-nquads && npm run spec-trig",
"spec-earl": "npm run spec-earl-turtle && npm run spec-earl-ntriples && npm run spec-earl-nquads && npm run spec-earl-trig",
"spec-ntriples": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-n-triples/manifest.ttl -i '{ \"format\": \"n-triples\" }' -c .rdf-test-suite-cache/",
"spec-nquads": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-n-quads/manifest.ttl -i '{ \"format\": \"n-quads\" }' -c .rdf-test-suite-cache/",
"spec-turtle": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-turtle/manifest.ttl -i '{ \"format\": \"turtle\" }' -c .rdf-test-suite-cache/",
"spec-trig": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-trig/manifest.ttl -i '{ \"format\": \"trig\" }' -c .rdf-test-suite-cache/",
"spec-earl-ntriples": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-n-triples/manifest.ttl -i '{ \"format\": \"n-triples\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-ntriples.ttl",
"spec-earl-nquads": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-n-quads/manifest.ttl -i '{ \"format\": \"n-quads\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-nquads.ttl",
"spec-earl-turtle": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-turtle/manifest.ttl -i '{ \"format\": \"turtle\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-turtle.ttl",
"spec-earl-trig": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-trig/manifest.ttl -i '{ \"format\": \"trig\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-trig.ttl",
"spec": "npm run spec-1-1 && npm run spec-1-2",
"spec-1-1": "npm run spec-1-1-turtle && npm run spec-1-1-ntriples && npm run spec-1-1-nquads && npm run spec-1-1-trig",
"spec-1-1-earl": "npm run spec-1-1-earl-turtle && npm run spec-1-1-earl-ntriples && npm run spec-1-1-earl-nquads && npm run spec-1-1-earl-trig",
"spec-1-1-ntriples": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-n-triples/manifest.ttl -i '{ \"format\": \"n-triples\" }' -c .rdf-test-suite-cache/",
"spec-1-1-nquads": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-n-quads/manifest.ttl -i '{ \"format\": \"n-quads\" }' -c .rdf-test-suite-cache/",
"spec-1-1-turtle": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-turtle/manifest.ttl -i '{ \"format\": \"turtle\" }' -c .rdf-test-suite-cache/",
"spec-1-1-trig": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-trig/manifest.ttl -i '{ \"format\": \"trig\" }' -c .rdf-test-suite-cache/",
"spec-1-1-earl-ntriples": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-n-triples/manifest.ttl -i '{ \"format\": \"n-triples\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-ntriples.ttl",
"spec-1-1-earl-nquads": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-n-quads/manifest.ttl -i '{ \"format\": \"n-quads\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-nquads.ttl",
"spec-1-1-earl-turtle": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-turtle/manifest.ttl -i '{ \"format\": \"turtle\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-turtle.ttl",
"spec-1-1-earl-trig": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-trig/manifest.ttl -i '{ \"format\": \"trig\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-trig.ttl",
"spec-1-2": "npm run spec-1-2-turtle && npm run spec-1-2-ntriples && npm run spec-1-2-nquads && npm run spec-1-2-trig",
"spec-1-2-earl": "npm run spec-1-2-earl-turtle && npm run spec-1-2-earl-ntriples && npm run spec-1-2-earl-nquads && npm run spec-1-2-earl-trig",
"spec-1-2-ntriples": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf12/rdf-n-triples/syntax/manifest.ttl -i '{ \"format\": \"n-triples\" }' -c .rdf-test-suite-cache/",
"spec-1-2-nquads": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf12/rdf-n-quads/syntax/manifest.ttl -i '{ \"format\": \"n-quads\" }' -c .rdf-test-suite-cache/",
"spec-1-2-turtle": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf12/rdf-turtle/syntax/manifest.ttl -i '{ \"format\": \"turtle\" }' -c .rdf-test-suite-cache/",
"spec-1-2-trig": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf12/rdf-trig/syntax/manifest.ttl -i '{ \"format\": \"trig\" }' -c .rdf-test-suite-cache/",
"spec-1-2-earl-ntriples": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf12/rdf-n-triples/syntax/manifest.ttl -i '{ \"format\": \"n-triples\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-ntriples.ttl",
"spec-1-2-earl-nquads": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf12/rdf-n-quads/syntax/manifest.ttl -i '{ \"format\": \"n-quads\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-nquads.ttl",
"spec-1-2-earl-turtle": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf12/rdf-turtle/syntax/manifest.ttl -i '{ \"format\": \"turtle\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-turtle.ttl",
"spec-1-2-earl-trig": "rdf-test-suite spec/parser.js https://w3c.github.io/rdf-tests/rdf/rdf12/rdf-trig/syntax/manifest.ttl -i '{ \"format\": \"trig\" }' -c .rdf-test-suite-cache/ -o earl -p spec/earl-meta.json > spec/earl-trig.ttl",
"spec-clean": "rm -r .rdf-test-suite-cache/",
"docs": "cd src && docco *.js -o ../docs && cd .."
},
Expand Down
12 changes: 7 additions & 5 deletions src/IRIs.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ export default {
string: `${XSD}string`,
},
rdf: {
type: `${RDF}type`,
nil: `${RDF}nil`,
first: `${RDF}first`,
rest: `${RDF}rest`,
langString: `${RDF}langString`,
type: `${RDF}type`,
nil: `${RDF}nil`,
first: `${RDF}first`,
rest: `${RDF}rest`,
langString: `${RDF}langString`,
dirLangString: `${RDF}dirLangString`,
reifies: `${RDF}reifies`,
},
owl: {
sameAs: 'http://www.w3.org/2002/07/owl#sameAs',
Expand Down
47 changes: 38 additions & 9 deletions src/N3DataFactory.js
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,17 @@ export class Literal extends Term {
// Find the last quotation mark (e.g., '"abc"@en-us')
const id = this.id;
let atPos = id.lastIndexOf('"') + 1;
const dirPos = id.lastIndexOf('--');
// If "@" follows it, return the remaining substring; empty otherwise
return atPos < id.length && id[atPos++] === '@' ? id.substr(atPos).toLowerCase() : '';
return atPos < id.length && id[atPos++] === '@' ? (dirPos > atPos ? id.substr(0, dirPos) : id).substr(atPos).toLowerCase() : '';
}

// ### The direction of this literal
get direction() {
// Find the last double dash (e.g., '"abc"@en-us--ltr')
const id = this.id;
const atPos = id.lastIndexOf('--') + 2;
return atPos > 1 && atPos < id.length ? id.substr(atPos).toLowerCase() : '';
}

// ### The datatype IRI of this literal
Expand All @@ -104,8 +113,8 @@ export class Literal extends Term {
const char = dtPos < id.length ? id[dtPos] : '';
// If "^" it follows, return the remaining substring
return char === '^' ? id.substr(dtPos + 2) :
// If "@" follows, return rdf:langString; xsd:string otherwise
(char !== '@' ? xsd.string : rdf.langString);
// If "@" follows, return rdf:langString or rdf:dirLangString; xsd:string otherwise
(char !== '@' ? xsd.string : (id.indexOf('--', dtPos) > 0 ? rdf.dirLangString : rdf.langString));
}

// ### Returns whether this object represents the same term as the other
Expand All @@ -119,14 +128,16 @@ export class Literal extends Term {
this.termType === other.termType &&
this.value === other.value &&
this.language === other.language &&
((this.direction === other.direction) || (this.direction === '' && !other.direction)) &&
this.datatype.value === other.datatype.value;
}

toJSON() {
return {
termType: this.termType,
value: this.value,
language: this.language,
termType: this.termType,
value: this.value,
language: this.language,
direction: this.direction,
datatype: { termType: 'NamedNode', value: this.datatypeString },
};
}
Expand Down Expand Up @@ -216,9 +227,22 @@ export function termFromId(id, factory, nested) {
return factory.literal(id.substr(1, id.length - 2));
// Literal with datatype or language
const endPos = id.lastIndexOf('"', id.length - 1);
let languageOrDatatype;
if (id[endPos + 1] === '@') {
languageOrDatatype = id.substr(endPos + 2);
const dashDashIndex = languageOrDatatype.lastIndexOf('--');
if (dashDashIndex > 0 && dashDashIndex < languageOrDatatype.length) {
languageOrDatatype = {
language: languageOrDatatype.substr(0, dashDashIndex),
direction: languageOrDatatype.substr(dashDashIndex + 2),
};
}
}
else {
languageOrDatatype = factory.namedNode(id.substr(endPos + 3));
}
return factory.literal(id.substr(1, endPos - 1),
id[endPos + 1] === '@' ? id.substr(endPos + 2)
: factory.namedNode(id.substr(endPos + 3)));
languageOrDatatype);
case '[':
id = JSON.parse(id);
break;
Expand Down Expand Up @@ -255,7 +279,7 @@ export function termToId(term, nested) {
case 'Variable': return `?${term.value}`;
case 'DefaultGraph': return '';
case 'Literal': return `"${term.value}"${
term.language ? `@${term.language}` :
term.language ? `@${term.language}${term.direction ? `--${term.direction}` : ''}` :
(term.datatype && term.datatype.value !== xsd.string ? `^^${term.datatype.value}` : '')}`;
case 'Quad':
const res = [
Expand Down Expand Up @@ -350,6 +374,11 @@ function literal(value, languageOrDataType) {
if (typeof languageOrDataType === 'string')
return new Literal(`"${value}"@${languageOrDataType.toLowerCase()}`);

// Create a language-tagged string with base direction
if (languageOrDataType !== undefined && !('termType' in languageOrDataType)) {
return new Literal(`"${value}"@${languageOrDataType.language.toLowerCase()}--${languageOrDataType.direction.toLowerCase()}`);
}

// Automatically determine datatype for booleans and numbers
let datatype = languageOrDataType ? languageOrDataType.value : '';
if (datatype === '') {
Expand Down
31 changes: 27 additions & 4 deletions src/N3Lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const lineModeRegExps = {
_unescapedIri: true,
_simpleQuotedString: true,
_langcode: true,
_dircode: true,
_blank: true,
_newline: true,
_comment: true,
Expand All @@ -38,7 +39,8 @@ export default class N3Lexer {
this._unescapedIri = /^<([^\x00-\x20<>\\"\{\}\|\^\`]*)>[ \t]*/; // IRI without escape sequences; no unescaping
this._simpleQuotedString = /^"([^"\\\r\n]*)"(?=[^"])/; // string without escape sequences
this._simpleApostropheString = /^'([^'\\\r\n]*)'(?=[^'])/;
this._langcode = /^@([a-z]+(?:-[a-z0-9]+)*)(?=[^a-z0-9\-])/i;
this._langcode = /^@([a-z]+(?:-[a-z0-9]+)*)(?=[^a-z0-9])/i;
this._dircode = /^--(ltr)|(rtl)/;
this._prefix = /^((?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:(?=[#\s<])/;
this._prefixed = /^((?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:((?:(?:[0-:A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])(?:(?:[\.\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])*(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~]))?)?)(?:[ \t]+|(?=\.?[,;!\^\s#()\[\]\{\}"'<>]))/;
this._variable = /^\?(?:(?:[A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)(?=[.,;!\^\s#()\[\]\{\}"'<>])/;
Expand Down Expand Up @@ -148,15 +150,19 @@ export default class N3Lexer {
return reportSyntaxError(this);
type = 'IRI';
}
// Try to find a nested triple
else if (input.length > 1 && input[1] === '<')
// Try to find a triple term
else if (input.length > 2 && input[1] === '<' && input[2] === '(')
type = '<<(', matchLength = 3;
// Try to find a reified triple
else if (!this._lineMode && input.length > (inputFinished ? 1 : 2) && input[1] === '<')
type = '<<', matchLength = 2;
// Try to find a backwards implication arrow
else if (this._n3Mode && input.length > 1 && input[1] === '=')
type = 'inverse', matchLength = 2, value = '>';
break;

case '>':
// Try to find a reified triple
if (input.length > 1 && input[1] === '>')
type = '>>', matchLength = 2;
break;
Expand Down Expand Up @@ -240,6 +246,13 @@ export default class N3Lexer {
case '9':
case '+':
case '-':
if (input[1] === '-') {
// Try to find a direction code
if (this._previousMarker === 'langcode' && (match = this._dircode.exec(input)))
type = 'dircode', matchLength = 2, value = (match[1] || match[2]), matchLength = value.length + 2;
break;
}

// Try to find a number. Since it can contain (but not end with) a dot,
// we always need a non-dot character before deciding it is a number.
// Therefore, try inserting a space if we're at the end of the input.
Expand Down Expand Up @@ -295,13 +308,23 @@ export default class N3Lexer {
case '!':
if (!this._n3Mode)
break;
case ')':
if (!inputFinished && (input.length === 1 || (input.length === 2 && input[1] === '>'))) {
// Don't consume yet, as it *could* become a triple term end.
break;
}
// Try to find a triple term
if (input.length > 2 && input[1] === '>' && input[2] === '>') {
type = ')>>', matchLength = 3;
break;
}
case ',':
case ';':
case '[':
case ']':
case '(':
case ')':
case '}':
case '~':
if (!this._lineMode) {
matchLength = 1;
type = firstChar;
Expand Down
Loading