Skip to content

Commit

Permalink
Merge pull request #589 from zazuko/er-multi-datasetbaseurl
Browse files Browse the repository at this point in the history
Entity renderer: add support for multiple `datasetBaseUrl`
  • Loading branch information
ludovicm67 authored Dec 4, 2024
2 parents e0ab4c4 + 0056079 commit 5aa7a97
Show file tree
Hide file tree
Showing 7 changed files with 490 additions and 112 deletions.
5 changes: 5 additions & 0 deletions .changeset/three-berries-perform.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-entity-renderer": minor
---

Add support for multiple `datasetBaseUrl`
165 changes: 53 additions & 112 deletions packages/entity-renderer/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import { fileURLToPath } from 'node:url'
import { parsers } from '@rdfjs/formats-common'
import rdf from '@zazuko/env'
import { sparqlSerializeQuadStream, sparqlSupportedTypes, sparqlGetRewriteConfiguration } from 'trifid-core'
import mimeparse from 'mimeparse'

import { defaultConfiguration } from './lib/config.js'
import { getAcceptHeader } from './lib/headers.js'
import { checkDatasetBaseUrl } from './lib/base.js'

import { createEntityRenderer } from './renderer/entity.js'
import { createMetadataProvider } from './renderer/metadata.js'
Expand All @@ -13,102 +16,10 @@ const currentDir = dirname(fileURLToPath(import.meta.url))

const DEFAULT_ENDPOINT_NAME = 'default'

const getAcceptHeader = (req) => {
const queryStringValue = req.query.format

const supportedQueryStringValues = {
ttl: 'text/turtle',
jsonld: 'application/ld+json',
xml: 'application/rdf+xml',
nt: 'application/n-triples',
trig: 'application/trig',
csv: 'text/csv',
html: 'text/html',
}

if (
Object.hasOwnProperty.call(supportedQueryStringValues, queryStringValue)
) {
return supportedQueryStringValues[queryStringValue]
}

const acceptHeader = `${req.headers.accept || ''}`.toLocaleLowerCase()
const selectedHeader = mimeparse.bestMatch([
...sparqlSupportedTypes,
'text/html',
], acceptHeader)

return selectedHeader || acceptHeader
}

/**
 * Substitute every `{{iri}}` placeholder of a query template with an IRI.
 *
 * The IRI is inserted verbatim: it is not escaped or interpreted in any way.
 *
 * @param {string} query - Query template containing `{{iri}}` placeholders
 * @param {string} iri - Value put in place of each placeholder
 * @returns {string} The query with all placeholders replaced
 */
const replaceIriInQuery = (query, iri) => {
  const placeholder = '{{iri}}'
  const fragments = query.split(placeholder)
  return fragments.join(iri)
}

// Default options for the entity renderer.
// In the query templates below, `{{iri}}` is a placeholder for the IRI being
// looked up; it is substituted before the query is executed.
const defaultConfiguration = {
// When true, an IRI that ends with `/` is handled as a container
resourceNoSlash: true,
// ASK query telling whether a (non-container) resource exists
resourceExistsQuery: 'ASK { <{{iri}}> ?p ?o }',
// DESCRIBE query for a resource
// NOTE(review): presumably used to fetch the data to render; usage is not visible here
resourceGraphQuery: 'DESCRIBE <{{iri}}>',
// ASK query telling whether any typed subject has an IRI starting with the container IRI
containerExistsQuery: 'ASK { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
// CONSTRUCT query returning the type triples of subjects whose IRI starts with the container IRI
containerGraphQuery:
'CONSTRUCT { ?s a ?o. } WHERE { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
// SELECT query looking up a GetRequest description for the IRI, expressed with
// either the 2011 or the 2006 HTTP vocabulary, and returning at most one row
// with the response location, response code and optional schema.org validFrom
redirectQuery: `
PREFIX http2011: <http://www.w3.org/2011/http#>
PREFIX http2006: <http://www.w3.org/2006/http#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?req ?res ?location ?responseCode ?validFrom
WHERE {
GRAPH ?g {
# Handle 2011 version
{
?req2011 rdf:type http2011:GetRequest.
?req2011 http2011:requestURI <{{iri}}>.
?req2011 http2011:response ?res2011.
?res2011 rdf:type http2011:Response.
?res2011 http2011:location ?location2011.
?res2011 http2011:responseCode ?responseCode2011.
OPTIONAL {
?res2011 <http://schema.org/validFrom> ?validFrom2011.
}
}
UNION
# Handle 2006 version
{
?req2006 rdf:type http2006:GetRequest.
?req2006 http2006:requestURI <{{iri}}>.
?req2006 http2006:response ?res2006.
?res2006 rdf:type http2006:Response.
?res2006 http2006:location ?location2006.
?res2006 http2006:responseCode ?responseCode2006.
OPTIONAL {
?res2006 <http://schema.org/validFrom> ?validFrom2006.
}
}
# Combine results, using priority for 2011 version over 2006 version
BIND(COALESCE(?req2011, ?req2006) AS ?req)
BIND(COALESCE(?res2011, ?res2006) AS ?res)
BIND(COALESCE(?location2011, ?location2006) AS ?location)
BIND(COALESCE(?validFrom2011, ?validFrom2006) AS ?validFrom)
# Just get the response code as a string instead of the full IRI
BIND(STRAFTER(STR(COALESCE(?responseCode2011, ?responseCode2006)), "#") AS ?responseCode)
}
}
LIMIT 1
`,
// NOTE(review): presumably controls whether redirects found by `redirectQuery`
// are followed; the consuming code is not visible here
followRedirects: false,
// NOTE(review): semantics not visible here; confirm against the renderer code
enableSchemaUrlRedirect: false, // Experimental
// Disabled by default; the factory only treats the string form 'true' as enabled
allowEndpointSwitch: false, // Experimental
}

/**
 * Reduce a `Content-Type` header value to its bare media type.
 *
 * Everything after the first `;` (parameters such as `charset`) is dropped,
 * surrounding whitespace is trimmed and the result is lower-cased.
 *
 * @param {string} contentType - Raw header value
 * @returns {string} The normalized media type
 */
const fixContentTypeHeader = (contentType) => {
  const [mediaType] = contentType.split(';')
  const trimmed = mediaType.trim()
  return trimmed.toLocaleLowerCase()
}
Expand All @@ -119,11 +30,24 @@ const factory = async (trifid) => {
const entityRenderer = createEntityRenderer({ options: config, logger, query })
const metadataProvider = createMetadataProvider({ options: config })

const { path, ignorePaths, rewrite: rewriteConfigValue, datasetBaseUrl, allowEndpointSwitch: allowEndpointSwitchConfigValue } = config
const { path, ignorePaths, rewrite: rewriteConfigValue, datasetBaseUrl: datasetBaseUrlValue, allowEndpointSwitch: allowEndpointSwitchConfigValue } = config
const allowEndpointSwitch = `${allowEndpointSwitchConfigValue}` === 'true'
const entityTemplatePath = path || `${currentDir}/views/render.hbs`
const rewriteConfig = sparqlGetRewriteConfiguration(rewriteConfigValue, datasetBaseUrl)
const { rewrite: rewriteValue, replaceIri, iriOrigin } = rewriteConfig
const datasetBaseUrls = checkDatasetBaseUrl(logger, datasetBaseUrlValue)

/**
* Map of dataset base URLs with their rewrite configuration.
* @type {Map<string, { rewrite: boolean, replaceIri: (iri: string) => string, iriOrigin: (iri: string) => string, datasetBaseUrl: string }>}
*/
const dbu = new Map()
datasetBaseUrls.forEach((value) => {
const rewriteConfig = sparqlGetRewriteConfiguration(rewriteConfigValue, value)
// Just to have all the fields explicitly defined
const { rewrite: rewriteValue, replaceIri, iriOrigin, datasetBaseUrl } = rewriteConfig
dbu.set(value, { rewrite: rewriteValue, replaceIri, iriOrigin, datasetBaseUrl })

logger.debug(`Rewriting is ${rewriteValue ? 'enabled' : 'disabled'} for '${value}' dataset base URL`)
})

const additionalRewritesConfig = config.additionalRewrites || []
if (!Array.isArray(additionalRewritesConfig)) {
Expand All @@ -139,15 +63,6 @@ const factory = async (trifid) => {
return value
})

logger.debug(`Rewriting is ${rewriteValue ? 'enabled' : 'disabled'}`)

if (rewriteValue) {
if (!datasetBaseUrl.endsWith('/')) {
logger.warn('The value for `datasetBaseUrl` should usually end with a `/`')
}
logger.debug(`Using '${datasetBaseUrl}' as dataset base URL`)
}

// If `ignorePaths` is not provided or invalid, we configure some defaults values
let ignoredPaths = ignorePaths
if (!ignorePaths || !Array.isArray(ignorePaths)) {
Expand Down Expand Up @@ -200,9 +115,37 @@ const factory = async (trifid) => {
iriUrl.search = ''
iriUrl.searchParams.forEach((_value, key) => iriUrl.searchParams.delete(key))
const iriUrlString = iriUrl.toString()
const iri = replaceIri(iriUrlString)
const isContainer = mergedConfig.resourceNoSlash && iri.endsWith('/')
logger.debug(`IRI value: ${iri}${rewriteValue ? ' (rewritten)' : ''} - is container: ${isContainer ? 'true' : 'false'}`)

// Result of the dataset base URL resolution. All of these stay `undefined`
// when the requested IRI is found under none of the configured base URLs.
let iri
let iriOrigin
let replaceIri
let rewriteValue
let datasetBaseUrl
let isContainer

// Try the configured dataset base URLs in the order they were registered and
// keep the first one under which the IRI exists; the loop stops at the first
// match, so the remaining base URLs are not queried.
for (const candidate of dbu.values()) {
  const candidateIri = candidate.replaceIri(iriUrlString)
  const candidateIsContainer = mergedConfig.resourceNoSlash && candidateIri.endsWith('/')
  // The map entries expose the rewrite flag as `rewrite`
  logger.debug(`IRI value: ${candidateIri}${candidate.rewrite ? ' (rewritten)' : ''} - is container: ${candidateIsContainer ? 'true' : 'false'}`)

  // Check if the IRI exists in the dataset ; if so, use it for the rest of the process
  const askQuery = candidateIsContainer ? mergedConfig.containerExistsQuery : mergedConfig.resourceExistsQuery
  const exists = await query(replaceIriInQuery(askQuery, candidateIri), { ask: true, headers: queryHeaders })
  if (!exists) {
    continue
  }

  iriOrigin = candidate.iriOrigin
  replaceIri = candidate.replaceIri
  rewriteValue = candidate.rewrite
  datasetBaseUrl = candidate.datasetBaseUrl
  isContainer = candidateIsContainer
  iri = candidateIri
  logger.debug(`IRI found: ${iri}`)
  break
}

const rewriteResponse = rewriteValue
? [
...additionalRewrites.map(({ find, replace }) => {
Expand All @@ -216,10 +159,8 @@ const factory = async (trifid) => {
]
: []

// Check if the IRI exists in the dataset
const askQuery = isContainer ? mergedConfig.containerExistsQuery : mergedConfig.resourceExistsQuery
const exists = await query(replaceIriInQuery(askQuery, iri), { ask: true, headers: queryHeaders })
if (!exists) {
// If the IRI is not found, we return a 404
if (!iri) {
reply.callNotFound()
return reply
}
Expand Down
50 changes: 50 additions & 0 deletions packages/entity-renderer/lib/base.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// @ts-check

/**
* Check the dataset base URL.
* Some hints are provided if the dataset base URL is not correctly formatted.
* If a value is empty, an error is thrown.
*
* @param {{warn: Function }} logger - The logger instance
* @param {string} datasetBaseUrl - The dataset base URL
* @returns {true} The dataset base URL as an array
*/
export const checkSingleDatasetBaseUrl = (logger, datasetBaseUrl) => {
if (typeof datasetBaseUrl !== 'string') {
throw new Error('The datasetBaseUrl must be a string')
}

if (!datasetBaseUrl) {
throw new Error("Value for 'datasetBaseUrl' is missing")
}

if (!datasetBaseUrl.endsWith('/')) {
logger.warn(`The value for 'datasetBaseUrl' should usually end with a '/' ; it is not the case for '${datasetBaseUrl}'`)
}

return true
}

/**
* Check the dataset base URL, and make sure it returns an array.
* Some hints are provided if the dataset base URL is not correctly formatted.
* If the dataset base URL is an array, each value is checked.
* If a value is empty, then an error is thrown.
*
* @param {{warn: Function }} logger - The logger instance
* @param {string | string[]} datasetBaseUrl - The dataset base URL
* @returns {string[]} The dataset base URL as an array
*/
export const checkDatasetBaseUrl = (logger, datasetBaseUrl) => {
if (!datasetBaseUrl) {
throw new Error('No datasetBaseUrl provided')
}

if (Array.isArray(datasetBaseUrl)) {
datasetBaseUrl.forEach((value) => checkSingleDatasetBaseUrl(logger, value))
return datasetBaseUrl
} else {
checkSingleDatasetBaseUrl(logger, datasetBaseUrl)
return [datasetBaseUrl]
}
}
63 changes: 63 additions & 0 deletions packages/entity-renderer/lib/config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/**
 * Default options for the entity renderer.
 *
 * In the query templates below, `{{iri}}` is a placeholder for the IRI being
 * looked up; the caller substitutes it before the query is executed.
 */
export const defaultConfiguration = {
// When true, an IRI that ends with `/` is handled as a container
resourceNoSlash: true,
// ASK query telling whether a (non-container) resource exists
resourceExistsQuery: 'ASK { <{{iri}}> ?p ?o }',
// DESCRIBE query for a resource
// NOTE(review): presumably used to fetch the data to render; usage is not visible here
resourceGraphQuery: 'DESCRIBE <{{iri}}>',
// ASK query telling whether any typed subject has an IRI starting with the container IRI
containerExistsQuery: 'ASK { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
// CONSTRUCT query returning the type triples of subjects whose IRI starts with the container IRI
containerGraphQuery:
'CONSTRUCT { ?s a ?o. } WHERE { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
// SELECT query looking up a GetRequest description for the IRI, expressed with
// either the 2011 or the 2006 HTTP vocabulary, and returning at most one row
// with the response location, response code and optional schema.org validFrom
redirectQuery: `
PREFIX http2011: <http://www.w3.org/2011/http#>
PREFIX http2006: <http://www.w3.org/2006/http#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?req ?res ?location ?responseCode ?validFrom
WHERE {
GRAPH ?g {
# Handle 2011 version
{
?req2011 rdf:type http2011:GetRequest.
?req2011 http2011:requestURI <{{iri}}>.
?req2011 http2011:response ?res2011.
?res2011 rdf:type http2011:Response.
?res2011 http2011:location ?location2011.
?res2011 http2011:responseCode ?responseCode2011.
OPTIONAL {
?res2011 <http://schema.org/validFrom> ?validFrom2011.
}
}
UNION
# Handle 2006 version
{
?req2006 rdf:type http2006:GetRequest.
?req2006 http2006:requestURI <{{iri}}>.
?req2006 http2006:response ?res2006.
?res2006 rdf:type http2006:Response.
?res2006 http2006:location ?location2006.
?res2006 http2006:responseCode ?responseCode2006.
OPTIONAL {
?res2006 <http://schema.org/validFrom> ?validFrom2006.
}
}
# Combine results, using priority for 2011 version over 2006 version
BIND(COALESCE(?req2011, ?req2006) AS ?req)
BIND(COALESCE(?res2011, ?res2006) AS ?res)
BIND(COALESCE(?location2011, ?location2006) AS ?location)
BIND(COALESCE(?validFrom2011, ?validFrom2006) AS ?validFrom)
# Just get the response code as a string instead of the full IRI
BIND(STRAFTER(STR(COALESCE(?responseCode2011, ?responseCode2006)), "#") AS ?responseCode)
}
}
LIMIT 1
`,
// NOTE(review): presumably controls whether redirects found by `redirectQuery`
// are followed; the consuming code is not visible here
followRedirects: false,
// NOTE(review): semantics not visible here; confirm against the renderer code
enableSchemaUrlRedirect: false, // Experimental
// Disabled by default
// NOTE(review): the plugin factory appears to compare the stringified value with 'true'; confirm
allowEndpointSwitch: false, // Experimental
}
36 changes: 36 additions & 0 deletions packages/entity-renderer/lib/headers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import mimeparse from 'mimeparse'
import { sparqlSupportedTypes } from 'trifid-core'

/**
* Get the accept header from the request
*
* @param {{query?: Record<string, any>, headers?: Record<string, any>}} req - The request object
* @returns {string} The accept header
*/
export const getAcceptHeader = (req) => {
const queryStringValue = req.query.format

const supportedQueryStringValues = {
ttl: 'text/turtle',
jsonld: 'application/ld+json',
xml: 'application/rdf+xml',
nt: 'application/n-triples',
trig: 'application/trig',
csv: 'text/csv',
html: 'text/html',
}

if (
Object.hasOwnProperty.call(supportedQueryStringValues, queryStringValue)
) {
return supportedQueryStringValues[queryStringValue]
}

const acceptHeader = `${req.headers?.accept || 'text/html'}`.toLocaleLowerCase()
const selectedHeader = mimeparse.bestMatch([
...sparqlSupportedTypes,
'text/html',
], acceptHeader)

return selectedHeader || acceptHeader
}
Loading

0 comments on commit 5aa7a97

Please sign in to comment.