Skip to content

Commit

Permalink
WIP validate files with skohub shape #14
Browse files Browse the repository at this point in the history
  • Loading branch information
sroertgen committed Jan 29, 2024
1 parent af2802f commit 8eacb88
Show file tree
Hide file tree
Showing 2 changed files with 228 additions and 44 deletions.
134 changes: 90 additions & 44 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,33 @@ on:
description: 'Test scenario tags'

jobs:
# Collects every Turtle file in the repository into a JSON array that is
# exposed as the job output `matrix`.
all-ttl-files:
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.file-list }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3

    - name: Find TTL files
      id: find-ttl
      # The `::set-env` workflow command is disabled on GitHub runners;
      # appending to $GITHUB_ENV makes ttl_files visible to later steps.
      run: |
        ttl_files=$(find . -name '*.ttl' -printf '"%p",' | sed 's/,$//')
        echo "ttl_files=$ttl_files"
        echo "ttl_files=$ttl_files" >> "$GITHUB_ENV"

    - name: Set matrix for TTL files
      id: set-matrix
      # `::set-output` is deprecated; write the step output to $GITHUB_OUTPUT.
      run: echo "file-list=[${ttl_files}]" >> "$GITHUB_OUTPUT"

    - name: List all TTL files
      run: echo "${ttl_files}"


changedfiles:
runs-on: ubuntu-latest
outputs:
ttl: ${{ steps.set-matrix.outputs.ttl }}
yml: ${{ steps.set-matrix.outputs.yml }}
ttl: ${{ steps.set-ttl-matrix.outputs.ttl }}
yml: ${{ steps.set-yml-matrix.outputs.yml }}
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -27,15 +49,15 @@ jobs:
with:
files: |
**/*.ttl
json: "true"
json: true

- name: Get changed yml-files
id: changed-yml-files
uses: tj-actions/changed-files@v42
with:
files: |
**/*.yml
json: "true"
json: true

- name: Set turtle file matrix
id: set-ttl-matrix
Expand All @@ -45,15 +67,24 @@ jobs:
id: set-yml-matrix
run: echo "yml=${{ steps.changed-yml-files.outputs.all_changed_files }}" >> $GITHUB_OUTPUT

- name: List all changed files
run: echo '${{ steps.changed-ttl-files.outputs.all_changed_files }}'

- name: List all changed files
run: echo '${{ steps.changed-yml-files.outputs.all_changed_files }}'

validate:
runs-on: ubuntu-latest
needs: changedfiles
needs: [changedfiles, all-ttl-files]
# only run if there are changed files
if: ${{needs.changedfiles.outputs.ttl != '[]'}}
if: ${{needs.changedfiles.outputs.ttl != '[]' || needs.changedfiles.outputs.yml != '[]'}}
strategy:
max-parallel: 1
fail-fast: false # other validation jobs should continue checking even if one file is invalid
matrix:
file: ${{ fromJson(needs.changedfiles.outputs.ttl) }}
# file: ${{ fromJSON(needs.changedfiles.outputs.ttl) || fromJson(needs.all-ttl-files.outputs.matrix) }}
file: ${{ fromJson(needs.all-ttl-files.outputs.matrix) }}

steps:
- uses: actions/checkout@v3
- name: echo changed files
Expand All @@ -62,55 +93,70 @@ jobs:
- name: get shape
run: curl https://raw.githubusercontent.com/skohub-io/shapes/main/skohub.shacl.ttl --output skohub.shacl.ttl

- name: make scripts directory
run: mkdir scripts
# - name: make scripts directory
# run: mkdir scripts

- name: get script
run: curl https://raw.githubusercontent.com/skohub-io/shapes/main/scripts/validate-skos --output scripts/validate-skos
- name: make fuseki directory
run: mkdir fuseki

- name: get violation query
run: curl https://raw.githubusercontent.com/skohub-io/shapes/main/scripts/checkForViolation.rq --output scripts/checkForViolation.rq
- name: get fuseki inference config
run: curl https://raw.githubusercontent.com/skohub-io/shapes/main/fuseki/config_inference.ttl --output fuseki/config_inference.ttl

# TODO turn on after fixing
# - name: get script
# run: curl https://raw.githubusercontent.com/skohub-io/shapes/main/scripts/validate-skos --output scripts/validate-skos
- name: get violation query
run: curl https://raw.githubusercontent.com/skohub-io/shapes/main/scripts/checkForViolation.rq --output checkForViolation.rq
- name: get violation query
run: curl https://raw.githubusercontent.com/skohub-io/shapes/main/scripts/checkForWarning.rq --output scripts/checkForWarning.rq
- name: get violation query
run: curl https://raw.githubusercontent.com/skohub-io/shapes/main/scripts/checkForBoth.rq --output scripts/checkForBoth.rq
- run: |
ls -la
ls -la scripts
ls -la fuseki
- run: docker image ls
- name: Validate with script
run: bash ${GITHUB_WORKSPACE}/scripts/validate-skos -s skohub.shacl.ttl ${{ matrix.file }}
# run: bash ${GITHUB_WORKSPACE}/scripts/validate-skos -s skohub.shacl.ttl ${{ matrix.file }}
run: bash ./scripts/validate-skos -s skohub.shacl.ttl ${{ matrix.file }}

build:
runs-on: ubuntu-latest
needs: [changedfiles, validate]
steps:
- name: Checkout 🛎️
uses: actions/checkout@v2 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly.
with:
persist-credentials: false
# build:
# runs-on: ubuntu-latest
# needs: [changedfiles, validate]
# steps:
# - name: Checkout 🛎️
# uses: actions/checkout@v2 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly.
# with:
# persist-credentials: false

- name: remove public and data-dir if already exists
run: rm -rf public data
# - name: remove public and data-dir if already exists
# run: rm -rf public data

- run: mkdir public
# - run: mkdir public

- run: chmod -R 777 public # user in container is node which won't have write access to public
# - run: chmod -R 777 public # user in container is node which won't have write access to public

- run: mkdir data
# - run: mkdir data

- run: chmod -R 777 data # user in container is node which won't have write access to public
# - run: chmod -R 777 data # user in container is node which won't have write access to public

- run: git clone https://github.com/skohub-io/skohub-docker-vocabs.git data/ # <-- add link to your repo here
# - run: git clone https://github.com/skohub-io/skohub-docker-vocabs.git data/ # <-- add link to your repo here

- name: make .env file
# - name: make .env file

run: echo "BASEURL=/skohub-docker-vocabs" > .env
# run: echo "BASEURL=/skohub-docker-vocabs" > .env

- name: build public dir with docker image
run: >
docker run
-v $(pwd)/public:/app/public
-v $(pwd)/data:/app/data
-v $(pwd)/.env:/app/.env
-e GATSBY_RESPOSITORY_URL=https://github.com/skohub-io/skohub-docker-vocabs.git
skohub/skohub-vocabs-docker:latest
# - name: build public dir with docker image
# run: >
# docker run
# -v $(pwd)/public:/app/public
# -v $(pwd)/data:/app/data
# -v $(pwd)/.env:/app/.env
# -e GATSBY_RESPOSITORY_URL=https://github.com/skohub-io/skohub-docker-vocabs.git
# skohub/skohub-vocabs-docker:latest

- name: Deploy
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./public
# - name: Deploy
# uses: peaceiris/actions-gh-pages@v3
# with:
# github_token: ${{ secrets.GITHUB_TOKEN }}
# publish_dir: ./public
138 changes: 138 additions & 0 deletions scripts/validate-skos
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/bin/bash
# Validate a SKOS file (Turtle syntax) against a SHACL shape.
set -euo pipefail

# Directory containing this script. Both substitutions are quoted so that a
# path containing whitespace is not split into several words.
scripts=$(realpath -- "$(dirname -- "$0")")
# Default shape file: one level above the scripts directory (overridden by -s).
shape=$(realpath -- "$scripts/../skos.shacl.ttl")
# Default severity level (overridden by -l).
severity=all
# Set to 1 by -r; empty otherwise.
report=

# Print the help text to stdout and exit.
#   $1  exit status to terminate the script with
usage() {
  printf '%s\n' \
    "$0 [OPTION]... FILE" \
    "Validate SKOS file (Turtle syntax). No return message means everything is fine." \
    "" \
    "Options:" \
    " -s FILE shape file (default: $shape)" \
    " -l LEVEL severity violation|warning|all (default: $severity)" \
    " -o FILE keep full validation report in this file" \
    " -r show raw validation report and exit" \
    " -h show this help message"
  exit $1
}

# Report a fatal error: write all arguments as a single line to stderr and
# terminate the script with exit status 1.
die() {
  { echo "$*"; } 1>&2
  exit 1
}

# Exit handler: remove the temporary input copy and stop the Fuseki container.
cleanup() {
  echo "Cleaning up"
  # testfile only ever holds a mktemp path, so deleting it is safe. It is
  # still unset when the script exits during option parsing, hence the guard
  # (required under `set -u`).
  if [ -n "${testfile:-}" ]; then
    rm -f -- "$testfile"
  fi
  # NOTE(review): $result is deliberately left alone because it may be the
  # file the user asked to keep with -o.
  # The container may not exist (e.g. after -h or a usage error). Under
  # `set -e` a failing stop would abort this handler and override the
  # script's exit status, so the failure is ignored on purpose.
  docker container stop validate-skos-fuseki > /dev/null 2>&1 || true
}

# Run cleanup exactly once, when the shell exits. The signal traps only turn
# the signal into a regular exit (128 + signal number) so the EXIT trap fires;
# registering cleanup on the signals directly would run it without ending the
# script and then a second time on exit.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 131' QUIT
trap 'exit 143' TERM

# Parse command line options; usage() lists what each one means.
while getopts s:l:o:rh flag; do
  case "${flag}" in
    s) shape=${OPTARG} ;;
    l) severity=${OPTARG} ;;
    o) result=${OPTARG} ;;
    r) report=1 ;;
    h) usage 0 ;;
    *) usage 1 ;;
  esac
done
shift "$((OPTIND - 1))"

# The file to validate is the one required positional argument.
if [ -z "${1:-}" ]; then
  usage 1
fi

# Resolve the file to validate and make sure it exists.
file=$(realpath -- "$1")
[ -f "$file" ] || die "File not found: $file"
# Temporary working copy of the input; this is what gets uploaded to Fuseki.
testfile=$(mktemp /tmp/validate-script.XXXXXX)


shape=$(realpath -- "$shape")
[ -f "$shape" ] || die "File not found: $shape"
# add the skos definitions to the file if the shape is "skos.shacl.ttl"
if [ "$(basename -- "$shape")" = "skos.shacl.ttl" ]; then
  # NOTE(review): the definitions file is resolved relative to the current
  # working directory, not the script directory — confirm this is intended.
  cat -- "$file" "$(realpath -- skosClassAndPropertyDefinitions.ttl)" > "$testfile"
else
  cat -- "$file" > "$testfile"
fi

# Reject input that consists only of blank lines and comments. The message
# names the user's file rather than the temporary copy.
grep -vE '^\s*(#.*)?$' "$file" >/dev/null || die "File contains no RDF statements: $file"

# Map the requested severity level onto the matching SPARQL query file;
# any other value is a fatal error.
case "$severity" in
  warning)
    SEVERITY_FILE="./scripts/checkForWarning.rq"
    ;;
  all)
    SEVERITY_FILE="./scripts/checkForBoth.rq"
    ;;
  violation)
    SEVERITY_FILE="./scripts/checkForViolation.rq"
    ;;
  *)
    die "Unknown severity: $severity"
    ;;
esac

# Validation report location: the file given with -o when present,
# otherwise a fresh temporary file.
if [[ -n "${result:-}" ]]; then
  result=$(realpath "$result")
else
  result=$(mktemp /tmp/validate-script.XXXXXX)
fi

# Stop a container left over from an earlier run so its name is free again.
if docker ps | grep -q "validate-skos-fuseki"; then
  docker stop validate-skos-fuseki
  sleep 1
fi

# Fuseki configuration, looked up relative to this script (../fuseki/) like
# the default shape file, instead of the host path /fuseki/ which the
# workflow never creates.
config=$(realpath -- "$scripts/../fuseki/config_inference.ttl")
[ -f "$config" ] || die "File not found: $config"

max_attempts=5
delay=3
attempt=1

echo "Starting validation container"

# Start fuseki once. Doing this inside the retry loop would fail on the
# second attempt because the container name is already taken.
docker run -d --rm --name validate-skos-fuseki -p 0:3030 \
  -v "$config:/fuseki/config_inference.ttl" \
  skohub/jena-fuseki:latest \
  /jena-fuseki/fuseki-server --config /fuseki/config_inference.ttl > /dev/null
# Host port Docker picked for 3030/tcp. Taking the last ':'-separated field
# works for both "0.0.0.0:PORT" and "[::]:PORT" output.
port=$(docker port validate-skos-fuseki 3030/tcp | head -1 | awk -F: '{print $NF}')

# wait till fuseki is up
while [ "$attempt" -le "$max_attempts" ]; do
  sleep "$delay"
  # --fail makes an HTTP error status count as "not up yet".
  curl --silent --fail "http://localhost:$port/$/ping" > /dev/null && break
  attempt=$((attempt + 1))
done

if [ "$attempt" -gt "$max_attempts" ]; then
  die "The command has failed after $max_attempts attempts."
fi

# upload file
# --fail turns an HTTP error (e.g. Fuseki rejecting the upload) into a
# non-zero exit status so `set -e` aborts instead of validating an empty graph.
curl --silent --show-error --fail --request POST \
  --url "http://localhost:$port/dataset/data?graph=default" \
  --header 'Content-Type: text/turtle' \
  --data-binary "@$testfile" > /dev/null

# validate w/ shacl: post the shape file and store the response in $result
curl --silent --show-error --fail --request POST \
  --url "http://localhost:$port/dataset/shacl?graph=default" \
  --header 'Content-Type: text/turtle' \
  --data-binary "@$shape" > "$result"

echo "Checking validation result"

if [[ -n "$report" ]]; then
  # -r: print the raw validation report and stop here.
  cat "$result"
else
  # Use the query selected by -l (SEVERITY_FILE) instead of always checking
  # for violations. Docker bind mounts get an absolute host path.
  query=$(realpath -- "$SEVERITY_FILE")
  [ -f "$query" ] || die "File not found: $query"

  validationResult="$(docker run --rm \
    -v "$query:/rdf/query.rq" \
    -v "$result:/rdf/result.ttl" \
    skohub/jena:4.6.1 arq --data /rdf/result.ttl --query /rdf/query.rq)"

  lines=$(echo "$validationResult" | wc -l)

  # Correct validation has 4 lines of output (presumably the empty result
  # table printed by arq — confirm); anything else is reported as a failure.
  [[ ${lines} -eq 4 ]] || die "$validationResult"

fi

0 comments on commit 8eacb88

Please sign in to comment.