Skip to content

Commit

Permalink
init commit
Browse files Browse the repository at this point in the history
  • Loading branch information
csae8092 committed Feb 22, 2022
0 parents commit ead91cf
Show file tree
Hide file tree
Showing 45 changed files with 3,232 additions and 0 deletions.
43 changes: 43 additions & 0 deletions .github/workflows/arche.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Manually triggered workflow: builds html/arche.rdf with Ant and imports the
# metadata and the XML files under data/ into the ARCHE instance at
# https://arche-dev.acdh-dev.oeaw.ac.at/api.
name: ARCHE-Ingest

on:
  workflow_dispatch

jobs:
  build_pages:
    name: Ingest Data into ARCHE
    runs-on: ubuntu-latest
    env:
      ANT_OPTS: -Xmx5g
    steps:
      - name: Perform Checkout
        uses: actions/checkout@v2
      - name: Install Saxon, Ant and Fundament
        run: |
          # Hosted runners execute steps as a non-root user, so apt-get needs
          # sudo; the apt flag is spelled --no-install-recommends (plural).
          sudo apt-get update && sudo apt-get install openjdk-11-jre-headless ant -y --no-install-recommends
          ./script.sh
      - name: Build
        run: |
          ant -f build_arche.xml
      - name: ingestion dependencies
        run: |
          composer require "acdh-oeaw/arche-ingest:^1"
      - name: ingest resources
        run: |
          # The space before --retriesOnConflict is required; without it the
          # option is appended to the password argument.
          vendor/bin/arche-import-metadata html/arche.rdf https://arche-dev.acdh-dev.oeaw.ac.at/api ${{secrets.ARCHE_LOGIN}} ${{secrets.ARCHE_PASSWORD}} --retriesOnConflict 25
      - name: Copy XML Files to to_ingest
        run: |
          mkdir $PWD/to_ingest
          find -path "./data/editions/*.xml" -exec cp -prv '{}' './to_ingest' ';'
          find -path "./data/indices/*.xml" -exec cp -prv '{}' './to_ingest' ';'
          find -path "./data/meta/*.xml" -exec cp -prv '{}' './to_ingest' ';'
      - name: install repo-file-checker
        run: |
          composer require "acdh-oeaw/repo-file-checker:^2"
          mkdir -p filechecker/tmp filechecker/reports
      - name: run repo-file-checker
        run: |
          php -f vendor/acdh-oeaw/repo-file-checker/index.php -- --tmpDir filechecker/tmp --reportDir filechecker/reports to_ingest 0
      - name: ingest binaries
        run: |
          vendor/bin/arche-import-binary to_ingest https://id.acdh.oeaw.ac.at/rita-static https://arche-dev.acdh-dev.oeaw.ac.at/api ${{secrets.ARCHE_LOGIN}} ${{secrets.ARCHE_PASSWORD}} --skip not_exist
33 changes: 33 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Runs on every push: builds the static site with Ant, builds the search
# index and publishes ./html to GitHub Pages.
name: Build and publish

on:
  push:

jobs:
  build_pages:
    name: Publish Edition as GitHub Pages Website
    runs-on: ubuntu-latest
    env:
      ANT_OPTS: -Xmx5g
    steps:
      - name: Perform Checkout
        uses: actions/checkout@v2
      - name: Install Saxon, Ant and Fundament
        run: |
          # Hosted runners execute steps as a non-root user, so apt-get needs
          # sudo; the apt flag is spelled --no-install-recommends (plural).
          sudo apt-get update && sudo apt-get install openjdk-11-jre-headless ant -y --no-install-recommends
          ./script.sh
      - name: Install Ant-Contrib
        run: |
          wget https://repo1.maven.org/maven2/ant-contrib/ant-contrib/1.0b3/ant-contrib-1.0b3.jar
          mv ant-contrib-1.0b3.jar /usr/share/ant/lib
      - name: Build
        run: |
          ant
      - name: Build Index
        run: |
          ./build_index.sh
      - name: Deploy
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{secrets.GITHUB_TOKEN}}
          publish_dir: ./html
152 changes: 152 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# project specific
# generated output under html/
html/*.html
html/search.html
html/*.rdf
html/*.xml
html/*.txt
html/static-search
# tools unpacked by dl_saxon.sh and dl_staticsearch.sh
/saxon
/static-search
# data written by fetch_data.sh and dl_imprint.sh
data/imprint.xml
data/editions
data/indices
data/meta
data/inventare
data/rita1
# local environment settings
.env

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Windows
desktop.ini

# MacOS
.DS_Store
2 changes: 2 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CC BY 4.0
https://creativecommons.org/licenses/by/4.0/legalcode
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Privater Buchbesitz im katholisch dominierten Zentralalpenraum 1750–1800. Eine systematische Untersuchung anhand von Inventaren aus dem Pustertal und dem Stubaital


* built with [DSE-Static-Cookiecutter](https://github.com/acdh-oeaw/dse-static-cookiecutter)
67 changes: 67 additions & 0 deletions build.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Transforms the XML sources under ./data into files under ./html, running every stylesheet through Saxon. -->
<project name="tei2html" basedir=".">
    <!-- Input document for the index, toc and search stylesheets. -->
    <property name="index" value="./data/meta/about.xml"/>
    <!-- Directory that receives all generated files. -->
    <property name="target" value="./html"/>
    <property name="target_xml" value="./html/xml-view"/>

    <!-- Remove HTML pages and the static-search directory from the output directory. -->
    <delete>
        <fileset includes="*.html" dir="${target}"/>
    </delete>
    <delete dir="${target}/static-search"/>

    <!-- Disabled: XML view of the editions. -->
    <!-- <xslt style="./xslt/editions-xml.xsl" basedir="./data/editions" destdir="${target_xml}" includes="*.xml">
<factory name="net.sf.saxon.TransformerFactoryImpl"/>
<classpath location="${basedir}/saxon/saxon9he.jar"/>
</xslt> -->

    <!-- Directory transforms: every *.xml in the source directory is transformed into ${target}. -->
    <xslt basedir="./data/editions" includes="*.xml" destdir="${target}" style="./xslt/editions.xsl">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>
    <xslt basedir="./data/meta" includes="*.xml" destdir="${target}" style="./xslt/meta.xsl">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>

    <!-- Single pages generated from the ${index} document. -->
    <xslt style="./xslt/index.xsl" in="${index}" out="${target}/index.html">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>
    <xslt style="./xslt/toc.xsl" in="${index}" out="${target}/toc.html">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>
    <xslt style="./xslt/search.xsl" in="${index}" out="${target}/search.html">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>

    <!-- Imprint page. -->
    <xslt style="./xslt/imprint.xsl" in="./data/imprint.xml" out="${target}/imprint.html">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>

    <!-- Index pages for persons, places and organisations. -->
    <xslt style="./xslt/listperson.xsl" in="./data/indices/listperson.xml" out="${target}/listperson.html">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>
    <xslt style="./xslt/listplace.xsl" in="./data/indices/listplace.xml" out="${target}/listplace.html">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>
    <xslt style="./xslt/listorg.xsl" in="./data/indices/listorg.xml" out="${target}/listorg.html">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>

    <!-- Non-HTML outputs: arche.rdf and beacon.txt. -->
    <xslt style="./xslt/arche.xsl" in="./data/meta/arche_constants.rdf" out="${target}/arche.rdf">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>
    <xslt style="./xslt/beacon.xsl" in="./data/indices/listperson.xml" out="${target}/beacon.txt">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>

    <!-- Strip empty default-namespace declarations from the generated HTML; the token starts with a space. -->
    <replace value="" dir="${target}">
        <include name="*.html"/>
        <replacetoken> xmlns=""</replacetoken>
    </replace>

    <!-- Copy every XML file found below ./data into the output directory, without subdirectories. -->
    <copy flatten="true" todir="${target}">
        <fileset dir="./data/">
            <include name="**/*.xml"/>
        </fileset>
    </copy>
</project>
8 changes: 8 additions & 0 deletions build_arche.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Builds only ./html/arche.rdf from ./data/meta/arche_constants.rdf using Saxon. -->
<!-- The XML declaration above must stay on the very first line of this file; nothing, not even a blank line, may precede it. -->
<project basedir="." name="tei2html">
    <xslt in="./data/meta/arche_constants.rdf" out="./html/arche.rdf" style="./xslt/arche.xsl">
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        <classpath location="${basedir}/saxon/saxon9he.jar"/>
    </xslt>
</project>
2 changes: 2 additions & 0 deletions build_index.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Runs the Ant build in ./static-search, pointing it at ss_config.xml in the
# current working directory.

echo "build index"
# Quoted so that a working directory containing whitespace is still passed
# to Ant as one argument.
ant -f ./static-search/build.xml "-DssConfigFile=${PWD}/ss_config.xml"
Empty file added data/.gitkeep
Empty file.
8 changes: 8 additions & 0 deletions dl_fundament.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Downloads the tagged fundament release archive from GitHub, copies its
# dist/ directory into html/ and removes the unpacked sources again.

echo "download and copy fundament"
FUNDAMENT_VERSION=1.2.1
FUNDAMENT_GIT=https://github.com/acdh-oeaw/fundament/archive/refs/tags/
# Each step only runs if the previous one succeeded.
wget "${FUNDAMENT_GIT}v${FUNDAMENT_VERSION}.zip" && unzip "v${FUNDAMENT_VERSION}.zip" && rm -rf "v${FUNDAMENT_VERSION}.zip"
cp -r "fundament-${FUNDAMENT_VERSION}/dist" html/
rm -rf "fundament-${FUNDAMENT_VERSION}"
9 changes: 9 additions & 0 deletions dl_imprint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Fetches the imprint for the configured service ID and stores it, wrapped in
# a <root> element, in ./data/imprint.xml.

REDMINE_ID=6930
IMPRINT_XML=./data/imprint.xml
# -f: a missing file (e.g. on the first run) is not an error.
rm -f "${IMPRINT_XML}"
# Write the XML declaration into the file rather than to stdout.
echo '<?xml version="1.0" encoding="UTF-8"?>' > "${IMPRINT_XML}"
echo "<root>" >> "${IMPRINT_XML}"
# The URL is quoted so the shell never treats '?' as a glob character.
curl "https://shared.acdh.oeaw.ac.at/acdh-common-assets/api/imprint.php?serviceID=${REDMINE_ID}" >> "${IMPRINT_XML}"
echo "</root>" >> "${IMPRINT_XML}"
2 changes: 2 additions & 0 deletions dl_saxon.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Fetch the SaxonHE9-9-1-7J.zip archive from SourceForge (saved as "download"),
# unpack it into ./saxon and delete the archive; each step runs only if the
# previous one succeeded.
echo "downloading saxon"
wget https://sourceforge.net/projects/saxon/files/Saxon-HE/9.9/SaxonHE9-9-1-7J.zip/download \
  && unzip download -d saxon \
  && rm -rf download
11 changes: 11 additions & 0 deletions dl_staticsearch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Downloads staticSearch v1.4.1 into ./static-search and fetches the
# stopwords-de list as stopwords.txt.

echo "download static search"
# Start from a clean state so the mv below cannot collide with old copies.
rm -rf ./static-search
rm -rf ./tmp
wget https://github.com/projectEndings/staticSearch/archive/refs/tags/v1.4.1.zip && unzip v1.4.1.zip -d tmp && rm v1.4.1.zip
mv ./tmp/staticSearch-1.4.1 ./static-search && rm -rf ./tmp

echo "get stopword list"
wget https://raw.githubusercontent.com/stopwords-iso/stopwords-de/master/stopwords-de.txt -O stopwords.txt
# Ensure words.txt exists (created empty when missing).
touch words.txt
24 changes: 24 additions & 0 deletions fetch_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
# Replaces the local data/ subdirectories with a fresh copy from the
# rita2-data repository (master branch), regenerates the imprint and
# post-processes the fetched XML files.

# Remove leftovers of a previous run; -f keeps rm quiet when master.zip is absent.
rm -rf rita2-data-master && rm -f master.zip
rm -rf data/editions && rm -rf data/indices && rm -rf data/meta
rm -rf data/rita1 && rm -rf data/inventare
wget https://github.com/reading-in-the-alps/rita2-data/archive/refs/heads/master.zip
unzip master.zip
mv rita2-data-master/data/editions ./data/editions
mv rita2-data-master/data/indices ./data/indices
mv rita2-data-master/data/meta ./data/meta
mv rita2-data-master/data/inventare ./data/inventare
mv rita2-data-master/data/rita1 ./data/rita1


./dl_imprint.sh
# Clean up the downloaded archive and the unpacked tree.
rm -rf rita2-data-master && rm -f master.zip

# echo "create calendar data"
# python make_calendar_data.py
# add-attributes and denormalize-indices are external command-line tools that
# must be on PATH. NOTE(review): presumably -b sets a base URL for the files
# matched by -g; confirm against the tools' documentation.
add-attributes -g "./data/editions/*.xml" -b "https://id.acdh.oeaw.ac.at/rita/editions/"
add-attributes -g "./data/meta/*.xml" -b "https://id.acdh.oeaw.ac.at/rita/meta/"
add-attributes -g "./data/rita1/*.xml" -b "https://id.acdh.oeaw.ac.at/rita/rita1/"
add-attributes -g "./data/inventare/*.xml" -b "https://id.acdh.oeaw.ac.at/rita/inventare/"
add-attributes -g "./data/indices/*.xml" -b "https://id.acdh.oeaw.ac.at/rita/indices/"

denormalize-indices -x './/tei:title[@type="short"]/text()' -i './data/indices/list*.xml' -f './data/editions/*.xml'
Loading

0 comments on commit ead91cf

Please sign in to comment.