Skip to content

Commit

Permalink
added typesense fulltext search
Browse files Browse the repository at this point in the history
  • Loading branch information
csae8092 committed Feb 16, 2023
1 parent a4c3097 commit 108231f
Show file tree
Hide file tree
Showing 10 changed files with 338 additions and 53 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ jobs:
name: Publish Edition as GitHub Pages Website
runs-on: ubuntu-latest
env:
ANT_OPTS: -Xmx5g
ANT_OPTS: -Xmx6g
TYPESENSE_HOST: typesense.acdh-dev.oeaw.ac.at
TYPESENSE_PORT: 443
TYPESENSE_PROTOCOL: https
TYPESENSE_API_KEY: ${{secrets.TYPESENSE_API_KEY}}
steps:
- name: Perform Checkout
uses: actions/checkout@v3
Expand All @@ -19,6 +23,8 @@ jobs:
pip install -r requirements.txt
- name: fetch data
run: ./fetch_data.sh
- name: fulltext index
run: python make_typesense_index.py
- name: Build
run: ant
- name: Deploy
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,5 @@ dmypy.json
desktop.ini

# MacOS
.DS_Store
.DS_Store
secret.sh
25 changes: 0 additions & 25 deletions html/js/osd.js

This file was deleted.

19 changes: 0 additions & 19 deletions html/js/osd_single.js

This file was deleted.

174 changes: 174 additions & 0 deletions html/js/ts_index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// Adapter that lets InstantSearch.js send its queries to a Typesense server.
// NOTE(review): this API key is delivered to every browser that loads the
// page — confirm on the server that it is scoped to search-only operations.
const typesenseInstantsearchAdapter = new TypesenseInstantSearchAdapter({
  server: {
    apiKey: 'yDReCmobcw6d0HSQD4IcvHA3c85HVI3b',
    nodes: [
      { host: 'typesense.acdh-dev.oeaw.ac.at', port: '443', protocol: 'https' },
    ],
    // 120 seconds, i.e. two minutes.
    cacheSearchResultsForSeconds: 120,
  },
  // Every query is matched against the `full_text` field.
  additionalSearchParameters: { query_by: 'full_text' },
});


// Search client exposed by the Typesense adapter.
const { searchClient } = typesenseInstantsearchAdapter;

// InstantSearch instance that queries the `rita` index through that client.
const search = instantsearch({
  searchClient,
  indexName: 'rita',
});

/**
 * Build a searchable facet list for one array attribute of the index.
 *
 * The three facet lists on the page differ only in the attribute they filter
 * on and in the colour class of their counter badge, so both are parameters.
 *
 * @param {string} attribute - Facet field name; also the suffix of the
 *   `#refinement-list-…` container selector.
 * @param {string} badgeVariant - Suffix of the `badge-…` CSS class applied to
 *   the per-value hit counter.
 * @returns {object} The widget returned by `instantsearch.widgets.refinementList`.
 */
function buildRefinementList(attribute, badgeVariant) {
  return instantsearch.widgets.refinementList({
    container: `#refinement-list-${attribute}`,
    attribute,
    searchable: true,
    searchablePlaceholder: 'Suche',
    cssClasses: {
      searchableInput: 'form-control form-control-sm mb-2 border-light-2',
      searchableSubmit: 'd-none',
      searchableReset: 'd-none',
      showMore: 'btn btn-secondary btn-sm align-content-center',
      list: 'list-unstyled',
      count: `badge ml-2 badge-${badgeVariant}`,
      label: 'd-flex align-items-center text-capitalize',
      checkbox: 'mr-2',
    },
  });
}

// Register every visible widget of the search page. Each widget renders into
// an element that must already exist in the page (see the `container` selectors).
search.addWidgets([
  instantsearch.widgets.searchBox({
    container: '#searchbox',
    autofocus: true,
    cssClasses: {
      form: 'form-inline',
      input: 'form-control col-md-11',
      submit: 'btn',
      reset: 'btn',
    },
  }),

  instantsearch.widgets.hits({
    container: '#hits',
    templates: {
      empty: 'Keine Ergebnisse',
      // Each hit links to `<id>.html` and shows a snippet of the full text
      // followed by one badge per person, book and place.
      // The project badge is only rendered when the hit carries a `project`
      // value, so documents without one no longer produce an empty badge.
      // All three badge groups are wrapped in their own <div>; the previous
      // markup left the books group unwrapped and had an unmatched </div>.
      item: `
        <h4><a href="{{ id }}.html">{{ title }}</a></h4>
        <p>{{#helpers.snippet}}{ "attribute": "full_text" }{{/helpers.snippet}}</p>
        {{#project}}
        <h5><span class="badge badge-primary">{{ . }}</span></h5>
        {{/project}}
        <div>
          {{#persons}}
          <span class="badge badge-secondary">{{ . }}</span>
          {{/persons}}
        </div>
        <div>
          {{#books}}
          <span class="badge badge-success">{{ . }}</span>
          {{/books}}
        </div>
        <div>
          {{#places}}
          <span class="badge badge-info">{{ . }}</span>
          {{/places}}
        </div>
      `,
    },
  }),

  instantsearch.widgets.stats({
    container: '#stats-container',
    templates: {
      // German result summary: "<n> Treffer [aus <total>] gefunden in <t>ms".
      text: `
        {{#areHitsSorted}}
          {{#hasNoSortedResults}}Keine Treffer{{/hasNoSortedResults}}
          {{#hasOneSortedResults}}1 Treffer{{/hasOneSortedResults}}
          {{#hasManySortedResults}}{{#helpers.formatNumber}}{{nbSortedHits}}{{/helpers.formatNumber}} Treffer {{/hasManySortedResults}}
          aus {{#helpers.formatNumber}}{{nbHits}}{{/helpers.formatNumber}}
        {{/areHitsSorted}}
        {{^areHitsSorted}}
          {{#hasNoResults}}Keine Treffer{{/hasNoResults}}
          {{#hasOneResult}}1 Treffer{{/hasOneResult}}
          {{#hasManyResults}}{{#helpers.formatNumber}}{{nbHits}}{{/helpers.formatNumber}} Treffer{{/hasManyResults}}
        {{/areHitsSorted}}
        gefunden in {{processingTimeMS}}ms
      `,
    },
  }),

  // Facet filters; the badge colours match the badges used in the hit template.
  buildRefinementList('places', 'info'),
  buildRefinementList('persons', 'secondary'),
  buildRefinementList('books', 'success'),

  instantsearch.widgets.pagination({
    container: '#pagination',
    padding: 2,
    cssClasses: {
      list: 'pagination',
      item: 'page-item',
      link: 'page-link',
    },
  }),

  instantsearch.widgets.clearRefinements({
    container: '#clear-refinements',
    templates: {
      resetLabel: 'Filter zurücksetzen',
    },
    cssClasses: {
      button: 'btn',
    },
  }),

  instantsearch.widgets.currentRefinements({
    container: '#current-refinements',
    cssClasses: {
      delete: 'btn',
      label: 'badge',
    },
  }),
]);



// Request snippets of `full_text`; the hit template renders them through
// `helpers.snippet`.
search.addWidgets([
  instantsearch.widgets.configure({ attributesToSnippet: ['full_text'] }),
]);

// All widgets are registered — start the search UI.
search.start();
103 changes: 103 additions & 0 deletions make_typesense_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import glob
import os

from typesense.api_call import ObjectNotFound
from acdh_cfts_pyutils import TYPESENSE_CLIENT as client
from acdh_cfts_pyutils import CFTS_COLLECTION
from acdh_tei_pyutils.tei import TeiReader
from tqdm import tqdm


# Collect the XML files of both source directories: editions first, then rita1.
files = [
    *glob.glob('./data/editions/*.xml'),
    *glob.glob('./data/rita1/*.xml'),
]
print(len(files))


# Drop the existing 'rita' collection so the index is rebuilt from scratch.
# A collection that does not exist yet is not an error.
try:
    client.collections['rita'].delete()
except ObjectNotFound:
    pass

# Schema of the 'rita' collection: four plain string fields, followed by three
# optional string-array fields that are exposed as facets.
current_schema = {
    'name': 'rita',
    'fields': [
        *(
            {'name': field_name, 'type': 'string'}
            for field_name in ('id', 'rec_id', 'title', 'full_text')
        ),
        *(
            {'name': field_name, 'type': 'string[]', 'facet': True, 'optional': True}
            for field_name in ('persons', 'places', 'books')
        ),
    ],
}

client.collections.create(current_schema)

def _squash(fragments):
    """Join text fragments with spaces and collapse every whitespace run to one space."""
    return " ".join(" ".join(fragments).split())


def _texts(doc, xpath):
    """Return the whitespace-normalised text of every node matched by ``xpath`` in ``doc``."""
    return [_squash(node.xpath('.//text()')) for node in doc.any_xpath(xpath)]


# Build two parallel document lists from the TEI files:
#   records      -> documents for the project's own 'rita' collection
#   cfts_records -> documents for the shared CFTS collection
records = []
cfts_records = []
for x in tqdm(files, total=len(files)):
    doc = TeiReader(x)

    # A file without a tei:body used to raise IndexError here and abort the
    # whole run — after the 'rita' collection had already been deleted.
    # Such files are now reported and skipped instead.
    bodies = doc.any_xpath('.//tei:body')
    if not bodies:
        tqdm.write(f"skipping {x}: no tei:body element found")
        continue
    body = bodies[0]

    file_name = os.path.split(x)[-1]
    record = {
        # The id is the file name without '.xml'; the search page links to '<id>.html'.
        'id': file_name.replace('.xml', ''),
        'rec_id': file_name,
        'title': _squash(doc.any_xpath('.//tei:titleStmt/tei:title//text()')),
        'persons': _texts(doc, './/tei:person//tei:persName[1]'),
        'places': _texts(doc, './/tei:place//tei:placeName[1]'),
        'books': _texts(doc, './/tei:back//tei:listBibl//tei:bibl[@xml:id]/tei:title[1]'),
        # itertext() fragments are concatenated without a separator before the
        # whitespace is collapsed (unlike _squash, which joins with spaces).
        'full_text': " ".join(''.join(body.itertext()).split()),
    }

    # NOTE(review): persons and places are not copied to the CFTS document,
    # only books is — confirm this is intended.
    cfts_record = {
        'project': 'rita',
        'id': record['id'],
        'resolver': f"https://rita.acdh.oeaw.ac.at/{record['id']}.html",
        'rec_id': record['rec_id'],
        'title': record['title'],
        'books': record['books'],
        'full_text': record['full_text'],
    }

    records.append(record)
    cfts_records.append(cfts_record)

# Bulk-import the project documents into the freshly created 'rita' collection
# and print what the import call returned.
rita_import_result = client.collections['rita'].documents.import_(records)
print(rita_import_result)
print('done with indexing rita')

# Import the CFTS documents into the shared CFTS collection with the 'upsert' action.
cfts_import_result = CFTS_COLLECTION.documents.import_(
    cfts_records,
    {'action': 'upsert'},
)
print(cfts_import_result)
print('done with cfts-index rita')
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
acdh-tei-pyutils
acdh-tei-pyutils
acdh-cfts-pyutils==0.2
18 changes: 18 additions & 0 deletions xslt/partials/html_head.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs4/jq-3.3.1/jszip-2.5.0/dt-1.11.0/b-2.0.0/b-html5-2.0.0/cr-1.5.4/r-2.2.9/sp-1.4.0/datatables.min.css"></link>
<script src="https://code.jquery.com/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/openseadragon/2.4.2/openseadragon.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/typesense-instantsearch-adapter@2/dist/typesense-instantsearch-adapter.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/algoliasearch@4.5.1/dist/algoliasearch-lite.umd.js"
integrity="sha256-EXPXz4W6pQgfYY3yTpnDa3OH8/EPn16ciVsPQ/ypsjk=" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/instantsearch.js@4.8.3/dist/instantsearch.production.min.js"
integrity="sha256-LAGhRRdtVoD6RLo2qDQsU2mp+XVSciKRC8XPOBWmofM=" crossorigin="anonymous"></script>
<script type="text/javascript">
var _paq = _paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://matomo.acdh.oeaw.ac.at/";
_paq.push(['setTrackerUrl', u+'piwik.php']);
_paq.push(['setSiteId', '46']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.type='text/javascript'; g.async=true; g.defer=true; g.src=u+'piwik.js'; s.parentNode.insertBefore(g,s);
})();
</script>
</head>
</xsl:template>
</xsl:stylesheet>
Loading

0 comments on commit 108231f

Please sign in to comment.