From 2267945532ad3dbbf7a223171dedcea8745a5493 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 22 Jan 2024 15:48:35 +0100 Subject: [PATCH] UI for similar datasets --- README.md | 10 +++------- ckanext/embeddings/actions.py | 2 +- ckanext/embeddings/assets/style.css | 8 ++++++++ ckanext/embeddings/helpers.py | 12 ++++++++++++ ckanext/embeddings/plugin.py | 12 ++++++++++-- ckanext/embeddings/templates/package/read.html | 17 +++++++++++++++++ 6 files changed, 51 insertions(+), 10 deletions(-) create mode 100644 ckanext/embeddings/helpers.py create mode 100644 ckanext/embeddings/templates/package/read.html diff --git a/README.md b/README.md index e00c370..3751d79 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ datasets to a "Bathing Water Quality" one, besides other datasets explicitly men metadata you'll get others that might include things like "Wastewater Treatment", "Aquaculture Sites" or "Lakes". The plugin adds a `package_similar_show` action that will return the closest datasets to the one provided with -the `id` parameter (id or name). 5 are returned by default, which can be changed using the `limit` paramater. +the `id` parameter (id or name). 5 are returned by default, which can be changed using the `limit` parameter. #### 2. Semantic search @@ -84,10 +84,6 @@ ckanapi action package_search q=boats extras='{"ext_vector_search":"true"}' | jq Remember that the Semantic Search will always return a fixed number of datasets (the default in this case is 10). - - - - ## Requirements Tested on CKAN 2.10/master @@ -132,7 +128,7 @@ TODO ## Customizing You can choose the backend used to generate the embeddings by settings the `ckanext.embeddings.backend` config option. -Right now the plugins includes two backends, one that runs locally using [Sentence Transformers](https://www.sbert.net/)'s `all-MiniLM-L6-v2` model (`sentence_transformers`, the default one) and one that uses OpenAI's Embeddings API (`openai`). 
You will need +Right now the plugin includes two backends, one that runs locally using [Sentence Transformers](https://www.sbert.net/)'s `all-MiniLM-L6-v2` model (`sentence_transformers`, the default one) and one that uses OpenAI's Embeddings API (`openai`). You will need to provide an API key for this one, either via the `ckanext.embeddings.openai.api_key` config option or a `OPENAI_API_KEY` env var. Additionally, it's really easy to provide your own backends. You can write your own class that inherits from @@ -206,7 +202,7 @@ USER solr ``` -and then in the ini file: +And then in the ini file you can choose the Solr field used when indexing/querying: ```ini ckanext.embeddings.solr_vector_field_name = vector diff --git a/ckanext/embeddings/actions.py b/ckanext/embeddings/actions.py index 61e3f44..83f6dec 100644 --- a/ckanext/embeddings/actions.py +++ b/ckanext/embeddings/actions.py @@ -6,7 +6,7 @@ @toolkit.side_effect_free def package_similar_show(context, data_dict): dataset_id = toolkit.get_or_bust(data_dict, "id") - limit = data_dict.get("limit", 5) + limit = data_dict.get("limit") or 5 try: limit = int(limit) except ValueError: diff --git a/ckanext/embeddings/assets/style.css b/ckanext/embeddings/assets/style.css index 83a84aa..0c5f1c9 100644 --- a/ckanext/embeddings/assets/style.css +++ b/ckanext/embeddings/assets/style.css @@ -5,3 +5,11 @@ .semantic-search-label:after { content: ''; } + +.similar-datasets { + margin-top: 30px; +} + +.similar-datasets > h3 { + margin-bottom: 20px; +} diff --git a/ckanext/embeddings/helpers.py b/ckanext/embeddings/helpers.py new file mode 100644 index 0000000..3c73f1f --- /dev/null +++ b/ckanext/embeddings/helpers.py @@ -0,0 +1,12 @@ +from ckan.plugins import toolkit + + +def get_similar_datasets(dataset_id, limit=None): + try: + datasets = toolkit.get_action("package_similar_show")( + {}, {"id": dataset_id, "limit": limit} + ) + except toolkit.ObjectNotFound: + datasets = [] + + return datasets diff --git 
a/ckanext/embeddings/plugin.py b/ckanext/embeddings/plugin.py index 1002c1b..e1ab302 100644 --- a/ckanext/embeddings/plugin.py +++ b/ckanext/embeddings/plugin.py @@ -6,7 +6,7 @@ import ckan.plugins.toolkit as toolkit from ckanext.embeddings.model import DatasetEmbedding -from ckanext.embeddings import cli +from ckanext.embeddings import cli, helpers from ckanext.embeddings.actions import package_similar_show from ckanext.embeddings.auth import package_similar_show as package_similar_show_auth from ckanext.embeddings.backends import get_embeddings_backend @@ -18,6 +18,7 @@ class EmbeddingPlugin(plugins.SingletonPlugin): plugins.implements(plugins.IClick) plugins.implements(plugins.IActions) plugins.implements(plugins.IAuthFunctions) + plugins.implements(plugins.ITemplateHelpers) plugins.implements(plugins.IPackageController, inherit=True) backend = None @@ -46,6 +47,11 @@ def get_actions(self): def get_auth_functions(self): return {"package_similar_show": package_similar_show_auth} + # ITemplateHelpers + + def get_helpers(self): + return {"embeddings_get_similar_datasets": helpers.get_similar_datasets} + # IDatasetForm def before_dataset_index(self, dataset_dict): @@ -70,7 +76,9 @@ def before_dataset_search(self, search_params): try: extras = json.loads(extras) except ValueError: - raise toolkit.ValidationError({"extras": f"Wrong JSON object: {extras}"}) + raise toolkit.ValidationError( + {"extras": f"Wrong JSON object: {extras}"} + ) if not toolkit.asbool(extras.get("ext_vector_search")): return search_params diff --git a/ckanext/embeddings/templates/package/read.html b/ckanext/embeddings/templates/package/read.html new file mode 100644 index 0000000..8112847 --- /dev/null +++ b/ckanext/embeddings/templates/package/read.html @@ -0,0 +1,17 @@ +{% ckan_extends %} + +{% block primary_content_inner %} + {{ super() }} + + {% asset "ckanext-embeddings/css" %} + +
+    <h3>{{ _('Similar Datasets') }}</h3>
+ + {% set similar_datasets = h.embeddings_get_similar_datasets(pkg.id) %} + + {{ h.snippet('snippets/package_list.html', packages=similar_datasets) }} +
+ +{% endblock %} +