From aff5184dc6906c6e111eb835d5e92af61bb3fb52 Mon Sep 17 00:00:00 2001 From: seitenbau-govdata Date: Thu, 4 May 2023 09:59:14 +0200 Subject: [PATCH 1/3] Fix a problem with data-dictization of lists to fix search-index rebuild --- CHANGELOG.rst | 2 ++ ckanext/harvest/logic/dictization.py | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 8d32ef8d7..99f6cbdf6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,8 @@ and this project adheres to `Semantic Versioning `_ Unreleased_ *********** +- Fix a problem with data-dictization when using sqlalchemy 1.4+ + *********** 1.4.2_ - 2023-01-12 *********** diff --git a/ckanext/harvest/logic/dictization.py b/ckanext/harvest/logic/dictization.py index d6dbedd86..5e1a39b0d 100644 --- a/ckanext/harvest/logic/dictization.py +++ b/ckanext/harvest/logic/dictization.py @@ -68,7 +68,7 @@ def harvest_job_dictize(job, context): .group_by(HarvestObjectError.message) \ .order_by(text('error_count desc')) \ .limit(context.get('error_summmary_limit', 20)) - out['object_error_summary'] = q.all() + out['object_error_summary'] = _dictize_list(q.all()) q = model.Session.query( HarvestGatherError.message, func.count(HarvestGatherError.message).label('error_count')) \ @@ -76,7 +76,7 @@ def harvest_job_dictize(job, context): .group_by(HarvestGatherError.message) \ .order_by(text('error_count desc')) \ .limit(context.get('error_summmary_limit', 20)) - out['gather_error_summary'] = q.all() + out['gather_error_summary'] = _dictize_list(q.all()) return out @@ -144,3 +144,13 @@ def _get_source_status(source, context): out['last_harvest_request'] = 'Not yet harvested' return out + + +def _dictize_list(db_result_list): + ''' + Helper method to dictize all elements of a database result list. 
+ ''' + dictized_list = [] + for elem in db_result_list: + dictized_list.append(elem._asdict()) + return dictized_list From 9926cac529bcedf56369fba9e6afd75045bbc04f Mon Sep 17 00:00:00 2001 From: seitenbau-govdata Date: Tue, 9 May 2023 13:10:39 +0200 Subject: [PATCH 2/3] Refactor changes in dictization.py --- ckanext/harvest/logic/dictization.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/ckanext/harvest/logic/dictization.py b/ckanext/harvest/logic/dictization.py index 5e1a39b0d..00e8ec984 100644 --- a/ckanext/harvest/logic/dictization.py +++ b/ckanext/harvest/logic/dictization.py @@ -68,7 +68,7 @@ def harvest_job_dictize(job, context): .group_by(HarvestObjectError.message) \ .order_by(text('error_count desc')) \ .limit(context.get('error_summmary_limit', 20)) - out['object_error_summary'] = _dictize_list(q.all()) + out['object_error_summary'] = harvest_error_dictize(q.all(), context) q = model.Session.query( HarvestGatherError.message, func.count(HarvestGatherError.message).label('error_count')) \ @@ -76,7 +76,8 @@ def harvest_job_dictize(job, context): .group_by(HarvestGatherError.message) \ .order_by(text('error_count desc')) \ .limit(context.get('error_summmary_limit', 20)) - out['gather_error_summary'] = _dictize_list(q.all()) + out['gather_error_summary'] = harvest_error_dictize(q.all(), context) + return out @@ -106,6 +107,13 @@ def harvest_log_dictize(obj, context): return out +def harvest_error_dictize(obj, context): + out = [] + for elem in obj: + out.append(elem._asdict()) + return out + + def _get_source_status(source, context): ''' TODO: Deprecated, use harvest_source_show_status instead @@ -144,13 +152,3 @@ def _get_source_status(source, context): out['last_harvest_request'] = 'Not yet harvested' return out - - -def _dictize_list(db_result_list): - ''' - Helper method to dictize all elements of a database result list. 
- ''' - dictized_list = [] - for elem in db_result_list: - dictized_list.append(elem._asdict()) - return dictized_list From bae212784ba309409bb079605e622eb7028740f9 Mon Sep 17 00:00:00 2001 From: seitenbau-govdata Date: Tue, 9 May 2023 16:02:06 +0200 Subject: [PATCH 3/3] Add new testcase for harvest_source_show_status --- ckanext/harvest/tests/test_action.py | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/ckanext/harvest/tests/test_action.py b/ckanext/harvest/tests/test_action.py index d2927d599..1c937f492 100644 --- a/ckanext/harvest/tests/test_action.py +++ b/ckanext/harvest/tests/test_action.py @@ -755,3 +755,33 @@ def test_harvest_job_create_as_admin(self): assert job['status'] == 'Running' assert job['gather_started'] is None assert 'stats' in job.keys() + + def test_harvest_source_show_status(self): + + source = factories.HarvestSourceObj(**SOURCE_DICT.copy()) + job = factories.HarvestJobObj(source=source) + dataset = ckan_factories.Dataset() + obj = factories.HarvestObjectObj( + job=job, source=source, package_id=dataset['id']) + + harvest_gather_error = harvest_model.HarvestGatherError(message="Unexpected gather error", job=job) + harvest_gather_error.save() + harvest_object_error = harvest_model.HarvestObjectError(message="Unexpected object error", object=obj) + harvest_object_error.save() + + context = {'model': model} + data_dict = {'id': source.id} + + source_status = get_action('harvest_source_show_status')(context, data_dict) + + # verify that the response is dictized properly + json.dumps(source_status) + + last_job = source_status['last_job'] + assert last_job['source_id'] == source.id + assert last_job['status'] == 'New' + assert last_job['stats']['errored'] == 2 + assert len(last_job['object_error_summary']) == 1 + assert last_job['object_error_summary'][0]['message'] == harvest_object_error.message + assert len(last_job['gather_error_summary']) == 1 + assert last_job['gather_error_summary'][0]['message'] == 
harvest_gather_error.message