From 7ac080b9b6d926b04ed61c4c77e72f908925427b Mon Sep 17 00:00:00 2001 From: Felix Schmitz <33942086+felixschmitz@users.noreply.github.com> Date: Sat, 8 Oct 2022 21:32:53 +0200 Subject: [PATCH] Feature/#40 automatic job query (#70) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * added clean_cache function: a function that removes all files from cache directory or only one, specified by name. Likely to be moved to cache.py, once this is on dev * Added ToDos, Unified query param comparison * Updated destatis status check #45 Incorporated further response codes and response type checks. Output for Code 0 tbd * Merged updates from personal branch * Added http 5xx error check, added destatis status tests * Updated urls, unified Destatis type-style * Added generic response creation for generic http_helper test * Updated type hints, made contents rely on functionality from http_helpers * merged current dev into feature branch. * Merged changes from draft branch, including pylint fixes, Exception and Error clarification. Also moved clean_cache function. Some ToDos still left, #45. * fixed linting issues, including inconsistent returns. * hotfix of overlooked error #45. * narrowed the caught exception * Added notes w/ Felix regarding automatic job support, #40. * Notebook for interaction & workflow with jobs * integrating jobs into http_helper.py * Simplified if selections, added TODO comments, added catches for fail cases, #40 * splitting up jobs function and writing first tests for user input * work in progress * changes in jobs params for faster tests * Changes in job due to http_helper #43 * refactor: Updated & added documentation, identified Windows input issue, #40. * Commit before merge of dev * commit before PR * last working commit, #40. * seem to have fixed issue with destatis response; however, I am currently timed out and cannot do a final check, #40. * Fixed new implementation and updated jobs notebook.
Code needs clean-up & tests need to be updated (for input), #40. * changes with respect to first PR * Removed user input, always starting job, test update, currently on timeout * pylint changes * commit before merge * Code quality http_helper.py and tests * pylint and black changes * removing _generic_status_dict() * Changes due to bandit * Changes due to mypy * refactor load_data to handle jobs * fix tests * Removed one old comment, #40 Co-authored-by: MarcoHuebner <57489799+MarcoHuebner@users.noreply.github.com> Co-authored-by: Michael Aydinbas --- nb/jobs.ipynb | 541 +++++++++++++++++++++++++++++++++++ src/pygenesis/http_helper.py | 136 ++++++++- tests/test_http_helper.py | 42 ++- 3 files changed, 700 insertions(+), 19 deletions(-) create mode 100644 nb/jobs.ipynb diff --git a/nb/jobs.ipynb b/nb/jobs.ipynb new file mode 100644 index 0000000..9ea4e40 --- /dev/null +++ b/nb/jobs.ipynb @@ -0,0 +1,541 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Jobs\n", + "\n", + "Die Funktion `load_data` unterstützt das Abrufen von Jobs für zu große Tabellen. Destatis verweigert bei zu großen Tabellen die initiale Abfrage und gibt einen Status Code 98 zurück. Mit dem speziellen Query-Parameter `job=true` kann man jedoch die Verarbeitung im Hintergrund anstoßen. Die Tabelle wird dann für den User nach einiger Zeit bereitgestellt. Diese Tabelle kann dann über `data/resultfile` statt `data/tablefile` abgerufen werden. All diese Details verbergen wir jedoch vor dem Benutzer. Die Klasse `Table` verbirgt diese Komplexität und stellt die Daten, die von einem Job kommen, genauso zur Verfügung wie Daten, die über den normalen Destatis Endpunkt kommen. Der Benutzer muss lediglich eine etwas längere Wartezeit in Kauf nehmen (aktuell ein Timeout von 1 Minute)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import json\n", + "import logging\n", + "\n", + "from pygenesis.table import Table" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Die Tabelle ist zu groß, um direkt abgerufen zu werden. Es wird eine Verarbeitung im Hintergrund gestartet.\n" + ] + } + ], + "source": [ + "# Wir wollen die Tabelle mit der Nummer 42153-0001 laden\n", + "# Sofern die Daten nicht gecacht wurden, wird uns eine Warnmeldung darüber informieren, dass hier ein Job angestoßen wird\n", + "t = Table(name=\"42153-0001\")\n", + "t.get_data(timeslices=20) # timeslices muss hier relativ hoch gewählt werden, damit ein Job angestoßen wird " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Eine erneute Abfrage findet die Daten im Cache und stellt sie dementsprechend sofort bereit\n", + "t = Table(name=\"42153-0001\")\n", + "t.get_data(timeslices=20)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Statistik_CodeStatistik_LabelZeit_CodeZeit_LabelZeit1_Merkmal_Code1_Merkmal_Label1_Auspraegung_Code1_Auspraegung_Label2_Merkmal_Code...2_Auspraegung_Label3_Merkmal_Code3_Merkmal_Label3_Auspraegung_Code3_Auspraegung_Label4_Merkmal_Code4_Merkmal_Label4_Auspraegung_Code4_Auspraegung_LabelPRO101__Produktionsindex__2015=100
042153Indizes der Produktion im Verarbeitenden GewerbeJAHRJahr2003DINSGDeutschland insgesamtDGDeutschlandWERT03...OriginalwerteWZ08V1WZ2008 (Hauptgruppen, Aggregate): Verarb. GewerbeWZ08-B-05Produzierendes GewerbeMONATMonateMONAT01Januar76,9
142153Indizes der Produktion im Verarbeitenden GewerbeJAHRJahr2003DINSGDeutschland insgesamtDGDeutschlandWERT03...OriginalwerteWZ08V1WZ2008 (Hauptgruppen, Aggregate): Verarb. GewerbeWZ08-B-05Produzierendes GewerbeMONATMonateMONAT02Februar77,9
242153Indizes der Produktion im Verarbeitenden GewerbeJAHRJahr2003DINSGDeutschland insgesamtDGDeutschlandWERT03...OriginalwerteWZ08V1WZ2008 (Hauptgruppen, Aggregate): Verarb. GewerbeWZ08-B-05Produzierendes GewerbeMONATMonateMONAT03März84,9
342153Indizes der Produktion im Verarbeitenden GewerbeJAHRJahr2003DINSGDeutschland insgesamtDGDeutschlandWERT03...OriginalwerteWZ08V1WZ2008 (Hauptgruppen, Aggregate): Verarb. GewerbeWZ08-B-05Produzierendes GewerbeMONATMonateMONAT04April82,9
442153Indizes der Produktion im Verarbeitenden GewerbeJAHRJahr2003DINSGDeutschland insgesamtDGDeutschlandWERT03...OriginalwerteWZ08V1WZ2008 (Hauptgruppen, Aggregate): Verarb. GewerbeWZ08-B-05Produzierendes GewerbeMONATMonateMONAT05Mai81,4
..................................................................
6239542153Indizes der Produktion im Verarbeitenden GewerbeJAHRJahr2022DINSGDeutschland insgesamtDGDeutschlandWERT03...BV4.1 TrendWZ08V1WZ2008 (Hauptgruppen, Aggregate): Verarb. GewerbeWZ08-30-01Sonstiger Fahrzeugbau ohne H. v. Fahrzeugen a....MONATMonateMONAT08August...
6239642153Indizes der Produktion im Verarbeitenden GewerbeJAHRJahr2022DINSGDeutschland insgesamtDGDeutschlandWERT03...BV4.1 TrendWZ08V1WZ2008 (Hauptgruppen, Aggregate): Verarb. GewerbeWZ08-30-01Sonstiger Fahrzeugbau ohne H. v. Fahrzeugen a....MONATMonateMONAT09September...
6239742153Indizes der Produktion im Verarbeitenden GewerbeJAHRJahr2022DINSGDeutschland insgesamtDGDeutschlandWERT03...BV4.1 TrendWZ08V1WZ2008 (Hauptgruppen, Aggregate): Verarb. GewerbeWZ08-30-01Sonstiger Fahrzeugbau ohne H. v. Fahrzeugen a....MONATMonateMONAT10Oktober...
6239842153Indizes der Produktion im Verarbeitenden GewerbeJAHRJahr2022DINSGDeutschland insgesamtDGDeutschlandWERT03...BV4.1 TrendWZ08V1WZ2008 (Hauptgruppen, Aggregate): Verarb. GewerbeWZ08-30-01Sonstiger Fahrzeugbau ohne H. v. Fahrzeugen a....MONATMonateMONAT11November...
6239942153Indizes der Produktion im Verarbeitenden GewerbeJAHRJahr2022DINSGDeutschland insgesamtDGDeutschlandWERT03...BV4.1 TrendWZ08V1WZ2008 (Hauptgruppen, Aggregate): Verarb. GewerbeWZ08-30-01Sonstiger Fahrzeugbau ohne H. v. Fahrzeugen a....MONATMonateMONAT12Dezember...
\n", + "

62400 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " Statistik_Code Statistik_Label \\\n", + "0 42153 Indizes der Produktion im Verarbeitenden Gewerbe \n", + "1 42153 Indizes der Produktion im Verarbeitenden Gewerbe \n", + "2 42153 Indizes der Produktion im Verarbeitenden Gewerbe \n", + "3 42153 Indizes der Produktion im Verarbeitenden Gewerbe \n", + "4 42153 Indizes der Produktion im Verarbeitenden Gewerbe \n", + "... ... ... \n", + "62395 42153 Indizes der Produktion im Verarbeitenden Gewerbe \n", + "62396 42153 Indizes der Produktion im Verarbeitenden Gewerbe \n", + "62397 42153 Indizes der Produktion im Verarbeitenden Gewerbe \n", + "62398 42153 Indizes der Produktion im Verarbeitenden Gewerbe \n", + "62399 42153 Indizes der Produktion im Verarbeitenden Gewerbe \n", + "\n", + " Zeit_Code Zeit_Label Zeit 1_Merkmal_Code 1_Merkmal_Label \\\n", + "0 JAHR Jahr 2003 DINSG Deutschland insgesamt \n", + "1 JAHR Jahr 2003 DINSG Deutschland insgesamt \n", + "2 JAHR Jahr 2003 DINSG Deutschland insgesamt \n", + "3 JAHR Jahr 2003 DINSG Deutschland insgesamt \n", + "4 JAHR Jahr 2003 DINSG Deutschland insgesamt \n", + "... ... ... ... ... ... \n", + "62395 JAHR Jahr 2022 DINSG Deutschland insgesamt \n", + "62396 JAHR Jahr 2022 DINSG Deutschland insgesamt \n", + "62397 JAHR Jahr 2022 DINSG Deutschland insgesamt \n", + "62398 JAHR Jahr 2022 DINSG Deutschland insgesamt \n", + "62399 JAHR Jahr 2022 DINSG Deutschland insgesamt \n", + "\n", + " 1_Auspraegung_Code 1_Auspraegung_Label 2_Merkmal_Code ... \\\n", + "0 DG Deutschland WERT03 ... \n", + "1 DG Deutschland WERT03 ... \n", + "2 DG Deutschland WERT03 ... \n", + "3 DG Deutschland WERT03 ... \n", + "4 DG Deutschland WERT03 ... \n", + "... ... ... ... ... \n", + "62395 DG Deutschland WERT03 ... \n", + "62396 DG Deutschland WERT03 ... \n", + "62397 DG Deutschland WERT03 ... \n", + "62398 DG Deutschland WERT03 ... \n", + "62399 DG Deutschland WERT03 ... 
\n", + "\n", + " 2_Auspraegung_Label 3_Merkmal_Code \\\n", + "0 Originalwerte WZ08V1 \n", + "1 Originalwerte WZ08V1 \n", + "2 Originalwerte WZ08V1 \n", + "3 Originalwerte WZ08V1 \n", + "4 Originalwerte WZ08V1 \n", + "... ... ... \n", + "62395 BV4.1 Trend WZ08V1 \n", + "62396 BV4.1 Trend WZ08V1 \n", + "62397 BV4.1 Trend WZ08V1 \n", + "62398 BV4.1 Trend WZ08V1 \n", + "62399 BV4.1 Trend WZ08V1 \n", + "\n", + " 3_Merkmal_Label 3_Auspraegung_Code \\\n", + "0 WZ2008 (Hauptgruppen, Aggregate): Verarb. Gewerbe WZ08-B-05 \n", + "1 WZ2008 (Hauptgruppen, Aggregate): Verarb. Gewerbe WZ08-B-05 \n", + "2 WZ2008 (Hauptgruppen, Aggregate): Verarb. Gewerbe WZ08-B-05 \n", + "3 WZ2008 (Hauptgruppen, Aggregate): Verarb. Gewerbe WZ08-B-05 \n", + "4 WZ2008 (Hauptgruppen, Aggregate): Verarb. Gewerbe WZ08-B-05 \n", + "... ... ... \n", + "62395 WZ2008 (Hauptgruppen, Aggregate): Verarb. Gewerbe WZ08-30-01 \n", + "62396 WZ2008 (Hauptgruppen, Aggregate): Verarb. Gewerbe WZ08-30-01 \n", + "62397 WZ2008 (Hauptgruppen, Aggregate): Verarb. Gewerbe WZ08-30-01 \n", + "62398 WZ2008 (Hauptgruppen, Aggregate): Verarb. Gewerbe WZ08-30-01 \n", + "62399 WZ2008 (Hauptgruppen, Aggregate): Verarb. Gewerbe WZ08-30-01 \n", + "\n", + " 3_Auspraegung_Label 4_Merkmal_Code \\\n", + "0 Produzierendes Gewerbe MONAT \n", + "1 Produzierendes Gewerbe MONAT \n", + "2 Produzierendes Gewerbe MONAT \n", + "3 Produzierendes Gewerbe MONAT \n", + "4 Produzierendes Gewerbe MONAT \n", + "... ... ... \n", + "62395 Sonstiger Fahrzeugbau ohne H. v. Fahrzeugen a.... MONAT \n", + "62396 Sonstiger Fahrzeugbau ohne H. v. Fahrzeugen a.... MONAT \n", + "62397 Sonstiger Fahrzeugbau ohne H. v. Fahrzeugen a.... MONAT \n", + "62398 Sonstiger Fahrzeugbau ohne H. v. Fahrzeugen a.... MONAT \n", + "62399 Sonstiger Fahrzeugbau ohne H. v. Fahrzeugen a.... 
MONAT \n", + "\n", + " 4_Merkmal_Label 4_Auspraegung_Code 4_Auspraegung_Label \\\n", + "0 Monate MONAT01 Januar \n", + "1 Monate MONAT02 Februar \n", + "2 Monate MONAT03 März \n", + "3 Monate MONAT04 April \n", + "4 Monate MONAT05 Mai \n", + "... ... ... ... \n", + "62395 Monate MONAT08 August \n", + "62396 Monate MONAT09 September \n", + "62397 Monate MONAT10 Oktober \n", + "62398 Monate MONAT11 November \n", + "62399 Monate MONAT12 Dezember \n", + "\n", + " PRO101__Produktionsindex__2015=100 \n", + "0 76,9 \n", + "1 77,9 \n", + "2 84,9 \n", + "3 82,9 \n", + "4 81,4 \n", + "... ... \n", + "62395 ... \n", + "62396 ... \n", + "62397 ... \n", + "62398 ... \n", + "62399 ... \n", + "\n", + "[62400 rows x 22 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "c50015765afe066708d859da3faaa0505e12b679b95f6727e524b172064c6917" + } + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/src/pygenesis/http_helper.py b/src/pygenesis/http_helper.py index 5965e51..c71fcb9 100644 --- a/src/pygenesis/http_helper.py +++ b/src/pygenesis/http_helper.py @@ -1,6 +1,8 @@ """Wrapper module for the data endpoint.""" import json import logging +import re +import time from pathlib import Path from typing import Union @@ -17,6 +19,9 @@ logger = logging.getLogger(__name__) +JOB_ID_PATTERN = re.compile(r"\d+-\d+_\d+") +JOB_TIMEOUT = 60 + def load_data( endpoint: str, 
method: str, params: dict, as_json: bool = False @@ -45,10 +50,27 @@ def load_data( if hit_in_cash(cache_dir, name, params): data = read_from_cache(cache_dir, name, params) else: - data = get_data_from_endpoint(endpoint, method, params) + response = get_data_from_endpoint(endpoint, method, params) + data = response.text + + # status code 98 means that the table is too big + # we have to start a job and wait for it to be ready + response_status_code = 200 + try: + # test for job-relevant status code + response_status_code = response.json().get("Status").get("Code") + except json.decoder.JSONDecodeError: + pass + + if response_status_code == 98: + job_response = start_job(endpoint, method, params) + job_id = get_job_id_from_response(job_response) + data = get_data_from_resultfile(job_id) + cache_data(cache_dir, name, params, data) else: - data = get_data_from_endpoint(endpoint, method, params) + response = get_data_from_endpoint(endpoint, method, params) + data = response.text if as_json: parsed_data: dict = json.loads(data) @@ -57,9 +79,11 @@ def load_data( return data -def get_data_from_endpoint(endpoint: str, method: str, params: dict) -> str: +def get_data_from_endpoint( + endpoint: str, method: str, params: dict +) -> requests.Response: """ - Wrapper method which constructs a url for querying data from Destatis and + Wrapper method which constructs an url for querying data from Destatis and sends a GET request. Args: @@ -68,7 +92,7 @@ def get_data_from_endpoint(endpoint: str, method: str, params: dict) -> str: params (dict): dictionary of query parameters Returns: - str: the raw text response from Destatis. + requests.Response: the response object holding the response from calling the Destatis endpoint. 
""" config = load_config() url = f"{config['GENESIS API']['base_url']}{endpoint}/{method}" @@ -83,11 +107,100 @@ def get_data_from_endpoint(endpoint: str, method: str, params: dict) -> str: ) response = requests.get(url, params=params_, timeout=(5, 15)) - response.encoding = "UTF-8" + response.encoding = "UTF-8" _check_invalid_status_code(response.status_code) _check_invalid_destatis_status_code(response) + return response + + +def start_job(endpoint: str, method: str, params: dict) -> requests.Response: + """Small helper function to start a job in the background. + + Args: + endpoint (str): Destatis endpoint (eg. data, catalogue, ..) + method (str): Destatis method (eg. cube, tablefile, ...) + params (dict): dictionary of query parameters + + Returns: + requests.Response: the response object holding the response from calling the Destatis endpoint. + """ + logger.warning( + "Die Tabelle ist zu groß, um direkt abgerufen zu werden. Es wird eine Verarbeitung im Hintergrund gestartet." + ) + params["job"] = "true" + + # starting a job + response = get_data_from_endpoint( + endpoint=endpoint, method=method, params=params + ) + + return response + + +def get_job_id_from_response(response: requests.Response) -> str: + """Get the job ID of a successful started job. + + Args: + response (requests.Response): Response from endpoint request with job set equal to true. + + Returns: + str: the job id. + """ + # check out job_id & inform user + content = "" + try: + content = response.json().get("Status").get("Content") + except json.JSONDecodeError: + pass + + match_result = JOB_ID_PATTERN.search(content) + job_id = match_result.group() if match_result is not None else "" + + return job_id + + +def get_data_from_resultfile(job_id: str) -> str: + """Get data from a job once it is finished or when the timeout is reached. + + Args: + job_id (str): Job ID generated by Destatis API. + + Returns: + str: The raw data of the table file as returned by Destatis. 
+ """ + params = { + "selection": "*" + job_id, + "searchcriterion": "code", + "sortcriterion": "code", + "type": "all", + } + + time_ = time.perf_counter() + + while (time.perf_counter() - time_) < JOB_TIMEOUT: + response = get_data_from_endpoint( + endpoint="catalogue", method="jobs", params=params + ) + + jobs = response.json().get("List") + if len(jobs) > 0 and jobs[0].get("State") == "Fertig": + break + + time.sleep(5) + else: + return "" + + params = { + "name": job_id, + "area": "all", + "compress": "false", + "format": "ffcsv", + } + response = get_data_from_endpoint( + endpoint="data", method="resultfile", params=params + ) return str(response.text) @@ -158,13 +271,14 @@ def _check_destatis_status(destatis_status: dict) -> None: # check for generic/ system error if destatis_status_code == -1: - raise DestatisStatusError( - "Error: There is a system error. Please check your query parameters." - ) + raise DestatisStatusError(destatis_status_content) # check for destatis/ query errors elif (destatis_status_code == 104) or (destatis_status_type in error_en_de): - raise DestatisStatusError(destatis_status_content) + if destatis_status_code == 98: + pass + else: + raise DestatisStatusError(destatis_status_content) # output warnings to user elif (destatis_status_code == 22) or ( @@ -175,5 +289,5 @@ def _check_destatis_status(destatis_status: dict) -> None: # output information to user elif destatis_status_type.lower() == "information": logger.info( - "Code %d : %s", destatis_status_code, destatis_status_content + "Code %d: %s", destatis_status_code, destatis_status_content ) diff --git a/tests/test_http_helper.py b/tests/test_http_helper.py index d3faf2f..6105820 100644 --- a/tests/test_http_helper.py +++ b/tests/test_http_helper.py @@ -9,6 +9,7 @@ _check_invalid_destatis_status_code, _check_invalid_status_code, get_data_from_endpoint, + get_job_id_from_response, ) @@ -16,6 +17,7 @@ def _generic_request_status( status_response: bool = True, code: int = 0, 
status_type: str = "Information", + status_content: str = "Erfolg/ Success/ Some Issue", ) -> requests.Response: """ Helper method which allows to create a generic request.Response that covers all Destatis answers @@ -36,14 +38,14 @@ def _generic_request_status( }, "Status": { "Code": code, - "Content": "Erfolg/ Success/ Some Issue", + "Content": status_content, "Type": status_type, }, } response_text = "Some text for a successful response without status..." - # set up generic requests.Reponse + # set up generic requests.Response request_status = requests.Response() request_status.status_code = 200 # success @@ -78,7 +80,7 @@ def test_get_response_from_endpoint(mocker): get_data_from_endpoint(endpoint="endpoint", method="method", params={}) -def test__check_invalid_status_code_with_error(): +def test_check_invalid_status_code_with_error(): """ Basic tests to check an error status code (4xx, 5xx) for _handle_status_code method. @@ -92,7 +94,7 @@ def test__check_invalid_status_code_with_error(): ) -def test__check_invalid_status_code_without_error(): +def test_check_invalid_status_code_without_error(): """ Basic test to check a valid status code (2xx) for the _handle_status_code method. @@ -104,7 +106,7 @@ def test__check_invalid_status_code_without_error(): assert False -def test__check_invalid_destatis_status_code_with_error(): +def test_check_invalid_destatis_status_code_with_error(): """ Basic tests to check an error status code as defined in the documentation via code (e.g. -1, 104) or type ('Error', 'Fehler'). @@ -122,7 +124,10 @@ def test__check_invalid_destatis_status_code_with_error(): assert str(e.value) == status_content # also test generic -1 error code - generic_error_status = _generic_request_status(code=-1) + generic_error_status = _generic_request_status( + code=-1, + status_content="Error: There is a system error. 
Please check your query parameters.", + ) with pytest.raises(DestatisStatusError) as e: _check_invalid_destatis_status_code(generic_error_status) @@ -132,7 +137,7 @@ def test__check_invalid_destatis_status_code_with_error(): ) -def test__check_invalid_destatis_status_code_with_warning(caplog): +def test_check_invalid_destatis_status_code_with_warning(caplog): """ Basic tests to check a warning status code as defined in the documentation via code (e.g. 22) or type ('Warning', 'Warnung'). @@ -152,7 +157,7 @@ def test__check_invalid_destatis_status_code_with_warning(caplog): assert status_content in caplog.text -def test__check_invalid_destatis_status_code_without_error(caplog): +def test_check_invalid_destatis_status_code_without_error(caplog): """ Basic tests to check the successful status code 0 or only text response as defined in the documentation. """ @@ -170,3 +175,24 @@ def test__check_invalid_destatis_status_code_without_error(caplog): _check_invalid_destatis_status_code(status_text) except Exception: assert False + + +def test_get_job_id_from_response(): + response = requests.Response() + response._content = """{"Status": {"Content": "Der Bearbeitungsauftrag wurde erstellt. Die Tabelle kann in Kürze als Ergebnis mit folgendem Namen abgerufen werden: 42153-0001_001597503 (Mindestens ein Parameter enthält ungültige Werte. Er wurde angepasst, um den Service starten zu können.: stand"}}""".encode() + job_id = get_job_id_from_response(response) + assert job_id == "42153-0001_001597503" + + +def test_get_job_id_from_response_with_no_id(): + response = requests.Response() + response._content = """{"Status": {"Content": "Der Bearbeitungsauftrag wurde erstellt."}}""".encode() + job_id = get_job_id_from_response(response) + assert job_id == "" + + +def test_get_job_id_from_response_with_no_json(): + response = requests.Response() + response._content = "Der Bearbeitungsauftrag wurde erstellt. 
Die Tabelle kann in Kürze als Ergebnis mit folgendem Namen abgerufen werden: 42153-0001_001597503 (Mindestens ein Parameter enthält ungültige Werte. Er wurde angepasst, um den Service starten zu können.: stand".encode() + job_id = get_job_id_from_response(response) + assert job_id == ""