From 624bfd09b72a2edd340a2a284890e2e8dadf0655 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Tue, 30 Jan 2024 13:47:46 +0100 Subject: [PATCH 01/36] =?UTF-8?q?=F0=9F=9A=80=20Bumped=20version=20after?= =?UTF-8?q?=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_viadot.py | 2 +- viadot/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_viadot.py b/tests/test_viadot.py index 71c3d6187..29cd4e622 100644 --- a/tests/test_viadot.py +++ b/tests/test_viadot.py @@ -2,4 +2,4 @@ def test_version(): - assert __version__ == "0.4.25" + assert __version__ == "0.4.26" diff --git a/viadot/__init__.py b/viadot/__init__.py index 1cc3baa70..9c8003d45 100644 --- a/viadot/__init__.py +++ b/viadot/__init__.py @@ -1 +1 @@ -__version__ = "0.4.25" +__version__ = "0.4.26" From c28e057a4e010611630bfb05a781538fbd580a03 Mon Sep 17 00:00:00 2001 From: mgwinner Date: Tue, 6 Feb 2024 13:09:39 +0100 Subject: [PATCH 02/36] =?UTF-8?q?=F0=9F=8E=A8=20Delete=20promote=5Fto=20ta?= =?UTF-8?q?sk=20from=20the=20flow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/adls_to_azure_sql.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index c12cc7e1d..abac388b3 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -327,22 +327,6 @@ def gen_flow(self) -> Flow: flow=self, ) - promote_to_conformed_task = AzureDataLakeCopy(timeout=self.timeout) - promote_to_conformed_task.bind( - from_path=self.adls_path, - to_path=self.adls_path_conformed, - sp_credentials_secret=self.adls_sp_credentials_secret, - vault_name=self.vault_name, - flow=self, - ) - promote_to_operations_task = AzureDataLakeCopy(timeout=self.timeout) - promote_to_operations_task.bind( - from_path=self.adls_path_conformed, - to_path=self.adls_path_operations, - 
sp_credentials_secret=self.adls_sp_credentials_secret, - vault_name=self.vault_name, - flow=self, - ) create_table_task = AzureSQLCreateTable(timeout=self.timeout) create_table_task.bind( schema=self.schema, @@ -372,9 +356,5 @@ def gen_flow(self) -> Flow: df_reorder.set_upstream(lake_to_df_task, flow=self) df_to_csv.set_upstream(df_reorder, flow=self) - promote_to_conformed_task.set_upstream(df_to_csv, flow=self) create_table_task.set_upstream(df_to_csv, flow=self) - promote_to_operations_task.set_upstream( - promote_to_conformed_task, flow=self - ) bulk_insert_task.set_upstream(create_table_task, flow=self) From 02b1a8a8697b058e07e2191f11a5aff73c1e0d88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Gwinner?= <34861507+malgorzatagwinner@users.noreply.github.com> Date: Tue, 13 Feb 2024 14:25:51 +0100 Subject: [PATCH 03/36] Delete first promote_test --- tests/integration/flows/test_adls_to_azure_sql.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/integration/flows/test_adls_to_azure_sql.py b/tests/integration/flows/test_adls_to_azure_sql.py index e3ae45623..b83a59e51 100644 --- a/tests/integration/flows/test_adls_to_azure_sql.py +++ b/tests/integration/flows/test_adls_to_azure_sql.py @@ -9,16 +9,6 @@ from viadot.flows.adls_to_azure_sql import check_dtypes_sort, df_to_csv_task -def test_get_promoted_adls_path_csv_file(): - adls_path_file = "raw/supermetrics/adls_ga_load_times_fr_test/2021-07-14T13%3A09%3A02.997357%2B00%3A00.csv" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_file) - promoted_path = flow.get_promoted_path(env="conformed") - assert ( - promoted_path - == "conformed/supermetrics/adls_ga_load_times_fr_test/2021-07-14T13%3A09%3A02.997357%2B00%3A00.csv" - ) - - def test_get_promoted_adls_path_parquet_file(): adls_path_file = "raw/supermetrics/adls_ga_load_times_fr_test/2021-07-14T13%3A09%3A02.997357%2B00%3A00.parquet" flow = ADLSToAzureSQL(name="test", adls_path=adls_path_file) From 
7c76e7ac10b87c78b3f5bb016d09d17b51f7211d Mon Sep 17 00:00:00 2001 From: mgwinner Date: Tue, 13 Feb 2024 14:39:33 +0100 Subject: [PATCH 04/36] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Delete=20get=5Fpromo?= =?UTF-8?q?ted=20tests=20and=20get=5Fpromoted=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flows/test_adls_to_azure_sql.py | 35 ------------------- viadot/flows/adls_to_azure_sql.py | 16 --------- 2 files changed, 51 deletions(-) diff --git a/tests/integration/flows/test_adls_to_azure_sql.py b/tests/integration/flows/test_adls_to_azure_sql.py index b83a59e51..34cef2f9e 100644 --- a/tests/integration/flows/test_adls_to_azure_sql.py +++ b/tests/integration/flows/test_adls_to_azure_sql.py @@ -9,41 +9,6 @@ from viadot.flows.adls_to_azure_sql import check_dtypes_sort, df_to_csv_task -def test_get_promoted_adls_path_parquet_file(): - adls_path_file = "raw/supermetrics/adls_ga_load_times_fr_test/2021-07-14T13%3A09%3A02.997357%2B00%3A00.parquet" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_file) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_file_starts_with_slash(): - adls_path_dir_starts_with_slash = "/raw/supermetrics/adls_ga_load_times_fr_test/" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir_starts_with_slash) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_dir_slash(): - adls_path_dir_slash = "raw/supermetrics/adls_ga_load_times_fr_test/" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir_slash) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_dir(): - adls_path_dir = 
"raw/supermetrics/adls_ga_load_times_fr_test" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_dir_starts_with_slash(): - adls_path_dir_starts_with_slash = "/raw/supermetrics/adls_ga_load_times_fr_test/" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir_starts_with_slash) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - def test_df_to_csv_task(): d = {"col1": ["rat", "\tdog"], "col2": ["cat", 4]} df = pd.DataFrame(data=d) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index abac388b3..d41ed129a 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -221,8 +221,6 @@ def __init__( self.overwrite_adls = overwrite_adls self.if_empty = if_empty self.adls_sp_credentials_secret = adls_sp_credentials_secret - self.adls_path_conformed = self.get_promoted_path(env="conformed") - self.adls_path_operations = self.get_promoted_path(env="operations") # AzureSQLCreateTable self.table = table @@ -257,20 +255,6 @@ def _map_if_exists(if_exists: str) -> str: def slugify(name): return name.replace(" ", "_").lower() - def get_promoted_path(self, env: str) -> str: - adls_path_clean = self.adls_path.strip("/") - extension = adls_path_clean.split(".")[-1].strip() - if extension == "parquet": - file_name = adls_path_clean.split("/")[-2] + ".csv" - common_path = "/".join(adls_path_clean.split("/")[1:-2]) - else: - file_name = adls_path_clean.split("/")[-1] - common_path = "/".join(adls_path_clean.split("/")[1:-1]) - - promoted_path = os.path.join(env, common_path, file_name) - - return promoted_path - def gen_flow(self) -> Flow: lake_to_df_task = AzureDataLakeToDF(timeout=self.timeout) df = lake_to_df_task.bind( From 
e1d3f741cd702d2a670ef032399bdd9cc6f791ed Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Wed, 28 Feb 2024 07:53:08 +0100 Subject: [PATCH 05/36] =?UTF-8?q?=F0=9F=90=9B=20Changed=20dbt=20packages?= =?UTF-8?q?=20in=20requirements.txt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 +- requirements.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c49b99fd7..033175d07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### Changed - +- Changed requirements.txt to level up version of dbt-sqlserver in order to fix bug with `MAXRECURSION` error in dbt_run ## [0.4.25] - 2024-01-30 ### Added diff --git a/requirements.txt b/requirements.txt index 72bbb20b2..990e4a89c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,8 +39,7 @@ aiolimiter==1.0.0 protobuf>=3.19.0, <3.20 avro-python3==1.10.2 pygit2>=1.10.1, <1.11.0 -dbt-core==1.3.2 -dbt-sqlserver==1.3.1 +dbt-sqlserver @ git+https://github.com/djagoda881/dbt-sqlserver.git@v1.3.latest_option_clause lumaCLI==0.0.19 Office365-REST-Python-Client==2.4.4 TM1py==1.11.3 From 74f0bf5c1517f5499e09c65aeee9fea3297bfb26 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Fri, 1 Mar 2024 15:33:33 +0100 Subject: [PATCH 06/36] =?UTF-8?q?=F0=9F=8E=A8=20Improve=20CHANGELOG=20info?= =?UTF-8?q?rmation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 033175d07..d4207c636 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Fixed +- Changed requirements.txt to level up version of dbt-sqlserver in order to fix bug with `MAXRECURSION` error in dbt_run ### 
Changed -- Changed requirements.txt to level up version of dbt-sqlserver in order to fix bug with `MAXRECURSION` error in dbt_run + +### Removed +- Removed dbt-core==1.3.2 from requirements.txt ## [0.4.25] - 2024-01-30 ### Added From 5cf158cc856056a7bb4ae26d72f4e0d827b861d1 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Fri, 1 Mar 2024 15:46:36 +0000 Subject: [PATCH 07/36] =?UTF-8?q?=F0=9F=9A=A7=20Added=20`no=20config`=20fl?= =?UTF-8?q?ag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/luma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/tasks/luma.py b/viadot/tasks/luma.py index 11eb91e45..3d0870657 100644 --- a/viadot/tasks/luma.py +++ b/viadot/tasks/luma.py @@ -41,7 +41,7 @@ def __init__( self.helper_script = dbt_project_path self.url = url self.metadata_dir_path = metadata_dir_path - self.command = f"luma dbt send-test-results --luma-url {url} --metadata-dir {metadata_dir_path}" + self.command = f"luma dbt send-test-results --luma-url {url} --metadata-dir {metadata_dir_path} --no config" self.return_all = True self.stream_output = True self.log_stderr = True From 64bce200d2c2db2f8d32a4c00f0aa4582362cbb5 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Tue, 5 Mar 2024 10:37:15 +0000 Subject: [PATCH 08/36] =?UTF-8?q?=F0=9F=94=A5=20Removed=20generate=5Fcatal?= =?UTF-8?q?og=5Fjson=20task?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/transform_and_catalog.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/viadot/flows/transform_and_catalog.py b/viadot/flows/transform_and_catalog.py index 08ac6b895..6c739978e 100644 --- a/viadot/flows/transform_and_catalog.py +++ b/viadot/flows/transform_and_catalog.py @@ -191,8 +191,8 @@ def gen_flow(self) -> Flow: run_select = self.dbt_selects.get("run") run_select_safe = f"-s {run_select}" if run_select is not None else "" - run = ShellTask( 
- name="dbt_task_run", + run_model = ShellTask( + name="dbt_task_run_model", command=f"dbt run {run_select_safe} {dbt_target_option}", helper_script=f"cd {local_dbt_repo_path}", return_all=True, @@ -202,24 +202,14 @@ def gen_flow(self) -> Flow: test_select = self.dbt_selects.get("test", run_select) test_select_safe = f"-s {test_select}" if test_select is not None else "" - test = ShellTask( - name="dbt_task_test", + run_tests = ShellTask( + name="dbt_task_run_tests", command=f"dbt test {test_select_safe} {dbt_target_option}", helper_script=f"cd {local_dbt_repo_path}", return_all=True, stream_output=True, ).bind(flow=self) - # Generate docs - # Produces `catalog.json`, `run-results.json`, and `manifest.json` - - generate_catalog_json = custom_shell_task.bind( - name="dbt_task_docs_generate", - command=f"dbt docs generate {dbt_target_option} --no-compile", - helper_script=f"cd {self.dbt_project_path}", - flow=self, - ) - # Upload build metadata to Luma path_expanded = os.path.expandvars(self.metadata_dir_path) metadata_dir_path = Path(path_expanded) @@ -237,8 +227,7 @@ def gen_flow(self) -> Flow: dbt_clean_up.set_upstream(clone, flow=self) pull_dbt_deps.set_upstream(dbt_clean_up, flow=self) - run.set_upstream(pull_dbt_deps, flow=self) - test.set_upstream(run, flow=self) - generate_catalog_json.set_upstream(test, flow=self) - upload_metadata_luma.set_upstream(generate_catalog_json, flow=self) + run_model.set_upstream(pull_dbt_deps, flow=self) + run_tests.set_upstream(run_model, flow=self) + upload_metadata_luma.set_upstream(run_tests, flow=self) _cleanup_repo.set_upstream(upload_metadata_luma, flow=self) From 6e19eddb66f705168eee832ce77bf431f7bcb592 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Wed, 6 Mar 2024 14:26:48 +0100 Subject: [PATCH 09/36] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Changed=20credential?= =?UTF-8?q?s=20logic=20to=20get=20credentials=20from=20KeyVault?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
viadot/flows/sap_rfc_to_adls.py | 8 +++++++ viadot/sources/sap_rfc.py | 38 +++++++++++++++++++-------------- viadot/tasks/sap_rfc.py | 37 ++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 16 deletions(-) diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index d7a2ac390..e8d917581 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -17,6 +17,8 @@ def __init__( rfc_total_col_width_character_limit: int = 400, rfc_unique_id: List[str] = None, sap_credentials: dict = None, + saprfc_credentials_key: str = "SAP", + env: str = "PROD", output_file_extension: str = ".parquet", local_file_path: str = None, file_sep: str = "\t", @@ -67,6 +69,8 @@ def __init__( ... ) sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. + saprfc_credentials_key (str, optional): Azure KV secret. Defaults to "SAP". + env (str, optional): SAP environment. Defaults to "PROD". output_file_extension (str, optional): Output file extension - to allow selection of .csv for data which is not easy to handle with parquet. Defaults to ".parquet". local_file_path (str, optional): Local destination path. Defaults to None. file_sep(str, optional): The separator to use in the CSV. Defaults to "\t". 
@@ -91,6 +95,8 @@ def __init__( self.rfc_total_col_width_character_limit = rfc_total_col_width_character_limit self.rfc_unique_id = rfc_unique_id self.sap_credentials = sap_credentials + self.saprfc_credentials_key = saprfc_credentials_key + self.env = env self.output_file_extension = output_file_extension self.local_file_path = local_file_path self.file_sep = file_sep @@ -121,6 +127,8 @@ def gen_flow(self) -> Flow: rfc_unique_id=self.rfc_unique_id, alternative_version=self.alternative_version, credentials=self.sap_credentials, + saprfc_credentials_key=self.saprfc_credentials_key, + env=self.env, flow=self, ) if self.validate_df_dict: diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index a9d109148..7e6f6d7a2 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -256,16 +256,17 @@ def __init__( """ self._con = None - DEFAULT_CREDENTIALS = local_config.get("SAP").get("DEV") + saprfc_credentials_key = kwargs.pop("saprfc_credentials_key") + env = kwargs.pop("env") credentials = kwargs.pop("credentials", None) if credentials is None: - credentials = DEFAULT_CREDENTIALS - if credentials is None: - raise CredentialError("Missing credentials.") logger.warning( - "Your credentials will use DEV environment. If you would like to use different one - please specified it." + f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." 
) + credentials = local_config.get(saprfc_credentials_key).get(env) + if credentials is None: + raise CredentialError(f"Missing {env} credentials!") super().__init__(*args, credentials=credentials, **kwargs) @@ -455,9 +456,11 @@ def _get_columns(self, sql: str, aliased: bool = False) -> List[str]: self.aliases_keyed_by_columns = aliases_keyed_by_columns columns = [ - aliases_keyed_by_columns[col] - if col in aliases_keyed_by_columns - else col + ( + aliases_keyed_by_columns[col] + if col in aliases_keyed_by_columns + else col + ) for col in columns ] @@ -699,16 +702,17 @@ def __init__( """ self._con = None - DEFAULT_CREDENTIALS = local_config.get("SAP").get("DEV") + saprfc_credentials_key = kwargs.pop("saprfc_credentials_key") + env = kwargs.pop("env") credentials = kwargs.pop("credentials", None) if credentials is None: - credentials = DEFAULT_CREDENTIALS - if credentials is None: - raise CredentialError("Missing credentials.") logger.warning( - "Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow." + f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." 
) + credentials = local_config.get(saprfc_credentials_key).get(env) + if credentials is None: + raise CredentialError(f"Missing {env} credentials!") super().__init__(*args, credentials=credentials, **kwargs) @@ -904,9 +908,11 @@ def _get_columns(self, sql: str, aliased: bool = False) -> List[str]: self.aliases_keyed_by_columns = aliases_keyed_by_columns columns = [ - aliases_keyed_by_columns[col] - if col in aliases_keyed_by_columns - else col + ( + aliases_keyed_by_columns[col] + if col in aliases_keyed_by_columns + else col + ) for col in columns ] diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index 19d55353d..c70becf2d 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -2,13 +2,18 @@ from typing import List import pandas as pd +import json from prefect import Task from prefect.utilities.tasks import defaults_from_attrs +from viadot.tasks import AzureKeyVaultSecret try: from viadot.sources import SAPRFC, SAPRFCV2 except ImportError: raise +from prefect.utilities import logging + +logger = logging.get_logger() class SAPRFCToDF(Task): @@ -20,6 +25,8 @@ def __init__( func: str = None, rfc_total_col_width_character_limit: int = 400, credentials: dict = None, + saprfc_credentials_key: str = "SAP", + env: str = "PROD", max_retries: int = 3, retry_delay: timedelta = timedelta(seconds=10), timeout: int = 3600, @@ -52,12 +59,16 @@ def __init__( 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. credentials (dict, optional): The credentials to use to authenticate with SAP. + saprfc_credentials_key (str, optional): Azure KV secret. Defaults to "SAP". + env (str, optional): SAP environment. Defaults to "PROD". By default, they're taken from the local viadot config. 
""" self.query = query self.sep = sep self.replacement = replacement self.credentials = credentials + self.saprfc_credentials_key = saprfc_credentials_key + self.env = env self.func = func self.rfc_total_col_width_character_limit = rfc_total_col_width_character_limit @@ -84,6 +95,8 @@ def run( sep: str = None, replacement: str = "-", credentials: dict = None, + saprfc_credentials_key: str = "SAP", + env: str = "PROD", func: str = None, rfc_total_col_width_character_limit: int = None, rfc_unique_id: List[str] = None, @@ -97,6 +110,9 @@ def run( multiple options are automatically tried. Defaults to None. replacement (str, optional): In case of sep is on a columns, set up a new character to replace inside the string to avoid flow breakdowns. Defaults to "-". + credentials (dict, optional): The credentials to use to authenticate with SAP. + saprfc_credentials_key (str, optional): Azure KV secret. Defaults to "SAP". + env (str, optional): SAP environment. Defaults to "PROD". func (str, optional): SAP RFC function to use. Defaults to None. rfc_total_col_width_character_limit (int, optional): Number of characters by which query will be split in chunks in case of too many columns for RFC function. According to SAP documentation, the limit is @@ -116,6 +132,23 @@ def run( Returns: pd.DataFrame: DataFrame with SAP data. """ + + if isinstance(credentials, dict): + credentials_keys = list(credentials.keys()) + required_credentials_params = ["sysnr", "user", "passwd", "ashost"] + for key in required_credentials_params: + if key not in credentials_keys: + self.logger.warning( + f"Required key '{key}' not found in your 'sap_credentials' dictionary!" 
+ ) + credentials = None + + if credentials is None: + credentials_str = AzureKeyVaultSecret( + secret=saprfc_credentials_key, + ).run() + credentials = json.loads(credentials_str).get(env) + if query is None: raise ValueError("Please provide the query.") @@ -128,6 +161,8 @@ def run( sep=sep, replacement=replacement, credentials=credentials, + saprfc_credentials_key=saprfc_credentials_key, + env=env, func=func, rfc_total_col_width_character_limit=rfc_total_col_width_character_limit, rfc_unique_id=rfc_unique_id, @@ -136,6 +171,8 @@ def run( sap = SAPRFC( sep=sep, credentials=credentials, + saprfc_credentials_key=saprfc_credentials_key, + env=env, func=func, rfc_total_col_width_character_limit=rfc_total_col_width_character_limit, ) From 58ec8fcb67a08ea9e0bbb070cb25f3fce361eb2e Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Wed, 6 Mar 2024 14:34:21 +0100 Subject: [PATCH 10/36] =?UTF-8?q?=F0=9F=9A=80=20Added=20changes=20to=20cha?= =?UTF-8?q?ngelog?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c49b99fd7..c022d5dc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Added option for sap_rfc connector to get credentials from Azure KeyVault or directly passing dictionary inside flow. 
-### Fixed +### Fixed ### Changed From c47c120223765f2814aa282bd02c3a56ea96471f Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Wed, 6 Mar 2024 15:11:28 +0000 Subject: [PATCH 11/36] =?UTF-8?q?=F0=9F=90=9B=20Updated=20`--no-config`=20?= =?UTF-8?q?flag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/luma.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viadot/tasks/luma.py b/viadot/tasks/luma.py index 3d0870657..ebe709720 100644 --- a/viadot/tasks/luma.py +++ b/viadot/tasks/luma.py @@ -38,10 +38,10 @@ def __init__( secret=credentials_secret, vault_name=vault_name ) url = json.loads(credentials_str).get("url") - self.helper_script = dbt_project_path + self.helper_script = f"cd {dbt_project_path}" self.url = url self.metadata_dir_path = metadata_dir_path - self.command = f"luma dbt send-test-results --luma-url {url} --metadata-dir {metadata_dir_path} --no config" + self.command = f"luma dbt send-test-results --luma-url {url} --metadata-dir {metadata_dir_path} --no-config" self.return_all = True self.stream_output = True self.log_stderr = True From db63d913192a114c179c5db7d185289e4584d000 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Thu, 14 Mar 2024 13:55:48 +0100 Subject: [PATCH 12/36] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Change=20'query'=20p?= =?UTF-8?q?arameter=20as=20required=20on=20the=20=5F=5Finit=5F=5F=20level?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sap_rfc_to_adls.py | 2 +- viadot/tasks/sap_rfc.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index e8d917581..c95b6e181 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -10,7 +10,7 @@ class SAPRFCToADLS(Flow): def __init__( self, name: str, - query: str = None, + query: str, rfc_sep: str = None, rfc_replacement: str = "-", func: str = 
"RFC_READ_TABLE", diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index c70becf2d..58f7008a9 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -19,7 +19,7 @@ class SAPRFCToDF(Task): def __init__( self, - query: str = None, + query: str, sep: str = None, replacement: str = "-", func: str = None, @@ -91,7 +91,7 @@ def __init__( ) def run( self, - query: str = None, + query: str, sep: str = None, replacement: str = "-", credentials: dict = None, @@ -149,9 +149,6 @@ def run( ).run() credentials = json.loads(credentials_str).get(env) - if query is None: - raise ValueError("Please provide the query.") - if alternative_version is True: if rfc_unique_id: self.logger.warning( From bec218bf2b9f2b579393942315662dc35b38d765 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Thu, 14 Mar 2024 14:13:49 +0100 Subject: [PATCH 13/36] =?UTF-8?q?=F0=9F=8E=A8=20Changed=20docstring=20for?= =?UTF-8?q?=20parameter=20'saprfc=5Fcredentials=5Fkey'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sap_rfc_to_adls.py | 2 +- viadot/tasks/sap_rfc.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index c95b6e181..d60eb2e1a 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -69,7 +69,7 @@ def __init__( ... ) sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - saprfc_credentials_key (str, optional): Azure KV secret. Defaults to "SAP". + saprfc_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". env (str, optional): SAP environment. Defaults to "PROD". output_file_extension (str, optional): Output file extension - to allow selection of .csv for data which is not easy to handle with parquet. Defaults to ".parquet". 
local_file_path (str, optional): Local destination path. Defaults to None. diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index 58f7008a9..56c541d7f 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -59,7 +59,7 @@ def __init__( 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. credentials (dict, optional): The credentials to use to authenticate with SAP. - saprfc_credentials_key (str, optional): Azure KV secret. Defaults to "SAP". + saprfc_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". env (str, optional): SAP environment. Defaults to "PROD". By default, they're taken from the local viadot config. """ @@ -111,7 +111,7 @@ def run( replacement (str, optional): In case of sep is on a columns, set up a new character to replace inside the string to avoid flow breakdowns. Defaults to "-". credentials (dict, optional): The credentials to use to authenticate with SAP. - saprfc_credentials_key (str, optional): Azure KV secret. Defaults to "SAP". + saprfc_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". env (str, optional): SAP environment. Defaults to "PROD". func (str, optional): SAP RFC function to use. Defaults to None. 
rfc_total_col_width_character_limit (int, optional): Number of characters by which query will be split in chunks From bdec339732d9b4deb3fdb1a840f5acc3bb9773bc Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Thu, 14 Mar 2024 14:45:53 +0100 Subject: [PATCH 14/36] =?UTF-8?q?=F0=9F=8E=A8=20Change=20'sap=5Fcredential?= =?UTF-8?q?s'=20varable=20name=20and=20docstring?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sap_rfc_to_adls.py | 2 +- viadot/sources/sap_rfc.py | 20 ++++++++++---------- viadot/tasks/sap_rfc.py | 24 ++++++++++++------------ 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index d60eb2e1a..38114707c 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -126,7 +126,7 @@ def gen_flow(self) -> Flow: rfc_total_col_width_character_limit=self.rfc_total_col_width_character_limit, rfc_unique_id=self.rfc_unique_id, alternative_version=self.alternative_version, - credentials=self.sap_credentials, + sap_credentials=self.sap_credentials, saprfc_credentials_key=self.saprfc_credentials_key, env=self.env, flow=self, diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 7e6f6d7a2..4873d568a 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -259,16 +259,16 @@ def __init__( saprfc_credentials_key = kwargs.pop("saprfc_credentials_key") env = kwargs.pop("env") - credentials = kwargs.pop("credentials", None) - if credentials is None: + sap_credentials = kwargs.pop("sap_credentials", None) + if sap_credentials is None: logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." 
) - credentials = local_config.get(saprfc_credentials_key).get(env) - if credentials is None: + sap_credentials = local_config.get(saprfc_credentials_key).get(env) + if sap_credentials is None: raise CredentialError(f"Missing {env} credentials!") - super().__init__(*args, credentials=credentials, **kwargs) + super().__init__(*args, credentials=sap_credentials, **kwargs) self.sep = sep self.client_side_filters = None @@ -705,16 +705,16 @@ def __init__( saprfc_credentials_key = kwargs.pop("saprfc_credentials_key") env = kwargs.pop("env") - credentials = kwargs.pop("credentials", None) - if credentials is None: + sap_credentials = kwargs.pop("sap_credentials", None) + if sap_credentials is None: logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." ) - credentials = local_config.get(saprfc_credentials_key).get(env) - if credentials is None: + sap_credentials = local_config.get(saprfc_credentials_key).get(env) + if sap_credentials is None: raise CredentialError(f"Missing {env} credentials!") - super().__init__(*args, credentials=credentials, **kwargs) + super().__init__(*args, credentials=sap_credentials, **kwargs) self.sep = sep self.replacement = replacement diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index 56c541d7f..f79e2e2aa 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -24,7 +24,7 @@ def __init__( replacement: str = "-", func: str = None, rfc_total_col_width_character_limit: int = 400, - credentials: dict = None, + sap_credentials: dict = None, saprfc_credentials_key: str = "SAP", env: str = "PROD", max_retries: int = 3, @@ -58,7 +58,7 @@ def __init__( in case of too many columns for RFC function. According to SAP documentation, the limit is 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. 
- credentials (dict, optional): The credentials to use to authenticate with SAP. + sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. saprfc_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". env (str, optional): SAP environment. Defaults to "PROD". By default, they're taken from the local viadot config. @@ -66,7 +66,7 @@ def __init__( self.query = query self.sep = sep self.replacement = replacement - self.credentials = credentials + self.sap_credentials = sap_credentials self.saprfc_credentials_key = saprfc_credentials_key self.env = env self.func = func @@ -94,7 +94,7 @@ def run( query: str, sep: str = None, replacement: str = "-", - credentials: dict = None, + sap_credentials: dict = None, saprfc_credentials_key: str = "SAP", env: str = "PROD", func: str = None, @@ -110,7 +110,7 @@ def run( multiple options are automatically tried. Defaults to None. replacement (str, optional): In case of sep is on a columns, set up a new character to replace inside the string to avoid flow breakdowns. Defaults to "-". - credentials (dict, optional): The credentials to use to authenticate with SAP. + sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. saprfc_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". env (str, optional): SAP environment. Defaults to "PROD". func (str, optional): SAP RFC function to use. Defaults to None. @@ -133,21 +133,21 @@ def run( pd.DataFrame: DataFrame with SAP data. 
""" - if isinstance(credentials, dict): - credentials_keys = list(credentials.keys()) + if isinstance(sap_credentials, dict): + credentials_keys = list(sap_credentials.keys()) required_credentials_params = ["sysnr", "user", "passwd", "ashost"] for key in required_credentials_params: if key not in credentials_keys: self.logger.warning( f"Required key '{key}' not found in your 'sap_credentials' dictionary!" ) - credentials = None + sap_credentials = None - if credentials is None: + if sap_credentials is None: credentials_str = AzureKeyVaultSecret( secret=saprfc_credentials_key, ).run() - credentials = json.loads(credentials_str).get(env) + sap_credentials = json.loads(credentials_str).get(env) if alternative_version is True: if rfc_unique_id: @@ -157,7 +157,7 @@ def run( sap = SAPRFCV2( sep=sep, replacement=replacement, - credentials=credentials, + sap_credentials=sap_credentials, saprfc_credentials_key=saprfc_credentials_key, env=env, func=func, @@ -167,7 +167,7 @@ def run( else: sap = SAPRFC( sep=sep, - credentials=credentials, + sap_credentials=sap_credentials, saprfc_credentials_key=saprfc_credentials_key, env=env, func=func, From 505196a7a5b15853b4dff301ceaf3c7a12a4ae80 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Thu, 14 Mar 2024 15:03:47 +0100 Subject: [PATCH 15/36] =?UTF-8?q?=F0=9F=8E=A8=20changed=20variable=20name?= =?UTF-8?q?=20from=20'saprfc=5Fcredential=5Fkey'=20to=20'sap=5Fcredential?= =?UTF-8?q?=5Fkey'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sap_rfc_to_adls.py | 10 +++++----- viadot/sources/sap_rfc.py | 8 ++++---- viadot/tasks/sap_rfc.py | 16 ++++++++-------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index 38114707c..a4f65eb3c 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -17,7 +17,7 @@ def __init__( rfc_total_col_width_character_limit: int = 400, 
rfc_unique_id: List[str] = None, sap_credentials: dict = None, - saprfc_credentials_key: str = "SAP", + sap_credentials_key: str = "SAP", env: str = "PROD", output_file_extension: str = ".parquet", local_file_path: str = None, @@ -69,7 +69,7 @@ def __init__( ... ) sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - saprfc_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". + sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". env (str, optional): SAP environment. Defaults to "PROD". output_file_extension (str, optional): Output file extension - to allow selection of .csv for data which is not easy to handle with parquet. Defaults to ".parquet". local_file_path (str, optional): Local destination path. Defaults to None. @@ -95,7 +95,7 @@ def __init__( self.rfc_total_col_width_character_limit = rfc_total_col_width_character_limit self.rfc_unique_id = rfc_unique_id self.sap_credentials = sap_credentials - self.saprfc_credentials_key = saprfc_credentials_key + self.sap_credentials_key = sap_credentials_key self.env = env self.output_file_extension = output_file_extension self.local_file_path = local_file_path @@ -126,8 +126,8 @@ def gen_flow(self) -> Flow: rfc_total_col_width_character_limit=self.rfc_total_col_width_character_limit, rfc_unique_id=self.rfc_unique_id, alternative_version=self.alternative_version, - sap_credentials=self.sap_credentials, - saprfc_credentials_key=self.saprfc_credentials_key, + credentials=self.sap_credentials, + sap_credentials_key=self.sap_credentials_key, env=self.env, flow=self, ) diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 4873d568a..5a9cf5b2d 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -256,7 +256,7 @@ def __init__( """ self._con = None - saprfc_credentials_key = kwargs.pop("saprfc_credentials_key") + sap_credentials_key = 
kwargs.pop("sap_credentials_key") env = kwargs.pop("env") sap_credentials = kwargs.pop("sap_credentials", None) @@ -264,7 +264,7 @@ def __init__( logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." ) - sap_credentials = local_config.get(saprfc_credentials_key).get(env) + sap_credentials = local_config.get(sap_credentials_key).get(env) if sap_credentials is None: raise CredentialError(f"Missing {env} credentials!") @@ -702,7 +702,7 @@ def __init__( """ self._con = None - saprfc_credentials_key = kwargs.pop("saprfc_credentials_key") + sap_credentials_key = kwargs.pop("sap_credentials_key") env = kwargs.pop("env") sap_credentials = kwargs.pop("sap_credentials", None) @@ -710,7 +710,7 @@ def __init__( logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." ) - sap_credentials = local_config.get(saprfc_credentials_key).get(env) + sap_credentials = local_config.get(sap_credentials_key).get(env) if sap_credentials is None: raise CredentialError(f"Missing {env} credentials!") diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index f79e2e2aa..1925439df 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -25,7 +25,7 @@ def __init__( func: str = None, rfc_total_col_width_character_limit: int = 400, sap_credentials: dict = None, - saprfc_credentials_key: str = "SAP", + sap_credentials_key: str = "SAP", env: str = "PROD", max_retries: int = 3, retry_delay: timedelta = timedelta(seconds=10), @@ -59,7 +59,7 @@ def __init__( 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. 
- saprfc_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". + sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". env (str, optional): SAP environment. Defaults to "PROD". By default, they're taken from the local viadot config. """ @@ -67,7 +67,7 @@ def __init__( self.sep = sep self.replacement = replacement self.sap_credentials = sap_credentials - self.saprfc_credentials_key = saprfc_credentials_key + self.sap_credentials_key = sap_credentials_key self.env = env self.func = func self.rfc_total_col_width_character_limit = rfc_total_col_width_character_limit @@ -95,7 +95,7 @@ def run( sep: str = None, replacement: str = "-", sap_credentials: dict = None, - saprfc_credentials_key: str = "SAP", + sap_credentials_key: str = "SAP", env: str = "PROD", func: str = None, rfc_total_col_width_character_limit: int = None, @@ -111,7 +111,7 @@ def run( replacement (str, optional): In case of sep is on a columns, set up a new character to replace inside the string to avoid flow breakdowns. Defaults to "-". sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - saprfc_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". + sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". env (str, optional): SAP environment. Defaults to "PROD". func (str, optional): SAP RFC function to use. Defaults to None. 
rfc_total_col_width_character_limit (int, optional): Number of characters by which query will be split in chunks @@ -145,7 +145,7 @@ def run( if sap_credentials is None: credentials_str = AzureKeyVaultSecret( - secret=saprfc_credentials_key, + secret=sap_credentials_key, ).run() sap_credentials = json.loads(credentials_str).get(env) @@ -158,7 +158,7 @@ def run( sep=sep, replacement=replacement, sap_credentials=sap_credentials, - saprfc_credentials_key=saprfc_credentials_key, + sap_credentials_key=sap_credentials_key, env=env, func=func, rfc_total_col_width_character_limit=rfc_total_col_width_character_limit, @@ -168,7 +168,7 @@ def run( sap = SAPRFC( sep=sep, sap_credentials=sap_credentials, - saprfc_credentials_key=saprfc_credentials_key, + sap_credentials_key=sap_credentials_key, env=env, func=func, rfc_total_col_width_character_limit=rfc_total_col_width_character_limit, From 2895eb8f6933e46f78e4e79f859c9b5f842e8d1d Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Thu, 14 Mar 2024 15:08:01 +0100 Subject: [PATCH 16/36] =?UTF-8?q?=F0=9F=8E=A8=20Changed=20env=20default=20?= =?UTF-8?q?value=20'PROD'=20to=20'DEV'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sap_rfc_to_adls.py | 4 ++-- viadot/tasks/sap_rfc.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index a4f65eb3c..009296d44 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -18,7 +18,7 @@ def __init__( rfc_unique_id: List[str] = None, sap_credentials: dict = None, sap_credentials_key: str = "SAP", - env: str = "PROD", + env: str = "DEV", output_file_extension: str = ".parquet", local_file_path: str = None, file_sep: str = "\t", @@ -70,7 +70,7 @@ def __init__( ) sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. 
sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". - env (str, optional): SAP environment. Defaults to "PROD". + env (str, optional): SAP environment. Defaults to "DEV". output_file_extension (str, optional): Output file extension - to allow selection of .csv for data which is not easy to handle with parquet. Defaults to ".parquet". local_file_path (str, optional): Local destination path. Defaults to None. file_sep(str, optional): The separator to use in the CSV. Defaults to "\t". diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index 1925439df..b2ef7552b 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -26,7 +26,7 @@ def __init__( rfc_total_col_width_character_limit: int = 400, sap_credentials: dict = None, sap_credentials_key: str = "SAP", - env: str = "PROD", + env: str = "DEV", max_retries: int = 3, retry_delay: timedelta = timedelta(seconds=10), timeout: int = 3600, @@ -60,7 +60,7 @@ def __init__( of characters, so we add a safety margin. Defaults to 400. sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". - env (str, optional): SAP environment. Defaults to "PROD". + env (str, optional): SAP environment. Defaults to "DEV". By default, they're taken from the local viadot config. """ self.query = query @@ -96,7 +96,7 @@ def run( replacement: str = "-", sap_credentials: dict = None, sap_credentials_key: str = "SAP", - env: str = "PROD", + env: str = "DEV", func: str = None, rfc_total_col_width_character_limit: int = None, rfc_unique_id: List[str] = None, @@ -112,7 +112,7 @@ def run( inside the string to avoid flow breakdowns. Defaults to "-". sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. 
sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". - env (str, optional): SAP environment. Defaults to "PROD". + env (str, optional): SAP environment. Defaults to "DEV". func (str, optional): SAP RFC function to use. Defaults to None. rfc_total_col_width_character_limit (int, optional): Number of characters by which query will be split in chunks in case of too many columns for RFC function. According to SAP documentation, the limit is From f456ed295f7c2bfada5e011cd8cdce7d0f80118f Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Fri, 15 Mar 2024 08:49:17 +0100 Subject: [PATCH 17/36] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Change=20the=20logic?= =?UTF-8?q?=20of=20passing=20'env'=20,=20'sap=5Fcredentials'=20and=20'sap?= =?UTF-8?q?=5Fcredentias=5Fkey'=20variables=20into=20source=20class?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/sap_rfc.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 5a9cf5b2d..365fa72f6 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -237,6 +237,9 @@ def __init__( sep: str = None, func: str = "RFC_READ_TABLE", rfc_total_col_width_character_limit: int = 400, + sap_credentials: dict = None, + sap_credentials_key: str = "SAP", + env: str = "DEV", *args, **kwargs, ): @@ -250,16 +253,19 @@ def __init__( in case of too many columns for RFC function. According to SAP documentation, the limit is 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. + sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. + sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". + env (str, optional): SAP environment. Defaults to "DEV". 
Raises: CredentialError: If provided credentials are incorrect. """ self._con = None - sap_credentials_key = kwargs.pop("sap_credentials_key") - env = kwargs.pop("env") + self.sap_credentials = sap_credentials + self.sap_credentials_key = sap_credentials_key + self.env = env - sap_credentials = kwargs.pop("sap_credentials", None) if sap_credentials is None: logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." @@ -680,6 +686,9 @@ def __init__( func: str = "RFC_READ_TABLE", rfc_total_col_width_character_limit: int = 400, rfc_unique_id: List[str] = None, + sap_credentials: dict = None, + sap_credentials_key: str = "SAP", + env: str = "DEV", *args, **kwargs, ): @@ -696,16 +705,19 @@ def __init__( 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. rfc_unique_id (List[str], optional): Reference columns to merge chunks Data Frames. These columns must to be unique. Defaults to None. + sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. + sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". + env (str, optional): SAP environment. Defaults to "DEV". Raises: CredentialError: If provided credentials are incorrect. """ self._con = None - sap_credentials_key = kwargs.pop("sap_credentials_key") - env = kwargs.pop("env") + self.sap_credentials = sap_credentials + self.sap_credentials_key = sap_credentials_key + self.env = env - sap_credentials = kwargs.pop("sap_credentials", None) if sap_credentials is None: logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." 
From e6a98107aee79bdb629967c1f649a3e01e454bce Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Fri, 15 Mar 2024 09:04:48 +0100 Subject: [PATCH 18/36] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Change=20logic=20of?= =?UTF-8?q?=20checking=20credentials=5Fkeys=20dictionary=20from=20task=20t?= =?UTF-8?q?o=20the=20source=20level?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/sap_rfc.py | 20 ++++++++++++++++++++ viadot/tasks/sap_rfc.py | 10 ---------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 365fa72f6..261a673bb 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -266,6 +266,16 @@ def __init__( self.sap_credentials_key = sap_credentials_key self.env = env + if isinstance(sap_credentials, dict): + credentials_keys = list(sap_credentials.keys()) + required_credentials_params = ["sysnr", "user", "passwd", "ashost"] + for key in required_credentials_params: + if key not in credentials_keys: + self.logger.warning( + f"Required key '{key}' not found in your 'sap_credentials' dictionary!" + ) + sap_credentials = None + if sap_credentials is None: logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." @@ -718,6 +728,16 @@ def __init__( self.sap_credentials_key = sap_credentials_key self.env = env + if isinstance(sap_credentials, dict): + credentials_keys = list(sap_credentials.keys()) + required_credentials_params = ["sysnr", "user", "passwd", "ashost"] + for key in required_credentials_params: + if key not in credentials_keys: + self.logger.warning( + f"Required key '{key}' not found in your 'sap_credentials' dictionary!" + ) + sap_credentials = None + if sap_credentials is None: logger.warning( f"Your credentials will use {env} environment from local config. 
If you would like to use different one - please specified it in sap_credentials parameter." diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index b2ef7552b..469b0b1df 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -133,16 +133,6 @@ def run( pd.DataFrame: DataFrame with SAP data. """ - if isinstance(sap_credentials, dict): - credentials_keys = list(sap_credentials.keys()) - required_credentials_params = ["sysnr", "user", "passwd", "ashost"] - for key in required_credentials_params: - if key not in credentials_keys: - self.logger.warning( - f"Required key '{key}' not found in your 'sap_credentials' dictionary!" - ) - sap_credentials = None - if sap_credentials is None: credentials_str = AzureKeyVaultSecret( secret=sap_credentials_key, From 92c2b7043d80207c8deb3f6c1484b214c86630aa Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Fri, 15 Mar 2024 13:05:59 +0100 Subject: [PATCH 19/36] =?UTF-8?q?=F0=9F=90=9B=20fix=20parameter=20name=20b?= =?UTF-8?q?ug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sap_rfc_to_adls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index 009296d44..5bc4153fd 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -126,7 +126,7 @@ def gen_flow(self) -> Flow: rfc_total_col_width_character_limit=self.rfc_total_col_width_character_limit, rfc_unique_id=self.rfc_unique_id, alternative_version=self.alternative_version, - credentials=self.sap_credentials, + sap_credentials=self.sap_credentials, sap_credentials_key=self.sap_credentials_key, env=self.env, flow=self, From 2d3ee8d89d6422595a2eda335f50cc251d33afe1 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Fri, 15 Mar 2024 13:12:33 +0100 Subject: [PATCH 20/36] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor=20saprfc=20?= =?UTF-8?q?task=20in=20order=20to=20meet=20new=20logic=20in=20source?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/sap_rfc.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index 469b0b1df..fb059a467 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -19,7 +19,7 @@ class SAPRFCToDF(Task): def __init__( self, - query: str, + query: str = None, sep: str = None, replacement: str = "-", func: str = None, @@ -87,7 +87,7 @@ def __init__( "replacement", "func", "rfc_total_col_width_character_limit", - "credentials", + "sap_credentials", ) def run( self, @@ -134,10 +134,15 @@ def run( """ if sap_credentials is None: - credentials_str = AzureKeyVaultSecret( - secret=sap_credentials_key, - ).run() - sap_credentials = json.loads(credentials_str).get(env) + try: + credentials_str = AzureKeyVaultSecret( + secret=sap_credentials_key, + ).run() + sap_credentials = json.loads(credentials_str).get(env) + except: + logger.warning( + f"Getting credentials from Azure Key Vault was not possible. Either there is no key: {sap_credentials_key} or env: {env} or there is not Key Vault in your environment." 
+ ) if alternative_version is True: if rfc_unique_id: From e5ee5b735299c3daf6200e2289a4f1383fb9d197 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Fri, 15 Mar 2024 13:22:25 +0100 Subject: [PATCH 21/36] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor=20logic=20i?= =?UTF-8?q?n=20source=20regarding=20credentials=20exceptions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/sap_rfc.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 261a673bb..4a63b44e9 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -262,7 +262,6 @@ def __init__( """ self._con = None - self.sap_credentials = sap_credentials self.sap_credentials_key = sap_credentials_key self.env = env @@ -271,7 +270,7 @@ def __init__( required_credentials_params = ["sysnr", "user", "passwd", "ashost"] for key in required_credentials_params: if key not in credentials_keys: - self.logger.warning( + logger.warning( f"Required key '{key}' not found in your 'sap_credentials' dictionary!" ) sap_credentials = None @@ -280,12 +279,21 @@ def __init__( logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." ) - sap_credentials = local_config.get(sap_credentials_key).get(env) + try: + sap_credentials = local_config.get(sap_credentials_key).get(env) + except AttributeError: + raise CredentialError( + f"Sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or Local Config!" 
+ ) if sap_credentials is None: raise CredentialError(f"Missing {env} credentials!") - super().__init__(*args, credentials=sap_credentials, **kwargs) + super().__init__( + *args, + **kwargs, + ) + self.sap_credentials = sap_credentials self.sep = sep self.client_side_filters = None self.func = func @@ -295,7 +303,7 @@ def __init__( def con(self) -> pyrfc.Connection: if self._con is not None: return self._con - con = pyrfc.Connection(**self.credentials) + con = pyrfc.Connection(**self.sap_credentials) self._con = con return con @@ -724,7 +732,6 @@ def __init__( """ self._con = None - self.sap_credentials = sap_credentials self.sap_credentials_key = sap_credentials_key self.env = env @@ -733,7 +740,7 @@ def __init__( required_credentials_params = ["sysnr", "user", "passwd", "ashost"] for key in required_credentials_params: if key not in credentials_keys: - self.logger.warning( + logger.warning( f"Required key '{key}' not found in your 'sap_credentials' dictionary!" ) sap_credentials = None @@ -742,12 +749,18 @@ def __init__( logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." ) - sap_credentials = local_config.get(sap_credentials_key).get(env) + try: + sap_credentials = local_config.get(sap_credentials_key).get(env) + except AttributeError: + raise CredentialError( + f"Sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or Local Config!" 
+ ) if sap_credentials is None: raise CredentialError(f"Missing {env} credentials!") - super().__init__(*args, credentials=sap_credentials, **kwargs) + super().__init__(*args, **kwargs) + self.sap_credentials = sap_credentials self.sep = sep self.replacement = replacement self.client_side_filters = None From f5ac9274c7af34070cc933272dcd0b41d453fc61 Mon Sep 17 00:00:00 2001 From: dominikjedlinski Date: Fri, 15 Mar 2024 15:01:01 +0100 Subject: [PATCH 22/36] changed upstream order for df_validation --- viadot/flows/adls_to_azure_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index c12cc7e1d..ed3877fd6 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -369,8 +369,8 @@ def gen_flow(self) -> Flow: if self.validate_df_dict: validate_df.bind(df=df, tests=self.validate_df_dict, flow=self) validate_df.set_upstream(lake_to_df_task, flow=self) + df_reorder.set_upstream(validate_df, flow=self) - df_reorder.set_upstream(lake_to_df_task, flow=self) df_to_csv.set_upstream(df_reorder, flow=self) promote_to_conformed_task.set_upstream(df_to_csv, flow=self) create_table_task.set_upstream(df_to_csv, flow=self) From bec22d9f46678f5f2a26d30bd0d5ae6ebae960b9 Mon Sep 17 00:00:00 2001 From: dominikjedlinski Date: Mon, 18 Mar 2024 13:22:37 +0100 Subject: [PATCH 23/36] updated upstream order --- viadot/flows/adls_to_azure_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index ed3877fd6..f95250ab7 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -368,9 +368,9 @@ def gen_flow(self) -> Flow: # data validation function (optional) if self.validate_df_dict: validate_df.bind(df=df, tests=self.validate_df_dict, flow=self) - validate_df.set_upstream(lake_to_df_task, flow=self) df_reorder.set_upstream(validate_df, flow=self) + 
df_to_csv.set_upstream(dtypes, flow=self) df_to_csv.set_upstream(df_reorder, flow=self) promote_to_conformed_task.set_upstream(df_to_csv, flow=self) create_table_task.set_upstream(df_to_csv, flow=self) From b2db3b4abc35375d86ca315d6575ccf70ad680c4 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Tue, 19 Mar 2024 14:27:53 +0100 Subject: [PATCH 24/36] =?UTF-8?q?=F0=9F=90=9B=20fixed=20parameter=20litera?= =?UTF-8?q?l=20definition.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/azure_sql.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/viadot/tasks/azure_sql.py b/viadot/tasks/azure_sql.py index b6481975a..c4bc00484 100644 --- a/viadot/tasks/azure_sql.py +++ b/viadot/tasks/azure_sql.py @@ -64,7 +64,7 @@ def run( table: str, dtypes: Dict[str, Any], sep: str = None, - if_exists: Literal = ["fail", "replace", "append", "delete"], + if_exists: Literal["fail", "replace", "append", "delete"] = "fail", ): """ Create a table from an Azure Blob object. @@ -75,8 +75,8 @@ def run( schema (str): Destination schema. table (str): Destination table. dtypes (Dict[str, Any]): Data types to force. - sep (str): The separator to use to read the CSV file. - if_exists (Literal, optional): What to do if the table already exists. + sep (str, optional): The separator to use to read the CSV file. Defaults to None. + if_exists (Literal["fail", "replace", "append", "delete"], optional): What to do if the table already exists. Defaults to "fail". 
""" fqn = f"{schema}.{table}" if schema else table From b73d5e74043683b381d2ee9e0ce621074e4a625e Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Tue, 19 Mar 2024 14:30:33 +0100 Subject: [PATCH 25/36] =?UTF-8?q?=F0=9F=93=9D=20updated=20CHANGELOG.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c49b99fd7..e0dae1557 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Fixed +- `tasks/azure_sql` fixed parameter definition issue in `CreateTableFromBlob` task. ### Changed From 371ddbd684c25dbdb55c6b764eedc44d97a66707 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Thu, 21 Mar 2024 15:50:30 +0100 Subject: [PATCH 26/36] =?UTF-8?q?=F0=9F=9A=80=20Add=20tests=20for=20new=20?= =?UTF-8?q?logic=20for=20source=20and=20task=20level?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/tasks/test_sap_rfc_to_df.py | 47 ++++++++++- tests/integration/test_sap_rfc.py | 83 ++++++++++++++++++- 2 files changed, 123 insertions(+), 7 deletions(-) diff --git a/tests/integration/tasks/test_sap_rfc_to_df.py b/tests/integration/tasks/test_sap_rfc_to_df.py index 4151adfdc..7007a978c 100644 --- a/tests/integration/tasks/test_sap_rfc_to_df.py +++ b/tests/integration/tasks/test_sap_rfc_to_df.py @@ -1,13 +1,52 @@ +import pytest +import logging + +from viadot.exceptions import CredentialError from viadot.config import local_config from viadot.tasks import SAPRFCToDF def test_sap_rfc_to_df_bbp(): - sap_test_creds = local_config.get("SAP").get("QA") task = SAPRFCToDF( - credentials=sap_test_creds, - query="SELECT MATNR, MATKL, MTART, LAEDA FROM MARA WHERE LAEDA LIKE '2022%'", + query="SELECT MATNR, MATKL, MTART, LAEDA FROM MARA WHERE LAEDA LIKE '20220110%'", func="BBP_RFC_READ_TABLE", ) - df = 
task.run() + df = task.run(sap_credentials_key="SAP", env="QA") assert len(df.columns) == 4 and not df.empty + + +def test_sap_rfc_to_df_wrong_sap_credential_key_bbp(caplog): + task = SAPRFCToDF( + query="SELECT MATNR, MATKL, MTART, LAEDA FROM MARA WHERE LAEDA LIKE '20220110%'", + func="BBP_RFC_READ_TABLE", + ) + with pytest.raises( + CredentialError, + match="Sap_credentials_key: SAP_test is not stored neither in KeyVault or Local Config!", + ): + task.run( + sap_credentials_key="SAP_test", + ) + assert ( + f"Getting credentials from Azure Key Vault was not possible. Either there is no key: SAP_test or env: DEV or there is not Key Vault in your environment." + in caplog.text + ) + + +def test_sap_rfc_to_df_wrong_env_bbp(caplog): + task = SAPRFCToDF( + query="SELECT MATNR, MATKL, MTART, LAEDA FROM MARA WHERE LAEDA LIKE '20220110%'", + func="BBP_RFC_READ_TABLE", + ) + with pytest.raises( + CredentialError, + match="Missing PROD_test credentials!", + ): + task.run( + sap_credentials_key="SAP", + env="PROD_test", + ) + assert ( + f"Getting credentials from Azure Key Vault was not possible. Either there is no key: SAP or env: PROD_test or there is not Key Vault in your environment." + in caplog.text + ) diff --git a/tests/integration/test_sap_rfc.py b/tests/integration/test_sap_rfc.py index 28ab044a2..2667d4d81 100644 --- a/tests/integration/test_sap_rfc.py +++ b/tests/integration/test_sap_rfc.py @@ -1,6 +1,9 @@ -from collections import OrderedDict +import pytest +import logging +from collections import OrderedDict from viadot.sources import SAPRFC, SAPRFCV2 +from viadot.exceptions import CredentialError sap = SAPRFC() sap2 = SAPRFCV2() @@ -192,7 +195,7 @@ def test___build_pandas_filter_query_v2(): def test_default_credentials_warning_SAPRFC(caplog): _ = SAPRFC() assert ( - "Your credentials will use DEV environment. If you would like to use different one - please specified it." + f"Your credentials will use DEV environment from local config. 
If you would like to use different one - please specified it in sap_credentials parameter" in caplog.text ) @@ -200,6 +203,80 @@ def test_default_credentials_warning_SAPRFC(caplog): def test_default_credentials_warning_SAPRFCV2(caplog): _ = SAPRFCV2() assert ( - "Your credentials will use DEV environment. If you would like to use different one - please specified it." + f"Your credentials will use DEV environment from local config. If you would like to use different one - please specified it in sap_credentials parameter" + in caplog.text + ) + + +def test_credentials_dictionary_wrong_key_warning_SAPRFC(caplog): + _ = SAPRFC( + sap_credentials={ + "sysnr_test": "test", + "user": "test", + "passwd": "test", + "ashost": "test", + } + ) + assert ( + f"Required key 'sysnr' not found in your 'sap_credentials' dictionary!" + in caplog.text + ) + assert ( + f"Your credentials will use DEV environment from local config. If you would like to use different one - please specified it in sap_credentials parameter" in caplog.text ) + + +def test_credentials_dictionary_wrong_key_warning_SAPRFCV2(caplog): + _ = SAPRFCV2( + sap_credentials={ + "sysnr_test": "test", + "user": "test", + "passwd": "test", + "ashost": "test", + } + ) + assert ( + f"Required key 'sysnr' not found in your 'sap_credentials' dictionary!" + in caplog.text + ) + assert ( + f"Your credentials will use DEV environment from local config. 
If you would like to use different one - please specified it in sap_credentials parameter" + in caplog.text + ) + + +def test_sap_credentials_key_wrong_value_error_SAPRFC(caplog): + with pytest.raises( + CredentialError, + match="Sap_credentials_key: SAP_test is not stored neither in KeyVault or Local Config!", + ): + with caplog.at_level(logging.ERROR): + _ = SAPRFC(sap_credentials_key="SAP_test") + + +def test_sap_credentials_key_wrong_value_error_SAPRFCV2(caplog): + with pytest.raises( + CredentialError, + match="Sap_credentials_key: SAP_test is not stored neither in KeyVault or Local Config!", + ): + with caplog.at_level(logging.ERROR): + _ = SAPRFC(sap_credentials_key="SAP_test") + + +def test_env_wrong_value_error_SAPRFC(caplog): + with pytest.raises( + CredentialError, + match="Missing PROD_test credentials!", + ): + with caplog.at_level(logging.ERROR): + _ = SAPRFC(env="PROD_test") + + +def test_env_wrong_value_error_SAPRFCV2(caplog): + with pytest.raises( + CredentialError, + match="Missing PROD_test credentials!", + ): + with caplog.at_level(logging.ERROR): + _ = SAPRFC(env="PROD_test") From 5169f329d8e9e668caae6f4ecc0490183158a2c4 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Tue, 26 Mar 2024 14:51:12 +0100 Subject: [PATCH 27/36] =?UTF-8?q?=F0=9F=8E=A8=20improve=20structure=20of?= =?UTF-8?q?=20changelog=20inputs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4207c636..44b4a3521 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Fixed -- Changed requirements.txt to level up version of dbt-sqlserver in order to fix bug with `MAXRECURSION` error in dbt_run +- Changed `requirements.txt` to level up version of `dbt-sqlserver` in order to fix bug with `MAXRECURSION` error in dbt_run ### 
Changed ### Removed -- Removed dbt-core==1.3.2 from requirements.txt +- Removed `dbt-core==1.3.2` from `requirements.txt` ## [0.4.25] - 2024-01-30 ### Added From 9e4aae834e4e3e46ecd60e52315d09406713426f Mon Sep 17 00:00:00 2001 From: Diego <108733861+Diego-H-S@users.noreply.github.com> Date: Tue, 26 Mar 2024 15:07:09 +0100 Subject: [PATCH 28/36] Update CHANGELOG.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rafał Ziemianek <49795849+Rafalz13@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0dae1557..dfe42548f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Fixed -- `tasks/azure_sql` fixed parameter definition issue in `CreateTableFromBlob` task. +- Fixed the `if_exists` parameter definition in the `CreateTableFromBlob` task. ### Changed From ad17a3f3c4ad2480b2351d0ce7d98970598ad82e Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Tue, 2 Apr 2024 08:07:47 +0200 Subject: [PATCH 29/36] =?UTF-8?q?=F0=9F=8E=A8=20Changed=20docstrings=20in?= =?UTF-8?q?=20source,=20task=20nad=20flow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sap_rfc_to_adls.py | 4 ++-- viadot/sources/sap_rfc.py | 8 ++++---- viadot/tasks/sap_rfc.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index 5bc4153fd..d5f171ada 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -69,8 +69,8 @@ def __init__( ... ) sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP".
- env (str, optional): SAP environment. Defaults to "DEV". + sap_credentials_key (str, optional): The key for sap credentials located in the local config for Azure Key Vault. Defaults to "SAP". + env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV" output_file_extension (str, optional): Output file extension - to allow selection of .csv for data which is not easy to handle with parquet. Defaults to ".parquet". local_file_path (str, optional): Local destination path. Defaults to None. file_sep(str, optional): The separator to use in the CSV. Defaults to "\t". diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 4a63b44e9..df9404c06 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -254,8 +254,8 @@ def __init__( 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". - env (str, optional): SAP environment. Defaults to "DEV". + sap_credentials_key (str, optional): The key for sap credentials located in the local config for Azure Key Vault. Defaults to "SAP". + env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". Raises: CredentialError: If provided credentials are incorrect. @@ -724,8 +724,8 @@ def __init__( of characters, so we add a safety margin. Defaults to 400. rfc_unique_id (List[str], optional): Reference columns to merge chunks Data Frames. These columns must to be unique. Defaults to None. sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. 
- sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". - env (str, optional): SAP environment. Defaults to "DEV". + sap_credentials_key (str, optional): The key for sap credentials located in the local config for Azure Key Vault. Defaults to "SAP". + env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". Raises: CredentialError: If provided credentials are incorrect. diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index fb059a467..91c754678 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -59,8 +59,8 @@ def __init__( 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". - env (str, optional): SAP environment. Defaults to "DEV". + sap_credentials_key (str, optional): The key for sap credentials located in the local config for Azure Key Vault. Defaults to "SAP". + env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". By default, they're taken from the local viadot config. """ self.query = query @@ -111,8 +111,8 @@ def run( replacement (str, optional): In case of sep is on a columns, set up a new character to replace inside the string to avoid flow breakdowns. Defaults to "-". sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - sap_credentials_key (str, optional): Local config or Azure KV secret. Defaults to "SAP". - env (str, optional): SAP environment. Defaults to "DEV". + sap_credentials_key (str, optional): The key for sap credentials located in the local config for Azure Key Vault. 
Defaults to "SAP". + env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". func (str, optional): SAP RFC function to use. Defaults to None. rfc_total_col_width_character_limit (int, optional): Number of characters by which query will be split in chunks in case of too many columns for RFC function. According to SAP documentation, the limit is From f0d218acec88f58309a482fb7620c8ac42050272 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Tue, 2 Apr 2024 08:10:12 +0200 Subject: [PATCH 30/36] =?UTF-8?q?=F0=9F=8E=A8=20Changed=20logger.warning?= =?UTF-8?q?=20string=20in=20source?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/sap_rfc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index df9404c06..0ebad30f4 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -283,7 +283,7 @@ def __init__( sap_credentials = local_config.get(sap_credentials_key).get(env) except AttributeError: raise CredentialError( - f"Sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or Local Config!" + f"sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or Local Config!" ) if sap_credentials is None: raise CredentialError(f"Missing {env} credentials!") @@ -753,7 +753,7 @@ def __init__( sap_credentials = local_config.get(sap_credentials_key).get(env) except AttributeError: raise CredentialError( - f"Sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or Local Config!" + f"sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or Local Config!" 
) if sap_credentials is None: raise CredentialError(f"Missing {env} credentials!") From 5108fec38ae51f6f63900b14c914a1b8e0f8827d Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Thu, 4 Apr 2024 10:02:35 +0200 Subject: [PATCH 31/36] =?UTF-8?q?=F0=9F=8E=A8=20Correct=20docstrings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sap_rfc_to_adls.py | 4 ++-- viadot/sources/sap_rfc.py | 8 ++++---- viadot/tasks/sap_rfc.py | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index d5f171ada..5a380bbaa 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -68,8 +68,8 @@ def __init__( rfc_unique_id=["VBELN", "LPRIO"], ... ) - sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - sap_credentials_key (str, optional): The key for sap credentials located in the local config for Azure Key Vault. Defaults to "SAP". + sap_credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. + sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV" output_file_extension (str, optional): Output file extension - to allow selection of .csv for data which is not easy to handle with parquet. Defaults to ".parquet". local_file_path (str, optional): Local destination path. Defaults to None. diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 0ebad30f4..8d53bd230 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -253,8 +253,8 @@ def __init__( in case of too many columns for RFC function. According to SAP documentation, the limit is 512 characters. 
However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. - sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - sap_credentials_key (str, optional): The key for sap credentials located in the local config for Azure Key Vault. Defaults to "SAP". + sap_credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. + sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". Raises: @@ -723,8 +723,8 @@ def __init__( 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. rfc_unique_id (List[str], optional): Reference columns to merge chunks Data Frames. These columns must to be unique. Defaults to None. - sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - sap_credentials_key (str, optional): The key for sap credentials located in the local config for Azure Key Vault. Defaults to "SAP". + sap_credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. + sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". Raises: diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index 91c754678..2d47e573a 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -59,7 +59,7 @@ def __init__( 512 characters. 
However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - sap_credentials_key (str, optional): The key for sap credentials located in the local config for Azure Key Vault. Defaults to "SAP". + sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". By default, they're taken from the local viadot config. """ @@ -110,8 +110,8 @@ def run( multiple options are automatically tried. Defaults to None. replacement (str, optional): In case of sep is on a columns, set up a new character to replace inside the string to avoid flow breakdowns. Defaults to "-". - sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. - sap_credentials_key (str, optional): The key for sap credentials located in the local config for Azure Key Vault. Defaults to "SAP". + sap_credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. + sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". func (str, optional): SAP RFC function to use. Defaults to None. 
rfc_total_col_width_character_limit (int, optional): Number of characters by which query will be split in chunks From 0fe6897b0a37c7942de474a088dd011812db763e Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Tue, 9 Apr 2024 08:03:58 +0200 Subject: [PATCH 32/36] =?UTF-8?q?=F0=9F=93=9D=20Added=20missing=20docstrin?= =?UTF-8?q?g?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/luma.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/viadot/tasks/luma.py b/viadot/tasks/luma.py index ebe709720..bce06ef2d 100644 --- a/viadot/tasks/luma.py +++ b/viadot/tasks/luma.py @@ -12,7 +12,7 @@ def __init__( self, metadata_dir_path: str, url: str = None, - dbt_project_path: str = None, + dbt_project_path: str = "tmp_dbt_repo_dir", credentials_secret: str = None, vault_name: str = None, *args, @@ -26,6 +26,8 @@ def __init__( In the case of dbt, it's dbt project's `target` directory, which contains dbt artifacts (`sources.json`, `catalog.json`, `manifest.json`, and `run_results.json`). url (str, optional): The url of the Luma ingestion API. Defaults to None. + dbt_project_path (str, optional): The path to the dbt project (the directory containing + the `dbt_project.yml` file). Defaults to 'tmp_dbt_repo_dir'. credentials_secret (str, optional): The name of the Azure Key Vault secret containing Luma credentials. Defaults to None. vault_name (str, optional): The name of the vault from which to obtain the secrets. Defaults to None. 
From c445782985d299b5a55a90e3eb1bbfbbcc46173c Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Wed, 10 Apr 2024 10:06:33 +0200 Subject: [PATCH 33/36] =?UTF-8?q?=F0=9F=90=9B=20Fix=20mug=20related=20to?= =?UTF-8?q?=20logger.warning=20and=20tests=20in=20sap=5Frfc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/tasks/test_sap_rfc_to_df.py | 2 +- tests/integration/test_sap_rfc.py | 12 ++++++------ viadot/sources/sap_rfc.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/integration/tasks/test_sap_rfc_to_df.py b/tests/integration/tasks/test_sap_rfc_to_df.py index 7007a978c..57e3451cb 100644 --- a/tests/integration/tasks/test_sap_rfc_to_df.py +++ b/tests/integration/tasks/test_sap_rfc_to_df.py @@ -22,7 +22,7 @@ def test_sap_rfc_to_df_wrong_sap_credential_key_bbp(caplog): ) with pytest.raises( CredentialError, - match="Sap_credentials_key: SAP_test is not stored neither in KeyVault or Local Config!", + match="sap_credentials_key: SAP_test is not stored neither in KeyVault or local config!", ): task.run( sap_credentials_key="SAP_test", diff --git a/tests/integration/test_sap_rfc.py b/tests/integration/test_sap_rfc.py index 2667d4d81..34d5106ed 100644 --- a/tests/integration/test_sap_rfc.py +++ b/tests/integration/test_sap_rfc.py @@ -195,7 +195,7 @@ def test___build_pandas_filter_query_v2(): def test_default_credentials_warning_SAPRFC(caplog): _ = SAPRFC() assert ( - f"Your credentials will use DEV environment from local config. If you would like to use different one - please specified it in sap_credentials parameter" + f"Your credentials will use DEV environment from local config. 
If you would like to use different one - please specified it in env parameter" in caplog.text ) @@ -203,7 +203,7 @@ def test_default_credentials_warning_SAPRFC(caplog): def test_default_credentials_warning_SAPRFCV2(caplog): _ = SAPRFCV2() assert ( - f"Your credentials will use DEV environment from local config. If you would like to use different one - please specified it in sap_credentials parameter" + f"Your credentials will use DEV environment from local config. If you would like to use different one - please specified it in env parameter" in caplog.text ) @@ -222,7 +222,7 @@ def test_credentials_dictionary_wrong_key_warning_SAPRFC(caplog): in caplog.text ) assert ( - f"Your credentials will use DEV environment from local config. If you would like to use different one - please specified it in sap_credentials parameter" + f"Your credentials will use DEV environment from local config. If you would like to use different one - please specified it in env parameter" in caplog.text ) @@ -241,7 +241,7 @@ def test_credentials_dictionary_wrong_key_warning_SAPRFCV2(caplog): in caplog.text ) assert ( - f"Your credentials will use DEV environment from local config. If you would like to use different one - please specified it in sap_credentials parameter" + f"Your credentials will use DEV environment from local config. 
If you would like to use different one - please specified it in env parameter" in caplog.text ) @@ -249,7 +249,7 @@ def test_credentials_dictionary_wrong_key_warning_SAPRFCV2(caplog): def test_sap_credentials_key_wrong_value_error_SAPRFC(caplog): with pytest.raises( CredentialError, - match="Sap_credentials_key: SAP_test is not stored neither in KeyVault or Local Config!", + match="sap_credentials_key: SAP_test is not stored neither in KeyVault or local config!", ): with caplog.at_level(logging.ERROR): _ = SAPRFC(sap_credentials_key="SAP_test") @@ -258,7 +258,7 @@ def test_sap_credentials_key_wrong_value_error_SAPRFC(caplog): def test_sap_credentials_key_wrong_value_error_SAPRFCV2(caplog): with pytest.raises( CredentialError, - match="Sap_credentials_key: SAP_test is not stored neither in KeyVault or Local Config!", + match="sap_credentials_key: SAP_test is not stored neither in KeyVault or local config!", ): with caplog.at_level(logging.ERROR): _ = SAPRFC(sap_credentials_key="SAP_test") diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 8d53bd230..94d1b1f07 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -277,13 +277,13 @@ def __init__( if sap_credentials is None: logger.warning( - f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." + f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in env parameter." ) try: sap_credentials = local_config.get(sap_credentials_key).get(env) except AttributeError: raise CredentialError( - f"sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or Local Config!" + f"sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or local config!" 
) if sap_credentials is None: raise CredentialError(f"Missing {env} credentials!") @@ -747,13 +747,13 @@ def __init__( if sap_credentials is None: logger.warning( - f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in sap_credentials parameter." + f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in env parameter." ) try: sap_credentials = local_config.get(sap_credentials_key).get(env) except AttributeError: raise CredentialError( - f"sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or Local Config!" + f"sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or local config!" ) if sap_credentials is None: raise CredentialError(f"Missing {env} credentials!") From 7b9b4735cc7c0c8c1f97ad37e617a472e77530b2 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Wed, 10 Apr 2024 15:01:07 +0200 Subject: [PATCH 34/36] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Change=20=20`sap=5Fc?= =?UTF-8?q?redential`=20variable=20to=20`credentials`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../integration/flows/test_sap_rfc_to_adls.py | 6 +-- tests/integration/test_sap_rfc.py | 8 ++-- viadot/flows/sap_rfc_to_adls.py | 8 ++-- viadot/sources/sap_rfc.py | 42 +++++++++---------- viadot/tasks/sap_rfc.py | 20 ++++----- 5 files changed, 42 insertions(+), 42 deletions(-) diff --git a/tests/integration/flows/test_sap_rfc_to_adls.py b/tests/integration/flows/test_sap_rfc_to_adls.py index 5503b4684..dc07d6160 100644 --- a/tests/integration/flows/test_sap_rfc_to_adls.py +++ b/tests/integration/flows/test_sap_rfc_to_adls.py @@ -19,7 +19,7 @@ def test_sap_rfc_to_adls_query(): name="test flow", query="SELECT MATNR, MATKL FROM MARA WHERE LAEDA LIKE '2022%' LIMIT 5", func="BBP_RFC_READ_TABLE", - sap_credentials=SAP_TEST_CREDS, + credentials=SAP_TEST_CREDS, 
local_file_path=FILE_NAME, adls_path=ADLS_PATH, overwrite=True, @@ -37,7 +37,7 @@ def test_sap_rfc_to_adls_validation_fail(): name="test flow", query="SELECT MATNR, MATKL FROM MARA WHERE LAEDA LIKE '2022%' LIMIT 5", func="BBP_RFC_READ_TABLE", - sap_credentials=SAP_TEST_CREDS, + credentials=SAP_TEST_CREDS, local_file_path=FILE_NAME, adls_path=ADLS_PATH, overwrite=True, @@ -54,7 +54,7 @@ def test_sap_rfc_to_adls_validation_success(): name="test flow", query="SELECT MATNR, MATKL FROM MARA WHERE LAEDA LIKE '2022%' LIMIT 5", func="BBP_RFC_READ_TABLE", - sap_credentials=SAP_TEST_CREDS, + credentials=SAP_TEST_CREDS, local_file_path=FILE_NAME, adls_path=ADLS_PATH, overwrite=True, diff --git a/tests/integration/test_sap_rfc.py b/tests/integration/test_sap_rfc.py index 34d5106ed..bfdb47bbb 100644 --- a/tests/integration/test_sap_rfc.py +++ b/tests/integration/test_sap_rfc.py @@ -210,7 +210,7 @@ def test_default_credentials_warning_SAPRFCV2(caplog): def test_credentials_dictionary_wrong_key_warning_SAPRFC(caplog): _ = SAPRFC( - sap_credentials={ + credentials={ "sysnr_test": "test", "user": "test", "passwd": "test", @@ -218,7 +218,7 @@ def test_credentials_dictionary_wrong_key_warning_SAPRFC(caplog): } ) assert ( - f"Required key 'sysnr' not found in your 'sap_credentials' dictionary!" + f"Required key 'sysnr' not found in your 'credentials' dictionary!" in caplog.text ) assert ( @@ -229,7 +229,7 @@ def test_credentials_dictionary_wrong_key_warning_SAPRFC(caplog): def test_credentials_dictionary_wrong_key_warning_SAPRFCV2(caplog): _ = SAPRFCV2( - sap_credentials={ + credentials={ "sysnr_test": "test", "user": "test", "passwd": "test", @@ -237,7 +237,7 @@ def test_credentials_dictionary_wrong_key_warning_SAPRFCV2(caplog): } ) assert ( - f"Required key 'sysnr' not found in your 'sap_credentials' dictionary!" + f"Required key 'sysnr' not found in your 'credentials' dictionary!" 
in caplog.text ) assert ( diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index 5a380bbaa..a72716b0f 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -16,7 +16,7 @@ def __init__( func: str = "RFC_READ_TABLE", rfc_total_col_width_character_limit: int = 400, rfc_unique_id: List[str] = None, - sap_credentials: dict = None, + credentials: dict = None, sap_credentials_key: str = "SAP", env: str = "DEV", output_file_extension: str = ".parquet", @@ -68,7 +68,7 @@ def __init__( rfc_unique_id=["VBELN", "LPRIO"], ... ) - sap_credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. + credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV" output_file_extension (str, optional): Output file extension - to allow selection of .csv for data which is not easy to handle with parquet. Defaults to ".parquet". 
@@ -94,7 +94,7 @@ def __init__( self.func = func self.rfc_total_col_width_character_limit = rfc_total_col_width_character_limit self.rfc_unique_id = rfc_unique_id - self.sap_credentials = sap_credentials + self.credentials = credentials self.sap_credentials_key = sap_credentials_key self.env = env self.output_file_extension = output_file_extension @@ -126,7 +126,7 @@ def gen_flow(self) -> Flow: rfc_total_col_width_character_limit=self.rfc_total_col_width_character_limit, rfc_unique_id=self.rfc_unique_id, alternative_version=self.alternative_version, - sap_credentials=self.sap_credentials, + credentials=self.credentials, sap_credentials_key=self.sap_credentials_key, env=self.env, flow=self, diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 94d1b1f07..99524a0d0 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -237,7 +237,7 @@ def __init__( sep: str = None, func: str = "RFC_READ_TABLE", rfc_total_col_width_character_limit: int = 400, - sap_credentials: dict = None, + credentials: dict = None, sap_credentials_key: str = "SAP", env: str = "DEV", *args, @@ -253,7 +253,7 @@ def __init__( in case of too many columns for RFC function. According to SAP documentation, the limit is 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. - sap_credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. + credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". 
@@ -265,27 +265,27 @@ def __init__( self.sap_credentials_key = sap_credentials_key self.env = env - if isinstance(sap_credentials, dict): - credentials_keys = list(sap_credentials.keys()) + if isinstance(credentials, dict): + credentials_keys = list(credentials.keys()) required_credentials_params = ["sysnr", "user", "passwd", "ashost"] for key in required_credentials_params: if key not in credentials_keys: logger.warning( - f"Required key '{key}' not found in your 'sap_credentials' dictionary!" + f"Required key '{key}' not found in your 'credentials' dictionary!" ) - sap_credentials = None + credentials = None - if sap_credentials is None: + if credentials is None: logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in env parameter." ) try: - sap_credentials = local_config.get(sap_credentials_key).get(env) + credentials = local_config.get(sap_credentials_key).get(env) except AttributeError: raise CredentialError( f"sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or local config!" ) - if sap_credentials is None: + if credentials is None: raise CredentialError(f"Missing {env} credentials!") super().__init__( @@ -293,7 +293,7 @@ def __init__( **kwargs, ) - self.sap_credentials = sap_credentials + self.credentials = credentials self.sep = sep self.client_side_filters = None self.func = func @@ -303,7 +303,7 @@ def __init__( def con(self) -> pyrfc.Connection: if self._con is not None: return self._con - con = pyrfc.Connection(**self.sap_credentials) + con = pyrfc.Connection(**self.credentials) self._con = con return con @@ -704,7 +704,7 @@ def __init__( func: str = "RFC_READ_TABLE", rfc_total_col_width_character_limit: int = 400, rfc_unique_id: List[str] = None, - sap_credentials: dict = None, + credentials: dict = None, sap_credentials_key: str = "SAP", env: str = "DEV", *args, @@ -723,7 +723,7 @@ def __init__( 512 characters. 
However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. rfc_unique_id (List[str], optional): Reference columns to merge chunks Data Frames. These columns must to be unique. Defaults to None. - sap_credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. + credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". @@ -735,32 +735,32 @@ def __init__( self.sap_credentials_key = sap_credentials_key self.env = env - if isinstance(sap_credentials, dict): - credentials_keys = list(sap_credentials.keys()) + if isinstance(credentials, dict): + credentials_keys = list(credentials.keys()) required_credentials_params = ["sysnr", "user", "passwd", "ashost"] for key in required_credentials_params: if key not in credentials_keys: logger.warning( - f"Required key '{key}' not found in your 'sap_credentials' dictionary!" + f"Required key '{key}' not found in your 'credentials' dictionary!" ) - sap_credentials = None + credentials = None - if sap_credentials is None: + if credentials is None: logger.warning( f"Your credentials will use {env} environment from local config. If you would like to use different one - please specified it in env parameter." ) try: - sap_credentials = local_config.get(sap_credentials_key).get(env) + credentials = local_config.get(sap_credentials_key).get(env) except AttributeError: raise CredentialError( f"sap_credentials_key: {sap_credentials_key} is not stored neither in KeyVault or local config!" 
) - if sap_credentials is None: + if credentials is None: raise CredentialError(f"Missing {env} credentials!") super().__init__(*args, **kwargs) - self.sap_credentials = sap_credentials + self.credentials = credentials self.sep = sep self.replacement = replacement self.client_side_filters = None diff --git a/viadot/tasks/sap_rfc.py b/viadot/tasks/sap_rfc.py index 2d47e573a..3527e4632 100644 --- a/viadot/tasks/sap_rfc.py +++ b/viadot/tasks/sap_rfc.py @@ -24,7 +24,7 @@ def __init__( replacement: str = "-", func: str = None, rfc_total_col_width_character_limit: int = 400, - sap_credentials: dict = None, + credentials: dict = None, sap_credentials_key: str = "SAP", env: str = "DEV", max_retries: int = 3, @@ -58,7 +58,7 @@ def __init__( in case of too many columns for RFC function. According to SAP documentation, the limit is 512 characters. However, we observed SAP raising an exception even on a slightly lower number of characters, so we add a safety margin. Defaults to 400. - sap_credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. + credentials (dict, optional): The credentials to use to authenticate with SAP. By default, they're taken from the local viadot config. sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". By default, they're taken from the local viadot config. 
@@ -66,7 +66,7 @@ def __init__( self.query = query self.sep = sep self.replacement = replacement - self.sap_credentials = sap_credentials + self.credentials = credentials self.sap_credentials_key = sap_credentials_key self.env = env self.func = func @@ -87,14 +87,14 @@ def __init__( "replacement", "func", "rfc_total_col_width_character_limit", - "sap_credentials", + "credentials", ) def run( self, query: str, sep: str = None, replacement: str = "-", - sap_credentials: dict = None, + credentials: dict = None, sap_credentials_key: str = "SAP", env: str = "DEV", func: str = None, @@ -110,7 +110,7 @@ def run( multiple options are automatically tried. Defaults to None. replacement (str, optional): In case of sep is on a columns, set up a new character to replace inside the string to avoid flow breakdowns. Defaults to "-". - sap_credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. + credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV". func (str, optional): SAP RFC function to use. Defaults to None. @@ -133,12 +133,12 @@ def run( pd.DataFrame: DataFrame with SAP data. """ - if sap_credentials is None: + if credentials is None: try: credentials_str = AzureKeyVaultSecret( secret=sap_credentials_key, ).run() - sap_credentials = json.loads(credentials_str).get(env) + credentials = json.loads(credentials_str).get(env) except: logger.warning( f"Getting credentials from Azure Key Vault was not possible. Either there is no key: {sap_credentials_key} or env: {env} or there is not Key Vault in your environment." 
@@ -152,7 +152,7 @@ def run( sap = SAPRFCV2( sep=sep, replacement=replacement, - sap_credentials=sap_credentials, + credentials=credentials, sap_credentials_key=sap_credentials_key, env=env, func=func, @@ -162,7 +162,7 @@ def run( else: sap = SAPRFC( sep=sep, - sap_credentials=sap_credentials, + credentials=credentials, sap_credentials_key=sap_credentials_key, env=env, func=func, From 1937ebbb0effb580983181040fd1ed9fa6630faa Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Thu, 11 Apr 2024 09:21:58 +0200 Subject: [PATCH 35/36] =?UTF-8?q?=F0=9F=93=9D=20Updated=20Changelog=20befo?= =?UTF-8?q?re=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9256b1ccf..fb82fdead 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,18 +6,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- Added option for sap_rfc connector to get credentials from Azure KeyVault or directly passing dictionary inside flow. +### Fixed + +### Changed + +### Removed + + +## [0.4.26] - 2024-04-11 +### Added +- Added option for `SAP RFC` connector to get credentials from Azure KeyVault or directly passing dictionary inside flow. ### Fixed - Fixed the `if_exists` parameter definition in the `CreateTableFromBlob` task. -- Changed `requirements.txt` to level up version of `dbt-sqlserver` in order to fix bug with `MAXRECURSION` error in dbt_run - +- Changed `requirements.txt` to level up version of `dbt-sqlserver` in order to fix bug with `MAXRECURSION` error in dbt_run. ### Changed +- Changed `dbt-sqlserver` version to `git+https://github.com/djagoda881/dbt-sqlserver.git@v1.3.latest_option_clause`. ### Removed -- Removed `dbt-core==1.3.2` from `requirements.txt` +- Removed `dbt-core==1.3.2` from `requirements.txt`. 
+- Removed copying files to conformed/ and operational/ directories when running `ADLSToAzureSQL` flow. ## [0.4.25] - 2024-01-30 ### Added From d4a800d7282a3a1f25c55eb4cbe734906b1474fd Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Thu, 11 Apr 2024 10:09:48 +0200 Subject: [PATCH 36/36] Changed parameter from `credentials` to `sap_credentials` for SAPRFCToADLS --- tests/integration/flows/test_sap_rfc_to_adls.py | 6 +++--- viadot/flows/sap_rfc_to_adls.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/flows/test_sap_rfc_to_adls.py b/tests/integration/flows/test_sap_rfc_to_adls.py index dc07d6160..5503b4684 100644 --- a/tests/integration/flows/test_sap_rfc_to_adls.py +++ b/tests/integration/flows/test_sap_rfc_to_adls.py @@ -19,7 +19,7 @@ def test_sap_rfc_to_adls_query(): name="test flow", query="SELECT MATNR, MATKL FROM MARA WHERE LAEDA LIKE '2022%' LIMIT 5", func="BBP_RFC_READ_TABLE", - credentials=SAP_TEST_CREDS, + sap_credentials=SAP_TEST_CREDS, local_file_path=FILE_NAME, adls_path=ADLS_PATH, overwrite=True, @@ -37,7 +37,7 @@ def test_sap_rfc_to_adls_validation_fail(): name="test flow", query="SELECT MATNR, MATKL FROM MARA WHERE LAEDA LIKE '2022%' LIMIT 5", func="BBP_RFC_READ_TABLE", - credentials=SAP_TEST_CREDS, + sap_credentials=SAP_TEST_CREDS, local_file_path=FILE_NAME, adls_path=ADLS_PATH, overwrite=True, @@ -54,7 +54,7 @@ def test_sap_rfc_to_adls_validation_success(): name="test flow", query="SELECT MATNR, MATKL FROM MARA WHERE LAEDA LIKE '2022%' LIMIT 5", func="BBP_RFC_READ_TABLE", - credentials=SAP_TEST_CREDS, + sap_credentials=SAP_TEST_CREDS, local_file_path=FILE_NAME, adls_path=ADLS_PATH, overwrite=True, diff --git a/viadot/flows/sap_rfc_to_adls.py b/viadot/flows/sap_rfc_to_adls.py index a72716b0f..6028bba04 100644 --- a/viadot/flows/sap_rfc_to_adls.py +++ b/viadot/flows/sap_rfc_to_adls.py @@ -16,7 +16,7 @@ def __init__( func: str = "RFC_READ_TABLE", rfc_total_col_width_character_limit: int = 400, rfc_unique_id:
List[str] = None, - credentials: dict = None, + sap_credentials: dict = None, sap_credentials_key: str = "SAP", env: str = "DEV", output_file_extension: str = ".parquet", @@ -68,7 +68,7 @@ def __init__( rfc_unique_id=["VBELN", "LPRIO"], ... ) - credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. + sap_credentials (dict, optional): The credentials to use to authenticate with SAP. Defaults to None. sap_credentials_key (str, optional): The key for sap credentials located in the local config or Azure Key Vault. Defaults to "SAP". env (str, optional): The key for sap_credentials_key pointing to the SAP environment. Defaults to "DEV" output_file_extension (str, optional): Output file extension - to allow selection of .csv for data which is not easy to handle with parquet. Defaults to ".parquet". @@ -94,7 +94,7 @@ def __init__( self.func = func self.rfc_total_col_width_character_limit = rfc_total_col_width_character_limit self.rfc_unique_id = rfc_unique_id - self.credentials = credentials + self.sap_credentials = sap_credentials self.sap_credentials_key = sap_credentials_key self.env = env self.output_file_extension = output_file_extension @@ -126,7 +126,7 @@ def gen_flow(self) -> Flow: rfc_total_col_width_character_limit=self.rfc_total_col_width_character_limit, rfc_unique_id=self.rfc_unique_id, alternative_version=self.alternative_version, - credentials=self.credentials, + credentials=self.sap_credentials, sap_credentials_key=self.sap_credentials_key, env=self.env, flow=self,