From dae2276edf28bcf3c7eb570e7a7d6c8a8fd41e38 Mon Sep 17 00:00:00 2001
From: Maroun Touma <touma@us.ibm.com>
Date: Tue, 26 Nov 2024 13:45:12 -0500
Subject: [PATCH] build transforms==0.2.3.dev0 release with toolkit==0.2.2

Signed-off-by: Maroun Touma <touma@us.ibm.com>
---
 .make.versions                                |   4 +-
 data-processing-lib/pyproject.toml            |   2 +-
 .../createRayClusterComponent.yaml            |   2 +-
 .../deleteRayClusterComponent.yaml            |   2 +-
 .../executeRayJobComponent.yaml               |   2 +-
 .../executeRayJobComponent_multi_s3.yaml      |   2 +-
 .../executeSubWorkflowComponent.yaml          |   2 +-
 .../kfp_v1_workflow_support/pyproject.toml    |   4 +-
 .../kfp_v2_workflow_support/pyproject.toml    |   4 +-
 .../shared_workflow_support/pyproject.toml    |   4 +-
 transforms/Makefile                           |   2 +-
 .../code2parquet/kfp_ray/code2parquet_wf.py   |   2 +-
 .../code/code2parquet/python/pyproject.toml   |   2 +-
 .../code/code2parquet/python/requirements.txt |   2 +-
 .../code/code2parquet/ray/pyproject.toml      |   6 +-
 .../code/code_profiler/python/pyproject.toml  |   2 +-
 .../code_profiler/python/requirements.txt     |   2 +-
 .../code/code_profiler/ray/pyproject.toml     |   6 +-
 .../code_quality/kfp_ray/code_quality_wf.py   |   2 +-
 .../code/code_quality/python/pyproject.toml   |   2 +-
 .../code/code_quality/python/requirements.txt |   2 +-
 .../code/code_quality/ray/pyproject.toml      |   6 +-
 .../kfp_ray/header_cleanser_wf.py             |   2 +-
 .../header_cleanser/python/pyproject.toml     |   2 +-
 .../header_cleanser/python/requirements.txt   |   2 +-
 .../code/header_cleanser/ray/pyproject.toml   |   6 +-
 .../kfp_ray/license_select_wf.py              |   2 +-
 .../code/license_select/python/pyproject.toml |   2 +-
 .../license_select/python/requirements.txt    |   2 +-
 .../code/license_select/ray/pyproject.toml    |   6 +-
 transforms/code/malware/kfp_ray/malware_wf.py |   2 +-
 transforms/code/malware/python/pyproject.toml |   4 +-
 transforms/code/malware/ray/pyproject.toml    |   6 +-
 .../kfp_ray/proglang_select_wf.py             |   2 +-
 .../proglang_select/python/pyproject.toml     |   2 +-
 .../proglang_select/python/requirements.txt   |   2 +-
 .../code/proglang_select/ray/pyproject.toml   |   6 +-
 .../kfp_ray/repo_level_order_wf.py            |   2 +-
 .../repo_level_ordering/ray/pyproject.toml    |   4 +-
 .../kfp_ray/doc_chunk_multiple_wf.py          |   2 +-
 .../doc_chunk/kfp_ray/doc_chunk_wf.py         |   2 +-
 .../doc_chunk/python/requirements.txt         |   2 +-
 .../language/doc_chunk/ray/pyproject.toml     |   2 +-
 .../kfp_ray/doc_quality_multiple_wf.py        |   2 +-
 .../doc_quality/kfp_ray/doc_quality_wf.py     |   2 +-
 .../doc_quality/python/pyproject.toml         |   2 +-
 .../doc_quality/python/requirements.txt       |   2 +-
 .../language/doc_quality/ray/pyproject.toml   |   6 +-
 .../html2parquet/kfp_ray/html2parquet_wf.py   |   2 +-
 .../html2parquet/python/pyproject.toml        |   2 +-
 .../html2parquet/python/requirements.txt      |   2 +-
 .../language/html2parquet/ray/pyproject.toml  |   2 +-
 .../html2parquet/ray/requirements.txt         |   4 +-
 .../lang_id/kfp_ray/lang_id_multiple_wf.py    |   2 +-
 .../language/lang_id/kfp_ray/lang_id_wf.py    |   2 +-
 .../language/lang_id/python/pyproject.toml    |   2 +-
 .../language/lang_id/python/requirements.txt  |   2 +-
 .../language/lang_id/ray/pyproject.toml       |   6 +-
 .../kfp_ray/pdf2parquet_multiple_wf.py        |   2 +-
 .../pdf2parquet/kfp_ray/pdf2parquet_wf.py     |   2 +-
 .../pdf2parquet/python/requirements.txt       |   2 +-
 .../language/pdf2parquet/ray/requirements.txt |   2 +-
 .../pii_redactor/python/pyproject.toml        |   2 +-
 .../pii_redactor/python/requirements.txt      |   2 +-
 .../language/pii_redactor/ray/pyproject.toml  |   6 +-
 .../kfp_ray/text_encoder_multiple_wf.py       |   2 +-
 .../text_encoder/kfp_ray/text_encoder_wf.py   |   2 +-
 .../text_encoder/python/pyproject.toml        |   2 +-
 .../text_encoder/python/requirements.txt      |   2 +-
 .../language/text_encoder/ray/pyproject.toml  |   6 +-
 transforms/requirements-ray.txt               |   2 +-
 transforms/requirements.txt                   |   2 +-
 .../universal/doc_id/kfp_ray/doc_id_wf.py     |   2 +-
 .../universal/doc_id/python/pyproject.toml    |   2 +-
 .../universal/doc_id/python/requirements.txt  |   2 +-
 .../universal/doc_id/ray/pyproject.toml       |   6 +-
 .../universal/doc_id/spark/pyproject.toml     |   4 +-
 .../universal/ededup/kfp_ray/ededup_wf.py     |   2 +-
 .../universal/ededup/python/pyproject.toml    |   2 +-
 .../universal/ededup/python/requirements.txt  |   2 +-
 .../universal/ededup/ray/pyproject.toml       |   6 +-
 .../universal/fdedup/fdedup_python.ipynb      | 377 +++++++++++++++-
 transforms/universal/fdedup/fdedup_ray.ipynb  | 417 +++++++++++++++++-
 .../universal/fdedup/kfp_ray/fdedup_wf.py     |   2 +-
 .../universal/fdedup/python/pyproject.toml    |   2 +-
 .../universal/fdedup/python/requirements.txt  |   2 +-
 .../universal/fdedup/ray/pyproject.toml       |   2 +-
 .../universal/fdedup/ray/requirements.txt     |   4 +-
 .../universal/fdedup/spark/pyproject.toml     |   2 +-
 .../universal/fdedup/spark/requirements.txt   |   4 +-
 .../universal/filter/kfp_ray/filter_wf.py     |   2 +-
 .../universal/filter/python/pyproject.toml    |   2 +-
 .../universal/filter/python/requirements.txt  |   2 +-
 .../universal/filter/ray/pyproject.toml       |   6 +-
 .../universal/filter/spark/pyproject.toml     |   4 +-
 .../universal/hap/kfp_ray.disable/hap_wf.py   |   2 +-
 .../universal/hap/python/pyproject.toml       |   2 +-
 .../universal/hap/python/requirements.txt     |   2 +-
 transforms/universal/hap/ray/pyproject.toml   |   2 +-
 transforms/universal/hap/ray/requirements.txt |   4 +-
 .../noop/kfp_ray/noop_multiple_wf.py          |   2 +-
 transforms/universal/noop/kfp_ray/noop_wf.py  |   2 +-
 .../universal/noop/python/pyproject.toml      |   4 +-
 transforms/universal/noop/ray/pyproject.toml  |   6 +-
 .../universal/noop/spark/pyproject.toml       |   6 +-
 .../universal/profiler/kfp_ray/profiler_wf.py |   2 +-
 .../universal/profiler/python/pyproject.toml  |   2 +-
 .../profiler/python/requirements.txt          |   2 +-
 .../universal/profiler/ray/pyproject.toml     |   6 +-
 .../universal/profiler/spark/pyproject.toml   |   6 +-
 .../universal/resize/kfp_ray/resize_wf.py     |   2 +-
 .../universal/resize/python/pyproject.toml    |   2 +-
 .../universal/resize/python/requirements.txt  |   2 +-
 .../universal/resize/ray/pyproject.toml       |   6 +-
 .../universal/resize/spark/pyproject.toml     |   6 +-
 .../tokenization/kfp_ray/tokenization_wf.py   |   2 +-
 .../tokenization/python/pyproject.toml        |   2 +-
 .../tokenization/python/requirements.txt      |   2 +-
 .../universal/tokenization/ray/pyproject.toml |   6 +-
 .../universal/web2parquet/requirements.txt    |   2 +-
 120 files changed, 938 insertions(+), 202 deletions(-)

diff --git a/.make.versions b/.make.versions
index 564caa214..ba5e87b0f 100644
--- a/.make.versions
+++ b/.make.versions
@@ -16,10 +16,10 @@ DPK_MAJOR_VERSION=0
 # The minor version is incremented manually when significant features have been added that are backward compatible with the previous major.minor release.
 DPK_MINOR_VERSION=2
 # The minor version is incremented AUTOMATICALLY by the release.sh script when a new release is set.
-DPK_MICRO_VERSION=3
+DPK_MICRO_VERSION=2
 # The suffix is generally always set in the main/development branch and only nulled out when creating release branches.
 # It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi. 
-DPK_VERSION_SUFFIX=.dev0
+DPK_VERSION_SUFFIX=
 
 DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX)
 
diff --git a/data-processing-lib/pyproject.toml b/data-processing-lib/pyproject.toml
index a347a14a1..4f5734be1 100644
--- a/data-processing-lib/pyproject.toml
+++ b/data-processing-lib/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "data_prep_toolkit"
-version = "0.2.3.dev0"
+version = "0.2.2"
 keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
 requires-python = ">=3.10,<3.13"
 description = "Data Preparation Toolkit Library for Ray and Python"
diff --git a/kfp/kfp_ray_components/createRayClusterComponent.yaml b/kfp/kfp_ray_components/createRayClusterComponent.yaml
index 30b0b66d8..78976a97c 100644
--- a/kfp/kfp_ray_components/createRayClusterComponent.yaml
+++ b/kfp/kfp_ray_components/createRayClusterComponent.yaml
@@ -11,7 +11,7 @@ inputs:
 
 implementation:
     container:
-        image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+        image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
         # command is a list of strings (command-line arguments).
         # The YAML language has two syntaxes for lists and you can use either of them.
         # Here we use the "flow syntax" - comma-separated strings inside square brackets.
diff --git a/kfp/kfp_ray_components/deleteRayClusterComponent.yaml b/kfp/kfp_ray_components/deleteRayClusterComponent.yaml
index 44e199c47..c75554d5f 100644
--- a/kfp/kfp_ray_components/deleteRayClusterComponent.yaml
+++ b/kfp/kfp_ray_components/deleteRayClusterComponent.yaml
@@ -9,7 +9,7 @@ inputs:
 
 implementation:
     container:
-        image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+        image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
         # command is a list of strings (command-line arguments).
         # The YAML language has two syntaxes for lists and you can use either of them.
         # Here we use the "flow syntax" - comma-separated strings inside square brackets.
diff --git a/kfp/kfp_ray_components/executeRayJobComponent.yaml b/kfp/kfp_ray_components/executeRayJobComponent.yaml
index 7ab517bff..2e02c3adf 100644
--- a/kfp/kfp_ray_components/executeRayJobComponent.yaml
+++ b/kfp/kfp_ray_components/executeRayJobComponent.yaml
@@ -12,7 +12,7 @@ inputs:
 
 implementation:
     container:
-        image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+        image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
         # command is a list of strings (command-line arguments).
         # The YAML language has two syntaxes for lists and you can use either of them.
         # Here we use the "flow syntax" - comma-separated strings inside square brackets.
diff --git a/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml b/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml
index 9b98912f0..37c0198bf 100644
--- a/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml
+++ b/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml
@@ -13,7 +13,7 @@ inputs:
 
 implementation:
     container:
-        image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+        image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
         # command is a list of strings (command-line arguments).
         # The YAML language has two syntaxes for lists and you can use either of them.
         # Here we use the "flow syntax" - comma-separated strings inside square brackets.
diff --git a/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml b/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml
index 6b261a003..ec82e9484 100644
--- a/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml
+++ b/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml
@@ -27,7 +27,7 @@ outputs:
 
 implementation:
     container:
-        image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+        image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
         # command is a list of strings (command-line arguments).
         # The YAML language has two syntaxes for lists, and you can use either of them.
         # Here we use the "flow syntax" - comma-separated strings inside square brackets.
diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
index f09b2f32a..daa903aaf 100644
--- a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "data_prep_toolkit_kfp_v1"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Data Preparation Kit Library. KFP support"
 license = {text = "Apache-2.0"}
@@ -13,7 +13,7 @@ authors = [
 ]
 dependencies = [
     "kfp==1.8.22",
-    "data-prep-toolkit-kfp-shared==0.2.3.dev0",
+    "data-prep-toolkit-kfp-shared==0.2.2",
 ]
 
 [build-system]
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml
index 01c5b3e17..61f54663f 100644
--- a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "data_prep_toolkit_kfp_v2"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Data Preparation Kit Library. KFP support"
 license = {text = "Apache-2.0"}
@@ -14,7 +14,7 @@ authors = [
 dependencies = [
     "kfp==2.8.0",
     "kfp-kubernetes==1.2.0",
-    "data-prep-toolkit-kfp-shared==0.2.3.dev0",
+    "data-prep-toolkit-kfp-shared==0.2.2",
 ]
 
 [build-system]
diff --git a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml
index aa7a6dd3a..17ed57ea9 100644
--- a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml
+++ b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "data_prep_toolkit_kfp_shared"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Data Preparation Kit Library. KFP support"
 license = {text = "Apache-2.0"}
@@ -14,7 +14,7 @@ authors = [
 dependencies = [
     "requests",
     "kubernetes",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/Makefile b/transforms/Makefile
index 3e8b9cfde..ed492db4d 100644
--- a/transforms/Makefile
+++ b/transforms/Makefile
@@ -107,7 +107,7 @@ build-pkg-dist:
 	-rm -fr src
 	mkdir src
 	# Copy all the src folders recursively (not clear if they have subfolders)
-	for x in $(shell find . | grep '[ray| python]/src$$') ; do \
+	for x in $(shell find . | grep -E '/(ray|python|spark)/src$$') ; do \
 		echo $$x ; \
 		if [ -d "$$x" ]; then \
 		   cp -r $$x/* src ; \
diff --git a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py
index f3f491e4b..3e5f262b9 100644
--- a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py
+++ b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py
@@ -25,7 +25,7 @@
 
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/code/code2parquet/python/pyproject.toml b/transforms/code/code2parquet/python/pyproject.toml
index be84b2f20..d4f8c11cf 100644
--- a/transforms/code/code2parquet/python/pyproject.toml
+++ b/transforms/code/code2parquet/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_code2parquet_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "code2parquet Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/code/code2parquet/python/requirements.txt b/transforms/code/code2parquet/python/requirements.txt
index cec7f9c5f..0ce538837 100644
--- a/transforms/code/code2parquet/python/requirements.txt
+++ b/transforms/code/code2parquet/python/requirements.txt
@@ -1,3 +1,3 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 parameterized
 pandas
diff --git a/transforms/code/code2parquet/ray/pyproject.toml b/transforms/code/code2parquet/ray/pyproject.toml
index d56fed1e8..666551e94 100644
--- a/transforms/code/code2parquet/ray/pyproject.toml
+++ b/transforms/code/code2parquet/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_code2parquet_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "code2parquet Ray Transform"
 license = {text = "Apache-2.0"}
@@ -10,8 +10,8 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
 ]
 dependencies = [
-    "data-prep-toolkit[ray]==0.2.3.dev0",
-    "dpk-code2parquet-transform-python==0.2.3.dev0",
+    "data-prep-toolkit[ray]>=0.2.2",
+    "dpk-code2parquet-transform-python==0.2.2",
     "parameterized",
     "pandas",
 ]
diff --git a/transforms/code/code_profiler/python/pyproject.toml b/transforms/code/code_profiler/python/pyproject.toml
index 334c86fed..d3c2c2196 100644
--- a/transforms/code/code_profiler/python/pyproject.toml
+++ b/transforms/code/code_profiler/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_code_profiler_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Code Profiler Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/code/code_profiler/python/requirements.txt b/transforms/code/code_profiler/python/requirements.txt
index 27706b467..a38213e0f 100644
--- a/transforms/code/code_profiler/python/requirements.txt
+++ b/transforms/code/code_profiler/python/requirements.txt
@@ -1,4 +1,4 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 parameterized
 pandas
 aiolimiter==1.1.0
diff --git a/transforms/code/code_profiler/ray/pyproject.toml b/transforms/code/code_profiler/ray/pyproject.toml
index 9b760c1c3..773ae353b 100644
--- a/transforms/code/code_profiler/ray/pyproject.toml
+++ b/transforms/code/code_profiler/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_code_profiler_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Code Profiler Ray Transform"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Pankaj Thorat", email = "pankaj.thorat@ibm.com" },
 ]
 dependencies = [
-	"dpk-code-profiler-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+	"dpk-code-profiler-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 	]
 
 [build-system]
diff --git a/transforms/code/code_quality/kfp_ray/code_quality_wf.py b/transforms/code/code_quality/kfp_ray/code_quality_wf.py
index 6a4ccec1b..7f5aa9768 100644
--- a/transforms/code/code_quality/kfp_ray/code_quality_wf.py
+++ b/transforms/code/code_quality/kfp_ray/code_quality_wf.py
@@ -24,7 +24,7 @@
 task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:latest"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/code/code_quality/python/pyproject.toml b/transforms/code/code_quality/python/pyproject.toml
index 17cbce67d..d7b452d6b 100644
--- a/transforms/code/code_quality/python/pyproject.toml
+++ b/transforms/code/code_quality/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_code_quality_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Code Quality Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/code/code_quality/python/requirements.txt b/transforms/code/code_quality/python/requirements.txt
index ef627d39f..10eb1001b 100644
--- a/transforms/code/code_quality/python/requirements.txt
+++ b/transforms/code/code_quality/python/requirements.txt
@@ -1,3 +1,3 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 bs4==0.0.2
 transformers==4.38.2
diff --git a/transforms/code/code_quality/ray/pyproject.toml b/transforms/code/code_quality/ray/pyproject.toml
index eceee32ed..5bf3d2dff 100644
--- a/transforms/code/code_quality/ray/pyproject.toml
+++ b/transforms/code/code_quality/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_code_quality_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Code Quality Ray Transform"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" },
 ]
 dependencies = [
-    "dpk-code-quality-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-code-quality-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py b/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py
index 9bb315569..5049a9c11 100644
--- a/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py
+++ b/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py
@@ -24,7 +24,7 @@
 task_image = "quay.io/dataprep1/data-prep-kit/header_cleanser-ray:latest"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/code/header_cleanser/python/pyproject.toml b/transforms/code/header_cleanser/python/pyproject.toml
index 3703ec55f..2dadeaf02 100644
--- a/transforms/code/header_cleanser/python/pyproject.toml
+++ b/transforms/code/header_cleanser/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_header_cleanser_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "License and Copyright Removal Transform for Python"
 license = {text = "Apache-2.0"}
diff --git a/transforms/code/header_cleanser/python/requirements.txt b/transforms/code/header_cleanser/python/requirements.txt
index 915a462dc..9123fc955 100644
--- a/transforms/code/header_cleanser/python/requirements.txt
+++ b/transforms/code/header_cleanser/python/requirements.txt
@@ -1,3 +1,3 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 scancode-toolkit==32.1.0 ; platform_system != 'Darwin'
 
diff --git a/transforms/code/header_cleanser/ray/pyproject.toml b/transforms/code/header_cleanser/ray/pyproject.toml
index 5fb1bcf26..179aa7769 100644
--- a/transforms/code/header_cleanser/ray/pyproject.toml
+++ b/transforms/code/header_cleanser/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_header_cleanser_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "License and copyright removal Transform for Ray"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Yash kalathiya", email = "yashkalathiya164@gmail.com" },
 ]
 dependencies = [
-    "dpk-header-cleanser-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-header-cleanser-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
     "scancode-toolkit==32.1.0",
 ]
 
diff --git a/transforms/code/license_select/kfp_ray/license_select_wf.py b/transforms/code/license_select/kfp_ray/license_select_wf.py
index 7dba0d9d1..9bdcc6e96 100644
--- a/transforms/code/license_select/kfp_ray/license_select_wf.py
+++ b/transforms/code/license_select/kfp_ray/license_select_wf.py
@@ -25,7 +25,7 @@
 
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/code/license_select/python/pyproject.toml b/transforms/code/license_select/python/pyproject.toml
index 3345d3a5a..b445c6b09 100644
--- a/transforms/code/license_select/python/pyproject.toml
+++ b/transforms/code/license_select/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_license_select_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "License Select Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/code/license_select/python/requirements.txt b/transforms/code/license_select/python/requirements.txt
index 2f67f6a80..e9abc2535 100644
--- a/transforms/code/license_select/python/requirements.txt
+++ b/transforms/code/license_select/python/requirements.txt
@@ -1 +1 @@
-data-prep-toolkit==0.2.3.dev0
\ No newline at end of file
+data-prep-toolkit>=0.2.2
\ No newline at end of file
diff --git a/transforms/code/license_select/ray/pyproject.toml b/transforms/code/license_select/ray/pyproject.toml
index ce5979d62..96b293364 100644
--- a/transforms/code/license_select/ray/pyproject.toml
+++ b/transforms/code/license_select/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_license_select_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "License Select Transform"
 license = {text = "Apache-2.0"}
@@ -10,8 +10,8 @@ authors = [
     { name = "Mark Lewis", email = "mark_lewis@uk.ibm.com" },
 ]
 dependencies = [
-    "dpk-license-select-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-license-select-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/code/malware/kfp_ray/malware_wf.py b/transforms/code/malware/kfp_ray/malware_wf.py
index bede80b88..89eb9d730 100644
--- a/transforms/code/malware/kfp_ray/malware_wf.py
+++ b/transforms/code/malware/kfp_ray/malware_wf.py
@@ -24,7 +24,7 @@
 task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:latest"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/code/malware/python/pyproject.toml b/transforms/code/malware/python/pyproject.toml
index a1bc05ab4..4dc1a9012 100644
--- a/transforms/code/malware/python/pyproject.toml
+++ b/transforms/code/malware/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_malware_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Malware Python Transform"
 license = {text = "Apache-2.0"}
@@ -9,7 +9,7 @@ authors = [
     { name = "Takuya Goto", email = "tkyg@jp.ibm.com" },
 ]
 dependencies = [
-    "data-prep-toolkit==0.2.3.dev0",
+    "data-prep-toolkit>=0.2.2",
     "clamd==1.0.2",
 ]
 
diff --git a/transforms/code/malware/ray/pyproject.toml b/transforms/code/malware/ray/pyproject.toml
index 659ee62ef..22e7ecc28 100644
--- a/transforms/code/malware/ray/pyproject.toml
+++ b/transforms/code/malware/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_malware_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Malware Ray Transform"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Takuya Goto", email = "tkyg@jp.ibm.com" },
 ]
 dependencies = [
-    "dpk-malware-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-malware-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py
index 11f001bfa..bb114e3d6 100644
--- a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py
+++ b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py
@@ -24,7 +24,7 @@
 task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:latest"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/code/proglang_select/python/pyproject.toml b/transforms/code/proglang_select/python/pyproject.toml
index e5736a9c7..e20a62f7c 100644
--- a/transforms/code/proglang_select/python/pyproject.toml
+++ b/transforms/code/proglang_select/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_proglang_select_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Programming Language Selection Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/code/proglang_select/python/requirements.txt b/transforms/code/proglang_select/python/requirements.txt
index 2f67f6a80..e9abc2535 100644
--- a/transforms/code/proglang_select/python/requirements.txt
+++ b/transforms/code/proglang_select/python/requirements.txt
@@ -1 +1 @@
-data-prep-toolkit==0.2.3.dev0
\ No newline at end of file
+data-prep-toolkit>=0.2.2
\ No newline at end of file
diff --git a/transforms/code/proglang_select/ray/pyproject.toml b/transforms/code/proglang_select/ray/pyproject.toml
index d8288d189..3d330d3cc 100644
--- a/transforms/code/proglang_select/ray/pyproject.toml
+++ b/transforms/code/proglang_select/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_proglang_select_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Programming Language Selection Ray Transform"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" },
 ]
 dependencies = [
-    "dpk-proglang-select-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-proglang-select-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py b/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py
index 38a829fab..fa739bfd0 100644
--- a/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py
+++ b/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py
@@ -24,7 +24,7 @@
 EXEC_SCRIPT_NAME: str = "repo_level_order_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/code/repo_level_ordering/ray/pyproject.toml b/transforms/code/repo_level_ordering/ray/pyproject.toml
index 9581c8941..602799503 100644
--- a/transforms/code/repo_level_ordering/ray/pyproject.toml
+++ b/transforms/code/repo_level_ordering/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_repo_level_order_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "repo_level_order Ray Transform"
 license = {text = "Apache-2.0"}
@@ -11,7 +11,7 @@ authors = [
     { name = "Shanmukha Guttula", email = "shagutt1@in.ibm.com" },
 ]
 dependencies = [
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "data-prep-toolkit[ray]>=0.2.2",
     "networkx==3.3",
     "colorlog==6.8.2",
     "func-timeout==4.3.5",
diff --git a/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py b/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py
index 7e30ee8b8..1fd927356 100644
--- a/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py
+++ b/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "doc_chunk_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py b/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py
index 387c3bda7..e128df8b0 100644
--- a/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py
+++ b/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "doc_chunk_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/doc_chunk/python/requirements.txt b/transforms/language/doc_chunk/python/requirements.txt
index 207ab9249..2d282a8ac 100644
--- a/transforms/language/doc_chunk/python/requirements.txt
+++ b/transforms/language/doc_chunk/python/requirements.txt
@@ -1,4 +1,4 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 docling-core==2.3.0
 pydantic>=2.0.0,<2.10.0 
 llama-index-core>=0.11.22,<0.12.0
diff --git a/transforms/language/doc_chunk/ray/pyproject.toml b/transforms/language/doc_chunk/ray/pyproject.toml
index 4fb356038..6694456ce 100644
--- a/transforms/language/doc_chunk/ray/pyproject.toml
+++ b/transforms/language/doc_chunk/ray/pyproject.toml
@@ -12,7 +12,7 @@ authors = [
 ]
 dependencies = [
     "dpk-doc-chunk-transform-python==0.3.0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py b/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py
index 436d93ff3..f103b7269 100644
--- a/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py
+++ b/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "doc_quality_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py b/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py
index f39fd7e39..0ca4fb865 100644
--- a/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py
+++ b/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "doc_quality_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/doc_quality/python/pyproject.toml b/transforms/language/doc_quality/python/pyproject.toml
index 23538b8c7..f3abe0337 100644
--- a/transforms/language/doc_quality/python/pyproject.toml
+++ b/transforms/language/doc_quality/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_doc_quality_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Document Quality Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/language/doc_quality/python/requirements.txt b/transforms/language/doc_quality/python/requirements.txt
index 4aa2d8111..2e29c9cb4 100644
--- a/transforms/language/doc_quality/python/requirements.txt
+++ b/transforms/language/doc_quality/python/requirements.txt
@@ -1,2 +1,2 @@
 
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
diff --git a/transforms/language/doc_quality/ray/pyproject.toml b/transforms/language/doc_quality/ray/pyproject.toml
index ec56ac2c7..62f97e538 100644
--- a/transforms/language/doc_quality/ray/pyproject.toml
+++ b/transforms/language/doc_quality/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_doc_quality_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Document Quality Ray Transform"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" }
 ]
 dependencies = [
-    "dpk-doc_quality-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-doc_quality-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py b/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py
index 4eb8b9de1..4eaef2fea 100644
--- a/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py
+++ b/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "html2parquet_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/html2parquet/python/pyproject.toml b/transforms/language/html2parquet/python/pyproject.toml
index 3a7a6efbc..af6b64763 100644
--- a/transforms/language/html2parquet/python/pyproject.toml
+++ b/transforms/language/html2parquet/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_html2parquet_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "HTML2PARQUET Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/language/html2parquet/python/requirements.txt b/transforms/language/html2parquet/python/requirements.txt
index f21e65774..42e2459b2 100644
--- a/transforms/language/html2parquet/python/requirements.txt
+++ b/transforms/language/html2parquet/python/requirements.txt
@@ -1,2 +1,2 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 trafilatura==1.12.0
diff --git a/transforms/language/html2parquet/ray/pyproject.toml b/transforms/language/html2parquet/ray/pyproject.toml
index 5e888748c..859706621 100644
--- a/transforms/language/html2parquet/ray/pyproject.toml
+++ b/transforms/language/html2parquet/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_html2parquet_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "HTML2PARQUET Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/language/html2parquet/ray/requirements.txt b/transforms/language/html2parquet/ray/requirements.txt
index 9aa193432..700267692 100644
--- a/transforms/language/html2parquet/ray/requirements.txt
+++ b/transforms/language/html2parquet/ray/requirements.txt
@@ -1,3 +1,3 @@
-dpk-html2parquet-transform-python==0.2.3.dev0
-data-prep-toolkit[ray]==0.2.3.dev0
+dpk-html2parquet-transform-python==0.2.2
+data-prep-toolkit[ray]>=0.2.2
 trafilatura==1.12.0
\ No newline at end of file
diff --git a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py
index a89c54ab3..e853c2328 100644
--- a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py
+++ b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/lang_id/kfp_ray/lang_id_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_wf.py
index 2ac84645d..5aed719c5 100644
--- a/transforms/language/lang_id/kfp_ray/lang_id_wf.py
+++ b/transforms/language/lang_id/kfp_ray/lang_id_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/lang_id/python/pyproject.toml b/transforms/language/lang_id/python/pyproject.toml
index a69724a2d..43650a50a 100644
--- a/transforms/language/lang_id/python/pyproject.toml
+++ b/transforms/language/lang_id/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_lang_id_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Language Identification Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/language/lang_id/python/requirements.txt b/transforms/language/lang_id/python/requirements.txt
index 06bec1ab9..1f90bcd54 100644
--- a/transforms/language/lang_id/python/requirements.txt
+++ b/transforms/language/lang_id/python/requirements.txt
@@ -1,4 +1,4 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 fasttext==0.9.2
 langcodes==3.3.0
 huggingface-hub >= 0.21.4, <1.0.0
diff --git a/transforms/language/lang_id/ray/pyproject.toml b/transforms/language/lang_id/ray/pyproject.toml
index dba929905..b60a3a5bb 100644
--- a/transforms/language/lang_id/ray/pyproject.toml
+++ b/transforms/language/lang_id/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_lang_id_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Language Identification Ray Transform"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" }
 ]
 dependencies = [
-    "dpk-lang_id-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-lang_id-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py
index 8992f1145..56e881b5e 100644
--- a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py
+++ b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "pdf2parquet_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py
index c9cdbf652..395918ac3 100644
--- a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py
+++ b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "pdf2parquet_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/pdf2parquet/python/requirements.txt b/transforms/language/pdf2parquet/python/requirements.txt
index 310909164..1d1aa2570 100644
--- a/transforms/language/pdf2parquet/python/requirements.txt
+++ b/transforms/language/pdf2parquet/python/requirements.txt
@@ -1,4 +1,4 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 docling-core==2.3.0
 docling-ibm-models==2.0.3
 deepsearch-glm==0.26.1
diff --git a/transforms/language/pdf2parquet/ray/requirements.txt b/transforms/language/pdf2parquet/ray/requirements.txt
index 34831cde8..40650d1a5 100644
--- a/transforms/language/pdf2parquet/ray/requirements.txt
+++ b/transforms/language/pdf2parquet/ray/requirements.txt
@@ -1,5 +1,5 @@
 dpk-pdf2parquet-transform-python==0.3.0
-data-prep-toolkit[ray]==0.2.3.dev0
+data-prep-toolkit[ray]>=0.2.2
 # docling-core==1.7.2
 # docling-ibm-models==2.0.0
 # deepsearch-glm==0.22.0
diff --git a/transforms/language/pii_redactor/python/pyproject.toml b/transforms/language/pii_redactor/python/pyproject.toml
index 72c1bf783..4a159bba0 100644
--- a/transforms/language/pii_redactor/python/pyproject.toml
+++ b/transforms/language/pii_redactor/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_pii_redactor_transform_python"
-version = "0.2.2.dev2"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "PII redactor Transform for Python"
 license = {text = "Apache-2.0"}
diff --git a/transforms/language/pii_redactor/python/requirements.txt b/transforms/language/pii_redactor/python/requirements.txt
index 0abcc1d96..51fbd2494 100644
--- a/transforms/language/pii_redactor/python/requirements.txt
+++ b/transforms/language/pii_redactor/python/requirements.txt
@@ -1,4 +1,4 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 presidio-analyzer>=2.2.355
 presidio-anonymizer>=2.2.355
 flair>=0.14.0
diff --git a/transforms/language/pii_redactor/ray/pyproject.toml b/transforms/language/pii_redactor/ray/pyproject.toml
index 4549851d0..a65aa5913 100644
--- a/transforms/language/pii_redactor/ray/pyproject.toml
+++ b/transforms/language/pii_redactor/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_pii_redactor_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "PII Redactor Ray Transform"
 license = {text = "Apache-2.0"}
@@ -10,8 +10,8 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
 ]
 dependencies = [
-    "dpk_pii_redactor_transform_python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk_pii_redactor_transform_python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
     "presidio-analyzer>=2.2.355",
     "presidio-anonymizer>=2.2.355",
     "flair>=0.14.0",
diff --git a/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py b/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py
index e522737a1..bad5e24cd 100644
--- a/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py
+++ b/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "text_encoder_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py b/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py
index f88fe9eef..5c762c2a1 100644
--- a/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py
+++ b/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "text_encoder_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/language/text_encoder/python/pyproject.toml b/transforms/language/text_encoder/python/pyproject.toml
index dc15beb6e..62182b27b 100644
--- a/transforms/language/text_encoder/python/pyproject.toml
+++ b/transforms/language/text_encoder/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_text_encoder_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Text Encoder Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/language/text_encoder/python/requirements.txt b/transforms/language/text_encoder/python/requirements.txt
index 3ac880bba..0d8160151 100644
--- a/transforms/language/text_encoder/python/requirements.txt
+++ b/transforms/language/text_encoder/python/requirements.txt
@@ -1,2 +1,2 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 sentence-transformers==3.0.1
diff --git a/transforms/language/text_encoder/ray/pyproject.toml b/transforms/language/text_encoder/ray/pyproject.toml
index f1b2c09d5..2f8483e2d 100644
--- a/transforms/language/text_encoder/ray/pyproject.toml
+++ b/transforms/language/text_encoder/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_text_encoder_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Text Encoder Ray Transform"
 license = {text = "Apache-2.0"}
@@ -11,8 +11,8 @@ authors = [
     { name = "Peter Staar", email = "taa@zurich.ibm.com" },
 ]
 dependencies = [
-    "dpk-text_encoder-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-text_encoder-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/requirements-ray.txt b/transforms/requirements-ray.txt
index b0527bdd6..11d0decf5 100644
--- a/transforms/requirements-ray.txt
+++ b/transforms/requirements-ray.txt
@@ -1,4 +1,4 @@
-data-prep-toolkit[ray]>=0.2.3.dev0
+data-prep-toolkit[ray]>=0.2.2
 networkx==3.3
 colorlog==6.8.2
 func-timeout==4.3.5
diff --git a/transforms/requirements.txt b/transforms/requirements.txt
index 934c95182..7317d33e3 100644
--- a/transforms/requirements.txt
+++ b/transforms/requirements.txt
@@ -1 +1 @@
-data-prep-toolkit>=0.2.3.dev0
+data-prep-toolkit>=0.2.2
diff --git a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py
index f41231159..7e1bd0b8e 100644
--- a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py
+++ b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py
@@ -22,7 +22,7 @@
 # the name of the job script
 EXEC_SCRIPT_NAME: str = "doc_id_transform_ray.py"
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/universal/doc_id/python/pyproject.toml b/transforms/universal/doc_id/python/pyproject.toml
index 1a962662d..a9e69f0bf 100644
--- a/transforms/universal/doc_id/python/pyproject.toml
+++ b/transforms/universal/doc_id/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_doc_id_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "ededup Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/doc_id/python/requirements.txt b/transforms/universal/doc_id/python/requirements.txt
index 2f67f6a80..e9abc2535 100644
--- a/transforms/universal/doc_id/python/requirements.txt
+++ b/transforms/universal/doc_id/python/requirements.txt
@@ -1 +1 @@
-data-prep-toolkit==0.2.3.dev0
\ No newline at end of file
+data-prep-toolkit>=0.2.2
\ No newline at end of file
diff --git a/transforms/universal/doc_id/ray/pyproject.toml b/transforms/universal/doc_id/ray/pyproject.toml
index da34dded3..ee022af54 100644
--- a/transforms/universal/doc_id/ray/pyproject.toml
+++ b/transforms/universal/doc_id/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_doc_id_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "docid Ray Transform"
 license = {text = "Apache-2.0"}
@@ -10,8 +10,8 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsk@ibm.com" },
 ]
 dependencies = [
-    "dpk_doc_id_transform_python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk_doc_id_transform_python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/universal/doc_id/spark/pyproject.toml b/transforms/universal/doc_id/spark/pyproject.toml
index 369a1bb72..f50d4f70d 100644
--- a/transforms/universal/doc_id/spark/pyproject.toml
+++ b/transforms/universal/doc_id/spark/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_doc_id_transform_spark"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Doc ID Spark Transform"
 license = {text = "Apache-2.0"}
@@ -10,7 +10,7 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsk@ibm.com" },
 ]
 dependencies = [
-    "data-prep-toolkit[spark]==0.2.3.dev0",
+    "data-prep-toolkit[spark]==0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/universal/ededup/kfp_ray/ededup_wf.py b/transforms/universal/ededup/kfp_ray/ededup_wf.py
index ab46daadb..d878bd3e2 100644
--- a/transforms/universal/ededup/kfp_ray/ededup_wf.py
+++ b/transforms/universal/ededup/kfp_ray/ededup_wf.py
@@ -24,7 +24,7 @@
 EXEC_SCRIPT_NAME: str = "ededup_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/universal/ededup/python/pyproject.toml b/transforms/universal/ededup/python/pyproject.toml
index da28e715f..67fd0f758 100644
--- a/transforms/universal/ededup/python/pyproject.toml
+++ b/transforms/universal/ededup/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_ededup_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "ededup Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/ededup/python/requirements.txt b/transforms/universal/ededup/python/requirements.txt
index aa73a106a..9fe419975 100644
--- a/transforms/universal/ededup/python/requirements.txt
+++ b/transforms/universal/ededup/python/requirements.txt
@@ -1,3 +1,3 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 mmh3>=4.1.0
 xxhash==3.4.1
diff --git a/transforms/universal/ededup/ray/pyproject.toml b/transforms/universal/ededup/ray/pyproject.toml
index 424e220fd..58b39d7d7 100644
--- a/transforms/universal/ededup/ray/pyproject.toml
+++ b/transforms/universal/ededup/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_ededup_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "ededup Ray Transform"
 license = {text = "Apache-2.0"}
@@ -10,8 +10,8 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
 ]
 dependencies = [
-    "data-prep-toolkit[ray]==0.2.3.dev0",
-    "dpk_ededup_transform_python==0.2.3.dev0",
+    "data-prep-toolkit[ray]>=0.2.2",
+    "dpk_ededup_transform_python==0.2.2",
     "tqdm==4.66.3",
 ]
 
diff --git a/transforms/universal/fdedup/fdedup_python.ipynb b/transforms/universal/fdedup/fdedup_python.ipynb
index 83f9bd600..684583ffd 100644
--- a/transforms/universal/fdedup/fdedup_python.ipynb
+++ b/transforms/universal/fdedup/fdedup_python.ipynb
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "4c45c3c6-e4d7-4e61-8de6-32d61f2ce695",
    "metadata": {},
    "outputs": [],
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "c2a12abc-9460-4e45-8961-873b48a9ab19",
    "metadata": {},
    "outputs": [],
@@ -71,7 +71,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "e90a853e-412f-45d7-af3d-959e755aeebb",
    "metadata": {},
    "outputs": [],
@@ -102,10 +102,102 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "0775e400-7469-49a6-8998-bd4772931459",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "13:30:29 INFO - Starting SignatureCalculation step\n",
+      "13:30:29 INFO - Got parameters for SignatureCalculation\n",
+      "13:30:29 INFO - minhash parameters are : {'document_id_column': 'int_id_column', 'contents_column': 'contents', 'seed': 42, 'num_permutations': 112, 'jaccard_similarity_threshold': 0.75, 'word_shingle_size': 5, 'num_bands': 14, 'num_minhashes_per_band': 8, 'num_segments': 1, 'shingle_option': 'word'}\n",
+      "13:30:29 INFO - data factory scdata_ is using local configuration without input/output path\n",
+      "13:30:29 INFO - data factory scdata_ max_files -1, n_sample -1\n",
+      "13:30:29 INFO - data factory scdata_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:30:29 INFO - pipeline id pipeline_id\n",
+      "13:30:29 INFO - code location None\n",
+      "13:30:29 INFO - data factory data_ is using local data access: input_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/python/test-data/input output_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/python/output\n",
+      "13:30:29 INFO - data factory data_ max_files -1, n_sample -1\n",
+      "13:30:29 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:30:29 INFO - orchestrator minhash started at 2024-11-26 13:30:29\n",
+      "13:30:29 INFO - Number of files is 2, source profile {'max_file_size': 0.0029497146606445312, 'min_file_size': 0.0013322830200195312, 'total_file_size': 0.0042819976806640625}\n",
+      "13:30:33 INFO - Completed 1 files (50.0%) in 0.074 min\n",
+      "13:30:33 INFO - Completed 2 files (100.0%) in 0.074 min\n",
+      "13:30:33 INFO - Done processing 2 files, waiting for flush() completion.\n",
+      "13:30:33 INFO - Starting flush()\n",
+      "13:30:34 INFO - Wrote 14 tables with a total size of 80,640 bytes\n",
+      "13:30:34 INFO - done flushing in 0.063 sec\n",
+      "13:30:34 INFO - Completed execution in 0.075 min, execution result 0\n",
+      "13:30:34 INFO - SignatureCalculation completed successfully\n",
+      "13:30:34 INFO - Starting ClusterAnalysis step\n",
+      "13:30:34 INFO - Got parameters for ClusterAnalysis\n",
+      "13:30:34 INFO - cluster parameters are : {'jaccard_similarity_threshold': 0.75, 'num_bands': 14, 'num_segments': 1, 'sort_output': False}\n",
+      "13:30:34 INFO - pipeline id pipeline_id\n",
+      "13:30:34 INFO - code location None\n",
+      "13:30:34 INFO - data factory data_ is using local data access: input_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/python/output/bands output_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/python/output/docs_to_remove\n",
+      "13:30:34 INFO - data factory data_ max_files -1, n_sample -1\n",
+      "13:30:34 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:30:34 INFO - orchestrator cluster started at 2024-11-26 13:30:34\n",
+      "13:30:34 INFO - Number of folders is 14\n",
+      "13:30:34 INFO - Completed 1 files (7.14%) in 0.0 min\n",
+      "13:30:34 INFO - Completed 2 files (14.29%) in 0.0 min\n",
+      "13:30:34 INFO - Completed 3 files (21.43%) in 0.001 min\n",
+      "13:30:34 INFO - Completed 4 files (28.57%) in 0.001 min\n",
+      "13:30:34 INFO - Completed 5 files (35.71%) in 0.001 min\n",
+      "13:30:34 INFO - Completed 6 files (42.86%) in 0.001 min\n",
+      "13:30:34 INFO - Completed 7 files (50.0%) in 0.001 min\n",
+      "13:30:34 INFO - Completed 8 files (57.14%) in 0.002 min\n",
+      "13:30:34 INFO - Completed 9 files (64.29%) in 0.002 min\n",
+      "13:30:34 INFO - Completed 10 files (71.43%) in 0.002 min\n",
+      "13:30:34 INFO - Completed 11 files (78.57%) in 0.002 min\n",
+      "13:30:34 INFO - Completed 12 files (85.71%) in 0.002 min\n",
+      "13:30:34 INFO - Completed 13 files (92.86%) in 0.002 min\n",
+      "13:30:34 INFO - Completed 14 files (100.0%) in 0.003 min\n",
+      "13:30:34 INFO - Done processing 14 files, waiting for flush() completion.\n",
+      "13:30:34 INFO - done flushing in 0.0 sec\n",
+      "13:30:34 INFO - Completed execution in 0.003 min, execution result 0\n",
+      "13:30:34 INFO - ClusterAnalysis completed successfully\n",
+      "13:30:34 INFO - Starting GetDuplicateList step\n",
+      "13:30:34 INFO - Got parameters for GetDuplicateList\n",
+      "13:30:34 INFO - fdlist parameters are : {'docs_to_remove': 'docs_to_remove', 'consolidated_filename': 'docs_to_remove_consolidated/docs_to_remove_consolidated.parquet', 'sort_output': False}\n",
+      "13:30:34 INFO - pipeline id pipeline_id\n",
+      "13:30:34 INFO - code location None\n",
+      "13:30:34 INFO - data factory data_ is using local data access: input_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/python/output output_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/python/output\n",
+      "13:30:34 INFO - data factory data_ max_files -1, n_sample -1\n",
+      "13:30:34 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:30:34 INFO - orchestrator fdlist started at 2024-11-26 13:30:34\n",
+      "13:30:34 INFO - Number of folders is 1\n",
+      "13:30:34 INFO - Get Duplicate List for folder docs_to_remove\n",
+      "13:30:34 INFO - 8 documents marked as duplicates\n",
+      "13:30:34 INFO - Completed 1 files (100.0%) in 0.0 min\n",
+      "13:30:34 INFO - Done processing 1 files, waiting for flush() completion.\n",
+      "13:30:34 INFO - done flushing in 0.0 sec\n",
+      "13:30:34 INFO - Completed execution in 0.001 min, execution result 0\n",
+      "13:30:34 INFO - GetDuplicateList completed successfully\n",
+      "13:30:34 INFO - Starting DataCleaning step\n",
+      "13:30:34 INFO - Got parameters for DataCleaning\n",
+      "13:30:34 INFO - fdclean parameters are : {'document_id_column': 'int_id_column', 'duplicate_list_location': 'docs_to_remove_consolidated/docs_to_remove_consolidated.parquet', 'operation_mode': 'filter_duplicates'}\n",
+      "13:30:34 INFO - data factory dcdata_ is using local configuration without input/output path\n",
+      "13:30:34 INFO - data factory dcdata_ max_files -1, n_sample -1\n",
+      "13:30:34 INFO - data factory dcdata_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:30:34 INFO - pipeline id pipeline_id\n",
+      "13:30:34 INFO - code location None\n",
+      "13:30:34 INFO - data factory data_ is using local data access: input_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/python/test-data/input output_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/python/output/cleaned\n",
+      "13:30:34 INFO - data factory data_ max_files -1, n_sample -1\n",
+      "13:30:34 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:30:34 INFO - orchestrator fdclean started at 2024-11-26 13:30:34\n",
+      "13:30:34 INFO - Number of files is 2, source profile {'max_file_size': 0.0029497146606445312, 'min_file_size': 0.0013322830200195312, 'total_file_size': 0.0042819976806640625}\n",
+      "13:30:34 INFO - Completed 1 files (50.0%) in 0.0 min\n",
+      "13:30:34 INFO - Completed 2 files (100.0%) in 0.0 min\n",
+      "13:30:34 INFO - Done processing 2 files, waiting for flush() completion.\n",
+      "13:30:34 INFO - done flushing in 0.0 sec\n",
+      "13:30:34 INFO - Completed execution in 0.0 min, execution result 0\n",
+      "13:30:34 INFO - DataCleaning completed successfully\n"
+     ]
+    }
+   ],
    "source": [
     "\n",
     "sys.argv = ParamsUtils.dict_to_req(d=params)\n",
@@ -126,10 +218,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "7276fe84-6512-4605-ab65-747351e13a7c",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['python/output/cleaned/metadata.json',\n",
+       " 'python/output/cleaned/data_1',\n",
+       " 'python/output/cleaned/data_2']"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "import glob\n",
     "glob.glob(\"python/output/cleaned/*\")"
@@ -145,10 +250,167 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "5b22234f-f7a1-4b92-b2ac-376b2545abce",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "shape: (12, 2)\n",
+      "┌───────────────┬──────────────────────────────────────────────────────────────────────────────────┐\n",
+      "│ int_id_column ┆ contents                                                                         │\n",
+      "│ ---           ┆ ---                                                                              │\n",
+      "│ i64           ┆ str                                                                              │\n",
+      "╞═══════════════╪══════════════════════════════════════════════════════════════════════════════════╡\n",
+      "│ 1             ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ PR Newswire October 12, 2019                                                     │\n",
+      "│               ┆ 145-year-old Retailer Anchors Woodland Mall Just Outside Grand Rapids;           │\n",
+      "│               ┆ New Location Continues Strategic National Expansion Plans                        │\n",
+      "│               ┆ DAVENPORT, Iowa, Oct. 12, 2019 /PRNewswire/ -- Von Maur Department Stores opened │\n",
+      "│               ┆ a new store today at Woodland Mall in Kentwood, Mich. The 90,000-square-foot     │\n",
+      "│               ┆ store is the Company's third location in Michigan.                               │\n",
+      "│               ┆ Known for its outstanding selection of brand name and specialty apparel, shoes,  │\n",
+      "│               ┆ accessories and gifts, the store features products from leading brands such as   │\n",
+      "│               ┆ Eileen Fisher, Vineyard Vines, Free People, and Kendra Scott, among many others. │\n",
+      "│               ┆ Von Maur is also widely-regarded for its superior customer service, including an │\n",
+      "│               ┆ interest-free charge card, accommodating return policy, free gift wrapping and   │\n",
+      "│               ┆ free shipping services.                                                          │\n",
+      "│               ┆ Today's opening continues to build upon the momentum of the family-owned         │\n",
+      "│               ┆ Company's targeted national growth strategy. Von Maur opened its first Wisconsin │\n",
+      "│               ┆ location in 2017 and a second Minnesota location in 2018, and it has grown in    │\n",
+      "│               ┆ new states beyond its Midwestern footprint, including New York, Alabama and      │\n",
+      "│               ┆ Oklahoma. Additionally, the Company has plans to open its second Wisconsin       │\n",
+      "│               ┆ location in Madison in Fall 2021.                                                │\n",
+      "│               ┆ \"With its easy accessibility to the larger Grand Rapids area and exceptional     │\n",
+      "│               ┆ collection of shopping, dining and entertainment options, Woodland Mall is a     │\n",
+      "│               ┆ fantastic location for us to continue growing our brand in Michigan,\" said Jim   │\n",
+      "│               ┆ von Maur, president of Von Maur. \"From the moment shoppers walk through our      │\n",
+      "│               ┆ doors, creating an unrivaled shopping experience is the motivation behind        │\n",
+      "│               ┆ everything we do. We look forward to extending our offerings of brand name       │\n",
+      "│               ┆ merchandise and signature customer service to the Grand Rapids area for many     │\n",
+      "│               ┆ years to come.\"                                                                  │\n",
+      "│               ┆ \"We are thrilled to welcome Von Maur, known for their high-quality merchandise   │\n",
+      "│               ┆ and exceptional service, as the anchor of the newly developed wing at Woodland   │\n",
+      "│               ┆ Mall,\" said Joe Coradino, CEO of PREIT. \"The addition most certainly solidifies  │\n",
+      "│               ┆ Woodland Mall's place as the premier retail and entertainment destination in     │\n",
+      "│               ┆ Grand Rapids, driving its place as a top-performing PREIT property.\"             │\n",
+      "│               ┆ Centrally-located for shoppers from Grand Rapids and the surrounding areas, the  │\n",
+      "│               ┆ new single story Von Maur store features the Company's signature exterior brick  │\n",
+      "│               ┆ façade, open expansive floor plan, and residential ambiance, including music     │\n",
+      "│               ┆ from the store's grand piano.                                                    │\n",
+      "│               ┆ The Woodland Mall store will eventually employ up to 150 associates; the         │\n",
+      "│               ┆ majority of them will be full-time. Von Maur offers above-market wages,          │\n",
+      "│               ┆ excellent benefits and a positive, professional work environment. Hours of       │\n",
+      "│               ┆ operation are Monday to Saturday, 10 a.m. – 9 p.m. ET, and Sunday, 12 p.m. – 6   │\n",
+      "│               ┆ p.m. ET.                                                                         │\n",
+      "│               ┆ About Von Maur                                                                   │\n",
+      "│               ┆ Von Maur was founded 145 years ago in downtown Davenport, Iowa. The Company      │\n",
+      "│               ┆ currently operates 35 stores in 15 states, along with a 120,000 square foot      │\n",
+      "│               ┆ E-Commerce facility that drives its successful online business at vonmaur.com.   │\n",
+      "│               ┆ Courtney Smith                                                                   │\n",
+      "│               ┆ courtney@reputationpartners.com                                                  │\n",
+      "│               ┆ View original content:http://www.prnewswire.com/news-releases/von-maur-departmen │\n",
+      "│               ┆ t-store-opens-third-location-in-michigan-300937186.html                          │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│ 3             ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful minds of our time about  │\n",
+      "│               ┆ what it means to live like a Genius.                                             │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "│ 4             ┆                                                                                  │\n",
+      "│               ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful                          │\n",
+      "│               ┆ minds of our time about what it means to live like a Genius.                     │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "│               ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│               ┆                                                                                  │\n",
+      "│ 5             ┆                                                                                  │\n",
+      "│               ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│               ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful                          │\n",
+      "│               ┆ minds of our time about what it means to live like a Genius.                     │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "│               ┆                                                                                  │\n",
+      "│ 6             ┆                                                                                  │\n",
+      "│               ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful                          │\n",
+      "│               ┆ minds of our time about what it means to live like a Genius.                     │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│               ┆                                                                                  │\n",
+      "│ 11            ┆ A couple of capricious capybaras chatted coolly by the cactus, curiously         │\n",
+      "│               ┆ considering another capy capably chewing on cantaloupe. Yesterday, a pair of     │\n",
+      "│               ┆ capricious pigeons prattled placidly by the cactus, curiously considering        │\n",
+      "│               ┆ another pigeon capably pecking at cantaloupe. The lazy llama lightly limped      │\n",
+      "│               ┆ through the lilacs, laboriously longing for a lozenge                            │\n",
+      "│ 12            ┆ Yesterday, a pair of capricious pigeons prattled placidly by the cactus,         │\n",
+      "│               ┆ curiously considering another pigeon capably pecking at cantaloupe. The lazy     │\n",
+      "│               ┆ llama lightly limped through the lilacs, laboriously longing for a lozenge. A    │\n",
+      "│               ┆ couple of capricious capybaras chatted coolly by the cactus, curiously           │\n",
+      "│               ┆ considering another capy capably chewing on cantaloupe.                          │\n",
+      "│ 13            ┆ The lazy llama lightly limped through the lilacs, laboriously longing for a      │\n",
+      "│               ┆ lozenge. A couple of capricious capybaras chatted coolly by the cactus,          │\n",
+      "│               ┆ curiously considering another capy capably chewing on cantaloupe. Yesterday, a   │\n",
+      "│               ┆ pair of capricious pigeons prattled placidly by the cactus, curiously            │\n",
+      "│               ┆ considering another pigeon capably pecking at cantaloupe.                        │\n",
+      "│ 14            ┆ Yesterday, a pair of capricious pigeons prattled placidly by the cactus,         │\n",
+      "│               ┆ curiously considering another pigeon capably pecking at cantaloupe. The lazy     │\n",
+      "│               ┆ llama lightly limped through the lilacs, laboriously longing for a lozenge. A    │\n",
+      "│               ┆ couple of capricious capybaras chatted coolly by the cactus, curiously pondering │\n",
+      "│               ┆ another capy capably chewing on cantaloupe                                       │\n",
+      "│ 15            ┆ The new sheepskin leather coat with natural fur is 46-48 times warmer. The color │\n",
+      "│               ┆ is very beautiful bright green looks very beautiful. Purchased by the shopping   │\n",
+      "│               ┆ center Dubrovka 19 000 now in the store the price is 22000-24000 call any time.  │\n",
+      "│ 16            ┆ New sheepskin leather coat with natural fur is 50 times warmer. The color is     │\n",
+      "│               ┆ very beautiful bright green looks very beautiful. Purchased by the shopping      │\n",
+      "│               ┆ center Dubrovka 19 000 now in the store the price is 22000-24000 call any time.  │\n",
+      "│ 17            ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful minds of our time about  │\n",
+      "│               ┆ what it means to live like a Genius.                                             │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "└───────────────┴──────────────────────────────────────────────────────────────────────────────────┘\n"
+     ]
+    }
+   ],
    "source": [
     "import polars as pl\n",
     "input_df_1 = pl.read_parquet(os.path.join(os.path.abspath(\"\"), \"python\", \"test-data\", \"input\", \"data_1\", \"df1.parquet\"))\n",
@@ -169,10 +431,97 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "0b2eddb9-4fb6-41eb-916c-3741b9129f2c",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "shape: (4, 2)\n",
+      "┌───────────────┬──────────────────────────────────────────────────────────────────────────────────┐\n",
+      "│ int_id_column ┆ contents                                                                         │\n",
+      "│ ---           ┆ ---                                                                              │\n",
+      "│ i64           ┆ str                                                                              │\n",
+      "╞═══════════════╪══════════════════════════════════════════════════════════════════════════════════╡\n",
+      "│ 1             ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ PR Newswire October 12, 2019                                                     │\n",
+      "│               ┆ 145-year-old Retailer Anchors Woodland Mall Just Outside Grand Rapids;           │\n",
+      "│               ┆ New Location Continues Strategic National Expansion Plans                        │\n",
+      "│               ┆ DAVENPORT, Iowa, Oct. 12, 2019 /PRNewswire/ -- Von Maur Department Stores opened │\n",
+      "│               ┆ a new store today at Woodland Mall in Kentwood, Mich. The 90,000-square-foot     │\n",
+      "│               ┆ store is the Company's third location in Michigan.                               │\n",
+      "│               ┆ Known for its outstanding selection of brand name and specialty apparel, shoes,  │\n",
+      "│               ┆ accessories and gifts, the store features products from leading brands such as   │\n",
+      "│               ┆ Eileen Fisher, Vineyard Vines, Free People, and Kendra Scott, among many others. │\n",
+      "│               ┆ Von Maur is also widely-regarded for its superior customer service, including an │\n",
+      "│               ┆ interest-free charge card, accommodating return policy, free gift wrapping and   │\n",
+      "│               ┆ free shipping services.                                                          │\n",
+      "│               ┆ Today's opening continues to build upon the momentum of the family-owned         │\n",
+      "│               ┆ Company's targeted national growth strategy. Von Maur opened its first Wisconsin │\n",
+      "│               ┆ location in 2017 and a second Minnesota location in 2018, and it has grown in    │\n",
+      "│               ┆ new states beyond its Midwestern footprint, including New York, Alabama and      │\n",
+      "│               ┆ Oklahoma. Additionally, the Company has plans to open its second Wisconsin       │\n",
+      "│               ┆ location in Madison in Fall 2021.                                                │\n",
+      "│               ┆ \"With its easy accessibility to the larger Grand Rapids area and exceptional     │\n",
+      "│               ┆ collection of shopping, dining and entertainment options, Woodland Mall is a     │\n",
+      "│               ┆ fantastic location for us to continue growing our brand in Michigan,\" said Jim   │\n",
+      "│               ┆ von Maur, president of Von Maur. \"From the moment shoppers walk through our      │\n",
+      "│               ┆ doors, creating an unrivaled shopping experience is the motivation behind        │\n",
+      "│               ┆ everything we do. We look forward to extending our offerings of brand name       │\n",
+      "│               ┆ merchandise and signature customer service to the Grand Rapids area for many     │\n",
+      "│               ┆ years to come.\"                                                                  │\n",
+      "│               ┆ \"We are thrilled to welcome Von Maur, known for their high-quality merchandise   │\n",
+      "│               ┆ and exceptional service, as the anchor of the newly developed wing at Woodland   │\n",
+      "│               ┆ Mall,\" said Joe Coradino, CEO of PREIT. \"The addition most certainly solidifies  │\n",
+      "│               ┆ Woodland Mall's place as the premier retail and entertainment destination in     │\n",
+      "│               ┆ Grand Rapids, driving its place as a top-performing PREIT property.\"             │\n",
+      "│               ┆ Centrally-located for shoppers from Grand Rapids and the surrounding areas, the  │\n",
+      "│               ┆ new single story Von Maur store features the Company's signature exterior brick  │\n",
+      "│               ┆ façade, open expansive floor plan, and residential ambiance, including music     │\n",
+      "│               ┆ from the store's grand piano.                                                    │\n",
+      "│               ┆ The Woodland Mall store will eventually employ up to 150 associates; the         │\n",
+      "│               ┆ majority of them will be full-time. Von Maur offers above-market wages,          │\n",
+      "│               ┆ excellent benefits and a positive, professional work environment. Hours of       │\n",
+      "│               ┆ operation are Monday to Saturday, 10 a.m. – 9 p.m. ET, and Sunday, 12 p.m. – 6   │\n",
+      "│               ┆ p.m. ET.                                                                         │\n",
+      "│               ┆ About Von Maur                                                                   │\n",
+      "│               ┆ Von Maur was founded 145 years ago in downtown Davenport, Iowa. The Company      │\n",
+      "│               ┆ currently operates 35 stores in 15 states, along with a 120,000 square foot      │\n",
+      "│               ┆ E-Commerce facility that drives its successful online business at vonmaur.com.   │\n",
+      "│               ┆ Courtney Smith                                                                   │\n",
+      "│               ┆ courtney@reputationpartners.com                                                  │\n",
+      "│               ┆ View original content:http://www.prnewswire.com/news-releases/von-maur-departmen │\n",
+      "│               ┆ t-store-opens-third-location-in-michigan-300937186.html                          │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│ 4             ┆                                                                                  │\n",
+      "│               ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful                          │\n",
+      "│               ┆ minds of our time about what it means to live like a Genius.                     │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "│               ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│               ┆                                                                                  │\n",
+      "│ 12            ┆ Yesterday, a pair of capricious pigeons prattled placidly by the cactus,         │\n",
+      "│               ┆ curiously considering another pigeon capably pecking at cantaloupe. The lazy     │\n",
+      "│               ┆ llama lightly limped through the lilacs, laboriously longing for a lozenge. A    │\n",
+      "│               ┆ couple of capricious capybaras chatted coolly by the cactus, curiously           │\n",
+      "│               ┆ considering another capy capably chewing on cantaloupe.                          │\n",
+      "│ 15            ┆ The new sheepskin leather coat with natural fur is 46-48 times warmer. The color │\n",
+      "│               ┆ is very beautiful bright green looks very beautiful. Purchased by the shopping   │\n",
+      "│               ┆ center Dubrovka 19 000 now in the store the price is 22000-24000 call any time.  │\n",
+      "└───────────────┴──────────────────────────────────────────────────────────────────────────────────┘\n"
+     ]
+    }
+   ],
    "source": [
     "import polars as pl\n",
     "output_df_1 = pl.read_parquet(os.path.join(os.path.abspath(\"\"), \"python\", \"output\", \"cleaned\", \"data_1\", \"df1.parquet\"))\n",
@@ -193,9 +542,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "fdedup_ray",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
-   "name": "fdedup_ray"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -207,7 +556,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,
diff --git a/transforms/universal/fdedup/fdedup_ray.ipynb b/transforms/universal/fdedup/fdedup_ray.ipynb
index 533ca019f..bb69579a9 100644
--- a/transforms/universal/fdedup/fdedup_ray.ipynb
+++ b/transforms/universal/fdedup/fdedup_ray.ipynb
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "4c45c3c6-e4d7-4e61-8de6-32d61f2ce695",
    "metadata": {},
    "outputs": [],
@@ -37,10 +37,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "c2a12abc-9460-4e45-8961-873b48a9ab19",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-11-26 13:30:56,482\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n"
+     ]
+    }
+   ],
    "source": [
     "import ast\n",
     "import os\n",
@@ -73,7 +81,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "e90a853e-412f-45d7-af3d-959e755aeebb",
    "metadata": {},
    "outputs": [],
@@ -106,10 +114,126 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "0775e400-7469-49a6-8998-bd4772931459",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "13:30:57 INFO - Starting SignatureCalculation step\n",
+      "13:30:57 INFO - Got parameters for SignatureCalculation\n",
+      "13:30:57 INFO - minhash parameters are : {'document_id_column': 'int_id_column', 'contents_column': 'contents', 'seed': 42, 'num_permutations': 112, 'jaccard_similarity_threshold': 0.75, 'word_shingle_size': 5, 'num_bands': 14, 'num_minhashes_per_band': 8, 'num_segments': 1, 'shingle_option': 'word'}\n",
+      "13:30:57 INFO - data factory scdata_ is using local configuration without input/output path\n",
+      "13:30:57 INFO - data factory scdata_ max_files -1, n_sample -1\n",
+      "13:30:57 INFO - data factory scdata_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:30:57 INFO - pipeline id pipeline_id\n",
+      "13:30:57 INFO - code location None\n",
+      "13:30:57 INFO - number of workers 3 worker options {'num_cpus': 0.8, 'max_restarts': -1}\n",
+      "13:30:57 INFO - actor creation delay 0\n",
+      "13:30:57 INFO - job details {'job category': 'preprocessing', 'job name': 'minhash', 'job type': 'ray', 'job id': 'job_id'}\n",
+      "13:30:57 INFO - data factory data_ is using local data access: input_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/ray/test-data/input output_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/ray/output\n",
+      "13:30:57 INFO - data factory data_ max_files -1, n_sample -1\n",
+      "13:30:57 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:30:57 INFO - Running locally\n",
+      "2024-11-26 13:31:08,860\tINFO worker.py:1777 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265 \u001b[39m\u001b[22m\n",
+      "\u001b[36m(orchestrate pid=86958)\u001b[0m 13:31:12 INFO - orchestrator started at 2024-11-26 13:31:12\n",
+      "\u001b[36m(orchestrate pid=86958)\u001b[0m 13:31:12 INFO - Number of files is 1, source profile {'max_file_size': 0.003920555114746094, 'min_file_size': 0.003920555114746094, 'total_file_size': 0.003920555114746094}\n",
+      "\u001b[36m(orchestrate pid=86958)\u001b[0m 13:31:12 INFO - Cluster resources: {'cpus': 12, 'gpus': 0, 'memory': 11.162438202649355, 'object_store': 2.0}\n",
+      "\u001b[36m(orchestrate pid=86958)\u001b[0m 13:31:12 INFO - Number of workers - 3 with {'num_cpus': 0.8, 'max_restarts': -1} each\n",
+      "\u001b[36m(orchestrate pid=86958)\u001b[0m 13:31:14 INFO - Completed 0 files (0.0%)  in 0.0 min. Waiting for completion\n",
+      "\u001b[36m(orchestrate pid=86958)\u001b[0m 13:31:14 INFO - Completed processing 1 files in 0.002 min\n",
+      "\u001b[36m(RayTransformFileProcessor pid=86984)\u001b[0m 13:31:14 INFO - Starting flush()\n",
+      "\u001b[36m(orchestrate pid=86958)\u001b[0m 13:31:14 INFO - done flushing in 0.045 sec\n",
+      "\u001b[36m(RayTransformFileProcessor pid=86984)\u001b[0m 13:31:14 INFO - Wrote 14 tables with a total size of 80,640 bytes\n",
+      "13:31:24 INFO - Completed execution in 0.446 min, execution result 0\n",
+      "13:31:26 INFO - SignatureCalculation completed successfully\n",
+      "13:31:26 INFO - Starting ClusterAnalysis step\n",
+      "13:31:26 INFO - Got parameters for ClusterAnalysis\n",
+      "13:31:26 INFO - cluster parameters are : {'jaccard_similarity_threshold': 0.75, 'num_bands': 14, 'num_segments': 1, 'sort_output': False}\n",
+      "13:31:26 INFO - pipeline id pipeline_id\n",
+      "13:31:26 INFO - code location None\n",
+      "13:31:26 INFO - number of workers 3 worker options {'num_cpus': 0.8, 'max_restarts': -1}\n",
+      "13:31:26 INFO - actor creation delay 0\n",
+      "13:31:26 INFO - job details {'job category': 'preprocessing', 'job name': 'cluster', 'job type': 'ray', 'job id': 'job_id'}\n",
+      "13:31:26 INFO - data factory data_ is using local data access: input_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/ray/output/bands output_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/ray/output/docs_to_remove\n",
+      "13:31:26 INFO - data factory data_ max_files -1, n_sample -1\n",
+      "13:31:26 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:31:26 INFO - Running locally\n",
+      "2024-11-26 13:31:28,318\tINFO worker.py:1777 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265 \u001b[39m\u001b[22m\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:31 INFO - orchestrator started at 2024-11-26 13:31:31\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:31 INFO - Number of folders is 14\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:31 INFO - Cluster resources: {'cpus': 12, 'gpus': 0, 'memory': 11.77626838721335, 'object_store': 2.0}\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:31 INFO - Number of workers - 3 with {'num_cpus': 0.8, 'max_restarts': -1} each\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 1 files in 0.0 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 2 files in 0.0 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 3 files in 0.0 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 4 files in 0.0 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 5 files in 0.0 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 6 files in 0.0 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 7 files in 0.001 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 8 files in 0.001 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 9 files in 0.001 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 10 files in 0.001 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 11 files in 0.001 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed 11 files (78.571%)  in 0.001 min. Waiting for completion\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - Completed processing 14 files in 0.001 min\n",
+      "\u001b[36m(orchestrate pid=87057)\u001b[0m 13:31:33 INFO - done flushing in 0.001 sec\n",
+      "13:31:43 INFO - Completed execution in 0.292 min, execution result 0\n",
+      "13:31:45 INFO - ClusterAnalysis completed successfully\n",
+      "13:31:45 INFO - Starting GetDuplicateList step\n",
+      "13:31:45 INFO - Got parameters for GetDuplicateList\n",
+      "13:31:45 INFO - fdlist parameters are : {'docs_to_remove': 'docs_to_remove', 'consolidated_filename': 'docs_to_remove_consolidated/docs_to_remove_consolidated.parquet', 'sort_output': False}\n",
+      "13:31:45 INFO - pipeline id pipeline_id\n",
+      "13:31:45 INFO - code location None\n",
+      "13:31:45 INFO - number of workers 1 worker options {'num_cpus': 0.8, 'max_restarts': -1}\n",
+      "13:31:45 INFO - actor creation delay 0\n",
+      "13:31:45 INFO - job details {'job category': 'preprocessing', 'job name': 'fdlist', 'job type': 'ray', 'job id': 'job_id'}\n",
+      "13:31:45 INFO - data factory data_ is using local data access: input_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/ray/output output_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/ray/output\n",
+      "13:31:45 INFO - data factory data_ max_files -1, n_sample -1\n",
+      "13:31:45 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:31:45 INFO - Running locally\n",
+      "2024-11-26 13:31:47,311\tINFO worker.py:1777 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265 \u001b[39m\u001b[22m\n",
+      "\u001b[36m(orchestrate pid=87134)\u001b[0m 13:31:50 INFO - orchestrator started at 2024-11-26 13:31:50\n",
+      "\u001b[36m(orchestrate pid=87134)\u001b[0m 13:31:50 INFO - Number of folders is 1\n",
+      "\u001b[36m(orchestrate pid=87134)\u001b[0m 13:31:50 INFO - Cluster resources: {'cpus': 12, 'gpus': 0, 'memory': 11.749520111829042, 'object_store': 2.0}\n",
+      "\u001b[36m(orchestrate pid=87134)\u001b[0m 13:31:50 INFO - Number of workers - 1 with {'num_cpus': 0.8, 'max_restarts': -1} each\n",
+      "\u001b[36m(orchestrate pid=87134)\u001b[0m 13:31:52 INFO - Completed 0 files (0.0%)  in 0.0 min. Waiting for completion\n",
+      "\u001b[36m(orchestrate pid=87134)\u001b[0m 13:31:52 INFO - Completed processing 1 files in 0.0 min\n",
+      "\u001b[36m(orchestrate pid=87134)\u001b[0m 13:31:52 INFO - done flushing in 0.001 sec\n",
+      "\u001b[36m(RayTransformFileProcessor pid=87153)\u001b[0m 13:31:52 INFO - Get Duplicate List for folder docs_to_remove\n",
+      "\u001b[36m(RayTransformFileProcessor pid=87153)\u001b[0m 13:31:52 INFO - 8 documents marked as duplicates\n",
+      "13:32:02 INFO - Completed execution in 0.295 min, execution result 0\n",
+      "13:32:04 INFO - GetDuplicateList completed successfully\n",
+      "13:32:04 INFO - Starting DataCleaning step\n",
+      "13:32:04 INFO - Got parameters for DataCleaning\n",
+      "13:32:04 INFO - fdclean parameters are : {'document_id_column': 'int_id_column', 'duplicate_list_location': 'docs_to_remove_consolidated/docs_to_remove_consolidated.parquet', 'operation_mode': 'filter_duplicates'}\n",
+      "13:32:04 INFO - data factory dcdata_ is using local configuration without input/output path\n",
+      "13:32:04 INFO - data factory dcdata_ max_files -1, n_sample -1\n",
+      "13:32:04 INFO - data factory dcdata_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:32:04 INFO - pipeline id pipeline_id\n",
+      "13:32:04 INFO - code location None\n",
+      "13:32:04 INFO - number of workers 3 worker options {'num_cpus': 0.8, 'max_restarts': -1}\n",
+      "13:32:04 INFO - actor creation delay 0\n",
+      "13:32:04 INFO - job details {'job category': 'preprocessing', 'job name': 'fdclean', 'job type': 'ray', 'job id': 'job_id'}\n",
+      "13:32:04 INFO - data factory data_ is using local data access: input_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/ray/test-data/input output_folder - /Users/touma/data-prep-kit/transforms/universal/fdedup/ray/output/cleaned\n",
+      "13:32:04 INFO - data factory data_ max_files -1, n_sample -1\n",
+      "13:32:04 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n",
+      "13:32:04 INFO - Running locally\n",
+      "2024-11-26 13:32:07,526\tINFO worker.py:1777 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265 \u001b[39m\u001b[22m\n",
+      "\u001b[36m(orchestrate pid=87217)\u001b[0m 13:32:10 INFO - orchestrator started at 2024-11-26 13:32:10\n",
+      "\u001b[36m(orchestrate pid=87217)\u001b[0m 13:32:10 INFO - Number of files is 1, source profile {'max_file_size': 0.003920555114746094, 'min_file_size': 0.003920555114746094, 'total_file_size': 0.003920555114746094}\n",
+      "\u001b[36m(orchestrate pid=87217)\u001b[0m 13:32:10 INFO - Cluster resources: {'cpus': 12, 'gpus': 0, 'memory': 11.738976669497788, 'object_store': 2.0}\n",
+      "\u001b[36m(orchestrate pid=87217)\u001b[0m 13:32:10 INFO - Number of workers - 3 with {'num_cpus': 0.8, 'max_restarts': -1} each\n",
+      "\u001b[36m(orchestrate pid=87217)\u001b[0m 13:32:13 INFO - Completed 0 files (0.0%)  in 0.0 min. Waiting for completion\n",
+      "\u001b[36m(orchestrate pid=87217)\u001b[0m 13:32:13 INFO - Completed processing 1 files in 0.002 min\n",
+      "\u001b[36m(orchestrate pid=87217)\u001b[0m 13:32:13 INFO - done flushing in 0.003 sec\n",
+      "13:32:23 INFO - Completed execution in 0.313 min, execution result 0\n",
+      "13:32:24 INFO - DataCleaning completed successfully\n"
+     ]
+    }
+   ],
    "source": [
     "\n",
     "sys.argv = ParamsUtils.dict_to_req(d=params)\n",
@@ -130,10 +254,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "7276fe84-6512-4605-ab65-747351e13a7c",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['ray/output/cleaned/metadata.json', 'ray/output/cleaned/df1.parquet']"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "import glob\n",
     "glob.glob(\"ray/output/cleaned/*\")"
@@ -149,10 +284,167 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "5b22234f-f7a1-4b92-b2ac-376b2545abce",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "shape: (12, 2)\n",
+      "┌───────────────┬──────────────────────────────────────────────────────────────────────────────────┐\n",
+      "│ int_id_column ┆ contents                                                                         │\n",
+      "│ ---           ┆ ---                                                                              │\n",
+      "│ i64           ┆ str                                                                              │\n",
+      "╞═══════════════╪══════════════════════════════════════════════════════════════════════════════════╡\n",
+      "│ 1             ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ PR Newswire October 12, 2019                                                     │\n",
+      "│               ┆ 145-year-old Retailer Anchors Woodland Mall Just Outside Grand Rapids;           │\n",
+      "│               ┆ New Location Continues Strategic National Expansion Plans                        │\n",
+      "│               ┆ DAVENPORT, Iowa, Oct. 12, 2019 /PRNewswire/ -- Von Maur Department Stores opened │\n",
+      "│               ┆ a new store today at Woodland Mall in Kentwood, Mich. The 90,000-square-foot     │\n",
+      "│               ┆ store is the Company's third location in Michigan.                               │\n",
+      "│               ┆ Known for its outstanding selection of brand name and specialty apparel, shoes,  │\n",
+      "│               ┆ accessories and gifts, the store features products from leading brands such as   │\n",
+      "│               ┆ Eileen Fisher, Vineyard Vines, Free People, and Kendra Scott, among many others. │\n",
+      "│               ┆ Von Maur is also widely-regarded for its superior customer service, including an │\n",
+      "│               ┆ interest-free charge card, accommodating return policy, free gift wrapping and   │\n",
+      "│               ┆ free shipping services.                                                          │\n",
+      "│               ┆ Today's opening continues to build upon the momentum of the family-owned         │\n",
+      "│               ┆ Company's targeted national growth strategy. Von Maur opened its first Wisconsin │\n",
+      "│               ┆ location in 2017 and a second Minnesota location in 2018, and it has grown in    │\n",
+      "│               ┆ new states beyond its Midwestern footprint, including New York, Alabama and      │\n",
+      "│               ┆ Oklahoma. Additionally, the Company has plans to open its second Wisconsin       │\n",
+      "│               ┆ location in Madison in Fall 2021.                                                │\n",
+      "│               ┆ \"With its easy accessibility to the larger Grand Rapids area and exceptional     │\n",
+      "│               ┆ collection of shopping, dining and entertainment options, Woodland Mall is a     │\n",
+      "│               ┆ fantastic location for us to continue growing our brand in Michigan,\" said Jim   │\n",
+      "│               ┆ von Maur, president of Von Maur. \"From the moment shoppers walk through our      │\n",
+      "│               ┆ doors, creating an unrivaled shopping experience is the motivation behind        │\n",
+      "│               ┆ everything we do. We look forward to extending our offerings of brand name       │\n",
+      "│               ┆ merchandise and signature customer service to the Grand Rapids area for many     │\n",
+      "│               ┆ years to come.\"                                                                  │\n",
+      "│               ┆ \"We are thrilled to welcome Von Maur, known for their high-quality merchandise   │\n",
+      "│               ┆ and exceptional service, as the anchor of the newly developed wing at Woodland   │\n",
+      "│               ┆ Mall,\" said Joe Coradino, CEO of PREIT. \"The addition most certainly solidifies  │\n",
+      "│               ┆ Woodland Mall's place as the premier retail and entertainment destination in     │\n",
+      "│               ┆ Grand Rapids, driving its place as a top-performing PREIT property.\"             │\n",
+      "│               ┆ Centrally-located for shoppers from Grand Rapids and the surrounding areas, the  │\n",
+      "│               ┆ new single story Von Maur store features the Company's signature exterior brick  │\n",
+      "│               ┆ façade, open expansive floor plan, and residential ambiance, including music     │\n",
+      "│               ┆ from the store's grand piano.                                                    │\n",
+      "│               ┆ The Woodland Mall store will eventually employ up to 150 associates; the         │\n",
+      "│               ┆ majority of them will be full-time. Von Maur offers above-market wages,          │\n",
+      "│               ┆ excellent benefits and a positive, professional work environment. Hours of       │\n",
+      "│               ┆ operation are Monday to Saturday, 10 a.m. – 9 p.m. ET, and Sunday, 12 p.m. – 6   │\n",
+      "│               ┆ p.m. ET.                                                                         │\n",
+      "│               ┆ About Von Maur                                                                   │\n",
+      "│               ┆ Von Maur was founded 145 years ago in downtown Davenport, Iowa. The Company      │\n",
+      "│               ┆ currently operates 35 stores in 15 states, along with a 120,000 square foot      │\n",
+      "│               ┆ E-Commerce facility that drives its successful online business at vonmaur.com.   │\n",
+      "│               ┆ Courtney Smith                                                                   │\n",
+      "│               ┆ courtney@reputationpartners.com                                                  │\n",
+      "│               ┆ View original content:http://www.prnewswire.com/news-releases/von-maur-departmen │\n",
+      "│               ┆ t-store-opens-third-location-in-michigan-300937186.html                          │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│ 3             ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful minds of our time about  │\n",
+      "│               ┆ what it means to live like a Genius.                                             │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "│ 4             ┆                                                                                  │\n",
+      "│               ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful                          │\n",
+      "│               ┆ minds of our time about what it means to live like a Genius.                     │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "│               ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│               ┆                                                                                  │\n",
+      "│ 5             ┆                                                                                  │\n",
+      "│               ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│               ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful                          │\n",
+      "│               ┆ minds of our time about what it means to live like a Genius.                     │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "│               ┆                                                                                  │\n",
+      "│ 6             ┆                                                                                  │\n",
+      "│               ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful                          │\n",
+      "│               ┆ minds of our time about what it means to live like a Genius.                     │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│               ┆                                                                                  │\n",
+      "│ 11            ┆ A couple of capricious capybaras chatted coolly by the cactus, curiously         │\n",
+      "│               ┆ considering another capy capably chewing on cantaloupe. Yesterday, a pair of     │\n",
+      "│               ┆ capricious pigeons prattled placidly by the cactus, curiously considering        │\n",
+      "│               ┆ another pigeon capably pecking at cantaloupe. The lazy llama lightly limped      │\n",
+      "│               ┆ through the lilacs, laboriously longing for a lozenge                            │\n",
+      "│ 12            ┆ Yesterday, a pair of capricious pigeons prattled placidly by the cactus,         │\n",
+      "│               ┆ curiously considering another pigeon capably pecking at cantaloupe. The lazy     │\n",
+      "│               ┆ llama lightly limped through the lilacs, laboriously longing for a lozenge. A    │\n",
+      "│               ┆ couple of capricious capybaras chatted coolly by the cactus, curiously           │\n",
+      "│               ┆ considering another capy capably chewing on cantaloupe.                          │\n",
+      "│ 13            ┆ The lazy llama lightly limped through the lilacs, laboriously longing for a      │\n",
+      "│               ┆ lozenge. A couple of capricious capybaras chatted coolly by the cactus,          │\n",
+      "│               ┆ curiously considering another capy capably chewing on cantaloupe. Yesterday, a   │\n",
+      "│               ┆ pair of capricious pigeons prattled placidly by the cactus, curiously            │\n",
+      "│               ┆ considering another pigeon capably pecking at cantaloupe.                        │\n",
+      "│ 14            ┆ Yesterday, a pair of capricious pigeons prattled placidly by the cactus,         │\n",
+      "│               ┆ curiously considering another pigeon capably pecking at cantaloupe. The lazy     │\n",
+      "│               ┆ llama lightly limped through the lilacs, laboriously longing for a lozenge. A    │\n",
+      "│               ┆ couple of capricious capybaras chatted coolly by the cactus, curiously pondering │\n",
+      "│               ┆ another capy capably chewing on cantaloupe                                       │\n",
+      "│ 15            ┆ The new sheepskin leather coat with natural fur is 46-48 times warmer. The color │\n",
+      "│               ┆ is very beautiful bright green looks very beautiful. Purchased by the shopping   │\n",
+      "│               ┆ center Dubrovka 19 000 now in the store the price is 22000-24000 call any time.  │\n",
+      "│ 16            ┆ New sheepskin leather coat with natural fur is 50 times warmer. The color is     │\n",
+      "│               ┆ very beautiful bright green looks very beautiful. Purchased by the shopping      │\n",
+      "│               ┆ center Dubrovka 19 000 now in the store the price is 22000-24000 call any time.  │\n",
+      "│ 17            ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful minds of our time about  │\n",
+      "│               ┆ what it means to live like a Genius.                                             │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "└───────────────┴──────────────────────────────────────────────────────────────────────────────────┘\n"
+     ]
+    }
+   ],
    "source": [
     "import polars as pl\n",
     "input_df = pl.read_parquet(os.path.join(os.path.abspath(\"\"), \"ray\", \"test-data\", \"input\", \"df1.parquet\"))\n",
@@ -170,10 +462,97 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "0b2eddb9-4fb6-41eb-916c-3741b9129f2c",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "shape: (4, 2)\n",
+      "┌───────────────┬──────────────────────────────────────────────────────────────────────────────────┐\n",
+      "│ int_id_column ┆ contents                                                                         │\n",
+      "│ ---           ┆ ---                                                                              │\n",
+      "│ i64           ┆ str                                                                              │\n",
+      "╞═══════════════╪══════════════════════════════════════════════════════════════════════════════════╡\n",
+      "│ 1             ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ PR Newswire October 12, 2019                                                     │\n",
+      "│               ┆ 145-year-old Retailer Anchors Woodland Mall Just Outside Grand Rapids;           │\n",
+      "│               ┆ New Location Continues Strategic National Expansion Plans                        │\n",
+      "│               ┆ DAVENPORT, Iowa, Oct. 12, 2019 /PRNewswire/ -- Von Maur Department Stores opened │\n",
+      "│               ┆ a new store today at Woodland Mall in Kentwood, Mich. The 90,000-square-foot     │\n",
+      "│               ┆ store is the Company's third location in Michigan.                               │\n",
+      "│               ┆ Known for its outstanding selection of brand name and specialty apparel, shoes,  │\n",
+      "│               ┆ accessories and gifts, the store features products from leading brands such as   │\n",
+      "│               ┆ Eileen Fisher, Vineyard Vines, Free People, and Kendra Scott, among many others. │\n",
+      "│               ┆ Von Maur is also widely-regarded for its superior customer service, including an │\n",
+      "│               ┆ interest-free charge card, accommodating return policy, free gift wrapping and   │\n",
+      "│               ┆ free shipping services.                                                          │\n",
+      "│               ┆ Today's opening continues to build upon the momentum of the family-owned         │\n",
+      "│               ┆ Company's targeted national growth strategy. Von Maur opened its first Wisconsin │\n",
+      "│               ┆ location in 2017 and a second Minnesota location in 2018, and it has grown in    │\n",
+      "│               ┆ new states beyond its Midwestern footprint, including New York, Alabama and      │\n",
+      "│               ┆ Oklahoma. Additionally, the Company has plans to open its second Wisconsin       │\n",
+      "│               ┆ location in Madison in Fall 2021.                                                │\n",
+      "│               ┆ \"With its easy accessibility to the larger Grand Rapids area and exceptional     │\n",
+      "│               ┆ collection of shopping, dining and entertainment options, Woodland Mall is a     │\n",
+      "│               ┆ fantastic location for us to continue growing our brand in Michigan,\" said Jim   │\n",
+      "│               ┆ von Maur, president of Von Maur. \"From the moment shoppers walk through our      │\n",
+      "│               ┆ doors, creating an unrivaled shopping experience is the motivation behind        │\n",
+      "│               ┆ everything we do. We look forward to extending our offerings of brand name       │\n",
+      "│               ┆ merchandise and signature customer service to the Grand Rapids area for many     │\n",
+      "│               ┆ years to come.\"                                                                  │\n",
+      "│               ┆ \"We are thrilled to welcome Von Maur, known for their high-quality merchandise   │\n",
+      "│               ┆ and exceptional service, as the anchor of the newly developed wing at Woodland   │\n",
+      "│               ┆ Mall,\" said Joe Coradino, CEO of PREIT. \"The addition most certainly solidifies  │\n",
+      "│               ┆ Woodland Mall's place as the premier retail and entertainment destination in     │\n",
+      "│               ┆ Grand Rapids, driving its place as a top-performing PREIT property.\"             │\n",
+      "│               ┆ Centrally-located for shoppers from Grand Rapids and the surrounding areas, the  │\n",
+      "│               ┆ new single story Von Maur store features the Company's signature exterior brick  │\n",
+      "│               ┆ façade, open expansive floor plan, and residential ambiance, including music     │\n",
+      "│               ┆ from the store's grand piano.                                                    │\n",
+      "│               ┆ The Woodland Mall store will eventually employ up to 150 associates; the         │\n",
+      "│               ┆ majority of them will be full-time. Von Maur offers above-market wages,          │\n",
+      "│               ┆ excellent benefits and a positive, professional work environment. Hours of       │\n",
+      "│               ┆ operation are Monday to Saturday, 10 a.m. – 9 p.m. ET, and Sunday, 12 p.m. – 6   │\n",
+      "│               ┆ p.m. ET.                                                                         │\n",
+      "│               ┆ About Von Maur                                                                   │\n",
+      "│               ┆ Von Maur was founded 145 years ago in downtown Davenport, Iowa. The Company      │\n",
+      "│               ┆ currently operates 35 stores in 15 states, along with a 120,000 square foot      │\n",
+      "│               ┆ E-Commerce facility that drives its successful online business at vonmaur.com.   │\n",
+      "│               ┆ Courtney Smith                                                                   │\n",
+      "│               ┆ courtney@reputationpartners.com                                                  │\n",
+      "│               ┆ View original content:http://www.prnewswire.com/news-releases/von-maur-departmen │\n",
+      "│               ┆ t-store-opens-third-location-in-michigan-300937186.html                          │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│ 4             ┆                                                                                  │\n",
+      "│               ┆ The Genius Life                                                                  │\n",
+      "│               ┆ Max Lugavere                                                                     │\n",
+      "│               ┆ You don't have to be born a Genius to become one. Follow health and science      │\n",
+      "│               ┆ journalist, New York Times bestselling author, TV personality and nutrition      │\n",
+      "│               ┆ expert Max Lugavere as he speaks to the most insightful                          │\n",
+      "│               ┆ minds of our time about what it means to live like a Genius.                     │\n",
+      "│               ┆ 35: How Wheat, Carbs, and Sugar Can Harm Your Brain | David Perlmutter, MD       │\n",
+      "│               ┆ David Perlmutter, MD is a board-certified neurologist, Fellow of the American    │\n",
+      "│               ┆ College of Nutrition, and the New York Times best-selling author of Brain Maker  │\n",
+      "│               ┆ and Grain Brain, now updated with the latest nutritional and neurological        │\n",
+      "│               ┆ science.                                                                         │\n",
+      "│               ┆ Von Maur Department Store Opens Third Location in Michigan                       │\n",
+      "│               ┆ Zuckerberg on Libra drop outs: 'It's a risky project'                            │\n",
+      "│               ┆                                                                                  │\n",
+      "│ 12            ┆ Yesterday, a pair of capricious pigeons prattled placidly by the cactus,         │\n",
+      "│               ┆ curiously considering another pigeon capably pecking at cantaloupe. The lazy     │\n",
+      "│               ┆ llama lightly limped through the lilacs, laboriously longing for a lozenge. A    │\n",
+      "│               ┆ couple of capricious capybaras chatted coolly by the cactus, curiously           │\n",
+      "│               ┆ considering another capy capably chewing on cantaloupe.                          │\n",
+      "│ 15            ┆ The new sheepskin leather coat with natural fur is 46-48 times warmer. The color │\n",
+      "│               ┆ is very beautiful bright green looks very beautiful. Purchased by the shopping   │\n",
+      "│               ┆ center Dubrovka 19 000 now in the store the price is 22000-24000 call any time.  │\n",
+      "└───────────────┴──────────────────────────────────────────────────────────────────────────────────┘\n"
+     ]
+    }
+   ],
    "source": [
     "import polars as pl\n",
     "output_df = pl.read_parquet(os.path.join(os.path.abspath(\"\"), \"ray\", \"output\", \"cleaned\", \"df1.parquet\"))\n",
@@ -188,13 +567,21 @@
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c11d3a4b-8ef9-417d-a8a2-f688db067a52",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "fdedup_ray",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
-   "name": "fdedup_ray"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -206,7 +593,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,
diff --git a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py
index ffc6f79bc..8e8795cce 100644
--- a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py
+++ b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py
@@ -34,7 +34,7 @@
 DATA_CLEANING_EXEC_SCRIPT_NAME: str = "data_cleaning_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/universal/fdedup/python/pyproject.toml b/transforms/universal/fdedup/python/pyproject.toml
index 08b20ed75..ff3666695 100644
--- a/transforms/universal/fdedup/python/pyproject.toml
+++ b/transforms/universal/fdedup/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_fdedup_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Fuzzy Dedup Transform for Python"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/fdedup/python/requirements.txt b/transforms/universal/fdedup/python/requirements.txt
index 3e5dfc16d..4cd06d819 100644
--- a/transforms/universal/fdedup/python/requirements.txt
+++ b/transforms/universal/fdedup/python/requirements.txt
@@ -1,4 +1,4 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 pyyaml>=6.0.2
 boto3>=1.34.69
 kubernetes>=30.1.0
diff --git a/transforms/universal/fdedup/ray/pyproject.toml b/transforms/universal/fdedup/ray/pyproject.toml
index 485d6de21..fa0627f00 100644
--- a/transforms/universal/fdedup/ray/pyproject.toml
+++ b/transforms/universal/fdedup/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_fdedup_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "fdedup Ray Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/fdedup/ray/requirements.txt b/transforms/universal/fdedup/ray/requirements.txt
index 81e48e5ee..ecb79fa77 100644
--- a/transforms/universal/fdedup/ray/requirements.txt
+++ b/transforms/universal/fdedup/ray/requirements.txt
@@ -1,5 +1,5 @@
-data-prep-toolkit[ray]==0.2.3.dev0
-dpk_fdedup_transform_python==0.2.3.dev0
+data-prep-toolkit[ray]>=0.2.2
+dpk_fdedup_transform_python==0.2.2
 mmh3>=4.1.0
 xxhash==3.4.1
 tqdm==4.66.3
diff --git a/transforms/universal/fdedup/spark/pyproject.toml b/transforms/universal/fdedup/spark/pyproject.toml
index 8a072b31b..798931552 100644
--- a/transforms/universal/fdedup/spark/pyproject.toml
+++ b/transforms/universal/fdedup/spark/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_fdedup_transform_spark"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Fuzzy Dedup Spark Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/fdedup/spark/requirements.txt b/transforms/universal/fdedup/spark/requirements.txt
index bfb0f04a2..e70a880bd 100644
--- a/transforms/universal/fdedup/spark/requirements.txt
+++ b/transforms/universal/fdedup/spark/requirements.txt
@@ -1,5 +1,5 @@
-dpk_fdedup_transform_python==0.2.3.dev0
-data-prep-toolkit[spark]==0.2.3.dev0
+dpk_fdedup_transform_python==0.2.2
+data-prep-toolkit[spark]>=0.2.2
 pyyaml>=6.0.2
 boto3>=1.34.69
 kubernetes>=30.1.0
diff --git a/transforms/universal/filter/kfp_ray/filter_wf.py b/transforms/universal/filter/kfp_ray/filter_wf.py
index b856b1007..4b122d98f 100644
--- a/transforms/universal/filter/kfp_ray/filter_wf.py
+++ b/transforms/universal/filter/kfp_ray/filter_wf.py
@@ -24,7 +24,7 @@
 task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:latest"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/universal/filter/python/pyproject.toml b/transforms/universal/filter/python/pyproject.toml
index fcf0f6419..8e9bb2366 100644
--- a/transforms/universal/filter/python/pyproject.toml
+++ b/transforms/universal/filter/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_filter_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Filter Transform for Python"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/filter/python/requirements.txt b/transforms/universal/filter/python/requirements.txt
index 100626f60..91f37927e 100644
--- a/transforms/universal/filter/python/requirements.txt
+++ b/transforms/universal/filter/python/requirements.txt
@@ -1,3 +1,3 @@
 
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 duckdb>=0.10.1
diff --git a/transforms/universal/filter/ray/pyproject.toml b/transforms/universal/filter/ray/pyproject.toml
index 64776e0c1..94df1cbac 100644
--- a/transforms/universal/filter/ray/pyproject.toml
+++ b/transforms/universal/filter/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_filter_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Filter Transform for Ray"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Constantin Adam", email = "cmadam@us.ibm.com" },
 ]
 dependencies = [
-    "dpk-filter-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-filter-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/universal/filter/spark/pyproject.toml b/transforms/universal/filter/spark/pyproject.toml
index ef46c9a1b..f62a81085 100644
--- a/transforms/universal/filter/spark/pyproject.toml
+++ b/transforms/universal/filter/spark/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_filter_transform_spark"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Filter Spark Transform"
 license = {text = "Apache-2.0"}
@@ -9,7 +9,7 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsk@ibm.com" },
 ]
 dependencies = [
-    "data-prep-toolkit[spark]==0.2.3.dev0",
+    "data-prep-toolkit[spark]>=0.2.2",
 ]
 
 [project.optional-dependencies]
diff --git a/transforms/universal/hap/kfp_ray.disable/hap_wf.py b/transforms/universal/hap/kfp_ray.disable/hap_wf.py
index 786011d4d..8069ec181 100644
--- a/transforms/universal/hap/kfp_ray.disable/hap_wf.py
+++ b/transforms/universal/hap/kfp_ray.disable/hap_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "hap_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/universal/hap/python/pyproject.toml b/transforms/universal/hap/python/pyproject.toml
index bf7c85577..7b30dd72e 100644
--- a/transforms/universal/hap/python/pyproject.toml
+++ b/transforms/universal/hap/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_hap_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "HAP Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/hap/python/requirements.txt b/transforms/universal/hap/python/requirements.txt
index 1250d1f77..70e633ac9 100644
--- a/transforms/universal/hap/python/requirements.txt
+++ b/transforms/universal/hap/python/requirements.txt
@@ -1,4 +1,4 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 nltk==3.9.1
 transformers==4.38.2
 torch>=2.2.2,<=2.4.1
diff --git a/transforms/universal/hap/ray/pyproject.toml b/transforms/universal/hap/ray/pyproject.toml
index 38e78938b..6518e5277 100644
--- a/transforms/universal/hap/ray/pyproject.toml
+++ b/transforms/universal/hap/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_hap_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "HAP Ray Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/hap/ray/requirements.txt b/transforms/universal/hap/ray/requirements.txt
index 7c4c8eb94..3d18acaa4 100644
--- a/transforms/universal/hap/ray/requirements.txt
+++ b/transforms/universal/hap/ray/requirements.txt
@@ -1,5 +1,5 @@
-data-prep-toolkit[ray]==0.2.3.dev0
-dpk-hap-transform-python==0.2.3.dev0
+data-prep-toolkit[ray]>=0.2.2
+dpk-hap-transform-python==0.2.2
 nltk==3.9.1
 transformers==4.38.2
 torch>=2.2.2,<=2.4.1
diff --git a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py
index 3b102d205..737b60121 100644
--- a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py
+++ b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py
@@ -23,7 +23,7 @@
 EXEC_SCRIPT_NAME: str = "noop_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/universal/noop/kfp_ray/noop_wf.py b/transforms/universal/noop/kfp_ray/noop_wf.py
index e8125328b..9dbdaf3b0 100644
--- a/transforms/universal/noop/kfp_ray/noop_wf.py
+++ b/transforms/universal/noop/kfp_ray/noop_wf.py
@@ -24,7 +24,7 @@
 EXEC_SCRIPT_NAME: str = "noop_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/universal/noop/python/pyproject.toml b/transforms/universal/noop/python/pyproject.toml
index ff9a24244..b60eef1ef 100644
--- a/transforms/universal/noop/python/pyproject.toml
+++ b/transforms/universal/noop/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_noop_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "NOOP Python Transform"
 license = {text = "Apache-2.0"}
@@ -10,7 +10,7 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
 ]
 dependencies = [
-    "data-prep-toolkit==0.2.3.dev0",
+    "data-prep-toolkit>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/universal/noop/ray/pyproject.toml b/transforms/universal/noop/ray/pyproject.toml
index da9327917..e9e28eefd 100644
--- a/transforms/universal/noop/ray/pyproject.toml
+++ b/transforms/universal/noop/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_noop_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "NOOP Ray Transform"
 license = {text = "Apache-2.0"}
@@ -10,8 +10,8 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
 ]
 dependencies = [
-    "dpk-noop-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-noop-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/universal/noop/spark/pyproject.toml b/transforms/universal/noop/spark/pyproject.toml
index d3cd47bf6..89d0a18dd 100644
--- a/transforms/universal/noop/spark/pyproject.toml
+++ b/transforms/universal/noop/spark/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_noop_transform_spark"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "NOOP Spark Transform"
 license = {text = "Apache-2.0"}
@@ -10,8 +10,8 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsk@ibm.com" },
 ]
 dependencies = [
-    "dpk-noop-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[spark]==0.2.3.dev0",
+    "dpk-noop-transform-python==0.2.2",
+    "data-prep-toolkit[spark]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/universal/profiler/kfp_ray/profiler_wf.py b/transforms/universal/profiler/kfp_ray/profiler_wf.py
index 914637895..ee6323d74 100644
--- a/transforms/universal/profiler/kfp_ray/profiler_wf.py
+++ b/transforms/universal/profiler/kfp_ray/profiler_wf.py
@@ -24,7 +24,7 @@
 EXEC_SCRIPT_NAME: str = "profiler_transform_ray.py"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/universal/profiler/python/pyproject.toml b/transforms/universal/profiler/python/pyproject.toml
index 39d9788f8..117be53c0 100644
--- a/transforms/universal/profiler/python/pyproject.toml
+++ b/transforms/universal/profiler/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_profiler_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "profiler Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/profiler/python/requirements.txt b/transforms/universal/profiler/python/requirements.txt
index 526140ada..420e3fe86 100644
--- a/transforms/universal/profiler/python/requirements.txt
+++ b/transforms/universal/profiler/python/requirements.txt
@@ -1,5 +1,5 @@
 
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 mmh3==4.1.0
 xxhash==3.4.1
 
diff --git a/transforms/universal/profiler/ray/pyproject.toml b/transforms/universal/profiler/ray/pyproject.toml
index ac8d729ec..336d7e35d 100644
--- a/transforms/universal/profiler/ray/pyproject.toml
+++ b/transforms/universal/profiler/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_profiler_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "profiler Ray Transform"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
 ]
 dependencies = [
-    "data-prep-toolkit[ray]==0.2.3.dev0",
-    "dpk_profiler_transform_python==0.2.3.dev0",
+    "data-prep-toolkit[ray]>=0.2.2",
+    "dpk_profiler_transform_python==0.2.2",
     "tqdm==4.66.3",
 ]
 
diff --git a/transforms/universal/profiler/spark/pyproject.toml b/transforms/universal/profiler/spark/pyproject.toml
index 6ba790301..1e1638766 100644
--- a/transforms/universal/profiler/spark/pyproject.toml
+++ b/transforms/universal/profiler/spark/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_profiler_transform_spark"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Profiler Spark Transform"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsk@ibm.com" },
 ]
 dependencies = [
-    "dpk-profiler-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[spark]==0.2.3.dev0",
+    "dpk-profiler-transform-python==0.2.2",
+    "data-prep-toolkit[spark]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/universal/resize/kfp_ray/resize_wf.py b/transforms/universal/resize/kfp_ray/resize_wf.py
index 0724ed731..0a9be8e95 100644
--- a/transforms/universal/resize/kfp_ray/resize_wf.py
+++ b/transforms/universal/resize/kfp_ray/resize_wf.py
@@ -22,7 +22,7 @@
 # the name of the job script
 EXEC_SCRIPT_NAME: str = "resize_transform_ray.py"
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 
 # path to kfp component specifications files
 component_spec_path = "../../../../kfp/kfp_ray_components/"
diff --git a/transforms/universal/resize/python/pyproject.toml b/transforms/universal/resize/python/pyproject.toml
index 6fdad69d0..836388694 100644
--- a/transforms/universal/resize/python/pyproject.toml
+++ b/transforms/universal/resize/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_resize_transform_python"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "resize Python Transform"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/resize/python/requirements.txt b/transforms/universal/resize/python/requirements.txt
index 2f67f6a80..e9abc2535 100644
--- a/transforms/universal/resize/python/requirements.txt
+++ b/transforms/universal/resize/python/requirements.txt
@@ -1 +1 @@
-data-prep-toolkit==0.2.3.dev0
\ No newline at end of file
+data-prep-toolkit>=0.2.2
\ No newline at end of file
diff --git a/transforms/universal/resize/ray/pyproject.toml b/transforms/universal/resize/ray/pyproject.toml
index c266a39f4..fbb4d0f30 100644
--- a/transforms/universal/resize/ray/pyproject.toml
+++ b/transforms/universal/resize/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_resize_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Resize Ray Transform"
 license = {text = "Apache-2.0"}
@@ -10,8 +10,8 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
 ]
 dependencies = [
-    "dpk-resize-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-resize-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/universal/resize/spark/pyproject.toml b/transforms/universal/resize/spark/pyproject.toml
index 7de14c673..9f83a6816 100644
--- a/transforms/universal/resize/spark/pyproject.toml
+++ b/transforms/universal/resize/spark/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_resize_transform_spark"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Resize Spark Transform"
 license = {text = "Apache-2.0"}
@@ -10,8 +10,8 @@ authors = [
     { name = "Boris Lublinsky", email = "blublinsk@ibm.com" },
 ]
 dependencies = [
-    "dpk-resize-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[spark]==0.2.3.dev0",
+    "dpk-resize-transform-python==0.2.2",
+    "data-prep-toolkit[spark]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py
index c131d11ea..243cac6be 100644
--- a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py
+++ b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py
@@ -23,7 +23,7 @@
 task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:latest"
 
 # components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
 # path to kfp component specifications files
 
 # path to kfp component specifications files
diff --git a/transforms/universal/tokenization/python/pyproject.toml b/transforms/universal/tokenization/python/pyproject.toml
index dbb8e84ba..021a1427f 100644
--- a/transforms/universal/tokenization/python/pyproject.toml
+++ b/transforms/universal/tokenization/python/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "dpk_tokenization_transform_python"
 keywords = ["tokenizer", "data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Tokenization Transform for Python"
 license = {text = "Apache-2.0"}
diff --git a/transforms/universal/tokenization/python/requirements.txt b/transforms/universal/tokenization/python/requirements.txt
index 8a1920162..9c2a695a6 100644
--- a/transforms/universal/tokenization/python/requirements.txt
+++ b/transforms/universal/tokenization/python/requirements.txt
@@ -1,2 +1,2 @@
-data-prep-toolkit==0.2.3.dev0
+data-prep-toolkit>=0.2.2
 transformers==4.38.2
diff --git a/transforms/universal/tokenization/ray/pyproject.toml b/transforms/universal/tokenization/ray/pyproject.toml
index c094b9e7e..4cea4b905 100644
--- a/transforms/universal/tokenization/ray/pyproject.toml
+++ b/transforms/universal/tokenization/ray/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dpk_tokenization_transform_ray"
-version = "0.2.3.dev0"
+version = "0.2.2"
 requires-python = ">=3.10,<3.13"
 description = "Tokenization Transform for Ray"
 license = {text = "Apache-2.0"}
@@ -9,8 +9,8 @@ authors = [
     { name = "Xuan-Hong Dang", email = "xuan-hong.dang@ibm.com"},
 ]
 dependencies = [
-    "dpk-tokenization-transform-python==0.2.3.dev0",
-    "data-prep-toolkit[ray]==0.2.3.dev0",
+    "dpk-tokenization-transform-python==0.2.2",
+    "data-prep-toolkit[ray]>=0.2.2",
 ]
 
 [build-system]
diff --git a/transforms/universal/web2parquet/requirements.txt b/transforms/universal/web2parquet/requirements.txt
index 1af3f12a4..dfb74a6ca 100644
--- a/transforms/universal/web2parquet/requirements.txt
+++ b/transforms/universal/web2parquet/requirements.txt
@@ -1,2 +1,2 @@
-data-prep-toolkit>=0.2.3.dev0
+data-prep-toolkit>=0.2.2
 data_prep_connector>=0.2.3
\ No newline at end of file