diff --git a/.make.defaults b/.make.defaults
index 51eb984ee..80df91c8e 100644
--- a/.make.defaults
+++ b/.make.defaults
@@ -209,7 +209,7 @@ __check_defined = \
 # We create both local and remote tags. Local seems to be needed when using our spark
 # base image. Remote seems to be needed by kfp.
 .PHONY: .defaults.image
-.defaults.image:: # Must be called with a DOCKER_IMAGE= settings.
+.defaults.image:: # Must be called with a DOCKER_IMAGE_NAME= setting.
 	@# Help: Create the docker image $(DOCKER_LOCAL_IMAGE) and a tag for $(DOCKER_REMOTE_IMAGE)
 	$(call check_defined, DOCKER_IMAGE_NAME)
 	# The following touch seems to be needed to work around a docker build problem in which
@@ -222,14 +222,15 @@ __check_defined = \
 	if [ -e pyproject.toml ]; then \
 		touch pyproject.toml; \
 	fi
-	$(DOCKER) build -t $(DOCKER_LOCAL_IMAGE) $(DOCKER_BUILD_EXTRA_ARGS) \
+	$(DOCKER) build -f $(DOCKER_FILE) -t $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) $(DOCKER_BUILD_EXTRA_ARGS) \
 		--platform $(DOCKER_PLATFORM) \
 		--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
 		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
 		--build-arg DPK_WHEEL_FILE_NAME=$(DPK_WHEEL_FILE_NAME) \
 		--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
 		--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
-	$(DOCKER) tag $(DOCKER_LOCAL_IMAGE) $(DOCKER_REMOTE_IMAGE)
+	$(DOCKER) tag $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) $(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION)
+
 
 # Copy a source tree in LIB_PATH, including src, pyproject.toml to LIB_NAME
 # Generally used to copy source from within the repo into a local directory for use by a Dockerfile
@@ -244,24 +245,25 @@ __check_defined = \
 		cp -p ${LIB_PATH}/requirements.txt ${LIB_NAME}; \
 	fi
 
-
-# Build an image using the local Dockerfile and make the data-processing-lib/python
-# available in the current directory for use by the Dockerfile (i.e. to install the library).
-#.PHONY: .defaults.python-lib-src-image
-#.defaults.python-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
-#	@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-lib/python source
-#ifeq ($(USE_REPO_LIB_SRC), 1)
-#	$(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
-#endif
-#	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) .defaults.image
-#	-rm -rf data-processing-lib-python
-
 .PHONY: .default.build-lib-wheel
 .default.build-lib-wheel:
-	make -C $(REPOROOT)/data-processing-lib build-pkg-dist
+	$(MAKE) -C $(REPOROOT)/data-processing-lib build-pkg-dist
 	rm -rf data-processing-dist && mkdir data-processing-dist
 	cp $(REPOROOT)/data-processing-lib/dist/*.whl data-processing-dist
 
+
+# Build an image using the local Dockerfile.
+# Assumes the wheel has already been created.
+.PHONY: .defaults.lib-whl-image
+.defaults.lib-whl-image::
+	# Must be called with a DOCKER_IMAGE_NAME= setting.
+	@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the wheel file for the library
+	@$(eval LIB_WHEEL_FILE := $(shell find data-processing-dist/*.whl))
+	$(eval LIB_WHEEL_FILE := $(shell basename $(LIB_WHEEL_FILE)))
+	$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+
+
 # Build an image using the local Dockerfile and make the wheel for data-processing-lib
 # available in the current directory for use by the Dockerfile (i.e. to install the library).
 .PHONY: .defaults.python-lib-whl-image
@@ -270,28 +272,9 @@ __check_defined = \
 	@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the wheel file for the library
 	@$(eval LIB_WHEEL_FILE := $(shell find data-processing-dist/*.whl))
 	$(eval LIB_WHEEL_FILE := $(shell basename $(LIB_WHEEL_FILE)))
-	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+	$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
 	-rm -rf data-processing-dist
 
-# Build an image using the local Dockerfile and make the data-processing-lib/ray
-# available in the current directory for use by the Dockerfile (i.e. to install the library).
-# Note that this looks for the ../python directory, which is currently only used in the transform projects,
-# but we add it here as a convenience to avoid duplicating a lot of this in transforms/.make.transforms.
-#.PHONY: .defaults.ray-lib-src-image
-#.defaults.ray-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
-#	@# Help: Build the Ray $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
-#ifeq ($(USE_REPO_LIB_SRC), 1)
-#	$(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
-#	$(MAKE) LIB_PATH=$(DPK_RAY_LIB_DIR) LIB_NAME=data-processing-lib-ray .defaults.copy-lib
-#endif
-#	if [ -e ../python ]; then \
-#		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
-#	fi
-#	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) .defaults.image
-#	-rm -rf data-processing-lib-python
-#	-rm -rf data-processing-lib-ray
-#	-rm -rf python-transform
-
 # Build an image using the local Dockerfile and make the data-processing wheel
 # available in the current directory for use by the Dockerfile (i.e. to install the library).
@@ -306,7 +289,7 @@ __check_defined = \
 	if [ -e ../python ]; then \
 		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
 	fi
-	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+	$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
 	-rm -rf python-transform
 	-rm -rf data-processing-dist
 
@@ -316,24 +299,6 @@ __check_defined = \
 .defaults.spark-lib-base-image:
 	$(MAKE) -C $(DPK_SPARK_LIB_DIR) image
 
-# Note that this looks for the ../python directory, which is currently only used in the transform projects,
-# but we add it here as a convenience to avoid duplicating a lot of this in transforms/.make.transforms.
-# Must be called with a DOCKER_LOCAL_IMAGE= settings.
-#.PHONY: .defaults.spark-lib-src-image
-#.defaults.spark-lib-src-image:: .defaults.spark-lib-base-image
-#	@# Help: Build the Spark $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
-#	$(MAKE) IMAGE_NAME_TO_VERIFY=$(DOCKER_SPARK_BASE_IMAGE_NAME) .defaults.verify-image-availability
-#ifeq ($(USE_REPO_LIB_SRC), 1)
-#	$(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
-#	$(MAKE) LIB_PATH=$(DPK_SPARK_LIB_DIR) LIB_NAME=data-processing-lib-spark .defaults.copy-lib
-#endif
-#	if [ -e ../python ]; then \
-#		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
-#	fi
-#	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) .defaults.image
-#	-rm -rf data-processing-lib-python
-#	-rm -rf data-processing-lib-spark
-#	-rm -rf python-transform
 
 .PHONY: .defaults.spark-lib-whl-image
 .defaults.spark-lib-whl-image:: .default.build-lib-wheel .defaults.spark-lib-base-image
@@ -345,7 +310,7 @@ __check_defined = \
 	if [ -e ../python ]; then \
 		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
 	fi
-	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+	$(MAKE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
 	-rm -rf python-transform
 	-rm -rf data-processing-dist
 
diff --git a/transforms/.make.cicd.targets b/transforms/.make.cicd.targets
index 69a5f54fd..e392e8f36 100644
--- a/transforms/.make.cicd.targets
+++ b/transforms/.make.cicd.targets
@@ -7,10 +7,15 @@ include $(REPOROOT)/transforms/.make.transforms
 ######################################################################
 
-## Default setting for TRANSFORM_RUNTIME uses folder name-- Old layout
-TRANSFORM_PYTHON_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).transform
-TRANSFORM_RAY_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).ray.transform
-TRANSFORM_PYTHON_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).spark.transform
+## Default setting for TRANSFORM_RUNTIME entry point:
+#     python -m dpk_html2parquet.ray.transform --help
+# or
+#     python -m dpk_html2parquet.transform_python --help
+#
+TRANSFORM_PYTHON_SRC?="-m dpk_$(TRANSFORM_NAME).transform_python"
+TRANSFORM_RAY_SRC?="-m dpk_$(TRANSFORM_NAME).ray.transform"
+TRANSFORM_SPARK_SRC?="-m dpk_$(TRANSFORM_NAME).spark.transform"
+
 
 venv:: .defaults.create-venv
 	source venv/bin/activate && $(PIP) install -e $(REPOROOT)/data-processing-lib[ray,spark]
@@ -19,7 +24,6 @@ venv:: .defaults.create-venv
 		source venv/bin/activate && $(PIP) install -r requirements.txt; \
 	fi;
 
-
 test:: .transforms.test-src test-image
 
 clean:: .transforms.clean
 
@@ -28,62 +32,113 @@ clean:: .transforms.clean
 set-versions::
 	## We need to think how we want to do this going forward
 
-build::
-image::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) image-default ; \
-	else \
-		echo "Skipping image for $(shell pwd) since no Dockerfile is present"; \
+build:: image
+
+publish:
+	@if [ -e Dockerfile.python ]; then \
+		$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-python:$(DOCKER_IMAGE_VERSION) \
+			.defaults.publish-image ; \
+	fi
+	@if [ -e Dockerfile.ray ]; then \
+		$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-ray:$(DOCKER_IMAGE_VERSION) \
+			.defaults.publish-image ; \
+	fi
+	@if [ -e Dockerfile.spark ]; then \
+		$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-spark:$(DOCKER_IMAGE_VERSION) \
+			.defaults.publish-image ; \
+	fi
+
+test-image-sequence:: .defaults.lib-whl-image .transforms.test-image-help .transforms.clean
+
+test-image:: .default.build-lib-wheel
+	@if [ -e Dockerfile.python ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.python \
+			TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_PYTHON_SRC) \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-python \
+			test-image-sequence ; \
+	fi
+	@if [ -e Dockerfile.ray ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.ray \
+			TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_RAY_SRC) \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-ray \
+			BASE_IMAGE=$(RAY_BASE_IMAGE) \
+			test-image-sequence ; \
 	fi
+	@if [ -e Dockerfile.spark ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.spark \
+			TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_SPARK_SRC) \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-spark \
+			BASE_IMAGE=$(SPARK_BASE_IMAGE) \
+			test-image-sequence ; \
+	fi
+	-rm -rf data-processing-dist
+
 
-publish::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) publish-default ; \
-	else \
-		echo "Skipping publish for $(shell pwd) since no Dockerfile is present"; \
+image-python:
+	@if [ -e Dockerfile.python ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.python \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-python \
+			.defaults.lib-whl-image ; \
 	fi
 
-publish-image::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) publish-image-default ; \
-	else \
-		echo "Skipping publish-image for $(shell pwd) since no Dockerfile is present"; \
+image-ray:
+	@if [ -e Dockerfile.ray ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.ray \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-ray \
+			BASE_IMAGE=$(RAY_BASE_IMAGE) \
+			.defaults.lib-whl-image ; \
 	fi
 
-test-image::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) test-image-default ; \
-	else \
-		echo "Skipping test-image for $(shell pwd) since no Dockerfile is present"; \
+image-spark:
+	@if [ -e Dockerfile.spark ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.spark \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-spark \
+			BASE_IMAGE=$(SPARK_BASE_IMAGE) \
+			.defaults.lib-whl-image ; \
 	fi
 
+image:: .default.build-lib-wheel
+	## Build all possible images unless a specific runtime is specified
+	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" == "python" ]; then \
+		$(MAKE) image-python ; \
+	fi
+	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" == "ray" ]; then \
+		$(MAKE) image-ray ; \
+	fi
+	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" == "spark" ]; then \
+		$(MAKE) image-spark ; \
+	fi
+	-rm -rf data-processing-dist
+
 test-src:: .transforms.test-src
 
 setup:: .transforms.setup
 
-publish-default:: publish-image
-
-publish-image-default:: .defaults.publish-image
-
-test-image-default:: image .transforms.test-image-help .defaults.test-image-pytest .transforms.clean
-
-build-lib-wheel:
-	make -C $(REPOROOT)/data-processing-lib build-pkg-dist
-
-image-default:: build-lib-wheel
-	@$(eval LIB_WHEEL_FILE := $(shell find $(REPOROOT)/data-processing-lib/dist/*.whl))
-	rm -fr dist && mv $(REPOROOT)/data-processing-lib/dist .
-	$(eval WHEEL_FILE_NAME := $(shell basename $(LIB_WHEEL_FILE)))
-	$(DOCKER) build -t $(DOCKER_IMAGE_NAME) $(DOCKER_BUILD_EXTRA_ARGS) \
-		--platform $(DOCKER_PLATFORM) \
-		--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
-		--build-arg BASE_IMAGE=$(RAY_BASE_IMAGE) \
-		--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
-		--build-arg WHEEL_FILE_NAME=$(WHEEL_FILE_NAME) \
-		--build-arg TRANSFORM_NAME=$(TRANSFORM_NAME) \
-		--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
-	$(DOCKER) tag $(DOCKER_LOCAL_IMAGE) $(DOCKER_REMOTE_IMAGE)
-	rm -fr dist
+kind-load-image:: .transforms.kind-load-image
+
+.PHONY: workflow-venv
+workflow-venv:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-venv; \
+	fi
+
+.PHONY: workflow-test
+workflow-test:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-test; \
+	fi
+
+.PHONY: workflow-upload
+workflow-upload:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-upload; \
+	fi
+
+.PHONY: workflow-build
+workflow-build:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-build; \
+	fi
diff --git a/transforms/language/doc_chunk/python/Dockerfile b/transforms/language/doc_chunk/Dockerfile.python
similarity index 70%
rename from transforms/language/doc_chunk/python/Dockerfile
rename to transforms/language/doc_chunk/Dockerfile.python
index 358f9ca13..2571a065c 100644
--- a/transforms/language/doc_chunk/python/Dockerfile
+++ b/transforms/language/doc_chunk/Dockerfile.python
@@ -20,20 +20,9 @@ RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}
 
 # END OF STEPS destined for a data-prep-kit base image
 
-COPY --chown=dpk:root src/ src/
-COPY --chown=dpk:root pyproject.toml pyproject.toml
+COPY --chown=dpk:root dpk_doc_chunk/ dpk_doc_chunk/
 COPY --chown=dpk:root requirements.txt requirements.txt
-RUN pip install ${PIP_INSTALL_EXTRA_ARGS} --no-cache-dir -e .
-
-# copy transform main() entry point to the image
-COPY ./src/doc_chunk_transform_python.py .
-
-# copy some of the samples in
-COPY ./src/doc_chunk_local.py local/
-
-# copy test
-COPY test/ test/
-COPY test-data/ test-data/
+RUN pip install ${PIP_INSTALL_EXTRA_ARGS} -r requirements.txt
 
 # Set environment
 ENV PYTHONPATH /home/dpk
diff --git a/transforms/language/doc_chunk/ray/Dockerfile b/transforms/language/doc_chunk/Dockerfile.ray
similarity index 50%
rename from transforms/language/doc_chunk/ray/Dockerfile
rename to transforms/language/doc_chunk/Dockerfile.ray
index c64771cc9..1dbf6cfcf 100644
--- a/transforms/language/doc_chunk/ray/Dockerfile
+++ b/transforms/language/doc_chunk/Dockerfile.ray
@@ -12,26 +12,10 @@ ARG DPK_WHEEL_FILE_NAME
 COPY --chown=ray:users data-processing-dist data-processing-dist
 RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]
 
-## Copy the python version of the tansform
-COPY --chown=ray:users python-transform/ python-transform/
-RUN cd python-transform && pip install ${PIP_INSTALL_EXTRA_ARGS} --no-cache-dir -e .
-#COPY requirements.txt requirements.txt
-#RUN pip install --no-cache-dir -r requirements.txt
-
-COPY --chown=ray:users src/ src/
-COPY --chown=ray:users pyproject.toml pyproject.toml
-RUN pip install ${PIP_INSTALL_EXTRA_ARGS} --no-cache-dir -e .
-
-# copy the main() entry point to the image
-COPY ./src/doc_chunk_transform_ray.py .
-
-# copy some of the samples in
-COPY ./src/doc_chunk_local_ray.py local/
-
-# copy test
-COPY test/ test/
-COPY test-data/ test-data/
+COPY --chown=ray:users dpk_doc_chunk/ dpk_doc_chunk/
+COPY --chown=ray:users requirements.txt requirements.txt
+RUN pip install ${PIP_INSTALL_EXTRA_ARGS} --no-cache-dir -r requirements.txt
 
 # Set environment
 ENV PYTHONPATH /home/ray
diff --git a/transforms/language/doc_chunk/Makefile b/transforms/language/doc_chunk/Makefile
index bca6f7e85..8e3af6da6 100644
--- a/transforms/language/doc_chunk/Makefile
+++ b/transforms/language/doc_chunk/Makefile
@@ -1,79 +1,24 @@
 REPOROOT=../../..
 # Use make help, to see the available rules
-include $(REPOROOT)/.make.defaults
-
-setup::
-	@# Help: Recursively make $@ all subdirs
-	$(MAKE) RULE=$@ .recurse
-
-clean::
-	@# Help: Recursively make $@ all subdirs
-	$(MAKE) RULE=$@ .recurse
-
-build::
-	@# Help: Recursively make $@ in subdirs
-	$(MAKE) RULE=$@ .recurse
-venv::
-	@# Help: Recursively make $@ in subdirs
-	$(MAKE) RULE=$@ .recurse
-
-image::
-	@# Help: Recursively make $@ in all subdirs
-	@$(MAKE) RULE=$@ .recurse
-
-set-versions:
-	@# Help: Recursively $@ in all subdirs
-	@$(MAKE) RULE=$@ .recurse
-
-publish::
-	@# Help: Recursively make $@ in all subdirs
-	@$(MAKE) RULE=$@ .recurse
-
-test-image::
-	@# Help: Recursively make $@ in all subdirs
-	@$(MAKE) RULE=$@ .recurse
-
-test::
-	@# Help: Recursively make $@ in all subdirs
-	@$(MAKE) RULE=$@ .recurse
-
-test-src::
-	@# Help: Recursively make $@ in all subdirs
-	$(MAKE) RULE=$@ .recurse
-
-kind-load-image::
-	@# Help: Recursively make $@ in all subdirs
-	$(MAKE) RULE=$@ .recurse
-
-docker-load-image::
-	@# Help: Recursively make $@ in all subdirs
-	$(MAKE) RULE=$@ .recurse
-
-docker-save-image::
-	@# Help: Recursively make $@ in all subdirs
-	$(MAKE) RULE=$@ .recurse
-
-.PHONY: workflow-venv
-workflow-venv:
-	if [ -e kfp_ray ]; then \
-		$(MAKE) -C kfp_ray workflow-venv; \
-	fi
-
-.PHONY: workflow-test
-workflow-test:
-	if [ -e kfp_ray ]; then \
-		$(MAKE) -C kfp_ray workflow-test; \
-	fi
-
-.PHONY: workflow-upload
-workflow-upload:
-	if [ -e kfp_ray ]; then \
-		$(MAKE) -C kfp_ray workflow-upload; \
-	fi
-
-.PHONY: workflow-build
-workflow-build:
-	if [ -e kfp_ray ]; then \
-		$(MAKE) -C kfp_ray workflow-build; \
-	fi
-
+include $(REPOROOT)/transforms/.make.cicd.targets
+
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=$(shell basename `pwd`)
+
+################################################################################
+
+
+LINUX_WITH_CPU_TORCH?=true
+OS := $(shell uname -s)
+ifeq ($(OS),Linux)
+	ifeq ($(LINUX_WITH_CPU_TORCH),true)
+		PIP_INSTALL_EXTRA_ARGS=--extra-index-url=https://download.pytorch.org/whl/cpu
+		DOCKER_BUILD_EXTRA_ARGS=--build-arg PIP_INSTALL_EXTRA_ARGS=${PIP_INSTALL_EXTRA_ARGS}
+	endif
+endif
diff --git a/transforms/language/doc_chunk/README.md b/transforms/language/doc_chunk/README.md
index e4a58a3bc..7010439d4 100644
--- a/transforms/language/doc_chunk/README.md
+++ b/transforms/language/doc_chunk/README.md
@@ -1,12 +1,164 @@
 # Chunk documents Transform
 
-This transform is chunking documents. It supports multiple _chunker modules_.
-More details as well as a description of the parameters can be found in the [python/README.md](python/README.md).
-
-* [python](python/README.md) - provides the base python-based transformation
-implementation.
-* [ray](ray/README.md) - enables the running of the base python transformation
-in a Ray runtime
-* [kfp](kfp_ray/README.md) - enables running the ray docker image
-in a kubernetes cluster using a generated `yaml` file.
+Please see the set of
+[transform project conventions](../../README.md#transform-project-conventions)
+for details on general project conventions, transform configuration,
+testing and IDE set up.
+
+## Contributors
+
+- Michele Dolfi (dol@zurich.ibm.com)
+
+## Description
+
+This transform chunks documents. It supports multiple _chunker modules_ (see the `chunking_type` parameter).
+
+When using documents converted to JSON, the transform leverages the [Docling Core](https://github.com/DS4SD/docling-core) `HierarchicalChunker`
+to chunk according to the document layout segmentation, i.e. respecting the original document components such as paragraphs, tables, enumerations, etc.
+It relies on documents converted with the Docling library in the [pdf2parquet transform](../pdf2parquet/python/README.md) using the option `contents_type: "application/json"`,
+which provides the required JSON structure.
+
+When using documents converted to Markdown, the transform leverages the [Llama Index](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/#markdownnodeparser) `MarkdownNodeParser`, which relies on its internal Markdown splitting logic.
+
+### Input
+
+| input column name | data type | description |
+|-|-|-|
+| the one specified in _content_column_name_ configuration | string | the content used in this transform |
+
+### Output format
+
+The output parquet file will contain all the original columns, but the content will be replaced with the individual chunks.
+
+#### Tracing the origin of the chunks
+
+The transform allows tracing the origin of each chunk via the `source_doc_id` column, which is set to the value of the `document_id` column (if present) in the input table.
+The actual column names can be customized with the parameters described below.
+
+## Configuration
+
+The transform can be tuned with the following parameters.
+
+| Parameter | Default | Description |
+|------------|----------|--------------|
+| `chunking_type` | `dl_json` | Chunking type to apply. Valid options are `li_markdown` for using the LlamaIndex [Markdown chunking](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/#markdownnodeparser), `dl_json` for using the [Docling JSON chunking](https://github.com/DS4SD/docling), and `li_token_text` for using the LlamaIndex [Token Text Splitter](https://docs.llamaindex.ai/en/stable/api_reference/node_parsers/token_text_splitter/), which chunks the text into fixed-sized windows of tokens. |
+| `content_column_name` | `contents` | Name of the column containing the text to be chunked. |
+| `doc_id_column_name` | `document_id` | Name of the column containing the doc_id to be propagated in the output. |
+| `chunk_size_tokens` | `128` | Size of the chunk in tokens for the token text chunker. |
+| `chunk_overlap_tokens` | `30` | Number of tokens overlapping between chunks for the token text chunker. |
+| `output_chunk_column_name` | `contents` | Column name to store the chunks in the output table. |
+| `output_source_doc_id_column_name` | `source_document_id` | Column name to store the `doc_id` from the input table. |
+| `output_jsonpath_column_name` | `doc_jsonpath` | Column name to store the document path of the chunk in the output table. |
+| `output_pageno_column_name` | `page_number` | Column name to store the page number of the chunk in the output table. |
+| `output_bbox_column_name` | `bbox` | Column name to store the bbox of the chunk in the output table. |
+
+## Usage
+
+### Launched Command Line Options
+
+When invoking the CLI, the parameters must be prefixed with `--doc_chunk_`, e.g. `--doc_chunk_column_name_key=myoutput`.
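+
+For example, a minimal local invocation might look like the following sketch (the
+`--data_local_config` input/output option is assumed here from the common DPK launcher
+conventions, and the folder paths are placeholders):
+
+```shell
+python -m dpk_doc_chunk.transform_python \
+    --data_local_config "{'input_folder': 'test-data/input', 'output_folder': 'output'}" \
+    --doc_chunk_chunking_type dl_json
+```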
+
+### Running the samples
+To run the samples, use the following `make` targets:
+
+* `run-cli-sample` - runs dpk_doc_chunk/transform_python.py using command line args
+* `run-local-sample` - runs dpk_doc_chunk/local.py
+
+These targets will activate the virtual environment and set up any configuration needed.
+Use the `-n` option of `make` to see the detail of what is done to run the sample.
+
+For example,
+```shell
+make run-cli-sample
+...
+```
+Then
+```shell
+ls output
+```
+to see the results of the transform.
+
+### Code example
+
+TBD (link to the notebook will be provided)
+
+See the sample script [dpk_doc_chunk/local_python.py](dpk_doc_chunk/local_python.py).
+
+### Transforming data using the transform image
+
+To use the transform image to transform your data, please refer to the
+[running images quickstart](../../../doc/quick-start/run-transform-image.md),
+substituting the name of this transform image and runtime as appropriate.
+
+## Testing
+
+Following [the testing strategy of data-processing-lib](../../../data-processing-lib/doc/transform-testing.md)
+
+Currently we have:
+- [Unit test](test/test_doc_chunk_python.py)
+
+## Further Resources
+
+- For the [Docling Core](https://github.com/DS4SD/docling-core) `HierarchicalChunker`
+- For the Markdown chunker in LlamaIndex
+  - [Markdown chunking](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/#markdownnodeparser)
+- For the Token Text Splitter in LlamaIndex
+  - [Token Text Splitter](https://docs.llamaindex.ai/en/stable/api_reference/node_parsers/token_text_splitter/)
+
+# Chunk documents Ray Transform
+
+## Summary
+This project wraps the doc_chunk python transform with a Ray runtime.
+
+## Configuration and command line Options
+
+Chunk documents configuration and command line options are the same as for the base python transform.
+
+## Running
+
+### Launched Command Line Options
+In addition to those available to the transform as defined above,
+the set of
+[ray launcher](../../../data-processing-lib/doc/ray-launcher-options.md) options are available.
+
+### Running the samples
+To run the samples, use the following `make` targets:
+
+* `run-cli-sample` - runs dpk_doc_chunk/ray/transform.py using command line args
+* `run-local-sample` - runs dpk_doc_chunk/ray/local.py
+* `run-s3-sample` - runs dpk_doc_chunk/ray/s3.py
+  * Requires prior installation of minio, depending on your platform (e.g., from [here](https://min.io/docs/minio/macos/index.html)
+    and [here](https://min.io/docs/minio/linux/index.html)),
+    and invocation of `make minio-start` to load data into local minio for S3 access.
+
+These targets will activate the virtual environment and set up any configuration needed.
+Use the `-n` option of `make` to see the detail of what is done to run the sample.
+
+For example,
+```shell
+make run-cli-sample
+...
+```
+Then
+```shell
+ls output
+```
+to see the results of the transform.
+
+### Transforming data using the transform image
+
+To use the transform image to transform your data, please refer to the
+[running images quickstart](../../../doc/quick-start/run-transform-image.md),
+substituting the name of this transform image and runtime as appropriate.
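+
+As a sketch of a local Ray run (the `--run_locally` and `--data_local_config` options
+are assumed from the common DPK Ray launcher conventions; the folder paths are
+placeholders):
+
+```shell
+python -m dpk_doc_chunk.ray.transform \
+    --run_locally True \
+    --data_local_config "{'input_folder': 'test-data/input', 'output_folder': 'output'}" \
+    --doc_chunk_chunking_type li_markdown
+```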
diff --git a/transforms/language/doc_chunk/dpk_doc_chunk/__init__.py b/transforms/language/doc_chunk/dpk_doc_chunk/__init__.py
new file mode 100644
index 000000000..29621e921
--- /dev/null
+++ b/transforms/language/doc_chunk/dpk_doc_chunk/__init__.py
@@ -0,0 +1 @@
+from .transform import *
diff --git a/transforms/language/doc_chunk/python/src/doc_chunk_chunkers.py b/transforms/language/doc_chunk/dpk_doc_chunk/chunkers.py
similarity index 100%
rename from transforms/language/doc_chunk/python/src/doc_chunk_chunkers.py
rename to transforms/language/doc_chunk/dpk_doc_chunk/chunkers.py
diff --git a/transforms/language/doc_chunk/python/src/doc_chunk_local.py b/transforms/language/doc_chunk/dpk_doc_chunk/local.py
similarity index 96%
rename from transforms/language/doc_chunk/python/src/doc_chunk_local.py
rename to transforms/language/doc_chunk/dpk_doc_chunk/local.py
index 8c016bf7d..956205bad 100644
--- a/transforms/language/doc_chunk/python/src/doc_chunk_local.py
+++ b/transforms/language/doc_chunk/dpk_doc_chunk/local.py
@@ -13,7 +13,7 @@
 import os
 
 from data_processing.data_access import DataAccessLocal
-from doc_chunk_transform import DocChunkTransform
+from dpk_doc_chunk.transform import DocChunkTransform
 
 
 # create parameters
diff --git a/transforms/language/doc_chunk/python/src/doc_chunk_local_python.py b/transforms/language/doc_chunk/dpk_doc_chunk/local_python.py
similarity index 95%
rename from transforms/language/doc_chunk/python/src/doc_chunk_local_python.py
rename to transforms/language/doc_chunk/dpk_doc_chunk/local_python.py
index 0c830ee98..51fd4de50 100644
--- a/transforms/language/doc_chunk/python/src/doc_chunk_local_python.py
+++ b/transforms/language/doc_chunk/dpk_doc_chunk/local_python.py
@@ -16,8 +16,8 @@
 from data_processing.runtime.pure_python import PythonTransformLauncher
 from data_processing.utils import ParamsUtils
-from doc_chunk_transform_python import DocChunkPythonTransformConfiguration
-from doc_chunk_transform import chunking_types
+from dpk_doc_chunk.transform_python import DocChunkPythonTransformConfiguration
+from dpk_doc_chunk.transform import chunking_types
 
 # create parameters
 input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input"))
diff --git a/transforms/language/doc_chunk/dpk_doc_chunk/ray/__init__.py b/transforms/language/doc_chunk/dpk_doc_chunk/ray/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/transforms/language/doc_chunk/ray/src/doc_chunk_local_ray.py b/transforms/language/doc_chunk/dpk_doc_chunk/ray/local.py
similarity index 96%
rename from transforms/language/doc_chunk/ray/src/doc_chunk_local_ray.py
rename to transforms/language/doc_chunk/dpk_doc_chunk/ray/local.py
index 1e2173150..33f601667 100644
--- a/transforms/language/doc_chunk/ray/src/doc_chunk_local_ray.py
+++ b/transforms/language/doc_chunk/dpk_doc_chunk/ray/local.py
@@ -15,7 +15,7 @@
 from data_processing.utils import ParamsUtils
 from data_processing_ray.runtime.ray import RayTransformLauncher
-from doc_chunk_transform_ray import DocChunkRayTransformConfiguration
+from dpk_doc_chunk.ray.transform import DocChunkRayTransformConfiguration
 
 
 # create parameters
diff --git a/transforms/language/doc_chunk/ray/src/doc_chunk_s3_ray.py b/transforms/language/doc_chunk/dpk_doc_chunk/ray/s3.py
similarity index 96%
rename from transforms/language/doc_chunk/ray/src/doc_chunk_s3_ray.py
rename to transforms/language/doc_chunk/dpk_doc_chunk/ray/s3.py
index 929674a61..1519b603f 100644
--- a/transforms/language/doc_chunk/ray/src/doc_chunk_s3_ray.py
+++ b/transforms/language/doc_chunk/dpk_doc_chunk/ray/s3.py
@@ -15,7 +15,7 @@
 from data_processing.utils import ParamsUtils
 from data_processing_ray.runtime.ray import RayTransformLauncher
-from doc_chunk_transform_ray import DocChunkRayTransformConfiguration
+from dpk_doc_chunk.ray.transform import DocChunkRayTransformConfiguration
 
 
 print(os.environ)
diff --git a/transforms/language/doc_chunk/ray/src/doc_chunk_transform_ray.py b/transforms/language/doc_chunk/dpk_doc_chunk/ray/transform.py
similarity index 96%
rename from transforms/language/doc_chunk/ray/src/doc_chunk_transform_ray.py
rename to transforms/language/doc_chunk/dpk_doc_chunk/ray/transform.py
index 1dffdbd49..8ebe326ef 100644
--- a/transforms/language/doc_chunk/ray/src/doc_chunk_transform_ray.py
+++ b/transforms/language/doc_chunk/dpk_doc_chunk/ray/transform.py
@@ -23,7 +23,7 @@
 from data_processing_ray.runtime.ray.runtime_configuration import (
     RayTransformRuntimeConfiguration,
 )
-from doc_chunk_transform import DocChunkTransformConfiguration
+from dpk_doc_chunk.transform import DocChunkTransformConfiguration
 
 logger = get_logger(__name__)
diff --git a/transforms/language/doc_chunk/python/src/doc_chunk_transform.py b/transforms/language/doc_chunk/dpk_doc_chunk/transform.py
similarity index 99%
rename from transforms/language/doc_chunk/python/src/doc_chunk_transform.py
rename to transforms/language/doc_chunk/dpk_doc_chunk/transform.py
index e64a7c1d1..55c287cc8 100644
--- a/transforms/language/doc_chunk/python/src/doc_chunk_transform.py
+++ b/transforms/language/doc_chunk/dpk_doc_chunk/transform.py
@@ -11,14 +11,13 @@
 ################################################################################
 
 import enum
-import time
 from argparse import ArgumentParser, Namespace
 from typing import Any
 
 import pyarrow as pa
 from data_processing.transform import AbstractTableTransform, TransformConfiguration
 from data_processing.utils import CLIArgumentProvider, TransformUtils, get_logger
-from doc_chunk_chunkers import ChunkingExecutor, DLJsonChunker, LIMarkdown, LITokenTextSplitter
+from dpk_doc_chunk.chunkers import ChunkingExecutor, DLJsonChunker, LIMarkdown, LITokenTextSplitter
 
 
 short_name = "doc_chunk"
diff --git a/transforms/language/doc_chunk/python/src/doc_chunk_transform_python.py b/transforms/language/doc_chunk/dpk_doc_chunk/transform_python.py
similarity index 96%
rename from transforms/language/doc_chunk/python/src/doc_chunk_transform_python.py
rename to transforms/language/doc_chunk/dpk_doc_chunk/transform_python.py
index 1d2738c3f..f037caeb0 100644
--- a/transforms/language/doc_chunk/python/src/doc_chunk_transform_python.py
+++ b/transforms/language/doc_chunk/dpk_doc_chunk/transform_python.py
@@ -15,7 +15,7 @@
     PythonTransformRuntimeConfiguration,
 )
 from data_processing.utils import get_logger
-from doc_chunk_transform import DocChunkTransformConfiguration
+from dpk_doc_chunk.transform import DocChunkTransformConfiguration
 
 logger = get_logger(__name__)
diff --git a/transforms/language/doc_chunk/kfp_ray/Makefile b/transforms/language/doc_chunk/kfp_ray/Makefile
index 30e912e33..fcc12450d 100644
--- a/transforms/language/doc_chunk/kfp_ray/Makefile
+++ b/transforms/language/doc_chunk/kfp_ray/Makefile
@@ -2,10 +2,15 @@ REPOROOT=${CURDIR}/../../../../
 WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
 include $(REPOROOT)/transforms/.make.workflows
 
-# Include the common configuration for this transform
-include ../transform.config
+SRC_DIR=${CURDIR}/../
+# Use the docker image that is built for ray runtime
+TRANSFORM_RUNTIME=ray
+## override settings in .make.defaults as they assume the old structure with ray being the current folder
+DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-$(TRANSFORM_RUNTIME)
+DOCKER_LOCAL_IMAGE=$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION)
 
-SRC_DIR=${CURDIR}/../ray/
+# Only build the image with -f Dockerfile.ray
+BUILD_SPECIFIC_RUNTIME=ray
 
 PYTHON_WF := $(shell find ./ -name '*_wf.py')
 YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})
@@ -17,27 +22,6 @@ clean:
 	@# Help: Clean up the virtual environment.
 	rm -rf ${REPOROOT}/transforms/venv
 
-venv::
-
-build::
-
-setup::
-
-test::
-
-test-src::
-
-test-image::
-
-publish::
-
-image::
-
-kind-load-image::
-
-docker-load-image::
-
-docker-save-image::
 
 .PHONY: workflow-build
 workflow-build: workflow-venv
@@ -45,10 +29,15 @@ workflow-build: workflow-venv
 
 .PHONY: workflow-test
 workflow-test: workflow-build
-	$(MAKE) .workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=doc_chunk_wf.yaml
+	$(MAKE) TRANSFORM_SRC=${SRC_DIR} \
+		TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \
+		TRANSFORM_NAME=$(TRANSFORM_NAME) \
+		BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \
+		DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \
+		PIPELINE_FILE=doc_chunk_wf.yaml .workflows.test-pipeline
 
 .PHONY: workflow-upload
 workflow-upload: workflow-build
 	@for file in $(YAML_WF); do \
 		$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
-	done
+	done
\ No newline at end of file
diff --git a/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py b/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py
index 387c3bda7..580443bf3 100644
--- a/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py
+++ b/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py
@@ -20,7 +20,7 @@
 task_image = "quay.io/dataprep1/data-prep-kit/doc_chunk-ray:latest"
 
 # the name of the job script
-EXEC_SCRIPT_NAME: str = "doc_chunk_transform_ray.py"
+EXEC_SCRIPT_NAME: str = "-m dpk_doc_chunk.ray.transform"
 
 # components
 base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
diff --git a/transforms/language/doc_chunk/python/.dockerignore b/transforms/language/doc_chunk/python/.dockerignore
deleted file mode 100644
index f7275bbbd..000000000
--- a/transforms/language/doc_chunk/python/.dockerignore
+++ /dev/null
@@ -1 +0,0 @@
-venv/
diff --git a/transforms/language/doc_chunk/python/Makefile b/transforms/language/doc_chunk/python/Makefile
deleted file mode 100644
index 2f2a7e789..000000000
--- a/transforms/language/doc_chunk/python/Makefile
+++ /dev/null
@@ -1,74 +0,0 @@
-# Define the root of the local git clone for the common rules to be able
-# know where they are running from.
-REPOROOT=../../../..
-
-# Set this, before including .make.defaults, to
-#   1 if requirements reference the latest code in the data processing library
-#     in this repo (that is not yet published to pypi). This is the default setting.
-#   0 if the transforms DPK dependencies are on wheels published to
-#     pypi (e.g. data-prep-toolkit=0.2.1)
-#USE_REPO_LIB_SRC=1
-
-# Include a library of common .transform.* targets which most
-# transforms should be able to reuse. However, feel free
-# to override/redefine the rules below.
-include $(REPOROOT)/transforms/.make.transforms
-
-# Include the common configuration for this transform
-include ../transform.config
-
-LINUX_WITH_CPU_TORCH?=true
-OS := $(shell uname -s)
-ifeq ($(OS),Linux)
-	ifeq ($(LINUX_WITH_CPU_TORCH),true)
-		PIP_INSTALL_EXTRA_ARGS=--extra-index-url=https://download.pytorch.org/whl/cpu
-		DOCKER_BUILD_EXTRA_ARGS=--build-arg PIP_INSTALL_EXTRA_ARGS=${PIP_INSTALL_EXTRA_ARGS}
-	endif
-endif
-
-
-venv:: .transforms.python-venv
-
-test:: .transforms.python-test
-
-clean:: .transforms.clean
-
-image:: .transforms.python-image
-
-test-src:: .transforms.test-src
-
-setup:: .transforms.setup
-
-build:: build-dist image
-
-publish: publish-image
-
-publish-image:: .transforms.publish-image-python
-
-setup:: .transforms.setup
-
-# distribution versions is the same as image version.
-set-versions:
-	$(MAKE) TRANSFORM_PYTHON_VERSION=$(DOC_CHUNK_PYTHON_VERSION) TOML_VERSION=$(DOC_CHUNK_PYTHON_VERSION) .transforms.set-versions
-
-build-dist:: set-versions .defaults.build-dist
-
-publish-dist:: .defaults.publish-dist
-
-test-image:: .transforms.python-test-image
-
-run-cli-sample: .transforms.run-cli-python-sample
-
-run-local-sample: .transforms.run-local-sample
-
-run-local-python-sample: .transforms.run-local-python-sample
-
-#run-s3-ray-sample: .transforms.run-s3-ray-sample
-
-minio-start: .minio-start
-
-kind-load-image:: .transforms.kind-load-image
-
-docker-load-image: .defaults.docker-load-image
-
-docker-save-image: .defaults.docker-save-image
diff --git a/transforms/language/doc_chunk/python/README.md b/transforms/language/doc_chunk/python/README.md
deleted file mode 100644
index 1ec3a8080..000000000
--- a/transforms/language/doc_chunk/python/README.md
+++ /dev/null
@@ -1,117 +0,0 @@
-# Chunk documents Transform
-
-Please see the set of
-[transform project conventions](../../../README.md#transform-project-conventions)
-for details on general project conventions, transform configuration,
-testing and IDE set up.
-
-## Contributors
-
-- Michele Dolfi (dol@zurich.ibm.com)
-
-## Description
-
-This transform is chunking documents. It supports multiple _chunker modules_ (see the `chunking_type` parameter).
-
-When using documents converted to JSON, the transform leverages the [Docling Core](https://github.com/DS4SD/docling-core) `HierarchicalChunker`
-to chunk according to the document layout segmentation, i.e. respecting the original document components as paragraphs, tables, enumerations, etc.
-It relies on documents converted with the Docling library in the [pdf2parquet transform](../../pdf2parquet/python/README.md) using the option `contents_type: "application/json"`,
-which provides the required JSON structure.
-
-When using documents converted to Markdown, the transform leverages the [Llama Index](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/#markdownnodeparser) `MarkdownNodeParser`, which is relying on its internal Markdown splitting logic.
-
-
-### Input
-
-| input column name | data type | description |
-|-|-|-|
-| the one specified in _content_column_name_ configuration | string | the content used in this transform |
-
-
-### Output format
-
-The output parquet file will contain all the original columns, but the content will be replaced with the individual chunks.
-
-
-#### Tracing the origin of the chunks
-
-The transform allows to trace the origin of the chunk with the `source_doc_id` which is set to the value of the `document_id` column (if present) in the input table.
-The actual name of columns can be customized with the parameters described below.
-
-
-## Configuration
-
-The transform can be tuned with the following parameters.
-
-
-| Parameter | Default | Description |
-|------------|----------|--------------|
-| `chunking_type` | `dl_json` | Chunking type to apply. Valid options are `li_markdown` for using the LlamaIndex [Markdown chunking](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/#markdownnodeparser), `dl_json` for using the [Docling JSON chunking](https://github.com/DS4SD/docling), `li_token_text` for using the LlamaIndex [Token Text Splitter](https://docs.llamaindex.ai/en/stable/api_reference/node_parsers/token_text_splitter/), which chunks the text into fixed-sized windows of tokens. |
-| `content_column_name` | `contents` | Name of the column containing the text to be chunked. |
-| `doc_id_column_name` | `document_id` | Name of the column containing the doc_id to be propagated in the output. |
-| `chunk_size_tokens` | `128` | Size of the chunk in tokens for the token text chunker. |
-| `chunk_overlap_tokens` | `30` | Number of tokens overlapping between chunks for the token text chunker. |
-| `output_chunk_column_name` | `contents` | Column name to store the chunks in the output table. |
-| `output_source_doc_id_column_name` | `source_document_id` | Column name to store the `doc_id` from the input table. |
-| `output_jsonpath_column_name`| `doc_jsonpath` | Column name to store the document path of the chunk in the output table. |
-| `output_pageno_column_name` | `page_number` | Column name to store the page number of the chunk in the output table. |
-| `output_bbox_column_name` | `bbox` | Column name to store the bbox of the chunk in the output table. |
-
-
-
-## Usage
-
-### Launched Command Line Options
-
-When invoking the CLI, the parameters must be set as `--doc_chunk_`, e.g. `--doc_chunk_column_name_key=myoutput`.
-
-
-### Running the samples
-To run the samples, use the following `make` targets
-
-* `run-cli-sample` - runs src/doc_chunk_transform.py using command line args
-* `run-local-sample` - runs src/doc_chunk_local.py
-
-These targets will activate the virtual environment and set up any configuration needed.
-Use the `-n` option of `make` to see the detail of what is done to run the sample.
-
-For example,
-```shell
-make run-cli-sample
-...
-```
-Then
-```shell
-ls output
-```
-To see results of the transform.
-
-### Code example
-
-TBD (link to the notebook will be provided)
-
-See the sample script [src/doc_chunk_local_python.py](src/doc_chunk_local_python.py).
-
-
-### Transforming data using the transform image
-
-To use the transform image to transform your data, please refer to the
-[running images quickstart](../../../../doc/quick-start/run-transform-image.md),
-substituting the name of this transform image and runtime as appropriate.
-
-## Testing
-
-Following [the testing strategy of data-processing-lib](../../../../data-processing-lib/doc/transform-testing.md)
-
-Currently we have:
-- [Unit test](test/test_doc_chunk_python.py)
-
-
-## Further Resource
-
-- For the [Docling Core](https://github.com/DS4SD/docling-core) `HierarchicalChunker`
-  -
-- For the Markdown chunker in LlamaIndex
-  - [Markdown chunking](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/#markdownnodeparser)
-- For the Token Text Splitter in LlamaIndex
-  - [Token Text Splitter](https://docs.llamaindex.ai/en/stable/api_reference/node_parsers/token_text_splitter/)
diff --git a/transforms/language/doc_chunk/python/pyproject.toml b/transforms/language/doc_chunk/python/pyproject.toml
deleted file mode 100644
index c9728712e..000000000
--- a/transforms/language/doc_chunk/python/pyproject.toml
+++ /dev/null
@@ -1,47 +0,0 @@
-[project]
-name = "dpk_doc_chunk_transform_python"
-version = "0.3.0"
-requires-python = ">=3.10,<3.13"
-description = "chunk documents Python Transform"
-license = {text = "Apache-2.0"}
-readme = {file = "README.md", content-type = "text/markdown"}
-authors = [
-    { name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
-    { name = "Panos Vagenas", email = "pva@zurich.ibm.com" },
-    { name = "Christoph Auer", email = "cau@zurich.ibm.com" },
-]
-dynamic = ["dependencies"]
-
-[build-system]
-requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
-build-backend = "setuptools.build_meta"
-
-[tool.setuptools.dynamic]
-dependencies = {file = ["requirements.txt"]}
-
-[project.optional-dependencies]
-dev = [
-    "twine",
-    "pytest>=7.3.2",
-    "pytest-dotenv>=0.5.2",
-    "pytest-env>=1.0.0",
-    "pre-commit>=3.3.2",
-    "pytest-cov>=4.1.0",
-    "pytest-mock>=3.10.0",
-    "moto==5.0.5",
-    "markupsafe==2.0.1",
-]
-
-[options]
-package_dir = ["src","test"]
-
-[options.packages.find]
-where = ["src/"]
-
-[tool.pytest.ini_options]
-# Currently we use low coverage since we have to run tests separately (see makefile)
-#addopts = "--cov --cov-report term-missing --cov-fail-under 25"
-markers = ["unit: unit tests", "integration: integration tests"]
-
-[tool.coverage.run]
-include = ["src/*"]
diff --git a/transforms/language/doc_chunk/ray/Makefile b/transforms/language/doc_chunk/ray/Makefile
deleted file mode 100644
index b4f394f84..000000000
--- a/transforms/language/doc_chunk/ray/Makefile
+++ /dev/null
@@ -1,72 +0,0 @@
-# Define the root of the local git clone for the common rules to be able
-# know where they are running from.
-REPOROOT=../../../..
-
-# Set this, before including .make.defaults, to
-#   1 if requirements reference the latest code in the data processing library
-#     in this repo (that is not yet published to pypi). This is the default setting.
-#   0 if the transforms DPK dependencies are on wheels published to
-#     pypi (e.g. data-prep-toolkit=0.2.1)
-#USE_REPO_LIB_SRC=1
-
-# Include a library of common .transform.* targets which most
-# transforms should be able to reuse. However, feel free
-# to override/redefine the rules below.
-include $(REPOROOT)/transforms/.make.transforms
-
-# Include the common configuration for this transform
-include ../transform.config
-
-LINUX_WITH_CPU_TORCH?=true
-OS := $(shell uname -s)
-ifeq ($(OS),Linux)
-	ifeq ($(LINUX_WITH_CPU_TORCH),true)
-		PIP_INSTALL_EXTRA_ARGS=--extra-index-url=https://download.pytorch.org/whl/cpu
-		DOCKER_BUILD_EXTRA_ARGS=--build-arg PIP_INSTALL_EXTRA_ARGS=${PIP_INSTALL_EXTRA_ARGS}
-	endif
-endif
-
-BASE_IMAGE=${RAY_BASE_IMAGE}
-venv:: .transforms.ray-venv
-
-test:: .transforms.ray-test
-
-clean:: .transforms.clean
-
-image:: .transforms.ray-image
-
-test-src:: .transforms.test-src
-
-setup:: .transforms.setup
-
-test-image:: .transforms.ray-test-image
-
-build:: build-dist image
-
-publish: publish-image
-
-publish-image:: .transforms.publish-image-ray
-
-setup:: .transforms.setup
-
-# set the version of python transform that this depends on.
-set-versions:
-	$(MAKE) TRANSFORM_PYTHON_VERSION=${DOC_CHUNK_PYTHON_VERSION} TOML_VERSION=$(DOC_CHUNK_RAY_VERSION) .transforms.set-versions
-
-build-dist:: set-versions .defaults.build-dist
-
-publish-dist:: .defaults.publish-dist
-
-run-cli-sample: .transforms.run-cli-ray-sample
-
-run-local-sample: .transforms.run-local-ray-sample
-
-run-s3-sample: .transforms.run-s3-ray-sample
-
-minio-start: .minio-start
-
-kind-load-image:: .transforms.kind-load-image
-
-docker-load-image: .defaults.docker-load-image
-
-docker-save-image: .defaults.docker-save-image
diff --git a/transforms/language/doc_chunk/ray/README.md b/transforms/language/doc_chunk/ray/README.md
deleted file mode 100644
index f9bde5a1b..000000000
--- a/transforms/language/doc_chunk/ray/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Chunk documents Ray Transform
-Please see the set of
-[transform project conventions](../../../README.md#transform-project-conventions)
-for details on general project conventions, transform configuration,
-testing and IDE set up.
-
-## Summary
-This project wraps the [doc_chunk transform](../python) with a Ray runtime.
-
-## Configuration and command line Options
-
-chunk documents configuration and command line options are the same as for the base python transform.
-
-## Running
-
-### Launched Command Line Options
-In addition to those available to the transform as defined in [here](../python/README.md),
-the set of
-[ray launcher](../../../../data-processing-lib/doc/ray-launcher-options.md) are available.
-
-### Running the samples
-To run the samples, use the following `make` targets
-
-* `run-cli-sample` - runs src/doc_chunk_transform.py using command line args
-* `run-local-sample` - runs src/doc_chunk_local_ray.py
-* `run-s3-sample` - runs src/doc_chunk_s3_ray.py
-    * Requires prior installation of minio, depending on your platform (e.g., from [here](https://min.io/docs/minio/macos/index.html)
-    and [here](https://min.io/docs/minio/linux/index.html)
-    and invocation of `make minio-start` to load data into local minio for S3 access.
-
-These targets will activate the virtual environment and set up any configuration needed.
-Use the `-n` option of `make` to see the detail of what is done to run the sample.
-
-For example,
-```shell
-make run-cli-sample
-...
-```
-Then
-```shell
-ls output
-```
-To see results of the transform.
-
-### Transforming data using the transform image
-
-To use the transform image to transform your data, please refer to the
-[running images quickstart](../../../../doc/quick-start/run-transform-image.md),
-substituting the name of this transform image and runtime as appropriate.
diff --git a/transforms/language/doc_chunk/ray/pyproject.toml b/transforms/language/doc_chunk/ray/pyproject.toml
deleted file mode 100644
index 29b594fac..000000000
--- a/transforms/language/doc_chunk/ray/pyproject.toml
+++ /dev/null
@@ -1,47 +0,0 @@
-[project]
-name = "dpk_doc_chunk_transform_ray"
-version = "0.3.0"
-requires-python = ">=3.10,<3.13"
-description = "chunk documents Ray Transform"
-license = {text = "Apache-2.0"}
-readme = {file = "README.md", content-type = "text/markdown"}
-authors = [
-    { name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
-    { name = "Panos Vagenas", email = "pva@zurich.ibm.com" },
-    { name = "Christoph Auer", email = "cau@zurich.ibm.com" },
-]
-dependencies = [
-    "dpk-doc-chunk-transform-python==0.3.0",
-    "data-prep-toolkit[ray]==0.2.2.dev2",
-]
-
-[build-system]
-requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
-build-backend = "setuptools.build_meta"
-
-[project.optional-dependencies]
-dev = [
-    "twine",
-    "pytest>=7.3.2",
-    "pytest-dotenv>=0.5.2",
-    "pytest-env>=1.0.0",
-    "pre-commit>=3.3.2",
-    "pytest-cov>=4.1.0",
-    "pytest-mock>=3.10.0",
-    "moto==5.0.5",
-    "markupsafe==2.0.1",
-]
-
-[options]
-package_dir = ["src","test"]
-
-[options.packages.find]
-where = ["src/"]
-
-[tool.pytest.ini_options]
-# Currently we use low coverage since we have to run tests separately (see makefile)
-#addopts = "--cov --cov-report term-missing --cov-fail-under 25"
-markers = ["unit: unit tests", "integration: integration tests"]
-
-[tool.coverage.run]
-include = ["src/*"]
diff --git a/transforms/language/doc_chunk/python/requirements.txt b/transforms/language/doc_chunk/requirements.txt
similarity index 100%
rename from transforms/language/doc_chunk/python/requirements.txt
rename to transforms/language/doc_chunk/requirements.txt
diff --git a/transforms/language/doc_chunk/python/test-data/expected/metadata.json b/transforms/language/doc_chunk/test-data/expected/metadata.json
similarity index 100%
rename from transforms/language/doc_chunk/python/test-data/expected/metadata.json
rename to transforms/language/doc_chunk/test-data/expected/metadata.json
diff --git a/transforms/language/doc_chunk/python/test-data/expected/test1.parquet b/transforms/language/doc_chunk/test-data/expected/test1.parquet
similarity index 100%
rename from transforms/language/doc_chunk/python/test-data/expected/test1.parquet
rename to transforms/language/doc_chunk/test-data/expected/test1.parquet
diff --git a/transforms/language/doc_chunk/python/test-data/expected_md/2206.01062.parquet b/transforms/language/doc_chunk/test-data/expected_md/2206.01062.parquet
similarity index 100%
rename from transforms/language/doc_chunk/python/test-data/expected_md/2206.01062.parquet
rename to transforms/language/doc_chunk/test-data/expected_md/2206.01062.parquet
diff --git a/transforms/language/doc_chunk/python/test-data/expected_md/metadata.json b/transforms/language/doc_chunk/test-data/expected_md/metadata.json
similarity index 100%
rename from transforms/language/doc_chunk/python/test-data/expected_md/metadata.json
rename to transforms/language/doc_chunk/test-data/expected_md/metadata.json
diff --git a/transforms/language/doc_chunk/python/test-data/expected_token_text/metadata.json b/transforms/language/doc_chunk/test-data/expected_token_text/metadata.json
similarity index 100%
rename from transforms/language/doc_chunk/python/test-data/expected_token_text/metadata.json
rename to transforms/language/doc_chunk/test-data/expected_token_text/metadata.json
diff --git a/transforms/language/doc_chunk/python/test-data/expected_token_text/sample1.parquet b/transforms/language/doc_chunk/test-data/expected_token_text/sample1.parquet
similarity index 100%
rename from transforms/language/doc_chunk/python/test-data/expected_token_text/sample1.parquet
rename to transforms/language/doc_chunk/test-data/expected_token_text/sample1.parquet
diff --git a/transforms/language/doc_chunk/python/test-data/input/test1.parquet b/transforms/language/doc_chunk/test-data/input/test1.parquet
similarity index 100%
rename from transforms/language/doc_chunk/python/test-data/input/test1.parquet
rename to transforms/language/doc_chunk/test-data/input/test1.parquet
diff --git a/transforms/language/doc_chunk/python/test-data/input_md/2206.01062.parquet b/transforms/language/doc_chunk/test-data/input_md/2206.01062.parquet
similarity index 100%
rename from transforms/language/doc_chunk/python/test-data/input_md/2206.01062.parquet
rename to transforms/language/doc_chunk/test-data/input_md/2206.01062.parquet
diff --git a/transforms/language/doc_chunk/python/test-data/input_token_text/sample1.parquet b/transforms/language/doc_chunk/test-data/input_token_text/sample1.parquet
similarity index 100%
rename from transforms/language/doc_chunk/python/test-data/input_token_text/sample1.parquet
rename to transforms/language/doc_chunk/test-data/input_token_text/sample1.parquet
diff --git a/transforms/language/doc_chunk/python/test/test_doc_chunk_python.py b/transforms/language/doc_chunk/test/test_doc_chunk_python.py
similarity index 95%
rename from transforms/language/doc_chunk/python/test/test_doc_chunk_python.py
rename to transforms/language/doc_chunk/test/test_doc_chunk_python.py
index 5ecfa49a2..9f9c9b796 100644
--- a/transforms/language/doc_chunk/python/test/test_doc_chunk_python.py
+++ b/transforms/language/doc_chunk/test/test_doc_chunk_python.py
@@ -16,12 +16,12 @@
 from data_processing.test_support.launch.transform_test import (
     AbstractTransformLauncherTest,
 )
-from doc_chunk_transform import (
+from dpk_doc_chunk.transform import (
     chunking_type_cli_param,
     output_chunk_column_name_cli_param,
     chunking_types
 )
-from doc_chunk_transform_python import DocChunkPythonTransformConfiguration
+from dpk_doc_chunk.transform_python import DocChunkPythonTransformConfiguration
 
 
 class TestPythonDocChunkTransform(AbstractTransformLauncherTest):
diff --git a/transforms/language/doc_chunk/ray/test/test_doc_chunk_ray.py b/transforms/language/doc_chunk/test/test_doc_chunk_ray.py
similarity index 95%
rename from transforms/language/doc_chunk/ray/test/test_doc_chunk_ray.py
rename to transforms/language/doc_chunk/test/test_doc_chunk_ray.py
index 847101587..c8083ab7d 100644
--- a/transforms/language/doc_chunk/ray/test/test_doc_chunk_ray.py
+++ b/transforms/language/doc_chunk/test/test_doc_chunk_ray.py
@@ -16,7 +16,7 @@
     AbstractTransformLauncherTest,
 )
 from data_processing_ray.runtime.ray import RayTransformLauncher
-from doc_chunk_transform_ray import DocChunkRayTransformConfiguration
+from dpk_doc_chunk.ray.transform import DocChunkRayTransformConfiguration
 
 
 class TestRayDocChunkTransform(AbstractTransformLauncherTest):
diff --git a/transforms/pyproject.toml b/transforms/pyproject.toml
index 2357553e4..00f3d0433 100644
--- a/transforms/pyproject.toml
+++ b/transforms/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "data_prep_toolkit_transforms"
-version = "0.2.2.dev3"
+version = "1.0.1.dev0"
 requires-python = ">=3.10,<3.13"
"ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Transforms using Ray" @@ -30,7 +30,7 @@ all = { file = [ "code/code2parquet/python/requirements.txt", "language/doc_quality/python/requirements.txt", -"language/doc_chunk/python/requirements.txt", +"language/doc_chunk/requirements.txt", ##### Cannot have html2parquet until we solve ## docling-ibm-models 1.1.7 depends on lxml<5.0.0 and >=4.9.1 ## trafilatura 1.12.0 depends on lxml>=5.2.2; platform_system != "Darwin" or python_version > "3.8" @@ -61,7 +61,7 @@ code_quality = { file = ["code/code_quality/python/requirements.txt"]} code2parquet = {file = ["code/code2parquet/python/requirements.txt"]} doc_quality = { file = ["language/doc_quality/python/requirements.txt"]} -doc_chunk = { file = ["language/doc_chunk/python/requirements.txt"]} +doc_chunk = { file = ["language/doc_chunk/requirements.txt"]} html2parquet = { file = ["language/html2parquet/python/requirements.txt"]} pii_redactor = { file = ["language/pii_redactor/python/requirements.txt"]} lang_id = { file = ["language/lang_id/python/requirements.txt"]} @@ -80,21 +80,18 @@ web2parquet = { file = ["universal/web2parquet/requirements.txt"]} # Does not seem to work for our custom layout # copy all files to a single src and let automatic discovery find them -[tool.setuptools.package-data] -"*" = ["*.txt"] - -[tool.setuptools.packages.find] -where = ["src"] +#[tool.setuptools.package-data] +#"*" = ["*.txt"] +# To include this, comment out the package.find section, +# uncomment the package-dir section and rerun the build +# while keeping the build folder from previous run #[tool.setuptools.package-dir] +#dpk_pdf2parquet = "language/html2parquet/dpk_pdf2parquet" +#dpk_doc_chunck = "universal/doc_chunck/dpk_web2parquet" +#dpk_html2parquet = "language/html2parquet/dpk_html2parquet" #dpk_web2parquet = "universal/web2parquet/dpk_web2parquet" -[options] -package_dir = ["src","test"] - -[options.packages.find] -where = ["src"] - [tool.pytest.ini_options] # Currently we use low coverage since we have to run tests separately (see makefile) #addopts = "--cov --cov-report term-missing --cov-fail-under 25"