From 8c70ce3a6b50bd7abe2f7bca81f179adc703e8a4 Mon Sep 17 00:00:00 2001
From: Maroun Touma
Date: Mon, 2 Dec 2024 18:26:23 -0500
Subject: [PATCH] improved make targets for transforms

.make.defaults:
- .defaults.image builds from $(DOCKER_FILE) and tags the result as
  $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION), both locally and under
  $(DOCKER_REGISTRY_ENDPOINT).
- Add .defaults.lib-whl-image, which builds an image from a wheel that
  is already present in data-processing-dist.
- Invoke the wheel build through $(MAKE) and drop the commented-out
  *-lib-src-image targets.

transforms/.make.cicd.targets:
- Drive image, test-image and publish from Dockerfile.python,
  Dockerfile.ray and Dockerfile.spark; BUILD_SPECIFIC_RUNTIME limits
  "image" to a single runtime.
- Add kind-load-image and workflow-* targets; the workflow-* targets
  delegate to kfp_ray when that directory exists.

Signed-off-by: Maroun Touma
---
 .make.defaults                |  75 +++++------------
 transforms/.make.cicd.targets | 153 +++++++++++++++++++++++-----------
 2 files changed, 124 insertions(+), 104 deletions(-)

diff --git a/.make.defaults b/.make.defaults
index 51eb984ee..80df91c8e 100644
--- a/.make.defaults
+++ b/.make.defaults
@@ -209,7 +209,7 @@ __check_defined = \
 # We create both local and remote tags. Local seems to be needed when using our spark
 # base image. Remote seems to be needed by kfp.
 .PHONY: .defaults.image
-.defaults.image:: # Must be called with a DOCKER_IMAGE= settings.
+.defaults.image:: # Must be called with a DOCKER_IMAGE_NAME= settings.
 	@# Help: Create the docker image $(DOCKER_LOCAL_IMAGE) and a tag for $(DOCKER_REMOTE_IMAGE)
 	$(call check_defined, DOCKER_IMAGE_NAME)
 	# The following touch seems to be needed to work around a docker build problem in which
@@ -222,14 +222,15 @@ __check_defined = \
 	if [ -e pyproject.toml ]; then \
 		touch pyproject.toml; \
 	fi
-	$(DOCKER) build -t $(DOCKER_LOCAL_IMAGE) $(DOCKER_BUILD_EXTRA_ARGS) \
+	$(DOCKER) build -f $(DOCKER_FILE) -t $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) $(DOCKER_BUILD_EXTRA_ARGS) \
 		--platform $(DOCKER_PLATFORM) \
 		--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
 		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
 		--build-arg DPK_WHEEL_FILE_NAME=$(DPK_WHEEL_FILE_NAME) \
 		--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
 		--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
-	$(DOCKER) tag $(DOCKER_LOCAL_IMAGE) $(DOCKER_REMOTE_IMAGE)
+	$(DOCKER) tag $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) $(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION)
+
 
 # Copy a source tree in LIB_PATH, including src, pyproject.toml to LIB_NAME
 # Generally used to copy source from within the repo into a local directory for use by a Dockerfile
@@ -244,24 +245,25 @@ __check_defined = \
 		cp -p ${LIB_PATH}/requirements.txt ${LIB_NAME}; \
 	fi
 
-
-# Build and image using the local Dockerfile and make the data-processing-lib/python
-# available in the current directory for use by the Dockerfile (i.e. to install the library).
-#.PHONY: .defaults.python-lib-src-image
-#.defaults.python-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
-#	@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-lib/python source
-#ifeq ($(USE_REPO_LIB_SRC), 1)
-#	$(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
-#endif
-#	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) .defaults.image
-#	-rm -rf data-processing-lib-python
 
 .PHONY: .default.build-lib-wheel
 .default.build-lib-wheel:
-	make -C $(REPOROOT)/data-processing-lib build-pkg-dist
+	$(MAKE) -C $(REPOROOT)/data-processing-lib build-pkg-dist
 	rm -rf data-processing-dist && mkdir data-processing-dist
 	cp $(REPOROOT)/data-processing-lib/dist/*.whl data-processing-dist
 
+
+# Build an image using the local Dockerfile
+# Assumes wheel has already been created
+.PHONY: .defaults.lib-whl-image
+.defaults.lib-whl-image::
+	# Must be called with a DOCKER_IMAGE_NAME= setting.
+	@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the wheel file for the library
+	@$(eval LIB_WHEEL_FILE := $(shell find data-processing-dist/*.whl))
+	$(eval LIB_WHEEL_FILE := $(shell basename $(LIB_WHEEL_FILE)))
+	$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+
+
 # Build and image using the local Dockerfile and make the wheel for data-processing-lib
 # available in the current directory for use by the Dockerfile (i.e. to install the library).
 .PHONY: .defaults.python-lib-whl-image
@@ -270,28 +272,9 @@ __check_defined = \
 	@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the the wheel file for the library
 	@$(eval LIB_WHEEL_FILE := $(shell find data-processing-dist/*.whl))
 	$(eval LIB_WHEEL_FILE := $(shell basename $(LIB_WHEEL_FILE)))
-	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+	$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
 	-rm -rf data-processing-dist
 
-# Build an image using the local Dockerfile and make the data-processing-lib/ray
-# available in the current directory for use by the Dockerfile (i.e. to install the library).
-# Note that this looks for the ../python directory, which is currently only used in the transform projects,
-# but we add it here as a convenience to avoid duplicating a lot of this in transforms/.make.transforms.
-#.PHONY: .defaults.ray-lib-src-image
-#.defaults.ray-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
-#	@# Help: Build the Ray $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
-#ifeq ($(USE_REPO_LIB_SRC), 1)
-#	$(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
-#	$(MAKE) LIB_PATH=$(DPK_RAY_LIB_DIR) LIB_NAME=data-processing-lib-ray .defaults.copy-lib
-#endif
-#	if [ -e ../python ]; then \
-#		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
-#	fi
-#	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) .defaults.image
-#	-rm -rf data-processing-lib-python
-#	-rm -rf data-processing-lib-ray
-#	-rm -rf python-transform
-
 
 # Build an image using the local Dockerfile and make the data-processing wheel
 # available in the current directory for use by the Dockerfile (i.e. to install the library).
@@ -306,7 +289,7 @@ __check_defined = \
 	if [ -e ../python ]; then \
 		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
 	fi
-	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+	$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
 	-rm -rf python-transform
 	-rm -rf data-processing-dist
 
@@ -316,24 +299,6 @@ __check_defined = \
 .defaults.spark-lib-base-image:
 	$(MAKE) -C $(DPK_SPARK_LIB_DIR) image
 
-# Note that this looks for the ../python directory, which is currently only used in the transform projects,
-# but we add it here as a convenience to avoid duplicating a lot of this in transforms/.make.transforms.
-# Must be called with a DOCKER_LOCAL_IMAGE= settings.
-#.PHONY: .defaults.spark-lib-src-image
-#.defaults.spark-lib-src-image:: .defaults.spark-lib-base-image
-#	@# Help: Build the Spark $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
-#	$(MAKE) IMAGE_NAME_TO_VERIFY=$(DOCKER_SPARK_BASE_IMAGE_NAME) .defaults.verify-image-availability
-#ifeq ($(USE_REPO_LIB_SRC), 1)
-#	$(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
-#	$(MAKE) LIB_PATH=$(DPK_SPARK_LIB_DIR) LIB_NAME=data-processing-lib-spark .defaults.copy-lib
-#endif
-#	if [ -e ../python ]; then \
-#		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
-#	fi
-#	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) .defaults.image
-#	-rm -rf data-processing-lib-python
-#	-rm -rf data-processing-lib-spark
-#	-rm -rf python-transform
 
 .PHONY: .defaults.spark-lib-whl-image
 .defaults.spark-lib-whl-image:: .default.build-lib-wheel .defaults.spark-lib-base-image
@@ -345,7 +310,7 @@ __check_defined = \
 	if [ -e ../python ]; then \
 		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
 	fi
-	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+	$(MAKE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
 	-rm -rf python-transform
 	-rm -rf data-processing-dist
 
diff --git a/transforms/.make.cicd.targets b/transforms/.make.cicd.targets
index 69a5f54fd..e392e8f36 100644
--- a/transforms/.make.cicd.targets
+++ b/transforms/.make.cicd.targets
@@ -7,10 +7,15 @@ include $(REPOROOT)/transforms/.make.transforms
 
 ######################################################################
 
-## Default setting for TRANSFORM_RUNTIME uses folder name-- Old layout
-TRANSFORM_PYTHON_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).transform
-TRANSFORM_RAY_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).ray.transform
-TRANSFORM_PYTHON_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).spark.transform
+## Default setting for TRANSFORM_RUNTIME entry point:
+# python -m dpk_html2parquet.ray.transform --help
+# or
+# python -m dpk_html2parquet.transform_python --help
+#
+TRANSFORM_PYTHON_SRC?="-m dpk_$(TRANSFORM_NAME).transform_python"
+TRANSFORM_RAY_SRC?="-m dpk_$(TRANSFORM_NAME).ray.transform"
+TRANSFORM_SPARK_SRC?="-m dpk_$(TRANSFORM_NAME).spark.transform"
+
 
 venv:: .defaults.create-venv
 	source venv/bin/activate && $(PIP) install -e $(REPOROOT)/data-processing-lib[ray,spark]
@@ -19,7 +24,6 @@ venv:: .defaults.create-venv
 		source venv/bin/activate && $(PIP) install -r requirements.txt; \
 	fi;
 
-
 test:: .transforms.test-src test-image
 
 clean:: .transforms.clean
@@ -28,62 +32,113 @@ clean:: .transforms.clean
 set-versions::
 
 ## We need to think how we want to do this going forward
-build::
-image::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) image-default ; \
-	else \
-		echo "Skipping image for $(shell pwd) since no Dockerfile is present"; \
+build:: image
+
+publish:
+	@if [ -e Dockerfile.python ]; then \
+		$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-python:$(DOCKER_IMAGE_VERSION) \
+			.defaults.publish-image ; \
+	fi
+	@if [ -e Dockerfile.ray ]; then \
+		$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-ray:$(DOCKER_IMAGE_VERSION) \
+			.defaults.publish-image ; \
+	fi
+	@if [ -e Dockerfile.spark ]; then \
+		$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-spark:$(DOCKER_IMAGE_VERSION) \
+			.defaults.publish-image ; \
+	fi
+
+test-image-sequence:: .defaults.lib-whl-image .transforms.test-image-help .transforms.clean
+
+test-image:: .default.build-lib-wheel
+	@if [ -e Dockerfile.python ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.python \
+			TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_PYTHON_SRC) \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-python \
+			test-image-sequence ; \
+	fi
+	@if [ -e Dockerfile.ray ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.ray \
+			TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_RAY_SRC) \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-ray \
+			BASE_IMAGE=$(RAY_BASE_IMAGE) \
+			test-image-sequence ; \
 	fi
+	@if [ -e Dockerfile.spark ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.spark \
+			TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_SPARK_SRC) \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-spark \
+			BASE_IMAGE=$(SPARK_BASE_IMAGE) \
+			test-image-sequence ; \
+	fi
+	-rm -rf data-processing-dist
+
 
-publish::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) publish-default ; \
-	else \
-		echo "Skipping publish for $(shell pwd) since no Dockerfile is present"; \
+image-python:
+	@if [ -e Dockerfile.python ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.python \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-python \
+			.defaults.lib-whl-image ; \
 	fi
 
-publish-image::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) publish-image-default ; \
-	else \
-		echo "Skipping publish-image for $(shell pwd) since no Dockerfile is present"; \
+image-ray:
+	@if [ -e Dockerfile.ray ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.ray \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-ray \
+			BASE_IMAGE=$(RAY_BASE_IMAGE) \
+			.defaults.lib-whl-image ; \
 	fi
 
-test-image::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) test-image-default ; \
-	else \
-		echo "Skipping test-image for $(shell pwd) since no Dockerfile is present"; \
+image-spark:
+	@if [ -e Dockerfile.spark ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.spark \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-spark \
+			BASE_IMAGE=$(SPARK_BASE_IMAGE) \
+			.defaults.lib-whl-image ; \
 	fi
 
+image:: .default.build-lib-wheel
+	## Build all possible images unless a specific runtime is specified
+	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" = "python" ]; then \
+		$(MAKE) image-python ; \
+	fi
+	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" = "ray" ]; then \
+		$(MAKE) image-ray ; \
+	fi
+	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" = "spark" ]; then \
+		$(MAKE) image-spark ; \
+	fi
+	-rm -rf data-processing-dist
+
 
 test-src:: .transforms.test-src
 
 
 setup:: .transforms.setup
 
 
-publish-default:: publish-image
-
-publish-image-default:: .defaults.publish-image
-
-test-image-default:: image .transforms.test-image-help .defaults.test-image-pytest .transforms.clean
-
-build-lib-wheel:
-	make -C $(REPOROOT)/data-processing-lib build-pkg-dist
-
-image-default:: build-lib-wheel
-	@$(eval LIB_WHEEL_FILE := $(shell find $(REPOROOT)/data-processing-lib/dist/*.whl))
-	rm -fr dist && mv $(REPOROOT)/data-processing-lib/dist .
-	$(eval WHEEL_FILE_NAME := $(shell basename $(LIB_WHEEL_FILE)))
-	$(DOCKER) build -t $(DOCKER_IMAGE_NAME) $(DOCKER_BUILD_EXTRA_ARGS) \
-		--platform $(DOCKER_PLATFORM) \
-		--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
-		--build-arg BASE_IMAGE=$(RAY_BASE_IMAGE) \
-		--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
-		--build-arg WHEEL_FILE_NAME=$(WHEEL_FILE_NAME) \
-		--build-arg TRANSFORM_NAME=$(TRANSFORM_NAME) \
-		--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
-	$(DOCKER) tag $(DOCKER_LOCAL_IMAGE) $(DOCKER_REMOTE_IMAGE)
-	rm -fr dist
+kind-load-image:: .transforms.kind-load-image
+
+.PHONY: workflow-venv
+workflow-venv:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-venv; \
+	fi
+
+.PHONY: workflow-test
+workflow-test:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-test; \
+	fi
+
+.PHONY: workflow-upload
+workflow-upload:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-upload; \
+	fi
+
+.PHONY: workflow-build
+workflow-build:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-build; \
+	fi