Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move transform version numbers out of .make.versions to transform-specific file #657

Merged
merged 17 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions .make.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ __check_defined = \
cp -p -R ${LIB_PATH}/src ${LIB_NAME}
cp -p -R ${LIB_PATH}/pyproject.toml ${LIB_NAME}
cp -p -R ${LIB_PATH}/README.md ${LIB_NAME}
if [ -e ${LIB_PATH}/requirements.txt ]; then \
cp -p ${LIB_PATH}/requirements.txt ${LIB_NAME}; \
fi


# Build and image using the local Dockerfile and make the data-processing-lib/python
# available in the current directory for use by the Dockerfile (i.e. to install the library).
Expand Down Expand Up @@ -591,8 +595,9 @@ MINIO_ADMIN_PWD= localminiosecretkey
# Updates the versions references to our repo source as defined in .make.versions
.PHONY: .defaults.__update-toml-lib-dep-versions
.defaults.__update-toml-lib-dep-versions:
ifeq ($(USE_REPO_LIB_SRC), 1)
@# Help: Update pyproject.toml to depend on lib versions defined in .make.versions
@if [ -e pyproject.toml ]; then \
if [ -e pyproject.toml ]; then \
cat pyproject.toml | sed \
-e 's/"data-prep-toolkit-ray\([=><~][=]\).*"/"data-prep-toolkit-ray\1$(DPK_LIB_VERSION)"/' \
-e 's/"data-prep-toolkit-spark\([=><~][=]\).*"/"data-prep-toolkit-spark\1$(DPK_LIB_VERSION)"/' \
Expand All @@ -603,7 +608,7 @@ MINIO_ADMIN_PWD= localminiosecretkey
> tt.toml; \
mv tt.toml pyproject.toml; \
fi
@if [ -e requirements.txt ]; then \
if [ -e requirements.txt ]; then \
cat requirements.txt | sed \
-e 's/data-prep-toolkit-ray\([=><~][=]\).*/data-prep-toolkit-ray\1$(DPK_LIB_VERSION)/' \
-e 's/data-prep-toolkit-transforms\([=><~][=]\).*/data-prep-toolkit-transforms\1$(DPK_TRANSFORMS_VERSION)/' \
Expand All @@ -615,6 +620,7 @@ MINIO_ADMIN_PWD= localminiosecretkey
> tt.txt; \
mv tt.txt requirements.txt; \
fi
endif

# Build the distribution, usually in preparation for publishing using ith the .defaults.publish-dist target
.PHONY: .defaults.build-dist
Expand Down
76 changes: 2 additions & 74 deletions .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_

# publish docker images with latest tag
ifeq ($(DPK_VERSION_SUFFIX), )
DOCKER_IMAGE_VERSION=$(DPK_VERSION)
DOCKER_IMAGE_VERSION?=$(DPK_VERSION)
else
DOCKER_IMAGE_VERSION=latest
DOCKER_IMAGE_VERSION?=latest
endif

# Data prep lab wheel version
Expand All @@ -39,78 +39,6 @@ DPK_LIB_KFP_SHARED=$(DPK_VERSION)
KFP_DOCKER_VERSION=$(DOCKER_IMAGE_VERSION)
KFP_DOCKER_VERSION_v2=$(DOCKER_IMAGE_VERSION)

# Begin transform versions/tags
BLOCKLIST_VERSION=$(DPK_VERSION)

DOC_ID_PYTHON_VERSION=$(DPK_VERSION)
DOC_ID_RAY_VERSION=$(DPK_VERSION)
DOC_ID_SPARK_VERSION=$(DPK_VERSION)

EDEDUP_PYTHON_VERSION=$(DPK_VERSION)
EDEDUP_RAY_VERSION=$(DPK_VERSION)

FDEDUP_RAY_VERSION=$(DPK_VERSION)

FILTER_PYTHON_VERSION=$(DPK_VERSION)
FILTER_RAY_VERSION=$(DPK_VERSION)
FILTER_SPARK_VERSION=$(DPK_VERSION)

NOOP_PYTHON_VERSION=$(DPK_VERSION)
NOOP_RAY_VERSION=$(DPK_VERSION)
NOOP_SPARK_VERSION=$(DPK_VERSION)

PROFILER_PYTHON_VERSION=$(DPK_VERSION)
PROFILER_RAY_VERSION=$(DPK_VERSION)
PROFILER_SPARK_VERSION=$(DPK_VERSION)

RESIZE_PYTHON_VERSION=$(DPK_VERSION)
RESIZE_RAY_VERSION=$(DPK_VERSION)
RESIZE_SPARK_VERSION=$(DPK_VERSION)

LANG_ID_PYTHON_VERSION=$(DPK_VERSION)
LANG_ID_RAY_VERSION=$(DPK_VERSION)

TOKENIZATION_RAY_VERSION=$(DPK_VERSION)
TOKENIZATION_PYTHON_VERSION=$(DPK_VERSION)

MALWARE_RAY_VERSION=$(DPK_VERSION)
MALWARE_PYTHON_VERSION=$(DPK_VERSION)

PROGLANG_SELECT_PYTHON_VERSION=$(DPK_VERSION)
PROGLANG_SELECT_RAY_VERSION=$(DPK_VERSION)

DOC_QUALITY_PYTHON_VERSION=$(DPK_VERSION)
DOC_QUALITY_RAY_VERSION=$(DPK_VERSION)

CODE_QUALITY_RAY_VERSION=$(DPK_VERSION)
CODE_QUALITY_PYTHON_VERSION=$(DPK_VERSION)

CODE2PARQUET_PYTHON_VERSION=$(DPK_VERSION)
CODE2PARQUET_RAY_VERSION=$(DPK_VERSION)
INGEST_TO_PARQUET_VERSION=$(DPK_VERSION)
REPO_LVL_ORDER_RAY_VERSION=$(DPK_VERSION)

PDF2PARQUET_PYTHON_VERSION=$(DPK_VERSION)
PDF2PARQUET_RAY_VERSION=$(DPK_VERSION)

DOC_CHUNK_PYTHON_VERSION=$(DPK_VERSION)
DOC_CHUNK_RAY_VERSION=$(DPK_VERSION)

TEXT_ENCODER_PYTHON_VERSION=$(DPK_VERSION)
TEXT_ENCODER_RAY_VERSION=$(DPK_VERSION)

HEADER_CLEANSER_PYTHON_VERSION=$(DPK_VERSION)
HEADER_CLEANSER_RAY_VERSION=$(DPK_VERSION)

LICENSE_SELECT_PYTHON_VERSION=$(DPK_VERSION)
LICENSE_SELECT_RAY_VERSION=$(DPK_VERSION)

PII_REDACTOR_PYTHON_VERSION=$(DPK_VERSION)

HTML2PARQUET_PYTHON_VERSION=$(DPK_VERSION)

DPK_TRANSFORMS_VERSION=$(DPK_VERSION)

################## ################## ################## ################## ################## ##################
# Begin versions that the repo depends on.

Expand Down
3 changes: 3 additions & 0 deletions transforms/code/code2parquet/kfp_ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows

# Include the common configuration for this transform
include ../transform.config

SRC_DIR=${CURDIR}/../ray/

PYTHON_WF := $(shell find ./ -name '*_wf.py')
Expand Down
16 changes: 11 additions & 5 deletions transforms/code/code2parquet/python/Makefile
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..

# Set this, before including .make.defaults, to
# 1 if requirements reference the latest code in the data processing library
# in this repo (that is not yet published to pypi). This is the default setting.
# 0 if the transforms DPK dependencies are on wheels published to
# pypi (e.g. data-prep-toolkit=0.2.1)
#USE_REPO_LIB_SRC=1

# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.

# $(REPOROOT)/.make.versions file contains the versions

TRANSFORM_NAME=code2parquet

include $(REPOROOT)/transforms/.make.transforms

# Include the common configuration for this transform
include ../transform.config

venv:: .transforms.python-venv

test:: .transforms.python-test
Expand Down
11 changes: 10 additions & 1 deletion transforms/code/code2parquet/ray/Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..

# Set this, before including .make.defaults, to
# 1 if requirements reference the latest code in the data processing library
# in this repo (that is not yet published to pypi). This is the default setting.
# 0 if the transforms DPK dependencies are on wheels published to
# pypi (e.g. data-prep-toolkit=0.2.1)
#USE_REPO_LIB_SRC=1

# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=code2parquet
# Include the common configuration for this transform
include ../transform.config

BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
Expand Down
20 changes: 20 additions & 0 deletions transforms/code/code2parquet/transform.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#
# This is intended to be included across the Makefiles provided within
# a given transform's directory tree, so must use compatible syntax.
#
################################################################################
# This defines the name of the transform and is used to match against
# expected files and is used to define the transform's image name.
TRANSFORM_NAME=code2parquet

################################################################################
# This defines the transforms' version number as would be used
# when publishing the wheel. In general, only the micro version
# number should be advanced relative to the DPK_VERSION.
#
# If you change the versions numbers, be sure to run "make set-versions" to
# update version numbers across the transform (e.g., pyproject.toml).
CODE2PARQUET_PYTHON_VERSION=$(DPK_VERSION)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@daw3rd How are we getting DPK_VERSION if we are not including .make.versions ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The transformer Makefiles include $(REPOROOT)/transforms/.make.transforms it includes $(REPOROOT)/.make.defaults and the last one includes $(REPOROOT)/.make.versions

CODE2PARQUET_RAY_VERSION=$(CODE2PARQUET_PYTHON_VERSION)
CODE2PARQUET_SPARK_VERSION=$(CODE2PARQUET_PYTHON_VERSION)

5 changes: 4 additions & 1 deletion transforms/code/code_quality/kfp_ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows

# Include the common configuration for this transform
include ../transform.config

SRC_DIR=${CURDIR}/../ray/

PYTHON_WF := $(shell find ./ -name '*_wf.py')
Expand Down Expand Up @@ -48,4 +51,4 @@ workflow-test: workflow-build
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
done
done
17 changes: 14 additions & 3 deletions transforms/code/code_quality/python/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@

# Define the root of the local git clone for the common rules to be able
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..

# Set this, before including .make.defaults, to
# 1 if requirements reference the latest code in the data processing library
# in this repo (that is not yet published to pypi). This is the default setting.
# 0 if the transforms DPK dependencies are on wheels published to
# pypi (e.g. data-prep-toolkit=0.2.1)
#USE_REPO_LIB_SRC=1

# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=code_quality
# Include the common configuration for this transform
include ../transform.config

# Use default rule inherited from makefile.common
clean:: .transforms.clean
Expand Down
17 changes: 14 additions & 3 deletions transforms/code/code_quality/ray/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@

# Define the root of the local git clone for the common rules to be able
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..

# Set this, before including .make.defaults, to
# 1 if requirements reference the latest code in the data processing library
# in this repo (that is not yet published to pypi). This is the default setting.
# 0 if the transforms DPK dependencies are on wheels published to
# pypi (e.g. data-prep-toolkit=0.2.1)
#USE_REPO_LIB_SRC=1

# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=code_quality
# Include the common configuration for this transform
include ../transform.config

BASE_IMAGE=${RAY_BASE_IMAGE}

Expand Down
20 changes: 20 additions & 0 deletions transforms/code/code_quality/transform.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#
# This is intended to be included across the Makefiles provided within
# a given transform's directory tree, so must use compatible syntax.
#
################################################################################
# This defines the name of the transform and is used to match against
# expected files and is used to define the transform's image name.
TRANSFORM_NAME=code_quality

################################################################################
# This defines the transforms' version number as would be used
# when publishing the wheel. In general, only the micro version
# number should be advanced relative to the DPK_VERSION.
#
# If you change the versions numbers, be sure to run "make set-versions" to
# update version numbers across the transform (e.g., pyproject.toml).
CODE_QUALITY_PYTHON_VERSION=$(DPK_VERSION)
CODE_QUALITY_RAY_VERSION=$(CODE_QUALITY_PYTHON_VERSION)
CODE_QUALITY_SPARK_VERSION=$(CODE_QUALITY_PYTHON_VERSION)

Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows

# Include the common configuration for this transform
include ../transform.config

SRC_DIR=${CURDIR}/../ray/

PYTHON_WF := $(shell find ./ -name '*_wf.py')
Expand Down Expand Up @@ -48,4 +51,4 @@ workflow-test: workflow-build
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
done
done
17 changes: 14 additions & 3 deletions transforms/code/header_cleanser/python/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@

# Define the root of the local git clone for the common rules to be able
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..

# Set this, before including .make.defaults, to
# 1 if requirements reference the latest code in the data processing library
# in this repo (that is not yet published to pypi). This is the default setting.
# 0 if the transforms DPK dependencies are on wheels published to
# pypi (e.g. data-prep-toolkit=0.2.1)
#USE_REPO_LIB_SRC=1

# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=header_cleanser
# Include the common configuration for this transform
include ../transform.config

# Use default rule inherited from makefile.common
clean:: .transforms.clean
Expand Down
4 changes: 3 additions & 1 deletion transforms/code/header_cleanser/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ COPY --chown=ray:users pyproject.toml pyproject.toml
RUN pip install --no-cache-dir -e .

# Install system dependencies, including libgomp1
USER root
RUN sudo apt-get update && sudo apt-get install -y \
libgomp1 \
&& sudo rm -rf /var/lib/apt/lists/*
User ray

# copy source data
COPY ./src/header_cleanser_transform_ray.py .
Expand All @@ -36,4 +38,4 @@ ENV PYTHONPATH /home/ray
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
LABEL git-commit=$GIT_COMMIT
17 changes: 14 additions & 3 deletions transforms/code/header_cleanser/ray/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@

# Define the root of the local git clone for the common rules to be able
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..

# Set this, before including .make.defaults, to
# 1 if requirements reference the latest code in the data processing library
# in this repo (that is not yet published to pypi). This is the default setting.
# 0 if the transforms DPK dependencies are on wheels published to
# pypi (e.g. data-prep-toolkit=0.2.1)
#USE_REPO_LIB_SRC=1

# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=header_cleanser
# Include the common configuration for this transform
include ../transform.config

BASE_IMAGE=${RAY_BASE_IMAGE}

Expand Down
Loading