Skip to content

Commit

Permalink
Fix kfp pipelines testing in github workflow.
Browse files Browse the repository at this point in the history
Signed-off-by: Revital Sur <[email protected]>
  • Loading branch information
revit13 committed Sep 22, 2024
1 parent eba80df commit 88727c4
Show file tree
Hide file tree
Showing 22 changed files with 2,509 additions and 4 deletions.
31 changes: 27 additions & 4 deletions .github/workflows/Makefile
Original file line number Diff line number Diff line change
@@ -1,26 +1,49 @@

# set_env_var: export the make variable named by $(1), set to the value $(2),
# so that it is passed in the environment of recipe commands and sub-makes.
# Usage: $(call set_env_var,NAME,VALUE)
define set_env_var
$(eval export $(1)=$(2))
endef

# NOTE(review): the generator rules in this file discover transform directories
# with find(1) and do not appear to read the three *_TRANSFORMS lists below -
# confirm whether they are still needed.
# Directories in the transforms/universal directory for which we want to generate test workflows
UNIVERSAL_TRANSFORMS=doc_id ededup fdedup filter html2parquet noop profiler resize tokenization
# Directories in the transforms/code directory for which we want to generate test workflows
CODE_TRANSFORMS=code2parquet code_quality header_cleanser malware proglang_select repo_level_ordering
# Directories in the transforms/language directory for which we want to generate test workflows
LANG_TRANSFORMS=doc_chunk doc_quality lang_id pdf2parquet pii_redactor text_encoder

# Comma-separated transform directory names for which .transform-kfp-tests must
# not generate a KFP workflow, even when the transform has a kfp_ray directory.
# The double quotes are part of the make value; they are consumed by the shell
# when the variable is expanded inside the recipe.
KFP_BLACK_LIST="doc_chunk,pdf2parquet,pii_redactor"

# Regenerate every workflow file: for each transforms subdirectory, generate
# the regular transform test workflows and then the KFP test workflows.
# Each generator is invoked exactly once per subdirectory.
.PHONY: transform-tests
transform-tests:
	$(MAKE) TRANSFORM_SUBDIR=universal .transform-tests
	$(MAKE) TRANSFORM_SUBDIR=universal .transform-kfp-tests
	$(MAKE) TRANSFORM_SUBDIR=language .transform-tests
	$(MAKE) TRANSFORM_SUBDIR=language .transform-kfp-tests
	$(MAKE) TRANSFORM_SUBDIR=code .transform-tests
	$(MAKE) TRANSFORM_SUBDIR=code .transform-kfp-tests

# Generate one test-<subdir>-<transform>.yml workflow for every directory found
# directly under ../../transforms/$(TRANSFORM_SUBDIR), by substituting
# @TARGET_TRANSFORM_DIR@ in test-transform.template.
# Expects
#    TRANSFORM_SUBDIR transforms subdirectory (such as universal)
.PHONY: .transform-tests
.transform-tests:
	@# -mindepth 1 -maxdepth 1 selects only the immediate child directories.
	@for i in $$(find ../../transforms/$(TRANSFORM_SUBDIR) -mindepth 1 -maxdepth 1 -type d); do \
		dir=$$(basename $$i); \
		yml=test-$(TRANSFORM_SUBDIR)-$$dir.yml; \
		echo Generating $$yml; \
		sed -e "s?@TARGET_TRANSFORM_DIR@?transforms/$(TRANSFORM_SUBDIR)/$$dir?g" test-transform.template > $$yml; \
	done

# Generate one test-<subdir>-kfp-<transform>.yml workflow for every directory
# found directly under ../../transforms/$(TRANSFORM_SUBDIR) that contains a
# kfp_ray subdirectory and is not listed in KFP_BLACK_LIST, by substituting
# @TARGET_TRANSFORM_DIR@ in test-kfp-transform.template.
# Expects
#    TRANSFORM_SUBDIR transforms subdirectory (such as universal)
#    KFP_BLACK_LIST   comma-separated transform names to skip
.PHONY: .transform-kfp-tests
.transform-kfp-tests:
	@# Wrap the list in commas so each entry can be matched exactly as
	@# ",name,"; a plain substring/regex match would also skip transforms
	@# whose name merely occurs inside a blacklisted name.
	@blacklist=,$(KFP_BLACK_LIST),; \
	for i in $$(find ../../transforms/$(TRANSFORM_SUBDIR) -mindepth 1 -maxdepth 1 -type d); do \
		dir=$$(basename $$i); \
		if [ ! -d ../../transforms/$(TRANSFORM_SUBDIR)/$$dir/kfp_ray ]; then \
			continue; \
		fi; \
		case "$$blacklist" in \
			*,"$$dir",*) continue ;; \
		esac; \
		yml=test-$(TRANSFORM_SUBDIR)-kfp-$$dir.yml; \
		echo Generating $$yml; \
		sed -e "s?@TARGET_TRANSFORM_DIR@?transforms/$(TRANSFORM_SUBDIR)/$$dir?g" test-kfp-transform.template > $$yml; \
	done






124 changes: 124 additions & 0 deletions .github/workflows/test-code-inputcode2parquet.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#
# DO NOT EDIT THIS FILE: it is generated from test-transform.template. Edit the template and run make to regenerate these files.
#
name: Test - transforms/code/inputcode2parquet

on:
workflow_dispatch:
push:
branches:
- "dev"
- "releases/**"
tags:
- "*"
paths:
- "transforms/code/inputcode2parquet/**"
- "data-processing-lib/**"
- "!transforms/code/inputcode2parquet/**/kfp_ray/**" # This is/will be tested in separate workflow
- "!data-processing-lib/**/test/**"
- "!data-processing-lib/**/test-data/**"
- "!**.md"
- "!**/doc/**"
- "!**/images/**"
- "!**.gitignore"
pull_request:
branches:
- "dev"
- "releases/**"
paths:
- "transforms/code/inputcode2parquet/**"
- "data-processing-lib/**"
- "!transforms/code/inputcode2parquet/**/kfp_ray/**" # This is/will be tested in separate workflow
- "!data-processing-lib/**/test/**"
- "!data-processing-lib/**/test-data/**"
- "!**.md"
- "!**/doc/**"
- "!**/images/**"
- "!**.gitignore"

jobs:
check_if_push_image:
# check whether the Docker images should be pushed to the remote repository
# The images are pushed if it is a merge to dev branch or a new tag is created.
# The latter being part of the release process.
# The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file.
runs-on: ubuntu-22.04
outputs:
publish_images: ${{ steps.version.outputs.publish_images }}
steps:
- id: version
run: |
publish_images='false'
if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
then
publish_images='true'
fi
if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
then
publish_images='true'
fi
echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT"
test-src:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Free up space in github runner
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
df -h
- name: Test transform source in transforms/code/inputcode2parquet
run: |
if [ -e "transforms/code/inputcode2parquet/Makefile" ]; then
make -C transforms/code/inputcode2parquet DOCKER=docker test-src
else
echo "transforms/code/inputcode2parquet/Makefile not found - source testing disabled for this transform."
fi
test-image:
needs: [check_if_push_image]
runs-on: ubuntu-22.04
timeout-minutes: 120
env:
DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }}
DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Free up space in github runner
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
df -h
- name: Test transform image in transforms/code/inputcode2parquet
run: |
if [ -e "transforms/code/inputcode2parquet/Makefile" ]; then
if [ -d "transforms/code/inputcode2parquet/spark" ]; then
make -C data-processing-lib/spark DOCKER=docker image
fi
make -C transforms/code/inputcode2parquet DOCKER=docker test-image
else
echo "transforms/code/inputcode2parquet/Makefile not found - testing disabled for this transform."
fi
- name: Print space
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
docker images
- name: Publish images
if: needs.check_if_push_image.outputs.publish_images == 'true'
run: |
if [ -e "transforms/code/inputcode2parquet/Makefile" ]; then
make -C transforms/code/inputcode2parquet publish
else
echo "transforms/code/inputcode2parquet/Makefile not found - publishing disabled for this transform."
fi
124 changes: 124 additions & 0 deletions .github/workflows/test-code-kfp-code2parquet.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#
# DO NOT EDIT THIS FILE: it is generated from test-transform.template. Edit the template and run make to regenerate these files.
#
name: Test - transforms/code/code2parquet

on:
workflow_dispatch:
push:
branches:
- "dev"
- "releases/**"
tags:
- "*"
paths:
- "transforms/code/code2parquet/**"
- "!data-processing-lib/**" # This is/will be tested in separate workflow
- "!transforms/code/code2parquet/**/kfp_ray/**" # This is/will be tested in separate workflow
- "!data-processing-lib/**/test/**"
- "!data-processing-lib/**/test-data/**"
- "!**.md"
- "!**/doc/**"
- "!**/images/**"
- "!**.gitignore"
pull_request:
branches:
- "dev"
- "releases/**"
paths:
- "transforms/code/code2parquet/**"
- "!data-processing-lib/**"
- "!transforms/code/code2parquet/**/kfp_ray/**" # This is/will be tested in separate workflow
- "!data-processing-lib/**/test/**"
- "!data-processing-lib/**/test-data/**"
- "!**.md"
- "!**/doc/**"
- "!**/images/**"
- "!**.gitignore"

jobs:
check_if_push_image:
# check whether the Docker images should be pushed to the remote repository
# The images are pushed if it is a merge to dev branch or a new tag is created.
# The latter being part of the release process.
# The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file.
runs-on: ubuntu-22.04
outputs:
publish_images: ${{ steps.version.outputs.publish_images }}
steps:
- id: version
run: |
publish_images='false'
if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
then
publish_images='true'
fi
if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
then
publish_images='true'
fi
echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT"
test-src:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Free up space in github runner
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
df -h
- name: Test transform source in transforms/code/code2parquet
run: |
if [ -e "transforms/code/code2parquet/Makefile" ]; then
make -C transforms/code/code2parquet DOCKER=docker test-src
else
echo "transforms/code/code2parquet/Makefile not found - source testing disabled for this transform."
fi
test-image:
needs: [check_if_push_image]
runs-on: ubuntu-22.04
timeout-minutes: 120
env:
DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }}
DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Free up space in github runner
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
df -h
- name: Test transform image in transforms/code/code2parquet
run: |
if [ -e "transforms/code/code2parquet/Makefile" ]; then
if [ -d "transforms/code/code2parquet/spark" ]; then
make -C data-processing-lib/spark DOCKER=docker image
fi
make -C transforms/code/code2parquet DOCKER=docker test-image
else
echo "transforms/code/code2parquet/Makefile not found - testing disabled for this transform."
fi
- name: Print space
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
docker images
- name: Publish images
if: needs.check_if_push_image.outputs.publish_images == 'true'
run: |
if [ -e "transforms/code/code2parquet/Makefile" ]; then
make -C transforms/code/code2parquet publish
else
echo "transforms/code/code2parquet/Makefile not found - publishing disabled for this transform."
fi
Loading

0 comments on commit 88727c4

Please sign in to comment.