From 88727c4f2eb1815a429719bef0563a4f98b02e27 Mon Sep 17 00:00:00 2001 From: Revital Sur Date: Sun, 22 Sep 2024 14:03:26 +0300 Subject: [PATCH] Fix kfp pipelines testing in github workflow. Signed-off-by: Revital Sur --- .github/workflows/Makefile | 31 ++++- .../workflows/test-code-inputcode2parquet.yml | 124 ++++++++++++++++++ .../workflows/test-code-kfp-code2parquet.yml | 124 ++++++++++++++++++ .../workflows/test-code-kfp-code_quality.yml | 124 ++++++++++++++++++ .../test-code-kfp-header_cleanser.yml | 124 ++++++++++++++++++ .../test-code-kfp-inputcode2parquet.yml | 124 ++++++++++++++++++ .github/workflows/test-code-kfp-malware.yml | 124 ++++++++++++++++++ .../test-code-kfp-proglang_select.yml | 124 ++++++++++++++++++ .../test-code-kfp-repo_level_ordering.yml | 124 ++++++++++++++++++ .github/workflows/test-kfp-transform.template | 124 ++++++++++++++++++ .github/workflows/test-kfp.yml | 2 + .../test-language-kfp-doc_quality.yml | 124 ++++++++++++++++++ .../workflows/test-language-kfp-lang_id.yml | 124 ++++++++++++++++++ .../test-language-kfp-text_encoder.yml | 124 ++++++++++++++++++ .../workflows/test-universal-kfp-doc_id.yml | 124 ++++++++++++++++++ .../workflows/test-universal-kfp-ededup.yml | 124 ++++++++++++++++++ .../workflows/test-universal-kfp-fdedup.yml | 124 ++++++++++++++++++ .../workflows/test-universal-kfp-filter.yml | 124 ++++++++++++++++++ .github/workflows/test-universal-kfp-noop.yml | 124 ++++++++++++++++++ .../workflows/test-universal-kfp-profiler.yml | 124 ++++++++++++++++++ .../workflows/test-universal-kfp-resize.yml | 124 ++++++++++++++++++ .../test-universal-kfp-tokenization.yml | 124 ++++++++++++++++++ 22 files changed, 2509 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/test-code-inputcode2parquet.yml create mode 100644 .github/workflows/test-code-kfp-code2parquet.yml create mode 100644 .github/workflows/test-code-kfp-code_quality.yml create mode 100644 .github/workflows/test-code-kfp-header_cleanser.yml create mode 100644 .github/workflows/test-code-kfp-inputcode2parquet.yml create mode 100644 .github/workflows/test-code-kfp-malware.yml create mode 100644 .github/workflows/test-code-kfp-proglang_select.yml create mode 100644 .github/workflows/test-code-kfp-repo_level_ordering.yml create mode 100644 .github/workflows/test-kfp-transform.template create mode 100644 .github/workflows/test-language-kfp-doc_quality.yml create mode 100644 .github/workflows/test-language-kfp-lang_id.yml create mode 100644 .github/workflows/test-language-kfp-text_encoder.yml create mode 100644 .github/workflows/test-universal-kfp-doc_id.yml create mode 100644 .github/workflows/test-universal-kfp-ededup.yml create mode 100644 .github/workflows/test-universal-kfp-fdedup.yml create mode 100644 .github/workflows/test-universal-kfp-filter.yml create mode 100644 .github/workflows/test-universal-kfp-noop.yml create mode 100644 .github/workflows/test-universal-kfp-profiler.yml create mode 100644 .github/workflows/test-universal-kfp-resize.yml create mode 100644 .github/workflows/test-universal-kfp-tokenization.yml diff --git a/.github/workflows/Makefile b/.github/workflows/Makefile index d8da3e7205..7086c07338 100644 --- a/.github/workflows/Makefile +++ b/.github/workflows/Makefile @@ -1,4 +1,8 @@ +define set_env_var +$(eval export $(1)=$(2)) +endef + # Directories in the transforms/universal directory for which we want to generate test workflows UNIVERSAL_TRANSFORMS=doc_id ededup fdedup filter html2parquet noop profiler resize tokenization # Directories in the transforms/code directory for which we want to generate test workflows @@ -6,21 +10,40 @@ CODE_TRANSFORMS=code2parquet code_quality header_cleanser malware proglang_selec # Directories in the transforms/language directory for which we want to generate test workflows LANG_TRANSFORMS=doc_chunk doc_quality lang_id pdf2parquet pii_redactor text_encoder +KFP_BLACK_LIST="doc_chunk,pdf2parquet,pii_redactor" transform-tests: - $(MAKE) TRANSFORM_SUBDIR=universal .transform-tests - $(MAKE) TRANSFORM_SUBDIR=language .transform-tests - $(MAKE) TRANSFORM_SUBDIR=code .transform-tests + $(MAKE) TRANSFORM_SUBDIR=universal .transform-tests .transform-kfp-tests + $(MAKE) TRANSFORM_SUBDIR=universal .transform-kfp-tests + $(MAKE) TRANSFORM_SUBDIR=language .transform-tests + $(MAKE) TRANSFORM_SUBDIR=language .transform-kfp-tests + $(MAKE) TRANSFORM_SUBDIR=code .transform-tests + $(MAKE) TRANSFORM_SUBDIR=code .transform-kfp-tests # Expects # TRANSFORM_SUBDIR transforms subdirectory (such as universal) .transform-tests: - @for i in $$(find ../../transforms/$(TRANSFORM_SUBDIR) -depth 1 -type d); do \ + @for i in $$(find ../../transforms/$(TRANSFORM_SUBDIR) -mindepth 1 -maxdepth 1 -type d); do \ dir=$$(basename $$i); \ yml=test-$(TRANSFORM_SUBDIR)-$$dir.yml; \ echo Generating $$yml; \ cat test-transform.template | sed -e "s?@TARGET_TRANSFORM_DIR@?transforms/$${TRANSFORM_SUBDIR}/$$dir?g" > $$yml; \ done +.transform-kfp-tests: + @for i in $$(find ../../transforms/$(TRANSFORM_SUBDIR) -mindepth 1 -maxdepth 1 -type d); do \ + dir=$$(basename $$i); \ + z=$$(echo ${KFP_BLACK_LIST} | grep -v $$dir); \ + if [ ! -d ../../transforms/$(TRANSFORM_SUBDIR)/$$dir/kfp_ray ] || [ -z "$$z" ]; then \ + continue; \ + fi; \ + yml=test-$(TRANSFORM_SUBDIR)-kfp-$$dir.yml; \ + echo Generating $$yml; \ + cat test-kfp-transform.template | sed -e "s?@TARGET_TRANSFORM_DIR@?transforms/$${TRANSFORM_SUBDIR}/$$dir?g" > $$yml; \ + done + + + + diff --git a/.github/workflows/test-code-inputcode2parquet.yml b/.github/workflows/test-code-inputcode2parquet.yml new file mode 100644 index 0000000000..fd4c5d2fb2 --- /dev/null +++ b/.github/workflows/test-code-inputcode2parquet.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/code/inputcode2parquet + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/code/inputcode2parquet/**" + - "data-processing-lib/**" + - "!transforms/code/inputcode2parquet/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/code/inputcode2parquet/**" + - "data-processing-lib/**" + - "!transforms/code/inputcode2parquet/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/code/inputcode2parquet + run: | + if [ -e "transforms/code/inputcode2parquet/Makefile" ]; then + make -C transforms/code/inputcode2parquet DOCKER=docker test-src + else + echo "transforms/code/inputcode2parquet/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/code/inputcode2parquet + run: | + if [ -e "transforms/code/inputcode2parquet/Makefile" ]; then + if [ -d "transforms/code/inputcode2parquet/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/code/inputcode2parquet DOCKER=docker test-image + else + echo "transforms/code/inputcode2parquet/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/code/inputcode2parquet/Makefile" ]; then + make -C transforms/code/inputcode2parquet publish + else + echo "transforms/code/inputcode2parquet/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-code-kfp-code2parquet.yml b/.github/workflows/test-code-kfp-code2parquet.yml new file mode 100644 index 0000000000..6df5bb1ea8 --- /dev/null +++ b/.github/workflows/test-code-kfp-code2parquet.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/code/code2parquet + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/code/code2parquet/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/code/code2parquet/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/code/code2parquet/**" + - "!data-processing-lib/**" + - "!transforms/code/code2parquet/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/code/code2parquet + run: | + if [ -e "transforms/code/code2parquet/Makefile" ]; then + make -C transforms/code/code2parquet DOCKER=docker test-src + else + echo "transforms/code/code2parquet/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/code/code2parquet + run: | + if [ -e "transforms/code/code2parquet/Makefile" ]; then + if [ -d "transforms/code/code2parquet/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/code/code2parquet DOCKER=docker test-image + else + echo "transforms/code/code2parquet/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/code/code2parquet/Makefile" ]; then + make -C transforms/code/code2parquet publish + else + echo "transforms/code/code2parquet/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-code-kfp-code_quality.yml b/.github/workflows/test-code-kfp-code_quality.yml new file mode 100644 index 0000000000..5cfd1adc2a --- /dev/null +++ b/.github/workflows/test-code-kfp-code_quality.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/code/code_quality + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/code/code_quality/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/code/code_quality/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/code/code_quality/**" + - "!data-processing-lib/**" + - "!transforms/code/code_quality/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/code/code_quality + run: | + if [ -e "transforms/code/code_quality/Makefile" ]; then + make -C transforms/code/code_quality DOCKER=docker test-src + else + echo "transforms/code/code_quality/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/code/code_quality + run: | + if [ -e "transforms/code/code_quality/Makefile" ]; then + if [ -d "transforms/code/code_quality/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/code/code_quality DOCKER=docker test-image + else + echo "transforms/code/code_quality/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/code/code_quality/Makefile" ]; then + make -C transforms/code/code_quality publish + else + echo "transforms/code/code_quality/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-code-kfp-header_cleanser.yml b/.github/workflows/test-code-kfp-header_cleanser.yml new file mode 100644 index 0000000000..af53c8f8ce --- /dev/null +++ b/.github/workflows/test-code-kfp-header_cleanser.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/code/header_cleanser + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/code/header_cleanser/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/code/header_cleanser/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/code/header_cleanser/**" + - "!data-processing-lib/**" + - "!transforms/code/header_cleanser/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/code/header_cleanser + run: | + if [ -e "transforms/code/header_cleanser/Makefile" ]; then + make -C transforms/code/header_cleanser DOCKER=docker test-src + else + echo "transforms/code/header_cleanser/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/code/header_cleanser + run: | + if [ -e "transforms/code/header_cleanser/Makefile" ]; then + if [ -d "transforms/code/header_cleanser/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/code/header_cleanser DOCKER=docker test-image + else + echo "transforms/code/header_cleanser/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/code/header_cleanser/Makefile" ]; then + make -C transforms/code/header_cleanser publish + else + echo "transforms/code/header_cleanser/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-code-kfp-inputcode2parquet.yml b/.github/workflows/test-code-kfp-inputcode2parquet.yml new file mode 100644 index 0000000000..8ad6646596 --- /dev/null +++ b/.github/workflows/test-code-kfp-inputcode2parquet.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/code/inputcode2parquet + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/code/inputcode2parquet/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/code/inputcode2parquet/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/code/inputcode2parquet/**" + - "!data-processing-lib/**" + - "!transforms/code/inputcode2parquet/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/code/inputcode2parquet + run: | + if [ -e "transforms/code/inputcode2parquet/Makefile" ]; then + make -C transforms/code/inputcode2parquet DOCKER=docker test-src + else + echo "transforms/code/inputcode2parquet/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/code/inputcode2parquet + run: | + if [ -e "transforms/code/inputcode2parquet/Makefile" ]; then + if [ -d "transforms/code/inputcode2parquet/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/code/inputcode2parquet DOCKER=docker test-image + else + echo "transforms/code/inputcode2parquet/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/code/inputcode2parquet/Makefile" ]; then + make -C transforms/code/inputcode2parquet publish + else + echo "transforms/code/inputcode2parquet/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-code-kfp-malware.yml b/.github/workflows/test-code-kfp-malware.yml new file mode 100644 index 0000000000..773c36f529 --- /dev/null +++ b/.github/workflows/test-code-kfp-malware.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/code/malware + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/code/malware/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/code/malware/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/code/malware/**" + - "!data-processing-lib/**" + - "!transforms/code/malware/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/code/malware + run: | + if [ -e "transforms/code/malware/Makefile" ]; then + make -C transforms/code/malware DOCKER=docker test-src + else + echo "transforms/code/malware/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/code/malware + run: | + if [ -e "transforms/code/malware/Makefile" ]; then + if [ -d "transforms/code/malware/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/code/malware DOCKER=docker test-image + else + echo "transforms/code/malware/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/code/malware/Makefile" ]; then + make -C transforms/code/malware publish + else + echo "transforms/code/malware/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-code-kfp-proglang_select.yml b/.github/workflows/test-code-kfp-proglang_select.yml new file mode 100644 index 0000000000..611f386c7e --- /dev/null +++ b/.github/workflows/test-code-kfp-proglang_select.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/code/proglang_select + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/code/proglang_select/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/code/proglang_select/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/code/proglang_select/**" + - "!data-processing-lib/**" + - "!transforms/code/proglang_select/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/code/proglang_select + run: | + if [ -e "transforms/code/proglang_select/Makefile" ]; then + make -C transforms/code/proglang_select DOCKER=docker test-src + else + echo "transforms/code/proglang_select/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/code/proglang_select + run: | + if [ -e "transforms/code/proglang_select/Makefile" ]; then + if [ -d "transforms/code/proglang_select/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/code/proglang_select DOCKER=docker test-image + else + echo "transforms/code/proglang_select/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/code/proglang_select/Makefile" ]; then + make -C transforms/code/proglang_select publish + else + echo "transforms/code/proglang_select/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-code-kfp-repo_level_ordering.yml b/.github/workflows/test-code-kfp-repo_level_ordering.yml new file mode 100644 index 0000000000..c43ea67730 --- /dev/null +++ b/.github/workflows/test-code-kfp-repo_level_ordering.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/code/repo_level_ordering + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/code/repo_level_ordering/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/code/repo_level_ordering/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/code/repo_level_ordering/**" + - "!data-processing-lib/**" + - "!transforms/code/repo_level_ordering/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/code/repo_level_ordering + run: | + if [ -e "transforms/code/repo_level_ordering/Makefile" ]; then + make -C transforms/code/repo_level_ordering DOCKER=docker test-src + else + echo "transforms/code/repo_level_ordering/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/code/repo_level_ordering + run: | + if [ -e "transforms/code/repo_level_ordering/Makefile" ]; then + if [ -d "transforms/code/repo_level_ordering/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/code/repo_level_ordering DOCKER=docker test-image + else + echo "transforms/code/repo_level_ordering/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/code/repo_level_ordering/Makefile" ]; then + make -C transforms/code/repo_level_ordering publish + else + echo "transforms/code/repo_level_ordering/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-kfp-transform.template b/.github/workflows/test-kfp-transform.template new file mode 100644 index 0000000000..7e51491002 --- /dev/null +++ b/.github/workflows/test-kfp-transform.template @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - @TARGET_TRANSFORM_DIR@ + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "@TARGET_TRANSFORM_DIR@/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!@TARGET_TRANSFORM_DIR@/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "@TARGET_TRANSFORM_DIR@/**" + - "!data-processing-lib/**" + - "!@TARGET_TRANSFORM_DIR@/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in @TARGET_TRANSFORM_DIR@ + run: | + if [ -e "@TARGET_TRANSFORM_DIR@/Makefile" ]; then + make -C @TARGET_TRANSFORM_DIR@ DOCKER=docker test-src + else + echo "@TARGET_TRANSFORM_DIR@/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in @TARGET_TRANSFORM_DIR@ + run: | + if [ -e "@TARGET_TRANSFORM_DIR@/Makefile" ]; then + if [ -d "@TARGET_TRANSFORM_DIR@/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C @TARGET_TRANSFORM_DIR@ DOCKER=docker test-image + else + echo "@TARGET_TRANSFORM_DIR@/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "@TARGET_TRANSFORM_DIR@/Makefile" ]; then + make -C @TARGET_TRANSFORM_DIR@ publish + else + echo "@TARGET_TRANSFORM_DIR@/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-kfp.yml b/.github/workflows/test-kfp.yml index f0984c21b2..dc8bc2bd50 100644 --- a/.github/workflows/test-kfp.yml +++ b/.github/workflows/test-kfp.yml @@ -10,6 +10,7 @@ on: - "*" paths: - "kfp/**" + - "data-processing-lib/**" - "!**.md" - "!**/doc/**" - "!**/images/**" @@ -20,6 +21,7 @@ on: - "releases/**" paths: - "kfp/**" + - "data-processing-lib/**" - "!**.md" - "!**/doc/**" - "!**/images/**" diff --git a/.github/workflows/test-language-kfp-doc_quality.yml b/.github/workflows/test-language-kfp-doc_quality.yml new file mode 100644 index 0000000000..df35635afe --- /dev/null +++ b/.github/workflows/test-language-kfp-doc_quality.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/language/doc_quality + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/language/doc_quality/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/language/doc_quality/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/language/doc_quality/**" + - "!data-processing-lib/**" + - "!transforms/language/doc_quality/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/language/doc_quality + run: | + if [ -e "transforms/language/doc_quality/Makefile" ]; then + make -C transforms/language/doc_quality DOCKER=docker test-src + else + echo "transforms/language/doc_quality/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/language/doc_quality + run: | + if [ -e "transforms/language/doc_quality/Makefile" ]; then + if [ -d "transforms/language/doc_quality/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/language/doc_quality DOCKER=docker test-image + else + echo "transforms/language/doc_quality/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/language/doc_quality/Makefile" ]; then + make -C transforms/language/doc_quality publish + else + echo "transforms/language/doc_quality/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-language-kfp-lang_id.yml b/.github/workflows/test-language-kfp-lang_id.yml new file mode 100644 index 0000000000..001bf9ff42 --- /dev/null +++ b/.github/workflows/test-language-kfp-lang_id.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/language/lang_id + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/language/lang_id/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/language/lang_id/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/language/lang_id/**" + - "!data-processing-lib/**" + - "!transforms/language/lang_id/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/language/lang_id + run: | + if [ -e "transforms/language/lang_id/Makefile" ]; then + make -C transforms/language/lang_id DOCKER=docker test-src + else + echo "transforms/language/lang_id/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/language/lang_id + run: | + if [ -e "transforms/language/lang_id/Makefile" ]; then + if [ -d "transforms/language/lang_id/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/language/lang_id DOCKER=docker test-image + else + echo "transforms/language/lang_id/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/language/lang_id/Makefile" ]; then + make -C transforms/language/lang_id publish + else + echo "transforms/language/lang_id/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-language-kfp-text_encoder.yml b/.github/workflows/test-language-kfp-text_encoder.yml new file mode 100644 index 0000000000..ea9b62613f --- /dev/null +++ b/.github/workflows/test-language-kfp-text_encoder.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/language/text_encoder + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/language/text_encoder/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/language/text_encoder/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/language/text_encoder/**" + - "!data-processing-lib/**" + - "!transforms/language/text_encoder/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/language/text_encoder + run: | + if [ -e "transforms/language/text_encoder/Makefile" ]; then + make -C transforms/language/text_encoder DOCKER=docker test-src + else + echo "transforms/language/text_encoder/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/language/text_encoder + run: | + if [ -e "transforms/language/text_encoder/Makefile" ]; then + if [ -d "transforms/language/text_encoder/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/language/text_encoder DOCKER=docker test-image + else + echo "transforms/language/text_encoder/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/language/text_encoder/Makefile" ]; then + make -C transforms/language/text_encoder publish + else + echo "transforms/language/text_encoder/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-universal-kfp-doc_id.yml b/.github/workflows/test-universal-kfp-doc_id.yml new file mode 100644 index 0000000000..02325110a5 --- /dev/null +++ b/.github/workflows/test-universal-kfp-doc_id.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/universal/doc_id + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/universal/doc_id/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/universal/doc_id/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/universal/doc_id/**" + - "!data-processing-lib/**" + - "!transforms/universal/doc_id/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/universal/doc_id + run: | + if [ -e "transforms/universal/doc_id/Makefile" ]; then + make -C transforms/universal/doc_id DOCKER=docker test-src + else + echo "transforms/universal/doc_id/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/universal/doc_id + run: | + if [ -e "transforms/universal/doc_id/Makefile" ]; then + if [ -d "transforms/universal/doc_id/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/universal/doc_id DOCKER=docker test-image + else + echo "transforms/universal/doc_id/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/universal/doc_id/Makefile" ]; then + make -C transforms/universal/doc_id publish + else + echo "transforms/universal/doc_id/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-universal-kfp-ededup.yml b/.github/workflows/test-universal-kfp-ededup.yml new file mode 100644 index 0000000000..aa9b628540 --- /dev/null +++ b/.github/workflows/test-universal-kfp-ededup.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/universal/ededup + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/universal/ededup/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/universal/ededup/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/universal/ededup/**" + - "!data-processing-lib/**" + - "!transforms/universal/ededup/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/universal/ededup + run: | + if [ -e "transforms/universal/ededup/Makefile" ]; then + make -C transforms/universal/ededup DOCKER=docker test-src + else + echo "transforms/universal/ededup/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/universal/ededup + run: | + if [ -e "transforms/universal/ededup/Makefile" ]; then + if [ -d "transforms/universal/ededup/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/universal/ededup DOCKER=docker test-image + else + echo "transforms/universal/ededup/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/universal/ededup/Makefile" ]; then + make -C transforms/universal/ededup publish + else + echo "transforms/universal/ededup/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-universal-kfp-fdedup.yml b/.github/workflows/test-universal-kfp-fdedup.yml new file mode 100644 index 0000000000..d3e33f2e6a --- /dev/null +++ b/.github/workflows/test-universal-kfp-fdedup.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/universal/fdedup + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/universal/fdedup/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/universal/fdedup/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/universal/fdedup/**" + - "!data-processing-lib/**" + - "!transforms/universal/fdedup/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/universal/fdedup + run: | + if [ -e "transforms/universal/fdedup/Makefile" ]; then + make -C transforms/universal/fdedup DOCKER=docker test-src + else + echo "transforms/universal/fdedup/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/universal/fdedup + run: | + if [ -e "transforms/universal/fdedup/Makefile" ]; then + if [ -d "transforms/universal/fdedup/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/universal/fdedup DOCKER=docker test-image + else + echo "transforms/universal/fdedup/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/universal/fdedup/Makefile" ]; then + make -C transforms/universal/fdedup publish + else + echo "transforms/universal/fdedup/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-universal-kfp-filter.yml b/.github/workflows/test-universal-kfp-filter.yml new file mode 100644 index 0000000000..de689a030a --- /dev/null +++ b/.github/workflows/test-universal-kfp-filter.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/universal/filter + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/universal/filter/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/universal/filter/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/universal/filter/**" + - "!data-processing-lib/**" + - "!transforms/universal/filter/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/universal/filter + run: | + if [ -e "transforms/universal/filter/Makefile" ]; then + make -C transforms/universal/filter DOCKER=docker test-src + else + echo "transforms/universal/filter/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/universal/filter + run: | + if [ -e "transforms/universal/filter/Makefile" ]; then + if [ -d "transforms/universal/filter/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/universal/filter DOCKER=docker test-image + else + echo "transforms/universal/filter/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/universal/filter/Makefile" ]; then + make -C transforms/universal/filter publish + else + echo "transforms/universal/filter/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-universal-kfp-noop.yml b/.github/workflows/test-universal-kfp-noop.yml new file mode 100644 index 0000000000..7ca640a684 --- /dev/null +++ b/.github/workflows/test-universal-kfp-noop.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/universal/noop + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/universal/noop/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/universal/noop/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/universal/noop/**" + - "!data-processing-lib/**" + - "!transforms/universal/noop/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/universal/noop + run: | + if [ -e "transforms/universal/noop/Makefile" ]; then + make -C transforms/universal/noop DOCKER=docker test-src + else + echo "transforms/universal/noop/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/universal/noop + run: | + if [ -e "transforms/universal/noop/Makefile" ]; then + if [ -d "transforms/universal/noop/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/universal/noop DOCKER=docker test-image + else + echo "transforms/universal/noop/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/universal/noop/Makefile" ]; then + make -C transforms/universal/noop publish + else + echo "transforms/universal/noop/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-universal-kfp-profiler.yml b/.github/workflows/test-universal-kfp-profiler.yml new file mode 100644 index 0000000000..bb0773cc30 --- /dev/null +++ b/.github/workflows/test-universal-kfp-profiler.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/universal/profiler + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/universal/profiler/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/universal/profiler/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/universal/profiler/**" + - "!data-processing-lib/**" + - "!transforms/universal/profiler/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/universal/profiler + run: | + if [ -e "transforms/universal/profiler/Makefile" ]; then + make -C transforms/universal/profiler DOCKER=docker test-src + else + echo "transforms/universal/profiler/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/universal/profiler + run: | + if [ -e "transforms/universal/profiler/Makefile" ]; then + if [ -d "transforms/universal/profiler/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/universal/profiler DOCKER=docker test-image + else + echo "transforms/universal/profiler/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/universal/profiler/Makefile" ]; then + make -C transforms/universal/profiler publish + else + echo "transforms/universal/profiler/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-universal-kfp-resize.yml b/.github/workflows/test-universal-kfp-resize.yml new file mode 100644 index 0000000000..a42c8d8805 --- /dev/null +++ b/.github/workflows/test-universal-kfp-resize.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/universal/resize + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/universal/resize/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/universal/resize/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/universal/resize/**" + - "!data-processing-lib/**" + - "!transforms/universal/resize/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/universal/resize + run: | + if [ -e "transforms/universal/resize/Makefile" ]; then + make -C transforms/universal/resize DOCKER=docker test-src + else + echo "transforms/universal/resize/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/universal/resize + run: | + if [ -e "transforms/universal/resize/Makefile" ]; then + if [ -d "transforms/universal/resize/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/universal/resize DOCKER=docker test-image + else + echo "transforms/universal/resize/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/universal/resize/Makefile" ]; then + make -C transforms/universal/resize publish + else + echo "transforms/universal/resize/Makefile not found - publishing disabled for this transform." + fi diff --git a/.github/workflows/test-universal-kfp-tokenization.yml b/.github/workflows/test-universal-kfp-tokenization.yml new file mode 100644 index 0000000000..42c7e5bdc4 --- /dev/null +++ b/.github/workflows/test-universal-kfp-tokenization.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/universal/tokenization + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/universal/tokenization/**" + - "!data-processing-lib/**" # This is/will be tested in separate workflow + - "!transforms/universal/tokenization/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/universal/tokenization/**" + - "!data-processing-lib/**" + - "!transforms/universal/tokenization/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/universal/tokenization + run: | + if [ -e "transforms/universal/tokenization/Makefile" ]; then + make -C transforms/universal/tokenization DOCKER=docker test-src + else + echo "transforms/universal/tokenization/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/universal/tokenization + run: | + if [ -e "transforms/universal/tokenization/Makefile" ]; then + if [ -d "transforms/universal/tokenization/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/universal/tokenization DOCKER=docker test-image + else + echo "transforms/universal/tokenization/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/universal/tokenization/Makefile" ]; then + make -C transforms/universal/tokenization publish + else + echo "transforms/universal/tokenization/Makefile not found - publishing disabled for this transform." + fi