Split the parameterized Azure CI into per-accelerator job templates.

 * .azure/ci-testig-parameterized.yml: point the existing job list at
   cuda-template.yml and add a second template entry, habana-template.yml,
   that runs the two Lightning-AI/metrics_pl configs.
 * .azure/testing-template.yml -> .azure/cuda-template.yml (rename): move
   `workspace: clean: all` below the `container:` mapping and put
   `--no-install-recommends` ahead of the package list in apt-get install.
 * .azure/habana-template.yml (new): a `check_diff` job plus one job per
   config on the 'intel-hpus' pool inside the Gaudi PyTorch container.

NOTE(review): line breaks were restored from the hunk headers (every hunk
matches its declared line counts); leading indentation was rebuilt from the
Azure Pipelines job/step nesting and should be confirmed against the tree
before applying.

diff --git a/.azure/ci-testig-parameterized.yml b/.azure/ci-testig-parameterized.yml
index eb6ad1e1..33f99c58 100644
--- a/.azure/ci-testig-parameterized.yml
+++ b/.azure/ci-testig-parameterized.yml
@@ -14,7 +14,7 @@ schedules:
     include: ["main"]
 
 jobs:
-- template: testing-template.yml
+- template: cuda-template.yml
   parameters:
     configs:
     - "Lightning-AI/metrics_pl-develop.yaml"
@@ -24,3 +24,9 @@ jobs:
     - "microsoft/deepspeed-release.yaml"
     - "neptune-ai/lightning_integration.yaml"
     - "manujosephv/pytorch-tabular_lit-release.yaml"
+
+- template: habana-template.yml
+  parameters:
+    configs:
+    - "Lightning-AI/metrics_pl-develop.yaml"
+    - "Lightning-AI/metrics_pl-release.yaml"
diff --git a/.azure/testing-template.yml b/.azure/cuda-template.yml
similarity index 97%
rename from .azure/testing-template.yml
rename to .azure/cuda-template.yml
index d8b15ac2..e265c786 100644
--- a/.azure/testing-template.yml
+++ b/.azure/cuda-template.yml
@@ -36,8 +36,6 @@ jobs:
     timeoutInMinutes: 75
     # how much time to give 'run always even if cancelled tasks' before stopping them
     cancelTimeoutInMinutes: 2
-    workspace:
-      clean: all
 
     pool: 'lit-rtx-3090'
     # this need to have installed docker in the base image...
@@ -47,6 +45,9 @@ jobs:
       # image: "nvcr.io/nvidia/pytorch:21.11-py3"
       image: "pytorch/pytorch:1.13.0-cuda11.6-cudnn8-runtime"
       options: "--gpus=all --shm-size=8g -v /usr/bin/docker:/tmp/docker:ro"
+    workspace:
+      clean: all
+
     steps:
 
     - bash: |
@@ -70,7 +71,7 @@ jobs:
 
     - bash: |
         sudo apt-get update -q --fix-missing
-        sudo apt-get install -q -y build-essential gcc g++ cmake git unzip tree --no-install-recommends
+        sudo apt-get install -q -y --no-install-recommends build-essential gcc g++ cmake git unzip tree
         # Python's dependencies
         pip --version
         pip install -r requirements.txt
diff --git a/.azure/habana-template.yml b/.azure/habana-template.yml
new file mode 100644
index 00000000..95f46ce8
--- /dev/null
+++ b/.azure/habana-template.yml
@@ -0,0 +1,106 @@
+jobs:
+
+- job: check_diff
+  pool:
+    vmImage: 'Ubuntu-20.04'
+  steps:
+  - bash: |
+      pip --version
+      pip install -q -r requirements.txt
+      pip list
+    displayName: 'Install dependencies'
+
+  - script: |
+      echo $PR_NUMBER
+      CONFIGS=$(python _actions/assistant.py changed_configs $PR_NUMBER --as_list=False 2>&1)
+      printf "Changed configs: $CONFIGS\n"
+      echo "##vso[task.setvariable variable=diff;isOutput=true]$CONFIGS"
+    name: files
+    env:
+      PR_NUMBER: "$(System.PullRequest.PullRequestNumber)"
+    displayName: 'Config diff'
+
+
+- ${{ each config in parameters.configs }}:
+  - job:
+    displayName: ${{config}}
+    dependsOn: check_diff
+    variables:
+      # map the output variable from A into this job
+      configs: $[ dependencies.check_diff.outputs['files.diff'] ]
+      config: "${{ config }}"
+
+    condition: or(eq(variables['Build.SourceBranch'], 'refs/heads/main'), contains(variables['configs'], variables['config']))
+    # how long to run the job before automatically cancelling
+    timeoutInMinutes: 75
+    # how much time to give 'run always even if cancelled tasks' before stopping them
+    cancelTimeoutInMinutes: 2
+
+    pool: 'intel-hpus'
+    # this need to have installed docker in the base image...
+    container:
+      image: "vault.habana.ai/gaudi-docker/1.8.0/ubuntu20.04/habanalabs/pytorch-installer-1.13.1:latest"
+      options: "--runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host --shm-size=4g -v /usr/bin/docker:/tmp/docker:ro"
+    workspace:
+      clean: all
+
+    steps:
+
+    - script: |
+        container_id=$(head -1 /proc/self/cgroup|cut -d/ -f3)
+        /tmp/docker exec -t -u 0 $container_id \
+          sh -c "apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -o Dpkg::Options::="--force-confold" -y install sudo"
+        echo "##vso[task.setvariable variable=CONTAINER_ID]$container_id"
+      displayName: 'Install Sudo in container (thanks Microsoft!)'
+
+    - bash: |
+        whoami && id
+        sudo apt-get install -q -y hwinfo
+        hwinfo --short
+        python --version
+        python --version
+        pip --version
+        pip list
+      displayName: 'Image info & HW'
+
+    - bash: |
+        sudo apt-get update -q --fix-missing
+        sudo apt-get install -q -y --no-install-recommends build-essential gcc g++ cmake git unzip tree
+        # Python's dependencies
+        pip --version
+        pip install -r requirements.txt
+        pip list
+      displayName: 'Install dependencies'
+
+    #- bash: |
+    #    echo $CONTAINER_ID
+    #  displayName: 'Sanity check'
+
+    - bash: |
+        python _actions/assistant.py prepare_env --config_file=${{config}} > prepare_env.sh
+        cat prepare_env.sh
+      displayName: 'Create scripts'
+
+    - bash: |
+        bash prepare_env.sh
+        # pip list
+        tree .
+      displayName: 'Prepare env.'
+
+    - script: |
+        ENVS=$(python _actions/assistant.py list_env --config_file=${{config}} --export 2>&1)
+        printf "PyTest env. variables: $ENVS\n"
+        echo "##vso[task.setvariable variable=envs;isOutput=true]$ENVS"
+        ARGS=$(python _actions/assistant.py specify_tests --config_file=${{config}} 2>&1)
+        printf "PyTest arguments: $ARGS\n"
+        echo "##vso[task.setvariable variable=args;isOutput=true]$ARGS"
+      name: testing
+      displayName: 'testing specs'
+
+    - bash: |
+        $(testing.envs)
+        python -m pytest $(testing.args) -v
+      workingDirectory: _integrations
+      displayName: 'Integration tests'
+
+    # ToDo: add Slack notification