From 347a83afbd5f860b37f91d4726f615ef9b71f1c7 Mon Sep 17 00:00:00 2001 From: Kaxil Naik Date: Mon, 18 Nov 2024 18:55:59 +0000 Subject: [PATCH] Exclude Scarf Usage Data Collection in CI Environments (#44155) Most of the CI systems add the "CI=true" env var. References: - https://docs.pytest.org/en/stable/explanation/ci.html - https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables - https://docs.travis-ci.com/user/environment-variables/ - https://docs.gitlab.com/ee/ci/variables/predefined_variables.html - https://circleci.com/docs/variables/#built-in-environment-variables - https://www.jenkins.io/doc/book/pipeline/jenkinsfile/#using-environment-variables - https://adamj.eu/tech/2020/03/09/detect-if-your-tests-are-running-on-ci/ - https://github.com/The-Compiler/pytest-vw/blob/master/pytest_vw.py --- airflow/utils/usage_data_collection.py | 25 +++++++++++++++++++++++ tests/utils/test_usage_data_collection.py | 2 ++ 2 files changed, 27 insertions(+) diff --git a/airflow/utils/usage_data_collection.py b/airflow/utils/usage_data_collection.py index fe86a2da1cb50..3bdfb180fa912 100644 --- a/airflow/utils/usage_data_collection.py +++ b/airflow/utils/usage_data_collection.py @@ -25,6 +25,7 @@ from __future__ import annotations +import os import platform from urllib.parse import urlencode @@ -43,6 +44,10 @@ def usage_data_collection(): if _version_is_prerelease(airflow_version): return + # Exclude CI environments + if _is_ci_environ(): + return + scarf_domain = "https://apacheairflow.gateway.scarf.sh/scheduler" try: @@ -70,6 +75,26 @@ def _version_is_prerelease(version: str) -> bool: return parse(version).is_prerelease +def _is_ci_environ() -> bool: + """Return True if running in any known CI environment.""" + if os.getenv("CI") == "true": + # Generic CI variable set by many CI systems (GH Actions, Travis, GitLab, CircleCI, Jenkins, Heroku) + return True + + # Other CI variables set by 
specific CI systems + ci_env_vars = { + "CIRCLECI", # CircleCI + "CODEBUILD_BUILD_ID", # AWS CodeBuild + "GITHUB_ACTIONS", # GitHub Actions + "GITLAB_CI", # GitLab CI + "JENKINS_URL", # Jenkins + "TF_BUILD", # Azure Pipelines + "TRAVIS", # Travis CI + } + + return any(var in os.environ for var in ci_env_vars) + + def get_platform_info() -> tuple[str, str]: return platform.system(), platform.machine() diff --git a/tests/utils/test_usage_data_collection.py b/tests/utils/test_usage_data_collection.py index bc973672089c9..143bce39eca4d 100644 --- a/tests/utils/test_usage_data_collection.py +++ b/tests/utils/test_usage_data_collection.py @@ -43,12 +43,14 @@ def test_scarf_analytics_disabled(mock_get, is_enabled, is_prerelease): @mock.patch("airflow.settings.is_usage_data_collection_enabled", return_value=True) @mock.patch("airflow.utils.usage_data_collection._version_is_prerelease", return_value=False) +@mock.patch("airflow.utils.usage_data_collection._is_ci_environ", return_value=False) @mock.patch("airflow.utils.usage_data_collection.get_database_version", return_value="12.3") @mock.patch("airflow.utils.usage_data_collection.get_database_name", return_value="postgres") @mock.patch("httpx.get") def test_scarf_analytics( mock_get, mock_is_usage_data_collection_enabled, + mock_version_is_ci, mock_version_is_prerelease, get_database_version, get_database_name,