From ab7591dba10e95d2fc5be84e9533cb1f01860a0b Mon Sep 17 00:00:00 2001 From: Vedant Date: Thu, 24 Jul 2025 21:58:42 +0530 Subject: [PATCH 1/6] Added NBTest assertions in the test notebooks, and modified the CI and Makefile accordingly Signed-off-by: Vedant --- .github/workflows/tests.yml | 193 ++++++++++++++-------------- Makefile | 7 +- pyproject.toml | 31 +++-- tests/notebooks/lazy_pipeline.ipynb | 60 ++++++--- tests/notebooks/meteorites.ipynb | 38 +++--- tests/notebooks/titanic.ipynb | 39 +++--- 6 files changed, 200 insertions(+), 168 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cba38de64..e761197f1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,8 +4,9 @@ on: pull_request: push: branches: - - master - - develop + - master + - develop + - nbtest env: YDATA_PROFILING_NO_ANALYTICS: false @@ -15,49 +16,49 @@ jobs: name: Tests strategy: matrix: - os: [ ubuntu-22.04 ] - python-version: ["3.9", "3.10", "3.11", "3.12" ] - pandas: [ "pandas>1.1" ] - numpy: [ "numpy>=1.21" ] + os: [ubuntu-22.04] + python-version: ["3.9", "3.10", "3.11", "3.12"] + pandas: ["pandas>1.1"] + numpy: ["numpy>=1.21"] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 - - - name: Setup python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - - uses: actions/cache@v4 - if: startsWith(runner.os, 'Linux') - with: - path: ~/.cache/pip - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v4 - if: startsWith(runner.os, 'macOS') - with: - path: ~/Library/Caches/pip - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v4 - if: startsWith(runner.os, 'Windows') - with: - path: ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - run: | - pip install --upgrade pip setuptools wheel - pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" - - run: echo "YDATA_PROFILING_NO_ANALYTICS=False" >> $GITHUB_ENV - - run: make install - - - run: make test + - uses: actions/checkout@v4 + + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + + - uses: actions/cache@v4 + if: startsWith(runner.os, 'Linux') + with: + path: ~/.cache/pip + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - uses: actions/cache@v4 + if: startsWith(runner.os, 'macOS') + with: + path: ~/Library/Caches/pip + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - uses: actions/cache@v4 + if: startsWith(runner.os, 'Windows') + with: + path: ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - run: | + pip install --upgrade pip setuptools wheel + pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" + - run: echo "YDATA_PROFILING_NO_ANALYTICS=False" >> $GITHUB_ENV + - run: make install + + - run: make test coverage: name: Coverage @@ -66,68 +67,67 @@ jobs: matrix: os: [ubuntu-22.04] python-version: ["3.12"] - pandas: [ "pandas>1.1" ] - numpy: [ "numpy>=1.21" ] + pandas: ["pandas>1.1"] + numpy: ["numpy>=1.21"] steps: - - uses: actions/checkout@v4 - - - name: Setup python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - - uses: actions/cache@v4 - if: startsWith(runner.os, 'Linux') - with: - path: ~/.cache/pip - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v4 - if: startsWith(runner.os, 'macOS') - with: - path: ~/Library/Caches/pip - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v4 - if: startsWith(runner.os, 'Windows') - with: - path: ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - run: | - pip install --upgrade pip setuptools wheel - pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" - echo "YDATA_PROFILING_NO_ANALYTICS=False" >> $GITHUB_ENV - - run: make install - - - run: make test_cov - - - uses: actions/cache@v4 - if: startsWith(runner.os, 'Windows') - with: - path: ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - run: | - pip install --upgrade pip setuptools wheel - pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" - - run: make install - - run: make test_cov - - run: codecov -F py${{ matrix.python-version }}-${{ matrix.os }}-${{ matrix.pandas }}-${{ matrix.numpy }} + - uses: actions/checkout@v4 + + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + + - uses: actions/cache@v4 + if: startsWith(runner.os, 'Linux') + with: + path: ~/.cache/pip + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - uses: actions/cache@v4 + if: startsWith(runner.os, 'macOS') + with: + path: ~/Library/Caches/pip + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - uses: actions/cache@v4 + if: startsWith(runner.os, 'Windows') + with: + path: ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - run: | + pip install --upgrade pip setuptools wheel + pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" + echo "YDATA_PROFILING_NO_ANALYTICS=False" >> $GITHUB_ENV + - run: make install + - run: make test_cov + + - uses: actions/cache@v4 + if: startsWith(runner.os, 'Windows') + with: + path: ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - run: | + pip install --upgrade pip setuptools wheel + pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" + - run: make install + - run: make test_cov + - run: codecov -F py${{ matrix.python-version }}-${{ matrix.os }}-${{ matrix.pandas }}-${{ matrix.numpy }} test_spark: runs-on: ubuntu-24.04 continue-on-error: false strategy: matrix: - python-version: [ "3.9", "3.10", "3.11", "3.12" ] - pyspark-version: [ "3.4" , "3.5" ] + python-version: ["3.9", "3.10", "3.11", "3.12"] + pyspark-version: ["3.4", "3.5"] name: Tests Spark | Python ${{ matrix.python-version }} | PySpark ${{ matrix.pyspark-version }} @@ -169,4 +169,3 @@ jobs: make install pip install ".[test]" make test_spark - diff --git a/Makefile b/Makefile index 22a69c758..56f37acbf 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ docs: test: pytest tests/unit/ pytest tests/issues/ - pytest --nbval tests/notebooks/ + pytest --nbtest tests/notebooks/ ydata_profiling -h test_spark: @@ -16,7 +16,12 @@ test_spark: test_cov: pytest --cov=. tests/unit/ pytest --cov=. --cov-append tests/issues/ + + # This environment variable allows NBTest assertions to be executed, as NBTest does not have coverage support. + export 'NBTEST_RUN_ASSERTS'='1' pytest --cov=. --cov-append --nbval tests/notebooks/ + export 'NBTEST_RUN_ASSERTS'='0' + ydata_profiling -h examples: diff --git a/pyproject.toml b/pyproject.toml index e1e668788..c9f87ff11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ build-backend = "setuptools.build_meta" requires = [ "setuptools>=72.0.0,<80.0.0", "setuptools-scm>=8.0.0,<9.0.0", - "wheel>=0.38.4,<1.0.0" + "wheel>=0.38.4,<1.0.0", ] [packaging] @@ -12,13 +12,18 @@ package_name = "ydata-profiling" [project] name = "ydata-profiling" requires-python = ">=3.7,<3.13" -authors = [ - {name = "YData Labs Inc", email = "opensource@ydata.ai"} -] +authors = [{ name = "YData Labs Inc", email = "opensource@ydata.ai" }] description = "Generate profile report for pandas DataFrame" -keywords = ["pandas", "data-science", "data-analysis", "python", "jupyter", "ipython"] +keywords = [ + "pandas", + "data-science", + "data-analysis", + "python", + "jupyter", + "ipython", +] readme = "README.md" -license = {file = "LICENSE.md"} +license = { file = "LICENSE.md" } classifiers = [ "Development Status :: 5 - Production/Stable", "Topic :: Software Development :: Build Tools", @@ -70,9 +75,7 @@ dependencies = [ "numba>=0.56.0, <=0.61", ] -dynamic = [ - "version", -] +dynamic = ["version"] [project.optional-dependencies] dev = [ @@ -99,10 +102,7 @@ docs = [ "mkdocs-badges", ] -notebook = [ - "jupyter>=1.0.0", - "ipywidgets>=7.5.1", -] +notebook = ["jupyter>=1.0.0", "ipywidgets>=7.5.1"] # this provides the recommended pyspark and pyarrow versions for spark to work on pandas-profiling # note that if you are using pyspark 2.3 or 2.4 and pyarrow >= 0.15, you might need to @@ -121,14 +121,13 @@ test = [ "codecov", "pytest-cov", "nbval", + "nbtest-gen", "pyarrow", "twine>=3.1.1", "kaggle", ] -unicode= [ - "tangled-up-in-unicode==0.2.0", -] +unicode = ["tangled-up-in-unicode==0.2.0"] [project.urls] Homepage = "https://ydata.ai" diff --git a/tests/notebooks/lazy_pipeline.ipynb b/tests/notebooks/lazy_pipeline.ipynb index 381173414..c4eff4df4 100644 --- a/tests/notebooks/lazy_pipeline.ipynb +++ b/tests/notebooks/lazy_pipeline.ipynb @@ -21,7 +21,15 @@ "\n", "# Our package\n", "from ydata_profiling import ProfileReport\n", - "from ydata_profiling.utils.cache import cache_file" + "from ydata_profiling.utils.cache import cache_file\n", + "\n", + "import nbtest\n", + "\n", + "# The tests in this notebook only run in the continuous integration pipeline\n", + "# in order to run manually uncomment the following two lines:\n", + "\n", + "# import os\n", + "# os.environ['NBTEST_RUN_ASSERTS'] = '1'" ] }, { @@ -48,10 +56,8 @@ "with capture_output() as out:\n", " profile = ProfileReport(df, title=\"Titanic Dataset\", progress_bar=True, lazy=False)\n", "\n", - "assert all(\n", - " any(v in s.data[\"text/plain\"] for v in [\"%|\", \"FloatProgress\"]) for s in out.outputs\n", - ")\n", - "assert len(out.outputs) == 2" + "nbtest.assert_true(all(any(v in s.data[\"text/plain\"] for v in [\"%|\", \"FloatProgress\"]) for s in out.outputs))\n", + "nbtest.assert_equal(len(out.outputs), 2)" ] }, { @@ -69,22 +75,20 @@ " lazy=True,\n", " )\n", "\n", - "assert len(out.outputs) == 0\n", + "nbtest.assert_equal(len(out.outputs), 0)\n", "\n", "with capture_output() as out:\n", " _ = profile.to_html()\n", "\n", "\n", - "assert all(\n", - " any(v in s.data[\"text/plain\"] for v in [\"%|\", \"FloatProgress\"]) for s in out.outputs\n", - ")\n", - "assert len(out.outputs) == 3\n", + "nbtest.assert_true(all(any(v in s.data[\"text/plain\"] for v in [\"%|\", \"FloatProgress\"]) for s in out.outputs))\n", + "nbtest.assert_equal(len(out.outputs), 3)\n", "\n", "with capture_output() as out:\n", " _ = profile.to_file(\"/tmp/tmpfile.html\")\n", "\n", - "assert \"Export report to file\" in out.outputs[0].data[\"text/plain\"]\n", - "assert len(out.outputs) == 1" + "nbtest.assert_in(\"Export report to file\", out.outputs[0].data[\"text/plain\"])\n", + "nbtest.assert_equal(len(out.outputs), 1)" ] }, { @@ -96,30 +100,30 @@ "# Test caching of the iterative building process\n", "with capture_output() as out:\n", " profile = ProfileReport(df, title=\"Titanic Dataset\", progress_bar=True, lazy=True)\n", - "assert len(out.outputs) == 0\n", + "nbtest.assert_equal(len(out.outputs), 0)\n", "\n", "with capture_output() as out:\n", " profile.description_set\n", - "assert len(out.outputs) == 1\n", + "nbtest.assert_equal(len(out.outputs), 1)\n", "\n", "with capture_output() as out:\n", " profile.report\n", - "assert len(out.outputs) == 1\n", + "nbtest.assert_equal(len(out.outputs), 1)\n", "\n", "with capture_output() as out:\n", " profile.html\n", - "assert len(out.outputs) == 1\n", + "nbtest.assert_equal(len(out.outputs), 1)\n", "\n", "with capture_output() as out:\n", " profile.config.html.style.theme = \"united\"\n", " profile.invalidate_cache(\"rendering\")\n", " profile.to_file(\"/tmp/cache1.html\")\n", - "assert len(out.outputs) == 2\n", + "nbtest.assert_equal(len(out.outputs), 2)\n", "\n", "with capture_output() as out:\n", " profile.config.pool_size = 1\n", " profile.html\n", - "assert len(out.outputs) == 0\n", + "nbtest.assert_equal(len(out.outputs), 0)\n", "\n", "with capture_output() as out:\n", " profile.config.pool_size = 0\n", @@ -127,15 +131,29 @@ " profile.config.samples.tail = 15\n", " profile.invalidate_cache()\n", " profile.to_file(\"/tmp/cache2.html\")\n", - "assert len(out.outputs) == 4" + "nbtest.assert_equal(len(out.outputs), 4)" ] } ], "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tests/notebooks/meteorites.ipynb b/tests/notebooks/meteorites.ipynb index 212c11def..8c2afcf66 100644 --- a/tests/notebooks/meteorites.ipynb +++ b/tests/notebooks/meteorites.ipynb @@ -22,7 +22,15 @@ "from IPython.utils.capture import capture_output\n", "\n", "import ydata_profiling\n", - "from ydata_profiling.utils.cache import cache_file" + "from ydata_profiling.utils.cache import cache_file\n", + "\n", + "import nbtest\n", + "\n", + "# The tests in this notebook only run in the continuous integration pipeline\n", + "# in order to run manually uncomment the following two lines:\n", + "\n", + "# import os\n", + "# os.environ['NBTEST_RUN_ASSERTS'] = '1'" ] }, { @@ -83,9 +91,9 @@ " )\n", " display(pr)\n", "\n", - "assert len(out.outputs) == 2\n", - "assert out.outputs[0].data[\"text/plain\"] == \"\"\n", - "assert out.outputs[1].data[\"text/plain\"] == \"\"" + "nbtest.assert_equal(len(out.outputs), 2)\n", + "nbtest.assert_equal(out.outputs[0].data[\"text/plain\"], \"\")\n", + "nbtest.assert_equal(out.outputs[1].data[\"text/plain\"], \"\")" ] }, { @@ -103,10 +111,8 @@ " lazy=False,\n", " )\n", "\n", - "assert all(\n", - " any(v in s.data[\"text/plain\"] for v in [\"%|\", \"FloatProgress\"]) for s in out.outputs\n", - ")\n", - "assert len(out.outputs) == 2" + "nbtest.assert_true(all(any(v in s.data[\"text/plain\"] for v in [\"%|\", \"FloatProgress\"]) for s in out.outputs))\n", + "nbtest.assert_equal(len(out.outputs), 2)" ] }, { @@ -119,10 +125,8 @@ "with capture_output() as out:\n", " pfr.to_file(\"/tmp/example.html\")\n", "\n", - "assert all(\n", - " any(v in s.data[\"text/plain\"] for v in [\"%|\", \"FloatProgress\"]) for s in out.outputs\n", - ")\n", - "assert len(out.outputs) == 2" + "nbtest.assert_true(all(any(v in s.data[\"text/plain\"] for v in [\"%|\", \"FloatProgress\"]) for s in out.outputs))\n", + "nbtest.assert_equal(len(out.outputs), 2)" ] }, { @@ -135,9 +139,9 @@ "with capture_output() as out:\n", " display(pfr)\n", "\n", - "assert len(out.outputs) == 2\n", - "assert out.outputs[0].data[\"text/plain\"] == \"\"\n", - "assert out.outputs[1].data[\"text/plain\"] == \"\"" + "nbtest.assert_equal(len(out.outputs), 2)\n", + "nbtest.assert_equal(out.outputs[0].data[\"text/plain\"], \"\")\n", + "nbtest.assert_equal(out.outputs[1].data[\"text/plain\"], \"\")" ] } ], @@ -157,9 +161,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.11.11" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tests/notebooks/titanic.ipynb b/tests/notebooks/titanic.ipynb index 768759214..c0313f6c5 100644 --- a/tests/notebooks/titanic.ipynb +++ b/tests/notebooks/titanic.ipynb @@ -21,7 +21,15 @@ "from ipywidgets import widgets\n", "\n", "from ydata_profiling import ProfileReport\n", - "from ydata_profiling.utils.cache import cache_file" + "from ydata_profiling.utils.cache import cache_file\n", + "\n", + "import nbtest\n", + "\n", + "# The tests in this notebook only run in the continuous integration pipeline\n", + "# in order to run manually uncomment the following two lines:\n", + "\n", + "# import os\n", + "# os.environ['NBTEST_RUN_ASSERTS'] = '1'" ] }, { @@ -54,10 +62,9 @@ " lazy=False,\n", " )\n", "\n", - "assert all(\n", - " any(v in s.data[\"text/plain\"] for v in [\"%|\", \"FloatProgress\"]) for s in out.outputs\n", - ")\n", - "assert len(out.outputs) == 2" + "nbtest.assert_true(all(any(v in s.data[\"text/plain\"] for v in [\"%|\", \"FloatProgress\"]) for s in out.outputs))\n", + "\n", + "nbtest.assert_equal(len(out.outputs), 2)" ] }, { @@ -76,7 +83,7 @@ " lazy=False,\n", " )\n", "\n", - "assert len(out.outputs) == 0" + "nbtest.assert_equal(len(out.outputs), 0)" ] }, { @@ -87,14 +94,14 @@ "source": [ "# Waiting on issue: https://github.com/computationalmodelling/nbval/issues/136\n", "\n", - "# The Notebook Widgets Interface\n", + "# The Notebook Widgets Interface - faced execution error here, hence, the tests below have been commented\n", "# with capture_output() as out:\n", "# profile.to_widgets()\n", "\n", - "# assert len(out.outputs) == 2\n", - "# assert out.outputs[0].data['text/plain'].startswith('Tab(children=(HTML(value=')\n", - "# assert out.outputs[1].data['text/plain'] == ''\n", - "# assert 'ydata-profiling' in out.outputs[1].data['text/html']" + "# nbtest.assert_equal(len(out.outputs), 2)\n", + "# nbtest.assert_true(out.outputs[0].data['text/plain'].startswith('Tab(children=(HTML(value='))\n", + "# nbtest.assert_equal(out.outputs[1].data['text/plain'], '')\n", + "# nbtest.assert_in('ydata-profiling', out.outputs[1].data['text/html'])" ] }, { @@ -107,14 +114,14 @@ "with capture_output() as out:\n", " profile.to_notebook_iframe()\n", "\n", - "assert len(out.outputs) == 1\n", - "assert out.outputs[0].data[\"text/plain\"] == \"\"" + "nbtest.assert_equal(len(out.outputs), 1)\n", + "nbtest.assert_equal(out.outputs[0].data[\"text/plain\"], \"\")" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -128,9 +135,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.6" + "version": "3.11.11" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From 0e69fea52f1b70c33652ec9a925d8bed0076f25c Mon Sep 17 00:00:00 2001 From: Vedant Date: Thu, 24 Jul 2025 22:25:27 +0530 Subject: [PATCH 2/6] Reordered shell commands in Makefile to test notebooks first Signed-off-by: Vedant --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 56f37acbf..760b89d61 100644 --- a/Makefile +++ b/Makefile @@ -4,9 +4,9 @@ docs: mkdocs build test: + pytest --nbtest tests/notebooks/ pytest tests/unit/ pytest tests/issues/ - pytest --nbtest tests/notebooks/ ydata_profiling -h test_spark: From 59d19783614cff199b41eb21a8bb9b412d7e9ed1 Mon Sep 17 00:00:00 2001 From: Vedant Date: Fri, 25 Jul 2025 00:17:10 +0530 Subject: [PATCH 3/6] Removing nbtest branch from CI activation, which is no longer needed Signed-off-by: Vedant --- .github/workflows/tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e761197f1..57f930ca4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,7 +6,6 @@ on: branches: - master - develop - - nbtest env: YDATA_PROFILING_NO_ANALYTICS: false From 23c85a4ef209d899eb7a69067c31b5ac503fa3f2 Mon Sep 17 00:00:00 2001 From: Vedant Date: Fri, 25 Jul 2025 12:07:10 +0530 Subject: [PATCH 4/6] Added coverage shell commands for nbtest Signed-off-by: Vedant --- .github/workflows/tests.yml | 1 + Makefile | 7 +------ 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 57f930ca4..e761197f1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,6 +6,7 @@ on: branches: - master - develop + - nbtest env: YDATA_PROFILING_NO_ANALYTICS: false diff --git a/Makefile b/Makefile index 760b89d61..17249b074 100644 --- a/Makefile +++ b/Makefile @@ -14,14 +14,9 @@ test_spark: ydata_profiling -h test_cov: + pytest --cov=. --cov-append --nbtest tests/notebooks/ pytest --cov=. tests/unit/ pytest --cov=. --cov-append tests/issues/ - - # This environment variable allows NBTest assertions to be executed, as NBTest does not have coverage support. - export 'NBTEST_RUN_ASSERTS'='1' - pytest --cov=. --cov-append --nbval tests/notebooks/ - export 'NBTEST_RUN_ASSERTS'='0' - ydata_profiling -h examples: From e58d93c89903c0997757cebe1304cd8b2143b293 Mon Sep 17 00:00:00 2001 From: Vedant Date: Fri, 25 Jul 2025 12:15:23 +0530 Subject: [PATCH 5/6] Removed nbval from test dependencies Signed-off-by: Vedant --- .github/workflows/tests.yml | 1 - make.bat | 2 +- pyproject.toml | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e761197f1..57f930ca4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,7 +6,6 @@ on: branches: - master - develop - - nbtest env: YDATA_PROFILING_NO_ANALYTICS: false diff --git a/make.bat b/make.bat index 20888d6bd..cc8715ca1 100644 --- a/make.bat +++ b/make.bat @@ -12,7 +12,7 @@ IF "%1%" == "docs" ( IF "%1" == "test" ( pytest tests/unit/ pytest tests/issues/ - pytest --nbval tests/notebooks/ + pytest --nbtest tests/notebooks/ ECHO "Tests completed!" GOTO end ) diff --git a/pyproject.toml b/pyproject.toml index c9f87ff11..57b813a71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,7 +120,6 @@ test = [ "coverage>=6.5, <8", "codecov", "pytest-cov", - "nbval", "nbtest-gen", "pyarrow", "twine>=3.1.1", From 57db2d73f7dadf21b3bab766c0f67d8c0a2ee3a6 Mon Sep 17 00:00:00 2001 From: Vedant Date: Fri, 25 Jul 2025 15:02:04 +0530 Subject: [PATCH 6/6] Reverting spurious formatting changes Signed-off-by: Vedant --- .github/workflows/tests.yml | 191 ++++++++++++++++++------------------ pyproject.toml | 32 +++--- 2 files changed, 113 insertions(+), 110 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 57f930ca4..58d1cd495 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,8 +4,8 @@ on: pull_request: push: branches: - - master - - develop + - master + - develop env: YDATA_PROFILING_NO_ANALYTICS: false @@ -15,49 +15,49 @@ jobs: name: Tests strategy: matrix: - os: [ubuntu-22.04] - python-version: ["3.9", "3.10", "3.11", "3.12"] - pandas: ["pandas>1.1"] - numpy: ["numpy>=1.21"] + os: [ ubuntu-22.04 ] + python-version: ["3.9", "3.10", "3.11", "3.12" ] + pandas: [ "pandas>1.1" ] + numpy: [ "numpy>=1.21" ] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 - - - name: Setup python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - - uses: actions/cache@v4 - if: startsWith(runner.os, 'Linux') - with: - path: ~/.cache/pip - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v4 - if: startsWith(runner.os, 'macOS') - with: - path: ~/Library/Caches/pip - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v4 - if: startsWith(runner.os, 'Windows') - with: - path: ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - run: | - pip install --upgrade pip setuptools wheel - pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" - - run: echo "YDATA_PROFILING_NO_ANALYTICS=False" >> $GITHUB_ENV - - run: make install - - - run: make test + - uses: actions/checkout@v4 + + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + + - uses: actions/cache@v4 + if: startsWith(runner.os, 'Linux') + with: + path: ~/.cache/pip + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - uses: actions/cache@v4 + if: startsWith(runner.os, 'macOS') + with: + path: ~/Library/Caches/pip + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - uses: actions/cache@v4 + if: startsWith(runner.os, 'Windows') + with: + path: ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - run: | + pip install --upgrade pip setuptools wheel + pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" + - run: echo "YDATA_PROFILING_NO_ANALYTICS=False" >> $GITHUB_ENV + - run: make install + + - run: make test coverage: name: Coverage @@ -66,67 +66,68 @@ jobs: matrix: os: [ubuntu-22.04] python-version: ["3.12"] - pandas: ["pandas>1.1"] - numpy: ["numpy>=1.21"] + pandas: [ "pandas>1.1" ] + numpy: [ "numpy>=1.21" ] steps: - - uses: actions/checkout@v4 - - - name: Setup python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - - uses: actions/cache@v4 - if: startsWith(runner.os, 'Linux') - with: - path: ~/.cache/pip - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v4 - if: startsWith(runner.os, 'macOS') - with: - path: ~/Library/Caches/pip - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v4 - if: startsWith(runner.os, 'Windows') - with: - path: ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - run: | - pip install --upgrade pip setuptools wheel - pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" - echo "YDATA_PROFILING_NO_ANALYTICS=False" >> $GITHUB_ENV - - run: make install - - run: make test_cov - - - uses: actions/cache@v4 - if: startsWith(runner.os, 'Windows') - with: - path: ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.pandas }}-pip- - - run: | - pip install --upgrade pip setuptools wheel - pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" - - run: make install - - run: make test_cov - - run: codecov -F py${{ matrix.python-version }}-${{ matrix.os }}-${{ matrix.pandas }}-${{ matrix.numpy }} + - uses: actions/checkout@v4 + + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + + - uses: actions/cache@v4 + if: startsWith(runner.os, 'Linux') + with: + path: ~/.cache/pip + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - uses: actions/cache@v4 + if: startsWith(runner.os, 'macOS') + with: + path: ~/Library/Caches/pip + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - uses: actions/cache@v4 + if: startsWith(runner.os, 'Windows') + with: + path: ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - run: | + pip install --upgrade pip setuptools wheel + pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" + echo "YDATA_PROFILING_NO_ANALYTICS=False" >> $GITHUB_ENV + - run: make install + + - run: make test_cov + + - uses: actions/cache@v4 + if: startsWith(runner.os, 'Windows') + with: + path: ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + - run: | + pip install --upgrade pip setuptools wheel + pip install ".[test]" "${{ matrix.pandas }}" "${{ matrix.numpy }}" + - run: make install + - run: make test_cov + - run: codecov -F py${{ matrix.python-version }}-${{ matrix.os }}-${{ matrix.pandas }}-${{ matrix.numpy }} test_spark: runs-on: ubuntu-24.04 continue-on-error: false strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] - pyspark-version: ["3.4", "3.5"] + python-version: [ "3.9", "3.10", "3.11", "3.12" ] + pyspark-version: [ "3.4" , "3.5" ] name: Tests Spark | Python ${{ matrix.python-version }} | PySpark ${{ matrix.pyspark-version }} diff --git a/pyproject.toml b/pyproject.toml index 57b813a71..838e9abd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ build-backend = "setuptools.build_meta" requires = [ "setuptools>=72.0.0,<80.0.0", "setuptools-scm>=8.0.0,<9.0.0", - "wheel>=0.38.4,<1.0.0", + "wheel>=0.38.4,<1.0.0" ] [packaging] @@ -12,18 +12,13 @@ package_name = "ydata-profiling" [project] name = "ydata-profiling" requires-python = ">=3.7,<3.13" -authors = [{ name = "YData Labs Inc", email = "opensource@ydata.ai" }] -description = "Generate profile report for pandas DataFrame" -keywords = [ - "pandas", - "data-science", - "data-analysis", - "python", - "jupyter", - "ipython", +authors = [ + {name = "YData Labs Inc", email = "opensource@ydata.ai"} ] +description = "Generate profile report for pandas DataFrame" +keywords = ["pandas", "data-science", "data-analysis", "python", "jupyter", "ipython"] readme = "README.md" -license = { file = "LICENSE.md" } +license = {file = "LICENSE.md"} classifiers = [ "Development Status :: 5 - Production/Stable", "Topic :: Software Development :: Build Tools", @@ -75,7 +70,9 @@ dependencies = [ "numba>=0.56.0, <=0.61", ] -dynamic = ["version"] +dynamic = [ + "version", +] [project.optional-dependencies] dev = [ @@ -102,7 +99,10 @@ docs = [ "mkdocs-badges", ] -notebook = ["jupyter>=1.0.0", "ipywidgets>=7.5.1"] +notebook = [ + "jupyter>=1.0.0", + "ipywidgets>=7.5.1", +] # this provides the recommended pyspark and pyarrow versions for spark to work on pandas-profiling # note that if you are using pyspark 2.3 or 2.4 and pyarrow >= 0.15, you might need to @@ -126,7 +126,9 @@ test = [ "kaggle", ] -unicode = ["tangled-up-in-unicode==0.2.0"] +unicode= [ + "tangled-up-in-unicode==0.2.0", +] [project.urls] Homepage = "https://ydata.ai" @@ -148,4 +150,4 @@ ydata_profiling = ["py.typed"] universal = true [tool.setuptools.package-dir] -"" = "src" +"" = "src" \ No newline at end of file