From 5c2efdc8adf219076e0466b4a38241fd2d112ae6 Mon Sep 17 00:00:00 2001 From: Dave Date: Mon, 25 Jul 2022 08:57:24 -0400 Subject: [PATCH] 1.1.2 dtype hotfix & github workflows (#14) * remove buffer col with "..." to avoid changing column dtypes during `outer` sampling method * add test for dtype persistence --- .github/workflows/pypi-release.yaml | 25 +++++++++++++++++++++++ .github/workflows/unit-tests.yaml | 31 +++++++++++++++++++++++++++++ CHANGELOG.md | 3 ++- dx/__init__.py | 2 +- dx/formatters/utils.py | 14 ++----------- dx/tests/test_extras.py | 13 ++++++++++-- pyproject.toml | 2 +- 7 files changed, 73 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/pypi-release.yaml create mode 100644 .github/workflows/unit-tests.yaml diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml new file mode 100644 index 00000000..faed8132 --- /dev/null +++ b/.github/workflows/pypi-release.yaml @@ -0,0 +1,25 @@ +name: Upload Python Package + +on: + release: + types: [created] + +jobs: + deploy: + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* + diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml new file mode 100644 index 00000000..0148aeef --- /dev/null +++ b/.github/workflows/unit-tests.yaml @@ -0,0 +1,31 @@ +name: DX Tests +# Can be developed using: `act --workflows .github/workflows/unit-tests.yaml` + +# Trigger the workflow on pull requests targeting the main branch +# yamllint disable-line rule:truthy +on: + pull_request: + branches: + - main + +jobs: + unit-tests: + runs-on: ubuntu-20.04 + steps: + - name: Checkout the code + uses: 
actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - name: Install poetry + uses: abatilo/actions-poetry@v2.0.0 + with: + poetry-version: 1.1.13 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + poetry install + - name: Pytest - Unit tests + run: | + poetry run pytest dx/tests -x diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e036aea..c88ab445 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,14 @@ All notable changes will be documented here. --- -## `1.1.1` +## `1.1.1`-`1.1.2` _2022-07-22_ ### Added - Additional metadata sent to frontends to triage issues with output sizes and `dx` settings ### Fixed - `simple`/`enhanced` display modes no longer raise JSON errors trying to serialize `pd.NA` values - `SAMPLE_METHOD` returning incorrect value (`True` instead of `DXSampleMethod`) when compared with `COLUMN_SAMPLE_METHOD` and `ROW_SAMPLE_METHOD` +- Truncating rows no longer alters dtypes by adding `...` values ## `1.1.0` _2022-07-22_ diff --git a/dx/__init__.py b/dx/__init__.py index 65132866..d3f3eaba 100644 --- a/dx/__init__.py +++ b/dx/__init__.py @@ -3,6 +3,6 @@ from .formatters import * from .settings import * -__version__ = "1.1.1" +__version__ = "1.1.2" set_display_mode("simple") diff --git a/dx/formatters/utils.py b/dx/formatters/utils.py index c984f35c..f0b0e65f 100644 --- a/dx/formatters/utils.py +++ b/dx/formatters/utils.py @@ -199,20 +199,10 @@ def sample_outer(df: pd.DataFrame, num: int) -> pd.DataFrame: Example: sampling outer 8 of 20 rows: [XXXX............XXXX] """ - # rounding down since we'll be adding one filler row - # as well as using the index - outer_buffer = int(num / 2) - 1 + outer_buffer = int(num / 2) start_rows = df.head(outer_buffer) - - # hack to make a column/row filled with ellipsis values - # to show hidden data between outer rows - buffer_col = df.head(1).transpose() - buffer_col.columns = ["..."] - buffer_col["..."] = "..." 
- buffer_row = buffer_col.transpose() - end_rows = df.tail(outer_buffer) - return pd.concat([start_rows, buffer_row, end_rows]) + return pd.concat([start_rows, end_rows]) def stringify_columns(df: pd.DataFrame) -> pd.DataFrame: diff --git a/dx/tests/test_extras.py b/dx/tests/test_extras.py index 8a1737a2..7558cfe7 100644 --- a/dx/tests/test_extras.py +++ b/dx/tests/test_extras.py @@ -29,6 +29,15 @@ def test_large_dataframe_is_truncated(sample_large_dataframe): assert truncated_size_bytes <= settings.MAX_RENDER_SIZE_BYTES +def test_truncated_dataframe_keeps_dtypes(sample_large_dataframe): + """ + Test that a truncated dataframe doesn't alter column datatypes. + """ + orig_dtypes = sample_large_dataframe.dtypes + truncated_df = truncate_if_too_big(sample_large_dataframe) + assert (truncated_df.dtypes == orig_dtypes).all() + + def test_wide_dataframe_is_narrowed(sample_wide_dataframe): """ Test that a wide dataframe is narrowed to below the size of @@ -38,7 +47,7 @@ def test_wide_dataframe_is_narrowed(sample_wide_dataframe): narrow_df = truncate_if_too_big(sample_wide_dataframe) narrow_width = len(narrow_df.columns) assert narrow_width < orig_width - assert narrow_width < settings.DISPLAY_MAX_COLUMNS + assert narrow_width <= settings.DISPLAY_MAX_COLUMNS def test_long_dataframe_is_shortened(sample_long_dataframe): @@ -66,6 +75,6 @@ def test_long_wide_dataframe_is_reduced_from_both_dimensions( reduced_width = len(reduced_df.columns) reduced_length = len(reduced_df) assert reduced_width < orig_width - assert reduced_width < settings.DISPLAY_MAX_COLUMNS + assert reduced_width <= settings.DISPLAY_MAX_COLUMNS assert reduced_length < orig_length assert reduced_length <= settings.DISPLAY_MAX_ROWS diff --git a/pyproject.toml b/pyproject.toml index 68d3b33e..eef9d519 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dx" -version = "1.1.1" +version = "1.1.2" description = "Python wrapper for Data Explorer" authors = ["Dave Shoup ", "Kyle 
Kelley "] readme = "README.md"