From 5c2efdc8adf219076e0466b4a38241fd2d112ae6 Mon Sep 17 00:00:00 2001
From: Dave <dave.shoup@gmail.com>
Date: Mon, 25 Jul 2022 08:57:24 -0400
Subject: [PATCH] 1.1.2 dtype hotfix & github workflows (#14)

* remove buffer col with "..." to avoid changing column dtypes during `outer` sampling method
* add test for dtype persistence
---
 .github/workflows/pypi-release.yaml | 25 +++++++++++++++++++++++
 .github/workflows/unit-tests.yaml   | 31 +++++++++++++++++++++++++++++
 CHANGELOG.md                        |  3 ++-
 dx/__init__.py                      |  2 +-
 dx/formatters/utils.py              | 14 ++-----------
 dx/tests/test_extras.py             | 13 ++++++++++--
 pyproject.toml                      |  2 +-
 7 files changed, 73 insertions(+), 17 deletions(-)
 create mode 100644 .github/workflows/pypi-release.yaml
 create mode 100644 .github/workflows/unit-tests.yaml

diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml
new file mode 100644
index 00000000..faed8132
--- /dev/null
+++ b/.github/workflows/pypi-release.yaml
@@ -0,0 +1,25 @@
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+    runs-on: ubuntu-20.04
+
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions/setup-python@v2
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+      run: |
+        python setup.py sdist bdist_wheel
+        twine upload dist/*
+    
diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
new file mode 100644
index 00000000..0148aeef
--- /dev/null
+++ b/.github/workflows/unit-tests.yaml
@@ -0,0 +1,31 @@
+name: DX Tests
+# Can be developed using: `act --workflows .github/workflows/unit-tests.yaml`
+
+# Trigger the workflow only on pull requests that target the main branch
+# yamllint disable-line rule:truthy
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  unit-tests:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Checkout the code
+        uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+      - name: Install poetry
+        uses: abatilo/actions-poetry@v2.0.0
+        with:
+          poetry-version: 1.1.13
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          poetry install
+      - name: Pytest - Unit tests
+        run: |
+          poetry run pytest dx/tests -x
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2e036aea..c88ab445 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,13 +1,14 @@
 All notable changes will be documented here.
 
 ---
-## `1.1.1`
+## `1.1.1`-`1.1.2`
 _2022-07-22_
 ### Added
 - Additional metadata sent to frontends to triage issues with output sizes and `dx` settings
 ### Fixed
 - `simple`/`enhanced` display modes no longer raise JSON errors trying to serialize `pd.NA` values
 - `SAMPLE_METHOD` returning incorrect value (`True` instead of `DXSampleMethod`) when compared with `COLUMN_SAMPLE_METHOD` and `ROW_SAMPLE_METHOD`
+- Truncating rows no longer alters dtypes by adding `...` values
   
 ## `1.1.0`
 _2022-07-22_
diff --git a/dx/__init__.py b/dx/__init__.py
index 65132866..d3f3eaba 100644
--- a/dx/__init__.py
+++ b/dx/__init__.py
@@ -3,6 +3,6 @@
 from .formatters import *
 from .settings import *
 
-__version__ = "1.1.1"
+__version__ = "1.1.2"
 
 set_display_mode("simple")
diff --git a/dx/formatters/utils.py b/dx/formatters/utils.py
index c984f35c..f0b0e65f 100644
--- a/dx/formatters/utils.py
+++ b/dx/formatters/utils.py
@@ -199,20 +199,10 @@ def sample_outer(df: pd.DataFrame, num: int) -> pd.DataFrame:
     Example: sampling outer 8 of 20 rows:
     [XXXX............XXXX]
     """
-    # rounding down since we'll be adding one filler row
-    # as well as using the index
-    outer_buffer = int(num / 2) - 1
+    outer_buffer = int(num / 2)
     start_rows = df.head(outer_buffer)
-
-    # hack to make a column/row filled with ellipsis values
-    # to show hidden data between outer rows
-    buffer_col = df.head(1).transpose()
-    buffer_col.columns = ["..."]
-    buffer_col["..."] = "..."
-    buffer_row = buffer_col.transpose()
-
     end_rows = df.tail(outer_buffer)
-    return pd.concat([start_rows, buffer_row, end_rows])
+    return pd.concat([start_rows, end_rows])
 
 
 def stringify_columns(df: pd.DataFrame) -> pd.DataFrame:
diff --git a/dx/tests/test_extras.py b/dx/tests/test_extras.py
index 8a1737a2..7558cfe7 100644
--- a/dx/tests/test_extras.py
+++ b/dx/tests/test_extras.py
@@ -29,6 +29,15 @@ def test_large_dataframe_is_truncated(sample_large_dataframe):
     assert truncated_size_bytes <= settings.MAX_RENDER_SIZE_BYTES
 
 
+def test_truncated_dataframe_keeps_dtypes(sample_large_dataframe):
+    """
+    Test that a truncated dataframe doesn't alter column datatypes.
+    """
+    orig_dtypes = sample_large_dataframe.dtypes
+    truncated_df = truncate_if_too_big(sample_large_dataframe)
+    assert (truncated_df.dtypes == orig_dtypes).all()
+
+
 def test_wide_dataframe_is_narrowed(sample_wide_dataframe):
     """
     Test that a wide dataframe is narrowed to below the size of
@@ -38,7 +47,7 @@ def test_wide_dataframe_is_narrowed(sample_wide_dataframe):
     narrow_df = truncate_if_too_big(sample_wide_dataframe)
     narrow_width = len(narrow_df.columns)
     assert narrow_width < orig_width
-    assert narrow_width < settings.DISPLAY_MAX_COLUMNS
+    assert narrow_width <= settings.DISPLAY_MAX_COLUMNS
 
 
 def test_long_dataframe_is_shortened(sample_long_dataframe):
@@ -66,6 +75,6 @@ def test_long_wide_dataframe_is_reduced_from_both_dimensions(
     reduced_width = len(reduced_df.columns)
     reduced_length = len(reduced_df)
     assert reduced_width < orig_width
-    assert reduced_width < settings.DISPLAY_MAX_COLUMNS
+    assert reduced_width <= settings.DISPLAY_MAX_COLUMNS
     assert reduced_length < orig_length
     assert reduced_length <= settings.DISPLAY_MAX_ROWS
diff --git a/pyproject.toml b/pyproject.toml
index 68d3b33e..eef9d519 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dx"
-version = "1.1.1"
+version = "1.1.2"
 description = "Python wrapper for Data Explorer"
 authors = ["Dave Shoup <dave.shoup@gmail.com>", "Kyle Kelley <rgbkrk@gmail.com>"]
 readme = "README.md"