From eb461ea7f77255ba1e542a72b3451d830f4ba24e Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 26 Apr 2022 17:36:12 -0400 Subject: [PATCH] refactor IPython display formatter usage (#1) --- CHANGELOG.md | 5 ++ dx/__init__.py | 2 +- dx/dx.py | 64 +++------------------- dx/formatters.py | 64 +++++++++++----------- poetry.lock | 139 ++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 5 +- 6 files changed, 189 insertions(+), 90 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89070101..bcd08b77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes will be documented here. --- +## `1.0.3` +_2022-04-26_ +### **Fixed** +* `dx.register()` (`dx.enable()`, deprecated) and `dx.deregister()` (`dx.disable()`, deprecated) will now update the default display formatting for pandas `DataFrame` objects as intended + ## `1.0.2` _2022-04-25_ ### **Fixed** diff --git a/dx/__init__.py b/dx/__init__.py index 048cf3c1..05e61a55 100644 --- a/dx/__init__.py +++ b/dx/__init__.py @@ -2,4 +2,4 @@ from .dx import * from .formatters import * -__version__ = "1.0.2" +__version__ = "1.0.3" diff --git a/dx/dx.py b/dx/dx.py index 856a35d6..369e83c7 100644 --- a/dx/dx.py +++ b/dx/dx.py @@ -1,68 +1,19 @@ import pathlib -from typing import List, Optional, Union +from typing import List, Union import pandas as pd from IPython.display import display as ipydisplay -from pandas.io.json import build_table_schema -from .config import in_noteable_env - -DX_MEDIA_TYPE = "application/vnd.dex.v1+json" -DATARESOURCE_MEDIA_TYPE = "application/vnd.dataresource+json" - - -class DXDataFrame(pd.DataFrame): - """Convenience class to provide DEX-focused methods for IPython rendering""" - - _display_index = False - media_type = DX_MEDIA_TYPE - - def display(self, media_type: Optional[str] = None, index: bool = False) -> None: - """Render DXDataFrame based on provided media type.""" - - if not in_noteable_env(): - # TODO: should this be treated differently? - ipydisplay(self) - return - - media_type = media_type or self.media_type - self._display_index = index - payload = { - "schema": self.table_schema, - "data": self.data_transform(media_type=media_type), - # "summary_statistics": {}, - # "dx-seed": {}, - } - ipydisplay({media_type: payload}, raw=True) - return - - def data_transform(self, media_type: str) -> List: - """ - Transforms the current dataframe into a list of dictionaries - or list of columnar values, depending on the media type provided. - """ - if media_type != self.media_type: - # use default data orient - return self.to_dict(orient="records") - - # we can't use `.to_dict(orient='list')` here since that would return a dictionary of {column: [values]} pairs - if self._display_index: - return self.reset_index().transpose().values.tolist() - return self.transpose().values.tolist() - - @property - def table_schema(self): - return build_table_schema(self, index=self._display_index) +from .formatters import format_dx def display( data: Union[List[dict], pd.DataFrame, Union[pathlib.Path, str]], - media_type: Optional[str] = None, - index: bool = False, ) -> None: - """Convenience function to allow calling `dx.display(df)` on a pandas Dataframe, tabular data structure, or filepath.""" + """ + Display a single object (pd.DataFrame, .csv/.json filepath, or tabular dataset) with the DX display format. + """ - # TODO: handle this in DXDataFrame init instead? if isinstance(data, str): path = pathlib.PurePosixPath(data) if path.suffix == ".csv": @@ -72,7 +23,10 @@ def display( else: raise ValueError(f"Unsupported file type: `{path.suffix}`") - return DXDataFrame(data).display(media_type=media_type, index=index) + df = pd.DataFrame(data) + payload, _ = format_dx(df) + ipydisplay(payload, raw=True) + return # backwards-compatibility diff --git a/dx/formatters.py b/dx/formatters.py index b9e87bb5..874546f7 100644 --- a/dx/formatters.py +++ b/dx/formatters.py @@ -1,48 +1,48 @@ import pandas as pd from IPython import get_ipython -from IPython.core.formatters import BaseFormatter +from IPython.core.formatters import DisplayFormatter +from pandas.io.json import build_table_schema -from .dx import DATARESOURCE_MEDIA_TYPE, DX_MEDIA_TYPE +DEFAULT_IPYTHON_DISPLAY_FORMATTER = get_ipython().display_formatter +DX_MEDIA_TYPE = "application/vnd.dex.v1+json" -class DXSchemaFormatter(BaseFormatter): - # FOLLOWUP: does anything need to change here? - print_method = "_repr_data_resource_" - _return_type = (dict,) +class DXDisplayFormatter(DisplayFormatter): + def format(self, obj, **kwargs): + if isinstance(obj, pd.DataFrame): + return format_dx(obj) -class TableSchemaFormatter(BaseFormatter): - print_method = "_repr_data_resource_" - _return_type = (dict,) + return DEFAULT_IPYTHON_DISPLAY_FORMATTER.format(obj, **kwargs) -def deregister_dx_formatting(media_type: str = DX_MEDIA_TYPE) -> None: - """Reverts IPython.display_formatter.formatters to original states""" - pd.options.display.html.table_schema = False - pd.options.display.max_rows = 60 +def format_dx(df) -> tuple: + """ + Transforms the dataframe to a payload dictionary containing the table schema + and column values as arrays. + """ + # this will include the `df.index` by default (e.g. slicing/sampling) + payload = { + DX_MEDIA_TYPE: { + "schema": build_table_schema(df), + "data": df.reset_index().transpose().values.tolist(), + } + } + metadata = {} + return (payload, metadata) - formatters = get_ipython().display_formatter.formatters - if media_type in formatters: - formatters.pop(media_type) - # this should effectively be the same as using - # `pandas.io.formats.printing.enable_data_resource_formatter(True)`, - # except calling that directly doesn't update the IPython formatters - formatters[DATARESOURCE_MEDIA_TYPE] = TableSchemaFormatter() - formatters[DATARESOURCE_MEDIA_TYPE].enabled = True +def deregister() -> None: + """Reverts IPython.display_formatter to its original state""" + pd.options.display.max_rows = 60 + get_ipython().display_formatter = DEFAULT_IPYTHON_DISPLAY_FORMATTER -def register_dx_formatter(media_type: str = DX_MEDIA_TYPE) -> None: - """Registers a media_type for IPython display formatting""" - pd.options.display.html.table_schema = True +def register() -> None: + """Overrides the default IPython display formatter to use DXDisplayFormatter""" pd.options.display.max_rows = 100_000 - - formatters = get_ipython().display_formatter.formatters - formatters[media_type] = DXSchemaFormatter() - # the default pandas `Dataframe._repl_html_` will not work correctly - # if enabled=True here - formatters[media_type].enabled = False + get_ipython().display_formatter = DXDisplayFormatter() -disable = deregister_dx_formatting -enable = register_dx_formatter +disable = deregister +enable = register diff --git a/poetry.lock b/poetry.lock index 940282fa..ec47c053 100644 --- a/poetry.lock +++ b/poetry.lock @@ -20,6 +20,28 @@ six = "*" [package.extras] test = ["astroid", "pytest"] +[[package]] +name = "atomicwrites" +version = "1.4.0" +description = "Atomic file writes." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "attrs" +version = "21.4.0" +description = "Classes Without Boilerplate" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.extras] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] +docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"] + [[package]] name = "backcall" version = "0.2.0" @@ -52,6 +74,14 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "iniconfig" +version = "1.1.1" +description = "iniconfig: brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "ipython" version = "8.2.0" @@ -121,6 +151,17 @@ category = "main" optional = false python-versions = ">=3.8" +[[package]] +name = "packaging" +version = "21.3" +description = "Core utilities for Python packages" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" + [[package]] name = "pandas" version = "1.4.2" @@ -173,6 +214,18 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prompt-toolkit" version = "3.0.29" @@ -203,6 +256,14 @@ python-versions = "*" [package.extras] tests = ["pytest"] +[[package]] +name = "py" +version = "1.11.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + [[package]] name = "pygments" version = "2.12.0" @@ -211,6 +272,38 @@ category = "main" optional = false python-versions = ">=3.6" +[[package]] +name = "pyparsing" +version = "3.0.8" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +category = "dev" +optional = false +python-versions = ">=3.6.8" + +[package.extras] +diagrams = ["railroad-diagrams", "jinja2"] + +[[package]] +name = "pytest" +version = "7.1.2" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +py = ">=1.8.2" +tomli = ">=1.0.0" + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + [[package]] name = "python-dateutil" version = "2.8.2" @@ -254,6 +347,14 @@ pure-eval = "*" [package.extras] tests = ["pytest", "typeguard", "pygments", "littleutils", "cython"] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" + [[package]] name = "traitlets" version = "5.1.1" @@ -276,7 +377,7 @@ python-versions = "*" [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "6cbc154fddf3b1acc0084916fa79649cca2f865ead9d5357d2228af758f73a7c" +content-hash = "d969c8207328ac801a2999af0db407f8d0350e4c6ad660b9101d4fc28e865660" [metadata.files] appnope = [ @@ -287,6 +388,14 @@ asttokens = [ {file = "asttokens-2.0.5-py2.py3-none-any.whl", hash = "sha256:0844691e88552595a6f4a4281a9f7f79b8dd45ca4ccea82e5e05b4bbdb76705c"}, {file = "asttokens-2.0.5.tar.gz", hash = "sha256:9a54c114f02c7a9480d56550932546a3f1fe71d8a02f1bc7ccd0ee3ee35cf4d5"}, ] +atomicwrites = [ + {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, + {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, +] +attrs = [ + {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"}, + {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, +] backcall = [ {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, @@ -303,6 +412,10 @@ executing = [ {file = "executing-0.8.3-py2.py3-none-any.whl", hash = "sha256:d1eef132db1b83649a3905ca6dd8897f71ac6f8cac79a7e58a1a09cf137546c9"}, {file = "executing-0.8.3.tar.gz", hash = "sha256:c6554e21c6b060590a6d3be4b82fb78f8f0194d809de5ea7df1c093763311501"}, ] +iniconfig = [ + {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, + {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, +] ipython = [ {file = "ipython-8.2.0-py3-none-any.whl", hash = "sha256:1b672bfd7a48d87ab203d9af8727a3b0174a4566b4091e9447c22fb63ea32857"}, {file = "ipython-8.2.0.tar.gz", hash = "sha256:70e5eb132cac594a34b5f799bd252589009905f05104728aea6a403ec2519dc1"}, @@ -337,6 +450,10 @@ numpy = [ {file = "numpy-1.22.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f"}, {file = "numpy-1.22.3.zip", hash = "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18"}, ] +packaging = [ + {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, + {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, +] pandas = [ {file = "pandas-1.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be67c782c4f1b1f24c2f16a157e12c2693fd510f8df18e3287c77f33d124ed07"}, {file = "pandas-1.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5a206afa84ed20e07603f50d22b5f0db3fb556486d8c2462d8bc364831a4b417"}, @@ -372,6 +489,10 @@ pickleshare = [ {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, ] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] prompt-toolkit = [ {file = "prompt_toolkit-3.0.29-py3-none-any.whl", hash = "sha256:62291dad495e665fca0bda814e342c69952086afb0f4094d0893d357e5c78752"}, {file = "prompt_toolkit-3.0.29.tar.gz", hash = "sha256:bd640f60e8cecd74f0dc249713d433ace2ddc62b65ee07f96d358e0b152b6ea7"}, @@ -384,10 +505,22 @@ pure-eval = [ {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, ] +py = [ + {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] pygments = [ {file = "Pygments-2.12.0-py3-none-any.whl", hash = "sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519"}, {file = "Pygments-2.12.0.tar.gz", hash = "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb"}, ] +pyparsing = [ + {file = "pyparsing-3.0.8-py3-none-any.whl", hash = "sha256:ef7b523f6356f763771559412c0d7134753f037822dad1b16945b7b846f7ad06"}, + {file = "pyparsing-3.0.8.tar.gz", hash = "sha256:7bf433498c016c4314268d95df76c81b842a4cb2b276fa3312cfb1e1d85f6954"}, +] +pytest = [ + {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, + {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, +] python-dateutil = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, @@ -404,6 +537,10 @@ stack-data = [ {file = "stack_data-0.2.0-py3-none-any.whl", hash = "sha256:999762f9c3132308789affa03e9271bbbe947bf78311851f4d485d8402ed858e"}, {file = "stack_data-0.2.0.tar.gz", hash = "sha256:45692d41bd633a9503a5195552df22b583caf16f0b27c4e58c98d88c8b648e12"}, ] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] traitlets = [ {file = "traitlets-5.1.1-py3-none-any.whl", hash = "sha256:2d313cc50a42cd6c277e7d7dc8d4d7fedd06a2c215f78766ae7b1a66277e0033"}, {file = "traitlets-5.1.1.tar.gz", hash = "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7"}, diff --git a/pyproject.toml b/pyproject.toml index 2aeb8ac4..86be1305 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dx" -version = "1.0.2" +version = "1.0.3" description = "Python wrapper for Data Explorer" authors = ["Dave Shoup ", "Kyle Kelley "] readme = "README.md" @@ -14,6 +14,9 @@ python = "^3.8" pandas = "^1.3.5" ipython = "^8.2.0" +[tool.poetry.dev-dependencies] +pytest = "^7.1.2" + [build-system] requires = ["poetry_core>=1.0.0"] build-backend = "poetry.core.masonry.api" \ No newline at end of file