Skip to content

Commit

Permalink
refactor IPython display formatter usage (#1)
Browse files Browse the repository at this point in the history
  • Loading branch information
shouples authored Apr 26, 2022
1 parent a2f0e46 commit eb461ea
Show file tree
Hide file tree
Showing 6 changed files with 189 additions and 90 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ All notable changes will be documented here.

---

## `1.0.3`
_2022-04-26_
### **Fixed**
* `dx.register()` (`dx.enable()`, deprecated) and `dx.deregister()` (`dx.disable()`, deprecated) will now update the default display formatting for pandas `DataFrame` objects as intended

## `1.0.2`
_2022-04-25_
### **Fixed**
Expand Down
2 changes: 1 addition & 1 deletion dx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from .dx import *
from .formatters import *

__version__ = "1.0.2"
__version__ = "1.0.3"
64 changes: 9 additions & 55 deletions dx/dx.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,19 @@
import pathlib
from typing import List, Optional, Union
from typing import List, Union

import pandas as pd
from IPython.display import display as ipydisplay
from pandas.io.json import build_table_schema

from .config import in_noteable_env

DX_MEDIA_TYPE = "application/vnd.dex.v1+json"
DATARESOURCE_MEDIA_TYPE = "application/vnd.dataresource+json"


class DXDataFrame(pd.DataFrame):
    """A pandas DataFrame with helpers for emitting itself as a DEX display payload."""

    # Whether the index is included in the schema and data of the payload;
    # updated on every `display()` call that builds a payload.
    _display_index = False
    # Media type used when `display()` is called without an explicit one.
    media_type = DX_MEDIA_TYPE

    def display(self, media_type: Optional[str] = None, index: bool = False) -> None:
        """
        Hand this dataframe to the IPython display machinery.

        When `in_noteable_env()` is falsy the dataframe is displayed as-is and both
        arguments are ignored. Otherwise a raw `{media_type: payload}` bundle is
        displayed, where the payload carries the table schema under "schema" and
        the transformed values under "data".

        :param media_type: media type key for the bundle; falls back to
            `self.media_type` when not provided
        :param index: whether the index is included in the schema and data
        """
        if in_noteable_env():
            chosen_type = media_type or self.media_type
            # must be set before the schema/data are computed, both read it
            self._display_index = index
            schema = self.table_schema
            values = self.data_transform(media_type=chosen_type)
            # placeholders kept from the original payload layout:
            # "summary_statistics": {}, "dx-seed": {}
            ipydisplay({chosen_type: {"schema": schema, "data": values}}, raw=True)
        else:
            # TODO: should this be treated differently?
            ipydisplay(self)

    def data_transform(self, media_type: str) -> List:
        """
        Convert the dataframe values into the layout used for `media_type`.

        For `self.media_type` the result is a list of per-column value lists
        (preceded by the index values when `_display_index` is set); for any other
        media type it is a list of row dictionaries.
        """
        if media_type == self.media_type:
            # `.to_dict(orient="list")` is not usable here: it returns a dictionary
            # of {column: [values]} pairs rather than a plain list of columns
            frame = self.reset_index() if self._display_index else self
            return frame.transpose().values.tolist()

        # use default data orient
        return self.to_dict(orient="records")

    @property
    def table_schema(self):
        """Table schema for this dataframe, with the index included only if `_display_index` is set."""
        include_index = self._display_index
        return build_table_schema(self, index=include_index)
from .formatters import format_dx


def display(
data: Union[List[dict], pd.DataFrame, Union[pathlib.Path, str]],
media_type: Optional[str] = None,
index: bool = False,
) -> None:
"""Convenience function to allow calling `dx.display(df)` on a pandas Dataframe, tabular data structure, or filepath."""
"""
Display a single object (pd.DataFrame, .csv/.json filepath, or tabular dataset) with the DX display format.
"""

# TODO: handle this in DXDataFrame init instead?
if isinstance(data, str):
path = pathlib.PurePosixPath(data)
if path.suffix == ".csv":
Expand All @@ -72,7 +23,10 @@ def display(
else:
raise ValueError(f"Unsupported file type: `{path.suffix}`")

return DXDataFrame(data).display(media_type=media_type, index=index)
df = pd.DataFrame(data)
payload, _ = format_dx(df)
ipydisplay(payload, raw=True)
return


# backwards-compatibility
Expand Down
64 changes: 32 additions & 32 deletions dx/formatters.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,48 @@
import pandas as pd
from IPython import get_ipython
from IPython.core.formatters import BaseFormatter
from IPython.core.formatters import DisplayFormatter
from pandas.io.json import build_table_schema

from .dx import DATARESOURCE_MEDIA_TYPE, DX_MEDIA_TYPE
# Capture the display formatter that is active when this module is imported so that
# `deregister()` can put it back later.
# NOTE(review): `get_ipython()` returns None outside of an IPython session, which would
# make this line raise AttributeError at import time -- confirm that is acceptable.
DEFAULT_IPYTHON_DISPLAY_FORMATTER = get_ipython().display_formatter
# Media type key under which `format_dx()` places its payload.
DX_MEDIA_TYPE = "application/vnd.dex.v1+json"


class DXSchemaFormatter(BaseFormatter):
# FOLLOWUP: does anything need to change here?
print_method = "_repr_data_resource_"
_return_type = (dict,)
class DXDisplayFormatter(DisplayFormatter):
def format(self, obj, **kwargs):

if isinstance(obj, pd.DataFrame):
return format_dx(obj)

class TableSchemaFormatter(BaseFormatter):
print_method = "_repr_data_resource_"
_return_type = (dict,)
return DEFAULT_IPYTHON_DISPLAY_FORMATTER.format(obj, **kwargs)


def deregister_dx_formatting(media_type: str = DX_MEDIA_TYPE) -> None:
"""Reverts IPython.display_formatter.formatters to original states"""
pd.options.display.html.table_schema = False
pd.options.display.max_rows = 60
def format_dx(df) -> tuple:
    """
    Build the DX display bundle for a dataframe.

    Returns a `(payload, metadata)` pair. `payload` maps `DX_MEDIA_TYPE` to a
    dictionary holding the table schema under "schema" and the values, as one
    list per column, under "data". `metadata` is always an empty dictionary.
    """
    # the `df.index` is always part of the output (e.g. after slicing/sampling):
    # `build_table_schema` includes it by default, and `reset_index()` turns it
    # into the leading column(s) of the data
    schema = build_table_schema(df)
    column_values = df.reset_index().transpose().values.tolist()
    return ({DX_MEDIA_TYPE: {"schema": schema, "data": column_values}}, {})

formatters = get_ipython().display_formatter.formatters
if media_type in formatters:
formatters.pop(media_type)

# this should effectively be the same as using
# `pandas.io.formats.printing.enable_data_resource_formatter(True)`,
# except calling that directly doesn't update the IPython formatters
formatters[DATARESOURCE_MEDIA_TYPE] = TableSchemaFormatter()
formatters[DATARESOURCE_MEDIA_TYPE].enabled = True
def deregister() -> None:
    """
    Undo the DX display formatting.

    Sets the pandas `display.max_rows` option to 60 and puts the display formatter
    stored in `DEFAULT_IPYTHON_DISPLAY_FORMATTER` back on the running IPython shell.
    """
    pd.set_option("display.max_rows", 60)
    shell = get_ipython()
    shell.display_formatter = DEFAULT_IPYTHON_DISPLAY_FORMATTER


def register_dx_formatter(media_type: str = DX_MEDIA_TYPE) -> None:
"""Registers a media_type for IPython display formatting"""
pd.options.display.html.table_schema = True
def register() -> None:
"""Overrides the default IPython display formatter to use DXDisplayFormatter"""
pd.options.display.max_rows = 100_000

formatters = get_ipython().display_formatter.formatters
formatters[media_type] = DXSchemaFormatter()
# the default pandas `DataFrame._repr_html_` will not work correctly
# if enabled=True here
formatters[media_type].enabled = False
get_ipython().display_formatter = DXDisplayFormatter()


disable = deregister_dx_formatting
enable = register_dx_formatter
# Deprecated aliases, kept so that `dx.disable()` / `dx.enable()` keep working.
disable = deregister
enable = register
Loading

0 comments on commit eb461ea

Please sign in to comment.