Skip to content

Commit 8f5a61c

Browse files
authored
big update to support dataresource rendering, reset to pandas defaults, add settings, automatic truncating & sampling modes, and more (#11)
1 parent 795e55b commit 8f5a61c

20 files changed

+1213
-147
lines changed

dx/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
from .config import *
22
from .dx import *
33
from .formatters import *
4+
from .settings import *
45

5-
__version__ = "1.0.4"
6+
__version__ = "1.1.0"
7+
8+
set_display_mode("simple")

dx/config.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
import os
22

3+
from IPython import get_ipython
4+
5+
# True when an IPython shell is active at import time.
IN_IPYTHON_ENV = get_ipython() is not None
# Display formatter attached to the active shell at import time, or None when
# there is no shell. dx/formatters/dataresource.py delegates to it for objects
# it does not render itself.
DEFAULT_IPYTHON_DISPLAY_FORMATTER = (
    get_ipython().display_formatter if IN_IPYTHON_ENV else None
)
9+
310

411
def in_noteable_env() -> bool:
512
"""

dx/dx.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,17 @@
44
import pandas as pd
55
from IPython.display import display as ipydisplay
66

7-
from .formatters import format_dx
7+
from dx.settings import set_display_mode, settings
8+
from dx.types import DXDisplayMode
89

910

1011
def display(
1112
data: Union[List[dict], pd.DataFrame, Union[pathlib.Path, str]],
13+
mode: DXDisplayMode = DXDisplayMode.simple,
1214
) -> None:
1315
"""
14-
Display a single object (pd.DataFrame, .csv/.json filepath, or tabular dataset) with the DX display format.
16+
Display a single object with the DX display format.
17+
(e.g. pd.DataFrame, .csv/.json filepath, or tabular dataset)
1518
"""
1619

1720
if isinstance(data, str):
@@ -24,8 +27,11 @@ def display(
2427
raise ValueError(f"Unsupported file type: `{path.suffix}`")
2528

2629
df = pd.DataFrame(data)
27-
payload, _ = format_dx(df)
28-
ipydisplay(payload, raw=True)
30+
31+
orig_mode = settings.DISPLAY_MODE
32+
set_display_mode(mode)
33+
ipydisplay(df)
34+
set_display_mode(orig_mode)
2935
return
3036

3137

dx/formatters.py

Lines changed: 0 additions & 66 deletions
This file was deleted.

dx/formatters/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .dataresource import *
2+
from .dx import *
3+
from .main import *

dx/formatters/callouts.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import enum
2+
3+
from IPython.display import HTML, display
4+
from pydantic import BaseModel
5+
6+
7+
class CalloutLevel(enum.Enum):
    """Severity/intent of a callout.

    Each member's value is interpolated into the `bp3-intent-<value>` CSS
    class and, title-cased, used as the callout heading by `Callout.html`.
    """

    primary = "primary"
    secondary = "secondary"
    info = "info"
    warning = "warning"
    danger = "danger"
13+
14+
15+
class Callout(BaseModel):
    """A message paired with a `CalloutLevel`, renderable as an HTML snippet."""

    # Text placed inside the callout body.
    message: str
    # Intent of the callout; defaults to `info`.
    level: CalloutLevel = CalloutLevel.info

    @property
    def html(self):
        """
        Return the callout as a `<div>` containing an `<h6>` heading
        (the title-cased level) followed by the message.

        NOTE: `message` is interpolated as-is; no HTML escaping is applied.
        """
        intent = self.level.value
        heading = f"<h6 class='bp3-heading'>{intent.title()}</h6>"
        css_classes = " ".join(("bp3-callout", f"bp3-intent-{intent}"))
        return f"<div class='{css_classes}'>{heading}{self.message}</div>"
29+
30+
31+
def display_callout(
    message: str,
    level: CalloutLevel = CalloutLevel.info,
) -> None:
    """
    Build a `Callout` from `message` and `level` and show its HTML
    through IPython's `display`.
    """
    markup = Callout(message=message, level=level).html
    display(HTML(markup))

dx/formatters/dataresource.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import uuid
2+
from functools import lru_cache
3+
from typing import List, Optional
4+
5+
import numpy as np
6+
import pandas as pd
7+
from IPython import get_ipython
8+
from IPython.core.formatters import DisplayFormatter
9+
from IPython.core.interactiveshell import InteractiveShell
10+
from IPython.display import display as ipydisplay
11+
from pandas.io.json import build_table_schema
12+
from pydantic import BaseSettings, Field
13+
14+
from dx.config import DEFAULT_IPYTHON_DISPLAY_FORMATTER, IN_IPYTHON_ENV
15+
from dx.formatters.utils import (
16+
stringify_columns,
17+
stringify_indices,
18+
truncate_if_too_big,
19+
)
20+
from dx.settings import settings
21+
22+
23+
class DataResourceSettings(BaseSettings):
    """Settings for the "simple" (classic simpleTable/DEX) display mode."""

    # Row/column limits; `deregister()` copies these into the global dx
    # settings and into the pandas display options.
    DATARESOURCE_DISPLAY_MAX_ROWS: int = 100_000
    DATARESOURCE_DISPLAY_MAX_COLUMNS: int = 50

    # Declared with allow_mutation=False, i.e. not meant to be reassigned.
    DATARESOURCE_HTML_TABLE_SCHEMA: bool = Field(default=True, allow_mutation=False)
    DATARESOURCE_MEDIA_TYPE: str = Field(
        default="application/vnd.dataresource+json",
        allow_mutation=False,
    )

    # Object types handled by the dataresource formatter.
    DATARESOURCE_RENDERABLE_OBJECTS: List[type] = [pd.DataFrame, np.ndarray]

    class Config:
        # we need this to enforce `allow_mutation`
        validate_assignment = True
35+
36+
37+
@lru_cache(maxsize=128)
def get_dataresource_settings():
    """Return the `DataResourceSettings` instance, built once and then cached."""
    return DataResourceSettings()
40+
41+
42+
dataresource_settings = get_dataresource_settings()
43+
44+
45+
class DXDataResourceDisplayFormatter(DisplayFormatter):
    """
    Display formatter that renders the types listed in
    `settings.RENDERABLE_OBJECTS` as dataresource payloads and hands
    everything else to the stock IPython formatter.
    """

    def format(self, obj, **kwargs):
        """
        Format `obj` for display.

        Renderable objects are converted to a DataFrame and published by
        `_render_dataresource` under a fresh display id; an empty
        `(data, metadata)` pair is returned for them. Any other object is
        formatted by the display formatter captured at import time, or by
        the inherited `DisplayFormatter.format` when none was captured.
        """
        if isinstance(obj, tuple(settings.RENDERABLE_OBJECTS)):
            display_id = str(uuid.uuid4())
            df_obj = pd.DataFrame(obj)
            # _render_dataresource() displays the payload itself, so its
            # return value is intentionally not used here.
            _render_dataresource(df_obj, display_id)
            # TODO: determine if/how we can pass payload/metadata with
            # display_id for the frontend to pick up properly
            return ({}, {})

        if DEFAULT_IPYTHON_DISPLAY_FORMATTER is None:
            # No shell existed when dx.config was imported (e.g. a shell was
            # passed explicitly to deregister()), so there is no captured
            # formatter to delegate to; use the inherited implementation
            # instead of failing with AttributeError on None.
            return super().format(obj, **kwargs)

        return DEFAULT_IPYTHON_DISPLAY_FORMATTER.format(obj, **kwargs)
57+
58+
59+
def format_dataresource(df: pd.DataFrame, display_id: str) -> tuple:
    """
    Build the dataresource `(payload, metadata)` pair for a dataframe.

    The payload maps the dataresource media type to a body holding the
    table schema, the rows as a list of records (index reset into
    columns), and a `datalink` dict. `display_id` is added to `datalink`
    only when it is not None; the metadata always carries it.
    The input dataframe is copied before any modification.
    """
    media_type = dataresource_settings.DATARESOURCE_MEDIA_TYPE

    # temporary workaround for numeric column rendering errors with GRID
    # https://noteables.slack.com/archives/C03CB8A4Z2L/p1658497348488939
    frame = stringify_columns(df.copy())

    # temporary workaround for numeric MultiIndices
    # because of pandas build_table_schema() errors
    if isinstance(frame.index, pd.MultiIndex):
        frame = stringify_indices(frame)

    datalink = {}
    if display_id is not None:
        datalink["display_id"] = display_id

    body = {
        "schema": build_table_schema(frame),
        "data": frame.reset_index().to_dict("records"),
        "datalink": datalink,
    }
    return ({media_type: body}, {media_type: {"display_id": display_id}})
86+
87+
88+
def _render_dataresource(df, display_id) -> tuple:
    """
    Truncate `df` if needed, format it as a dataresource payload, display
    that payload raw under `display_id`, and return `(payload, metadata)`.
    """
    payload, metadata = format_dataresource(truncate_if_too_big(df), display_id)
    table_schema_flag = dataresource_settings.DATARESOURCE_HTML_TABLE_SCHEMA

    # don't pass a dataframe in here, otherwise you'll get recursion errors
    with pd.option_context("html.table_schema", table_schema_flag):
        ipydisplay(payload, raw=True, display_id=display_id)

    return (payload, metadata)
97+
98+
99+
def deregister(ipython_shell: Optional[InteractiveShell] = None) -> None:
    """
    Switch display handling to the dataresource formatter (simpleTable /
    "classic DEX" outputs).

    Copies the dataresource settings into the global dx settings, applies
    the row/column limits to the pandas display options, and installs a
    `DXDataResourceDisplayFormatter` on the shell. Does nothing when no
    shell is passed and no IPython environment was detected.
    """
    if ipython_shell is None and not IN_IPYTHON_ENV:
        return

    max_columns = dataresource_settings.DATARESOURCE_DISPLAY_MAX_COLUMNS
    max_rows = dataresource_settings.DATARESOURCE_DISPLAY_MAX_ROWS

    # Only attributes of the shared settings object are updated; the
    # module-level name itself is never rebound.
    settings.DISPLAY_MODE = "simple"
    settings.DISPLAY_MAX_COLUMNS = max_columns
    settings.DISPLAY_MAX_ROWS = max_rows
    settings.MEDIA_TYPE = dataresource_settings.DATARESOURCE_MEDIA_TYPE
    settings.RENDERABLE_OBJECTS = dataresource_settings.DATARESOURCE_RENDERABLE_OBJECTS

    pd.set_option("display.max_columns", max_columns)
    pd.set_option("display.max_rows", max_rows)

    # An explicitly supplied shell wins over the ambient one.
    shell = ipython_shell or get_ipython()
    shell.display_formatter = DXDataResourceDisplayFormatter()

0 commit comments

Comments
 (0)