Skip to content

Commit

Permalink
Merge pull request #45 from posit-dev/docs-examples
Browse files Browse the repository at this point in the history
Docs examples
  • Loading branch information
rich-iannone authored Dec 4, 2023
2 parents b6b743a + f673a22 commit 3521cdb
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ website:
left:
- text: Get Started
file: get-started/index.qmd
- text: Examples
file: examples/index.qmd
- href: reference/index.qmd
text: Reference
- href: changelog.qmd
Expand Down
149 changes: 149 additions & 0 deletions docs/examples/index.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
---
title: Examples
jupyter: python3
html-table-processing: none
format:
html:
code-fold: true
code-summary: "Show the Code"
---

:::::: {.column-page}
::::: {.grid}

:::{.g-col-6}

```{python}
# `html` is not used in this cell, but a later cell in this document calls it
# without importing it, so the import has to stay here.
from great_tables import GT, md, html
from great_tables.data import islands

# Work with the first ten rows of the `islands` dataset only.
islands_mini = islands.head(10)

# Build the table: landmass names go in the stub, followed by a header,
# two source notes (the second one written in Markdown), and a stubhead label.
gt_islands = (
    GT(islands_mini, rowname_col="name")
    .tab_header(
        title="Large Landmasses of the World",
        subtitle="The top ten largest are presented",
    )
    .tab_source_note(
        source_note="Source: The World Almanac and Book of Facts, 1975, page 406."
    )
    .tab_source_note(
        source_note=md("Reference: McNeil, D. R. (1977) *Interactive Data Analysis*. Wiley.")
    )
    .tab_stubhead(label="landmass")
)

# The last expression of the cell is what gets rendered.
gt_islands
```

:::
:::{.g-col-6}

```{python}
# NOTE: `GT` and `html` are not imported in this cell; they are expected to be
# in scope already from the first code cell of this document.
from great_tables.data import airquality

# Take the first ten rows and add a constant `Year` column so that the date
# parts (Year, Month, Day) can be grouped under a single spanner.
airquality_m = airquality.head(10).assign(Year=1973)

gt_airquality = (
    GT(airquality_m)
    .tab_header(
        title="New York Air Quality Measurements",
        subtitle="Daily measurements in New York City (May 1-10, 1973)",
    )
    .tab_spanner(
        label="Time",
        columns=["Year", "Month", "Day"],
    )
    # The solar radiation column is selected as `Solar_R`, the same name that
    # `cols_label()` below uses as a keyword, so both calls agree on the column.
    .tab_spanner(
        label="Measurement",
        columns=["Ozone", "Solar_R", "Wind", "Temp"],
    )
    .cols_move_to_start(columns=["Year", "Month", "Day"])
    # Replace the raw column names with HTML labels that include units.
    .cols_label(
        Ozone=html("Ozone,<br>ppbV"),
        Solar_R=html("Solar R.,<br>cal/m<sup>2</sup>"),
        Wind=html("Wind,<br>mph"),
        Temp=html("Temp,<br>&deg;F"),
    )
)

# The last expression of the cell is what gets rendered.
gt_airquality
```

:::

:::{.g-col-6}

```{python}
from great_tables import GT
from great_tables.data import countrypops
import polars as pl
import polars.selectors as cs

# 2-letter country codes for each region of Oceania
countries = {
    "Australasia": ["AU", "NZ"],
    "Melanesia": ["NC", "PG", "SB", "VU"],
    "Micronesia": ["FM", "GU", "KI", "MH", "MP", "NR", "PW"],
    "Polynesia": ["PF", "WS", "TO", "TV"],
}

# Invert the mapping above: country code -> region (e.g. AU -> Australasia)
country_to_region = {
    code: region
    for region, codes in countries.items()
    for code in codes
}

# Preprocess `countrypops`: keep the Oceania countries for three census years,
# tag each row with its region, then pivot to one population column per year.
wide_pops = (
    pl.from_pandas(countrypops)
    .filter(
        pl.col("country_code_2").is_in(list(country_to_region))
        & pl.col("year").is_in([2000, 2010, 2020])
    )
    .with_columns(
        pl.col("country_code_2")
        .map_dict(country_to_region)
        .alias("region")
    )
    .pivot(
        index=["country_name", "region"],
        columns="year",
        values="population",
    )
    # After the pivot the year values are column names, hence the string "2020".
    .sort("2020", descending=True)
)

# Create the table: countries in the stub, grouped by region, with all
# population columns under one spanner and formatted as integers.
(
    GT(wide_pops, rowname_col="country_name", groupname_col="region")
    .tab_header(title="Populations of Oceania's Countries in 2000, 2010, and 2020")
    .tab_spanner(
        label="Total Population",
        columns=cs.all(),
    )
    .fmt_integer()
)
```

:::

:::::
::::::
6 changes: 3 additions & 3 deletions great_tables/_gt_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from typing_extensions import Self, TypeAlias
from dataclasses import dataclass, field, replace
from ._utils import _str_detect
from ._tbl_data import create_empty_frame
from ._tbl_data import create_empty_frame, to_list

from ._styles import CellStyle

Expand Down Expand Up @@ -486,12 +486,12 @@ def __init__(
row_indices = list(range(n_rows(data)))

if groupname_col is not None:
group_id = data[groupname_col].tolist()
group_id = to_list(data[groupname_col])
else:
group_id = [None] * n_rows(data)

if rowname_col is not None:
row_names = data[rowname_col].tolist()
row_names = to_list(data[rowname_col])
else:
row_names = [None] * n_rows(data)

Expand Down
41 changes: 41 additions & 0 deletions great_tables/_tbl_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@
PlDataFrame = pl.DataFrame
PlSelectExpr = _selector_proxy_

PdSeries = pd.Series
PlSeries = pl.Series

DataFrameLike = Union[PdDataFrame, PlDataFrame]
SeriesLike = Union[PdSeries, PlSeries]
TblData = DataFrameLike

else:
Expand All @@ -43,13 +47,24 @@ class PlDataFrame(AbstractBackend):
class PlSelectExpr(AbstractBackend):
_backends = [("polars.selectors", "_selector_proxy_")]

# Stand-in class for pandas' Series; `_backends` names the module ("pandas")
# and the attribute ("Series") this placeholder corresponds to.
# NOTE(review): presumably AbstractBackend matches instances of the named class
# without importing the library eagerly -- confirm against its definition.
class PdSeries(AbstractBackend):
_backends = [("pandas", "Series")]

# Stand-in class for Polars' Series; same scheme as PdSeries, naming the
# module ("polars") and the attribute ("Series").
class PlSeries(AbstractBackend):
_backends = [("polars", "Series")]

# TODO: these types are imported throughout gt, so we need to either put
# those imports under TYPE_CHECKING, or continue to make available dynamically here.
# Abstract umbrella type for the supported DataFrame classes; it has no members
# of its own and gains its members through the `register()` calls below.
class DataFrameLike(ABC):
"""Represent some DataFrame"""

# Abstract umbrella type for the supported Series classes, populated the same
# way through `register()`.
class SeriesLike(ABC):
"""Represent some Series"""

# Register the pandas and Polars placeholders as virtual subclasses, so that
# isinstance()/issubclass() checks against DataFrameLike and SeriesLike accept
# them without any inheritance relationship.
DataFrameLike.register(PdDataFrame)
DataFrameLike.register(PlDataFrame)
SeriesLike.register(PdSeries)
SeriesLike.register(PlSeries)

TblData = DataFrameLike

Expand Down Expand Up @@ -202,6 +217,17 @@ def _(data: PdDataFrame, group_key: str) -> Dict[Any, List[int]]:
return {k: list(v) for k, v in g_df.grouper.indices.items()}


@group_splits.register
def _(data: PlDataFrame, group_key: str) -> Dict[Any, List[int]]:
    """Map each distinct value of `group_key` to the row positions that hold it.

    This is the Polars implementation of `group_splits`.

    Parameters
    ----------
    data
        The Polars DataFrame to split into groups.
    group_key
        Name of the column whose values define the groups.

    Returns
    -------
    Dict[Any, List[int]]
        One entry per distinct group value, in order of first appearance in
        `data`. Each value is the list of 0-based row positions belonging to
        that group.
    """
    import polars as pl

    # Choose a name for the temporary row-index column that cannot collide with
    # a column already present in the data (including `group_key` itself).
    row_col = "__row_count__"
    while row_col in data.columns:
        row_col = f"_{row_col}"

    # `maintain_order=True` keeps the groups in input order; without it Polars
    # does not guarantee a stable group order between runs.
    groups = (
        data.with_row_count(row_col)
        .group_by(group_key, maintain_order=True)
        .agg(pl.col(row_col))
    )

    return dict(zip(groups[group_key].to_list(), groups[row_col].to_list()))


# eval_select ----

_NamePos: TypeAlias = List[Tuple[str, int]]
Expand Down Expand Up @@ -349,3 +375,18 @@ def _(df: PlDataFrame, replacement: PlDataFrame):

exprs = [pl.col(name).fill_null(replacement[name]) for name in df.columns]
return df.select(exprs)


@singledispatch
def to_list(ser: SeriesLike) -> List[Any]:
    """Convert a Series into a plain Python list.

    Dispatches on the type of `ser`. This fallback body runs only for types
    that have no registered implementation and always raises.

    Raises
    ------
    NotImplementedError
        If `ser` is of a type with no registered implementation.
    """
    unsupported = type(ser)
    raise NotImplementedError(f"Unsupported type: {unsupported}")


@to_list.register
def _(ser: PdSeries) -> List[Any]:
    """pandas implementation: delegate to the Series' `tolist()` method."""
    values = ser.tolist()
    return values


@to_list.register
def _(ser: PlSeries) -> List[Any]:
    """Polars implementation: delegate to the Series' `to_list()` method."""
    values = ser.to_list()
    return values

0 comments on commit 3521cdb

Please sign in to comment.