From ed3486519912ffa9512f88f6082e1b95e833e137 Mon Sep 17 00:00:00 2001 From: Rohit Goswami Date: Thu, 17 Apr 2025 10:12:21 +0000 Subject: [PATCH 1/3] ENH: Add in new snippet for script structure --- python-script-structure/README.md | 3 + python-script-structure/iris_summary.py | 132 ++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 python-script-structure/README.md create mode 100644 python-script-structure/iris_summary.py diff --git a/python-script-structure/README.md b/python-script-structure/README.md new file mode 100644 index 0000000000..844d6f6670 --- /dev/null +++ b/python-script-structure/README.md @@ -0,0 +1,3 @@ +# How Can You Structure Your Python Script? + +This folder provides the code examples for the Real Python tutorial [How Can You Structure Your Python Script?](https://realpython.com/python-script-structure/). diff --git a/python-script-structure/iris_summary.py b/python-script-structure/iris_summary.py new file mode 100644 index 0000000000..56d6c6f564 --- /dev/null +++ b/python-script-structure/iris_summary.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "click==8.1.8", +# "pandas==2.2.3", +# "rich==14.0.0", +# "ucimlrepo==0.0.7", +# ] +# /// + +import logging +import sys +from dataclasses import dataclass, field +from enum import IntEnum, StrEnum, auto +from pprint import pformat + +import click +import pandas as pd +from rich.console import Console, RenderableType, Text +from rich.logging import RichHandler +from rich.table import Table +from ucimlrepo import fetch_ucirepo + +logging.basicConfig( + level=logging.INFO, + format="%(levelname)s - %(message)s", + handlers=[RichHandler(rich_tracebacks=True)] +) + +class UCIDataset(IntEnum): + IRIS = 53 + +class IrisVariable(StrEnum): + PETAL_LENGTH = "petal length" + PETAL_WIDTH = "petal width" + SEPAL_WIDTH = "sepal width" + SEPAL_LENGTH = "sepal length" + +class Operation(StrEnum): + SUMMARY = auto() + METADATA = auto() + +@dataclass +class DescriptiveStatistics: + data: pd.Series + mean: float = field(init=False) + median: float = field(init=False) + mm_diff: float = field(init=False) + + def __post_init__(self): + if not isinstance(self.data, pd.Series): + raise TypeError( + f"data must be a pandas Series, not {type(self.data)}" + ) + self.mean = self.data.mean() + self.median = self.data.median() + self.mm_diff = self.mean - self.median + + def __str__(self): + return pformat(self) + +@click.command() +@click.option( + "--operation", + default=Operation.SUMMARY, + type=click.Choice(Operation), + help="Operation to perform: variable summary or dataset metadata", +) +@click.option( + "--variable", + type=click.Choice(IrisVariable), + help="Variable to summarize.", + required=False, +) +def main(operation, variable): + """Fetch the Iris dataset from UCI.""" + console = Console() + iris = fetch_iris() + if operation is Operation.SUMMARY: + if variable: + table = generate_table(iris, variable) + logging.info(format_rich_for_log(table)) + logging.info(f"{IrisVariable(variable)} summary:") + logging.info( + DescriptiveStatistics( + iris.data.features[IrisVariable(variable).value] + ) + ) + else: + logging.info("All variables:") + logging.info(pformat(iris.variables)) + elif operation is Operation.METADATA: + logging.info("Metadata summary:") + logging.info(pformat(iris.metadata)) + +def fetch_iris(): + """Return the Iris dataset from the UCI ML Repository.""" + logging.info("Fetching Iris dataset...") + try: + iris_data = fetch_ucirepo(id=UCIDataset.IRIS.value) + assert "data" in iris_data.keys(), \ + "Object does not have expected structure" + except Exception as e: + logging.critical(f"Failed to correctly fetch Iris dataset: {e}") + sys.exit(1) + else: + logging.info("Iris dataset fetched successfully") + return iris_data + +def generate_table(dataset, variable): + """Generate a formatted table of descriptive statistics for a variable.""" + column = IrisVariable(variable).value + stats = DescriptiveStatistics(dataset.data.features[column]) + table = Table(title=f"{column} summary") + table.add_column("Metric", style="cyan", justify="right") + table.add_column("Value", style="magenta") + table.add_row("Mean", f"{stats.mean:.2f}") + table.add_row("Median", f"{stats.median:.2f}") + table.add_row("Mean-Median Diff", f"{stats.mm_diff:.2f}") + return table + +def format_rich_for_log(renderable, width=100): + """Render a rich object to a plain text string suitable for logging.""" + console = Console(width=width) + with console.capture() as capture: + console.print(renderable) + return Text.from_ansi(capture.get()) + +if __name__ == "__main__": + main() From 7a97b3917968168ed5da9f3bb28946f123eb8710 Mon Sep 17 00:00:00 2001 From: Rohit Goswami Date: Thu, 17 Apr 2025 10:14:25 +0000 Subject: [PATCH 2/3] MAINT: Lint things --- python-script-structure/iris_summary.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/python-script-structure/iris_summary.py b/python-script-structure/iris_summary.py index 56d6c6f564..eceb396b9f 100644 --- a/python-script-structure/iris_summary.py +++ b/python-script-structure/iris_summary.py @@ -26,22 +26,26 @@ logging.basicConfig( level=logging.INFO, format="%(levelname)s - %(message)s", - handlers=[RichHandler(rich_tracebacks=True)] + handlers=[RichHandler(rich_tracebacks=True)], ) + class UCIDataset(IntEnum): IRIS = 53 + class IrisVariable(StrEnum): PETAL_LENGTH = "petal length" PETAL_WIDTH = "petal width" SEPAL_WIDTH = "sepal width" SEPAL_LENGTH = "sepal length" + class Operation(StrEnum): SUMMARY = auto() METADATA = auto() + @dataclass class DescriptiveStatistics: data: pd.Series @@ -61,6 +65,7 @@ def __post_init__(self): def __str__(self): return pformat(self) + @click.command() @click.option( "--operation", @@ -95,13 +100,15 @@ def main(operation, variable): logging.info("Metadata summary:") logging.info(pformat(iris.metadata)) + def fetch_iris(): """Return the Iris dataset from the UCI ML Repository.""" logging.info("Fetching Iris dataset...") try: iris_data = fetch_ucirepo(id=UCIDataset.IRIS.value) - assert "data" in iris_data.keys(), \ - "Object does not have expected structure" + assert ( + "data" in iris_data.keys() + ), "Object does not have expected structure" except Exception as e: logging.critical(f"Failed to correctly fetch Iris dataset: {e}") sys.exit(1) @@ -109,6 +116,7 @@ def fetch_iris(): logging.info("Iris dataset fetched successfully") return iris_data + def generate_table(dataset, variable): """Generate a formatted table of descriptive statistics for a variable.""" column = IrisVariable(variable).value @@ -121,6 +129,7 @@ def generate_table(dataset, variable): table.add_row("Mean-Median Diff", f"{stats.mm_diff:.2f}") return table + def format_rich_for_log(renderable, width=100): """Render a rich object to a plain text string suitable for logging.""" console = Console(width=width) @@ -128,5 +137,6 @@ def format_rich_for_log(renderable, width=100): console.print(renderable) return Text.from_ansi(capture.get()) + if __name__ == "__main__": main() From e7b209aa284689e648d4882dca3906e454307b51 Mon Sep 17 00:00:00 2001 From: Rohit Goswami Date: Thu, 17 Apr 2025 10:14:58 +0000 Subject: [PATCH 3/3] MAINT: Remove unused --- python-script-structure/iris_summary.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python-script-structure/iris_summary.py b/python-script-structure/iris_summary.py index eceb396b9f..ddcee3c745 100644 --- a/python-script-structure/iris_summary.py +++ b/python-script-structure/iris_summary.py @@ -18,7 +18,7 @@ import click import pandas as pd -from rich.console import Console, RenderableType, Text +from rich.console import Console, Text from rich.logging import RichHandler from rich.table import Table from ucimlrepo import fetch_ucirepo @@ -81,7 +81,6 @@ def __str__(self): ) def main(operation, variable): """Fetch the Iris dataset from UCI.""" - console = Console() iris = fetch_iris() if operation is Operation.SUMMARY: if variable: