From ce37450deca789e6d9ac4f457416971d258fd1a7 Mon Sep 17 00:00:00 2001 From: myenugula Date: Wed, 23 Apr 2025 14:32:44 +0800 Subject: [PATCH 1/2] BUG: Fix scatter plot colors in groupby context to match line plot behavior (#59846) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/plotting/_matplotlib/core.py | 4 +-- pandas/tests/plotting/test_groupby.py | 49 +++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 184ca581902ee..b9a95951cf49b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -763,6 +763,7 @@ Plotting - Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) - Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) +- Bug in :meth:`DataFrameGroupBy.plot` with ``kind="scatter"`` where all groups used the same color instead of different colors for each group like line plots do (:issue:`59846`) - Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`) - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 24aa848de1b4c..94eace2982a17 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1371,7 +1371,7 @@ def _make_plot(self, fig: Figure) -> None: # if a list of non-color strings is passed in as c, color points # by uniqueness of the strings, such same strings get same color create_colors = not self._are_valid_colors(c_values) - if create_colors: + if c_values is not None and create_colors: color_mapping = self._get_color_mapping(c_values) c_values = [color_mapping[s] for s in c_values] @@ -1422,7 +1422,7 @@ def _get_c_values(self, color, color_by_categorical: bool, c_is_column: bool): if c is not None and color is not None: raise TypeError("Specify exactly one of `c` and `color`") if c is None and color is None: - c_values = mpl.rcParams["patch.facecolor"] + c_values = None elif color is not None: c_values = color elif color_by_categorical: diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 0cb125d822fd1..e274610a1349e 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -152,3 +152,52 @@ def test_groupby_hist_series_with_legend_raises(self): with pytest.raises(ValueError, match="Cannot use both legend and label"): g.hist(legend=True, label="d") + + def test_groupby_scatter_colors_differ(self): + # GH 59846 - Test that scatter plots use different colors for different groups + # similar to how line plots do + from matplotlib.collections import PathCollection + import matplotlib.pyplot as plt + + # Create test data with distinct groups + df = DataFrame( + { + "x": [1, 2, 3, 4, 5, 6, 7, 8, 9], + "y": [1, 2, 3, 4, 5, 6, 7, 8, 9], + "group": ["A", "A", "A", "B", "B", "B", "C", "C", "C"], + } + ) + + # Set up a figure with both line and scatter plots + fig, (ax1, ax2) = plt.subplots(1, 2) + + # Plot line chart (known to use different colors for different groups) + df.groupby("group").plot(x="x", y="y", ax=ax1, kind="line") + + # Plot scatter chart (should also use different colors for different groups) + df.groupby("group").plot(x="x", y="y", ax=ax2, kind="scatter") + + # Get the colors used in the line plot and scatter plot + line_colors = [line.get_color() for line in ax1.get_lines()] + + # Get scatter colors + scatter_colors = [] + for collection in ax2.collections: + if isinstance(collection, PathCollection): # This is a scatter plot + # Get the face colors (might be array of RGBA values) + face_colors = collection.get_facecolor() + # If multiple points with same color, we get the first one + if face_colors.ndim > 1: + scatter_colors.append(tuple(face_colors[0])) + else: + scatter_colors.append(tuple(face_colors)) + + # Assert that we have the right number of colors (one per group) + assert len(line_colors) == 3 + assert len(scatter_colors) == 3 + + # Assert that the colors are all different + assert len(set(scatter_colors)) == 3 + assert len(line_colors) == 3 + + plt.close(fig) From c3d388f1d26a2cbe790d1cdf324f6ceedaf7d74c Mon Sep 17 00:00:00 2001 From: myenugula Date: Wed, 23 Apr 2025 14:49:53 +0800 Subject: [PATCH 2/2] BUG: Fix scatter plot colors in groupby context to match line plot behavior (#59846) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/plotting/_matplotlib/core.py | 4 +-- pandas/tests/plotting/test_groupby.py | 49 +++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f76d94036c6d8..2297b0db3dcc1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -798,6 +798,7 @@ Plotting - Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) - Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) +- Bug in :meth:`DataFrameGroupBy.plot` with ``kind="scatter"`` where all groups used the same color instead of different colors for each group like line plots do (:issue:`59846`) - Bug in :meth:`Series.plot` preventing a line and bar from being aligned on the same plot (:issue:`61161`) - Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`) - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index a217ee8a86a16..8b8e39c28cb19 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1365,7 +1365,7 @@ def _make_plot(self, fig: Figure) -> None: # if a list of non-color strings is passed in as c, color points # by uniqueness of the strings, such same strings get same color create_colors = not self._are_valid_colors(c_values) - if create_colors: + if c_values is not None and create_colors: color_mapping = self._get_color_mapping(c_values) c_values = [color_mapping[s] for s in c_values] @@ -1416,7 +1416,7 @@ def _get_c_values(self, color, color_by_categorical: bool, c_is_column: bool): if c is not None and color is not None: raise TypeError("Specify exactly one of `c` and `color`") if c is None and color is None: - c_values = mpl.rcParams["patch.facecolor"] + c_values = None elif color is not None: c_values = color elif color_by_categorical: diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 0cb125d822fd1..e274610a1349e 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -152,3 +152,52 @@ def test_groupby_hist_series_with_legend_raises(self): with pytest.raises(ValueError, match="Cannot use both legend and label"): g.hist(legend=True, label="d") + + def test_groupby_scatter_colors_differ(self): + # GH 59846 - Test that scatter plots use different colors for different groups + # similar to how line plots do + from matplotlib.collections import PathCollection + import matplotlib.pyplot as plt + + # Create test data with distinct groups + df = DataFrame( + { + "x": [1, 2, 3, 4, 5, 6, 7, 8, 9], + "y": [1, 2, 3, 4, 5, 6, 7, 8, 9], + "group": ["A", "A", "A", "B", "B", "B", "C", "C", "C"], + } + ) + + # Set up a figure with both line and scatter plots + fig, (ax1, ax2) = plt.subplots(1, 2) + + # Plot line chart (known to use different colors for different groups) + df.groupby("group").plot(x="x", y="y", ax=ax1, kind="line") + + # Plot scatter chart (should also use different colors for different groups) + df.groupby("group").plot(x="x", y="y", ax=ax2, kind="scatter") + + # Get the colors used in the line plot and scatter plot + line_colors = [line.get_color() for line in ax1.get_lines()] + + # Get scatter colors + scatter_colors = [] + for collection in ax2.collections: + if isinstance(collection, PathCollection): # This is a scatter plot + # Get the face colors (might be array of RGBA values) + face_colors = collection.get_facecolor() + # If multiple points with same color, we get the first one + if face_colors.ndim > 1: + scatter_colors.append(tuple(face_colors[0])) + else: + scatter_colors.append(tuple(face_colors)) + + # Assert that we have the right number of colors (one per group) + assert len(line_colors) == 3 + assert len(scatter_colors) == 3 + + # Assert that the colors are all different + assert len(set(scatter_colors)) == 3 + assert len(line_colors) == 3 + + plt.close(fig)