diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index b7f3a46f9e14..ecdbb342d3e2 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -356,8 +356,8 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None, values.codes, mask, index_type, memory_pool) try: dictionary = array( - values.categories.values, type=value_type, - memory_pool=memory_pool) + values.categories, type=value_type, + from_pandas=True, memory_pool=memory_pool) except TypeError: # TODO when removing the deprecation warning, this whole # try/except can be removed (to bubble the TypeError of @@ -371,7 +371,8 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None, "TypeError", FutureWarning, stacklevel=2) dictionary = array( - values.categories.values, memory_pool=memory_pool) + values.categories, from_pandas=True, + memory_pool=memory_pool) else: raise diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 0339975f4571..063532140c6e 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -3047,6 +3047,15 @@ def test_all_none_category(self): df['a'] = df['a'].astype('category') _check_pandas_roundtrip(df) + def test_categorical_with_timezone(self): + # GH-49875: timezone was dropped when converting tz-aware categorical + cats = pd.DatetimeIndex(["2024-01-01", "2024-01-02"]).tz_localize("US/Eastern") + cat = pd.Categorical(values=[cats[0], cats[1], cats[0]], categories=cats) + + arr = pa.array(cat, from_pandas=True) + + assert arr.type.value_type.tz == "US/Eastern" + def test_empty_arrays(self): for dtype_str, pa_type in self.type_pairs: if (Version(pd.__version__) >= Version("3.0.0") and