Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ def maybe_indices_to_slice(
indices: npt.NDArray[np.intp],
max_len: int,
) -> slice | npt.NDArray[np.intp]: ...
# True if no element of obj is iterable (str and bytes count as scalars).
def is_all_scalar(obj: list | tuple) -> bool: ...
# NOTE(review): per its docstring in lib.pyx, this decides whether the items
# should be treated as MultiIndex levels rather than Index items -- confirm
# the exact criteria against the implementation.
def is_all_arraylike(obj: list) -> bool: ...

# -----------------------------------------------------------------
Expand Down
18 changes: 18 additions & 0 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,24 @@ cpdef ndarray[object] ensure_string_array(
return result


def is_all_scalar(obj: list | tuple) -> bool:
    """
    Check whether no element of ``obj`` is itself iterable.

    ``str`` and ``bytes`` elements are treated as scalars even though they
    define ``__iter__``; any other element exposing ``__iter__`` makes the
    result False.

    Parameters
    ----------
    obj : list or tuple
        The sequence whose elements are inspected.

    Returns
    -------
    bool
        True if every element is a scalar (also True for an empty input),
        False as soon as one iterable, non-string element is found.
    """
    # NOTE(review): this is a linear scan over ``obj``; it was accepted in
    # review because it did not cause a performance regression.
    cdef:
        Py_ssize_t idx, length = len(obj)
        object item

    for idx in range(length):
        item = obj[idx]
        if isinstance(item, (bytes, str)):
            # Strings are iterable but are still scalar values here.
            continue
        if hasattr(item, "__iter__"):
            return False

    return True


def is_all_arraylike(obj: list) -> bool:
"""
Should we treat these as levels of a MultiIndex, as opposed to Index items?
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,11 @@ def array(
return data.copy()
return data

# Reject nested list-likes here; otherwise the result would be an array of
# the string representations of the inner objects.
if isinstance(dtype, StringDtype) and isinstance(data, (list, tuple)):
if not lib.is_all_scalar(data):
raise TypeError("Values must be a 1D list-like")

if isinstance(dtype, ExtensionDtype):
cls = dtype.construct_array_type()
return cls._from_sequence(data, dtype=dtype, copy=copy)
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,12 @@ def test_nd_raises(data):
pd.array(data, dtype="int64")


@pytest.mark.parametrize("data", [[["a"], ["b"]]])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some more cases? For example, some of #63112:

int_2d = [[1], [2]]
float_2d = [[1.0], [2.0]]
string_2d = [["a"], ["b"]]
mixed_2d = [[1, 2], ["a", "b"]]

Copy link
Author

@antareepsarkar antareepsarkar Dec 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the review.
I have made the requested modifications.

def test_not_1D_like_raises(data):
    # Nested list-likes passed with StringDtype must raise rather than be
    # converted to the string representation of each inner object.
    msg = "Values must be a 1D list-like"
    with pytest.raises(TypeError, match=msg):
        pd.array(data, dtype=pd.StringDtype())


def test_scalar_raises():
    # A bare scalar is not valid input for pd.array.
    msg = "Cannot pass scalar '1'"
    with pytest.raises(ValueError, match=msg):
        pd.array(1)
Expand Down
Loading