Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -3029,10 +3029,36 @@ def _validate_operand(obj: DataFrame | Series) -> DataFrame:
return obj.to_frame()
else:
raise TypeError(
f"Can only merge Series or DataFrame objects, a {type(obj)} was passed"
_get_merge_error_message(obj)
)


def _get_merge_error_message(obj: object) -> str:
"""Generate a helpful error message for invalid merge arguments."""
obj_type_name = type(obj).__name__
obj_module_name = type(obj).__module__

# Special handling for known DataFrame-like libraries
if obj_module_name == "polars.dataframe.frame" and obj_type_name == "DataFrame":
return (
"Can only merge Series or DataFrame objects, received "
"polars.DataFrame. Please convert the polars DataFrame to a "
"pandas DataFrame using `.to_pandas()` or pass it to "
"pd.DataFrame()."
)
elif "polars" in obj_module_name.lower():
return (
f"Can only merge Series or DataFrame objects, received "
f"{obj_module_name}.{obj_type_name} (a polars object). "
"Please convert to a pandas DataFrame using `.to_pandas()`."
)

return (
f"Can only merge Series or DataFrame objects, received "
f"{obj_module_name}.{obj_type_name}. Expected a pandas Series or DataFrame."
)


def _items_overlap_with_suffix(
left: Index, right: Index, suffixes: Suffixes
) -> tuple[Index, Index]:
Expand Down
129 changes: 129 additions & 0 deletions test_issue_61434_repro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""
Pandas Issue #61434 - Reproduction Test

Issue: When attempting to merge a pandas DataFrame with a polars DataFrame,
the error message is unhelpful.

Current behavior: Generic error about missing attributes or type errors
Expected behavior: Clear message saying "other must be pandas.DataFrame,
received: polars.DataFrame"

Snippet from issue #61434:
https://github.com/pandas-dev/pandas/issues/61434
"""

import pandas as pd

# polars is optional here: record whether it could be imported so the
# polars-specific check can be skipped when it is missing.
try:
    import polars as pl
except ImportError:
    POLARS_AVAILABLE = False
    print("Warning: polars not installed. Install with: pip install polars")
else:
    POLARS_AVAILABLE = True


def test_merge_with_polars():
    """
    Try to merge a pandas DataFrame with a polars DataFrame and grade the error.

    Prints a step-by-step report. Returns True when the merge raises a
    TypeError whose message either mentions both "polars" and "pandas", or
    contains "must be" / "expected" (all compared case-insensitively).
    Returns False when polars is unavailable, when the merge unexpectedly
    succeeds, when the message fails both checks, or when any other
    exception type is raised.
    """
    if not POLARS_AVAILABLE:
        print("Skipping test - polars not available")
        return False

    rule = "=" * 70
    print(rule)
    print("Test: Merging pandas DataFrame with polars DataFrame")
    print(rule)

    # One frame from each library, sharing the "key" column.
    pandas_frame = pd.DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
    polars_frame = pl.DataFrame({"key": ["a", "b", "c"], "value_y": [10, 20, 30]})

    print(f"\nPandas DataFrame type: {type(pandas_frame)}")
    print(f"Polars DataFrame type: {type(polars_frame)}")
    print("\nAttempting merge...")

    try:
        merged = pd.merge(pandas_frame, polars_frame, on="key")
        print(f"✗ Unexpected: merge succeeded with result type {type(merged)}")
        return False
    except TypeError as exc:
        message = str(exc)
        print(f"\nError caught: {type(exc).__name__}")
        print(f"Error message: {message}")

        # Lower-case once; every check below is case-insensitive.
        lowered = message.lower()

        if "polars" in lowered and "pandas" in lowered:
            print("\n✓ GOOD: Error message mentions both polars and pandas")
            print("✓ GOOD: User knows what went wrong")
            return True

        if "must be" in lowered or "expected" in lowered:
            print("\n✓ GOOD: Error message explains what's expected")
            return True

        print("\n✗ BAD: Error message is not helpful enough")
        print(" Expected something like:")
        print(" 'other must be pandas.DataFrame, received: polars.DataFrame'")
        print(f" But got: {message}")
        return False
    except Exception as exc:
        print(f"\n✗ Unexpected error type: {type(exc).__name__}")
        print(f" {exc}")
        return False


def test_merge_pandas_baseline():
    """
    Baseline check: merging two pandas DataFrames on "key" should succeed.

    Prints the outcome and returns True on success; any exception is
    reported and turned into a False return value.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("Test: Merging two pandas DataFrames (baseline)")
    print(rule)

    left = pd.DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
    right = pd.DataFrame({"key": ["a", "b", "c"], "value_y": [10, 20, 30]})

    try:
        merged = pd.merge(left, right, on="key")
        print("✓ Merge succeeded")
        print(f" Result shape: {merged.shape}")
        print(f" Result columns: {list(merged.columns)}")
        return True
    except Exception as exc:
        print(f"✗ Baseline test failed: {exc}")
        return False


if __name__ == "__main__":
    # Script entry point: run both checks, then print a two-line summary.
    rule = "=" * 70

    print("\n" + rule)
    print("PANDAS ISSUE #61434 - REPRODUCTION TEST")
    print(rule)
    print()

    baseline_ok = test_merge_pandas_baseline()
    polars_test_ok = test_merge_with_polars()

    if baseline_ok:
        baseline_verdict = "✓ PASS"
    else:
        baseline_verdict = "✗ FAIL"

    if polars_test_ok:
        polars_verdict = "✓ GOOD"
    else:
        polars_verdict = "✗ NEEDS FIX"

    print("\n" + rule)
    print("SUMMARY")
    print(rule)
    print(f"Baseline (pandas merge): {baseline_verdict}")
    print(f"Polars test (error msg): {polars_verdict}")
    print()
141 changes: 141 additions & 0 deletions test_issue_61434_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""
Regression tests for issue #61434: Improved error message for incompatible merge types

Tests that:
1. Merging with polars.DataFrame raises TypeError with helpful message
2. Merging with other incompatible types also gets helpful messages
3. Normal pandas merges still work correctly
"""

import pytest
import pandas as pd
from pandas import DataFrame, Series
import pandas._testing as tm


class TestMergeIncompatibleTypes:
    """Checks on the errors ``merge`` raises for operands it cannot accept."""

    def test_merge_with_polars_dataframe(self):
        """
        A polars DataFrame operand is rejected with a TypeError whose message
        mentions polars and, after it, pandas.

        Regression test for issue #61434.
        """
        # importorskip hands back the imported module, so it doubles as the
        # import statement.
        pl = pytest.importorskip("polars")

        left = DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
        right = pl.DataFrame({"key": ["a", "b", "c"], "value_y": [10, 20, 30]})

        with pytest.raises(TypeError, match=".*polars.*pandas.*"):
            pd.merge(left, right, on="key")

    def test_merge_polars_to_pandas_conversion(self):
        """
        Converting the polars frame with ``to_pandas()`` first makes the
        merge succeed.

        This is the workaround suggested by the error message.
        """
        pl = pytest.importorskip("polars")

        left = DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
        polars_right = pl.DataFrame(
            {"key": ["a", "b", "c"], "value_y": [10, 20, 30]}
        )

        # Convert to pandas before merging.
        right = polars_right.to_pandas()
        result = pd.merge(left, right, on="key")

        expected = DataFrame(
            {
                "key": ["a", "b", "c"],
                "value_x": [1, 2, 3],
                "value_y": [10, 20, 30],
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_merge_with_dict(self):
        """A dict operand raises a TypeError whose message contains "dict"."""
        left = DataFrame({"key": ["a", "b"], "value": [1, 2]})
        not_a_frame = {"key": ["a", "b"], "value": [3, 4]}

        with pytest.raises(TypeError, match=".*dict.*"):
            pd.merge(left, not_a_frame, on="key")

    def test_merge_with_list(self):
        """A list operand raises the "Can only merge ..." TypeError."""
        left = DataFrame({"key": ["a", "b"], "value": [1, 2]})
        not_a_frame = [["a", 1], ["b", 2]]

        with pytest.raises(
            TypeError, match="Can only merge Series or DataFrame objects"
        ):
            pd.merge(left, not_a_frame, on="key")

    def test_merge_pandas_baseline(self):
        """
        Two pandas DataFrames merge on "key" as usual.

        Baseline to confirm the ordinary path still produces the joined frame.
        """
        left = DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
        right = DataFrame({"key": ["a", "b", "c"], "value_y": [10, 20, 30]})

        result = pd.merge(left, right, on="key")

        expected = DataFrame(
            {
                "key": ["a", "b", "c"],
                "value_x": [1, 2, 3],
                "value_y": [10, 20, 30],
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_merge_with_series_name(self):
        """A named Series merges on the index and contributes its name as a column."""
        left = DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
        named = Series([10, 20, 30], name="value_y")

        result = pd.merge(left, named, left_index=True, right_index=True)

        expected = DataFrame(
            {
                "key": ["a", "b", "c"],
                "value_x": [1, 2, 3],
                "value_y": [10, 20, 30],
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_merge_with_unnamed_series(self):
        """A Series without a name raises the "without a name" ValueError."""
        left = DataFrame({"key": ["a", "b", "c"], "value": [1, 2, 3]})
        unnamed = Series([10, 20, 30])  # deliberately left without a name

        with pytest.raises(ValueError, match="Cannot merge a Series without a name"):
            pd.merge(left, unnamed, left_index=True, right_index=True)
Loading
Loading