diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 53f703b701217..7c42bb5a727ba 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2303,6 +2303,20 @@ def _groupby_op(
         **kwargs,
     ):
         if isinstance(self.dtype, StringDtype):
+            if how in [
+                "prod",
+                "mean",
+                "median",
+                "cumsum",
+                "cumprod",
+                "std",
+                "sem",
+                "var",
+                "skew",
+            ]:
+                raise TypeError(
+                    f"dtype '{self.dtype}' does not support operation '{how}'"
+                )
             return super()._groupby_op(
                 how=how,
                 has_dropped_na=has_dropped_na,
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 5f2c2a7772f78..4835d808f2433 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2608,6 +2608,20 @@ def _groupby_op(
         # GH#43682
         if isinstance(self.dtype, StringDtype):
             # StringArray
+            if op.how in [
+                "prod",
+                "mean",
+                "median",
+                "cumsum",
+                "cumprod",
+                "std",
+                "sem",
+                "var",
+                "skew",
+            ]:
+                raise TypeError(
+                    f"dtype '{self.dtype}' does not support operation '{op.how}'"
+                )
             if op.how not in ["any", "all"]:
                 # Fail early to avoid conversion to object
                 op._get_cython_function(op.kind, op.how, np.dtype(object), False)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 66db033596872..8f2e5d2ee09d4 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -4162,9 +4162,9 @@ def quantile(
         starts, ends = lib.generate_slices(splitter._slabels, splitter.ngroups)
 
         def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]:
-            if is_object_dtype(vals.dtype):
+            if isinstance(vals.dtype, StringDtype) or is_object_dtype(vals.dtype):
                 raise TypeError(
-                    "'quantile' cannot be performed against 'object' dtypes!"
+                    f"dtype '{vals.dtype}' does not support operation 'quantile'"
                 )
 
             inference: DtypeObj | None = None
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index b4f02b6f81b6f..57c803c23b001 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -2113,7 +2113,7 @@ def test_unstack_period_frame(self):
     @pytest.mark.filterwarnings(
         "ignore:The previous implementation of stack is deprecated"
     )
-    def test_stack_multiple_bug(self, future_stack):
+    def test_stack_multiple_bug(self, future_stack, using_infer_string):
         # bug when some uniques are not present in the data GH#3170
         id_col = ([1] * 3) + ([2] * 3)
         name = (["a"] * 3) + (["b"] * 3)
@@ -2125,6 +2125,8 @@ def test_stack_multiple_bug(self, future_stack):
         multi.columns.name = "Params"
         unst = multi.unstack("ID")
         msg = re.escape("agg function failed [how->mean,dtype->")
+        if using_infer_string:
+            msg = "dtype 'str' does not support operation 'mean'"
         with pytest.raises(TypeError, match=msg):
             unst.resample("W-THU").mean()
         down = unst.resample("W-THU").mean(numeric_only=True)
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index d28eb227314c7..b937e7dcc8136 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -148,11 +148,11 @@ def test_cython_agg_return_dict():
 
 def test_cython_fail_agg():
     dr = bdate_range("1/1/2000", periods=50)
-    ts = Series(["A", "B", "C", "D", "E"] * 10, index=dr)
+    ts = Series(["A", "B", "C", "D", "E"] * 10, dtype=object, index=dr)
 
     grouped = ts.groupby(lambda x: x.month)
     summed = grouped.sum()
-    expected = grouped.agg(np.sum)
+    expected = grouped.agg(np.sum).astype(object)
     tm.assert_series_equal(summed, expected)
 
 
diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py
index 0e31c0698cb1e..4a8ad65200caa 100644
--- a/pandas/tests/groupby/methods/test_quantile.py
+++ b/pandas/tests/groupby/methods/test_quantile.py
@@ -162,7 +162,8 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby,
 def test_quantile_raises():
     df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"])
 
-    with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"):
+    msg = "dtype 'object' does not support operation 'quantile'"
+    with pytest.raises(TypeError, match=msg):
         df.groupby("key").quantile()
 
 
@@ -241,7 +242,6 @@ def test_groupby_quantile_nullable_array(values, q):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
 @pytest.mark.parametrize("numeric_only", [True, False])
 def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only):
@@ -251,9 +251,8 @@ def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only):
         expected = df.groupby("a")[["b"]].quantile(q)
         tm.assert_frame_equal(result, expected)
     else:
-        with pytest.raises(
-            TypeError, match="'quantile' cannot be performed against 'object' dtypes!"
-        ):
+        msg = "dtype '.*' does not support operation 'quantile'"
+        with pytest.raises(TypeError, match=msg):
             df.groupby("a").quantile(q, numeric_only=numeric_only)
 
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 0d13db79835ba..3305b48a4dcdc 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -425,7 +425,7 @@ def test_frame_multi_key_function_list():
     tm.assert_frame_equal(agged, expected)
 
 
-def test_frame_multi_key_function_list_partial_failure():
+def test_frame_multi_key_function_list_partial_failure(using_infer_string):
     data = DataFrame(
         {
             "A": [
@@ -476,6 +476,8 @@ def test_frame_multi_key_function_list_partial_failure():
     grouped = data.groupby(["A", "B"])
     funcs = ["mean", "std"]
     msg = re.escape("agg function failed [how->mean,dtype->")
+    if using_infer_string:
+        msg = "dtype 'str' does not support operation 'mean'"
     with pytest.raises(TypeError, match=msg):
         grouped.agg(funcs)
 
@@ -662,9 +664,11 @@ def test_groupby_multi_corner(df):
     tm.assert_frame_equal(agged, expected)
 
 
-def test_raises_on_nuisance(df):
+def test_raises_on_nuisance(df, using_infer_string):
     grouped = df.groupby("A")
     msg = re.escape("agg function failed [how->mean,dtype->")
+    if using_infer_string:
+        msg = "dtype 'str' does not support operation 'mean'"
     with pytest.raises(TypeError, match=msg):
         grouped.agg("mean")
     with pytest.raises(TypeError, match=msg):
@@ -699,7 +703,7 @@ def test_keep_nuisance_agg(df, agg_function):
     ["sum", "mean", "prod", "std", "var", "sem", "median"],
 )
 @pytest.mark.parametrize("numeric_only", [True, False])
-def test_omit_nuisance_agg(df, agg_function, numeric_only):
+def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
     # GH 38774, GH 38815
     grouped = df.groupby("A")
 
@@ -707,7 +711,10 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only):
     if agg_function in no_drop_nuisance and not numeric_only:
         # Added numeric_only as part of GH#46560; these do not drop nuisance
         # columns when numeric_only is False
-        if agg_function in ("std", "sem"):
+        if using_infer_string:
+            msg = f"dtype 'str' does not support operation '{agg_function}'"
+            klass = TypeError
+        elif agg_function in ("std", "sem"):
             klass = ValueError
             msg = "could not convert string to float: 'one'"
         else:
@@ -728,16 +735,24 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only):
         tm.assert_frame_equal(result, expected)
 
 
-def test_raise_on_nuisance_python_single(df):
+def test_raise_on_nuisance_python_single(df, using_infer_string):
     # GH 38815
     grouped = df.groupby("A")
-    with pytest.raises(ValueError, match="could not convert"):
+
+    err = ValueError
+    msg = "could not convert"
+    if using_infer_string:
+        err = TypeError
+        msg = "dtype 'str' does not support operation 'skew'"
+    with pytest.raises(err, match=msg):
         grouped.skew()
 
 
-def test_raise_on_nuisance_python_multiple(three_group):
+def test_raise_on_nuisance_python_multiple(three_group, using_infer_string):
     grouped = three_group.groupby(["A", "B"])
     msg = re.escape("agg function failed [how->mean,dtype->")
+    if using_infer_string:
+        msg = "dtype 'str' does not support operation 'mean'"
     with pytest.raises(TypeError, match=msg):
         grouped.agg("mean")
     with pytest.raises(TypeError, match=msg):
@@ -775,12 +790,16 @@ def test_nonsense_func():
         df.groupby(lambda x: x + "foo")
 
 
-def test_wrap_aggregated_output_multindex(multiindex_dataframe_random_data):
+def test_wrap_aggregated_output_multindex(
+    multiindex_dataframe_random_data, using_infer_string
+):
     df = multiindex_dataframe_random_data.T
     df["baz", "two"] = "peekaboo"
 
     keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
     msg = re.escape("agg function failed [how->mean,dtype->")
+    if using_infer_string:
+        msg = "dtype 'str' does not support operation 'mean'"
     with pytest.raises(TypeError, match=msg):
         df.groupby(keys).agg("mean")
     agged = df.drop(columns=("baz", "two")).groupby(keys).agg("mean")
@@ -960,8 +979,10 @@ def test_groupby_with_hier_columns():
 
 def test_grouping_ndarray(df):
     grouped = df.groupby(df["A"].values)
+    grouped2 = df.groupby(df["A"].rename(None))
+
     result = grouped.sum()
-    expected = df.groupby(df["A"].rename(None)).sum()
+    expected = grouped2.sum()
     tm.assert_frame_equal(result, expected)
 
 
@@ -1457,8 +1478,8 @@ def test_no_dummy_key_names(df):
     result = df.groupby(df["A"].values).sum()
     assert result.index.name is None
 
-    result = df.groupby([df["A"].values, df["B"].values]).sum()
-    assert result.index.names == (None, None)
+    result2 = df.groupby([df["A"].values, df["B"].values]).sum()
+    assert result2.index.names == (None, None)
 
 
 def test_groupby_sort_multiindex_series():
@@ -1761,6 +1782,7 @@ def get_categorical_invalid_expected():
     is_per = isinstance(df.dtypes.iloc[0], pd.PeriodDtype)
     is_dt64 = df.dtypes.iloc[0].kind == "M"
     is_cat = isinstance(values, Categorical)
+    is_str = isinstance(df.dtypes.iloc[0], pd.StringDtype)
 
     if (
         isinstance(values, Categorical)
@@ -1785,13 +1807,15 @@ def get_categorical_invalid_expected():
 
     if op in ["prod", "sum", "skew"]:
         # ops that require more than just ordered-ness
-        if is_dt64 or is_cat or is_per:
+        if is_dt64 or is_cat or is_per or (is_str and op != "sum"):
             # GH#41291
             # datetime64 -> prod and sum are invalid
             if is_dt64:
                 msg = "datetime64 type does not support"
             elif is_per:
                 msg = "Period type does not support"
+            elif is_str:
+                msg = f"dtype 'str' does not support operation '{op}'"
             else:
                 msg = "category type does not support"
             if op == "skew":
@@ -2714,7 +2738,7 @@ def test_obj_with_exclusions_duplicate_columns():
 def test_groupby_numeric_only_std_no_result(numeric_only):
     # GH 51080
     dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}]
-    df = DataFrame(dicts_non_numeric)
+    df = DataFrame(dicts_non_numeric, dtype=object)
     dfgb = df.groupby("a", as_index=False, sort=False)
 
     if numeric_only:
@@ -2773,10 +2797,14 @@ def test_grouping_with_categorical_interval_columns():
 def test_groupby_sum_on_nan_should_return_nan(bug_var):
     # GH 24196
     df = DataFrame({"A": [bug_var, bug_var, bug_var, np.nan]})
+    if isinstance(bug_var, str):
+        df = df.astype(object)
     dfgb = df.groupby(lambda x: x)
     result = dfgb.sum(min_count=1)
 
-    expected_df = DataFrame([bug_var, bug_var, bug_var, None], columns=["A"])
+    expected_df = DataFrame(
+        [bug_var, bug_var, bug_var, None], columns=["A"], dtype=df["A"].dtype
+    )
     tm.assert_frame_equal(result, expected_df)
 
 
diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py
index 0832b67b38098..a1f4627475bab 100644
--- a/pandas/tests/groupby/test_groupby_subclass.py
+++ b/pandas/tests/groupby/test_groupby_subclass.py
@@ -109,7 +109,7 @@ def test_groupby_resample_preserves_subclass(obj):
 
     df = obj(
         {
-            "Buyer": "Carl Carl Carl Carl Joe Carl".split(),
+            "Buyer": Series("Carl Carl Carl Carl Joe Carl".split(), dtype=object),
             "Quantity": [18, 3, 5, 1, 9, 3],
             "Date": [
                 datetime(2013, 9, 1, 13, 0),
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
index 41e00f8121b14..cb4569812f600 100644
--- a/pandas/tests/groupby/test_numeric_only.py
+++ b/pandas/tests/groupby/test_numeric_only.py
@@ -28,7 +28,8 @@ def df(self):
                 "group": [1, 1, 2],
                 "int": [1, 2, 3],
                 "float": [4.0, 5.0, 6.0],
-                "string": list("abc"),
+                "string": Series(["a", "b", "c"], dtype="str"),
+                "object": Series(["a", "b", "c"], dtype=object),
                 "category_string": Series(list("abc")).astype("category"),
                 "category_int": [7, 8, 9],
                 "datetime": date_range("20130101", periods=3),
@@ -40,6 +41,7 @@ def df(self):
                 "int",
                 "float",
                 "string",
+                "object",
                 "category_string",
                 "category_int",
                 "datetime",
@@ -112,6 +114,7 @@ def test_first_last(self, df, method):
                 "int",
                 "float",
                 "string",
+                "object",
                 "category_string",
                 "category_int",
                 "datetime",
@@ -159,7 +162,9 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
 
         # object dtypes for transformations are not implemented in Cython and
         # have no Python fallback
-        exception = NotImplementedError if method.startswith("cum") else TypeError
+        exception = (
+            (NotImplementedError, TypeError) if method.startswith("cum") else TypeError
+        )
 
         if method in ("min", "max", "cummin", "cummax", "cumsum", "cumprod"):
             # The methods default to numeric_only=False and raise TypeError
@@ -170,6 +175,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     # cumsum/cummin/cummax/cumprod
                     "function is not implemented for this dtype",
+                    f"dtype 'str' does not support operation '{method}'",
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -180,7 +186,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     "category type does not support sum operations",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     re.escape(f"agg function failed [how->{method},dtype->string]"),
-                    re.escape(f"agg function failed [how->{method},dtype->str]"),
+                    f"dtype 'str' does not support operation '{method}'",
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -198,7 +204,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     f"Cannot perform {method} with non-ordered Categorical",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     re.escape(f"agg function failed [how->{method},dtype->string]"),
-                    re.escape(f"agg function failed [how->{method},dtype->str]"),
+                    f"dtype 'str' does not support operation '{method}'",
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -299,7 +305,9 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
                 re.escape(f"agg function failed [how->{kernel},dtype->object]"),
             ]
         )
-        if kernel == "idxmin":
+        if kernel == "quantile":
+            msg = "dtype 'object' does not support operation 'quantile'"
+        elif kernel == "idxmin":
             msg = "'<' not supported between instances of 'type' and 'type'"
         elif kernel == "idxmax":
             msg = "'>' not supported between instances of 'type' and 'type'"
@@ -379,7 +387,7 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
     # that succeed should not be allowed to fail (without deprecation, at least)
     if groupby_func in fails_on_numeric_object and dtype is object:
         if groupby_func == "quantile":
-            msg = "cannot be performed against 'object' dtypes"
+            msg = "dtype 'object' does not support operation 'quantile'"
         else:
             msg = "is not supported for object dtype"
         with pytest.raises(TypeError, match=msg):
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index 38b4abfddda1e..1e0a15d0ba796 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -8,8 +8,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas import (
     Categorical,
     DataFrame,
@@ -106,10 +104,9 @@ def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""):
                     gb.transform(groupby_func, *args)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("how", ["method", "agg", "transform"])
 def test_groupby_raises_string(
-    how, by, groupby_series, groupby_func, df_with_string_col
+    how, by, groupby_series, groupby_func, df_with_string_col, using_infer_string
 ):
     df = df_with_string_col
     args = get_groupby_method_args(groupby_func, df)
@@ -169,7 +166,7 @@ def test_groupby_raises_string(
             TypeError,
             re.escape("agg function failed [how->prod,dtype->object]"),
         ),
-        "quantile": (TypeError, "cannot be performed against 'object' dtypes!"),
+        "quantile": (TypeError, "dtype 'object' does not support operation 'quantile'"),
         "rank": (None, ""),
         "sem": (ValueError, "could not convert string to float"),
         "shift": (None, ""),
@@ -183,6 +180,37 @@ def test_groupby_raises_string(
         ),
     }[groupby_func]
 
+    if using_infer_string:
+        if groupby_func in [
+            "prod",
+            "mean",
+            "median",
+            "cumsum",
+            "cumprod",
+            "std",
+            "sem",
+            "var",
+            "skew",
+            "quantile",
+        ]:
+            msg = f"dtype 'str' does not support operation '{groupby_func}'"
+            if groupby_func in ["sem", "std", "skew"]:
+                # The object-dtype raises ValueError when trying to convert to numeric.
+                klass = TypeError
+        elif groupby_func == "pct_change" and df["d"].dtype.storage == "pyarrow":
+            # This doesn't go through EA._groupby_op so the message isn't controlled
+            #  there.
+            msg = "operation 'truediv' not supported for dtype 'str' with dtype 'str'"
+        elif groupby_func == "diff" and df["d"].dtype.storage == "pyarrow":
+            # This doesn't go through EA._groupby_op so the message isn't controlled
+            #  there.
+            msg = "operation 'sub' not supported for dtype 'str' with dtype 'str'"
+
+        elif groupby_func in ["cummin", "cummax"]:
+            msg = msg.replace("object", "str")
+        elif groupby_func == "corrwith":
+            msg = "Cannot perform reduction 'mean' with string dtype"
+
     if groupby_func == "fillna":
         kind = "Series" if groupby_series else "DataFrame"
         warn_msg = f"{kind}GroupBy.fillna is deprecated"
@@ -211,7 +239,12 @@ def func(x):
 @pytest.mark.parametrize("how", ["agg", "transform"])
 @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
 def test_groupby_raises_string_np(
-    how, by, groupby_series, groupby_func_np, df_with_string_col
+    how,
+    by,
+    groupby_series,
+    groupby_func_np,
+    df_with_string_col,
+    using_infer_string,
 ):
     # GH#50749
     df = df_with_string_col
@@ -228,6 +261,15 @@ def test_groupby_raises_string_np(
             "Cannot perform reduction 'mean' with string dtype",
         ),
     }[groupby_func_np]
+
+    if using_infer_string:
+        if groupby_func_np is np.mean:
+            klass = TypeError
+        msg = (
+            f"Cannot perform reduction '{groupby_func_np.__name__}' "
+            "with string dtype"
+        )
+
     _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index a8fb1b392322d..b7b80b5e427ff 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -187,7 +187,7 @@ def test_api_compat_before_use(attr):
     getattr(rs, attr)
 
 
-def tests_raises_on_nuisance(test_frame):
+def tests_raises_on_nuisance(test_frame, using_infer_string):
     df = test_frame
     df["D"] = "foo"
     r = df.resample("h")
@@ -197,6 +197,8 @@ def tests_raises_on_nuisance(test_frame):
 
     expected = r[["A", "B", "C"]].mean()
     msg = re.escape("agg function failed [how->mean,dtype->")
+    if using_infer_string:
+        msg = "dtype 'str' does not support operation 'mean'"
     with pytest.raises(TypeError, match=msg):
         r.mean()
     result = r.mean(numeric_only=True)
@@ -881,7 +883,9 @@ def test_end_and_end_day_origin(
         ("sem", lib.no_default, "could not convert string to float"),
     ],
 )
-def test_frame_downsample_method(method, numeric_only, expected_data):
+def test_frame_downsample_method(
+    method, numeric_only, expected_data, using_infer_string
+):
     # GH#46442 test if `numeric_only` behave as expected for DataFrameGroupBy
 
     index = date_range("2018-01-01", periods=2, freq="D")
@@ -898,6 +902,11 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
         if method in ("var", "mean", "median", "prod"):
             klass = TypeError
             msg = re.escape(f"agg function failed [how->{method},dtype->")
+            if using_infer_string:
+                msg = f"dtype 'str' does not support operation '{method}'"
+        elif method in ["std", "sem"] and using_infer_string:
+            klass = TypeError
+            msg = f"dtype 'str' does not support operation '{method}'"
         else:
             klass = ValueError
             msg = expected_data
@@ -932,7 +941,9 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
         ("last", lib.no_default, ["cat_2"]),
     ],
 )
-def test_series_downsample_method(method, numeric_only, expected_data):
+def test_series_downsample_method(
+    method, numeric_only, expected_data, using_infer_string
+):
     # GH#46442 test if `numeric_only` behave as expected for SeriesGroupBy
 
     index = date_range("2018-01-01", periods=2, freq="D")
@@ -948,8 +959,11 @@ def test_series_downsample_method(method, numeric_only, expected_data):
             func(**kwargs)
     elif method == "prod":
         msg = re.escape("agg function failed [how->prod,dtype->")
+        if using_infer_string:
+            msg = "dtype 'str' does not support operation 'prod'"
         with pytest.raises(TypeError, match=msg):
             func(**kwargs)
+
     else:
         result = func(**kwargs)
         expected = Series(expected_data, index=expected_index)
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index 0f743332acbbe..65bfea0b9beea 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -620,7 +620,7 @@ def test_join_non_unique_period_index(self):
         )
         tm.assert_frame_equal(result, expected)
 
-    def test_mixed_type_join_with_suffix(self):
+    def test_mixed_type_join_with_suffix(self, using_infer_string):
         # GH #916
         df = DataFrame(
             np.random.default_rng(2).standard_normal((20, 6)),
@@ -631,6 +631,8 @@ def test_mixed_type_join_with_suffix(self):
 
         grouped = df.groupby("id")
         msg = re.escape("agg function failed [how->mean,dtype->")
+        if using_infer_string:
+            msg = "dtype 'str' does not support operation 'mean'"
         with pytest.raises(TypeError, match=msg):
             grouped.mean()
         mn = grouped.mean(numeric_only=True)
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index eccf676b87f89..d8a9acdc561fd 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -935,12 +935,14 @@ def test_margins(self, data):
         for value_col in table.columns.levels[0]:
             self._check_output(table[value_col], value_col, data)
 
-    def test_no_col(self, data):
+    def test_no_col(self, data, using_infer_string):
         # no col
 
         # to help with a buglet
         data.columns = [k * 2 for k in data.columns]
         msg = re.escape("agg function failed [how->mean,dtype->")
+        if using_infer_string:
+            msg = "dtype 'str' does not support operation 'mean'"
         with pytest.raises(TypeError, match=msg):
             data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
         table = data.drop(columns="CC").pivot_table(
@@ -990,7 +992,7 @@ def test_no_col(self, data):
         ],
     )
     def test_margin_with_only_columns_defined(
-        self, columns, aggfunc, values, expected_columns
+        self, columns, aggfunc, values, expected_columns, using_infer_string
     ):
         # GH 31016
         df = DataFrame(
@@ -1014,6 +1016,8 @@ def test_margin_with_only_columns_defined(
         )
         if aggfunc != "sum":
             msg = re.escape("agg function failed [how->mean,dtype->")
+            if using_infer_string:
+                msg = "dtype 'str' does not support operation 'mean'"
             with pytest.raises(TypeError, match=msg):
                 df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
         if "B" not in columns: