Skip to content

Commit 1f97148

Browse files
merge master
1 parent 42b4c97 commit 1f97148

File tree

4 files changed

+79
-33
lines changed

4 files changed

+79
-33
lines changed

doc/source/whatsnew/v0.25.0.rst

+2-3
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ Other API Changes
9393
Deprecations
9494
~~~~~~~~~~~~
9595

96+
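- :func:`Series.str.replace` now issues a ``FutureWarning`` when ``pat`` is a single special regex character and ``regex`` is not specified. For now ``pat`` is still treated as a literal string (as if ``regex=False``), but in a future version it will be interpreted as a regular expression. Pass ``regex`` explicitly to silence the warning. (:issue:`24804`)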
9697
- Deprecated the ``M`` (months) and ``Y`` (year) values of the ``units`` parameter of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
9798

9899
.. _whatsnew_0250.prior_deprecations:
@@ -171,10 +172,8 @@ Conversion
171172

172173
Strings
173174
^^^^^^^
175+
- Bug in :func:`Series.str.replace` not applying regex in patterns of length 1 (:issue:`24804`)
174176

175-
-
176-
-
177-
-
178177

179178

180179
Interval

pandas/core/reshape/melt.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,8 @@ def melt_stub(df, stub, i, j, value_vars, sep):
420420
newdf = melt(df, id_vars=i, value_vars=value_vars,
421421
value_name=stub.rstrip(sep), var_name=j)
422422
newdf[j] = Categorical(newdf[j])
423-
newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "")
423+
newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "",
424+
regex=True)
424425

425426
# GH17627 Cast numerics suffixes to int/float
426427
newdf[j] = to_numeric(newdf[j], errors='ignore')

pandas/core/strings.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ def str_endswith(arr, pat, na=np.nan):
425425
return _na_map(f, arr, na, dtype=bool)
426426

427427

428-
def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
428+
def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=None):
429429
r"""
430430
Replace occurrences of pattern/regex in the Series/Index with
431431
some other string. Equivalent to :meth:`str.replace` or
@@ -456,9 +456,13 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
456456
flags : int, default 0 (no flags)
457457
- re module flags, e.g. re.IGNORECASE
458458
- Cannot be set if `pat` is a compiled regex
459-
regex : bool, default True
459+
regex : boolean, default None
460460
- If True, assumes the passed-in pattern is a regular expression.
461461
- If False, treats the pattern as a literal string
462+
- If `pat` is a single character and `regex` is not specified, `pat`
463+
is interpreted as a string literal. If `pat` is also a regular
464+
expression symbol, a warning is issued that in the future `pat`
465+
will be interpreted as a regex, rather than a literal.
462466
- Cannot be set to False if `pat` is a compiled regex or `repl` is
463467
a callable.
464468
@@ -565,7 +569,7 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
565569
# add case flag, if provided
566570
if case is False:
567571
flags |= re.IGNORECASE
568-
if is_compiled_re or len(pat) > 1 or flags or callable(repl):
572+
if is_compiled_re or pat or flags or callable(repl):
569573
n = n if n >= 0 else 0
570574
compiled = re.compile(pat, flags=flags)
571575
f = lambda x: compiled.sub(repl=repl, string=x, count=n)
@@ -578,6 +582,12 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
578582
if callable(repl):
579583
raise ValueError("Cannot use a callable replacement when "
580584
"regex=False")
585+
# if regex is default None, and a single special character is given
586+
# in pat, still take it as a literal, and raise the Future warning
587+
if regex is None and len(pat) == 1 and pat in list(r"[\^$.|?*+()]"):
588+
warnings.warn("'{}' is interpreted as a literal in ".format(pat) +
589+
"default, not regex. It will change in the future.",
590+
FutureWarning)
581591
f = lambda x: x.replace(pat, repl, n)
582592

583593
return _na_map(f, arr)
@@ -2539,7 +2549,7 @@ def match(self, pat, case=True, flags=0, na=np.nan):
25392549
return self._wrap_result(result, fill_value=na)
25402550

25412551
@copy(str_replace)
2542-
def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
2552+
def replace(self, pat, repl, n=-1, case=None, flags=0, regex=None):
25432553
result = str_replace(self._parent, pat, repl, n=n, case=case,
25442554
flags=flags, regex=regex)
25452555
return self._wrap_result(result)

pandas/tests/test_strings.py

+61-25
Original file line numberDiff line numberDiff line change
@@ -967,38 +967,39 @@ def test_casemethods(self):
967967
def test_replace(self):
968968
values = Series(['fooBAD__barBAD', NA])
969969

970-
result = values.str.replace('BAD[_]*', '')
970+
result = values.str.replace('BAD[_]*', '', regex=True)
971971
exp = Series(['foobar', NA])
972972
tm.assert_series_equal(result, exp)
973973

974-
result = values.str.replace('BAD[_]*', '', n=1)
974+
result = values.str.replace('BAD[_]*', '', regex=True, n=1)
975975
exp = Series(['foobarBAD', NA])
976976
tm.assert_series_equal(result, exp)
977977

978978
# mixed
979979
mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD',
980980
None, 1, 2.])
981981

982-
rs = Series(mixed).str.replace('BAD[_]*', '')
982+
rs = Series(mixed).str.replace('BAD[_]*', '', regex=True)
983983
xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA])
984984
assert isinstance(rs, Series)
985985
tm.assert_almost_equal(rs, xp)
986986

987987
# unicode
988988
values = Series([u('fooBAD__barBAD'), NA])
989989

990-
result = values.str.replace('BAD[_]*', '')
990+
result = values.str.replace('BAD[_]*', '', regex=True)
991991
exp = Series([u('foobar'), NA])
992992
tm.assert_series_equal(result, exp)
993993

994-
result = values.str.replace('BAD[_]*', '', n=1)
994+
result = values.str.replace('BAD[_]*', '', n=1, regex=True)
995995
exp = Series([u('foobarBAD'), NA])
996996
tm.assert_series_equal(result, exp)
997997

998998
# flags + unicode
999999
values = Series([b"abcd,\xc3\xa0".decode("utf-8")])
10001000
exp = Series([b"abcd, \xc3\xa0".decode("utf-8")])
1001-
result = values.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE)
1001+
result = values.str.replace(r"(?<=\w),(?=\w)", ", ", regex=True,
1002+
flags=re.UNICODE)
10021003
tm.assert_series_equal(result, exp)
10031004

10041005
# GH 13438
@@ -1016,7 +1017,7 @@ def test_replace_callable(self):
10161017

10171018
# test with callable
10181019
repl = lambda m: m.group(0).swapcase()
1019-
result = values.str.replace('[a-z][A-Z]{2}', repl, n=2)
1020+
result = values.str.replace('[a-z][A-Z]{2}', repl, n=2, regex=True)
10201021
exp = Series(['foObaD__baRbaD', NA])
10211022
tm.assert_series_equal(result, exp)
10221023

@@ -1029,21 +1030,21 @@ def test_replace_callable(self):
10291030

10301031
repl = lambda: None
10311032
with pytest.raises(TypeError, match=p_err):
1032-
values.str.replace('a', repl)
1033+
values.str.replace('a', repl, regex=True)
10331034

10341035
repl = lambda m, x: None
10351036
with pytest.raises(TypeError, match=p_err):
1036-
values.str.replace('a', repl)
1037+
values.str.replace('a', repl, regex=True)
10371038

10381039
repl = lambda m, x, y=None: None
10391040
with pytest.raises(TypeError, match=p_err):
1040-
values.str.replace('a', repl)
1041+
values.str.replace('a', repl, regex=True)
10411042

10421043
# test regex named groups
10431044
values = Series(['Foo Bar Baz', NA])
10441045
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
10451046
repl = lambda m: m.group('middle').swapcase()
1046-
result = values.str.replace(pat, repl)
1047+
result = values.str.replace(pat, repl, regex=True)
10471048
exp = Series(['bAR', NA])
10481049
tm.assert_series_equal(result, exp)
10491050

@@ -1053,35 +1054,35 @@ def test_replace_compiled_regex(self):
10531054

10541055
# test with compiled regex
10551056
pat = re.compile(r'BAD[_]*')
1056-
result = values.str.replace(pat, '')
1057+
result = values.str.replace(pat, '', regex=True)
10571058
exp = Series(['foobar', NA])
10581059
tm.assert_series_equal(result, exp)
10591060

10601061
# mixed
10611062
mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD',
10621063
None, 1, 2.])
10631064

1064-
rs = Series(mixed).str.replace(pat, '')
1065+
rs = Series(mixed).str.replace(pat, '', regex=True)
10651066
xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA])
10661067
assert isinstance(rs, Series)
10671068
tm.assert_almost_equal(rs, xp)
10681069

10691070
# unicode
10701071
values = Series([u('fooBAD__barBAD'), NA])
10711072

1072-
result = values.str.replace(pat, '')
1073+
result = values.str.replace(pat, '', regex=True)
10731074
exp = Series([u('foobar'), NA])
10741075
tm.assert_series_equal(result, exp)
10751076

1076-
result = values.str.replace(pat, '', n=1)
1077+
result = values.str.replace(pat, '', n=1, regex=True)
10771078
exp = Series([u('foobarBAD'), NA])
10781079
tm.assert_series_equal(result, exp)
10791080

10801081
# flags + unicode
10811082
values = Series([b"abcd,\xc3\xa0".decode("utf-8")])
10821083
exp = Series([b"abcd, \xc3\xa0".decode("utf-8")])
10831084
pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
1084-
result = values.str.replace(pat, ", ")
1085+
result = values.str.replace(pat, ", ", regex=True)
10851086
tm.assert_series_equal(result, exp)
10861087

10871088
# case and flags provided to str.replace will have no effect
@@ -1091,29 +1092,30 @@ def test_replace_compiled_regex(self):
10911092

10921093
with pytest.raises(ValueError,
10931094
match="case and flags cannot be"):
1094-
result = values.str.replace(pat, '', flags=re.IGNORECASE)
1095+
result = values.str.replace(pat, '', flags=re.IGNORECASE,
1096+
regex=True)
10951097

10961098
with pytest.raises(ValueError,
10971099
match="case and flags cannot be"):
1098-
result = values.str.replace(pat, '', case=False)
1100+
result = values.str.replace(pat, '', case=False, regex=True)
10991101

11001102
with pytest.raises(ValueError,
11011103
match="case and flags cannot be"):
1102-
result = values.str.replace(pat, '', case=True)
1104+
result = values.str.replace(pat, '', case=True, regex=True)
11031105

11041106
# test with callable
11051107
values = Series(['fooBAD__barBAD', NA])
11061108
repl = lambda m: m.group(0).swapcase()
11071109
pat = re.compile('[a-z][A-Z]{2}')
1108-
result = values.str.replace(pat, repl, n=2)
1110+
result = values.str.replace(pat, repl, n=2, regex=True)
11091111
exp = Series(['foObaD__baRbaD', NA])
11101112
tm.assert_series_equal(result, exp)
11111113

11121114
def test_replace_literal(self):
11131115
# GH16808 literal replace (regex=False vs regex=True)
11141116
values = Series(['f.o', 'foo', NA])
11151117
exp = Series(['bao', 'bao', NA])
1116-
result = values.str.replace('f.', 'ba')
1118+
result = values.str.replace('f.', 'ba', regex=True)
11171119
tm.assert_series_equal(result, exp)
11181120

11191121
exp = Series(['bao', 'foo', NA])
@@ -2923,6 +2925,7 @@ def test_partition_deprecation(self):
29232925
result = values.str.rpartition(pat='_')
29242926
tm.assert_frame_equal(result, expected)
29252927

2928+
@pytest.mark.filterwarnings("ignore: '|' is interpreted as a literal")
29262929
def test_pipe_failures(self):
29272930
# #2119
29282931
s = Series(['A|B|C'])
@@ -2932,7 +2935,7 @@ def test_pipe_failures(self):
29322935

29332936
tm.assert_series_equal(result, exp)
29342937

2935-
result = s.str.replace('|', ' ')
2938+
result = s.str.replace('|', ' ', regex=None)
29362939
exp = Series(['A B C'])
29372940

29382941
tm.assert_series_equal(result, exp)
@@ -3244,17 +3247,17 @@ def test_replace_moar(self):
32443247
s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, 'CABA',
32453248
'dog', 'cat'])
32463249

3247-
result = s.str.replace('A', 'YYY')
3250+
result = s.str.replace('A', 'YYY', regex=True)
32483251
expected = Series(['YYY', 'B', 'C', 'YYYaba', 'Baca', '', NA,
32493252
'CYYYBYYY', 'dog', 'cat'])
32503253
assert_series_equal(result, expected)
32513254

3252-
result = s.str.replace('A', 'YYY', case=False)
3255+
result = s.str.replace('A', 'YYY', case=False, regex=True)
32533256
expected = Series(['YYY', 'B', 'C', 'YYYYYYbYYY', 'BYYYcYYY', '', NA,
32543257
'CYYYBYYY', 'dog', 'cYYYt'])
32553258
assert_series_equal(result, expected)
32563259

3257-
result = s.str.replace('^.a|dog', 'XX-XX ', case=False)
3260+
result = s.str.replace('^.a|dog', 'XX-XX ', case=False, regex=True)
32583261
expected = Series(['A', 'B', 'C', 'XX-XX ba', 'XX-XX ca', '', NA,
32593262
'XX-XX BA', 'XX-XX ', 'XX-XX t'])
32603263
assert_series_equal(result, expected)
@@ -3441,11 +3444,44 @@ def test_method_on_bytes(self):
34413444
['ad', 'be', 'cf'], 'S2').astype(object))
34423445
tm.assert_series_equal(result, expected)
34433446

3447+
@pytest.mark.filterwarnings("ignore: '.' is interpreted as a literal")
3448+
@pytest.mark.parametrize("regex, expected_array", [
3449+
(True, ['foofoofoo', 'foofoofoo']),
3450+
(False, ['abc', '123']),
3451+
(None, ['abc', '123'])
3452+
])
3453+
def test_replace_single_pattern(self, regex, expected_array):
3454+
values = Series(['abc', '123'])
3455+
# GH: 24804
3456+
result = values.str.replace('.', 'foo', regex=regex)
3457+
expected = Series(expected_array)
3458+
tm.assert_series_equal(result, expected)
3459+
3460+
@pytest.mark.parametrize("input_array, single_char, replace_char, "
3461+
"expect_array, warn",
3462+
[("a.c", ".", "b", "abc", True),
3463+
("a@c", "@", "at", "aatc", False)]
3464+
)
3465+
def test_replace_warning_single_character(self, input_array,
3466+
single_char, replace_char,
3467+
expect_array, warn):
3468+
# GH: 24804
3469+
values = Series([input_array])
3470+
if warn:
3471+
with tm.assert_produces_warning(FutureWarning,
3472+
check_stacklevel=False):
3473+
result = values.str.replace(single_char, replace_char)
3474+
else:
3475+
result = values.str.replace(single_char, replace_char)
3476+
3477+
expected = Series([expect_array])
3478+
=======
34443479
@pytest.mark.skipif(compat.PY2, reason='not in python2')
34453480
def test_casefold(self):
34463481
# GH25405
34473482
expected = Series(['ss', NA, 'case', 'ssd'])
34483483
s = Series(['ß', NA, 'case', 'ßd'])
34493484
result = s.str.casefold()
34503485

3486+
>>>>>>> github/master
34513487
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)