66from numpy .random import randint
77import pytest
88
9+
910from pandas import DataFrame , Index , MultiIndex , Series , concat , isna , notna
1011import pandas .core .strings as strings
1112import pandas .util .testing as tm
@@ -892,27 +893,39 @@ def test_casemethods(self):
892893 def test_replace (self ):
893894 values = Series (['fooBAD__barBAD' , NA ])
894895
895- result = values .str .replace ('BAD[_]*' , '' )
896+ result = values .str .replace ('BAD[_]*' , '' , regex = True )
896897 exp = Series (['foobar' , NA ])
897898 tm .assert_series_equal (result , exp )
898899
899- result = values .str .replace ('BAD[_]*' , '' , n = 1 )
900+ result = values .str .replace ('BAD[_]*' , '' , regex = True , n = 1 )
900901 exp = Series (['foobarBAD' , NA ])
901902 tm .assert_series_equal (result , exp )
902903
903904 # mixed
904905 mixed = Series (['aBAD' , NA , 'bBAD' , True , datetime .today (), 'fooBAD' ,
905906 None , 1 , 2. ])
906907
907- rs = Series (mixed ).str .replace ('BAD[_]*' , '' )
908+ rs = Series (mixed ).str .replace ('BAD[_]*' , '' , regex = True )
908909 xp = Series (['a' , NA , 'b' , NA , NA , 'foo' , NA , NA , NA ])
909910 assert isinstance (rs , Series )
910911 tm .assert_almost_equal (rs , xp )
911912
913+ # unicode
914+ values = Series ([u'fooBAD__barBAD' , NA ])
915+
916+ result = values .str .replace ('BAD[_]*' , '' , regex = True )
917+ exp = Series ([u'foobar' , NA ])
918+ tm .assert_series_equal (result , exp )
919+
920+ result = values .str .replace ('BAD[_]*' , '' , n = 1 , regex = True )
921+ exp = Series ([u'foobarBAD' , NA ])
922+ tm .assert_series_equal (result , exp )
923+
912924 # flags + unicode
913925 values = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )])
914926 exp = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )])
915- result = values .str .replace (r"(?<=\w),(?=\w)" , ", " , flags = re .UNICODE )
927+ result = values .str .replace (r"(?<=\w),(?=\w)" , ", " , regex = True ,
928+ flags = re .UNICODE )
916929 tm .assert_series_equal (result , exp )
917930
918931 # GH 13438
@@ -930,7 +943,7 @@ def test_replace_callable(self):
930943
931944 # test with callable
932945 repl = lambda m : m .group (0 ).swapcase ()
933- result = values .str .replace ('[a-z][A-Z]{2}' , repl , n = 2 )
946+ result = values .str .replace ('[a-z][A-Z]{2}' , repl , n = 2 , regex = True )
934947 exp = Series (['foObaD__baRbaD' , NA ])
935948 tm .assert_series_equal (result , exp )
936949
@@ -940,21 +953,21 @@ def test_replace_callable(self):
940953
941954 repl = lambda : None
942955 with pytest .raises (TypeError , match = p_err ):
943- values .str .replace ('a' , repl )
956+ values .str .replace ('a' , repl , regex = True )
944957
945958 repl = lambda m , x : None
946959 with pytest .raises (TypeError , match = p_err ):
947- values .str .replace ('a' , repl )
960+ values .str .replace ('a' , repl , regex = True )
948961
949962 repl = lambda m , x , y = None : None
950963 with pytest .raises (TypeError , match = p_err ):
951- values .str .replace ('a' , repl )
964+ values .str .replace ('a' , repl , regex = True )
952965
953966 # test regex named groups
954967 values = Series (['Foo Bar Baz' , NA ])
955968 pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
956969 repl = lambda m : m .group ('middle' ).swapcase ()
957- result = values .str .replace (pat , repl )
970+ result = values .str .replace (pat , repl , regex = True )
958971 exp = Series (['bAR' , NA ])
959972 tm .assert_series_equal (result , exp )
960973
@@ -964,28 +977,39 @@ def test_replace_compiled_regex(self):
964977
965978 # test with compiled regex
966979 pat = re .compile (r'BAD[_]*' )
967- result = values .str .replace (pat , '' )
980+ result = values .str .replace (pat , '' , regex = True )
968981 exp = Series (['foobar' , NA ])
969982 tm .assert_series_equal (result , exp )
970983
971- result = values .str .replace (pat , '' , n = 1 )
984+ result = values .str .replace (pat , '' , n = 1 , regex = True )
972985 exp = Series (['foobarBAD' , NA ])
973986 tm .assert_series_equal (result , exp )
974987
975988 # mixed
976989 mixed = Series (['aBAD' , NA , 'bBAD' , True , datetime .today (), 'fooBAD' ,
977990 None , 1 , 2. ])
978991
979- rs = Series (mixed ).str .replace (pat , '' )
992+ rs = Series (mixed ).str .replace (pat , '' , regex = True )
980993 xp = Series (['a' , NA , 'b' , NA , NA , 'foo' , NA , NA , NA ])
981994 assert isinstance (rs , Series )
982995 tm .assert_almost_equal (rs , xp )
983996
997+ # unicode
998+ values = Series ([u'fooBAD__barBAD' , NA ])
999+
1000+ result = values .str .replace (pat , '' , regex = True )
1001+ exp = Series ([u'foobar' , NA ])
1002+ tm .assert_series_equal (result , exp )
1003+
1004+ result = values .str .replace (pat , '' , n = 1 , regex = True )
1005+ exp = Series ([u'foobarBAD' , NA ])
1006+ tm .assert_series_equal (result , exp )
1007+
9841008 # flags + unicode
9851009 values = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )])
9861010 exp = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )])
9871011 pat = re .compile (r"(?<=\w),(?=\w)" , flags = re .UNICODE )
988- result = values .str .replace (pat , ", " )
1012+ result = values .str .replace (pat , ", " , regex = True )
9891013 tm .assert_series_equal (result , exp )
9901014
9911015 # case and flags provided to str.replace will have no effect
@@ -995,29 +1019,30 @@ def test_replace_compiled_regex(self):
9951019
9961020 with pytest .raises (ValueError ,
9971021 match = "case and flags cannot be" ):
998- result = values .str .replace (pat , '' , flags = re .IGNORECASE )
1022+ result = values .str .replace (pat , '' , flags = re .IGNORECASE ,
1023+ regex = True )
9991024
10001025 with pytest .raises (ValueError ,
10011026 match = "case and flags cannot be" ):
1002- result = values .str .replace (pat , '' , case = False )
1027+ result = values .str .replace (pat , '' , case = False , regex = True )
10031028
10041029 with pytest .raises (ValueError ,
10051030 match = "case and flags cannot be" ):
1006- result = values .str .replace (pat , '' , case = True )
1031+ result = values .str .replace (pat , '' , case = True , regex = True )
10071032
10081033 # test with callable
10091034 values = Series (['fooBAD__barBAD' , NA ])
10101035 repl = lambda m : m .group (0 ).swapcase ()
10111036 pat = re .compile ('[a-z][A-Z]{2}' )
1012- result = values .str .replace (pat , repl , n = 2 )
1037+ result = values .str .replace (pat , repl , n = 2 , regex = True )
10131038 exp = Series (['foObaD__baRbaD' , NA ])
10141039 tm .assert_series_equal (result , exp )
10151040
10161041 def test_replace_literal (self ):
10171042 # GH16808 literal replace (regex=False vs regex=True)
10181043 values = Series (['f.o' , 'foo' , NA ])
10191044 exp = Series (['bao' , 'bao' , NA ])
1020- result = values .str .replace ('f.' , 'ba' )
1045+ result = values .str .replace ('f.' , 'ba' , regex = True )
10211046 tm .assert_series_equal (result , exp )
10221047
10231048 exp = Series (['bao' , 'foo' , NA ])
@@ -2710,6 +2735,7 @@ def test_partition_deprecation(self):
27102735 result = values .str .rpartition (pat = '_' )
27112736 tm .assert_frame_equal (result , expected )
27122737
2738+ @pytest .mark .filterwarnings ("ignore: '|' is interpreted as a literal" )
27132739 def test_pipe_failures (self ):
27142740 # #2119
27152741 s = Series (['A|B|C' ])
@@ -2719,7 +2745,7 @@ def test_pipe_failures(self):
27192745
27202746 tm .assert_series_equal (result , exp )
27212747
2722- result = s .str .replace ('|' , ' ' )
2748+ result = s .str .replace ('|' , ' ' , regex = None )
27232749 exp = Series (['A B C' ])
27242750
27252751 tm .assert_series_equal (result , exp )
@@ -2980,17 +3006,17 @@ def test_replace_moar(self):
29803006 s = Series (['A' , 'B' , 'C' , 'Aaba' , 'Baca' , '' , NA , 'CABA' ,
29813007 'dog' , 'cat' ])
29823008
2983- result = s .str .replace ('A' , 'YYY' )
3009+ result = s .str .replace ('A' , 'YYY' , regex = True )
29843010 expected = Series (['YYY' , 'B' , 'C' , 'YYYaba' , 'Baca' , '' , NA ,
29853011 'CYYYBYYY' , 'dog' , 'cat' ])
29863012 assert_series_equal (result , expected )
29873013
2988- result = s .str .replace ('A' , 'YYY' , case = False )
3014+ result = s .str .replace ('A' , 'YYY' , case = False , regex = True )
29893015 expected = Series (['YYY' , 'B' , 'C' , 'YYYYYYbYYY' , 'BYYYcYYY' , '' , NA ,
29903016 'CYYYBYYY' , 'dog' , 'cYYYt' ])
29913017 assert_series_equal (result , expected )
29923018
2993- result = s .str .replace ('^.a|dog' , 'XX-XX ' , case = False )
3019+ result = s .str .replace ('^.a|dog' , 'XX-XX ' , case = False , regex = True )
29943020 expected = Series (['A' , 'B' , 'C' , 'XX-XX ba' , 'XX-XX ca' , '' , NA ,
29953021 'XX-XX BA' , 'XX-XX ' , 'XX-XX t' ])
29963022 assert_series_equal (result , expected )
@@ -3162,6 +3188,40 @@ def test_method_on_bytes(self):
31623188 match = "Cannot use .str.cat with values of.*" ):
31633189 lhs .str .cat (rhs )
31643190
@pytest.mark.filterwarnings("ignore: '.' is interpreted as a literal")
@pytest.mark.parametrize("regex, expected_array", [
    (True, ['foofoofoo', 'foofoofoo']),
    (False, ['abc', '123']),
    (None, ['abc', '123'])
])
def test_replace_single_pattern(self, regex, expected_array):
    """Check ``.str.replace`` with the one-character pattern ``'.'``.

    GH 24804.

    Parameters
    ----------
    regex : bool or None
        Value forwarded as the ``regex`` keyword of ``.str.replace``.
    expected_array : list of str
        Expected contents of the result.  For ``regex=True`` every
        character of each input string is replaced by ``'foo'``; for
        ``False`` and ``None`` the inputs, which contain no literal
        ``'.'``, are expected back unchanged.
    """
    # Neither input contains a literal dot, so only the regex
    # interpretation of '.' can change them.
    values = Series(['abc', '123'])

    result = values.str.replace('.', 'foo', regex=regex)
    expected = Series(expected_array)
    tm.assert_series_equal(result, expected)
3203+
@pytest.mark.parametrize("input_array, single_char, replace_char, "
                         "expect_array, warn",
                         [("a.c", ".", "b", "abc", True),
                          ("a@c", "@", "at", "aatc", False)]
                         )
def test_replace_warning_single_character(self, input_array,
                                          single_char, replace_char,
                                          expect_array, warn):
    """Check the replacement result for a single-character pattern.

    GH 24804.

    Parameters
    ----------
    input_array : str
        The single string placed in the input Series.
    single_char : str
        One-character pattern passed to ``.str.replace``.
    replace_char : str
        Replacement string.
    expect_array : str
        The single string expected in the result Series.
    warn : bool
        When true, the call is made with ``regex=None`` and must emit a
        ``FutureWarning``; otherwise the call is made without the
        ``regex`` keyword and no warning check is performed.
    """
    values = Series([input_array])
    if warn:
        # The stack level of the warning is deliberately not checked.
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = values.str.replace(single_char, replace_char,
                                        regex=None)
    else:
        result = values.str.replace(single_char, replace_char)

    expected = Series([expect_array])
    tm.assert_series_equal(result, expected)
3224+
31653225 def test_casefold (self ):
31663226 # GH25405
31673227 expected = Series (['ss' , NA , 'case' , 'ssd' ])
0 commit comments