From f55c96879c3441b0aa9409da28b96b87ce839a57 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Fri, 15 Oct 2021 00:23:35 -0700
Subject: [PATCH 01/23] BUG: sort_index did not respect ignore_index when not
 sorting

---
 doc/source/whatsnew/v1.4.0.rst | 1 +
 pandas/core/generic.py         | 9 ++++++++-
 pandas/tests/test_sorting.py   | 9 +++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 243bcf6900d2e..b10ebd350459a 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -430,6 +430,7 @@ Indexing
 - Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`)
 - Bug in :meth:`DataFrame.query` where method calls in query strings led to errors when the ``numexpr`` package was installed. (:issue:`22435`)
 - Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`)
+- Bug in :meth:`DataFrame.sort_index` where `ignore_index=True` was not being respected when the passed dataframe was already sorted (:issue:`43591`)
 
 
 Missing
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 26c0b7426727c..5d5765977c49b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4590,10 +4590,17 @@ def sort_index(
         )
 
         if indexer is None:
+            if inplace:
+                result = self
+            else:
+                result = self.copy()
+
+            if ignore_index:
+                result.index = default_index(len(self))
             if inplace:
                 return
             else:
-                return self.copy()
+                return result
 
         baxis = self._get_block_manager_axis(axis)
         new_data = self._mgr.take(indexer, axis=baxis, verify=False)
diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
index a49b7c2b7f86e..17c1703831e3f 100644
--- a/pandas/tests/test_sorting.py
+++ b/pandas/tests/test_sorting.py
@@ -478,3 +478,12 @@ def test_mixed_str_nan():
     result = safe_sort(values)
     expected = np.array([np.nan, "a", "b", "b"], dtype=object)
     tm.assert_numpy_array_equal(result, expected)
+
+def test_respect_ignore_index():
+    # GH 43591
+    df = DataFrame({'a': [1, 2, 3]})
+    df.index = [4, 2, 0]
+    result = df.sort_index(ascending=False, ignore_index=True)
+    expected = DataFrame({'a': [1, 2, 3]})
+    expected.index = [0, 1, 2]
+    tm.assert_frame_equal(result, expected)
\ No newline at end of file

From e56f8fb6b8d1a8db1f18f5a52a1337824a2277e6 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Fri, 15 Oct 2021 00:26:24 -0700
Subject: [PATCH 02/23] BUG: sort_index did not respect ignore_index when not
 sorting

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index b10ebd350459a..780e6dddb47f0 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -430,7 +430,7 @@ Indexing
 - Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`)
 - Bug in :meth:`DataFrame.query` where method calls in query strings led to errors when the ``numexpr`` package was installed. (:issue:`22435`)
 - Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`)
-- Bug in :meth:`DataFrame.sort_index` where `ignore_index=True` was not being respected when the passed dataframe was already sorted (:issue:`43591`)
+- Bug in :meth:`DataFrame.sort_index` where `ignore_index=True` was not being respected when passed dataframe was already sorted (:issue:`43591`)
 
 
 Missing

From 282ef5148fa2242d667be777f117e2273908e4c6 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Fri, 15 Oct 2021 00:37:07 -0700
Subject: [PATCH 03/23] BUG: sort_index did not respect ignore_index when not
 sorting

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 780e6dddb47f0..b10ebd350459a 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -430,7 +430,7 @@ Indexing
 - Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`)
 - Bug in :meth:`DataFrame.query` where method calls in query strings led to errors when the ``numexpr`` package was installed. (:issue:`22435`)
 - Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`)
-- Bug in :meth:`DataFrame.sort_index` where `ignore_index=True` was not being respected when passed dataframe was already sorted (:issue:`43591`)
+- Bug in :meth:`DataFrame.sort_index` where `ignore_index=True` was not being respected when the passed dataframe was already sorted (:issue:`43591`)
 
 
 Missing

From 2c5402ea64729473367e8c19cee1949a9cf9ce5d Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Fri, 15 Oct 2021 00:38:56 -0700
Subject: [PATCH 04/23] BUG: sort_index did not respect ignore_index when not
 sorting

---
 pandas/tests/test_sorting.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
index 17c1703831e3f..c75815856c9ff 100644
--- a/pandas/tests/test_sorting.py
+++ b/pandas/tests/test_sorting.py
@@ -479,11 +479,12 @@ def test_mixed_str_nan():
     expected = np.array([np.nan, "a", "b", "b"], dtype=object)
     tm.assert_numpy_array_equal(result, expected)
 
+
 def test_respect_ignore_index():
     # GH 43591
-    df = DataFrame({'a': [1, 2, 3]})
+    df = DataFrame({"a": [1, 2, 3]})
     df.index = [4, 2, 0]
     result = df.sort_index(ascending=False, ignore_index=True)
-    expected = DataFrame({'a': [1, 2, 3]})
+    expected = DataFrame({"a": [1, 2, 3]})
     expected.index = [0, 1, 2]
-    tm.assert_frame_equal(result, expected)
\ No newline at end of file
+    tm.assert_frame_equal(result, expected)

From 837427bd96fee168ff77e76e8e730a50adbc6c10 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Sat, 16 Oct 2021 19:56:06 -0700
Subject: [PATCH 05/23] moved test to frame test directory

---
 pandas/tests/frame/methods/test_sort_index.py |  8 ++++++++
 pandas/tests/test_sorting.py                  | 10 ----------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py
index c1141f705acbc..c78a6d015cc16 100644
--- a/pandas/tests/frame/methods/test_sort_index.py
+++ b/pandas/tests/frame/methods/test_sort_index.py
@@ -9,6 +9,7 @@
     Index,
     IntervalIndex,
     MultiIndex,
+    RangeIndex,
     Series,
     Timestamp,
 )
@@ -418,6 +419,13 @@ def test_sort_index_ignore_index(
         tm.assert_frame_equal(result_df, expected_df)
         tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index))
 
+    def test_respect_ignore_index(self):
+        # GH 43591
+        df = DataFrame({"a": [1, 2, 3]}, index=RangeIndex(4, -1, -2))
+        result = df.sort_index(ascending=False, ignore_index=True)
+        expected = DataFrame({"a": [1, 2, 3]})
+        tm.assert_frame_equal(result, expected)
+
     @pytest.mark.parametrize("inplace", [True, False])
     @pytest.mark.parametrize(
         "original_dict, sorted_dict, ascending, ignore_index, output_index",
diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
index c75815856c9ff..a49b7c2b7f86e 100644
--- a/pandas/tests/test_sorting.py
+++ b/pandas/tests/test_sorting.py
@@ -478,13 +478,3 @@ def test_mixed_str_nan():
     result = safe_sort(values)
     expected = np.array([np.nan, "a", "b", "b"], dtype=object)
     tm.assert_numpy_array_equal(result, expected)
-
-
-def test_respect_ignore_index():
-    # GH 43591
-    df = DataFrame({"a": [1, 2, 3]})
-    df.index = [4, 2, 0]
-    result = df.sort_index(ascending=False, ignore_index=True)
-    expected = DataFrame({"a": [1, 2, 3]})
-    expected.index = [0, 1, 2]
-    tm.assert_frame_equal(result, expected)

From 7523b1b84cc30898a8d8b3acff34033682e54897 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Mon, 18 Oct 2021 21:39:59 -0700
Subject: [PATCH 06/23] parameterized over inplace and ignore_index

---
 pandas/tests/frame/methods/test_sort_index.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py
index c78a6d015cc16..71822628473f4 100644
--- a/pandas/tests/frame/methods/test_sort_index.py
+++ b/pandas/tests/frame/methods/test_sort_index.py
@@ -419,11 +419,22 @@ def test_sort_index_ignore_index(
         tm.assert_frame_equal(result_df, expected_df)
         tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index))
 
-    def test_respect_ignore_index(self):
+    @pytest.mark.parametrize("inplace", [True, False])
+    @pytest.mark.parametrize("ignore_index", [True, False])
+    def test_respect_ignore_index(self, inplace, ignore_index):
         # GH 43591
         df = DataFrame({"a": [1, 2, 3]}, index=RangeIndex(4, -1, -2))
-        result = df.sort_index(ascending=False, ignore_index=True)
-        expected = DataFrame({"a": [1, 2, 3]})
+        result = df.sort_index(
+            ascending=False, ignore_index=ignore_index, inplace=inplace
+        )
+
+        if inplace:
+            result = df
+        if ignore_index:
+            expected = DataFrame({"a": [1, 2, 3]})
+        else:
+            expected = DataFrame({"a": [1, 2, 3]}, index=RangeIndex(4, -1, -2))
+
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("inplace", [True, False])

From e1a0aa73efa638d2f71623be1dd011808309fc4e Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Sun, 24 Oct 2021 20:58:45 -0700
Subject: [PATCH 07/23] BUG fix split

---
 pandas/core/strings/accessor.py              | 12 +++++++----
 pandas/core/strings/object_array.py          | 21 +++++++++++++-------
 pandas/tests/strings/test_split_partition.py | 18 +++++++++++++++++
 3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 4ea29edb7d41b..bd41a67439b8b 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -657,7 +657,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
     Parameters
     ----------
-    pat : str, optional
+    pat : str or compiled regex, optional
         String or regular expression to split on.
         If not specified, split on whitespace.
     n : int, default -1 (all)
@@ -668,7 +668,11 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
         * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
         * If ``False``, return Series/Index, containing lists of strings.
-
+    regex : bool, default None
+        * If ``True``, assumes the passed-in pattern is a regular expression 
+        * If ``False``, treats the pattern as a literal string
+        * If ``None`` and the pattern length is 1, treats the pattern as a literal string
+        * If ``None`` and the pattern length is not 1, treats the pattern as a regular expression
     Returns
     -------
     Series, Index, DataFrame or MultiIndex
@@ -784,8 +788,8 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
     @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"})
     @forbid_nonstring_types(["bytes"])
-    def split(self, pat=None, n=-1, expand=False):
-        result = self._data.array._str_split(pat, n, expand)
+    def split(self, pat: str | re.Pattern = None, n=-1, expand=False, regex: bool | None = None):
+        result = self._data.array._str_split(pat, n, expand, regex)
         return self._wrap_result(result, returns_string=expand, expand=expand)
 
     @Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"})
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 76ee55ef5f9ad..6299ce52628cd 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -308,21 +308,28 @@ def f(x):
 
         return self._str_map(f)
 
-    def _str_split(self, pat=None, n=-1, expand=False):
+    def _str_split(self, pat: str | re.Pattern = None, n=-1, expand=False, regex: bool = None):
         if pat is None:
             if n is None or n == 0:
                 n = -1
             f = lambda x: x.split(pat, n)
         else:
-            if len(pat) == 1:
+            if regex is not None:
+                new_pat = pat
+            if regex is None:
+                if len(pat) == 1:
+                    new_pat = pat
+                else:
+                    new_pat = re.compile(pat)
+
+            if isinstance(new_pat, re.Pattern):
+                if n is None or n == -1:
+                    n = 0
+                f = lambda x: new_pat.split(x, maxsplit=n)
+            else:
                 if n is None or n == 0:
                     n = -1
                 f = lambda x: x.split(pat, n)
-            else:
-                if n is None or n == -1:
-                    n = 0
-                regex = re.compile(pat)
-                f = lambda x: regex.split(x, maxsplit=n)
         return self._str_map(f, dtype=object)
 
     def _str_rsplit(self, pat=None, n=-1):
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index f3f5acd0d2f1c..56f3144004137 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -34,6 +34,24 @@ def test_split(any_string_dtype):
     exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]])
     tm.assert_series_equal(result, exp)
 
+    # explicit regex = True split
+    values = Series('qweqwejpgqweqwe.jpg', dtype=any_string_dtype)
+    result = values.str.split('\.jpg', regex=True)
+    exp = Series([['qweqwejpgqweqwe.jpg']])
+    tm.assert_series_equal(result, exp)
+    # explicit regex = False split
+    result = values.str.split('\.jpg', regex=False)
+    exp = Series([['qweqwejpgqweqwe.jpg']])
+    tm.assert_series_equal(result, exp)
+    # non explicit regex split, pattern length == 1
+    result = values.str.split('.')
+    exp = Series([['qweqwejpgqweqwe','jpg']])
+    tm.assert_series_equal(result, exp)
+    # non explicit regex split, pattern length != 1
+    result = values.str.split('.jpg')
+    exp = Series([['qweqw','qweqwe', '']])
+    tm.assert_series_equal(result, exp)
+
 
 def test_split_object_mixed():
     mixed = Series(["a_b_c", np.nan, "d_e_f", True, datetime.today(), None, 1, 2.0])

From d7b3d8e61512f151aba22c36b2341337afde7e5c Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Sun, 24 Oct 2021 22:51:23 -0700
Subject: [PATCH 08/23] ENH: added regex argument to Series.str.split

---
 doc/source/whatsnew/v1.4.0.rst               |  2 +-
 pandas/core/strings/accessor.py              | 35 ++++++++++++++------
 pandas/core/strings/object_array.py          |  6 ++--
 pandas/tests/strings/test_split_partition.py | 10 +++---
 4 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 96daf1d825753..f2a5b06352f0a 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -177,7 +177,7 @@ Other enhancements
 - :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
 - The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`)
 - Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
--
+- :meth:`Series.str.split` now supports a ``regex`` argument that explicitly specifies whether the pattern is a regular expression. Default is ``None`` (:issue:`43563`, :issue:`37963`, :issue:`32835`, :issue:`25549`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index bd41a67439b8b..b361955b1b628 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -774,16 +774,31 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
     1  https://docs.python.org/3/tutorial  index.html
     2                                 NaN         NaN
 
-    Remember to escape special characters when explicitly using regular
-    expressions.
-
-    >>> s = pd.Series(["1+1=2"])
-    >>> s
-    0    1+1=2
-    dtype: object
-    >>> s.str.split(r"\+|=", expand=True)
-         0    1    2
-    0    1    1    2
+    When `pat` is a string and ``regex=None`` (the default), the given `pat` is compiled as a 
+    regex only if ``len(pat) != 1``. 
+
+    >>> s = pd.Series(['foojpgbar.jpg'])
+    >>> s = s.str.split(".", expand=True)
+               0    1
+    0  foojpgbar  jpg
+    >>> s.str.split("\.jpg", expand=True)
+               0 1
+    0  foojpgbar  
+    >>> s.str.split(".jpg", expand=True)
+        0    1 2
+    0  fo  bar  
+
+    When ``regex=True``, `pat` is interpreted as a regex
+    
+    >>> s.str.split("\.jpg", regex=True, expand=True)
+               0 1
+    0  foojpgbar  
+    
+    When ``regex=False``, `pat` is interpreted as the string itself
+    
+    >>> s.str.split("\.jpg", regex=False, expand=True)
+                   0
+    0  foojpgbar.jpg
     """
 
     @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"})
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 6299ce52628cd..13c7d74462e93 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -314,9 +314,11 @@ def _str_split(self, pat: str | re.Pattern = None, n=-1, expand=False, regex: bo
                 n = -1
             f = lambda x: x.split(pat, n)
         else:
-            if regex is not None:
+            if regex is True:
+                new_pat = re.compile(pat)
+            elif regex is False:
                 new_pat = pat
-            if regex is None:
+            else:
                 if len(pat) == 1:
                     new_pat = pat
                 else:
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 56f3144004137..3a3c69cd39eac 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -36,19 +36,19 @@ def test_split(any_string_dtype):
 
     # explicit regex = True split
     values = Series('qweqwejpgqweqwe.jpg', dtype=any_string_dtype)
-    result = values.str.split('\.jpg', regex=True)
-    exp = Series([['qweqwejpgqweqwe.jpg']])
+    result = values.str.split(r'\.jpg', regex=True)
+    exp = Series([['qweqwejpgqweqwe', '']])
     tm.assert_series_equal(result, exp)
     # explicit regex = False split
-    result = values.str.split('\.jpg', regex=False)
+    result = values.str.split(r'\.jpg', regex=False)
     exp = Series([['qweqwejpgqweqwe.jpg']])
     tm.assert_series_equal(result, exp)
     # non explicit regex split, pattern length == 1
-    result = values.str.split('.')
+    result = values.str.split(r'.')
     exp = Series([['qweqwejpgqweqwe','jpg']])
     tm.assert_series_equal(result, exp)
     # non explicit regex split, pattern length != 1
-    result = values.str.split('.jpg')
+    result = values.str.split(r'.jpg')
     exp = Series([['qweqw','qweqwe', '']])
     tm.assert_series_equal(result, exp)
 

From 20dc2a6edbe4c5fa4d4c5b014b6ff7a54596fd4b Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Mon, 25 Oct 2021 15:43:29 -0700
Subject: [PATCH 09/23] format change

---
 pandas/core/strings/accessor.py              | 11 ++++++-----
 pandas/tests/strings/test_split_partition.py |  3 +++
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index b361955b1b628..842cf21c0a5ee 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -673,6 +673,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
         * If ``False``, treats the pattern as a literal string
         * If ``None`` and the pattern length is 1, treats the pattern as a literal string
         * If ``None`` and the pattern length is not 1, treats the pattern as a regular expression
+    
     Returns
     -------
     Series, Index, DataFrame or MultiIndex
@@ -778,25 +779,25 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
     regex only if ``len(pat) != 1``. 
 
     >>> s = pd.Series(['foojpgbar.jpg'])
-    >>> s = s.str.split(".", expand=True)
+    >>> s.str.split(r".", expand=True)
                0    1
     0  foojpgbar  jpg
-    >>> s.str.split("\.jpg", expand=True)
+    >>> s.str.split(r"\.jpg", expand=True)
                0 1
     0  foojpgbar  
-    >>> s.str.split(".jpg", expand=True)
+    >>> s.str.split(r".jpg", expand=True)
         0    1 2
     0  fo  bar  
 
     When ``regex=True``, `pat` is interpreted as a regex
     
-    >>> s.str.split("\.jpg", regex=True, expand=True)
+    >>> s.str.split(r"\.jpg", regex=True, expand=True)
                0 1
     0  foojpgbar  
     
     When ``regex=False``, `pat` is interpreted as the string itself
     
-    >>> s.str.split("\.jpg", regex=False, expand=True)
+    >>> s.str.split(r"\.jpg", regex=False, expand=True)
                    0
     0  foojpgbar.jpg
     """
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 3a3c69cd39eac..353bef259c4ca 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -39,14 +39,17 @@ def test_split(any_string_dtype):
     result = values.str.split(r'\.jpg', regex=True)
     exp = Series([['qweqwejpgqweqwe', '']])
     tm.assert_series_equal(result, exp)
+
     # explicit regex = False split
     result = values.str.split(r'\.jpg', regex=False)
     exp = Series([['qweqwejpgqweqwe.jpg']])
     tm.assert_series_equal(result, exp)
+
     # non explicit regex split, pattern length == 1
     result = values.str.split(r'.')
     exp = Series([['qweqwejpgqweqwe','jpg']])
     tm.assert_series_equal(result, exp)
+
     # non explicit regex split, pattern length != 1
     result = values.str.split(r'.jpg')
     exp = Series([['qweqw','qweqwe', '']])

From 0b139f30f1058f7db594210a90c19f79b84a72d9 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Mon, 25 Oct 2021 15:48:53 -0700
Subject: [PATCH 10/23] resolve conflict

---
 doc/source/whatsnew/v1.4.0.rst | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 467880efdf0f2..4c79f2f0a32f8 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -530,11 +530,8 @@ Indexing
 - Bug in indexing on a non-unique object-dtype :class:`Index` with an NA scalar (e.g. ``np.nan``) (:issue:`43711`)
 - Bug in :meth:`Series.__setitem__` with object dtype when setting an array with matching size and dtype='datetime64[ns]' or dtype='timedelta64[ns]' incorrectly converting the datetime/timedeltas to integers (:issue:`43868`)
 - Bug in :meth:`DataFrame.sort_index` where ``ignore_index=True`` was not being respected when the index was already sorted (:issue:`43591`)
-<<<<<<< HEAD
-=======
 - Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`)
 -
->>>>>>> upstream/master
 
 Missing
 ^^^^^^^

From 16049157e10e40af4a3913b3c481b66b00effe82 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Mon, 25 Oct 2021 19:10:02 -0700
Subject: [PATCH 11/23] ENH: added regex argument to Series.str.split

---
 doc/source/whatsnew/v1.4.0.rst               |  2 +-
 pandas/core/strings/accessor.py              | 37 ++++++++++++--------
 pandas/core/strings/object_array.py          |  8 +++--
 pandas/tests/strings/test_split_partition.py | 18 +++++-----
 4 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 4c79f2f0a32f8..2e8d0aee041ae 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -179,7 +179,7 @@ Other enhancements
 - :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
 - The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`)
 - Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
-- :meth:`Series.str.split` now supports a ``regex`` argument that explicitly specifies whether the pattern is a regular expression. Default is `None` (:issue:`43563`, :issue:`37963`, :issue:`32835`)
+- :meth:`Series.str.split` now supports a ``regex`` argument that explicitly specifies whether the pattern is a regular expression. Default is ``None`` (:issue:`43563`, :issue:`37963`, :issue:`32835`, :issue:`25549`)
 - :meth:`DataFrame.dropna` now accepts a single label as ``subset`` along with array-like (:issue:`41021`)
 -
 
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 842cf21c0a5ee..9755e8a9e62f1 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -669,11 +669,13 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
         * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
         * If ``False``, return Series/Index, containing lists of strings.
     regex : bool, default None
-        * If ``True``, assumes the passed-in pattern is a regular expression 
+        * If ``True``, assumes the passed-in pattern is a regular expression
         * If ``False``, treats the pattern as a literal string
-        * If ``None`` and the pattern length is 1, treats the pattern as a literal string
-        * If ``None`` and the pattern length is not 1, treats the pattern as a regular expression
-    
+        * If ``None`` and the pattern length is 1, treats the pattern as a
+        literal string
+        * If ``None`` and the pattern length is not 1, treats the pattern as
+        a regular expression
+
     Returns
     -------
     Series, Index, DataFrame or MultiIndex
@@ -774,9 +776,10 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
     0          this is a regular sentence        None
     1  https://docs.python.org/3/tutorial  index.html
     2                                 NaN         NaN
-
-    When `pat` is a string and ``regex=None`` (the default), the given `pat` is compiled as a 
-    regex only if ``len(pat) != 1``. 
+    
+    Remember to escape special characters when explicitly using regular expressions.
+    When `pat` is a string and ``regex=None`` (the default), the given `pat` is compiled
+    as a regex only if ``len(pat) != 1``.
 
     >>> s = pd.Series(['foojpgbar.jpg'])
     >>> s.str.split(r".", expand=True)
@@ -784,19 +787,19 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
     0  foojpgbar  jpg
     >>> s.str.split(r"\.jpg", expand=True)
                0 1
-    0  foojpgbar  
+    0  foojpgbar
     >>> s.str.split(r".jpg", expand=True)
         0    1 2
-    0  fo  bar  
+    0  fo  bar
 
     When ``regex=True``, `pat` is interpreted as a regex
-    
+
     >>> s.str.split(r"\.jpg", regex=True, expand=True)
                0 1
-    0  foojpgbar  
-    
+    0  foojpgbar
+
     When ``regex=False``, `pat` is interpreted as the string itself
-    
+
     >>> s.str.split(r"\.jpg", regex=False, expand=True)
                    0
     0  foojpgbar.jpg
@@ -804,7 +807,13 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
     @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"})
     @forbid_nonstring_types(["bytes"])
-    def split(self, pat: str | re.Pattern = None, n=-1, expand=False, regex: bool | None = None):
+    def split(
+        self,
+        pat: str | re.Pattern | None = None,
+        n=-1,
+        expand=False,
+        regex: bool | None = None,
+    ):
         result = self._data.array._str_split(pat, n, expand, regex)
         return self._wrap_result(result, returns_string=expand, expand=expand)
 
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 13c7d74462e93..5e71ae2eb73bf 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -308,16 +308,20 @@ def f(x):
 
         return self._str_map(f)
 
-    def _str_split(self, pat: str | re.Pattern = None, n=-1, expand=False, regex: bool = None):
+    def _str_split(
+        self, pat: str | re.Pattern | None = None, n=-1, expand=False, regex: bool | None = None
+    ):
         if pat is None:
             if n is None or n == 0:
                 n = -1
             f = lambda x: x.split(pat, n)
         else:
-            if regex is True:
+            new_pat: str | re.Pattern
+            if regex is True or isinstance(pat, re.Pattern):
                 new_pat = re.compile(pat)
             elif regex is False:
                 new_pat = pat
+            # regex is None so link to old behavior #43563
             else:
                 if len(pat) == 1:
                     new_pat = pat
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 353bef259c4ca..7a4fa85840385 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -35,24 +35,24 @@ def test_split(any_string_dtype):
     tm.assert_series_equal(result, exp)
 
     # explicit regex = True split
-    values = Series('qweqwejpgqweqwe.jpg', dtype=any_string_dtype)
-    result = values.str.split(r'\.jpg', regex=True)
-    exp = Series([['qweqwejpgqweqwe', '']])
+    values = Series("qweqwejpgqweqwe.jpg", dtype=any_string_dtype)
+    result = values.str.split(r"\.jpg", regex=True)
+    exp = Series([["qweqwejpgqweqwe", ""]])
     tm.assert_series_equal(result, exp)
 
     # explicit regex = False split
-    result = values.str.split(r'\.jpg', regex=False)
-    exp = Series([['qweqwejpgqweqwe.jpg']])
+    result = values.str.split(r"\.jpg", regex=False)
+    exp = Series([["qweqwejpgqweqwe.jpg"]])
     tm.assert_series_equal(result, exp)
 
     # non explicit regex split, pattern length == 1
-    result = values.str.split(r'.')
-    exp = Series([['qweqwejpgqweqwe','jpg']])
+    result = values.str.split(r".")
+    exp = Series([["qweqwejpgqweqwe", "jpg"]])
     tm.assert_series_equal(result, exp)
 
     # non explicit regex split, pattern length != 1
-    result = values.str.split(r'.jpg')
-    exp = Series([['qweqw','qweqwe', '']])
+    result = values.str.split(r".jpg")
+    exp = Series([["qweqw", "qweqwe", ""]])
     tm.assert_series_equal(result, exp)
 
 

From 8312d799858fc650d7bc4b2c1f44b1285850a7bb Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Mon, 25 Oct 2021 19:15:07 -0700
Subject: [PATCH 12/23] changed whatsnew

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 2e8d0aee041ae..2fc5f7506e8c8 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -179,7 +179,7 @@ Other enhancements
 - :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
 - The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`)
 - Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
-- :meth:`Series.str.split` now supports a ``regex`` argument that explicitly specifies whether the pattern is a regular expression. Default is ``None`` (:issue:`43563`, :issue:`37963`, :issue:`32835`, :issue:`25549`)
+- :meth:`Series.str.split` now supports a ``regex`` argument that explicitly specifies whether the pattern is a regular expression. Default is ``None`` (:issue:`43563`, :issue:`32835`, :issue:`25549`)
 - :meth:`DataFrame.dropna` now accepts a single label as ``subset`` along with array-like (:issue:`41021`)
 -
 

From a82639cb1a9d1a04835ee85a68aafab8800da8dc Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Mon, 25 Oct 2021 19:23:33 -0700
Subject: [PATCH 13/23] fixed mypy error

---
 pandas/core/strings/accessor.py     | 30 ++++++++++++++---------------
 pandas/core/strings/object_array.py |  6 +++++-
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 9755e8a9e62f1..f091997e58922 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -670,11 +670,11 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
         * If ``False``, return Series/Index, containing lists of strings.
     regex : bool, default None
         * If ``True``, assumes the passed-in pattern is a regular expression
-        * If ``False``, treats the pattern as a literal string
+        * If ``False``, treats the pattern as a literal string.
         * If ``None`` and the pattern length is 1, treats the pattern as a
-        literal string
+        literal string.
         * If ``None`` and the pattern length is not 1, treats the pattern as
-        a regular expression
+        a regular expression.
 
     Returns
     -------
@@ -776,33 +776,33 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
     0          this is a regular sentence        None
     1  https://docs.python.org/3/tutorial  index.html
     2                                 NaN         NaN
-    
+
     Remember to escape special characters when explicitly using regular expressions.
     When `pat` is a string and ``regex=None`` (the default), the given `pat` is compiled
     as a regex only if ``len(pat) != 1``.
 
-    >>> s = pd.Series(['foojpgbar.jpg'])
+    >>> s = pd.Series(['fooojpgbar.jpg'])
     >>> s.str.split(r".", expand=True)
-               0    1
-    0  foojpgbar  jpg
+                0    1
+    0  fooojpgbar  jpg
     >>> s.str.split(r"\.jpg", expand=True)
-               0 1
-    0  foojpgbar
+                0 1
+    0  fooojpgbar
     >>> s.str.split(r".jpg", expand=True)
-        0    1 2
-    0  fo  bar
+         0    1 2
+    0  foo  bar
 
     When ``regex=True``, `pat` is interpreted as a regex
 
     >>> s.str.split(r"\.jpg", regex=True, expand=True)
-               0 1
-    0  foojpgbar
+                0 1
+    0  fooojpgbar
 
     When ``regex=False``, `pat` is interpreted as the string itself
 
     >>> s.str.split(r"\.jpg", regex=False, expand=True)
-                   0
-    0  foojpgbar.jpg
+                    0
+    0  fooojpgbar.jpg
     """
 
     @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"})
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 5e71ae2eb73bf..3081575f50700 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -309,7 +309,11 @@ def f(x):
         return self._str_map(f)
 
     def _str_split(
-        self, pat: str | re.Pattern | None = None, n=-1, expand=False, regex: bool | None = None
+        self,
+        pat: str | re.Pattern | None = None,
+        n=-1,
+        expand=False,
+        regex: bool | None = None,
     ):
         if pat is None:
             if n is None or n == 0:

From 76e6001f4d241015f96dd64f5d97479c4f88f266 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Mon, 25 Oct 2021 21:29:08 -0700
Subject: [PATCH 14/23] more specific docs

---
 pandas/core/strings/accessor.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index f091997e58922..c685b444450e1 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -668,7 +668,12 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
         * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
         * If ``False``, return Series/Index, containing lists of strings.
+
     regex : bool, default None
+        Determines whether to handle the pattern as a regular expression.
+        If ``pat`` is a compiled regular expression, it is interpreted as a
+        regular expression regardless of ``regex``
+
         * If ``True``, assumes the passed-in pattern is a regular expression
         * If ``False``, treats the pattern as a literal string.
         * If ``None`` and the pattern length is 1, treats the pattern as a

From 2c43fb50433379cbcf1b52072d426edd9499aaea Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Wed, 27 Oct 2021 10:45:24 -0700
Subject: [PATCH 15/23] added example

---
 pandas/core/strings/accessor.py | 37 ++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index c685b444450e1..a71989d4dffff 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -657,7 +657,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
     Parameters
     ----------
-    pat : str, or compiled regex optional
+    pat : str, or compiled regex, optional
         String or regular expression to split on.
         If not specified, split on whitespace.
     n : int, default -1 (all)
@@ -676,10 +676,8 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
         * If ``True``, assumes the passed-in pattern is a regular expression
         * If ``False``, treats the pattern as a literal string.
-        * If ``None`` and the pattern length is 1, treats the pattern as a
-        literal string.
-        * If ``None`` and the pattern length is not 1, treats the pattern as
-        a regular expression.
+        * If ``None`` and `pat` length is 1, treats `pat` as a literal string.
+        * If ``None`` and `pat` length is not 1, treats `pat` as a regular expression.
 
     Returns
     -------
@@ -783,31 +781,36 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
     2                                 NaN         NaN
 
     Remember to escape special characters when explicitly using regular expressions.
+
+    >>> s = pd.Series(["foo and bar plus baz"])
+    >>> s.str.split(r"and|plus", expand=True)
+        0   1   2
+    0 foo bar baz
+
+    Regular expressions can be used to handle urls or file names.
     When `pat` is a string and ``regex=None`` (the default), the given `pat` is compiled
     as a regex only if ``len(pat) != 1``.
 
-    >>> s = pd.Series(['fooojpgbar.jpg'])
+    >>> s = pd.Series(['foojpgbar.jpg'])
     >>> s.str.split(r".", expand=True)
-                0    1
-    0  fooojpgbar  jpg
+               0    1
+    0  foojpgbar  jpg
+
     >>> s.str.split(r"\.jpg", expand=True)
-                0 1
-    0  fooojpgbar
-    >>> s.str.split(r".jpg", expand=True)
-         0    1 2
-    0  foo  bar
+               0 1
+    0  foojpgbar
 
     When ``regex=True``, `pat` is interpreted as a regex
 
     >>> s.str.split(r"\.jpg", regex=True, expand=True)
-                0 1
-    0  fooojpgbar
+               0 1
+    0  foojpgbar
 
     When ``regex=False``, `pat` is interpreted as the string itself
 
     >>> s.str.split(r"\.jpg", regex=False, expand=True)
-                    0
-    0  fooojpgbar.jpg
+                   0
+    0  foojpgbar.jpg
     """
 
     @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"})

From 2ed798076e173f23b96dc0311b671b95d8a8f084 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Wed, 27 Oct 2021 22:13:14 -0700
Subject: [PATCH 16/23] changed doc to match str_replace, moved tests to a new
 test func

---
 pandas/core/strings/accessor.py              | 34 ++++++++++++++------
 pandas/tests/strings/test_split_partition.py |  3 ++
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index a71989d4dffff..63a5d04a42702 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -653,7 +653,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
     Split strings around given separator/delimiter.
 
     Splits the string in the Series/Index from the %(side)s,
-    at the specified delimiter string. Equivalent to :meth:`str.%(method)s`.
+    at the specified delimiter string.
 
     Parameters
     ----------
@@ -666,24 +666,30 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
     expand : bool, default False
         Expand the split strings into separate columns.
 
-        * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
-        * If ``False``, return Series/Index, containing lists of strings.
+        - If ``True``, return DataFrame/MultiIndex expanding dimensionality.
+        - If ``False``, return Series/Index, containing lists of strings.
 
     regex : bool, default None
-        Determines whether to handle the pattern as a regular expression.
-        If ``pat`` is a compiled regular expression, it is interpreted as a
-        regular expression regardless of ``regex``
+        Determines if the passed-in pattern is a regular expression:
 
-        * If ``True``, assumes the passed-in pattern is a regular expression
-        * If ``False``, treats the pattern as a literal string.
-        * If ``None`` and `pat` length is 1, treats `pat` as a literal string.
-        * If ``None`` and `pat` length is not 1, treats `pat` as a regular expression.
+        - If ``True``, assumes the passed-in pattern is a regular expression.
+        - If ``False``, treats the pattern as a literal string.
+        - If ``None`` and `pat` length is 1, treats `pat` as a literal string.
+        - If ``None`` and `pat` length is not 1, treats `pat` as a regular expression.
+        - Cannot be set to False if `pat` is a compiled regex.
+
+        .. versionadded:: 1.4.0
 
     Returns
     -------
     Series, Index, DataFrame or MultiIndex
         Type matches caller unless ``expand=True`` (see Notes).
 
+    Raises
+    ------
+    ValueError
+        * if `regex` is False and `pat` is a compiled regex
+
     See Also
     --------
     Series.str.split : Split strings around given separator/delimiter.
@@ -706,6 +712,9 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
     If using ``expand=True``, Series and Index callers return DataFrame and
     MultiIndex objects, respectively.
 
+    Use of `regex=False` with a `pat` as a compiled regex will raise
+    an error.
+
     Examples
     --------
     >>> s = pd.Series(
@@ -822,6 +831,11 @@ def split(
         expand=False,
         regex: bool | None = None,
     ):
+        if not regex and is_re(pat):
+            raise ValueError(
+                "Cannot use a compiled regex as replacement pattern with regex=False"
+            )
+
         result = self._data.array._str_split(pat, n, expand, regex)
         return self._wrap_result(result, returns_string=expand, expand=expand)
 
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 7a4fa85840385..958fc1f54438f 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -34,6 +34,9 @@ def test_split(any_string_dtype):
     exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]])
     tm.assert_series_equal(result, exp)
 
+
+def test_split_regex(any_string_dtype):
+    # GH 43563
     # explicit regex = True split
     values = Series("qweqwejpgqweqwe.jpg", dtype=any_string_dtype)
     result = values.str.split(r"\.jpg", regex=True)

From 5f0d8dfc5d95a05b26aba56a6b643cfdf72d06b5 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Thu, 28 Oct 2021 14:45:55 -0700
Subject: [PATCH 17/23] changed test string to be readable

---
 pandas/tests/strings/test_split_partition.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 958fc1f54438f..c39b812ff4bdc 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -38,24 +38,24 @@ def test_split(any_string_dtype):
 def test_split_regex(any_string_dtype):
     # GH 43563
     # explicit regex = True split
-    values = Series("qweqwejpgqweqwe.jpg", dtype=any_string_dtype)
+    values = Series("foo-jpg-bar.jpg", dtype=any_string_dtype)
     result = values.str.split(r"\.jpg", regex=True)
-    exp = Series([["qweqwejpgqweqwe", ""]])
+    exp = Series([["foo-jpg-bar", ""]])
     tm.assert_series_equal(result, exp)
 
     # explicit regex = False split
     result = values.str.split(r"\.jpg", regex=False)
-    exp = Series([["qweqwejpgqweqwe.jpg"]])
+    exp = Series([["foo-jpg-bar.jpg"]])
     tm.assert_series_equal(result, exp)
 
     # non explicit regex split, pattern length == 1
     result = values.str.split(r".")
-    exp = Series([["qweqwejpgqweqwe", "jpg"]])
+    exp = Series([["foo-jpg-bar", "jpg"]])
     tm.assert_series_equal(result, exp)
 
     # non explicit regex split, pattern length != 1
     result = values.str.split(r".jpg")
-    exp = Series([["qweqw", "qweqwe", ""]])
+    exp = Series([["foo", "-bar", ""]])
     tm.assert_series_equal(result, exp)
 
 

From ba812a12fbac6f85948711b5f61833b78eb1b6f8 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Thu, 28 Oct 2021 14:48:27 -0700
Subject: [PATCH 18/23] changed test string to be readable

---
 pandas/tests/strings/test_split_partition.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index c39b812ff4bdc..4cc759383c1a3 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -38,24 +38,24 @@ def test_split(any_string_dtype):
 def test_split_regex(any_string_dtype):
     # GH 43563
     # explicit regex = True split
-    values = Series("foo-jpg-bar.jpg", dtype=any_string_dtype)
+    values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype)
     result = values.str.split(r"\.jpg", regex=True)
-    exp = Series([["foo-jpg-bar", ""]])
+    exp = Series([["xxxjpgzzz", ""]])
     tm.assert_series_equal(result, exp)
 
     # explicit regex = False split
     result = values.str.split(r"\.jpg", regex=False)
-    exp = Series([["foo-jpg-bar.jpg"]])
+    exp = Series([["xxxjpgzzz.jpg"]])
     tm.assert_series_equal(result, exp)
 
     # non explicit regex split, pattern length == 1
     result = values.str.split(r".")
-    exp = Series([["foo-jpg-bar", "jpg"]])
+    exp = Series([["xxxjpgzzz", "jpg"]])
     tm.assert_series_equal(result, exp)
 
     # non explicit regex split, pattern length != 1
     result = values.str.split(r".jpg")
-    exp = Series([["foo", "-bar", ""]])
+    exp = Series([["xx", "zzz", ""]])
     tm.assert_series_equal(result, exp)
 
 

From e2da86122f37e3a66b33ee435429763119459df9 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Sat, 30 Oct 2021 19:51:41 -0700
Subject: [PATCH 19/23] added test for raises error when regex=False and pat is
 regex

---
 pandas/tests/strings/test_split_partition.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 4cc759383c1a3..23c187699e7d4 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+import re
 
 import numpy as np
 import pytest
@@ -58,6 +59,14 @@ def test_split_regex(any_string_dtype):
     exp = Series([["xx", "zzz", ""]])
     tm.assert_series_equal(result, exp)
 
+    # regex=False with a compiled regex pattern raises an error
+    with pytest.raises(
+        ValueError,
+        match="Cannot use a compiled regex as replacement pattern with regex=False",
+    ):
+        pat = re.compile("xxx")
+        values.str.split(pat, regex=False)
+
 
 def test_split_object_mixed():
     mixed = Series(["a_b_c", np.nan, "d_e_f", True, datetime.today(), None, 1, 2.0])

From 057dcfbb104382dc3f70c3848e2ed2a8662c47b8 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Mon, 1 Nov 2021 21:57:45 -0700
Subject: [PATCH 20/23] added test for explicit regex=True with compiled regex

---
 pandas/tests/strings/test_split_partition.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 23c187699e7d4..7af5c01367e01 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -44,6 +44,13 @@ def test_split_regex(any_string_dtype):
     exp = Series([["xxxjpgzzz", ""]])
     tm.assert_series_equal(result, exp)
 
+    # explicit regex = True split with compiled regex
+    regex_pat = re.compile(r".jpg")
+    values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype)
+    result = values.str.split(regex_pat, regex=True)
+    exp = Series([["xx", "zzz", ""]])
+    tm.assert_series_equal(result, exp)
+
     # explicit regex = False split
     result = values.str.split(r"\.jpg", regex=False)
     exp = Series([["xxxjpgzzz.jpg"]])
@@ -64,8 +71,7 @@ def test_split_regex(any_string_dtype):
         ValueError,
         match="Cannot use a compiled regex as replacement pattern with regex=False",
     ):
-        pat = re.compile("xxx")
-        values.str.split(pat, regex=False)
+        values.str.split(regex_pat, regex=False)
 
 
 def test_split_object_mixed():

From ece00f1b7d7afe6e2395b5992b5f2bed283f1ab1 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Mon, 1 Nov 2021 22:01:15 -0700
Subject: [PATCH 21/23] got rid of unnecessary comma in doc string

---
 pandas/core/strings/accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index d7f22baf3fc67..d477be9d50496 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -663,7 +663,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
     Parameters
     ----------
-    pat : str, or compiled regex, optional
+    pat : str or compiled regex, optional
         String or regular expression to split on.
         If not specified, split on whitespace.
     n : int, default -1 (all)

From b6bbf3e3921101541807f678a24162c592b389c8 Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Tue, 2 Nov 2021 10:39:00 -0700
Subject: [PATCH 22/23] added compiled regex example, changed logic so that
 regex becomes true when passed in compiled regex

---
 pandas/core/strings/accessor.py              | 12 ++++++++++--
 pandas/tests/strings/test_split_partition.py |  2 +-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index d477be9d50496..6bdd799c5b0f5 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -821,6 +821,12 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
                0 1
     0  foojpgbar
 
+    A compiled regex can be passed as `pat`
+
+    >>> s.str.split(re.compile(r"\.jpg"))
+               0 1
+    0  foojpgbar
+
     When ``regex=False``, `pat` is interpreted as the string itself
 
     >>> s.str.split(r"\.jpg", regex=False, expand=True)
@@ -835,13 +841,15 @@ def split(
         pat: str | re.Pattern | None = None,
         n=-1,
         expand=False,
+        *,
         regex: bool | None = None,
     ):
-        if not regex and is_re(pat):
+        if regex is False and is_re(pat):
             raise ValueError(
                 "Cannot use a compiled regex as replacement pattern with regex=False"
             )
-
+        if is_re(pat):
+            regex = True
         result = self._data.array._str_split(pat, n, expand, regex)
         return self._wrap_result(result, returns_string=expand, expand=expand)
 
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 7af5c01367e01..01a397938db52 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -47,7 +47,7 @@ def test_split_regex(any_string_dtype):
     # explicit regex = True split with compiled regex
     regex_pat = re.compile(r".jpg")
     values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype)
-    result = values.str.split(regex_pat, regex=True)
+    result = values.str.split(regex_pat)
     exp = Series([["xx", "zzz", ""]])
     tm.assert_series_equal(result, exp)
 

From 27ffee767b58b5b73ded417a5cfb9e1eba3a8eef Mon Sep 17 00:00:00 2001
From: Saehui Hwang <shwang@caltech.edu>
Date: Tue, 2 Nov 2021 10:41:36 -0700
Subject: [PATCH 23/23] corrected docs

---
 pandas/core/strings/accessor.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 6bdd799c5b0f5..9f163f77a2ae8 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -823,7 +823,8 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
     A compiled regex can be passed as `pat`
 
-    >>> s.str.split(re.compile(r"\.jpg"))
+    >>> import re
+    >>> s.str.split(re.compile(r"\.jpg"), expand=True)
                0 1
     0  foojpgbar