Merge pull request #9889 from hsperr/excel_multiindex

jreback · jreback · commit f9f88b24dc43 · 2015-04-17T08:35:41.000-04:00
ENH: Raise error writing excel file with a MultiIndexed DataFrame #9794
diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
@@ -54,6 +54,8 @@ Enhancements
 - Allow timedelta string conversion when leading zero is missing from time definition, ie `0:00:00` vs `00:00:00`. (:issue:`9570`)
 - Allow Panel.shift with ``axis='items'`` (:issue:`9890`)
 
+- Trying to write an Excel file now raises ``NotImplementedError`` if the ``DataFrame`` has ``MultiIndex`` columns, instead of writing a broken Excel file. (:issue:`9794`)
+
 .. _whatsnew_0161.api:
 
 API changes
@@ -138,7 +140,6 @@ Bug Fixes
 
 
 - Bug in unequal comparisons between categorical data and a scalar, which was not in the categories (e.g. ``Series(Categorical(list("abc"), ordered=True)) > "d"``. This returned ``False`` for all elements, but now raises a ``TypeError``. Equality comparisons also now return ``False`` for ``==`` and ``True`` for ``!=``. (:issue:`9848`)
-
 - Bug in DataFrame ``__setitem__`` when right hand side is a dictionary (:issue:`9874`)
 - Bug in ``where`` when dtype is ``datetime64/timedelta64``, but dtype of other is not (:issue:`9804`)
 - Bug in ``MultiIndex.sortlevel()`` results in unicode level name breaks (:issue:`9875`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1241,6 +1241,9 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
         >>> writer.save()
         """
         from pandas.io.excel import ExcelWriter
+        if self.columns.nlevels > 1:
+            raise NotImplementedError("Writing as Excel with a MultiIndex is "
+                                      "not yet implemented.")
 
         need_save = False
         if encoding == None:
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -1132,31 +1132,29 @@ def roundtrip(df, header=True, parser_hdr=0):
 
         nrows = 5
         ncols = 3
-
-        for i in range(1, 4):  # row multindex upto nlevel=3
-            for j in range(1, 4):  # col ""
-                df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j)
-                res = roundtrip(df)
-                # shape
-                self.assertEqual(res.shape, (nrows, ncols + i))
-
-                # no nans
-                for r in range(len(res.index)):
-                    for c in range(len(res.columns)):
-                        self.assertTrue(res.ix[r, c] is not np.nan)
-
-        for i in range(1, 4):  # row multindex upto nlevel=3
-            for j in range(1, 4):  # col ""
-                df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j)
-                res = roundtrip(df, False)
-                # shape
-                self.assertEqual(res.shape, (
-                    nrows - 1, ncols + i))  # first row taken as columns
-
-                # no nans
-                for r in range(len(res.index)):
-                    for c in range(len(res.columns)):
-                        self.assertTrue(res.ix[r, c] is not np.nan)
+        for use_headers in (True, False):
+            for i in range(1, 4):  # row MultiIndex up to nlevels=3
+                for j in range(1, 4):  # column MultiIndex up to nlevels=3
+                    df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j)
+
+                    # GH 9794: writing MultiIndex columns is not implemented
+                    # yet; nothing is read back, so skip the result checks
+                    if j > 1:
+                        with tm.assertRaises(NotImplementedError):
+                            roundtrip(df, use_headers)
+                        continue
+                    res = roundtrip(df, use_headers)
+
+                    if use_headers:
+                        self.assertEqual(res.shape, (nrows, ncols + i))
+                    else:
+                        # first row taken as columns
+                        self.assertEqual(res.shape, (nrows - 1, ncols + i))
+
+                    # no nans
+                    for r in range(len(res.index)):
+                        for c in range(len(res.columns)):
+                            self.assertTrue(res.ix[r, c] is not np.nan)
 
         res = roundtrip(DataFrame([0]))
         self.assertEqual(res.shape, (1, 1))
@@ -1394,6 +1392,29 @@ class XlwtTests(ExcelWriterBase, tm.TestCase):
     engine_name = 'xlwt'
     check_skip = staticmethod(_skip_if_no_xlwt)
 
+    def test_excel_raise_not_implemented_error_on_multiindex_columns(self):
+        _skip_if_no_xlwt()
+        # GH 9794: writing a frame whose columns are a MultiIndex is not
+        # implemented yet, so to_excel must raise instead of writing a file
+        column_tuples = [('site', ''), ('2014', 'height'), ('2014', 'weight')]
+        columns = pd.MultiIndex.from_tuples(column_tuples)
+        df = pd.DataFrame(np.random.randn(10, 3), columns=columns)
+        with tm.assertRaises(NotImplementedError):
+            with ensure_clean(self.ext) as path:
+                df.to_excel(path, index=False)
+
+    def test_excel_multiindex_index(self):
+        _skip_if_no_xlwt()
+        # GH 9794: a MultiIndex on the rows is still supported, so actually
+        # write the index out and assert that doing so raises nothing
+        row_tuples = [('site', ''), ('2014', 'height'), ('2014', 'weight')]
+        row_index = pd.MultiIndex.from_tuples(row_tuples)
+        df = pd.DataFrame(np.random.randn(3, 10), index=row_index)
+        with ensure_clean(self.ext) as path:
+            df.to_excel(path, index=True)
+
+
+
     def test_to_excel_styleconverter(self):
         _skip_if_no_xlwt()