From 101b4d2b2d1cb715711d00ab7b2c35f6c024f970 Mon Sep 17 00:00:00 2001 From: KimDoubleB Date: Wed, 25 Sep 2019 10:23:54 +0900 Subject: [PATCH 1/8] fix unnecessary sort --- pandas/core/indexes/api.py | 5 ++++- pandas/io/json/_json.py | 12 ++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 86d55ce2e7cc3..36553087367c2 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -169,7 +169,10 @@ def _union_indexes(indexes, sort=True): if len(indexes) == 1: result = indexes[0] if isinstance(result, list): - result = Index(sorted(result)) + if sort: + result = Index(sorted(result)) + else: + result = Index(result) return result indexes, kind = _sanitize_and_check(indexes) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 73f4985e201f1..2e6bdb7c1ef3d 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1112,14 +1112,10 @@ def _parse_no_numpy(self): self.check_keys_split(decoded) self.obj = DataFrame(dtype=None, **decoded) elif orient == "index": - self.obj = ( - DataFrame.from_dict( - loads(json, precise_float=self.precise_float), - dtype=None, - orient="index", - ) - .sort_index(axis="columns") - .sort_index(axis="index") + self.obj = DataFrame.from_dict( + loads(json, precise_float=self.precise_float), + dtype=None, + orient="index", ) elif orient == "table": self.obj = parse_table_schema(json, precise_float=self.precise_float) From 892c3e39dc6ed9e8ea3e039079768ae9b2ba37ad Mon Sep 17 00:00:00 2001 From: KimDoubleB Date: Wed, 25 Sep 2019 10:24:28 +0900 Subject: [PATCH 2/8] fix tests --- pandas/tests/frame/test_alter_axes.py | 4 ++-- pandas/tests/indexing/multiindex/test_setitem.py | 2 +- pandas/tests/io/json/test_pandas.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 017cbea7ec723..ef2847adfeb46 100644 --- 
a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -667,10 +667,10 @@ def test_rename(self, float_frame): # gets sorted alphabetical df = DataFrame(data) renamed = df.rename(index={"foo": "bar", "bar": "foo"}) - tm.assert_index_equal(renamed.index, Index(["foo", "bar"])) + tm.assert_index_equal(renamed.index, Index(["bar", "foo"])) renamed = df.rename(index=str.upper) - tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"])) + tm.assert_index_equal(renamed.index, Index(["FOO", "BAR"])) # have to pass something with pytest.raises(TypeError, match="must pass an index to rename"): diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index c383c38958692..79e82d6136352 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -159,7 +159,7 @@ def test_multiindex_setitem(self): ) expected = df_orig.copy() - expected.iloc[[0, 2, 3]] *= 2 + expected.iloc[[0, 1, 3]] *= 2 idx = pd.IndexSlice df = df_orig.copy() diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 415b1d81eb3e4..b0d5712de8535 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -168,7 +168,7 @@ def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype): expected = self.frame.copy() - if not numpy and (orient == "index" or (PY35 and orient == "columns")): + if not numpy and (PY35 and (orient == "index" or orient == "columns")): # TODO: debug why sort is required expected = expected.sort_index() @@ -188,7 +188,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype ) expected = self.intframe.copy() - if not numpy and (orient == "index" or (PY35 and orient == "columns")): + if not numpy and (PY35 and (orient == "index" or orient == "columns")): expected = 
expected.sort_index() if orient == "records" or orient == "values": @@ -228,7 +228,7 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): ) expected = df.copy() - if not numpy and (orient == "index" or (PY35 and orient == "columns")): + if not numpy and (PY35 and (orient == "index" or orient == "columns")): expected = expected.sort_index() if not dtype: From accf764aa2560b27547dd17bdc9c7eb24578af92 Mon Sep 17 00:00:00 2001 From: KimDoubleB Date: Thu, 26 Sep 2019 01:19:55 +0900 Subject: [PATCH 3/8] Py35 sort index --- pandas/io/json/_json.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 2e6bdb7c1ef3d..bbc4ac1ca902b 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -12,7 +12,7 @@ from pandas.core.dtypes.common import ensure_str, is_period_dtype -from pandas import DataFrame, MultiIndex, Series, isna, to_datetime +from pandas import DataFrame, MultiIndex, Series, isna, to_datetime, compat from pandas._typing import Scalar from pandas.core.reshape.concat import concat @@ -1117,6 +1117,9 @@ def _parse_no_numpy(self): dtype=None, orient="index", ) + if compat.PY35: + self.obj.sort_index(axis="columns", inplace=True) + self.obj.sort_index(axis="index", inplace=True) elif orient == "table": self.obj = parse_table_schema(json, precise_float=self.precise_float) else: From e117e17474056595c190ffee14ce659fc286eef8 Mon Sep 17 00:00:00 2001 From: KimDoubleB Date: Thu, 26 Sep 2019 18:23:49 +0900 Subject: [PATCH 4/8] fix tests --- pandas/tests/frame/test_alter_axes.py | 3 ++- pandas/tests/indexing/multiindex/test_setitem.py | 4 ++-- pandas/tests/io/json/test_pandas.py | 7 +++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index ef2847adfeb46..92032d5d1f363 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1,3 +1,4 @@ 
+from collections import OrderedDict from datetime import datetime, timedelta import inspect @@ -662,7 +663,7 @@ def test_rename(self, float_frame): ) # index - data = {"A": {"foo": 0, "bar": 1}} + data = {"A": OrderedDict({"foo": 0, "bar": 1})} # gets sorted alphabetical df = DataFrame(data) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 79e82d6136352..7f1d1138b4408 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -146,8 +146,8 @@ def test_multiindex_setitem(self): { "price": { ("DE", "Coal", "Stock"): 2, - ("DE", "Gas", "Stock"): 4, ("DE", "Elec", "Demand"): 1, + ("DE", "Gas", "Stock"): 4, ("FR", "Gas", "Stock"): 5, ("FR", "Solar", "SupIm"): 0, ("FR", "Wind", "SupIm"): 0, @@ -159,7 +159,7 @@ def test_multiindex_setitem(self): ) expected = df_orig.copy() - expected.iloc[[0, 1, 3]] *= 2 + expected.iloc[[0, 2, 3]] *= 2 idx = pd.IndexSlice df = df_orig.copy() diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b0d5712de8535..4f7a5de218edd 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -168,8 +168,7 @@ def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype): expected = self.frame.copy() - if not numpy and (PY35 and (orient == "index" or orient == "columns")): - # TODO: debug why sort is required + if not numpy and PY35 and orient in ("index", "columns"): expected = expected.sort_index() if orient == "records" or orient == "values": @@ -188,7 +187,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype ) expected = self.intframe.copy() - if not numpy and (PY35 and (orient == "index" or orient == "columns")): + if not numpy and PY35 and orient in ("index", "columns"): expected = expected.sort_index() if orient == "records" or orient == 
"values": @@ -228,7 +227,7 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): ) expected = df.copy() - if not numpy and (PY35 and (orient == "index" or orient == "columns")): + if not numpy and PY35 and orient in ("index", "columns"): expected = expected.sort_index() if not dtype: From 03f1432805e04d37312ac095e616c93b99af1b11 Mon Sep 17 00:00:00 2001 From: KimDoubleB Date: Thu, 26 Sep 2019 18:24:58 +0900 Subject: [PATCH 5/8] modify the request part --- pandas/core/indexes/api.py | 5 ++--- pandas/io/json/_json.py | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 36553087367c2..ccc34c8e058d1 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -170,9 +170,8 @@ def _union_indexes(indexes, sort=True): result = indexes[0] if isinstance(result, list): if sort: - result = Index(sorted(result)) - else: - result = Index(result) + result = sorted(result) + result = Index(result) return result indexes, kind = _sanitize_and_check(indexes) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index bbc4ac1ca902b..5e803f26a036a 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1118,8 +1118,7 @@ def _parse_no_numpy(self): orient="index", ) if compat.PY35: - self.obj.sort_index(axis="columns", inplace=True) - self.obj.sort_index(axis="index", inplace=True) + self.obj = self.obj.sort_index(axis="columns").sort_index(axis="index") elif orient == "table": self.obj = parse_table_schema(json, precise_float=self.precise_float) else: From a3f287219b87dbc5c00840ae311b3d11b03850f4 Mon Sep 17 00:00:00 2001 From: KBB Date: Wed, 2 Oct 2019 02:53:17 +0900 Subject: [PATCH 6/8] back to origin code --- pandas/core/indexes/api.py | 4 +--- pandas/tests/frame/test_alter_axes.py | 7 +++---- pandas/tests/indexing/multiindex/test_setitem.py | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/api.py 
b/pandas/core/indexes/api.py index ccc34c8e058d1..86d55ce2e7cc3 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -169,9 +169,7 @@ def _union_indexes(indexes, sort=True): if len(indexes) == 1: result = indexes[0] if isinstance(result, list): - if sort: - result = sorted(result) - result = Index(result) + result = Index(sorted(result)) return result indexes, kind = _sanitize_and_check(indexes) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 92032d5d1f363..017cbea7ec723 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1,4 +1,3 @@ -from collections import OrderedDict from datetime import datetime, timedelta import inspect @@ -663,15 +662,15 @@ def test_rename(self, float_frame): ) # index - data = {"A": OrderedDict({"foo": 0, "bar": 1})} + data = {"A": {"foo": 0, "bar": 1}} # gets sorted alphabetical df = DataFrame(data) renamed = df.rename(index={"foo": "bar", "bar": "foo"}) - tm.assert_index_equal(renamed.index, Index(["bar", "foo"])) + tm.assert_index_equal(renamed.index, Index(["foo", "bar"])) renamed = df.rename(index=str.upper) - tm.assert_index_equal(renamed.index, Index(["FOO", "BAR"])) + tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"])) # have to pass something with pytest.raises(TypeError, match="must pass an index to rename"): diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 7f1d1138b4408..c383c38958692 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -146,8 +146,8 @@ def test_multiindex_setitem(self): { "price": { ("DE", "Coal", "Stock"): 2, - ("DE", "Elec", "Demand"): 1, ("DE", "Gas", "Stock"): 4, + ("DE", "Elec", "Demand"): 1, ("FR", "Gas", "Stock"): 5, ("FR", "Solar", "SupIm"): 0, ("FR", "Wind", "SupIm"): 0, From b9317ff57934cc64a8e0f60252714f11e410634a Mon Sep 17 00:00:00 2001 
From: KBB Date: Wed, 2 Oct 2019 03:36:29 +0900 Subject: [PATCH 7/8] add whatsnew --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3406f52b06a61..4dec5b05505e8 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -260,6 +260,7 @@ I/O - Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`) - Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`) - Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with `engine='fastparquet'` if the file did not already exist (:issue:`28326`) +- Bug in :func:`read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`) Plotting ^^^^^^^^ From 7976a8bce0885b2018dadf04ddaf878160f20086 Mon Sep 17 00:00:00 2001 From: KBB Date: Wed, 9 Oct 2019 01:01:16 +0900 Subject: [PATCH 8/8] isort error fix --- pandas/io/json/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 5e803f26a036a..6ce288890b6c7 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -12,7 +12,7 @@ from pandas.core.dtypes.common import ensure_str, is_period_dtype -from pandas import DataFrame, MultiIndex, Series, isna, to_datetime, compat +from pandas import DataFrame, MultiIndex, Series, compat, isna, to_datetime from pandas._typing import Scalar from pandas.core.reshape.concat import concat