From 4251f99699d9d8b97ba7dc1f8dcdaa22597789b3 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 17 Oct 2018 21:01:07 +0200 Subject: [PATCH 1/3] implement searchsorted for MultiIndex (bug #14833) --- pandas/core/indexes/multi.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3cccb65503378..2dee30e217d41 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2899,6 +2899,12 @@ def isin(self, values, level=None): else: return np.lib.arraysetops.in1d(labs, sought_labels) + def searchsorted(self, arr): + from functools import reduce + dtype = reduce(lambda x, y : x + y, [l.dtype.descr for l in self.levels], []) + return self.values.astype(dtype).searchsorted(np.asarray(arr, dtype=dtype)) + + MultiIndex._add_numeric_methods_disabled() MultiIndex._add_numeric_methods_add_sub_disabled() @@ -2933,5 +2939,6 @@ def _sparsify(label_list, start=0, sentinel=''): return lzip(*result) + def _get_na_rep(dtype): return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN') From 563dc040e324d7f956f3b4f576ceb34f77a0a5b0 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 17 Oct 2018 21:28:06 +0200 Subject: [PATCH 2/3] added unit test --- pandas/tests/indexes/multi/test_indexing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 9ec11f1f42b9a..4b0619cf87d1d 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -346,3 +346,7 @@ def test_get_indexer_categorical_time(): Categorical(date_range("2012-01-01", periods=3, freq='H'))]) result = midx.get_indexer(midx) tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) + +def test_searchsorted(): + i = MultiIndex.from_tuples([('a', 0), ('b', 1)]).searchsorted(('b', 0)) + assert i == 1 From e946e087dfce1dfe46dc7f581d03c2293737d56c Mon Sep 17 00:00:00 
2001 From: Bartosz Telenczuk Date: Wed, 17 Oct 2018 21:35:26 +0200 Subject: [PATCH 3/3] linting --- pandas/core/indexes/multi.py | 11 ++++++----- pandas/tests/indexes/multi/test_indexing.py | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 2dee30e217d41..f5328ce9e6317 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2,6 +2,7 @@ # pylint: disable=E1101,E1103,W0232 import datetime import warnings +from functools import reduce from sys import getsizeof import numpy as np @@ -2900,11 +2901,12 @@ def isin(self, values, level=None): return np.lib.arraysetops.in1d(labs, sought_labels) def searchsorted(self, arr): - from functools import reduce - dtype = reduce(lambda x, y : x + y, [l.dtype.descr for l in self.levels], []) - return self.values.astype(dtype).searchsorted(np.asarray(arr, dtype=dtype)) + dtype = [l.dtype.descr for l in self.levels] + dtype = reduce(lambda x, y: x + y, dtype, []) + arr = np.asarray(arr, dtype=dtype) + values = self.values.astype(dtype) + return values.searchsorted(arr) - MultiIndex._add_numeric_methods_disabled() MultiIndex._add_numeric_methods_add_sub_disabled() @@ -2939,6 +2941,5 @@ def _sparsify(label_list, start=0, sentinel=''): return lzip(*result) - def _get_na_rep(dtype): return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN') diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 4b0619cf87d1d..abe2604b5da71 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -347,6 +347,7 @@ def test_get_indexer_categorical_time(): result = midx.get_indexer(midx) tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) + def test_searchsorted(): i = MultiIndex.from_tuples([('a', 0), ('b', 1)]).searchsorted(('b', 0)) assert i == 1