1
1
import re
2
+ from functools import wraps
2
3
3
4
import pandas as pd
4
5
from IPython .display import display
@@ -9,8 +10,35 @@ class _MSP:
9
10
10
11
def __init__(self, df):
    """
    Wrap a DataFrame and expose a curated subset of its API.

    Parameters
    ----------
    df : DataFrame
        The pandas DataFrame to wrap. It is stored as ``self._df`` and
        handed to the ``select``, ``op`` and ``to`` helper objects.
    """
    self._df = df
    self.select = Select(df)
    self.op = Op(df)
    self.to = To(df)
    # Names re-exported unchanged from the wrapped DataFrame.
    # NOTE: 'T' is a property on pandas DataFrames, so the attribute stored
    # here is the transposed frame computed at construction time, not a
    # callable.
    same_methods = ['isna', 'copy', 'head', 'tail', 'isin', 'where', 'query',
                    'agg', 'groupby', 'rolling', 'abs', 'all', 'any', 'clip', 'round',
                    'quantile', 'diff', 'drop', 'drop_duplicates', 'interpolate',
                    'dropna', 'replace', 'pivot_table', 'nlargest', 'nsmallest',
                    'melt', 'T', 'append', 'merge', 'asfreq', 'resample', 'plot',
                    'idxmax', 'idxmin']
    for method in same_methods:
        # Some names are not available on every pandas version (for
        # example ``DataFrame.append`` was removed in pandas 2.0). Skip
        # those instead of failing construction of the whole wrapper.
        old_method = getattr(df, method, None)
        if old_method is not None:
            setattr(self, method, old_method)
25
+
26
@property
def index(self):
    """Return the ``index`` attribute of the wrapped DataFrame."""
    frame = self._df
    return frame.index
29
+
30
@property
def values(self):
    """Return the ``values`` attribute of the wrapped DataFrame."""
    frame = self._df
    return frame.values
33
+
34
@property
def shape(self):
    """Return the ``shape`` attribute of the wrapped DataFrame."""
    frame = self._df
    return frame.shape
37
+
38
@property
def dtypes(self):
    """Return the ``dtypes`` attribute of the wrapped DataFrame."""
    frame = self._df
    return frame.dtypes
41
+
14
42
def flatten_index (self , axis = 'index' , sep = '_' , inplace = False ):
15
43
"""
16
44
Flatten MultiLevel Index to a single level
@@ -78,32 +106,26 @@ def var(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, kee
78
106
return self ._agg ('var' , axis = axis , skipna = skipna , level = level , ddof = ddof ,
79
107
numeric_only = numeric_only , keep_df = keep_df )
80
108
81
- def skew (self , axis = None , skipna = None , level = None , numeric_only = None , keep_df = True ):
82
- return self ._agg ('skew' , axis = axis , skipna = skipna , level = level ,
83
- numeric_only = numeric_only , keep_df = keep_df )
84
-
85
- def kurt (self , axis = None , skipna = None , level = None , numeric_only = None , keep_df = True ):
86
- return self ._agg ('kurt' , axis = axis , skipna = skipna , level = level ,
87
- numeric_only = numeric_only , keep_df = keep_df )
88
-
89
- def mad (self , axis = None , skipna = None , level = None , keep_df = True ):
90
- return self ._agg ('mad' , axis = axis , skipna = skipna , level = level , keep_df = keep_df )
91
-
92
109
def count(self, axis=None, level=None, numeric_only=None, keep_df=True):
    """
    Delegate to ``self._agg`` with the method name ``'count'``.

    All arguments are passed through as keyword arguments; ``keep_df``
    is interpreted by ``_agg``.
    """
    options = {
        'axis': axis,
        'level': level,
        'numeric_only': numeric_only,
        'keep_df': keep_df,
    }
    return self._agg('count', **options)
95
112
96
- def prod (self , axis = None , skipna = None , level = None , numeric_only = None , min_count = 0 , keep_df = True ):
97
- return self ._agg ('prod' , axis = axis , skipna = skipna , level = level ,
98
- numeric_only = numeric_only , min_count = min_count , keep_df = keep_df )
99
-
100
113
def nunique(self, axis=0, dropna=True, keep_df=True):
    """
    Delegate to ``self._agg`` with the method name ``'nunique'``.

    All arguments are passed through as keyword arguments; ``keep_df``
    is interpreted by ``_agg``.
    """
    options = {'axis': axis, 'dropna': dropna, 'keep_df': keep_df}
    return self._agg('nunique', **options)
102
115
103
116
def mode(self, axis=0, numeric_only=False, dropna=True):
    """
    Call ``mode`` on the wrapped DataFrame and return its result unchanged.

    The three arguments are forwarded as keyword arguments.
    """
    pandas_mode = self._df.mode
    options = {'axis': axis, 'numeric_only': numeric_only, 'dropna': dropna}
    return pandas_mode(**options)
105
118
106
- # Display
119
def _agg(self, method_name, **kwargs):
    """
    Run an aggregation on the wrapped DataFrame.

    Parameters
    ----------
    method_name : str
        Name of the DataFrame method to call, e.g. ``'sum'``.
    **kwargs
        Keyword arguments for the pandas method. ``keep_df`` (default
        True) is consumed here and never forwarded. Arguments whose value
        is None are treated as "not given" and are not forwarded, so that
        pandas applies its own default. This keeps placeholders such as
        ``level=None`` or ``numeric_only=None`` from being rejected by
        pandas versions that no longer accept them.

    Returns
    -------
    DataFrame or object
        When ``keep_df`` is true and pandas returned a Series, the Series
        is converted to a DataFrame labelled with ``method_name``: a single
        row for a row-wise reduction (axis None, 0, 'rows' or 'index'),
        otherwise a single column. In every other case the pandas result
        is returned unchanged.
    """
    keep_df = kwargs.pop('keep_df', True)
    # Read the axis before filtering: None means "pandas default", which the
    # orientation logic below treats the same as axis 0.
    axis = kwargs.get('axis')
    forwarded = {key: value for key, value in kwargs.items() if value is not None}
    result = getattr(self._df, method_name)(**forwarded)
    if not (keep_df and isinstance(result, pd.Series)):
        return result
    frame = result.to_frame(method_name)
    if axis in (None, 0, 'rows', 'index'):
        frame = frame.T
    return frame
107
129
108
130
def display (self , top = 100 , bottom = 0 , max_columns = None ):
109
131
"""
@@ -121,31 +143,19 @@ def display(self, top=100, bottom=0, max_columns=None):
121
143
Number of rows to display from the bottom of the DataFrame.
122
144
When <=0, no DataFrame is displayed
123
145
146
+ max_columns : int, default None
147
+ Controls the pd.options.display.max_columns property.
148
+ When None (default), all column get displayed.
149
+
124
150
Returns
125
151
-------
126
152
None
127
153
"""
128
154
with pd .option_context ('display.max_rows' , None , 'display.max_columns' , max_columns ):
129
155
if top > 0 :
130
- display (df .head (top ).style .set_caption (f'Top { top } rows' ))
156
+ display (self . _df .head (top ).style .set_caption (f'Top { top } rows' ))
131
157
if bottom > 0 :
132
- display (df .tail (bottom ).style .set_caption (f'Bottom { bottom } rows' )))
133
-
134
-
135
-
136
-
137
-
138
- def _agg (self , method_name , ** kwargs ):
139
- keep_df = kwargs .pop ('keep_df' )
140
- df = getattr (self ._df , method_name )(** kwargs )
141
- if keep_df :
142
- df = df .to_frame (name )
143
- if axis in [None , 0 , 'rows' , 'index' ]:
144
- df = df .T
145
- return df
146
-
147
-
148
-
158
+ display (self ._df .tail (bottom ).style .set_caption (f'Bottom { bottom } rows' ))
149
159
150
160
def reset_index (self , level = None , drop = False , inplace = False , col_level = 0 ,
151
161
col_fill = '' , names = None ):
@@ -159,13 +169,46 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
159
169
df = self ._df .rename_axis (names )
160
170
return df .reset_index (level , drop , inplace , col_level , col_fill )
161
171
172
def rename(self, index=None, columns=None, copy=True, inplace=False):
    """
    Rename index and/or column labels of the wrapped DataFrame.

    Parameters
    ----------
    index, columns, copy, inplace
        Forwarded unchanged, as keyword arguments, to ``DataFrame.rename``.

    Returns
    -------
    Whatever ``DataFrame.rename`` returns for the given arguments.
    """
    return self._df.rename(index=index, columns=columns, copy=copy, inplace=inplace)
174
+
162
175
163
176
class Select:
    """Label-based selection helper around a DataFrame."""

    def __init__(self, df):
        """Store the DataFrame that selections are made from."""
        self._df = df

    def __getitem__(self, item):
        """Return ``df.loc[item]`` for the stored DataFrame."""
        locator = self._df.loc
        return locator[item]
183
+
184
+
185
class Op:
    """Namespace exposing the DataFrame's arithmetic and comparison methods."""

    def __init__(self, df):
        """
        Bind each operator method of ``df`` as an attribute of this object.

        Parameters
        ----------
        df : DataFrame
            DataFrame whose bound methods are re-exported under the same
            names.
        """
        self._df = df
        arithmetic = ('add', 'sub', 'mul', 'div', 'truediv', 'floordiv', 'mod', 'pow', 'dot')
        reflected = ('radd', 'rsub', 'rmul', 'rdiv', 'rtruediv', 'rfloordiv', 'rmod', 'rpow')
        comparison = ('lt', 'gt', 'le', 'ge', 'ne', 'eq')

        for name in arithmetic + reflected + comparison:
            setattr(self, name, getattr(self._df, name))
196
+
197
class To:
    """Namespace exposing the DataFrame's ``to_*`` writers without the prefix."""

    def __init__(self, df):
        """
        Bind each available ``df.to_<name>`` method as ``self.<name>``.

        Parameters
        ----------
        df : DataFrame
            DataFrame whose export methods are re-exported.

        Notes
        -----
        Writers that the installed pandas does not provide (for example
        ``to_gbq``, which newer pandas releases no longer ship) are skipped,
        so constructing this object never fails because of them. Accessing
        a skipped name raises AttributeError as for any missing attribute.
        """
        self._df = df
        methods = ['parquet', 'pickle', 'csv', 'hdf', 'sql', 'dict', 'excel',
                   'json', 'html', 'feather', 'latex', 'stata', 'gbq', 'records',
                   'string', 'clipboard', 'markdown']

        for method in methods:
            old_method = getattr(df, 'to_' + method, None)
            if old_method is not None:
                setattr(self, method, old_method)
208
+
209
+
210
# Method names that receive the shared ``keep_df`` documentation when the
# docstrings are patched further down in this module.
# NOTE(review): 'mode' is listed here, but the visible ``mode`` method takes
# no ``keep_df`` argument -- confirm that documenting it is intended.
_agg_methods = [
    'max',
    'min',
    'mean',
    'median',
    'sum',
    'std',
    'var',
    'count',
    'nunique',
    'mode',
]
169
212
_keep_df_doc = """
170
213
keep_df : bool, default True
171
214
When True, returns the result as a DataFrame, keeping the
@@ -178,11 +221,12 @@ def __getitem__(self, item):
178
221
pd_method = getattr (pd .DataFrame , method , None )
179
222
if pd_method is not None :
180
223
pd_doc = getattr (pd_method , '__doc__' )
224
+ msp_method = getattr (_MSP , method )
181
225
if method in _agg_methods :
182
226
msp_doc = _keep_df_doc
183
227
else :
184
- msp_method = getattr (_MSP , method )
185
- msp_doc = getattr ( msp_method , '__doc__' )
228
+ msp_doc = getattr (msp_method , '__doc__' ) or ''
229
+
186
230
msp_doc = [line .strip () for line in msp_doc .split ('\n ' )[1 :- 1 ]]
187
231
msp_doc = '\n ' .join (msp_doc ) + '\n \n '
188
232
new_msp_doc = re .sub ('(?=Returns\n ---)' , msp_doc , pd_doc )
0 commit comments