Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/nested_pandas/datasets/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def generate_data(n_base, n_layer, seed=None) -> NestedFrame:

# Generate base data
base_data = {"a": randomstate.random(n_base), "b": randomstate.random(n_base) * 2}
base_nf = NestedFrame(data=base_data)
base_nf = NestedFrame(data=base_data).convert_dtypes(dtype_backend="pyarrow")

# In case of int, create a single nested layer called "nested"
if isinstance(n_layer, int):
Expand All @@ -50,6 +50,7 @@ def generate_data(n_base, n_layer, seed=None) -> NestedFrame:
"index": np.arange(layer_size * n_base) % n_base,
}
layer_nf = NestedFrame(data=layer_data).set_index("index")
layer_nf = layer_nf.convert_dtypes(dtype_backend="pyarrow")
base_nf = base_nf.add_nested(layer_nf, key)
return base_nf
else:
Expand Down
14 changes: 7 additions & 7 deletions src/nested_pandas/nestedframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@ def drop(
>>> nf
a b nested
0 0.417022 0.184677 [{flux: 31.551563, band: 'r'}; …] (5 rows)
1 0.720324 0.372520 [{flux: 68.650093, band: 'g'}; …] (5 rows)
1 0.720324 0.37252 [{flux: 68.650093, band: 'g'}; …] (5 rows)
2 0.000114 0.691121 [{flux: 83.462567, band: 'g'}; …] (5 rows)
3 0.302333 0.793535 [{flux: 1.828828, band: 'g'}; …] (5 rows)
4 0.146756 1.077633 [{flux: 75.014431, band: 'g'}; …] (5 rows)
Expand Down Expand Up @@ -903,7 +903,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> NestedFrame |
>>> nf
a b nested
0 0.417022 0.184677 [{t: 13.40935, flux: 98.886109, band: 'g'}; …]...
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
1 0.720324 0.37252 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
2 0.000114 0.691121 [{t: 11.173797, flux: 28.044399, band: 'r'}; …...
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
4 0.146756 1.077633 [{t: 17.527783, flux: 13.002857, band: 'r'}; …...
Expand Down Expand Up @@ -1087,7 +1087,7 @@ def dropna(
>>> nf
a b nested
0 0.417022 0.184677 None
1 0.720324 0.372520 [{t: 19.365232, flux: 90.85955, band: 'r'}]
1 0.720324 0.37252 [{t: 19.365232, flux: 90.85955, band: 'r'}]
2 0.000114 0.691121 [{t: 19.157791, flux: 14.672857, band: 'r'}]
3 0.302333 0.793535 None
4 0.146756 1.077633 None
Expand All @@ -1096,7 +1096,7 @@ def dropna(
>>> # dropna removes rows with those emptied dataframes
>>> nf.dropna(subset="nested")
a b nested
1 0.720324 0.372520 [{t: 19.365232, flux: 90.85955, band: 'r'}]
1 0.720324 0.37252 [{t: 19.365232, flux: 90.85955, band: 'r'}]
2 0.000114 0.691121 [{t: 19.157791, flux: 14.672857, band: 'r'}]


Expand All @@ -1107,15 +1107,15 @@ def dropna(
>>> nf.dropna(on_nested="nested")
a b nested
0 0.417022 0.184677 [{t: 8.38389, flux: 31.551563, band: 'r'}; …] ...
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
1 0.720324 0.37252 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
4 0.146756 1.077633 [{t: 0.547752, flux: 75.014431, band: 'g'}; …]...
>>> # or on a specific nested column
>>> nf.dropna(subset="nested.t")
a b nested
0 0.417022 0.184677 [{t: 8.38389, flux: 31.551563, band: 'r'}; …] ...
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
1 0.720324 0.37252 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
4 0.146756 1.077633 [{t: 0.547752, flux: 75.014431, band: 'g'}; …]...
Expand Down Expand Up @@ -1226,7 +1226,7 @@ def sort_values(
>>> nf.sort_values(by="nested.band")
a b nested
0 0.417022 0.184677 [{t: 13.40935, flux: 98.886109, band: 'g'}; …]...
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
1 0.720324 0.37252 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
4 0.146756 1.077633 [{t: 0.547752, flux: 75.014431, band: 'g'}; …]...
Expand Down
2 changes: 1 addition & 1 deletion src/nested_pandas/series/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ def to_flatten_inner(self, field: str) -> pd.Series:
>>> nf
a b inner id
0 0.417022 0.184677 [{t: 8.38389, flux: 80.074457, band: 'r'}; …] ... 0
1 0.720324 0.372520 [{t: 13.70439, flux: 96.826158, band: 'g'}; …]... 0
1 0.720324 0.37252 [{t: 13.70439, flux: 96.826158, band: 'g'}; …]... 0
2 0.000114 0.691121 [{t: 4.089045, flux: 31.342418, band: 'g'}; …]... 0
3 0.302333 0.793535 [{t: 17.562349, flux: 69.232262, band: 'r'}; …... 1
4 0.146756 1.077633 [{t: 0.547752, flux: 87.638915, band: 'g'}; …]... 1
Expand Down
6 changes: 5 additions & 1 deletion src/nested_pandas/series/ext_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,11 @@ def _convert_struct_scalar_to_df(
copy=copy,
name=name,
)
return pd.DataFrame(series, copy=False)

res_df = pd.DataFrame(series, copy=False)
#non_nested = [col for col in res_df.columns if not isinstance(res_df[col].dtype, NestedDtype)]
#res_df[non_nested] = res_df[non_nested].convert_dtypes(dtype_backend="pyarrow")
return res_df

@property
def _list_storage(self):
Expand Down
2 changes: 1 addition & 1 deletion src/nested_pandas/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def count_nested(df, nested, by=None, join=True) -> NestedFrame:
>>> count_nested(nf, "nested")
a b nested n_nested
0 0.417022 0.184677 [{t: 8.38389, flux: 10.233443, band: 'g'}; …] ... 10
1 0.720324 0.372520 [{t: 13.70439, flux: 41.405599, band: 'g'}; …]... 10
1 0.720324 0.37252 [{t: 13.70439, flux: 41.405599, band: 'g'}; …]... 10
2 0.000114 0.691121 [{t: 4.089045, flux: 69.440016, band: 'g'}; …]... 10
3 0.302333 0.793535 [{t: 17.562349, flux: 41.417927, band: 'g'}; …... 10
4 0.146756 1.077633 [{t: 0.547752, flux: 4.995346, band: 'r'}; …] ... 10
Expand Down
3 changes: 3 additions & 0 deletions tests/nested_pandas/series/test_ext_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,11 +709,14 @@ def test___getitem___with_integer():
nf = generate_data(10, 3)
# repeat index 3 and nest on it
nf["id"] = [0, 1, 2, 3, 3, 4, 5, 6, 7, 8]
#nf["id"] = nf["id"].astype(pd.ArrowDtype(pa.int64()))

nnf = NestedFrame.from_flat(nf, base_columns=[], on="id", name="outer")
ext_array = nnf["outer"].array

actual = ext_array[3]
desired = pd.DataFrame(nf.query("id == 3").drop("id", axis=1)).reset_index(drop=True)
#import pdb;pdb.set_trace()

assert_frame_equal(actual, desired)

Expand Down
Loading