3
3
import pytest
4
4
from nested_pandas import NestedDtype
5
5
from nested_pandas .series .utils import (
6
+ align_chunked_struct_list_offsets ,
7
+ align_struct_list_offsets ,
6
8
nested_types_mapper ,
7
9
struct_field_names ,
8
10
transpose_list_struct_array ,
9
11
transpose_list_struct_scalar ,
10
12
transpose_list_struct_type ,
11
13
transpose_struct_list_array ,
12
14
transpose_struct_list_type ,
13
- validate_struct_list_array_for_equal_lengths ,
15
+ validate_struct_list_type ,
14
16
)
15
17
16
18
17
- def test_validate_struct_list_array_for_equal_lengths ():
18
- """Test validate_struct_list_array_for_equal_lengths function."""
19
+ def test_align_struct_list_offsets ():
20
+ """Test align_struct_list_offsets function."""
19
21
# Raises for wrong types
20
22
with pytest .raises (ValueError ):
21
- validate_struct_list_array_for_equal_lengths (pa .array ([], type = pa .int64 ()))
23
+ align_struct_list_offsets (pa .array ([], type = pa .int64 ()))
22
24
with pytest .raises (ValueError ):
23
- validate_struct_list_array_for_equal_lengths (pa .array ([], type = pa .list_ (pa .int64 ())))
25
+ align_struct_list_offsets (pa .array ([], type = pa .list_ (pa .int64 ())))
24
26
25
27
# Raises if one of the fields is not a ListArray
26
28
with pytest .raises (ValueError ):
27
- validate_struct_list_array_for_equal_lengths (
29
+ align_struct_list_offsets (
28
30
pa .StructArray .from_arrays ([pa .array ([[1 , 2 ], [3 , 4 , 5 ]]), pa .array ([1 , 2 ])], ["a" , "b" ])
29
31
)
30
32
31
33
# Raises for mismatched lengths
32
34
with pytest .raises (ValueError ):
33
- validate_struct_list_array_for_equal_lengths (
35
+ align_struct_list_offsets (
34
36
pa .StructArray .from_arrays (
35
37
[pa .array ([[1 , 2 ], [3 , 4 , 5 ]]), pa .array ([[1 , 2 , 3 ], [4 , 5 ]])], ["a" , "b" ]
36
38
)
@@ -43,7 +45,96 @@ def test_validate_struct_list_array_for_equal_lengths():
43
45
],
44
46
names = ["a" , "b" ],
45
47
)
46
- assert validate_struct_list_array_for_equal_lengths (input_array ) is None
48
+ assert align_struct_list_offsets (input_array ) is input_array
49
+
50
+ a = pa .array ([[0 , 0 , 0 ], [1 , 2 ], [3 , 4 ], [], [5 , 6 , 7 ]])[1 :]
51
+ assert a .offsets [0 ].as_py () == 3
52
+ b = pa .array ([["x" , "y" ], ["y" , "x" ], [], ["d" , "e" , "f" ]])
53
+ assert b .offsets [0 ].as_py () == 0
54
+ input_array = pa .StructArray .from_arrays (
55
+ arrays = [a , b ],
56
+ names = ["a" , "b" ],
57
+ )
58
+ aligned_array = align_struct_list_offsets (input_array )
59
+ assert aligned_array is not input_array
60
+ assert aligned_array .equals (input_array )
61
+
62
+
63
def test_align_chunked_struct_list_offsets():
    """Check align_chunked_struct_list_offsets on plain and chunked struct-list inputs.

    Four inputs are exercised: a single struct array, a chunked array whose
    chunks are already "aligned", an "aligned" chunked array whose list
    offsets do not start with zero, and a "non-aligned" chunked array. In every
    case the output must compare equal to the input values.
    """

    def build_struct(field_a, field_b):
        # Pack two list arrays into a struct array with fields "a" and "b".
        return pa.StructArray.from_arrays(arrays=[field_a, field_b], names=["a", "b"])

    # Input is an array, output is chunked array
    ints = pa.array([[1, 2], [3, 4], [], [5, 6, 7]])
    strs = pa.array([["x", "y"], ["y", "x"], [], ["d", "e", "f"]])
    struct_input = build_struct(ints, strs)
    result = align_chunked_struct_list_offsets(struct_input)
    assert isinstance(result, pa.ChunkedArray)
    assert result.equals(pa.chunked_array([struct_input]))

    # Input is an "aligned" chunked array
    chunked_input = pa.chunked_array([build_struct(ints, strs)] * 2)
    result = align_chunked_struct_list_offsets(chunked_input)
    assert result.equals(chunked_input)

    # Input is an "aligned" chunked array, but offsets do not start with zero:
    # both fields are slices that drop a leading element.
    ints = pa.array([[0, 0, 0], [1, 2], [3, 4], [], [5, 6, 7]])[1:]
    strs = pa.array([["a", "a", "a", "a"], ["x", "y"], ["y", "x"], [], ["d", "e", "f"]])[1:]
    chunked_input = pa.chunked_array([build_struct(ints, strs)] * 3)
    result = align_chunked_struct_list_offsets(chunked_input)
    assert result.equals(chunked_input)

    # Input is a "non-aligned" chunked array: only field "a" is a slice,
    # field "b" is built directly.
    ints = pa.array([[0, 0, 0], [1, 2], [3, 4], [], [5, 6, 7]])[1:]
    strs = pa.array([["x", "y"], ["y", "x"], [], ["d", "e", "f"]])
    chunked_input = pa.chunked_array([build_struct(ints, strs)] * 4)
    result = align_chunked_struct_list_offsets(chunked_input)
    assert result.equals(chunked_input)
118
+
119
+
120
def test_validate_struct_list_type():
    """Check that validate_struct_list_type raises for invalid types and returns None otherwise."""
    # Each of these types must be rejected with ValueError.
    rejected_types = [
        # Not a struct at all
        pa.float64(),
        # A list of structs rather than a struct of lists
        pa.list_(pa.struct({"a": pa.int64()})),
        # A struct whose only field is not a list
        pa.struct({"a": pa.float64()}),
        # A struct mixing a list field with a non-list field
        pa.struct({"a": pa.list_(pa.float64()), "b": pa.float64()}),
    ]
    for bad_type in rejected_types:
        with pytest.raises(ValueError):
            validate_struct_list_type(bad_type)

    # A struct whose fields are all lists passes validation and returns None.
    accepted_type = pa.struct({"a": pa.list_(pa.float64()), "b": pa.list_(pa.float64())})
    assert validate_struct_list_type(accepted_type) is None
47
138
48
139
49
140
def test_transpose_struct_list_type ():
0 commit comments