@@ -7,23 +7,46 @@ pub(crate) fn run_end_encoded_cast<K: RunEndIndexType>(
7
7
) -> Result < ArrayRef , ArrowError > {
8
8
match array. data_type ( ) {
9
9
DataType :: RunEndEncoded ( _run_end_field, _values_field) => {
10
- let run_array = array. as_any ( ) . downcast_ref :: < RunArray < K > > ( ) . unwrap ( ) ;
10
+ let run_array = array
11
+ . as_any ( )
12
+ . downcast_ref :: < RunArray < K > > ( )
13
+ . ok_or_else ( || ArrowError :: CastError ( "Expected RunArray" . to_string ( ) ) ) ?;
11
14
12
15
let values = run_array. values ( ) ;
13
16
14
- // Cast the values to the target type
15
- let cast_values = cast_with_options ( values, to_type, cast_options) ?;
16
-
17
- // Create a PrimitiveArray from the run_ends buffer
18
- let run_ends_buffer = run_array. run_ends ( ) ;
19
- let run_ends_array =
20
- PrimitiveArray :: < K > :: from_iter_values ( run_ends_buffer. values ( ) . iter ( ) . copied ( ) ) ;
21
-
22
- // Create new RunArray with the same run_ends but cast values
23
- let new_run_array = RunArray :: < K > :: try_new ( & run_ends_array, cast_values. as_ref ( ) ) ?;
24
-
25
- Ok ( Arc :: new ( new_run_array) )
17
+ match to_type {
18
+ // CASE 1: Target is also RunEndEncoded — keep the run ends (index type K) and cast only the values
19
+ DataType :: RunEndEncoded ( _target_run_end_field, target_value_field) => {
20
+ let cast_values =
21
+ cast_with_options ( values, target_value_field. data_type ( ) , cast_options) ?;
22
+
23
+ let run_ends_array = PrimitiveArray :: < K > :: from_iter_values (
24
+ run_array. run_ends ( ) . values ( ) . iter ( ) . copied ( ) ,
25
+ ) ;
26
+
27
+ let new_run_array =
28
+ RunArray :: < K > :: try_new ( & run_ends_array, cast_values. as_ref ( ) ) ?;
29
+ Ok ( Arc :: new ( new_run_array) )
30
+ }
31
+
32
+ // CASE 2: Any other target type — expand each run to one value per logical row, then cast if the type differs
33
+ _ => {
34
+ let total_len = run_array. len ( ) ;
35
+ let indices = Int32Array :: from_iter_values (
36
+ ( 0 ..total_len) . map ( |i| run_array. get_physical_index ( i) as i32 ) ,
37
+ ) ;
38
+
39
+ let taken = take ( values. as_ref ( ) , & indices, None ) ?;
40
+
41
+ if taken. data_type ( ) != to_type {
42
+ cast_with_options ( taken. as_ref ( ) , to_type, cast_options)
43
+ } else {
44
+ Ok ( taken)
45
+ }
46
+ }
47
+ }
26
48
}
49
+
27
50
_ => Err ( ArrowError :: CastError ( format ! (
28
51
"Cannot cast array of type {:?} to RunEndEncodedArray" ,
29
52
array. data_type( )
@@ -76,12 +99,6 @@ pub(crate) fn cast_to_run_end_encoded<K: RunEndIndexType>(
76
99
) ?) ) ;
77
100
}
78
101
79
- // Step 3: Use a simpler approach - use existing Arrow builders for run-length encoding
80
- // This is a more robust implementation that handles all data types correctly
81
-
82
- // For now, we'll use a basic approach that works with the existing builder infrastructure
83
- // In a production implementation, you'd want to use type-specific comparison logic
84
-
85
102
// Create a temporary builder to construct the run array
86
103
// We'll iterate through and build runs by comparing adjacent elements
87
104
let mut run_ends_vec = Vec :: new ( ) ;
@@ -133,225 +150,3 @@ pub(crate) fn cast_to_run_end_encoded<K: RunEndIndexType>(
133
150
let run_array = RunArray :: < K > :: try_new ( & run_ends_array, values_array. as_ref ( ) ) ?;
134
151
Ok ( Arc :: new ( run_array) )
135
152
}
136
-
137
- #[ cfg( test) ]
138
- mod tests {
139
- use super :: * ;
140
- use arrow_array:: * ;
141
- use arrow_schema:: DataType ;
142
- use std:: sync:: Arc ;
143
-
144
- /// Test casting FROM RunEndEncoded to other types
145
- #[ test]
146
- fn test_run_end_encoded_to_primitive ( ) {
147
- // Create a RunEndEncoded array: [1, 1, 2, 2, 2, 3]
148
- let run_ends = Int32Array :: from ( vec ! [ 2 , 5 , 6 ] ) ;
149
- let values = Int32Array :: from ( vec ! [ 1 , 2 , 3 ] ) ;
150
- let run_array = RunArray :: < Int32Type > :: try_new ( & run_ends, & values) . unwrap ( ) ;
151
- let array_ref = Arc :: new ( run_array) as ArrayRef ;
152
-
153
- // Cast to Int64
154
- let cast_result = run_end_encoded_cast :: < Int32Type > (
155
- array_ref. as_ref ( ) ,
156
- & DataType :: Int64 ,
157
- & CastOptions :: default ( ) ,
158
- )
159
- . unwrap ( ) ;
160
-
161
- // Verify the result is a RunArray with Int64 values
162
- let result_run_array = cast_result
163
- . as_any ( )
164
- . downcast_ref :: < RunArray < Int32Type > > ( )
165
- . unwrap ( ) ;
166
-
167
- // Check that values were cast to Int64
168
- assert_eq ! ( result_run_array. values( ) . data_type( ) , & DataType :: Int64 ) ;
169
-
170
- // Check that run structure is preserved
171
- assert_eq ! ( result_run_array. run_ends( ) . values( ) , & [ 2 , 5 , 6 ] ) ;
172
-
173
- // Check that values are correct
174
- let values_array = result_run_array. values ( ) . as_primitive :: < Int64Type > ( ) ;
175
- assert_eq ! ( values_array. values( ) , & [ 1i64 , 2i64 , 3i64 ] ) ;
176
- }
177
-
178
- #[ test]
179
- fn test_run_end_encoded_to_string ( ) {
180
- // Create a RunEndEncoded array with Int32 values: [10, 10, 20, 30, 30]
181
- let run_ends = Int32Array :: from ( vec ! [ 2 , 3 , 5 ] ) ;
182
- let values = Int32Array :: from ( vec ! [ 10 , 20 , 30 ] ) ;
183
- let run_array = RunArray :: < Int32Type > :: try_new ( & run_ends, & values) . unwrap ( ) ;
184
- let array_ref = Arc :: new ( run_array) as ArrayRef ;
185
-
186
- // Cast to String
187
- let cast_result = run_end_encoded_cast :: < Int32Type > (
188
- array_ref. as_ref ( ) ,
189
- & DataType :: Utf8 ,
190
- & CastOptions :: default ( ) ,
191
- )
192
- . unwrap ( ) ;
193
-
194
- // Verify the result is a RunArray with String values
195
- let result_run_array = cast_result
196
- . as_any ( )
197
- . downcast_ref :: < RunArray < Int32Type > > ( )
198
- . unwrap ( ) ;
199
-
200
- // Check that values were cast to String
201
- assert_eq ! ( result_run_array. values( ) . data_type( ) , & DataType :: Utf8 ) ;
202
-
203
- // Check that run structure is preserved
204
- assert_eq ! ( result_run_array. run_ends( ) . values( ) , & [ 2 , 3 , 5 ] ) ;
205
-
206
- // Check that values are correct
207
- let values_array = result_run_array. values ( ) . as_string :: < i32 > ( ) ;
208
- assert_eq ! ( values_array. value( 0 ) , "10" ) ;
209
- assert_eq ! ( values_array. value( 1 ) , "20" ) ;
210
- assert_eq ! ( values_array. value( 2 ) , "30" ) ;
211
- }
212
-
213
- /// Test casting TO RunEndEncoded from other types
214
- #[ test]
215
- fn test_primitive_to_run_end_encoded ( ) {
216
- // Create an Int32 array with repeated values: [1, 1, 2, 2, 2, 3]
217
- let source_array = Int32Array :: from ( vec ! [ 1 , 1 , 2 , 2 , 2 , 3 ] ) ;
218
- let array_ref = Arc :: new ( source_array) as ArrayRef ;
219
-
220
- // Cast to RunEndEncoded<Int32, Int32>
221
- let cast_result = cast_to_run_end_encoded :: < Int32Type > (
222
- array_ref. as_ref ( ) ,
223
- & DataType :: Int32 ,
224
- & CastOptions :: default ( ) ,
225
- )
226
- . unwrap ( ) ;
227
-
228
- // Verify the result is a RunArray
229
- let result_run_array = cast_result
230
- . as_any ( )
231
- . downcast_ref :: < RunArray < Int32Type > > ( )
232
- . unwrap ( ) ;
233
-
234
- // Check run structure: runs should end at positions [2, 5, 6]
235
- assert_eq ! ( result_run_array. run_ends( ) . values( ) , & [ 2 , 5 , 6 ] ) ;
236
-
237
- // Check values: should be [1, 2, 3]
238
- let values_array = result_run_array. values ( ) . as_primitive :: < Int32Type > ( ) ;
239
- assert_eq ! ( values_array. values( ) , & [ 1 , 2 , 3 ] ) ;
240
- }
241
-
242
- #[ test]
243
- fn test_string_to_run_end_encoded ( ) {
244
- // Create a String array with repeated values: ["a", "a", "b", "c", "c"]
245
- let source_array = StringArray :: from ( vec ! [ "a" , "a" , "b" , "c" , "c" ] ) ;
246
- let array_ref = Arc :: new ( source_array) as ArrayRef ;
247
-
248
- // Cast to RunEndEncoded<Int32, String>
249
- let cast_result = cast_to_run_end_encoded :: < Int32Type > (
250
- array_ref. as_ref ( ) ,
251
- & DataType :: Utf8 ,
252
- & CastOptions :: default ( ) ,
253
- )
254
- . unwrap ( ) ;
255
-
256
- // Verify the result is a RunArray
257
- let result_run_array = cast_result
258
- . as_any ( )
259
- . downcast_ref :: < RunArray < Int32Type > > ( )
260
- . unwrap ( ) ;
261
-
262
- // Check run structure: runs should end at positions [2, 3, 5]
263
- assert_eq ! ( result_run_array. run_ends( ) . values( ) , & [ 2 , 3 , 5 ] ) ;
264
-
265
- // Check values: should be ["a", "b", "c"]
266
- let values_array = result_run_array. values ( ) . as_string :: < i32 > ( ) ;
267
- assert_eq ! ( values_array. value( 0 ) , "a" ) ;
268
- assert_eq ! ( values_array. value( 1 ) , "b" ) ;
269
- assert_eq ! ( values_array. value( 2 ) , "c" ) ;
270
- }
271
-
272
- #[ test]
273
- fn test_cast_with_type_conversion ( ) {
274
- // Create an Int32 array: [1, 1, 2, 2, 3]
275
- let source_array = Int32Array :: from ( vec ! [ 1 , 1 , 2 , 2 , 3 ] ) ;
276
- let array_ref = Arc :: new ( source_array) as ArrayRef ;
277
-
278
- // Cast to RunEndEncoded<Int32, String> (values get converted to strings)
279
- let cast_result = cast_to_run_end_encoded :: < Int32Type > (
280
- array_ref. as_ref ( ) ,
281
- & DataType :: Utf8 ,
282
- & CastOptions :: default ( ) ,
283
- )
284
- . unwrap ( ) ;
285
-
286
- // Verify the result is a RunArray with String values
287
- let result_run_array = cast_result
288
- . as_any ( )
289
- . downcast_ref :: < RunArray < Int32Type > > ( )
290
- . unwrap ( ) ;
291
-
292
- // Check that values were converted to strings
293
- assert_eq ! ( result_run_array. values( ) . data_type( ) , & DataType :: Utf8 ) ;
294
-
295
- // Check run structure: runs should end at positions [2, 4, 5]
296
- assert_eq ! ( result_run_array. run_ends( ) . values( ) , & [ 2 , 4 , 5 ] ) ;
297
-
298
- // Check values: should be ["1", "2", "3"]
299
- let values_array = result_run_array. values ( ) . as_string :: < i32 > ( ) ;
300
- assert_eq ! ( values_array. value( 0 ) , "1" ) ;
301
- assert_eq ! ( values_array. value( 1 ) , "2" ) ;
302
- assert_eq ! ( values_array. value( 2 ) , "3" ) ;
303
- }
304
-
305
- #[ test]
306
- fn test_empty_array_to_run_end_encoded ( ) {
307
- // Create an empty Int32 array
308
- let source_array = Int32Array :: from ( Vec :: < i32 > :: new ( ) ) ;
309
- let array_ref = Arc :: new ( source_array) as ArrayRef ;
310
-
311
- // Cast to RunEndEncoded<Int32, Int32>
312
- let cast_result = cast_to_run_end_encoded :: < Int32Type > (
313
- array_ref. as_ref ( ) ,
314
- & DataType :: Int32 ,
315
- & CastOptions :: default ( ) ,
316
- )
317
- . unwrap ( ) ;
318
-
319
- // Verify the result is an empty RunArray
320
- let result_run_array = cast_result
321
- . as_any ( )
322
- . downcast_ref :: < RunArray < Int32Type > > ( )
323
- . unwrap ( ) ;
324
-
325
- // Check that both run_ends and values are empty
326
- assert_eq ! ( result_run_array. run_ends( ) . len( ) , 0 ) ;
327
- assert_eq ! ( result_run_array. values( ) . len( ) , 0 ) ;
328
- }
329
-
330
- #[ test]
331
- fn test_run_end_encoded_with_nulls ( ) {
332
- // Create a RunEndEncoded array with nulls: [1, 1, null, 2, 2]
333
- let run_ends = Int32Array :: from ( vec ! [ 2 , 3 , 5 ] ) ;
334
- let values = Int32Array :: from ( vec ! [ Some ( 1 ) , None , Some ( 2 ) ] ) ;
335
- let run_array = RunArray :: < Int32Type > :: try_new ( & run_ends, & values) . unwrap ( ) ;
336
- let array_ref = Arc :: new ( run_array) as ArrayRef ;
337
-
338
- // Cast to String
339
- let cast_result = run_end_encoded_cast :: < Int32Type > (
340
- array_ref. as_ref ( ) ,
341
- & DataType :: Utf8 ,
342
- & CastOptions :: default ( ) ,
343
- )
344
- . unwrap ( ) ;
345
-
346
- // Verify the result preserves nulls
347
- let result_run_array = cast_result
348
- . as_any ( )
349
- . downcast_ref :: < RunArray < Int32Type > > ( )
350
- . unwrap ( ) ;
351
-
352
- let values_array = result_run_array. values ( ) . as_string :: < i32 > ( ) ;
353
- assert_eq ! ( values_array. value( 0 ) , "1" ) ;
354
- assert ! ( values_array. is_null( 1 ) ) ;
355
- assert_eq ! ( values_array. value( 2 ) , "2" ) ;
356
- }
357
- }
0 commit comments