@@ -176,9 +176,28 @@ def replace_with_mask(array: pa.ChunkedArray, mask: pa.BooleanArray, value: pa.A
176
176
return pa .compute .if_else (mask , broadcast_value , array )
177
177
178
178
179
def convert_df_to_pa_scalar(df: pd.DataFrame, *, pa_type: pa.StructType | None) -> pa.Scalar:
    """Convert a DataFrame into a pyarrow struct scalar with one list field per column.

    Columns whose dtype is ``NestedDtype`` are converted through their
    extension array's ``to_pyarrow_scalar(list_struct=True)``; every other
    column is converted with ``pa.array`` and wrapped into a list scalar.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to convert. Each column becomes one field of the struct.
    pa_type : pa.StructType or None
        Target struct type. When given, its fields are placed first, in the
        order of ``pa_type.names``, followed by any remaining frame columns
        in their original frame order, and the assembled scalar is cast to
        this type. When None, the struct type is built from the per-column
        types and the frame's column order.

    Returns
    -------
    pa.Scalar
        Struct scalar holding the column data.

    Raises
    ------
    KeyError
        If ``pa_type`` names a field that is not a column of ``df``.
    """
    columns = df.columns
    if pa_type is not None:
        names = list(pa_type.names)
        known = set(names)
        # Keep the frame's own order for the extra columns. A plain set
        # difference would make the field order depend on hash iteration
        # order, which is not stable between interpreter runs.
        # dict.fromkeys de-duplicates labels while preserving first-seen order.
        extra = [column for column in dict.fromkeys(columns) if column not in known]
        columns = names + extra

    d = {}
    types = {}
    for column in columns:
        series = df[column]
        if isinstance(series.dtype, NestedDtype):
            # Nested columns know how to represent themselves as a scalar.
            scalar = series.array.to_pyarrow_scalar(list_struct=True)
            ty = scalar.type
        else:
            array = pa.array(series)
            ty = pa.list_(array.type)
            scalar = pa.scalar(array, type=ty)
        d[column] = scalar
        types[column] = ty

    result = pa.scalar(d, type=pa.struct(types), from_pandas=True)
    if pa_type is not None:
        # Coerce the per-column inferred types to the requested struct type.
        result = result.cast(pa_type)
    return result
182
201
183
202
184
203
class NestedExtensionArray (ExtensionArray ):
@@ -686,17 +705,37 @@ def _from_arrow_like(cls, arraylike, dtype: NestedDtype | None = None) -> Self:
686
705
raise ValueError (f"Cannot cast input to { dtype } " ) from None
687
706
return cls (cast_array )
688
707
689
def _convert_struct_scalar_to_df(
    self, value: pa.StructScalar, *, copy: bool, na_value: Any = None, pyarrow_dtypes: bool = False
) -> Any:
    """Build a pd.DataFrame from a struct scalar whose fields are equal-length list scalars.

    The input is not validated: the caller must pass a struct scalar in
    which every field is a list scalar and all lists have the same length.

    Parameters
    ----------
    value : pa.StructScalar
        The struct scalar to convert.
    copy : bool
        Forwarded to ``pd.Series`` to control copying of each field's data.
    na_value : Any, optional
        Returned as-is when ``value`` is null.
    pyarrow_dtypes : bool, optional
        If True, every column keeps the dtype reported by
        ``self.dtype.field_dtype``. If False, only fields with
        ``NestedDtype`` keep it and all other columns let pandas infer
        the dtype.

    Returns
    -------
    Any
        ``na_value`` for a null input, otherwise a DataFrame with one
        column per struct field.
    """
    value_is_null = pa.compute.is_null(value).as_py()
    if value_is_null:
        return na_value

    def build_column(field_name, field_values):
        column_dtype = self.dtype.field_dtype(field_name)
        # Per the original note, field_dtype yields pd.ArrowDtype for
        # non-nested fields; drop it unless pyarrow dtypes were requested
        # so pandas falls back to its ordinary dtype inference.
        if not (pyarrow_dtypes or isinstance(column_dtype, NestedDtype)):
            column_dtype = None
        return pd.Series(field_values.values, dtype=column_dtype, copy=copy, name=field_name)

    frame_columns = {
        field_name: build_column(field_name, field_values) for field_name, field_values in value.items()
    }
    return pd.DataFrame(frame_columns, copy=False)
700
739
701
740
@property
702
741
def _list_storage (self ):
0 commit comments