72
72
_MAX_WRITE_BATCH_SIZE = max (100000 , MAX_WRITE_BATCH_SIZE )
73
73
74
74
75
- def find_arrow_all (collection , query , * , schema = None , ** kwargs ):
75
+ def find_arrow_all (collection , query , * , schema = None , allow_invalid = False , ** kwargs ):
76
76
"""Method that returns the results of a find query as a
77
77
:class:`pyarrow.Table` instance.
78
78
@@ -83,14 +83,18 @@ def find_arrow_all(collection, query, *, schema=None, **kwargs):
83
83
- `schema` (optional): Instance of :class:`~pymongoarrow.schema.Schema`.
84
84
If the schema is not given, it will be inferred using the data in the
85
85
result set.
86
+ - `allow_invalid` (optional): If set to ``True``,
87
+ results will have all fields that do not conform to the schema silently converted to NaN.
86
88
87
89
Additional keyword-arguments passed to this method will be passed
88
90
directly to the underlying ``find`` operation.
89
91
90
92
:Returns:
91
93
An instance of :class:`pyarrow.Table`.
92
94
"""
93
- context = PyMongoArrowContext (schema , codec_options = collection .codec_options )
95
+ context = PyMongoArrowContext (
96
+ schema , codec_options = collection .codec_options , allow_invalid = allow_invalid
97
+ )
94
98
95
99
for opt in ("cursor_type" ,):
96
100
if kwargs .pop (opt , None ):
@@ -110,7 +114,7 @@ def find_arrow_all(collection, query, *, schema=None, **kwargs):
110
114
return context .finish ()
111
115
112
116
113
- def aggregate_arrow_all (collection , pipeline , * , schema = None , ** kwargs ):
117
+ def aggregate_arrow_all (collection , pipeline , * , schema = None , allow_invalid = False , ** kwargs ):
114
118
"""Method that returns the results of an aggregation pipeline as a
115
119
:class:`pyarrow.Table` instance.
116
120
@@ -121,14 +125,18 @@ def aggregate_arrow_all(collection, pipeline, *, schema=None, **kwargs):
121
125
- `schema` (optional): Instance of :class:`~pymongoarrow.schema.Schema`.
122
126
If the schema is not given, it will be inferred using the data in the
123
127
result set.
128
+ - `allow_invalid` (optional): If set to ``True``,
129
+ results will have all fields that do not conform to the schema silently converted to NaN.
124
130
125
131
Additional keyword-arguments passed to this method will be passed
126
132
directly to the underlying ``aggregate`` operation.
127
133
128
134
:Returns:
129
135
An instance of :class:`pyarrow.Table`.
130
136
"""
131
- context = PyMongoArrowContext (schema , codec_options = collection .codec_options )
137
+ context = PyMongoArrowContext (
138
+ schema , codec_options = collection .codec_options , allow_invalid = allow_invalid
139
+ )
132
140
133
141
if pipeline and ("$out" in pipeline [- 1 ] or "$merge" in pipeline [- 1 ]):
134
142
msg = (
@@ -165,7 +173,7 @@ def _arrow_to_pandas(arrow_table):
165
173
return arrow_table .to_pandas (split_blocks = True , self_destruct = True )
166
174
167
175
168
- def find_pandas_all (collection , query , * , schema = None , ** kwargs ):
176
+ def find_pandas_all (collection , query , * , schema = None , allow_invalid = False , ** kwargs ):
169
177
"""Method that returns the results of a find query as a
170
178
:class:`pandas.DataFrame` instance.
171
179
@@ -176,17 +184,21 @@ def find_pandas_all(collection, query, *, schema=None, **kwargs):
176
184
- `schema` (optional): Instance of :class:`~pymongoarrow.schema.Schema`.
177
185
If the schema is not given, it will be inferred using the data in the
178
186
result set.
187
+ - `allow_invalid` (optional): If set to ``True``,
188
+ results will have all fields that do not conform to the schema silently converted to NaN.
179
189
180
190
Additional keyword-arguments passed to this method will be passed
181
191
directly to the underlying ``find`` operation.
182
192
183
193
:Returns:
184
194
An instance of :class:`pandas.DataFrame`.
185
195
"""
186
- return _arrow_to_pandas (find_arrow_all (collection , query , schema = schema , ** kwargs ))
196
+ return _arrow_to_pandas (
197
+ find_arrow_all (collection , query , schema = schema , allow_invalid = allow_invalid , ** kwargs )
198
+ )
187
199
188
200
189
- def aggregate_pandas_all (collection , pipeline , * , schema = None , ** kwargs ):
201
+ def aggregate_pandas_all (collection , pipeline , * , schema = None , allow_invalid = False , ** kwargs ):
190
202
"""Method that returns the results of an aggregation pipeline as a
191
203
:class:`pandas.DataFrame` instance.
192
204
@@ -197,14 +209,20 @@ def aggregate_pandas_all(collection, pipeline, *, schema=None, **kwargs):
197
209
- `schema` (optional): Instance of :class:`~pymongoarrow.schema.Schema`.
198
210
If the schema is not given, it will be inferred using the data in the
199
211
result set.
212
+ - `allow_invalid` (optional): If set to ``True``,
213
+ results will have all fields that do not conform to the schema silently converted to NaN.
200
214
201
215
Additional keyword-arguments passed to this method will be passed
202
216
directly to the underlying ``aggregate`` operation.
203
217
204
218
:Returns:
205
219
An instance of :class:`pandas.DataFrame`.
206
220
"""
207
- return _arrow_to_pandas (aggregate_arrow_all (collection , pipeline , schema = schema , ** kwargs ))
221
+ return _arrow_to_pandas (
222
+ aggregate_arrow_all (
223
+ collection , pipeline , schema = schema , allow_invalid = allow_invalid , ** kwargs
224
+ )
225
+ )
208
226
209
227
210
228
def _arrow_to_numpy (arrow_table , schema = None ):
@@ -227,7 +245,7 @@ def _arrow_to_numpy(arrow_table, schema=None):
227
245
return container
228
246
229
247
230
- def find_numpy_all (collection , query , * , schema = None , ** kwargs ):
248
+ def find_numpy_all (collection , query , * , schema = None , allow_invalid = False , ** kwargs ):
231
249
"""Method that returns the results of a find query as a
232
250
:class:`dict` instance whose keys are field names and values are
233
251
:class:`~numpy.ndarray` instances bearing the appropriate dtype.
@@ -239,6 +257,8 @@ def find_numpy_all(collection, query, *, schema=None, **kwargs):
239
257
- `schema` (optional): Instance of :class:`~pymongoarrow.schema.Schema`.
240
258
If the schema is not given, it will be inferred using the data in the
241
259
result set.
260
+ - `allow_invalid` (optional): If set to ``True``,
261
+ results will have all fields that do not conform to the schema silently converted to NaN.
242
262
243
263
Additional keyword-arguments passed to this method will be passed
244
264
directly to the underlying ``find`` operation.
@@ -255,10 +275,13 @@ def find_numpy_all(collection, query, *, schema=None, **kwargs):
255
275
:Returns:
256
276
An instance of :class:`dict`.
257
277
"""
258
- return _arrow_to_numpy (find_arrow_all (collection , query , schema = schema , ** kwargs ), schema )
278
+ return _arrow_to_numpy (
279
+ find_arrow_all (collection , query , schema = schema , allow_invalid = allow_invalid , ** kwargs ),
280
+ schema ,
281
+ )
259
282
260
283
261
- def aggregate_numpy_all (collection , pipeline , * , schema = None , ** kwargs ):
284
+ def aggregate_numpy_all (collection , pipeline , * , schema = None , allow_invalid = False , ** kwargs ):
262
285
"""Method that returns the results of an aggregation pipeline as a
263
286
:class:`dict` instance whose keys are field names and values are
264
287
:class:`~numpy.ndarray` instances bearing the appropriate dtype.
@@ -270,6 +293,8 @@ def aggregate_numpy_all(collection, pipeline, *, schema=None, **kwargs):
270
293
- `schema` (optional): Instance of :class:`~pymongoarrow.schema.Schema`.
271
294
If the schema is not given, it will be inferred using the data in the
272
295
result set.
296
+ - `allow_invalid` (optional): If set to ``True``,
297
+ results will have all fields that do not conform to the schema silently converted to NaN.
273
298
274
299
Additional keyword-arguments passed to this method will be passed
275
300
directly to the underlying ``aggregate`` operation.
@@ -287,7 +312,10 @@ def aggregate_numpy_all(collection, pipeline, *, schema=None, **kwargs):
287
312
An instance of :class:`dict`.
288
313
"""
289
314
return _arrow_to_numpy (
290
- aggregate_arrow_all (collection , pipeline , schema = schema , ** kwargs ), schema
315
+ aggregate_arrow_all (
316
+ collection , pipeline , schema = schema , allow_invalid = allow_invalid , ** kwargs
317
+ ),
318
+ schema ,
291
319
)
292
320
293
321
@@ -326,7 +354,7 @@ def _arrow_to_polars(arrow_table: pa.Table):
326
354
return pl .from_arrow (arrow_table_without_extensions )
327
355
328
356
329
- def find_polars_all (collection , query , * , schema = None , ** kwargs ):
357
+ def find_polars_all (collection , query , * , schema = None , allow_invalid = False , ** kwargs ):
330
358
"""Method that returns the results of a find query as a
331
359
:class:`polars.DataFrame` instance.
332
360
@@ -337,6 +365,8 @@ def find_polars_all(collection, query, *, schema=None, **kwargs):
337
365
- `schema` (optional): Instance of :class:`~pymongoarrow.schema.Schema`.
338
366
If the schema is not given, it will be inferred using the data in the
339
367
result set.
368
+ - `allow_invalid` (optional): If set to ``True``,
369
+ results will have all fields that do not conform to the schema silently converted to NaN.
340
370
341
371
Additional keyword-arguments passed to this method will be passed
342
372
directly to the underlying ``find`` operation.
@@ -346,10 +376,12 @@ def find_polars_all(collection, query, *, schema=None, **kwargs):
346
376
347
377
.. versionadded:: 1.3
348
378
"""
349
- return _arrow_to_polars (find_arrow_all (collection , query , schema = schema , ** kwargs ))
379
+ return _arrow_to_polars (
380
+ find_arrow_all (collection , query , schema = schema , allow_invalid = allow_invalid , ** kwargs )
381
+ )
350
382
351
383
352
- def aggregate_polars_all (collection , pipeline , * , schema = None , ** kwargs ):
384
+ def aggregate_polars_all (collection , pipeline , * , schema = None , allow_invalid = False , ** kwargs ):
353
385
"""Method that returns the results of an aggregation pipeline as a
354
386
:class:`polars.DataFrame` instance.
355
387
@@ -360,14 +392,20 @@ def aggregate_polars_all(collection, pipeline, *, schema=None, **kwargs):
360
392
- `schema` (optional): Instance of :class:`~pymongoarrow.schema.Schema`.
361
393
If the schema is not given, it will be inferred using the data in the
362
394
result set.
395
+ - `allow_invalid` (optional): If set to ``True``,
396
+ results will have all fields that do not conform to the schema silently converted to NaN.
363
397
364
398
Additional keyword-arguments passed to this method will be passed
365
399
directly to the underlying ``aggregate`` operation.
366
400
367
401
:Returns:
368
402
An instance of :class:`polars.DataFrame`.
369
403
"""
370
- return _arrow_to_polars (aggregate_arrow_all (collection , pipeline , schema = schema , ** kwargs ))
404
+ return _arrow_to_polars (
405
+ aggregate_arrow_all (
406
+ collection , pipeline , schema = schema , allow_invalid = allow_invalid , ** kwargs
407
+ )
408
+ )
371
409
372
410
373
411
def _transform_bwe (bwe , offset ):
0 commit comments