diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index da3582766..eac765c4d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -91,9 +91,9 @@ jobs: - name: FFI unit tests run: | - cd examples/ffi-table-provider + cd examples/datafusion-ffi-example uv run --no-project maturin develop --uv - uv run --no-project pytest python/tests/_test_table_provider.py + uv run --no-project pytest python/tests/_test*.py - name: Cache the generated dataset id: cache-tpch-dataset diff --git a/docs/source/user-guide/common-operations/udf-and-udfa.rst b/docs/source/user-guide/common-operations/udf-and-udfa.rst index ffd7a05cb..110c1a7d0 100644 --- a/docs/source/user-guide/common-operations/udf-and-udfa.rst +++ b/docs/source/user-guide/common-operations/udf-and-udfa.rst @@ -242,3 +242,35 @@ determine which evaluate functions are called. }) df.select("a", exp_smooth(col("a")).alias("smooth_a")).show() + +Table Functions +--------------- + +User Defined Table Functions are slightly different than the other functions +described here. These functions take any number of `Expr` arguments, but only +literal expressions are supported. Table functions must return a Table +Provider as described in the ref:`_io_custom_table_provider` page. + +Once you have a table function, you can register it with the session context +by using :py:func:`datafusion.context.SessionContext.register_udtf`. + +There are examples of both rust backed and python based table functions in the +examples folder of the repository. If you have a rust backed table function +that you wish to expose via PyO3, you need to expose it as a ``PyCapsule``. + +.. code-block:: rust + + #[pymethods] + impl MyTableFunction { + fn __datafusion_table_function__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + let name = cr"datafusion_table_function".into(); + + let func = self.clone(); + let provider = FFI_TableFunction::new(Arc::new(func), None); + + PyCapsule::new(py, provider, Some(name)) + } + } diff --git a/examples/ffi-table-provider/.cargo/config.toml b/examples/datafusion-ffi-example/.cargo/config.toml similarity index 100% rename from examples/ffi-table-provider/.cargo/config.toml rename to examples/datafusion-ffi-example/.cargo/config.toml diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/datafusion-ffi-example/Cargo.lock similarity index 84% rename from examples/ffi-table-provider/Cargo.lock rename to examples/datafusion-ffi-example/Cargo.lock index 8d0edd515..11096ac69 100644 --- a/examples/ffi-table-provider/Cargo.lock +++ b/examples/datafusion-ffi-example/Cargo.lock @@ -73,7 +73,7 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "const-random", - "getrandom", + "getrandom 0.2.15", "once_cell", "version_check", "zerocopy", @@ -144,9 +144,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" +checksum = "3095aaf545942ff5abd46654534f15b03a90fba78299d661e045e5d587222f0d" dependencies = [ "arrow-arith", "arrow-array", @@ -165,9 +165,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" +checksum = "00752064ff47cee746e816ddb8450520c3a52cbad1e256f6fa861a35f86c45e7" dependencies = [ "arrow-array", "arrow-buffer", @@ -179,9 +179,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" +checksum = "cebfe926794fbc1f49ddd0cdaf898956ca9f6e79541efce62dabccfd81380472" dependencies = [ "ahash", "arrow-buffer", @@ -196,9 +196,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" +checksum = "0303c7ec4cf1a2c60310fc4d6bbc3350cd051a17bf9e9c0a8e47b4db79277824" dependencies = [ "bytes", "half", @@ -207,9 +207,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" +checksum = "335f769c5a218ea823d3760a743feba1ef7857cba114c01399a891c2fff34285" dependencies = [ "arrow-array", "arrow-buffer", @@ -228,9 +228,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" +checksum = "510db7dfbb4d5761826516cc611d97b3a68835d0ece95b034a052601109c0b1b" dependencies = [ "arrow-array", "arrow-cast", @@ -244,9 +244,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" +checksum = "e8affacf3351a24039ea24adab06f316ded523b6f8c3dbe28fbac5f18743451b" dependencies = [ "arrow-buffer", "arrow-schema", @@ -256,9 +256,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" +checksum = "69880a9e6934d9cba2b8630dd08a3463a91db8693b16b499d54026b6137af284" dependencies = [ "arrow-array", "arrow-buffer", @@ -270,9 +270,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" +checksum = "d8dafd17a05449e31e0114d740530e0ada7379d7cb9c338fd65b09a8130960b0" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,16 +283,18 @@ dependencies = [ "half", "indexmap", "lexical-core", + "memchr", "num", "serde", "serde_json", + "simdutf8", ] [[package]] name = "arrow-ord" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" +checksum = "895644523af4e17502d42c3cb6b27cb820f0cb77954c22d75c23a85247c849e1" dependencies = [ "arrow-array", "arrow-buffer", @@ -303,9 +305,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" +checksum = "9be8a2a4e5e7d9c822b2b8095ecd77010576d824f654d347817640acfc97d229" dependencies = [ "arrow-array", "arrow-buffer", @@ -316,18 +318,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" +checksum = "7450c76ab7c5a6805be3440dc2e2096010da58f7cab301fdc996a4ee3ee74e49" dependencies = [ - "bitflags 2.6.0", + "bitflags", ] [[package]] name = "arrow-select" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" +checksum = "aa5f5a93c75f46ef48e4001535e7b6c922eeb0aa20b73cf58d09e13d057490d8" dependencies = [ "ahash", "arrow-array", @@ -339,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" +checksum = "6e7005d858d84b56428ba2a98a107fe88c0132c61793cf6b8232a1f9bfc0452b" dependencies = [ "arrow-array", "arrow-buffer", @@ -368,11 +370,11 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.17" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "bzip2 0.4.4", + "bzip2", "flate2", "futures-core", "memchr", @@ -394,13 +396,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.83" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -441,9 +443,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bigdecimal" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" dependencies = [ "autocfg", "libm", @@ -454,15 +456,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "1.3.2" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "blake2" @@ -475,9 +471,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.4" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" dependencies = [ "arrayref", "arrayvec", @@ -530,38 +526,26 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" - -[[package]] -name = "bzip2" -version = "0.4.4" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "bzip2" -version = "0.5.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" dependencies = [ "bzip2-sys", - "libc", ] [[package]] name = "bzip2-sys" -version = "0.1.11+1.0.8" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" dependencies = [ "cc", - "libc", "pkg-config", ] @@ -584,14 +568,14 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", - "windows-targets", + "windows-link", ] [[package]] @@ -641,7 +625,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom", + "getrandom 0.2.15", "once_cell", "tiny-keccak", ] @@ -765,38 +749,43 @@ dependencies = [ [[package]] name = "datafusion" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" +checksum = "ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" dependencies = [ "arrow", - "arrow-array", "arrow-ipc", "arrow-schema", - "async-compression", "async-trait", "bytes", - "bzip2 0.5.0", + "bzip2", "chrono", "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", + "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", + "datafusion-session", "datafusion-sql", "flate2", "futures", - "glob", "itertools 0.14.0", "log", "object_store", @@ -807,7 +796,6 @@ dependencies = [ "sqlparser", "tempfile", "tokio", - "tokio-util", "url", "uuid", "xz2", @@ -816,37 +804,62 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" +checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" dependencies = [ "arrow", "async-trait", "dashmap", "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-plan", + "datafusion-session", "datafusion-sql", "futures", "itertools 0.14.0", "log", + "object_store", "parking_lot", - "sqlparser", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "log", + "object_store", + "tokio", ] [[package]] name = "datafusion-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" +checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ipc", - "arrow-schema", "base64", "half", "hashbrown 0.14.5", @@ -864,25 +877,143 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" +checksum = "5167bb1d2ccbb87c6bc36c295274d7a0519b14afcfdaf401d53cbcaa4ef4968b" dependencies = [ + "futures", + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "parquet", + "rand", + "tempfile", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-datasource-parquet" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", "log", + "object_store", + "parking_lot", + "parquet", + "rand", "tokio", ] [[package]] name = "datafusion-doc" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" +checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" [[package]] name = "datafusion-execution" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" +checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" dependencies = [ "arrow", "dashmap", @@ -899,9 +1030,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" +checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" dependencies = [ "arrow", "chrono", @@ -920,25 +1051,25 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" +checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" dependencies = [ "arrow", "datafusion-common", + "indexmap", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-ffi" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" +checksum = "5cf3fe9ab492c56daeb7beed526690d33622d388b8870472e0b7b7f55490338c" dependencies = [ "abi_stable", "arrow", - "arrow-array", "arrow-schema", "async-ffi", "async-trait", @@ -953,9 +1084,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" +checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" dependencies = [ "arrow", "arrow-buffer", @@ -969,7 +1100,6 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", - "hashbrown 0.14.5", "hex", "itertools 0.14.0", "log", @@ -983,14 +1113,12 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" +checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1006,9 +1134,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" +checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" dependencies = [ "ahash", "arrow", @@ -1019,15 +1147,12 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" +checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1043,9 +1168,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" +checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" dependencies = [ "arrow", "async-trait", @@ -1059,9 +1184,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" +checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1076,9 +1201,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" +checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1086,20 +1211,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" +checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" dependencies = [ "datafusion-expr", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] name = "datafusion-optimizer" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" +checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" dependencies = [ "arrow", "chrono", @@ -1116,15 +1241,12 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" +checksum = "64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1141,13 +1263,12 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" +checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" dependencies = [ "ahash", "arrow", - "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", @@ -1156,12 +1277,11 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" +checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1169,23 +1289,19 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "futures", "itertools 0.14.0", "log", "recursive", - "url", ] [[package]] name = "datafusion-physical-plan" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" +checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", "arrow-schema", "async-trait", @@ -1210,9 +1326,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" +checksum = "a4a1afb2bdb05de7ff65be6883ebfd4ec027bd9f1f21c46aa3afd01927160a83" dependencies = [ "arrow", "chrono", @@ -1226,24 +1342,46 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" +checksum = "35b7a5876ebd6b564fb9a1fd2c3a2a9686b787071a256b47e4708f0916f9e46f" dependencies = [ "arrow", "datafusion-common", "prost", ] +[[package]] +name = "datafusion-session" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + [[package]] name = "datafusion-sql" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" +checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", "bigdecimal", "datafusion-common", "datafusion-expr", @@ -1273,7 +1411,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -1325,24 +1463,31 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "24.12.23" +version = "25.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" dependencies = [ - "bitflags 1.3.2", + "bitflags", "rustc_version", ] [[package]] name = "flate2" -version = "1.0.34" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", + "libz-rs-sys", "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1408,7 +1553,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -1468,7 +1613,19 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -1485,9 +1642,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "half" -version = "2.4.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ "cfg-if", "crunchy", @@ -1522,6 +1679,17 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "http" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "humantime" version = "2.1.0" @@ -1666,7 +1834,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -1692,9 +1860,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown 0.15.1", @@ -1826,9 +1994,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.162" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libloading" @@ -1846,6 +2014,15 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +[[package]] +name = "libz-rs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6489ca9bd760fe9642d7644e827b0c9add07df89857b0416ee15c1cc1a3b8c5a" +dependencies = [ + "zlib-rs", +] + [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -1880,7 +2057,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" dependencies = [ - "twox-hash", + "twox-hash 1.6.3", ] [[package]] @@ -1921,9 +2098,9 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -2013,19 +2190,20 @@ dependencies = [ [[package]] name = "object_store" -version = "0.11.1" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" +checksum = "e9ce831b09395f933addbc56d894d889e4b226eba304d4e7adbab591e26daf1e" dependencies = [ "async-trait", "bytes", "chrono", "futures", + "http", "humantime", - "itertools 0.13.0", + "itertools 0.14.0", "parking_lot", "percent-encoding", - "snafu", + "thiserror", "tokio", "tracing", "url", @@ -2072,9 +2250,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.1.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" +checksum = "cd31a8290ac5b19f09ad77ee7a1e6a541f1be7674ad410547d5f1eef6eef4a9c" dependencies = [ "ahash", "arrow-array", @@ -2102,9 +2280,8 @@ dependencies = [ "snap", "thrift", "tokio", - "twox-hash", + "twox-hash 2.1.0", "zstd", - "zstd-sys", ] [[package]] @@ -2211,9 +2388,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -2238,7 +2415,7 @@ dependencies = [ "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -2297,7 +2474,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -2310,18 +2487,24 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] name = "quote" -version = "1.0.37" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.8.5" @@ -2349,7 +2532,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.15", ] [[package]] @@ -2369,7 +2552,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -2378,7 +2561,7 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "bitflags 2.6.0", + "bitflags", ] [[package]] @@ -2440,7 +2623,7 @@ version = "0.38.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0" dependencies = [ - "bitflags 2.6.0", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -2476,9 +2659,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.25" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "seq-macro" @@ -2503,7 +2686,7 @@ checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -2562,27 +2745,6 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -[[package]] -name = "snafu" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" -dependencies = [ - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 2.0.87", -] - [[package]] name = "snap" version = "1.1.1" @@ -2591,11 +2753,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sqlparser" -version = "0.53.0" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -2607,7 +2770,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -2651,7 +2814,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -2673,9 +2836,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.87" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -2690,7 +2853,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -2712,6 +2875,26 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "thiserror" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "thrift" version = "0.17.0" @@ -2744,9 +2927,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.41.1" +version = "1.44.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" dependencies = [ "backtrace", "bytes", @@ -2756,20 +2939,20 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] name = "tokio-util" -version = "0.7.12" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" dependencies = [ "bytes", "futures-core", @@ -2797,7 +2980,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -2834,6 +3017,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "twox-hash" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" + [[package]] name = "typed-arena" version = "2.0.2" @@ -2895,11 +3084,13 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.11.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ - "getrandom", + "getrandom 0.3.2", + "js-sys", + "wasm-bindgen", ] [[package]] @@ -2924,6 +3115,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.95" @@ -2946,7 +3146,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", "wasm-bindgen-shared", ] @@ -2968,7 +3168,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3029,6 +3229,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + [[package]] name = "windows-sys" version = "0.52.0" @@ -3111,6 +3317,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + [[package]] name = "write16" version = "1.0.0" @@ -3152,7 +3367,7 @@ checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", "synstructure", ] @@ -3174,7 +3389,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] [[package]] @@ -3194,7 +3409,7 @@ checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", "synstructure", ] @@ -3217,9 +3432,15 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.100", ] +[[package]] +name = "zlib-rs" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "868b928d7949e09af2f6086dfc1e01936064cc7a819253bce650d4e2a2d63ba8" + [[package]] name = "zstd" version = "0.13.2" diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/datafusion-ffi-example/Cargo.toml similarity index 83% rename from examples/ffi-table-provider/Cargo.toml rename to examples/datafusion-ffi-example/Cargo.toml index f4e4fda79..d90d656b3 100644 --- a/examples/ffi-table-provider/Cargo.toml +++ b/examples/datafusion-ffi-example/Cargo.toml @@ -21,16 +21,16 @@ version = "0.1.0" edition = "2021" [dependencies] -datafusion = { version = "45.0.0" } -datafusion-ffi = { version = "45.0.0" } +datafusion = { version = "47.0.0" } +datafusion-ffi = { version = "47.0.0" } pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } -arrow = { version = "54" } -arrow-array = { version = "54" } -arrow-schema = { version = "54" } +arrow = { version = "55" } +arrow-array = { version = "55" } +arrow-schema = { version = "55" } [build-dependencies] pyo3-build-config = "0.23" [lib] -name = "ffi_table_provider" +name = "datafusion_ffi_example" crate-type = ["cdylib", "rlib"] diff --git a/examples/ffi-table-provider/build.rs b/examples/datafusion-ffi-example/build.rs similarity index 100% rename from examples/ffi-table-provider/build.rs rename to examples/datafusion-ffi-example/build.rs diff --git a/examples/ffi-table-provider/pyproject.toml b/examples/datafusion-ffi-example/pyproject.toml similarity index 97% rename from examples/ffi-table-provider/pyproject.toml rename to examples/datafusion-ffi-example/pyproject.toml index 9cd25b423..0c54df95c 100644 --- a/examples/ffi-table-provider/pyproject.toml +++ b/examples/datafusion-ffi-example/pyproject.toml @@ -20,7 +20,7 @@ requires = ["maturin>=1.6,<2.0"] build-backend = "maturin" [project] -name = "ffi_table_provider" +name = "datafusion_ffi_example" requires-python = ">=3.9" classifiers = [ "Programming Language :: Rust", diff --git a/examples/datafusion-ffi-example/python/tests/_test_table_function.py b/examples/datafusion-ffi-example/python/tests/_test_table_function.py new file mode 100644 index 000000000..b3a837717 --- /dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_table_function.py @@ -0,0 +1,110 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pyarrow as pa +from datafusion import Expr, SessionContext, udtf +from datafusion_ffi_example import MyTableFunction, MyTableProvider + +if TYPE_CHECKING: + from datafusion.context import TableProviderExportable + + +def test_ffi_table_function_register(): + ctx = SessionContext() + table_func = MyTableFunction() + table_udtf = udtf(table_func, "my_table_func") + ctx.register_udtf(table_udtf) + result = ctx.sql("select * from my_table_func()").collect() + + assert len(result) == 2 + assert result[0].num_columns == 4 + print(result) + + result = [r.column(0) for r in result] + expected = [ + pa.array([0, 1, 2], type=pa.int32()), + pa.array([3, 4, 5, 6], type=pa.int32()), + ] + + assert result == expected + + +def test_ffi_table_function_call_directly(): + ctx = SessionContext() + table_func = MyTableFunction() + table_udtf = udtf(table_func, "my_table_func") + + my_table = table_udtf() + ctx.register_table_provider("t", my_table) + result = ctx.table("t").collect() + + assert len(result) == 2 + assert result[0].num_columns == 4 + print(result) + + result = [r.column(0) for r in result] + expected = [ + pa.array([0, 1, 2], type=pa.int32()), + pa.array([3, 4, 5, 6], type=pa.int32()), + ] + + assert result == expected + + +class PythonTableFunction: + """Python based table function. + + This class is used as a Python implementation of a table function. + We use the existing TableProvider to create the underlying + provider, and this function takes no arguments + """ + + def __call__( + self, num_cols: Expr, num_rows: Expr, num_batches: Expr + ) -> TableProviderExportable: + args = [ + num_cols.to_variant().value_i64(), + num_rows.to_variant().value_i64(), + num_batches.to_variant().value_i64(), + ] + return MyTableProvider(*args) + + +def test_python_table_function(): + ctx = SessionContext() + table_func = PythonTableFunction() + table_udtf = udtf(table_func, "my_table_func") + ctx.register_udtf(table_udtf) + result = ctx.sql("select * from my_table_func(3,2,4)").collect() + + assert len(result) == 4 + assert result[0].num_columns == 3 + print(result) + + result = [r.column(0) for r in result] + expected = [ + pa.array([0, 1], type=pa.int32()), + pa.array([2, 3, 4], type=pa.int32()), + pa.array([4, 5, 6, 7], type=pa.int32()), + pa.array([6, 7, 8, 9, 10], type=pa.int32()), + ] + + assert result == expected diff --git a/examples/ffi-table-provider/python/tests/_test_table_provider.py b/examples/datafusion-ffi-example/python/tests/_test_table_provider.py similarity index 94% rename from examples/ffi-table-provider/python/tests/_test_table_provider.py rename to examples/datafusion-ffi-example/python/tests/_test_table_provider.py index 0db3ec561..6b24da06c 100644 --- a/examples/ffi-table-provider/python/tests/_test_table_provider.py +++ b/examples/datafusion-ffi-example/python/tests/_test_table_provider.py @@ -15,9 +15,11 @@ # specific language governing permissions and limitations # under the License. +from __future__ import annotations + import pyarrow as pa from datafusion import SessionContext -from ffi_table_provider import MyTableProvider +from datafusion_ffi_example import MyTableProvider def test_table_loading(): diff --git a/examples/datafusion-ffi-example/src/lib.rs b/examples/datafusion-ffi-example/src/lib.rs new file mode 100644 index 000000000..ae08c3b65 --- /dev/null +++ b/examples/datafusion-ffi-example/src/lib.rs @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::table_function::MyTableFunction; +use crate::table_provider::MyTableProvider; +use pyo3::prelude::*; + +pub(crate) mod table_function; +pub(crate) mod table_provider; + +#[pymodule] +fn datafusion_ffi_example(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + Ok(()) +} diff --git a/examples/datafusion-ffi-example/src/table_function.rs b/examples/datafusion-ffi-example/src/table_function.rs new file mode 100644 index 000000000..2d7b356e3 --- /dev/null +++ b/examples/datafusion-ffi-example/src/table_function.rs @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::table_provider::MyTableProvider; +use datafusion::catalog::{TableFunctionImpl, TableProvider}; +use datafusion::error::Result as DataFusionResult; +use datafusion::prelude::Expr; +use datafusion_ffi::udtf::FFI_TableFunction; +use pyo3::types::PyCapsule; +use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; +use std::sync::Arc; + +#[pyclass(name = "MyTableFunction", module = "datafusion_ffi_example", subclass)] +#[derive(Debug, Clone)] +pub(crate) struct MyTableFunction {} + +#[pymethods] +impl MyTableFunction { + #[new] + fn new() -> Self { + Self {} + } + + fn __datafusion_table_function__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + let name = cr"datafusion_table_function".into(); + + let func = self.clone(); + let provider = FFI_TableFunction::new(Arc::new(func), None); + + PyCapsule::new(py, provider, Some(name)) + } +} + +impl TableFunctionImpl for MyTableFunction { + fn call(&self, _args: &[Expr]) -> DataFusionResult> { + let provider = MyTableProvider::new(4, 3, 2).create_table()?; + Ok(Arc::new(provider)) + } +} diff --git a/examples/ffi-table-provider/src/lib.rs b/examples/datafusion-ffi-example/src/table_provider.rs similarity index 71% rename from examples/ffi-table-provider/src/lib.rs rename to examples/datafusion-ffi-example/src/table_provider.rs index 88deeece2..e884585b5 100644 --- a/examples/ffi-table-provider/src/lib.rs +++ b/examples/datafusion-ffi-example/src/table_provider.rs @@ -15,25 +15,21 @@ // specific language governing permissions and limitations // under the License. -use std::{ffi::CString, sync::Arc}; - -use arrow_array::ArrayRef; -use datafusion::{ - arrow::{ - array::RecordBatch, - datatypes::{DataType, Field, Schema}, - }, - datasource::MemTable, - error::{DataFusionError, Result}, -}; +use arrow_array::{ArrayRef, RecordBatch}; +use arrow_schema::{DataType, Field, Schema}; +use datafusion::catalog::MemTable; +use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion_ffi::table_provider::FFI_TableProvider; -use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyCapsule}; +use pyo3::exceptions::PyRuntimeError; +use pyo3::types::PyCapsule; +use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; +use std::sync::Arc; /// In order to provide a test that demonstrates different sized record batches, /// the first batch will have num_rows, the second batch num_rows+1, and so on. -#[pyclass(name = "MyTableProvider", module = "ffi_table_provider", subclass)] +#[pyclass(name = "MyTableProvider", module = "datafusion_ffi_example", subclass)] #[derive(Clone)] -struct MyTableProvider { +pub(crate) struct MyTableProvider { num_cols: usize, num_rows: usize, num_batches: usize, @@ -44,21 +40,19 @@ fn create_record_batch( num_cols: usize, start_value: i32, num_values: usize, -) -> Result { +) -> DataFusionResult { let end_value = start_value + num_values as i32; let row_values: Vec = (start_value..end_value).collect(); let columns: Vec<_> = (0..num_cols) - .map(|_| { - std::sync::Arc::new(arrow::array::Int32Array::from(row_values.clone())) as ArrayRef - }) + .map(|_| Arc::new(arrow::array::Int32Array::from(row_values.clone())) as ArrayRef) .collect(); RecordBatch::try_new(Arc::clone(schema), columns).map_err(DataFusionError::from) } impl MyTableProvider { - fn create_table(&self) -> Result { + pub fn create_table(&self) -> DataFusionResult { let fields: Vec<_> = (0..self.num_cols) .map(|idx| (b'A' + idx as u8) as char) .map(|col_name| Field::new(col_name, DataType::Int32, true)) @@ -66,7 +60,7 @@ impl MyTableProvider { let schema = Arc::new(Schema::new(fields)); - let batches: Result> = (0..self.num_batches) + let batches: DataFusionResult> = (0..self.num_batches) .map(|batch_idx| { let start_value = batch_idx * self.num_rows; create_record_batch( @@ -85,7 +79,7 @@ impl MyTableProvider { #[pymethods] impl MyTableProvider { #[new] - fn new(num_cols: usize, num_rows: usize, num_batches: usize) -> Self { + pub fn new(num_cols: usize, num_rows: usize, num_batches: usize) -> Self { Self { num_cols, num_rows, @@ -93,23 +87,17 @@ impl MyTableProvider { } } - fn __datafusion_table_provider__<'py>( + pub fn __datafusion_table_provider__<'py>( &self, py: Python<'py>, ) -> PyResult> { - let name = CString::new("datafusion_table_provider").unwrap(); + let name = cr"datafusion_table_provider".into(); let provider = self .create_table() .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; let provider = FFI_TableProvider::new(Arc::new(provider), false, None); - PyCapsule::new_bound(py, provider, Some(name.clone())) + PyCapsule::new(py, provider, Some(name)) } } - -#[pymodule] -fn ffi_table_provider(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - Ok(()) -} diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 15ceefbdb..43b2e4a34 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -51,7 +51,17 @@ from .io import read_avro, read_csv, read_json, read_parquet from .plan import ExecutionPlan, LogicalPlan from .record_batch import RecordBatch, RecordBatchStream -from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF, udaf, udf, udwf +from .udf import ( + Accumulator, + AggregateUDF, + ScalarUDF, + TableFunction, + WindowUDF, + udaf, + udf, + udtf, + udwf, +) __version__ = importlib_metadata.version(__name__) @@ -74,6 +84,7 @@ "SessionConfig", "SessionContext", "Table", + "TableFunction", "WindowFrame", "WindowUDF", "col", @@ -92,6 +103,7 @@ "substrait", "udaf", "udf", + "udtf", "udwf", "unparser", ] diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 1429a4975..88f242183 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -30,7 +30,7 @@ from datafusion.dataframe import DataFrame from datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list from datafusion.record_batch import RecordBatchStream -from datafusion.udf import AggregateUDF, ScalarUDF, WindowUDF +from datafusion.udf import AggregateUDF, ScalarUDF, TableFunction, WindowUDF from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal from ._internal import SessionConfig as SessionConfigInternal @@ -752,6 +752,10 @@ def register_table_provider( """ self.ctx.register_table_provider(name, provider) + def register_udtf(self, func: TableFunction) -> None: + """Register a user defined table function.""" + self.ctx.register_udtf(func._udtf) + def register_record_batches( self, name: str, partitions: list[list[pa.RecordBatch]] ) -> None: diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index e93a34ca5..997dead52 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -760,8 +760,74 @@ def wrapper(*args: Any, **kwargs: Any) -> Expr: return decorator +class TableFunction: + """Class for performing user-defined table functions (UDTF). + + Table functions generate new table providers based on the + input expressions. + """ + + def __init__( + self, + name: str, + func: Callable[[], any], + ) -> None: + """Instantiate a user-defined table function (UDTF). + + See :py:func:`udtf` for a convenience function and argument + descriptions. + """ + self._udtf = df_internal.TableFunction(name, func) + + def __call__(self, *args: Expr) -> Any: + """Execute the UDTF and return a table provider.""" + args_raw = [arg.expr for arg in args] + return self._udtf.__call__(*args_raw) + + @overload + @staticmethod + def udtf( + name: str, + ) -> Callable[..., Any]: ... + + @overload + @staticmethod + def udtf( + func: Callable[[], Any], + name: str, + ) -> TableFunction: ... + + @staticmethod + def udtf(*args: Any, **kwargs: Any): + """Create a new User-Defined Table Function (UDTF).""" + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return TableFunction._create_table_udf(*args, **kwargs) + if args and hasattr(args[0], "__datafusion_table_function__"): + return TableFunction(args[1], args[0]) + # Case 2: Used as a decorator with parameters + return TableFunction._create_table_udf_decorator(*args, **kwargs) + + @staticmethod + def _create_table_udf( + func: Callable[..., Any], + name: str, + ) -> TableFunction: + """Create a TableFunction instance from function arguments.""" + if not callable(func): + msg = "`func` must be callable." + raise TypeError(msg) + + return TableFunction(name, func) + + def __repr__(self) -> str: + """User printable representation.""" + return self._udtf.__repr__() + + # Convenience exports so we can import instead of treating as # variables at the package root udf = ScalarUDF.udf udaf = AggregateUDF.udaf udwf = WindowUDF.udwf +udtf = TableFunction.udtf diff --git a/src/context.rs b/src/context.rs index 0db0f4d7e..cc3d8e8e9 100644 --- a/src/context.rs +++ b/src/context.rs @@ -43,6 +43,7 @@ use crate::sql::logical::PyLogicalPlan; use crate::store::StorageContexts; use crate::udaf::PyAggregateUDF; use crate::udf::PyScalarUDF; +use crate::udtf::PyTableFunction; use crate::udwf::PyWindowUDF; use crate::utils::{get_global_ctx, get_tokio_runtime, validate_pycapsule, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; @@ -390,6 +391,12 @@ impl PySessionContext { Ok(()) } + pub fn register_udtf(&mut self, func: PyTableFunction) { + let name = func.name.clone(); + let func = Arc::new(func); + self.ctx.register_udtf(&name, func); + } + /// Returns a PyDataFrame whose plan corresponds to the SQL statement. pub fn sql(&mut self, query: &str, py: Python) -> PyDataFusionResult { let result = self.ctx.sql(query); diff --git a/src/dataframe.rs b/src/dataframe.rs index 211e31bd1..b6b1645ed 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -34,6 +34,7 @@ use datafusion::error::DataFusionError; use datafusion::execution::SendableRecordBatchStream; use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::prelude::*; +use datafusion_ffi::table_provider::FFI_TableProvider; use futures::{StreamExt, TryStreamExt}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -58,7 +59,7 @@ use crate::{ // this is an interim implementation #[pyclass(name = "TableProvider", module = "datafusion")] pub struct PyTableProvider { - provider: Arc, + provider: Arc, } impl PyTableProvider { @@ -72,6 +73,21 @@ impl PyTableProvider { } } +#[pymethods] +impl PyTableProvider { + fn __datafusion_table_provider__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + let name = CString::new("datafusion_table_provider").unwrap(); + + let runtime = get_tokio_runtime().0.handle().clone(); + let provider = FFI_TableProvider::new(Arc::clone(&self.provider), false, Some(runtime)); + + PyCapsule::new(py, provider, Some(name.clone())) + } +} + /// Configuration for DataFrame display formatting #[derive(Debug, Clone)] pub struct FormatterConfig { diff --git a/src/lib.rs b/src/lib.rs index 6eeda0878..7dced1fbd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,6 +60,7 @@ pub mod substrait; mod udaf; #[allow(clippy::borrow_deref_ref)] mod udf; +pub mod udtf; mod udwf; pub mod utils; @@ -88,6 +89,7 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/udtf.rs b/src/udtf.rs new file mode 100644 index 000000000..1cc1a6328 --- /dev/null +++ b/src/udtf.rs @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::prelude::*; +use std::sync::Arc; + +use crate::dataframe::PyTableProvider; +use crate::errors::{py_datafusion_err, to_datafusion_err}; +use crate::expr::PyExpr; +use crate::utils::validate_pycapsule; +use datafusion::catalog::{TableFunctionImpl, TableProvider}; +use datafusion::error::Result as DataFusionResult; +use datafusion::logical_expr::Expr; +use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider}; +use datafusion_ffi::udtf::{FFI_TableFunction, ForeignTableFunction}; +use pyo3::exceptions::PyNotImplementedError; +use pyo3::types::{PyCapsule, PyTuple}; + +/// Represents a user defined table function +#[pyclass(name = "TableFunction", module = "datafusion")] +#[derive(Debug, Clone)] +pub struct PyTableFunction { + pub(crate) name: String, + pub(crate) inner: PyTableFunctionInner, +} + +// TODO: Implement pure python based user defined table functions +#[derive(Debug, Clone)] +pub(crate) enum PyTableFunctionInner { + PythonFunction(Arc), + FFIFunction(Arc), +} + +#[pymethods] +impl PyTableFunction { + #[new] + #[pyo3(signature=(name, func))] + pub fn new(name: &str, func: Bound<'_, PyAny>) -> PyResult { + let inner = if func.hasattr("__datafusion_table_function__")? { + let capsule = func.getattr("__datafusion_table_function__")?.call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_table_function")?; + + let ffi_func = unsafe { capsule.reference::() }; + let foreign_func: ForeignTableFunction = ffi_func.to_owned().into(); + + PyTableFunctionInner::FFIFunction(Arc::new(foreign_func)) + } else { + let py_obj = Arc::new(func.unbind()); + PyTableFunctionInner::PythonFunction(py_obj) + }; + + Ok(Self { + name: name.to_string(), + inner, + }) + } + + #[pyo3(signature = (*args))] + pub fn __call__(&self, args: Vec) -> PyResult { + let args: Vec = args.iter().map(|e| e.expr.clone()).collect(); + let table_provider = self.call(&args).map_err(py_datafusion_err)?; + + Ok(PyTableProvider::new(table_provider)) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("TableUDF({})", self.name)) + } +} + +fn call_python_table_function( + func: &Arc, + args: &[Expr], +) -> DataFusionResult> { + let args = args + .iter() + .map(|arg| PyExpr::from(arg.clone())) + .collect::>(); + + // move |args: &[ArrayRef]| -> Result { + Python::with_gil(|py| { + let py_args = PyTuple::new(py, args)?; + let provider_obj = func.call1(py, py_args)?; + let provider = provider_obj.bind(py); + + if provider.hasattr("__datafusion_table_provider__")? { + let capsule = provider.getattr("__datafusion_table_provider__")?.call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_table_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignTableProvider = provider.into(); + + Ok(Arc::new(provider) as Arc) + } else { + Err(PyNotImplementedError::new_err( + "__datafusion_table_provider__ does not exist on Table Provider object.", + )) + } + }) + .map_err(to_datafusion_err) +} + +impl TableFunctionImpl for PyTableFunction { + fn call(&self, args: &[Expr]) -> DataFusionResult> { + match &self.inner { + PyTableFunctionInner::FFIFunction(func) => func.call(args), + PyTableFunctionInner::PythonFunction(obj) => call_python_table_function(obj, args), + } + } +}