From 36cfae2677fa2b32d2df11ff49b71fdb6c4b70fd Mon Sep 17 00:00:00 2001 From: Sirui Huang Date: Tue, 5 Aug 2025 18:33:17 -0400 Subject: [PATCH 01/40] first commit --- .gitignore | 1 + Cargo.lock | 3972 +++++++++++++++++++++++++++--- optd/catalog/Cargo.toml | 11 + optd/catalog/src/lib.rs | 1 - optd/catalog/src/main.rs | 81 + optd/catalog/src/optd_catalog.rs | 128 + optd/catalog/src/optd_table.rs | 149 ++ optd/catalog/src/write.rs | 7 + 8 files changed, 4068 insertions(+), 282 deletions(-) delete mode 100644 optd/catalog/src/lib.rs create mode 100644 optd/catalog/src/main.rs create mode 100644 optd/catalog/src/optd_catalog.rs create mode 100644 optd/catalog/src/optd_table.rs create mode 100644 optd/catalog/src/write.rs diff --git a/.gitignore b/.gitignore index ea8c4bf..d9b2525 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +*.db \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index d065ade..a8e965c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,20 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.3", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -27,524 +41,3701 @@ dependencies = [ ] [[package]] -name = "anyhow" -version = "1.0.98" +name = "alloc-no-stdlib" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" [[package]] -name = "autocfg" -version = "1.5.0" +name = "alloc-stdlib" +version = "0.2.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] [[package]] -name = "backtrace" -version = "0.3.75" +name = "allocator-api2" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets", -] +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] -name = "bitflags" -version = "2.9.1" +name = "android-tzdata" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" [[package]] -name = "bitvec" -version = "1.0.1" +name = "android_system_properties" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" dependencies = [ - "funty", - "radium", - "tap", - "wyz", + "libc", ] [[package]] -name = "cfg-if" -version = "1.0.1" +name = "anyhow" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] -name = "either" -version = "1.15.0" +name = "arrayref" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +checksum = 
"76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" [[package]] -name = "funty" -version = "2.0.0" +name = "arrayvec" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] -name = "gimli" -version = "0.31.1" +name = "arrow" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] [[package]] -name = "io-uring" -version = "0.7.9" +name = "arrow-arith" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" +checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" dependencies = [ - "bitflags", - "cfg-if", - "libc", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num", ] [[package]] -name = "itertools" -version = "0.14.0" +name = "arrow-array" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" dependencies = [ - "either", + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.4", + "num", ] [[package]] -name = "lazy_static" -version = "1.5.0" +name = "arrow-buffer" +version = "55.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" +dependencies = [ + "bytes", + "half", + "num", +] [[package]] -name = "libc" -version = "0.2.174" +name = "arrow-cast" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] [[package]] -name = "lock_api" -version = "0.4.13" +name = "arrow-csv" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b" dependencies = [ - "autocfg", - "scopeguard", + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "regex", ] [[package]] -name = "log" -version = "0.4.27" +name = "arrow-data" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] [[package]] -name = "matchers" -version = "0.1.0" +name = "arrow-ipc" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" dependencies = [ - "regex-automata 0.1.10", + 
"arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", + "zstd", ] [[package]] -name = "memchr" -version = "2.7.5" +name = "arrow-json" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] [[package]] -name = "miniz_oxide" -version = "0.8.9" +name = "arrow-ord" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" dependencies = [ - "adler2", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", ] [[package]] -name = "mio" -version = "1.0.4" +name = "arrow-row" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2" dependencies = [ - "libc", - "wasi", - "windows-sys", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", ] [[package]] -name = "nu-ansi-term" -version = "0.46.0" +name = "arrow-schema" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" dependencies = [ - "overload", - "winapi", + "serde", + "serde_json", ] [[package]] -name = "object" -version = "0.36.7" +name = "arrow-select" +version = 
"55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" dependencies = [ - "memchr", + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", ] [[package]] -name = "once_cell" -version = "1.21.3" +name = "arrow-string" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "optd-catalog" -version = "0.1.0" +checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax 0.8.5", +] [[package]] -name = "optd-core" -version = "0.1.0" +name = "async-compression" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "anyhow", - "bitvec", - "itertools", - "pretty-xmlish", + "bzip2 0.5.2", + "flate2", + "futures-core", + "memchr", + "pin-project-lite", "tokio", - "tracing", - "tracing-test", + "xz2", + "zstd", + "zstd-safe", ] [[package]] -name = "overload" -version = "0.1.1" +name = "async-trait" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "parking_lot" -version = "0.12.4" +name = "atoi" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = 
"f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" dependencies = [ - "lock_api", - "parking_lot_core", + "num-traits", ] [[package]] -name = "parking_lot_core" -version = "0.9.11" +name = "autocfg" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" dependencies = [ + "addr2line", "cfg-if", "libc", - "redox_syscall", - "smallvec", - "windows-targets", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] -name = "pin-project-lite" -version = "0.2.16" +name = "base64" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] -name = "pretty-xmlish" -version = "0.1.13" +name = "base64ct" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b8aab53732b7a9c5c39bb0e130f85671b48b188ef258c3b9f7f5da1877382a" +checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" [[package]] -name = "proc-macro2" -version = "1.0.95" +name = "bigdecimal" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" dependencies = [ - "unicode-ident", + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", ] [[package]] -name = "quote" -version = "1.0.40" +name = "bitflags" +version = "2.9.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" dependencies = [ - "proc-macro2", + "serde", ] [[package]] -name = "radium" -version = "0.7.0" +name = "bitvec" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] [[package]] -name = "redox_syscall" -version = "0.5.17" +name = "blake2" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ - "bitflags", + "digest", ] [[package]] -name = "regex" -version = "1.11.1" +name = "blake3" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" dependencies = [ - "aho-corasick", - "memchr", - "regex-automata 0.4.9", - "regex-syntax 0.8.5", + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", ] [[package]] -name = "regex-automata" -version = "0.1.10" +name = "block-buffer" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" dependencies = [ - "regex-syntax 0.6.29", + "generic-array", ] [[package]] -name = "regex-automata" -version = "0.4.9" +name = "brotli" +version = "8.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.8.5", + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", ] [[package]] -name = "regex-syntax" -version = "0.6.29" +name = "brotli-decompressor" +version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] [[package]] -name = "regex-syntax" -version = "0.8.5" +name = "bumpalo" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] -name = "rustc-demangle" -version = "0.1.26" +name = "byteorder" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] -name = "scopeguard" -version = "1.2.0" +name = "bytes" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] -name = "sharded-slab" -version = "0.1.7" +name = "bzip2" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" 
dependencies = [ - "lazy_static", + "bzip2-sys", ] [[package]] -name = "slab" -version = "0.4.10" +name = "bzip2" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" +checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff" +dependencies = [ + "libbz2-rs-sys", +] [[package]] -name = "smallvec" -version = "1.15.1" +name = "bzip2-sys" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] [[package]] -name = "syn" -version = "2.0.104" +name = "cc" +version = "1.2.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2" dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "chrono" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-link", +] + +[[package]] +name = "chrono-tz" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +dependencies = [ + "chrono", + "phf", +] + +[[package]] +name = "comfy-table" +version = "7.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" +dependencies = [ + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +dependencies = [ + "memchr", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f47772c28553d837e12cdcc0fb04c2a0fe8eca8b704a30f721d076f32407435" +dependencies = [ + "arrow", + "arrow-ipc", + "arrow-schema", + "async-trait", + "bytes", + "bzip2 0.6.0", + "chrono", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "flate2", + "futures", + "hex", + "itertools", + "log", + "object_store", + "parking_lot", + "parquet", + "rand 0.9.2", + "regex", + "sqlparser", + "tempfile", + "tokio", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b6b29c9c922959285fac53139e12c81014e2ca54704f20355edd7e9d11fd773" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "futures", + "itertools", + 
"log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7313553e4c01d184dd49183afdfa22f23204a10a26dd12e6f799203d8fdb95c2" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "log", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-common" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d66104731b7476a8c86fbe7a6fd741e6329791166ac89a91fcd8336a560ddaf" +dependencies = [ + "ahash", + "arrow", + "arrow-ipc", + "base64", + "chrono", + "half", + "hashbrown 0.14.5", + "hex", + "indexmap", + "libc", + "log", + "object_store", + "parquet", + "paste", + "recursive", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7527ecdfeae6961a8564d3b036507a67bd467fd36a9f10cf8ad7a99db1f1bc" +dependencies = [ + "futures", + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40e5076be33d8eb9f4d99858e5f3477b36c07e61eee8eb93c4320428d9e1e344" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.6.0", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "flate2", + "futures", + "glob", + "itertools", + "log", + "object_store", + "parquet", + "rand 0.9.2", + 
"tempfile", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "785518d0f2f136c19b9389a10762c01a5aeb5fcdebdb244297bb656b2862dc88" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71cb7c3bad0951bf5c52505d0e6d87e6c0098156d2a195924cbcdc82238d29ba" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-datasource-parquet" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea76ad2c5189c98a6b1d4bdf6c3b3caacc9701c417af6661597c946a201bc328" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-pruning", + "datafusion-session", + "futures", + "hex", + "itertools", + "log", + "object_store", + 
"parking_lot", + "parquet", + "rand 0.9.2", + "tokio", +] + +[[package]] +name = "datafusion-doc" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bcc45e380db5c6033c3f39e765a3d752679f14315060a7f4030a60066a36946" + +[[package]] +name = "datafusion-execution" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8209805fdce3d5c6e1625f674d3e4ce93e995a56d3709a0bb8d4361062652596" +dependencies = [ + "arrow", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "log", + "object_store", + "parking_lot", + "rand 0.9.2", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7879a845e72a00cacffacbdf5f40626049cb9584d2ba8aa0b9172f09833110ab" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "recursive", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr-common" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6da7e47e70ef2c7678735c82c392bd74687004043f5fc8072ab8678dc6fa459d" +dependencies = [ + "arrow", + "datafusion-common", + "indexmap", + "itertools", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e7b92b04c5c3b1151f055251b36e272071f9088d9701826a533cb4f764af1c8" +dependencies = [ + "arrow", + "arrow-buffer", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", + "hex", + "itertools", + "log", + "md-5", + "rand 
0.9.2", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f16cb922b62e535a4d484961ac2c1c6d188dbe02e85e026c05f0fabbc8f814e" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f71bb59dc8b4dc985c911f2e0d8cf426c21f565b56dca4b852c244101a1a7a2" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-functions-nested" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27eb3b98a2eb02a8af4ef19cc793cac21fc98d8720b987f15d7d25b8cc875f4d" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr-common", + "itertools", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-table" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e0940fc3e2fa4645a4d323f9ebf9258b2d7fdad12013a471cae4ae5568683" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "49.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "df03c6c62039578fd110b327c474846fdf3d9077a568f1e8706e585ed30cb98d" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "083659a95914bf3ca568a72b085cb8654576fef1236b260dc2379cb8e5f922b2" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cabe1f32daa2fa54e6b20d14a13a9e85bef97c4161fe8a90d76b6d9693a5ac4" +dependencies = [ + "datafusion-expr", + "quote", + "syn", +] + +[[package]] +name = "datafusion-optimizer" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e12a97dcb0ccc569798be1289c744829cce5f18cc9b037054f8d7f93e1d57be" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "indexmap", + "itertools", + "log", + "recursive", + "regex", + "regex-syntax 0.8.5", +] + +[[package]] +name = "datafusion-physical-expr" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41312712b8659a82b4e9faa8d97a018e7f2ccbdedf2f7cb93ecf256e39858c86" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "49.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1649a60ea0319496d616ae3554e84dfcc262c201ab4439abcd83cca989b85b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "itertools", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea3f5b8ba6122426774aaaf11325740b8e5d3afaab9ab39dc63423adca554748" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-pruning", + "itertools", + "log", + "recursive", +] + +[[package]] +name = "datafusion-physical-plan" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a595f296929d6cffa12b993ea53e9fe8215fada050d78626c5cf0e2f02b0205" +dependencies = [ + "ahash", + "arrow", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-pruning" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-datasource", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools", + "log", +] + +[[package]] +name = "datafusion-session" +version = "49.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd5f2fe790f43839c70fb9604c4f9b59ad290ef64e1d2f927925dd34a9245406" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ebebb82fda37f62f06fe14339f4faa9f197a0320cc4d26ce2a5fd53a5ccd27c" +dependencies = [ + "arrow", + "bigdecimal", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "recursive", + "regex", + "sqlparser", +] + +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "const-oid", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +dependencies = [ + "serde", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +dependencies = [ + "bitflags", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +dependencies = [ + "crc32fast", + "libz-rs-sys", 
+ "miniz_oxide", +] + +[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "futures-core", + "futures-sink", + "spin", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + 
"version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "glob" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" + +[[package]] +name = "half" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ 
+ "hashbrown 0.15.4", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "http" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "humantime" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" + +[[package]] +name = "icu_properties" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "potential_utf", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" + +[[package]] +name = "icu_provider" +version = "2.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +dependencies = [ + "displaydoc", + "icu_locale_core", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown 0.15.4", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "io-uring" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = 
"jobserver" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +dependencies = [ + "getrandom 0.3.3", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] + +[[package]] +name = "lexical-core" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libbz2-rs-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libz-rs-sys" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" +dependencies = [ + "zlib-rs", +] + +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + +[[package]] +name = "litemap" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" + 
+[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys 0.59.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "object_store" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "http", + "humantime", + "itertools", + "parking_lot", + "percent-encoding", + "thiserror", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "optd-catalog" +version = "0.1.0" +dependencies = [ + "async-trait", + "datafusion", + "glob", + "parking_lot", + "sqlx", + "url", + "uuid", +] + +[[package]] +name = "optd-core" +version = "0.1.0" +dependencies = [ + "anyhow", + "bitvec", + "itertools", + "pretty-xmlish", + "tokio", + "tracing", + "tracing-test", +] + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "parquet" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.4", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "ring", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pem-rfc7468" +version = 
"0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "petgraph" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.4", + "indexmap", + "serde", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" 
+dependencies = [ + "der", + "spki", +] + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "potential_utf" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "pretty-xmlish" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b8aab53732b7a9c5c39bb0e130f85671b48b188ef258c3b9f7f5da1877382a" + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "psm" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +dependencies = [ + "cc", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.16", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.3", +] + +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.5", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.16", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rsa" +version = "0.9.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "signature", + "spki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.26" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.142" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + +[[package]] +name = "slab" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "sqlparser" +version = "0.55.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" +dependencies = [ + "log", + "recursive", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "sqlx" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" +dependencies = [ + "base64", + "bytes", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashbrown 0.15.4", + "hashlink", + "indexmap", + "log", + "memchr", + "once_cell", + "percent-encoding", + "serde", + "serde_json", + "sha2", + "smallvec", + "thiserror", + "tracing", + "url", +] + +[[package]] +name = "sqlx-macros" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" +dependencies = [ + "dotenvy", + "either", + "heck", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" +dependencies = [ + "atoi", + "base64", + "bitflags", + "byteorder", + "bytes", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.5", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" +dependencies = [ + "atoi", + "base64", + "bitflags", + "byteorder", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand 0.8.5", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" +dependencies = [ + "atoi", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "serde_urlencoded", + 
"sqlx-core", + "thiserror", + "tracing", + "url", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "stacker" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tempfile" +version = 
"3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "thiserror" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" +dependencies = [ + "tinyvec_macros", +] + 
+[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "slab", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-util" +version = "0.7.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "tracing-test" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "557b891436fe0d5e0e363427fc7f217abf9ccd510d5136549847bdcbcd011d68" +dependencies = [ + "tracing-core", + "tracing-subscriber", + "tracing-test-macro", +] + +[[package]] +name = "tracing-test-macro" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "twox-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" + +[[package]] +name = "typenum" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" + +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-normalization" +version = "0.1.24" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "uuid" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +dependencies = [ + "getrandom 0.3.3", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", ] [[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - -[[package]] -name = "thread_local" -version = "1.1.9" +name = "wasm-bindgen-futures" +version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", ] [[package]] -name = "tokio" -version = "1.47.1" +name = "wasm-bindgen-macro" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ - "backtrace", - "io-uring", - "libc", - "mio", - "parking_lot", - "pin-project-lite", - "slab", - "tokio-macros", + "quote", + "wasm-bindgen-macro-support", ] [[package]] -name = "tokio-macros" -version = "2.5.0" +name = "wasm-bindgen-macro-support" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", ] [[package]] -name = "tracing" -version = "0.1.41" +name = "wasm-bindgen-shared" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", + "unicode-ident", ] [[package]] -name = "tracing-attributes" -version = "0.1.30" +name = "web-sys" +version = "0.3.77" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ - "proc-macro2", - "quote", - "syn", + "js-sys", + "wasm-bindgen", ] [[package]] -name = "tracing-core" -version = "0.1.34" +name = "web-time" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" dependencies = [ - "once_cell", - "valuable", + "js-sys", + "wasm-bindgen", ] [[package]] -name = "tracing-log" -version = "0.2.0" +name = "whoami" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +checksum = "6994d13118ab492c3c80c1f81928718159254c53c472bf9ce36f8dae4add02a7" dependencies = [ - "log", - "once_cell", - "tracing-core", + "redox_syscall", + "wasite", ] [[package]] -name = "tracing-subscriber" -version = "0.3.19" +name = "winapi" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ - "matchers", - "nu-ansi-term", - "once_cell", - "regex", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", ] [[package]] -name = "tracing-test" -version = "0.2.5" +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "557b891436fe0d5e0e363427fc7f217abf9ccd510d5136549847bdcbcd011d68" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + 
+[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "tracing-core", - "tracing-subscriber", - "tracing-test-macro", + "windows-sys 0.59.0", ] [[package]] -name = "tracing-test-macro" -version = "0.2.5" +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ + "proc-macro2", "quote", "syn", ] [[package]] -name = "unicode-ident" -version = "1.0.18" +name = "windows-interface" +version = "0.59.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "valuable" -version = "0.1.1" +name = "windows-link" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] -name = "wasi" -version = 
"0.11.1+wasi-snapshot-preview1" +name = "windows-result" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] [[package]] -name = "winapi" -version = "0.3.9" +name = "windows-strings" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", + "windows-link", ] [[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" +name = "windows-sys" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] [[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" +name = "windows-sys" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] [[package]] name = "windows-sys" @@ -552,7 +3743,22 @@ version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -561,28 +3767,46 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + 
[[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -595,30 +3819,69 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + +[[package]] +name = "writeable" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" + [[package]] name = "wyz" version = "0.5.1" @@ -627,3 +3890,150 @@ checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" dependencies = [ "tap", ] + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "yoke" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdbb9122ea75b11bf96e7492afb723e8a7fbe12c67417aa95e7e3d18144d37cd" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zlib-rs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" 
+dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.15+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/optd/catalog/Cargo.toml b/optd/catalog/Cargo.toml index 022afce..172fb96 100644 --- a/optd/catalog/Cargo.toml +++ b/optd/catalog/Cargo.toml @@ -4,3 +4,14 @@ version = "0.1.0" edition = "2024" [dependencies] +datafusion = "=49.0.0" +sqlx = { version = "=0.8.6", features = ["sqlite"] } +parking_lot = "=0.12.4" +glob = "0.3.2" +url = "2.5.4" +async-trait = "0.1.88" + +[dependencies.uuid] +version = "1.17.0" +# Lets you generate random UUIDs +features = ["v4"] diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs deleted file mode 100644 index 8b13789..0000000 --- a/optd/catalog/src/lib.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/optd/catalog/src/main.rs b/optd/catalog/src/main.rs new file mode 100644 index 0000000..8f7c3b9 --- /dev/null +++ b/optd/catalog/src/main.rs @@ -0,0 +1,81 @@ +mod optd_catalog; +mod optd_table; + +use sqlx::{SqlitePool, sqlite::SqliteConnectOptions}; +use tokio; +use uuid::Uuid; + +#[tokio::main] +async fn main() -> Result<(), sqlx::Error> { + // Create Sqlite database file to hold the catalog + const SQLITE_DB_PATH: &str = "catalog.db"; + + // Set connect options + let connect_options = SqliteConnectOptions::new() + .filename(SQLITE_DB_PATH) + .create_if_missing(true); + + // Connect with SqlX + let pool = SqlitePool::connect_with(connect_options) + .await + .expect("Failed to connect to the SQLite database"); + + // Set the metadata catalog name + const METADATA_CATALOG: &str = "catalog"; + + // Execute the given Sql queries to create the catalog + let mut create_catalog_queries = vec![ + // "CREATE TABLE () IF NOT EXISTS {METADATA_CATALOG};", + "CREATE TABLE {METADATA_CATALOG}_metadata(key VARCHAR NOT NULL, value VARCHAR NOT NULL, 
scope VARCHAR, scope_id BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_snapshot(snapshot_id BIGINT PRIMARY KEY, snapshot_time TIMESTAMPTZ, schema_version BIGINT, next_catalog_id BIGINT, next_file_id BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_snapshot_changes(snapshot_id BIGINT PRIMARY KEY, changes_made VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_schema(schema_id BIGINT PRIMARY KEY, schema_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_name VARCHAR, path VARCHAR, path_is_relative BOOLEAN);", + "CREATE TABLE {METADATA_CATALOG}_table(table_id BIGINT, table_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_id BIGINT, table_name VARCHAR, path VARCHAR, path_is_relative BOOLEAN);", + "CREATE TABLE {METADATA_CATALOG}_view(view_id BIGINT, view_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_id BIGINT, view_name VARCHAR, dialect VARCHAR, sql VARCHAR, column_aliases VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_tag(object_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_column_tag(table_id BIGINT, column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_data_file(data_file_id BIGINT PRIMARY KEY, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, file_order BIGINT, path VARCHAR, path_is_relative BOOLEAN, file_format VARCHAR, record_count BIGINT, file_size_bytes BIGINT, footer_size BIGINT, row_id_start BIGINT, partition_id BIGINT, encryption_key VARCHAR, partial_file_info VARCHAR, mapping_id BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_file_column_statistics(data_file_id BIGINT, table_id BIGINT, column_id BIGINT, column_size_bytes BIGINT, value_count BIGINT, null_count BIGINT, min_value VARCHAR, max_value VARCHAR, contains_nan BOOLEAN);", + "CREATE TABLE {METADATA_CATALOG}_delete_file(delete_file_id BIGINT PRIMARY KEY, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, 
data_file_id BIGINT, path VARCHAR, path_is_relative BOOLEAN, format VARCHAR, delete_count BIGINT, file_size_bytes BIGINT, footer_size BIGINT, encryption_key VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_column(column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, table_id BIGINT, column_order BIGINT, column_name VARCHAR, column_type VARCHAR, initial_default VARCHAR, default_value VARCHAR, nulls_allowed BOOLEAN, parent_column BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_table_stats(table_id BIGINT, record_count BIGINT, next_row_id BIGINT, file_size_bytes BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_table_column_stats(table_id BIGINT, column_id BIGINT, contains_null BOOLEAN, contains_nan BOOLEAN, min_value VARCHAR, max_value VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_partition_info(partition_id BIGINT, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_partition_column(partition_id BIGINT, table_id BIGINT, partition_key_index BIGINT, column_id BIGINT, transform VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_file_partition_value(data_file_id BIGINT, table_id BIGINT, partition_key_index BIGINT, partition_value VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_files_scheduled_for_deletion(data_file_id BIGINT, path VARCHAR, path_is_relative BOOLEAN, schedule_start TIMESTAMPTZ);", + "CREATE TABLE {METADATA_CATALOG}_inlined_data_tables(table_id BIGINT, table_name VARCHAR, schema_version BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_column_mapping(mapping_id BIGINT, table_id BIGINT, type VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_name_mapping(mapping_id BIGINT, column_id BIGINT, source_name VARCHAR, target_field_id BIGINT, parent_column BIGINT);", + "INSERT INTO {METADATA_CATALOG}_snapshot VALUES (0, current_timestamp, 0, 1, 0);", + "INSERT INTO {METADATA_CATALOG}_snapshot_changes VALUES (0, 'created_schema:\"main\"');", + //"INSERT INTO {METADATA_CATALOG}_metadata (key, value) VALUES ('version', '0.2'), 
('created_by', 'DuckDB %s'), ('data_path', %s), ('encrypted', '%s');" + ]; + + let set_uuid_query = format!( + "UPDATE {METADATA_CATALOG}_schema SET schema_uuid = '{}' WHERE schema_id = 0;", + Uuid::new_v4() + ); + + create_catalog_queries.push(set_uuid_query.as_str()); + + // Format the queries with the metadata catalog name + let formatted_query = create_catalog_queries + .iter() + .map(|query| query.replace("{METADATA_CATALOG}", METADATA_CATALOG)); + + for query in formatted_query { + println!("Executing query: {}", query); + sqlx::query(&query) + .execute(&pool) + .await + .expect("Failed to execute query"); + + println!("Query executed successfully."); + } + + // Close the connection + pool.close().await; + + Ok(()) +} diff --git a/optd/catalog/src/optd_catalog.rs b/optd/catalog/src/optd_catalog.rs new file mode 100644 index 0000000..51c48ce --- /dev/null +++ b/optd/catalog/src/optd_catalog.rs @@ -0,0 +1,128 @@ +// use datafusion::{ +// catalog::{CatalogProvider, CatalogProviderList, SchemaProvider, TableProvider}, +// error::Result, +// execution::SessionState, +// }; +// use parking_lot::RwLock; +// use std::any::Any; +// use std::sync::{Arc, Weak}; + +// #[derive(Debug)] +// pub struct OptdCatalogProviderList { +// inner: Arc, +// state: Weak>, +// } + +// impl OptdCatalogProviderList { +// pub fn new(inner: Arc, state: Weak>) -> Self { +// Self { inner, state } +// } +// } + +// impl CatalogProviderList for OptdCatalogProviderList { +// fn as_any(&self) -> &dyn Any { +// self +// } + +// fn register_catalog( +// &self, +// name: String, +// catalog: Arc, +// ) -> Option> { +// self.inner.register_catalog(name, catalog) +// } + +// fn catalog_names(&self) -> Vec { +// self.inner.catalog_names() +// } + +// fn catalog(&self, name: &str) -> Option> { +// let state = self.state.clone(); +// self.inner +// .catalog(name) +// .map(|catalog| Arc::new(OptdCatalogProvider::new(catalog, state)) as _) +// } +// } + +// #[derive(Debug)] +// struct OptdCatalogProvider 
{ +// inner: Arc, +// state: Weak>, +// } + +// impl OptdCatalogProvider { +// pub fn new(inner: Arc, state: Weak>) -> Self { +// Self { inner, state } +// } +// } + +// impl CatalogProvider for OptdCatalogProvider { +// fn as_any(&self) -> &dyn Any { +// self +// } + +// fn schema_names(&self) -> Vec { +// self.inner.schema_names() +// } + +// fn schema(&self, name: &str) -> Option> { +// let state = self.state.clone(); +// self.inner +// .schema(name) +// .map(|schema| Arc::new(OptdSchemaProvider::new(schema, state)) as _) +// } + +// fn register_schema( +// &self, +// name: &str, +// schema: Arc, +// ) -> Result>> { +// self.inner.register_schema(name, schema) +// } +// } + +// #[derive(Debug)] +// pub struct OptdSchemaProvider { +// inner: Arc, +// state: Weak>, +// } + +// impl OptdSchemaProvider { +// pub fn new(inner: Arc, state: Weak>) -> Self { +// Self { inner, state } +// } +// } + +// #[async_trait] +// impl SchemaProvider for OptdSchemaProvider { +// fn as_any(&self) -> &(dyn std::any::Any + 'static) { +// self +// } + +// async fn table(&self, name: &str) -> Result>> { +// self.inner.table(name) +// } + +// fn table_names(&self) -> Vec { +// self.inner.table_names() +// } + +// fn register_table( +// &self, +// name: String, +// table: Arc, +// ) -> Result>> { +// self.inner.register_table(name, table) +// } + +// fn deregister_table( +// &self, +// name: &str, +// ) -> Result>> { +// self.inner.deregister_table(name) +// } + +// fn table_exist(&self, name: &str) -> bool { +// self.inner.table_exist(name) +// } +// } diff --git a/optd/catalog/src/optd_table.rs b/optd/catalog/src/optd_table.rs new file mode 100644 index 0000000..d9c89b5 --- /dev/null +++ b/optd/catalog/src/optd_table.rs @@ -0,0 +1,149 @@ +use std::{any::Any, borrow::Cow, sync::Arc}; + +use datafusion::{ + arrow::datatypes::SchemaRef, + catalog::{Session, TableProvider}, + common::{Constraints, Statistics}, + datasource::{ + TableType, + listing::{ListingTable, ListingTableUrl}, + }, + 
error::Result, + logical_expr::{LogicalPlan, TableProviderFilterPushDown, dml::InsertOp}, + physical_plan::ExecutionPlan, + prelude::Expr, + sql::TableReference, +}; + +use glob::Pattern; +use url::Url; + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct OptdTableUrl { + inner: Arc, +} + +impl OptdTableUrl { + pub fn try_new(url: Url, glob: Option) -> Result { + let inner = ListingTableUrl::try_new(url, glob)?; + Ok(OptdTableUrl { + inner: Arc::new(inner), + }) + } + + pub fn new_with_inner(inner: Arc) -> Self { + OptdTableUrl { inner } + } +} + +// #[derive()] + +pub struct OptdTable { + inner: Box, + name: String, + table_reference: TableReference, +} + +impl OptdTable { + pub fn try_new( + inner: ListingTable, + name: String, + table_reference: TableReference, + ) -> Result { + Ok(OptdTable { + inner: Box::new(inner), + name, + table_reference, + }) + } + + pub fn new_with_inner( + inner: Box, + name: String, + table_reference: TableReference, + ) -> Self { + OptdTable { + inner, + name, + table_reference, + } + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn table_reference(&self) -> &TableReference { + &self.table_reference + } +} + +#[derive(Debug)] +pub struct OptdTableProvider { + inner: Arc, + table_url: OptdTableUrl, +} + +#[async_trait::async_trait] +impl TableProvider for OptdTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.inner.schema() + } + + fn table_type(&self) -> TableType { + self.inner.table_type() + } + + async fn scan( + &self, + state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> Result> { + self.inner.scan(state, projection, filters, limit).await + } + + fn constraints(&self) -> Option<&Constraints> { + self.inner.constraints() + } + + fn get_table_definition(&self) -> Option<&str> { + self.inner.get_table_definition() + } + + fn get_logical_plan(&self) -> Option> { + self.inner.get_logical_plan() + } + + fn 
get_column_default(&self, _column: &str) -> Option<&Expr> { + None + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> Result> { + Ok(vec![ + TableProviderFilterPushDown::Unsupported; + filters.len() + ]) + } + + fn statistics(&self) -> Option { + self.inner.statistics() + } + + async fn insert_into( + &self, + _state: &dyn Session, + _input: Arc, + _insert_op: InsertOp, + ) -> Result> { + self.inner.insert_into(_state, _input, _insert_op).await + } +} diff --git a/optd/catalog/src/write.rs b/optd/catalog/src/write.rs new file mode 100644 index 0000000..3edda94 --- /dev/null +++ b/optd/catalog/src/write.rs @@ -0,0 +1,7 @@ +use std::{any::Any, fmt, sync::Arc}; + +use async_trait::async_trait; + +use crate::sql::sql_provider_datafusion::{ + get_stream, to_execution_error, Result as SqlResult +} \ No newline at end of file From 7f74b6d7766db6bd981c2bbb2de586b9b46194ac Mon Sep 17 00:00:00 2001 From: Yuchen Liang <70461588+yliang412@users.noreply.github.com> Date: Thu, 7 Aug 2025 06:01:28 -0700 Subject: [PATCH 02/40] core: cascades v0 (#3) v0 of the Cascades-style optimizer. - Exhaustive optimization: expression and group returns only when the subgraph is optimized. - Applying enforcer rules and adding generated expressions to the memo table. - Special termination logic is required when the child has the same group + physical requirement as the parent. - Exhaustive exploration when applying rules, generate all bindings before doing the transform, but only expand based on specified rule patterns. 
Signed-off-by: Yuchen Liang --- .gitignore | 3 +- Cargo.lock | 4074 +++-------------- optd/core/Cargo.toml | 5 + optd/core/src/cascades.rs | 532 +++ optd/core/src/ir/builder.rs | 1 - optd/core/src/ir/catalog.rs | 2 +- optd/core/src/ir/context.rs | 1 + optd/core/src/ir/cost/mod.rs | 12 +- optd/core/src/ir/explain.rs | 2 +- optd/core/src/ir/group.rs | 15 + optd/core/src/ir/macros.rs | 1 + optd/core/src/ir/mod.rs | 56 +- optd/core/src/ir/operator/enforcer/sort.rs | 17 +- optd/core/src/ir/operator/logical/join.rs | 8 +- optd/core/src/ir/operator/logical/order_by.rs | 2 +- optd/core/src/ir/operator/mod.rs | 26 +- .../src/ir/operator/physical/mock_scan.rs | 8 +- optd/core/src/ir/operator/physical/mod.rs | 1 - optd/core/src/ir/operator/physical/nl_join.rs | 12 +- optd/core/src/ir/properties/cardinality.rs | 6 +- optd/core/src/ir/properties/mod.rs | 3 +- optd/core/src/ir/properties/output_columns.rs | 9 +- optd/core/src/ir/properties/required.rs | 26 +- optd/core/src/ir/properties/tuple_ordering.rs | 12 +- optd/core/src/ir/rule/mod.rs | 6 +- optd/core/src/ir/rule/pattern.rs | 6 +- optd/core/src/ir/rule/set.rs | 1 + optd/core/src/ir/scalar/mod.rs | 17 +- optd/core/src/ir/scalar/projection_list.rs | 2 +- optd/core/src/ir/value.rs | 4 +- optd/core/src/lib.rs | 1 + optd/core/src/magic/card.rs | 1 - optd/core/src/magic/cm.rs | 3 +- optd/core/src/magic/mod.rs | 2 +- optd/core/src/memo.rs | 252 +- optd/core/src/rules/enforcers/mod.rs | 43 + optd/core/src/rules/implementations/mod.rs | 3 + .../core/src/rules/implementations/nl_join.rs | 18 +- .../src/rules/implementations/table_scan.rs | 9 +- .../src/rules/logical_join_inner_assoc.rs | 17 +- .../src/rules/logical_join_inner_commute.rs | 5 +- optd/core/src/rules/mod.rs | 6 + optd/core/src/utility/union_find.rs | 6 +- optd/core/tests/it_works.rs | 106 +- 44 files changed, 1839 insertions(+), 3503 deletions(-) create mode 100644 optd/core/src/cascades.rs create mode 100644 optd/core/src/rules/enforcers/mod.rs diff --git a/.gitignore 
b/.gitignore index d9b2525..5fcadb3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target -*.db \ No newline at end of file +*.db +*.memo diff --git a/Cargo.lock b/Cargo.lock index a8e965c..0154c51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,20 +17,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" -[[package]] -name = "ahash" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" -dependencies = [ - "cfg-if", - "const-random", - "getrandom 0.3.3", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "aho-corasick" version = "1.1.3" @@ -41,3593 +27,1255 @@ dependencies = [ ] [[package]] -name = "alloc-no-stdlib" -version = "2.0.4" +name = "anyhow" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] -name = "alloc-stdlib" -version = "0.2.2" +name = "async-stream" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" dependencies = [ - "alloc-no-stdlib", + "async-stream-impl", + "futures-core", + "pin-project-lite", ] [[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - 
-[[package]] -name = "android_system_properties" -version = "0.1.5" +name = "async-stream-impl" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ - "libc", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "anyhow" -version = "1.0.98" +name = "async-trait" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "arrayref" -version = "0.3.9" +name = "atomic-waker" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] -name = "arrayvec" -version = "0.7.6" +name = "autocfg" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] -name = "arrow" -version = "55.2.0" +name = "axum" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "async-trait", + "axum-core", + "bytes", + "futures-util", + 
"http", + "http-body", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower 0.5.2", + "tower-layer", + "tower-service", ] [[package]] -name = "arrow-arith" -version = "55.2.0" +name = "axum-core" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "num", + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", ] [[package]] -name = "arrow-array" -version = "55.2.0" +name = "backtrace" +version = "0.3.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" dependencies = [ - "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "chrono-tz", - "half", - "hashbrown 0.15.4", - "num", + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", ] [[package]] -name = "arrow-buffer" -version = "55.2.0" +name = "base64" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" -dependencies = [ - "bytes", - "half", - "num", -] +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] -name = "arrow-cast" -version = "55.2.0" +name = "base64" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "atoi", - "base64", - "chrono", - "comfy-table", - "half", - "lexical-core", - "num", - "ryu", -] +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] -name = "arrow-csv" -version = "55.2.0" +name = "bitflags" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b" -dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "regex", -] +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] -name = "arrow-data" -version = "55.2.0" +name = "bitvec" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" dependencies = [ - "arrow-buffer", - "arrow-schema", - "half", - "num", + "funty", + "radium", + "tap", + "wyz", ] [[package]] -name = "arrow-ipc" -version = "55.2.0" +name = "byteorder" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "flatbuffers", - "lz4_flex", - "zstd", -] +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] -name = "arrow-json" -version = "55.2.0" +name = "bytes" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - 
"chrono", - "half", - "indexmap", - "lexical-core", - "memchr", - "num", - "serde", - "serde_json", - "simdutf8", -] +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] -name = "arrow-ord" -version = "55.2.0" +name = "cfg-if" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", -] +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] -name = "arrow-row" -version = "55.2.0" +name = "console-api" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2" +checksum = "8030735ecb0d128428b64cd379809817e620a40e5001c54465b99ec5feec2857" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "half", + "futures-core", + "prost", + "prost-types", + "tonic", + "tracing-core", ] [[package]] -name = "arrow-schema" -version = "55.2.0" +name = "console-subscriber" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" +checksum = "6539aa9c6a4cd31f4b1c040f860a1eac9aa80e7df6b05d506a6e7179936d6a01" dependencies = [ + "console-api", + "crossbeam-channel", + "crossbeam-utils", + "futures-task", + "hdrhistogram", + "humantime", + "hyper-util", + "prost", + "prost-types", "serde", "serde_json", + "thread_local", + "tokio", + "tokio-stream", + "tonic", + "tracing", + "tracing-core", + "tracing-subscriber", ] [[package]] -name = "arrow-select" -version = "55.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" -dependencies = [ - "ahash", - "arrow-array", - 
"arrow-buffer", - "arrow-data", - "arrow-schema", - "num", -] - -[[package]] -name = "arrow-string" -version = "55.2.0" +name = "crc32fast" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "memchr", - "num", - "regex", - "regex-syntax 0.8.5", + "cfg-if", ] [[package]] -name = "async-compression" -version = "0.4.19" +name = "crossbeam-channel" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ - "bzip2 0.5.2", - "flate2", - "futures-core", - "memchr", - "pin-project-lite", - "tokio", - "xz2", - "zstd", - "zstd-safe", + "crossbeam-utils", ] [[package]] -name = "async-trait" -version = "0.1.88" +name = "crossbeam-utils" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] -name = "atoi" -version = "2.0.0" +name = "either" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" -dependencies = [ - "num-traits", -] +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] -name = "autocfg" -version = "1.5.0" +name = "equivalent" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] -name = "backtrace" -version = "0.3.75" +name = "flate2" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" dependencies = [ - "addr2line", - "cfg-if", - "libc", + "crc32fast", "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", ] [[package]] -name = "base64" -version = "0.22.1" +name = "fnv" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] -name = "base64ct" -version = "1.8.0" +name = "funty" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] -name = "bigdecimal" -version = "0.4.8" +name = "futures-channel" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ - "autocfg", - "libm", - "num-bigint", - "num-integer", - "num-traits", + "futures-core", ] [[package]] -name = "bitflags" -version = "2.9.1" +name = "futures-core" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" -dependencies = [ - "serde", -] +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" 
[[package]] -name = "bitvec" -version = "1.0.1" +name = "futures-sink" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" -dependencies = [ - "funty", - "radium", - "tap", - "wyz", -] +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] -name = "blake2" -version = "0.10.6" +name = "futures-task" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] -name = "blake3" -version = "1.8.2" +name = "futures-util" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", ] [[package]] -name = "block-buffer" -version = "0.10.4" +name = "getrandom" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ - "generic-array", + "cfg-if", + "libc", + "wasi", ] [[package]] -name = "brotli" -version = "8.0.1" +name = "gimli" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] -name = 
"brotli-decompressor" -version = "5.0.0" +name = "h2" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap 2.10.0", + "slab", + "tokio", + "tokio-util", + "tracing", ] [[package]] -name = "bumpalo" -version = "3.19.0" +name = "hashbrown" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] -name = "byteorder" -version = "1.5.0" +name = "hashbrown" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" [[package]] -name = "bytes" -version = "1.10.1" +name = "hdrhistogram" +version = "7.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "base64 0.21.7", + "byteorder", + "flate2", + "nom", + "num-traits", +] [[package]] -name = "bzip2" -version = "0.5.2" +name = "http" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ - "bzip2-sys", + "bytes", + "fnv", + "itoa", ] [[package]] -name = "bzip2" -version = "0.6.0" +name = "http-body" +version = "1.0.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ - "libbz2-rs-sys", + "bytes", + "http", ] [[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" +name = "http-body-util" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ - "cc", - "pkg-config", + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", ] [[package]] -name = "cc" -version = "1.2.31" +name = "httparse" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2" -dependencies = [ - "jobserver", - "libc", - "shlex", -] +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] -name = "cfg-if" -version = "1.0.1" +name = "httpdate" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" [[package]] -name = "chrono" -version = "0.4.41" +name = "hyper" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" dependencies = [ - "android-tzdata", - "iana-time-zone", - "num-traits", - "windows-link", + 
"bytes", + "futures-channel", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", ] [[package]] -name = "chrono-tz" -version = "0.10.4" +name = "hyper-timeout" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "chrono", - "phf", + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", ] [[package]] -name = "comfy-table" -version = "7.1.4" +name = "hyper-util" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" +checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" dependencies = [ - "unicode-segmentation", - "unicode-width", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "hyper", + "libc", + "pin-project-lite", + "socket2 0.6.0", + "tokio", + "tower-service", + "tracing", ] [[package]] -name = "concurrent-queue" -version = "2.5.0" +name = "indexmap" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ - "crossbeam-utils", + "autocfg", + "hashbrown 0.12.3", ] [[package]] -name = "const-oid" -version = "0.9.6" +name = "indexmap" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown 0.15.4", +] [[package]] -name = "const-random" 
-version = "0.1.18" +name = "io-uring" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" dependencies = [ - "const-random-macro", + "bitflags", + "cfg-if", + "libc", ] [[package]] -name = "const-random-macro" -version = "0.1.16" +name = "itertools" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" dependencies = [ - "getrandom 0.2.16", - "once_cell", - "tiny-keccak", + "either", ] [[package]] -name = "constant_time_eq" -version = "0.3.1" +name = "itoa" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] -name = "core-foundation-sys" -version = "0.8.7" +name = "lazy_static" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] -name = "cpufeatures" -version = "0.2.17" +name = "libc" +version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" [[package]] -name = "crc" -version = "3.3.0" +name = "lock_api" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +checksum = 
"96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" dependencies = [ - "crc-catalog", + "autocfg", + "scopeguard", ] [[package]] -name = "crc-catalog" -version = "2.4.0" +name = "log" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] -name = "crc32fast" -version = "1.5.0" +name = "matchers" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" dependencies = [ - "cfg-if", + "regex-automata 0.1.10", ] [[package]] -name = "crossbeam-queue" -version = "0.3.12" +name = "matchit" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" -dependencies = [ - "crossbeam-utils", -] +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" [[package]] -name = "crossbeam-utils" -version = "0.8.21" +name = "memchr" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] -name = "crunchy" -version = "0.2.4" +name = "mime" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] -name = "crypto-common" -version = "0.1.6" +name = "minimal-lexical" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] -name = "csv" -version = "1.3.1" +name = "miniz_oxide" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", + "adler2", ] [[package]] -name = "csv-core" -version = "0.1.12" +name = "mio" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ - "memchr", + "libc", + "wasi", + "windows-sys 0.59.0", ] [[package]] -name = "dashmap" -version = "6.1.0" +name = "nom" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ - "cfg-if", - "crossbeam-utils", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core", + "memchr", + "minimal-lexical", ] [[package]] -name = "datafusion" -version = "49.0.0" +name = "nu-ansi-term" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f47772c28553d837e12cdcc0fb04c2a0fe8eca8b704a30f721d076f32407435" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" dependencies = [ - "arrow", - "arrow-ipc", - "arrow-schema", - "async-trait", - "bytes", - "bzip2 0.6.0", - "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-common-runtime", - 
"datafusion-datasource", - "datafusion-datasource-csv", - "datafusion-datasource-json", - "datafusion-datasource-parquet", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", - "datafusion-functions-table", - "datafusion-functions-window", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", - "flate2", - "futures", - "hex", - "itertools", - "log", - "object_store", - "parking_lot", - "parquet", - "rand 0.9.2", - "regex", - "sqlparser", - "tempfile", - "tokio", - "url", - "uuid", - "xz2", - "zstd", + "overload", + "winapi", ] [[package]] -name = "datafusion-catalog" -version = "49.0.0" +name = "num-traits" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b6b29c9c922959285fac53139e12c81014e2ca54704f20355edd7e9d11fd773" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "arrow", - "async-trait", - "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", - "futures", - "itertools", - "log", - "object_store", - "parking_lot", - "tokio", + "autocfg", ] [[package]] -name = "datafusion-catalog-listing" -version = "49.0.0" +name = "object" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7313553e4c01d184dd49183afdfa22f23204a10a26dd12e6f799203d8fdb95c2" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ - "arrow", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - "datafusion-execution", - 
"datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "log", - "object_store", - "tokio", + "memchr", ] [[package]] -name = "datafusion-common" -version = "49.0.0" +name = "once_cell" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d66104731b7476a8c86fbe7a6fd741e6329791166ac89a91fcd8336a560ddaf" -dependencies = [ - "ahash", - "arrow", - "arrow-ipc", - "base64", - "chrono", - "half", - "hashbrown 0.14.5", - "hex", - "indexmap", - "libc", - "log", - "object_store", - "parquet", - "paste", - "recursive", - "sqlparser", - "tokio", - "web-time", -] +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] -name = "datafusion-common-runtime" -version = "49.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7527ecdfeae6961a8564d3b036507a67bd467fd36a9f10cf8ad7a99db1f1bc" -dependencies = [ - "futures", - "log", - "tokio", -] +name = "optd-catalog" +version = "0.1.0" [[package]] -name = "datafusion-datasource" -version = "49.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40e5076be33d8eb9f4d99858e5f3477b36c07e61eee8eb93c4320428d9e1e344" +name = "optd-core" +version = "0.1.0" dependencies = [ - "arrow", - "async-compression", - "async-trait", - "bytes", - "bzip2 0.6.0", - "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "flate2", - "futures", - "glob", + "anyhow", + "bitvec", + "console-subscriber", "itertools", - "log", - "object_store", - "parquet", - "rand 0.9.2", - "tempfile", + "pretty-xmlish", "tokio", - "tokio-util", - "url", - "xz2", - "zstd", + "tracing", + "tracing-subscriber", + "tracing-test", ] [[package]] -name = 
"datafusion-datasource-csv" -version = "49.0.0" +name = "overload" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "785518d0f2f136c19b9389a10762c01a5aeb5fcdebdb244297bb656b2862dc88" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "object_store", - "regex", - "tokio", -] +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] -name = "datafusion-datasource-json" -version = "49.0.0" +name = "parking_lot" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71cb7c3bad0951bf5c52505d0e6d87e6c0098156d2a195924cbcdc82238d29ba" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "object_store", - "serde_json", - "tokio", + "lock_api", + "parking_lot_core", ] [[package]] -name = "datafusion-datasource-parquet" -version = "49.0.0" +name = "parking_lot_core" +version = "0.9.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea76ad2c5189c98a6b1d4bdf6c3b3caacc9701c417af6661597c946a201bc328" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - 
"datafusion-expr", - "datafusion-functions-aggregate", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-pruning", - "datafusion-session", - "futures", - "hex", - "itertools", - "log", - "object_store", - "parking_lot", - "parquet", - "rand 0.9.2", - "tokio", + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", ] [[package]] -name = "datafusion-doc" -version = "49.0.0" +name = "percent-encoding" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bcc45e380db5c6033c3f39e765a3d752679f14315060a7f4030a60066a36946" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] -name = "datafusion-execution" -version = "49.0.0" +name = "pin-project" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8209805fdce3d5c6e1625f674d3e4ce93e995a56d3709a0bb8d4361062652596" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" dependencies = [ - "arrow", - "dashmap", - "datafusion-common", - "datafusion-expr", - "futures", - "log", - "object_store", - "parking_lot", - "rand 0.9.2", - "tempfile", - "url", + "pin-project-internal", ] [[package]] -name = "datafusion-expr" -version = "49.0.0" +name = "pin-project-internal" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7879a845e72a00cacffacbdf5f40626049cb9584d2ba8aa0b9172f09833110ab" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ - "arrow", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", - "indexmap", - "paste", - "recursive", - "serde_json", - "sqlparser", + "proc-macro2", + "quote", + "syn", ] [[package]] 
-name = "datafusion-expr-common" -version = "49.0.0" +name = "pin-project-lite" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6da7e47e70ef2c7678735c82c392bd74687004043f5fc8072ab8678dc6fa459d" -dependencies = [ - "arrow", - "datafusion-common", - "indexmap", - "itertools", - "paste", -] +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] -name = "datafusion-functions" -version = "49.0.0" +name = "pin-utils" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e7b92b04c5c3b1151f055251b36e272071f9088d9701826a533cb4f764af1c8" -dependencies = [ - "arrow", - "arrow-buffer", - "base64", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", - "hex", - "itertools", - "log", - "md-5", - "rand 0.9.2", - "regex", - "sha2", - "unicode-segmentation", - "uuid", -] +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] -name = "datafusion-functions-aggregate" -version = "49.0.0" +name = "ppv-lite86" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f16cb922b62e535a4d484961ac2c1c6d188dbe02e85e026c05f0fabbc8f814e" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "half", - "log", - "paste", + "zerocopy", ] [[package]] -name = "datafusion-functions-aggregate-common" -version = "49.0.0" +name = "pretty-xmlish" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6f71bb59dc8b4dc985c911f2e0d8cf426c21f565b56dca4b852c244101a1a7a2" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", -] +checksum = "96b8aab53732b7a9c5c39bb0e130f85671b48b188ef258c3b9f7f5da1877382a" [[package]] -name = "datafusion-functions-nested" -version = "49.0.0" +name = "proc-macro2" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27eb3b98a2eb02a8af4ef19cc793cac21fc98d8720b987f15d7d25b8cc875f4d" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ - "arrow", - "arrow-ord", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr-common", - "itertools", - "log", - "paste", + "unicode-ident", ] [[package]] -name = "datafusion-functions-table" -version = "49.0.0" +name = "prost" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350e0940fc3e2fa4645a4d323f9ebf9258b2d7fdad12013a471cae4ae5568683" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ - "arrow", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-plan", - "parking_lot", - "paste", + "bytes", + "prost-derive", ] [[package]] -name = "datafusion-functions-window" -version = "49.0.0" +name = "prost-derive" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df03c6c62039578fd110b327c474846fdf3d9077a568f1e8706e585ed30cb98d" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - 
"datafusion-physical-expr", - "datafusion-physical-expr-common", - "log", - "paste", + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "datafusion-functions-window-common" -version = "49.0.0" +name = "prost-types" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "083659a95914bf3ca568a72b085cb8654576fef1236b260dc2379cb8e5f922b2" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", + "prost", ] [[package]] -name = "datafusion-macros" -version = "49.0.0" +name = "quote" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cabe1f32daa2fa54e6b20d14a13a9e85bef97c4161fe8a90d76b6d9693a5ac4" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ - "datafusion-expr", - "quote", - "syn", + "proc-macro2", ] [[package]] -name = "datafusion-optimizer" -version = "49.0.0" +name = "radium" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e12a97dcb0ccc569798be1289c744829cce5f18cc9b037054f8d7f93e1d57be" -dependencies = [ - "arrow", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "indexmap", - "itertools", - "log", - "recursive", - "regex", - "regex-syntax 0.8.5", -] +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" [[package]] -name = "datafusion-physical-expr" -version = "49.0.0" +name = "rand" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41312712b8659a82b4e9faa8d97a018e7f2ccbdedf2f7cb93ecf256e39858c86" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - 
"datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", - "half", - "hashbrown 0.14.5", - "indexmap", - "itertools", - "log", - "paste", - "petgraph", + "libc", + "rand_chacha", + "rand_core", ] [[package]] -name = "datafusion-physical-expr-common" -version = "49.0.0" +name = "rand_chacha" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1649a60ea0319496d616ae3554e84dfcc262c201ab4439abcd83cca989b85b" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "hashbrown 0.14.5", - "itertools", + "ppv-lite86", + "rand_core", ] [[package]] -name = "datafusion-physical-optimizer" -version = "49.0.0" +name = "rand_core" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea3f5b8ba6122426774aaaf11325740b8e5d3afaab9ab39dc63423adca554748" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-pruning", - "itertools", - "log", - "recursive", + "getrandom", ] [[package]] -name = "datafusion-physical-plan" -version = "49.0.0" +name = "redox_syscall" +version = "0.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a595f296929d6cffa12b993ea53e9fe8215fada050d78626c5cf0e2f02b0205" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" dependencies = [ - "ahash", - "arrow", - "arrow-ord", - "arrow-schema", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - 
"futures", - "half", - "hashbrown 0.14.5", - "indexmap", - "itertools", - "log", - "parking_lot", - "pin-project-lite", - "tokio", + "bitflags", ] [[package]] -name = "datafusion-pruning" -version = "49.0.0" +name = "regex" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ - "arrow", - "arrow-schema", - "datafusion-common", - "datafusion-datasource", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "itertools", - "log", + "aho-corasick", + "memchr", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", ] [[package]] -name = "datafusion-session" -version = "49.0.0" +name = "regex-automata" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd5f2fe790f43839c70fb9604c4f9b59ad290ef64e1d2f927925dd34a9245406" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" dependencies = [ - "arrow", - "async-trait", - "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools", - "log", - "object_store", - "parking_lot", - "tokio", + "regex-syntax 0.6.29", ] [[package]] -name = "datafusion-sql" -version = "49.0.0" +name = "regex-automata" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ebebb82fda37f62f06fe14339f4faa9f197a0320cc4d26ce2a5fd53a5ccd27c" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ - "arrow", - "bigdecimal", - "datafusion-common", - "datafusion-expr", - "indexmap", - "log", - "recursive", - "regex", - "sqlparser", + "aho-corasick", + "memchr", + "regex-syntax 
0.8.5", ] [[package]] -name = "der" -version = "0.7.10" +name = "regex-syntax" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" -dependencies = [ - "const-oid", - "pem-rfc7468", - "zeroize", -] +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] -name = "digest" -version = "0.10.7" +name = "regex-syntax" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "const-oid", - "crypto-common", - "subtle", -] +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] -name = "displaydoc" -version = "0.2.5" +name = "rustc-demangle" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" [[package]] -name = "dotenvy" -version = "0.15.7" +name = "rustversion" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" [[package]] -name = "either" -version = "1.15.0" +name = "ryu" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" -dependencies = [ - "serde", -] +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] -name = "equivalent" -version = "1.0.2" +name = "scopeguard" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "errno" -version = "0.3.13" +name = "serde" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ - "libc", - "windows-sys 0.59.0", + "serde_derive", ] [[package]] -name = "etcetera" -version = "0.8.0" +name = "serde_derive" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ - "cfg-if", - "home", - "windows-sys 0.48.0", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.142" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", ] [[package]] -name = "event-listener" -version = "5.4.1" +name = "sharded-slab" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", + "lazy_static", ] [[package]] -name = "fastrand" -version = "2.3.0" +name = "slab" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" [[package]] -name = "fixedbitset" -version = "0.5.7" +name = 
"smallvec" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] -name = "flatbuffers" -version = "25.2.10" +name = "socket2" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" dependencies = [ - "bitflags", - "rustc_version", + "libc", + "windows-sys 0.52.0", ] [[package]] -name = "flate2" -version = "1.1.2" +name = "socket2" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" dependencies = [ - "crc32fast", - "libz-rs-sys", - "miniz_oxide", + "libc", + "windows-sys 0.59.0", ] [[package]] -name = "flume" -version = "0.11.1" +name = "syn" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ - "futures-core", - "futures-sink", - "spin", + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] -name = "fnv" -version = "1.0.7" +name = "sync_wrapper" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" [[package]] -name = "foldhash" -version = "0.1.5" +name = "tap" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] -name = "form_urlencoded" -version = "1.2.1" +name = "thread_local" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ - "percent-encoding", + "cfg-if", ] [[package]] -name = "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - -[[package]] -name = "futures" -version = "0.3.31" +name = "tokio" +version = "1.47.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "slab", + "socket2 0.6.0", + "tokio-macros", + "tracing", + "windows-sys 0.59.0", ] [[package]] -name = "futures-channel" -version = "0.3.31" +name = "tokio-macros" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ - "futures-core", - "futures-sink", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "futures-core" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" - -[[package]] -name = 
"futures-executor" -version = "0.3.31" +name = "tokio-stream" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" dependencies = [ "futures-core", - "futures-task", - "futures-util", + "pin-project-lite", + "tokio", ] [[package]] -name = "futures-intrusive" -version = "0.5.0" +name = "tokio-util" +version = "0.7.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" dependencies = [ + "bytes", "futures-core", - "lock_api", - "parking_lot", + "futures-sink", + "pin-project-lite", + "tokio", ] [[package]] -name = "futures-io" -version = "0.3.31" +name = "tonic" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" - -[[package]] -name = "futures-macro" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" - -[[package]] -name = "futures-task" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" - -[[package]] -name = "futures-util" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" -dependencies = [ - 
"futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" -dependencies = [ - "cfg-if", - "js-sys", - "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "wasm-bindgen", -] - -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi 0.14.2+wasi-0.2.4", -] - -[[package]] -name = "gimli" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" - -[[package]] -name = "glob" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" - -[[package]] -name = "half" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" -dependencies = [ - "cfg-if", - "crunchy", - "num-traits", -] - -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] - -[[package]] -name = "hashbrown" -version = "0.15.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", -] - -[[package]] -name = "hashlink" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" -dependencies = [ - "hashbrown 0.15.4", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "hkdf" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" -dependencies = [ - "hmac", -] - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - -[[package]] -name = "home" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" -dependencies = [ - "windows-sys 0.59.0", -] - -[[package]] -name = "http" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "humantime" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" - -[[package]] -name = 
"iana-time-zone" -version = "0.1.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "icu_collections" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" -dependencies = [ - "displaydoc", - "potential_utf", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" - -[[package]] -name = "icu_properties" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" -dependencies = [ - "displaydoc", - "icu_collections", - 
"icu_locale_core", - "icu_properties_data", - "icu_provider", - "potential_utf", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" - -[[package]] -name = "icu_provider" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" -dependencies = [ - "displaydoc", - "icu_locale_core", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerotrie", - "zerovec", -] - -[[package]] -name = "idna" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - -[[package]] -name = "indexmap" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" -dependencies = [ - "equivalent", - "hashbrown 0.15.4", -] - -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - -[[package]] -name = "io-uring" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" -dependencies = [ - "bitflags", - "cfg-if", - "libc", -] - -[[package]] -name = "itertools" -version = "0.14.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" - -[[package]] -name = "jobserver" -version = "0.1.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" -dependencies = [ - "getrandom 0.3.3", - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "lazy_static" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -dependencies = [ - "spin", -] - -[[package]] -name = "lexical-core" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" -dependencies = [ - "lexical-util", - 
"static_assertions", -] - -[[package]] -name = "lexical-util" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "libbz2-rs-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" - -[[package]] -name = "libc" -version = "0.2.174" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" - -[[package]] -name = "libm" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" - -[[package]] -name = "libsqlite3-sys" -version = "0.30.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" -dependencies = [ - "cc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "libz-rs-sys" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" -dependencies = [ - "zlib-rs", -] - -[[package]] -name = "linux-raw-sys" -version = "0.9.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" - -[[package]] -name = "litemap" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" - -[[package]] -name = "lock_api" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" - -[[package]] -name = "lz4_flex" -version = "0.11.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" -dependencies = [ - "twox-hash", -] - -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "matchers" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" -dependencies = [ - "regex-automata 0.1.10", -] - -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - -[[package]] -name = "memchr" -version = "2.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" - -[[package]] -name = "miniz_oxide" -version = 
"0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" -dependencies = [ - "adler2", -] - -[[package]] -name = "mio" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" -dependencies = [ - "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "windows-sys 0.59.0", -] - -[[package]] -name = "nu-ansi-term" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" -dependencies = [ - "overload", - "winapi", -] - -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-bigint-dig" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" -dependencies = [ - "byteorder", - "lazy_static", - "libm", - "num-integer", - "num-iter", - "num-traits", - "rand 0.8.5", - "smallvec", - "zeroize", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-integer" -version = "0.1.46" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "object" -version = "0.36.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" -dependencies = [ - "memchr", -] - -[[package]] -name = "object_store" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "http", - "humantime", - "itertools", - "parking_lot", - "percent-encoding", - "thiserror", - "tokio", - "tracing", - "url", - "walkdir", - "wasm-bindgen-futures", - "web-time", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "optd-catalog" -version = "0.1.0" -dependencies = [ - "async-trait", - "datafusion", - "glob", - "parking_lot", - "sqlx", - "url", - 
"uuid", -] - -[[package]] -name = "optd-core" -version = "0.1.0" -dependencies = [ - "anyhow", - "bitvec", - "itertools", - "pretty-xmlish", - "tokio", - "tracing", - "tracing-test", -] - -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - -[[package]] -name = "parking" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" - -[[package]] -name = "parking_lot" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets 0.52.6", -] - -[[package]] -name = "parquet" -version = "55.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64", - "brotli", - "bytes", - "chrono", - "flate2", - "futures", - "half", - "hashbrown 0.15.4", - "lz4_flex", - "num", - "num-bigint", - "object_store", - "paste", - "ring", - "seq-macro", - "simdutf8", - "snap", - "thrift", - "tokio", - "twox-hash", - 
"zstd", -] - -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "pem-rfc7468" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" -dependencies = [ - "base64ct", -] - -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - -[[package]] -name = "petgraph" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" -dependencies = [ - "fixedbitset", - "hashbrown 0.15.4", - "indexmap", - "serde", -] - -[[package]] -name = "phf" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_shared" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkcs1" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" -dependencies = [ - "der", - "pkcs8", - "spki", -] - -[[package]] -name = "pkcs8" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" -dependencies = [ - "der", - "spki", -] - -[[package]] -name = "pkg-config" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" - -[[package]] -name = "potential_utf" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" -dependencies = [ - "zerovec", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "pretty-xmlish" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b8aab53732b7a9c5c39bb0e130f85671b48b188ef258c3b9f7f5da1877382a" - -[[package]] -name = "proc-macro2" -version = "1.0.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "psm" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" -dependencies = [ - "cc", -] - -[[package]] -name = "quote" -version = "1.0.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "r-efi" -version = "5.3.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - -[[package]] -name = "radium" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" -dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.3", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core 0.9.3", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.16", -] - -[[package]] -name = "rand_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" -dependencies = [ - "getrandom 0.3.3", -] - -[[package]] -name = "recursive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" -dependencies = [ - "recursive-proc-macro-impl", - "stacker", -] - -[[package]] -name = "recursive-proc-macro-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" -dependencies = [ - "quote", - "syn", -] - -[[package]] -name = "redox_syscall" -version = "0.5.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata 0.4.9", - "regex-syntax 0.8.5", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", -] - -[[package]] -name = "regex-automata" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.8.5", -] - -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - -[[package]] -name = "regex-syntax" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" - -[[package]] -name = "ring" -version = "0.17.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" -dependencies = [ - "cc", - "cfg-if", - "getrandom 0.2.16", - "libc", - "untrusted", - "windows-sys 0.52.0", -] - -[[package]] -name = "rsa" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" -dependencies = [ - "const-oid", - "digest", - "num-bigint-dig", - "num-integer", - "num-traits", - "pkcs1", - "pkcs8", - "rand_core 0.6.4", - "signature", - "spki", - "subtle", - "zeroize", -] - -[[package]] -name = "rustc-demangle" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" - -[[package]] -name = "rustc_version" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" -dependencies = [ - "semver", -] - -[[package]] -name = "rustix" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.59.0", -] - -[[package]] -name = "rustversion" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" - -[[package]] -name = "ryu" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "semver" -version = "1.0.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" - -[[package]] -name = "seq-macro" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" - -[[package]] -name = "serde" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.142" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sha2" -version = "0.10.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sharded-slab" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" -dependencies = [ - "lazy_static", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "signature" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" -dependencies = [ - "digest", - "rand_core 0.6.4", -] - -[[package]] -name = "simdutf8" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" - -[[package]] -name = "siphasher" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" - -[[package]] -name = "slab" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" - -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" -dependencies = [ - "serde", -] - -[[package]] -name = "snap" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" - -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -dependencies = [ - "lock_api", -] - -[[package]] -name = "spki" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" -dependencies = [ - "base64ct", - "der", -] - -[[package]] -name = "sqlparser" -version = "0.55.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" -dependencies = [ - "log", - "recursive", - "sqlparser_derive", -] - -[[package]] -name = "sqlparser_derive" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "sqlx" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" -dependencies = [ - "sqlx-core", - "sqlx-macros", - "sqlx-mysql", - "sqlx-postgres", - "sqlx-sqlite", -] - -[[package]] -name = "sqlx-core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" -dependencies = [ - "base64", - "bytes", - "crc", - "crossbeam-queue", - "either", - "event-listener", - "futures-core", - "futures-intrusive", - "futures-io", - "futures-util", - "hashbrown 0.15.4", - "hashlink", - "indexmap", - "log", - "memchr", - "once_cell", - "percent-encoding", - "serde", - "serde_json", - "sha2", - "smallvec", - "thiserror", - "tracing", - "url", -] - -[[package]] -name = "sqlx-macros" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" -dependencies = [ - "proc-macro2", - "quote", - 
"sqlx-core", - "sqlx-macros-core", - "syn", -] - -[[package]] -name = "sqlx-macros-core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" -dependencies = [ - "dotenvy", - "either", - "heck", - "hex", - "once_cell", - "proc-macro2", - "quote", - "serde", - "serde_json", - "sha2", - "sqlx-core", - "sqlx-mysql", - "sqlx-postgres", - "sqlx-sqlite", - "syn", - "url", -] - -[[package]] -name = "sqlx-mysql" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" -dependencies = [ - "atoi", - "base64", - "bitflags", - "byteorder", - "bytes", - "crc", - "digest", - "dotenvy", - "either", - "futures-channel", - "futures-core", - "futures-io", - "futures-util", - "generic-array", - "hex", - "hkdf", - "hmac", - "itoa", - "log", - "md-5", - "memchr", - "once_cell", - "percent-encoding", - "rand 0.8.5", - "rsa", - "serde", - "sha1", - "sha2", - "smallvec", - "sqlx-core", - "stringprep", - "thiserror", - "tracing", - "whoami", -] - -[[package]] -name = "sqlx-postgres" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" -dependencies = [ - "atoi", - "base64", - "bitflags", - "byteorder", - "crc", - "dotenvy", - "etcetera", - "futures-channel", - "futures-core", - "futures-util", - "hex", - "hkdf", - "hmac", - "home", - "itoa", - "log", - "md-5", - "memchr", - "once_cell", - "rand 0.8.5", - "serde", - "serde_json", - "sha2", - "smallvec", - "sqlx-core", - "stringprep", - "thiserror", - "tracing", - "whoami", -] - -[[package]] -name = "sqlx-sqlite" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" -dependencies = [ - "atoi", - "flume", - 
"futures-channel", - "futures-core", - "futures-executor", - "futures-intrusive", - "futures-util", - "libsqlite3-sys", - "log", - "percent-encoding", - "serde", - "serde_urlencoded", - "sqlx-core", - "thiserror", - "tracing", - "url", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - -[[package]] -name = "stacker" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "stringprep" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" -dependencies = [ - "unicode-bidi", - "unicode-normalization", - "unicode-properties", -] - -[[package]] -name = "subtle" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" - -[[package]] -name = "syn" -version = "2.0.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tap" -version = "1.0.1" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - -[[package]] -name = "tempfile" -version = "3.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" -dependencies = [ - "fastrand", - "getrandom 0.3.3", - "once_cell", - "rustix", - "windows-sys 0.59.0", -] - -[[package]] -name = "thiserror" -version = "2.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "2.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "thread_local" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinystr" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ - 
"displaydoc", - "zerovec", -] - -[[package]] -name = "tinyvec" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tokio" -version = "1.47.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" -dependencies = [ - "backtrace", + "async-stream", + "async-trait", + "axum", + "base64 0.22.1", "bytes", - "io-uring", - "libc", - "mio", - "parking_lot", - "pin-project-lite", - "slab", - "tokio-macros", -] - -[[package]] -name = "tokio-macros" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "socket2 0.5.10", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", ] [[package]] -name = "tokio-util" -version = "0.7.16" +name = "tower" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ - "bytes", "futures-core", - "futures-sink", + "futures-util", + "indexmap 1.9.3", + "pin-project", "pin-project-lite", + "rand", + "slab", "tokio", -] - -[[package]] -name = "tracing" -version = "0.1.41" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" -dependencies = [ - "log", - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tracing-core" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" -dependencies = [ - "once_cell", - "valuable", -] - -[[package]] -name = "tracing-log" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" -dependencies = [ - "matchers", - "nu-ansi-term", - "once_cell", - "regex", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", -] - -[[package]] -name = "tracing-test" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "557b891436fe0d5e0e363427fc7f217abf9ccd510d5136549847bdcbcd011d68" -dependencies = [ - "tracing-core", - "tracing-subscriber", - "tracing-test-macro", -] - -[[package]] -name = "tracing-test-macro" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" -dependencies = [ - "quote", - "syn", -] - -[[package]] -name = "twox-hash" -version = "2.1.1" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" - -[[package]] -name = "typenum" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" - -[[package]] -name = "unicode-bidi" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" - -[[package]] -name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" - -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-properties" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" - -[[package]] -name = "unicode-segmentation" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" - -[[package]] -name = "unicode-width" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" - -[[package]] -name = "untrusted" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" - -[[package]] -name = "url" -version = "2.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - -[[package]] -name = "uuid" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" -dependencies = [ - "getrandom 0.3.3", - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "valuable" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] [[package]] -name = "version_check" -version = "0.9.5" +name = "tower" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", +] [[package]] -name = "walkdir" -version = "2.5.0" +name = "tower-layer" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" +name = "tower-service" +version = "0.3.3" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" +name = "tracing" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ - "wit-bindgen-rt", + "pin-project-lite", + "tracing-attributes", + "tracing-core", ] [[package]] -name = "wasite" -version = "0.1.0" +name = "tracing-attributes" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "wasm-bindgen" -version = "0.2.100" +name = "tracing-core" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ - "cfg-if", "once_cell", - "rustversion", - "wasm-bindgen-macro", + "valuable", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" +name = "tracing-log" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" dependencies = [ - "bumpalo", "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", + "once_cell", + "tracing-core", ] [[package]] -name = "wasm-bindgen-futures" -version = "0.4.50" +name = "tracing-subscriber" 
+version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ - "cfg-if", - "js-sys", + "matchers", + "nu-ansi-term", "once_cell", - "wasm-bindgen", - "web-sys", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] -name = "wasm-bindgen-macro" -version = "0.2.100" +name = "tracing-test" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "557b891436fe0d5e0e363427fc7f217abf9ccd510d5136549847bdcbcd011d68" dependencies = [ - "quote", - "wasm-bindgen-macro-support", + "tracing-core", + "tracing-subscriber", + "tracing-test-macro", ] [[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.100" +name = "tracing-test-macro" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" dependencies = [ - "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", ] [[package]] -name = "wasm-bindgen-shared" -version = "0.2.100" +name = "try-lock" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" -dependencies = [ - "unicode-ident", -] +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] -name = "web-sys" -version = "0.3.77" +name = "unicode-ident" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" -dependencies = [ - 
"js-sys", - "wasm-bindgen", -] +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" [[package]] -name = "web-time" -version = "1.1.0" +name = "want" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" dependencies = [ - "js-sys", - "wasm-bindgen", + "try-lock", ] [[package]] -name = "whoami" -version = "1.6.0" +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6994d13118ab492c3c80c1f81928718159254c53c472bf9ce36f8dae4add02a7" -dependencies = [ - "redox_syscall", - "wasite", -] +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "winapi" @@ -3645,96 +1293,19 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" -dependencies = [ - "windows-sys 0.59.0", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-core" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" -dependencies = [ - "windows-implement", - "windows-interface", - 
"windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - -[[package]] -name = "windows-result" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.6", + "windows-targets", ] [[package]] @@ -3743,22 +1314,7 @@ version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 
dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-targets", ] [[package]] @@ -3767,46 +1323,28 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -3819,69 +1357,30 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = 
"windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] - -[[package]] -name = "writeable" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" - [[package]] name = "wyz" version = "0.5.1" @@ -3891,39 +1390,6 @@ dependencies = [ "tap", ] -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - -[[package]] -name = "yoke" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - [[package]] name = "zerocopy" version = "0.8.26" @@ -3943,97 +1409,3 @@ dependencies = [ "quote", "syn", ] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zeroize" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" - -[[package]] -name = "zerotrie" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", -] - -[[package]] -name = "zerovec" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdbb9122ea75b11bf96e7492afb723e8a7fbe12c67417aa95e7e3d18144d37cd" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zlib-rs" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" - -[[package]] -name = "zstd" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "7.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" -dependencies = [ - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.15+zstd.1.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" 
-dependencies = [ - "cc", - "pkg-config", -] diff --git a/optd/core/Cargo.toml b/optd/core/Cargo.toml index 82a1ca2..62ead8d 100644 --- a/optd/core/Cargo.toml +++ b/optd/core/Cargo.toml @@ -13,6 +13,11 @@ tokio = { version = "1.47.0", features = [ "sync", "macros", "parking_lot", + "tracing", ] } tracing = "0.1.41" + +[dev-dependencies] +console-subscriber = "0.4.1" tracing-test = "0.2.5" +tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } diff --git a/optd/core/src/cascades.rs b/optd/core/src/cascades.rs new file mode 100644 index 0000000..6cb7d98 --- /dev/null +++ b/optd/core/src/cascades.rs @@ -0,0 +1,532 @@ +use std::{collections::HashMap, sync::Arc}; + +use itertools::Itertools; +use tokio::sync::watch; +use tracing::{info, instrument, trace}; + +use crate::{ + ir::{ + Group, GroupId, IRCommon, IRContext, Operator, + convert::IntoOperator, + properties::{OperatorProperties, Required, TrySatisfy}, + rule::{OperatorPattern, Rule, RuleSet}, + }, + memo::{CostedExpr, Exploration, MemoGroupExpr, MemoTable, Optimization, Status, WithId}, + rules::EnforceTupleOrderingRule, +}; + +pub struct Cascades { + pub memo: tokio::sync::RwLock, + pub ctx: IRContext, + pub rule_set: RuleSet, +} + +impl Cascades { + /// Creates a new Cascades optimizer instance. + pub fn new(ctx: IRContext, rule_set: RuleSet) -> Self { + Self { + memo: tokio::sync::RwLock::new(MemoTable::new(ctx.clone())), + ctx, + rule_set, + } + } + + /// Optimizes a query plan to find the lowest-cost execution plan that satisfies the requirement. 
+ pub async fn optimize( + self: &Arc, + plan: &Arc, + required: Arc, + ) -> Option> { + let group_id = self.insert_new_operator(plan).await; + let fut = self.find_best_costed_expr_for(group_id, required); + let rx = fut.await; + let best_root = { + rx.borrow() + .costed_exprs + .iter() + .min_by(|x, y| x.total_cost.as_f64().total_cmp(&y.total_cost.as_f64())) + .cloned() + }?; + + let properties = { + let reader = self.memo.read().await; + reader + .get_memo_group(&group_id) + .exploration + .borrow() + .properties + .clone() + }; + + let best_plan = self + .extract_best_group_expr(&best_root, group_id, properties) + .await?; + Some(best_plan) + } + + /// Recursively extracts the best query plan from a costed expression. + fn extract_best_group_expr( + self: &Arc, + best_root: &CostedExpr, + group_id: GroupId, + properties: Arc, + ) -> impl Future>> + Send { + Box::pin(async move { + let expr = best_root.group_expr.key(); + + let input_groups = expr.input_operators(); + let mut input_operators = Vec::with_capacity(input_groups.len()); + for (group_id, (required, index)) in + input_groups.iter().zip(best_root.input_requirements.iter()) + { + let rx = self + .find_best_costed_expr_for(*group_id, required.clone()) + .await; + + let properties = { + let reader = self.memo.read().await; + reader + .get_memo_group(group_id) + .exploration + .borrow() + .properties + .clone() + }; + + let best_costed = rx.borrow().costed_exprs.get(*index).cloned()?; + + let input_op = self + .extract_best_group_expr(&best_costed, *group_id, properties) + .await?; + input_operators.push(input_op); + } + let input_scalars = { + let reader = self.memo.read().await; + expr.input_scalars() + .iter() + .map(|id| reader.get_scalar(id).unwrap()) + .collect() + }; + + let common = + IRCommon::new_with_properties(input_operators.into(), input_scalars, properties); + Some(Arc::new(Operator::from_raw_parts( + Some(group_id), + expr.kind().clone(), + common, + ))) + }) + } + + async fn 
insert_new_operator(self: &Arc, plan: &Arc) -> GroupId { + let mut writer = self.memo.write().await; + writer + .insert_new_operator(plan.clone()) + .unwrap_or_else(|group_id| group_id) + } +} + +// Optimization. +impl Cascades { + async fn find_best_costed_expr_for( + self: &Arc, + group_id: GroupId, + required: Arc, + ) -> watch::Receiver { + let cascades = self.clone(); + let mut rx = cascades + .spawn_optimize_group(group_id, required.clone()) + .await; + + loop { + { + let res = rx.wait_for(|state| state.status == Status::Complete).await; + if res.is_ok() { + break; + } + } + rx = self + .clone() + .spawn_optimize_group(group_id, required.clone()) + .await; + } + rx + } + + #[instrument(name = "enforce", skip_all)] + async fn explore_enforcers( + &self, + group_id: GroupId, + required: &Arc, + properties: &Arc, + ) { + let group = Group::new(group_id, properties.clone()).into_operator(); + let enforcer_rule = EnforceTupleOrderingRule::new(required.tuple_ordering.clone()); + let new_enforcers = enforcer_rule.transform(&group, &self.ctx).unwrap(); + let total_produced = new_enforcers.len(); + let mut newly_produced = 0; + { + let mut writer = self.memo.write().await; + for op in new_enforcers { + if writer.insert_operator_into_group(op, group_id).is_ok() { + newly_produced += 1; + } + } + } + info!( + rule = enforcer_rule.name(), + total_produced, newly_produced, "applied" + ) + } + + // clippy: the compiler cannot derive `Send` bounds when using `async fn`. (alternative: pinbox.) 
+ #[allow(clippy::manual_async_fn)] + fn spawn_optimize_group( + self: Arc, + group_id: GroupId, + required: Arc, + ) -> impl Future> + Send { + async move { + let (tx, not_started) = { + let mut writer = self.memo.write().await; + let group = writer.get_memo_group_mut(&group_id); + let tx = group + .optimizations + .entry(required.clone()) + .or_insert(watch::Sender::default()) + .clone(); + drop(writer); + let not_started = tx.send_if_modified(|state| { + let not_started = state.status == Status::NotStarted; + not_started.then(|| state.status = Status::InProgress); + not_started + }); + (tx, not_started) + }; + let rx = tx.subscribe(); + if not_started { + let required = required.clone(); + tokio::spawn( + async move { Box::pin(self.optimize_group(group_id, required, tx)).await }, + ); + } + rx + } + } + + #[instrument(parent = None, skip(self, required, tx), fields(required = %required))] + async fn optimize_group( + self: Arc, + group_id: GroupId, + required: Arc, + tx: watch::Sender, + ) { + let mut rx = self.clone().spawn_explore_group(group_id).await; + let properties = rx.borrow().properties.clone(); + self.explore_enforcers(group_id, &required, &properties) + .await; + let mut index = 0; + loop { + let next_expr = { + let Ok(x) = rx + .wait_for(|x| index < x.exprs.len() || x.status == Status::Complete) + .await + else { + return; + }; + + let Some(next_expr) = x.exprs.get(index).cloned() else { + break; + }; + next_expr + }; + + if let Some(costed) = self + .optimize_expr(group_id, &required, next_expr, &properties) + .await + { + tx.send_if_modified(|x| { + x.costed_exprs.push(costed); + false + }); + } + index += 1; + } + + tx.send_if_modified(|state| { + let in_progress = state.status == Status::InProgress; + if in_progress { + state.status = Status::Complete; + } + in_progress + }); + info!("optimized"); + } + + #[instrument(name = "expr", skip_all, fields(id = %expr.id()))] + async fn optimize_expr( + self: &Arc, + group_id: GroupId, + required: &Arc, 
+ expr: WithId>, + properties: &Arc, + ) -> Option { + let operator = { + let reader = self.memo.read().await; + reader.get_operator_one_level(expr.key(), properties.clone(), group_id) + }; + let op_cost = self.ctx.cm.compute_operator_cost(&operator, &self.ctx)?; + + let inputs_required = operator.try_satisfy(required, &self.ctx)?; + + let mut best_inputs = Vec::with_capacity(operator.input_operators().len()); + let mut best_input_costs = Vec::with_capacity(operator.input_operators().len()); + for (input_group_id, input_required) in expr + .key() + .input_operators() + .iter() + .zip(inputs_required.iter()) + { + if input_group_id.eq(&group_id) && input_required == required { + trace!("self optimization avoided"); + return None; + } + + let rx = self + .clone() + .find_best_costed_expr_for(*input_group_id, input_required.clone()) + .await; + let state = rx.borrow(); + let (index, costed_expr) = state + .costed_exprs + .iter() + .enumerate() + .min_by(|(_, x), (_, y)| x.total_cost.as_f64().total_cmp(&y.total_cost.as_f64()))?; + best_inputs.push((input_required.clone(), index)); + best_input_costs.push(costed_expr.total_cost); + } + + let total_cost = + self.ctx + .cm + .compute_total_with_input_costs(&operator, &best_input_costs, &self.ctx)?; + info!(%op_cost, %total_cost, "optimized"); + Some(CostedExpr::new( + expr, + op_cost, + total_cost, + best_inputs.into(), + )) + } +} + +// Exploration. +impl Cascades { + async fn get_all_group_exprs_in( + self: &Arc, + group_id: GroupId, + ) -> watch::Receiver { + let mut rx = self.clone().spawn_explore_group(group_id).await; + loop { + { + let res = rx.wait_for(|state| state.status == Status::Complete).await; + if res.is_ok() { + break; + } + } + rx = self.clone().spawn_explore_group(group_id).await; + } + rx + } + + // clippy: the compiler cannot derive `Send` bounds when using `async fn`. (alternative: pinbox.) 
+ #[allow(clippy::manual_async_fn)] + fn spawn_explore_group( + self: Arc, + group_id: GroupId, + ) -> impl Future> + Send { + async move { + let (tx, not_started) = { + let reader = self.memo.read().await; + let tx = &reader.get_memo_group(&group_id).exploration.clone(); + let not_started = tx.send_if_modified(|state| { + let not_started = state.status == Status::NotStarted; + not_started.then(|| state.status = Status::InProgress); + not_started + }); + (tx.clone(), not_started) + }; + let rx = tx.subscribe(); + + if not_started { + tokio::spawn(async move { Box::pin(self.explore_group(group_id, tx)).await }); + } + rx + } + } + + #[instrument(parent = None, skip(self, tx))] + async fn explore_group(self: Arc, group_id: GroupId, tx: watch::Sender) { + let properties = tx.borrow().properties.clone(); + let mut index = 0; + + loop { + let next_expr = { + let state = tx.borrow(); + if state.status == Status::Obsolete { + return; + } + let Some(expr) = state.exprs.get(index).cloned() else { + break; + }; + expr + }; + + self.explore_expr(group_id, next_expr, &properties).await; + // increment the counter. + index += 1; + } + + // Exploration is complete. 
+ tx.send_if_modified(|state| { + let in_progress = state.status == Status::InProgress; + if in_progress { + state.status = Status::Complete; + } + in_progress + }); + info!("explored"); + } + + #[instrument(name = "expr", skip_all, fields(id = %expr.id()))] + async fn explore_expr( + self: &Arc, + group_id: GroupId, + expr: WithId>, + properties: &Arc, + ) { + for rule in self.rule_set.iter() { + let all_bindings = self + .explore_all_bindings(group_id, &expr, properties, rule.pattern()) + .await; + + let bindings_count = all_bindings.as_ref().map(|v| v.len()).unwrap_or(0); + info!(rule = rule.name(), %bindings_count, "matched"); + + if bindings_count > 0 { + let mut total_produced = 0; + let mut newly_produced = 0; + for binding in all_bindings.iter().flatten() { + let new_operators = rule.transform(binding, &self.ctx).unwrap(); + total_produced += new_operators.len(); + { + let mut writer = self.memo.write().await; + for op in new_operators { + if writer + .insert_operator_into_group(op.clone(), group_id) + .is_ok() + { + newly_produced += 1; + } + } + } + } + info!( + rule = rule.name(), + total_produced, newly_produced, "applied" + ) + } + } + } + + #[instrument(skip_all, fields(top = %expr.id()))] + async fn explore_all_bindings( + self: &Arc, + group_id: GroupId, + expr: &WithId>, + properties: &Arc, + pattern: &OperatorPattern, + ) -> Option>> { + let input_group_ids = expr.key().input_operators(); + let input_scalars = expr.key().input_scalars(); + if !pattern.top_matches(expr.key().kind()) { + return None; + } + let input_patterns = pattern.input_operator_patterns(); + let mut input_bindings_map: HashMap> = + HashMap::with_capacity(input_patterns.len()); + for (index, input_pattern) in input_patterns { + // early return if input length mismatch. 
+ let input_group_id = input_group_ids.get(*index)?; + + let rx = self.get_all_group_exprs_in(*input_group_id).await; + let input_exprs = rx.borrow().exprs.clone(); + let input_properties = rx.borrow().properties.clone(); + assert_eq!(rx.borrow().status, Status::Complete); + + let input_bindings = input_bindings_map.entry(*index).or_default(); + for input_expr in input_exprs { + let cascades = self.clone(); + let fut = Box::pin(cascades.explore_all_bindings( + *input_group_id, + &input_expr, + &input_properties, + input_pattern, + )); + if let Some(input_bindings_from_expr) = fut.await { + input_bindings.extend(input_bindings_from_expr); + } + } + if input_bindings.is_empty() { + return None; + } + } + + let (input_choices, input_scalars) = { + let reader = self.memo.read().await; + let input_choices = input_group_ids + .iter() + .enumerate() + .map(|(i, input_group_id)| { + match input_bindings_map.remove(&i) { + Some(v) => v, + None => { + // get group + + let properties = reader + .get_memo_group(input_group_id) + .exploration + .borrow() + .properties + .clone(); + vec![Group::new(*input_group_id, properties).into_operator()] + } + } + }) + .collect_vec(); + let input_scalars = input_scalars + .iter() + .map(|group_id| reader.get_scalar(group_id).unwrap()) + .collect::>(); + (input_choices, input_scalars) + }; + + Some( + input_choices + .into_iter() + .multi_cartesian_product() + .map(|input_operators| { + Arc::new(Operator::from_raw_parts( + Some(group_id), + expr.key().kind().clone(), + IRCommon::new_with_properties( + input_operators.into(), + input_scalars.clone(), + properties.clone(), + ), + )) + }) + .collect_vec(), + ) + } +} diff --git a/optd/core/src/ir/builder.rs b/optd/core/src/ir/builder.rs index 2b0d18d..ddfba45 100644 --- a/optd/core/src/ir/builder.rs +++ b/optd/core/src/ir/builder.rs @@ -9,7 +9,6 @@ use crate::ir::{ }; /// Creates a mock scan operator. 
-#[cfg(test)] pub fn mock_scan(id: usize, columns: Vec, card: f64) -> Arc { use crate::ir::operator::{MockScan, MockSpec}; diff --git a/optd/core/src/ir/catalog.rs b/optd/core/src/ir/catalog.rs index 48fcb91..811d4d6 100644 --- a/optd/core/src/ir/catalog.rs +++ b/optd/core/src/ir/catalog.rs @@ -100,6 +100,6 @@ pub trait Catalog: Send + Sync + 'static { /// Describes the schema of a table with name `table_name`. fn try_describe_table_with_name(&self, table_name: &str) -> anyhow::Result<&TableMetadata>; - /// TODO: This is a mock. + /// TODO(yuchen): This is a mock. fn set_table_row_count(&mut self, table_id: DataSourceId, row_count: usize); } diff --git a/optd/core/src/ir/context.rs b/optd/core/src/ir/context.rs index 1e7505e..b82f86c 100644 --- a/optd/core/src/ir/context.rs +++ b/optd/core/src/ir/context.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use crate::ir::{catalog::Catalog, cost::CostModel, properties::CardinalityEstimator}; +#[derive(Clone)] pub struct IRContext { /// An accessor to the catalog interface. 
pub cat: Arc, diff --git a/optd/core/src/ir/cost/mod.rs b/optd/core/src/ir/cost/mod.rs index 8225c25..8a311f2 100644 --- a/optd/core/src/ir/cost/mod.rs +++ b/optd/core/src/ir/cost/mod.rs @@ -5,12 +5,22 @@ pub struct Cost { c: f64, } +impl std::fmt::Display for Cost { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "${:.2}", self.c) + } +} + impl Cost { pub const ZERO: Self = Self::new(0.); pub const UNIT: Self = Self::new(1.); pub const fn new(c: f64) -> Self { Self { c } } + + pub fn as_f64(&self) -> f64 { + self.c + } } impl std::ops::Add for Cost { @@ -61,7 +71,7 @@ impl std::ops::Mul for f64 { } } -pub trait CostModel { +pub trait CostModel: Send + Sync + 'static { fn compute_operator_cost(&self, op: &Operator, ctx: &IRContext) -> Option; fn compute_total_with_input_costs( &self, diff --git a/optd/core/src/ir/explain.rs b/optd/core/src/ir/explain.rs index 3a47936..e42f965 100644 --- a/optd/core/src/ir/explain.rs +++ b/optd/core/src/ir/explain.rs @@ -8,7 +8,7 @@ pub trait Explain { #[derive(Default)] pub struct ExplainOption { - pub show_output_columns: bool, + pub verbose: bool, } pub fn quick_explain(v: impl AsRef, ctx: &IRContext) -> String diff --git a/optd/core/src/ir/group.rs b/optd/core/src/ir/group.rs index 0b321b7..06c898c 100644 --- a/optd/core/src/ir/group.rs +++ b/optd/core/src/ir/group.rs @@ -1,7 +1,10 @@ use std::sync::Arc; +use pretty_xmlish::Pretty; + use crate::ir::{ IRCommon, + explain::Explain, macros::{define_node, impl_operator_conversion}, properties::OperatorProperties, }; @@ -45,3 +48,15 @@ impl Group { } } } + +impl Explain for GroupBorrowed<'_> { + fn explain<'a>( + &self, + ctx: &super::IRContext, + option: &super::explain::ExplainOption, + ) -> Pretty<'a> { + let mut fields = vec![(".group_id", Pretty::display(self.group_id()))]; + fields.extend(self.common.explain_operator_properties(ctx, option)); + Pretty::childless_record("Group", fields) + } +} diff --git a/optd/core/src/ir/macros.rs 
b/optd/core/src/ir/macros.rs index 9187730..a886917 100644 --- a/optd/core/src/ir/macros.rs +++ b/optd/core/src/ir/macros.rs @@ -295,6 +295,7 @@ macro_rules! impl_operator_conversion { impl crate::ir::convert::IntoOperator for $node_name { fn into_operator(self) -> std::sync::Arc { std::sync::Arc::new(crate::ir::Operator { + group_id: None, kind: crate::ir::OperatorKind::$node_name(self.meta), common: self.common, }) diff --git a/optd/core/src/ir/mod.rs b/optd/core/src/ir/mod.rs index 0a15755..30223fa 100644 --- a/optd/core/src/ir/mod.rs +++ b/optd/core/src/ir/mod.rs @@ -21,9 +21,15 @@ pub use context::IRContext; pub use data_type::DataType; pub use group::*; pub use operator::{Operator, OperatorCategory, OperatorKind}; +use pretty_xmlish::Pretty; pub use scalar::{Scalar, ScalarKind}; pub use value::ScalarValue; +use crate::ir::{ + explain::{Explain, ExplainOption}, + properties::OperatorProperties, +}; + /// The portion of the IR shared by all nodes. #[derive(Debug)] pub struct IRCommon

<P> { @@ -69,13 +75,61 @@ impl IRCommon<P> { Self::new(operators, Self::empty_input_scalars()) } - pub fn with_properties_only(properties: Arc<P>) -> Self { + pub(crate) fn new_with_properties( + input_operators: Arc<[Arc]>, + input_scalars: Arc<[Arc]>, + properties: Arc<P>, + ) -> Self { + Self { + input_operators, + input_scalars, + properties, + } + } + + pub(crate) fn with_properties_only(properties: Arc<P>
) -> Self { Self { input_operators: Self::empty_input_operators(), input_scalars: Self::empty_input_scalars(), properties, } } + + pub fn explain_input_operators<'a>( + &self, + ctx: &IRContext, + option: &ExplainOption, + ) -> Vec> { + self.input_operators + .iter() + .map(|input_op| input_op.explain(ctx, option)) + .collect() + } +} + +impl IRCommon { + pub fn explain_operator_properties<'a>( + &self, + _ctx: &IRContext, + _option: &ExplainOption, + ) -> Vec<(&'static str, Pretty<'a>)> { + let mut fields = Vec::with_capacity(2); + let cardinality = self + .properties + .cardinality + .get() + .map(|x| format!("{:.2}", x.as_f64())) + .unwrap_or("?".to_string()); + let output_columns = self + .properties + .output_columns + .get() + .map(|x| format!("{x}")) + .unwrap_or("?".to_string()); + fields.push(("(.output_columns)", Pretty::display(&output_columns))); + fields.push(("(.cardinality)", Pretty::display(&cardinality))); + fields + } } impl

<P> PartialEq for IRCommon<P>
{ diff --git a/optd/core/src/ir/operator/enforcer/sort.rs b/optd/core/src/ir/operator/enforcer/sort.rs index f6b2b57..b5da57a 100644 --- a/optd/core/src/ir/operator/enforcer/sort.rs +++ b/optd/core/src/ir/operator/enforcer/sort.rs @@ -1,5 +1,7 @@ use std::sync::Arc; +use pretty_xmlish::Pretty; + use crate::ir::{ IRCommon, Operator, explain::Explain, @@ -33,9 +35,16 @@ impl EnforcerSort { impl Explain for EnforcerSortBorrowed<'_> { fn explain<'a>( &self, - _ctx: &crate::ir::IRContext, - _option: &crate::ir::explain::ExplainOption, - ) -> pretty_xmlish::Pretty<'a> { - todo!() + ctx: &crate::ir::IRContext, + option: &crate::ir::explain::ExplainOption, + ) -> Pretty<'a> { + let mut fields = Vec::with_capacity(1); + fields.push(("tuple_ordering", Pretty::display(self.tuple_ordering()))); + + let metadata = self.common.explain_operator_properties(ctx, option); + fields.extend(metadata); + let children = self.common.explain_input_operators(ctx, option); + + Pretty::simple_record("EnforcerSort", fields, children) } } diff --git a/optd/core/src/ir/operator/logical/join.rs b/optd/core/src/ir/operator/logical/join.rs index 373cc87..73326dc 100644 --- a/optd/core/src/ir/operator/logical/join.rs +++ b/optd/core/src/ir/operator/logical/join.rs @@ -52,12 +52,8 @@ impl Explain for LogicalJoinBorrowed<'_> { let mut fields = Vec::with_capacity(3); fields.push((".join_type", Pretty::debug(self.join_type()))); fields.push((".join_cond", self.join_cond().explain(ctx, option))); - let children = self - .common - .input_operators - .iter() - .map(|input_op| input_op.explain(ctx, option)) - .collect(); + fields.extend(self.common.explain_operator_properties(ctx, option)); + let children = self.common.explain_input_operators(ctx, option); Pretty::simple_record("LogicalJoin", fields, children) } } diff --git a/optd/core/src/ir/operator/logical/order_by.rs b/optd/core/src/ir/operator/logical/order_by.rs index e406079..abdc898 100644 --- a/optd/core/src/ir/operator/logical/order_by.rs +++ 
b/optd/core/src/ir/operator/logical/order_by.rs @@ -48,7 +48,7 @@ impl LogicalOrderBy { .iter() .map(|expr| { expr.try_bind_ref::() - .map(|column_ref| column_ref.column().clone()) + .map(|column_ref| *column_ref.column()) .map_err(|_| expr.clone()) }) .partition_result(); diff --git a/optd/core/src/ir/operator/mod.rs b/optd/core/src/ir/operator/mod.rs index 3761858..d32aa9e 100644 --- a/optd/core/src/ir/operator/mod.rs +++ b/optd/core/src/ir/operator/mod.rs @@ -17,26 +17,23 @@ pub mod join { pub use super::logical::join::JoinType; } -#[cfg(test)] pub use physical::mock_scan::*; -use pretty_xmlish::Pretty; use crate::ir::explain::Explain; use crate::ir::properties::OperatorProperties; -use crate::ir::{GroupMetadata, IRCommon, Scalar}; +use crate::ir::{GroupBorrowed, GroupId, GroupMetadata, IRCommon, Scalar}; /// The operator type and its associated metadata. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum OperatorKind { Group(GroupMetadata), - #[cfg(test)] MockScan(MockScanMetadata), LogicalGet(LogicalGetMetadata), LogicalJoin(LogicalJoinMetadata), LogicalSelect(LogicalSelectMetadata), EnforcerSort(EnforcerSortMetadata), PhysicalTableScan(PhysicalTableScanMetadata), - PhysicalNLJoin(PhysicalNLJoinJoinMetadata), + PhysicalNLJoin(PhysicalNLJoinMetadata), PhysicalFilter(PhysicalFilterMetadata), } @@ -61,7 +58,6 @@ impl OperatorKind { pub fn maybe_produce_columns(&self) -> bool { match self { OperatorKind::LogicalGet(_) | OperatorKind::PhysicalTableScan(_) => true, - #[cfg(test)] OperatorKind::MockScan(_) => true, _other => false, } @@ -70,6 +66,7 @@ impl OperatorKind { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Operator { + pub group_id: Option, /// The operator type and associated metadata. pub kind: OperatorKind, /// The input operators and scalars. 
@@ -77,6 +74,17 @@ pub struct Operator { } impl Operator { + pub fn from_raw_parts( + group_id: Option, + kind: OperatorKind, + common: IRCommon, + ) -> Self { + Self { + group_id, + kind, + common, + } + } /// Gets the slice to the input operators. pub fn input_operators(&self) -> &[Arc] { &self.common.input_operators @@ -101,6 +109,7 @@ impl Operator { input_operators.unwrap_or_else(|| self.common.input_operators.clone()); let input_scalars = input_scalars.unwrap_or_else(|| self.common.input_scalars.clone()); Self { + group_id: None, kind: self.kind.clone(), common: IRCommon::new(input_operators, input_scalars), } @@ -114,8 +123,9 @@ impl Explain for Operator { option: &super::explain::ExplainOption, ) -> pretty_xmlish::Pretty<'a> { match &self.kind { - OperatorKind::Group(meta) => Pretty::display(&meta.group_id), - #[cfg(test)] + OperatorKind::Group(meta) => { + GroupBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + } OperatorKind::MockScan(meta) => { MockScanBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) } diff --git a/optd/core/src/ir/operator/physical/mock_scan.rs b/optd/core/src/ir/operator/physical/mock_scan.rs index eb26a72..6e510cb 100644 --- a/optd/core/src/ir/operator/physical/mock_scan.rs +++ b/optd/core/src/ir/operator/physical/mock_scan.rs @@ -38,13 +38,12 @@ impl std::fmt::Debug for MockSpec { impl Explain for MockScanBorrowed<'_> { fn explain<'a>( &self, - _ctx: &crate::ir::IRContext, - _option: &crate::ir::explain::ExplainOption, + ctx: &crate::ir::IRContext, + option: &crate::ir::explain::ExplainOption, ) -> pretty_xmlish::Pretty<'a> { - let columns = self.spec().mocked_output_columns.set(); let mut fields = Vec::with_capacity(2); - fields.push((".columns", Pretty::debug(columns))); fields.push((".mock_id", Pretty::display(self.mock_id()))); + fields.extend(self.common.explain_operator_properties(ctx, option)); Pretty::childless_record("MockScan", fields) } } @@ -57,6 +56,7 @@ impl MockSpec { Self { 
mocked_output_columns, mocked_card, + mocked_operator_cost: Some(Cost::UNIT * card * 1.1), ..Default::default() } } diff --git a/optd/core/src/ir/operator/physical/mod.rs b/optd/core/src/ir/operator/physical/mod.rs index e058f05..9a6dd91 100644 --- a/optd/core/src/ir/operator/physical/mod.rs +++ b/optd/core/src/ir/operator/physical/mod.rs @@ -1,5 +1,4 @@ pub mod filter; -#[cfg(test)] pub mod mock_scan; pub mod nl_join; pub mod table_scan; diff --git a/optd/core/src/ir/operator/physical/nl_join.rs b/optd/core/src/ir/operator/physical/nl_join.rs index 41e7c08..ab181c1 100644 --- a/optd/core/src/ir/operator/physical/nl_join.rs +++ b/optd/core/src/ir/operator/physical/nl_join.rs @@ -13,7 +13,7 @@ use crate::ir::{ define_node!( PhysicalNLJoin, PhysicalNLJoinBorrowed { properties: OperatorProperties, - metadata: PhysicalNLJoinJoinMetadata { + metadata: PhysicalNLJoinMetadata { join_type: JoinType, }, inputs: { @@ -32,7 +32,7 @@ impl PhysicalNLJoin { join_cond: Arc, ) -> Self { Self { - meta: PhysicalNLJoinJoinMetadata { join_type }, + meta: PhysicalNLJoinMetadata { join_type }, common: IRCommon::new(Arc::new([outer, inner]), Arc::new([join_cond])), } } @@ -47,12 +47,8 @@ impl Explain for PhysicalNLJoinBorrowed<'_> { let mut fields = Vec::with_capacity(3); fields.push((".join_type", Pretty::debug(self.join_type()))); fields.push((".join_cond", self.join_cond().explain(ctx, option))); - let children = self - .common - .input_operators - .iter() - .map(|input_op| input_op.explain(ctx, option)) - .collect(); + fields.extend(self.common.explain_operator_properties(ctx, option)); + let children = self.common.explain_input_operators(ctx, option); Pretty::simple_record("PhysicalNLJoin", fields, children) } } diff --git a/optd/core/src/ir/properties/cardinality.rs b/optd/core/src/ir/properties/cardinality.rs index 14f3763..2fb38e2 100644 --- a/optd/core/src/ir/properties/cardinality.rs +++ b/optd/core/src/ir/properties/cardinality.rs @@ -11,7 +11,7 @@ pub struct Cardinality { 
card: f64, } -pub trait CardinalityEstimator { +pub trait CardinalityEstimator: Send + Sync + 'static { fn estimate(&self, op: &Operator, ctx: &IRContext) -> Cardinality; } @@ -44,11 +44,11 @@ impl Derive for crate::ir::Operator { } fn derive(&self, ctx: &crate::ir::context::IRContext) -> Cardinality { - self.common + *self + .common .properties .cardinality .get_or_init(|| self.derive_by_compute(ctx)) - .clone() } } diff --git a/optd/core/src/ir/properties/mod.rs b/optd/core/src/ir/properties/mod.rs index 4aa3104..fba1850 100644 --- a/optd/core/src/ir/properties/mod.rs +++ b/optd/core/src/ir/properties/mod.rs @@ -1,12 +1,13 @@ mod cardinality; mod output_columns; -pub mod required; +mod required; mod tuple_ordering; use std::sync::{Arc, OnceLock}; pub use cardinality::*; pub use output_columns::OutputColumns; +pub use required::Required; pub use tuple_ordering::*; use crate::ir::context::IRContext; diff --git a/optd/core/src/ir/properties/output_columns.rs b/optd/core/src/ir/properties/output_columns.rs index 4c34de2..2711816 100644 --- a/optd/core/src/ir/properties/output_columns.rs +++ b/optd/core/src/ir/properties/output_columns.rs @@ -1,5 +1,7 @@ use std::{collections::HashSet, sync::Arc}; +use itertools::Itertools; + use crate::ir::{ ColumnSet, OperatorKind, operator::{LogicalGet, PhysicalTableScan}, @@ -19,6 +21,12 @@ impl OutputColumns { } } +impl std::fmt::Display for OutputColumns { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_set().entries(self.set().iter().sorted()).finish() + } +} + impl PropertyMarker for OutputColumns {} impl Derive for crate::ir::Operator { @@ -50,7 +58,6 @@ impl Derive for crate::ir::Operator { }); OutputColumns::from_column_set(set) } - #[cfg(test)] OperatorKind::MockScan(meta) => meta.spec.mocked_output_columns.clone(), } } diff --git a/optd/core/src/ir/properties/required.rs b/optd/core/src/ir/properties/required.rs index 4d351b7..369ab95 100644 --- a/optd/core/src/ir/properties/required.rs 
+++ b/optd/core/src/ir/properties/required.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use crate::ir::{ Operator, context::IRContext, @@ -6,23 +8,33 @@ use crate::ir::{ #[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] pub struct Required { - tuple_ordering: TupleOrdering, + pub tuple_ordering: TupleOrdering, +} + +impl std::fmt::Display for Required { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_map() + .entry(&"tuple_ordering", &self.tuple_ordering) + .finish() + } } -impl PropertyMarker for Required {} +impl PropertyMarker for Arc {} -impl crate::ir::properties::TrySatisfy for Operator { +impl crate::ir::properties::TrySatisfy> for Operator { fn try_satisfy( &self, - property: &Required, + property: &Arc, ctx: &IRContext, - ) -> Option> { + ) -> Option]>> { self.try_satisfy(&property.tuple_ordering, ctx) .map(|inputs_required| { inputs_required .iter() - .map(|p| Required { - tuple_ordering: p.clone(), + .map(|p| { + Arc::new(Required { + tuple_ordering: p.clone(), + }) }) .collect() }) diff --git a/optd/core/src/ir/properties/tuple_ordering.rs b/optd/core/src/ir/properties/tuple_ordering.rs index 0724995..ec3c912 100644 --- a/optd/core/src/ir/properties/tuple_ordering.rs +++ b/optd/core/src/ir/properties/tuple_ordering.rs @@ -23,7 +23,7 @@ impl std::fmt::Display for TupleOrderingDirection { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] +#[derive(Clone, PartialEq, Eq, Hash, Default)] pub struct TupleOrdering(Arc); impl PartialOrd for TupleOrdering { @@ -51,6 +51,12 @@ impl std::fmt::Display for TupleOrdering { } } +impl std::fmt::Debug for TupleOrdering { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] struct TupleOrderingInner { /// The specified columns to be ordered on. 
@@ -96,7 +102,7 @@ impl TupleOrdering { } /// Gets an ordered iterator over [`Column`] specified in the ordering. - pub fn iter_columns<'a>(&'a self) -> impl Iterator { + pub fn iter_columns(&self) -> impl Iterator { self.0.columns.iter() } @@ -126,6 +132,7 @@ impl<'a> Iterator for Iter<'a> { .then(|| TupleOrderingDirection::Asc) .unwrap_or(TupleOrderingDirection::Desc); + self.index += 1; Some((column, direction)) } } @@ -170,7 +177,6 @@ impl crate::ir::properties::TrySatisfy for Operator { .all(|col| output_from_input.set().contains(col)) .then(|| vec![ordering.clone()].into()) } - #[cfg(test)] OperatorKind::MockScan(meta) => { (&meta.spec.mocked_provided_ordering >= ordering).then_some(Arc::new([])) } diff --git a/optd/core/src/ir/rule/mod.rs b/optd/core/src/ir/rule/mod.rs index 1ebffc3..a94afca 100644 --- a/optd/core/src/ir/rule/mod.rs +++ b/optd/core/src/ir/rule/mod.rs @@ -3,7 +3,7 @@ mod set; use std::sync::Arc; -use crate::ir::Operator; +use crate::ir::{IRContext, Operator}; pub use pattern::{OperatorMatchFunc, OperatorPattern}; pub use set::{RuleSet, RuleSetBuilder}; @@ -15,5 +15,7 @@ pub trait Rule: 'static + Send + Sync { fn pattern(&self) -> &OperatorPattern; /// Performs the transformation on `operator`. /// A rule may produce zero or more new plans as part of the transformation. - fn transform(&self, operator: &Operator) -> Result>, ()>; + // TODO(yuchen): use custom error type. 
+ #[allow(clippy::result_unit_err)] + fn transform(&self, operator: &Operator, ctx: &IRContext) -> Result>, ()>; } diff --git a/optd/core/src/ir/rule/pattern.rs b/optd/core/src/ir/rule/pattern.rs index 914d0d6..be6e0d2 100644 --- a/optd/core/src/ir/rule/pattern.rs +++ b/optd/core/src/ir/rule/pattern.rs @@ -33,14 +33,14 @@ impl OperatorPattern { &self.input_operator_patterns } - pub fn top_matches(&self, operator: &Operator) -> bool { - (self.matches)(&operator.kind) + pub fn top_matches(&self, kind: &OperatorKind) -> bool { + (self.matches)(kind) } pub fn matches_without_expand(&self, operator: &Operator) -> bool { let input_ops = operator.input_operators(); self.input_operator_patterns.iter().fold( - self.top_matches(operator), + self.top_matches(&operator.kind), |prev_matches, (i, input_pattern)| { let Some(input_op) = &input_ops.get(*i) else { return false; diff --git a/optd/core/src/ir/rule/set.rs b/optd/core/src/ir/rule/set.rs index 74195e2..e069952 100644 --- a/optd/core/src/ir/rule/set.rs +++ b/optd/core/src/ir/rule/set.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use crate::ir::rule::Rule; +#[derive(Default)] pub struct RuleSet { rules: Arc<[Arc]>, } diff --git a/optd/core/src/ir/scalar/mod.rs b/optd/core/src/ir/scalar/mod.rs index c2e0aa6..61d523b 100644 --- a/optd/core/src/ir/scalar/mod.rs +++ b/optd/core/src/ir/scalar/mod.rs @@ -4,7 +4,7 @@ mod column_ref; mod literal; mod projection_list; -use std::sync::Arc; +use std::{collections::HashSet, sync::Arc}; pub use assign::*; pub use binary_op::*; @@ -13,7 +13,7 @@ pub use literal::*; pub use projection_list::*; -use crate::ir::{IRCommon, Operator, explain::Explain, properties::ScalarProperties}; +use crate::ir::{ColumnSet, IRCommon, Operator, explain::Explain, properties::ScalarProperties}; /// The scalar type and its associated metadata. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -55,6 +55,19 @@ impl Scalar { common: IRCommon::new(operators, scalars), } } + + pub fn used_columns(&self) -> ColumnSet { + match &self.kind { + ScalarKind::Literal(_) => HashSet::new(), + ScalarKind::ColumnRef(meta) => HashSet::from_iter(std::iter::once(meta.column)), + ScalarKind::BinaryOp(_) => self + .input_scalars() + .iter() + .fold(HashSet::new(), |x, y| &x & &y.used_columns()), + ScalarKind::Assign(_) => todo!(), + ScalarKind::ProjectionList(_) => todo!(), + } + } } impl Explain for Scalar { diff --git a/optd/core/src/ir/scalar/projection_list.rs b/optd/core/src/ir/scalar/projection_list.rs index 9c8eb68..03cb95d 100644 --- a/optd/core/src/ir/scalar/projection_list.rs +++ b/optd/core/src/ir/scalar/projection_list.rs @@ -30,7 +30,7 @@ impl ProjectionList { pub fn get_all_assignees(&self) -> impl Iterator { self.members().iter().map(|member| { let assign = member.try_bind_ref::().unwrap(); - assign.assignee().clone() + *assign.assignee() }) } } diff --git a/optd/core/src/ir/value.rs b/optd/core/src/ir/value.rs index 2b251ff..7708b9f 100644 --- a/optd/core/src/ir/value.rs +++ b/optd/core/src/ir/value.rs @@ -14,8 +14,8 @@ impl std::fmt::Display for ScalarValue { type_name: &str, ) -> std::fmt::Result { match optional { - Some(v) => write!(f, "{}::{}", v, type_name), - None => write!(f, "null::{}", type_name), + Some(v) => write!(f, "{v}::{type_name}"), + None => write!(f, "null::{type_name}"), } } diff --git a/optd/core/src/lib.rs b/optd/core/src/lib.rs index 826dced..0eaed76 100644 --- a/optd/core/src/lib.rs +++ b/optd/core/src/lib.rs @@ -1,3 +1,4 @@ +pub mod cascades; pub mod ir; pub mod magic; pub mod memo; diff --git a/optd/core/src/magic/card.rs b/optd/core/src/magic/card.rs index 1b48c0e..fc7446b 100644 --- a/optd/core/src/magic/card.rs +++ b/optd/core/src/magic/card.rs @@ -95,7 +95,6 @@ impl CardinalityEstimator for MagicCardinalityEstimator { OperatorKind::EnforcerSort(_) => { 
op.input_operators()[0].get_property::(ctx) } - #[cfg(test)] OperatorKind::MockScan(meta) => meta.spec.mocked_card, } } diff --git a/optd/core/src/magic/cm.rs b/optd/core/src/magic/cm.rs index fa37aaf..5a7aa40 100644 --- a/optd/core/src/magic/cm.rs +++ b/optd/core/src/magic/cm.rs @@ -48,7 +48,6 @@ impl CostModel for MagicCostModel { let cost = input_card.as_f64() * Self::MAGIC_COMPUTATION_FACTOR * Cost::UNIT; Some(cost) } - #[cfg(test)] OperatorKind::MockScan(meta) => meta.spec.mocked_operator_cost, } } @@ -102,7 +101,7 @@ mod tests { let op1_cost = ctx.cm.compute_total_cost(&op1, &ctx).unwrap(); let op2_cost = ctx.cm.compute_total_cost(&op2, &ctx).unwrap(); - println!("cost1: {:?}, cost2: {:?}", op1_cost, op2_cost); + println!("cost1: {op1_cost:?}, cost2: {op2_cost:?}"); assert!(op1_cost > op2_cost); } } diff --git a/optd/core/src/magic/mod.rs b/optd/core/src/magic/mod.rs index 429f5e6..38d1fa2 100644 --- a/optd/core/src/magic/mod.rs +++ b/optd/core/src/magic/mod.rs @@ -81,7 +81,7 @@ impl IRContext { let mut create_numbered_table = |table_name: String, width: usize, row_count: usize| { let iter = (1..=width) - .map(|column_no| (format!("{}.v{}", table_name, column_no), DataType::Int32)); + .map(|column_no| (format!("{table_name}.v{column_no}"), DataType::Int32)); let schema = Arc::new(SchemaDescription::from_iter(iter)); let table_id = catalog.try_create_table(table_name, schema).unwrap(); catalog.set_table_row_count(table_id, row_count); diff --git a/optd/core/src/memo.rs b/optd/core/src/memo.rs index 382f942..312f56c 100644 --- a/optd/core/src/memo.rs +++ b/optd/core/src/memo.rs @@ -6,13 +6,15 @@ use std::{ use itertools::Itertools; use tokio::sync::watch; -use tracing::trace; +use tracing::{info, instrument, trace}; use crate::{ ir::{ - GroupId, Operator, OperatorKind, Scalar, + Group, GroupId, IRCommon, IRContext, Operator, OperatorKind, Scalar, + convert::IntoOperator, cost::Cost, - properties::{OperatorProperties, required::Required}, + explain::{Explain, 
ExplainOption}, + properties::{Cardinality, GetProperty, OperatorProperties, OutputColumns, Required}, }, utility::union_find::UnionFind, }; @@ -41,6 +43,10 @@ impl MemoGroupExpr { &self.inputs[self.split..] } + pub fn kind(&self) -> &OperatorKind { + &self.meta + } + pub fn clone_with_inputs(&self, inputs: Box<[GroupId]>) -> Self { Self { meta: self.meta.clone(), @@ -61,7 +67,13 @@ impl std::fmt::Debug for MemoGroupExpr { } #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Id(i64); +pub struct Id(pub(crate) i64); + +impl std::fmt::Display for Id { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "id#{}", self.0) + } +} impl Id { pub const UNKNOWN: Self = Id(0); @@ -99,6 +111,10 @@ impl WithId { pub const fn id(&self) -> Id { self.id } + + pub const fn key(&self) -> &K { + &self.key + } } impl From for WithId { @@ -107,7 +123,6 @@ impl From for WithId { } } -#[derive(Default)] pub struct MemoTable { /// Scalar deduplication. scalar_dedup: HashMap, GroupId>, @@ -118,9 +133,21 @@ pub struct MemoTable { id_to_group_ids: UnionFind, groups: BTreeMap, id_allocator: IdAllocator, + ctx: IRContext, } impl MemoTable { + pub fn new(ctx: IRContext) -> Self { + Self { + scalar_dedup: Default::default(), + scalar_id_to_key: Default::default(), + operator_dedup: Default::default(), + id_to_group_ids: Default::default(), + groups: Default::default(), + id_allocator: Default::default(), + ctx, + } + } /// Adds an operator to the memo table. /// /// Returns the group id where the operator belongs: @@ -128,9 +155,10 @@ impl MemoTable { /// - If it already exists: returns the existing group id. /// /// **Note:** This would not trigger group merges. 
+ #[instrument(parent = None, skip_all)] pub fn insert_new_operator(&mut self, operator: Arc) -> Result { self.insert_operator(operator.clone()).map(|first_expr| { - trace!("obtain new expr: {:?}", first_expr); + trace!(id = %first_expr.id(), "obtain new expr"); let id = first_expr.id(); let memo_group = MemoGroup::new(first_expr, operator.properties().clone()); let res = self.groups.insert(GroupId::from(id), memo_group); @@ -150,6 +178,7 @@ impl MemoTable { /// - Returns an error with the target group id. /// /// **Note:** This may trigger cascading group merges. + #[instrument(parent = None, skip(self, operator))] pub fn insert_operator_into_group( &mut self, operator: Arc, @@ -158,7 +187,7 @@ impl MemoTable { let res = self.insert_operator(operator.clone()); let into_group_id = self.id_to_group_ids.find(&into_group_id); res.inspect(|expr| { - trace!("obtain new expr: {:?}", expr); + info!(id = %expr.id(), "obtain new expr"); let group = self.groups.get(&into_group_id).unwrap(); self.id_to_group_ids .merge(&into_group_id, &GroupId::from(expr.id())); @@ -168,13 +197,13 @@ impl MemoTable { }) .map_err(|from_group_id| { trace!( - "got existing group {}, group merges triggered:", + "got existing group {}, group merges triggered", from_group_id ); - self.dump(); + // self.dump(); self.merge_group(into_group_id, from_group_id); - trace!("group merging finished:"); - self.dump(); + trace!("group merging finished"); + // self.dump(); into_group_id }) } @@ -193,7 +222,7 @@ impl MemoTable { if let OperatorKind::Group(group) = &operator.kind { let repr_id = self.id_to_group_ids.find(&group.group_id); trace!("inserted group {}", repr_id); - return Err(GroupId::from(repr_id)); + return Err(repr_id); } // Split point = len(input_operators) @@ -204,8 +233,8 @@ impl MemoTable { .map(|op| { self.insert_operator(op.clone()) .map(|first_expr| { - trace!("obtain new expr: {:?}", first_expr); let group_id = GroupId::from(first_expr.id()); + info!(id = %first_expr.id(), "extra group 
created"); let memo_group = MemoGroup::new(first_expr, op.properties().clone()); let res = self.groups.insert(group_id, memo_group); assert!(res.is_none()); @@ -235,19 +264,23 @@ impl MemoTable { match self.operator_dedup.entry(group_expr.clone()) { Entry::Occupied(occupied) => { let id = occupied.get(); - Err(GroupId::from( - self.id_to_group_ids.find(&GroupId::from(*id)), - )) + Err(self.id_to_group_ids.find(&GroupId::from(*id))) } Entry::Vacant(vacant) => { let id = self.id_allocator.next_id(); vacant.insert(id); let key_with_id = WithId::new(id, group_expr); + self.infer_properties(operator); Ok(key_with_id) } } } + fn infer_properties(&self, operator: Arc) { + operator.get_property::(&self.ctx); + operator.get_property::(&self.ctx); + } + /// Inserts a scalar into the memo table's scalar deduplication map. /// /// Handles scalar deduplication and group id assignment: @@ -266,7 +299,7 @@ impl MemoTable { let group_id = GroupId::from(self.id_allocator.next_id()); vacant.insert(group_id); self.scalar_id_to_key.insert(group_id, scalar); - trace!("got new scalar with {:?}", group_id); + info!(%group_id, "obtained new scalar expr"); Ok(group_id) } } @@ -281,7 +314,43 @@ impl MemoTable { self.scalar_id_to_key.get(group_id).cloned() } - /// Gets the memo group corresponding to a group id. 
+ pub fn get_operator_one_level( + &self, + group_expr: &MemoGroupExpr, + properties: Arc, + group_id: GroupId, + ) -> Arc { + let input_scalars = group_expr + .input_scalars() + .iter() + .map(|group_id| self.get_scalar(group_id).unwrap()) + .collect(); + + let input_operators = group_expr + .input_operators() + .iter() + .map(|group_id| { + let memo_group = self.get_memo_group(group_id); + + Group::new( + memo_group.group_id, + memo_group.exploration.borrow().properties.clone(), + ) + .into_operator() + }) + .collect(); + + let common = IRCommon::new_with_properties(input_operators, input_scalars, properties); + let group_id = Some(group_id); + + Arc::new(Operator { + group_id, + kind: group_expr.meta.clone(), + common, + }) + } + + /// Gets a shared reference to the memo group corresponding to a group id. /// /// Uses the union-find structure to resolve the representative group id and returns /// the associated memo group: @@ -292,6 +361,17 @@ impl MemoTable { self.groups.get(&repr_group_id).unwrap() } + /// Gets a mutable reference to the memo group corresponding to a group id. + /// + /// Uses the union-find structure to resolve the representative group id and returns + /// the associated memo group: + /// - Finds the representative group id using union-find + /// - Returns a reference to the corresponding memo group + pub fn get_memo_group_mut(&mut self, group_id: &GroupId) -> &mut MemoGroup { + let repr_group_id = self.id_to_group_ids.find(group_id); + self.groups.get_mut(&repr_group_id).unwrap() + } + /// Merges two memo groups into one, combining their expressions. /// /// Transfers all expressions from the source group to the target group and updates @@ -303,7 +383,7 @@ impl MemoTable { /// /// **Note:** This operation may trigger additional group merges recursively. 
fn merge_group(&mut self, into_group_id: GroupId, from_group_id: GroupId) { - trace!("merging {} <- {}", into_group_id, from_group_id); + info!("merging {} <- {}", into_group_id, from_group_id); if into_group_id == from_group_id { return; } @@ -379,7 +459,7 @@ impl MemoTable { }); group_exprs.clear(); } - trace!(?pending_group_merges); + info!(?pending_group_merges); for (into_group_id, from_group_id) in pending_group_merges { let into_group_id = self.id_to_group_ids.find(&into_group_id); let from_group_id = self.id_to_group_ids.find(&from_group_id); @@ -395,21 +475,80 @@ impl MemoTable { /// /// This method is primarily intended for debugging and testing. pub fn dump(&self) { - trace!("======== MEMO DUMP BEGIN ========"); + let option = ExplainOption::default(); + println!("======== MEMO DUMP BEGIN ========"); + println!("\n[operators]"); + println!("group_ids = {:?}", self.groups.keys()); + println!("total_group_count = {}", self.groups.keys().len()); + println!("total_operator_count = {}", self.operator_dedup.len()); for (group_id, group) in &self.groups { - let exploration = group.exploration.borrow(); + let state = group.exploration.borrow(); assert_eq!(group_id, &group.group_id); - trace!( - "MemoGroup ({}, num_exprs={}):", - group_id, - exploration.exprs.len() + println!("\n[operators.{group_id}]"); + println!("num_exprs = {}", state.exprs.len()); + println!( + "output_columns = {}", + state + .properties + .output_columns + .get() + .map(|x| format!("{x}")) + .unwrap_or("?".to_string()), + ); + println!( + "cardinality = {}", + state + .properties + .cardinality + .get() + .map(|x| format!("{:.2}", x.as_f64())) + .unwrap_or("?".to_string()), ); - exploration.exprs.iter().for_each(|expr| { - trace!("{:?} -> {:?}", expr.id(), &expr.key); - }); + for expr in state.exprs.iter() { + println!("{} = {:?}", expr.id(), expr.key()); + } + + for (required, tx) in group.optimizations.iter() { + println!("\n[operators.{group_id}.required = {required}]"); + let state = 
tx.borrow(); + let best_index = state + .costed_exprs + .iter() + .enumerate() + .min_by(|(_, x), (_, y)| { + x.total_cost.as_f64().total_cmp(&y.total_cost.as_f64()) + }) + .map(|(i, _)| i); + for (i, costed) in state.costed_exprs.iter().enumerate() { + let inputs = costed + .input_requirements + .iter() + .zip(costed.group_expr.key().input_operators()) + .map(|((required, index), group_id)| { + format!("\"o#{index}@{group_id}\": {required}") + }) + .join(", "); + let opt_desc = best_index + .filter(|best_index| i.eq(best_index)) + .map(|best_index| format!("o#{best_index} (best)")) + .unwrap_or_else(|| format!("o#{i}{:>7}", "")); + println!( + "{opt_desc} = {{ id={}, total = {}, operation = {} inputs: {{{}}} }}", + costed.group_expr.id(), + costed.total_cost, + costed.operator_cost, + inputs + ); + } + } + } + println!("\n[scalars]"); + for (scalar_id, scalar) in &self.scalar_id_to_key { + let s = scalar.explain(&self.ctx, &option).to_one_line_string(true); + println!("{scalar_id} = \"{s}\"") } - trace!("======== MEMO DUMP END =========="); + println!("======== MEMO DUMP END =========="); } } @@ -434,17 +573,20 @@ impl IdAllocator { } } +#[derive(Debug, Clone, Copy, PartialEq, Default)] pub enum Status { + #[default] NotStarted, InProgress, Complete, Obsolete, } +#[derive(Clone)] pub struct Exploration { - exprs: Vec>>, - properties: Arc, - status: Status, + pub exprs: Vec>>, + pub properties: Arc, + pub status: Status, } impl Exploration { @@ -459,25 +601,42 @@ impl Exploration { } } } - +#[derive(Clone)] pub struct CostedExpr { pub group_expr: WithId>, pub operator_cost: Cost, pub total_cost: Cost, /// The input requirements and the index of the costed expressions for the inputs. 
- pub input_requirements: Arc<[(Required, usize)]>, + pub input_requirements: Arc<[(Arc, usize)]>, +} + +impl CostedExpr { + pub fn new( + group_expr: WithId>, + operator_cost: Cost, + total_cost: Cost, + input_requirements: Arc<[(Arc, usize)]>, + ) -> Self { + Self { + group_expr, + operator_cost, + total_cost, + input_requirements, + } + } } +#[derive(Default, Clone)] pub struct Optimization { pub costed_exprs: Vec, - pub enforcers: Vec, + pub enforcers: Vec>, pub status: Status, } pub struct MemoGroup { - group_id: GroupId, - exploration: watch::Sender, - optimizations: HashMap>, + pub group_id: GroupId, + pub exploration: watch::Sender, + pub optimizations: HashMap, watch::Sender>, } impl MemoGroup { @@ -506,7 +665,7 @@ mod tests { #[test] fn insert_scalar() { - let mut memo = MemoTable::default(); + let mut memo = MemoTable::new(IRContext::with_empty_magic()); let scalar = column_ref(Column(1)).equal(int32(799)); let scalar_from_clone = scalar.clone(); let scalar_dup = column_ref(Column(1)).equal(int32(799)); @@ -519,7 +678,7 @@ mod tests { #[test] fn insert_new_operator() { - let mut memo = MemoTable::default(); + let mut memo = MemoTable::new(IRContext::with_empty_magic()); let join = mock_scan(1, vec![1], 0.).logical_join( mock_scan(2, vec![2], 0.), boolean(true), @@ -541,7 +700,7 @@ mod tests { #[test] fn insert_operator_into_group() { - let mut memo = MemoTable::default(); + let mut memo = MemoTable::new(IRContext::with_empty_magic()); let join = mock_scan(1, vec![1], 0.).logical_join( mock_scan(2, vec![2], 0.), boolean(true), @@ -566,7 +725,7 @@ mod tests { #[test] fn parent_group_merge() { - let mut memo = MemoTable::default(); + let mut memo = MemoTable::new(IRContext::with_empty_magic()); let m1 = mock_scan(1, vec![1], 0.); let m1_alias = mock_scan(2, vec![1], 0.); @@ -596,11 +755,10 @@ mod tests { #[test] #[tracing_test::traced_test] fn cascading_group_merges() { - let mut memo = MemoTable::default(); + let mut memo = 
MemoTable::new(IRContext::with_empty_magic()); let m1 = mock_scan(1, vec![1], 0.); - let ctx = IRContext::with_empty_magic(); - trace!("\n{}", quick_explain(&m1, &ctx)); + trace!("\n{}", quick_explain(&m1, &memo.ctx)); let m1_alias = mock_scan(2, vec![1], 0.); let g1 = memo @@ -637,7 +795,7 @@ mod tests { #[test] fn insert_partial_binding() { - let mut memo = MemoTable::default(); + let mut memo = MemoTable::new(IRContext::with_empty_magic()); let m1 = mock_scan(1, vec![1], 0.); let m1_alias = mock_scan(2, vec![1], 0.); diff --git a/optd/core/src/rules/enforcers/mod.rs b/optd/core/src/rules/enforcers/mod.rs new file mode 100644 index 0000000..5b592a7 --- /dev/null +++ b/optd/core/src/rules/enforcers/mod.rs @@ -0,0 +1,43 @@ +use std::sync::Arc; + +use crate::ir::{ + convert::IntoOperator, + operator::EnforcerSort, + properties::TupleOrdering, + rule::{OperatorPattern, Rule}, +}; + +pub struct EnforceTupleOrderingRule { + ordering: TupleOrdering, + pattern: OperatorPattern, +} + +impl Rule for EnforceTupleOrderingRule { + fn name(&self) -> &'static str { + "enforce_tuple_ordering" + } + + fn pattern(&self) -> &crate::ir::rule::OperatorPattern { + &self.pattern + } + + fn transform( + &self, + operator: &crate::ir::Operator, + _ctx: &crate::ir::IRContext, + ) -> Result>, ()> { + if self.ordering.is_empty() { + return Ok(vec![]); + } + Ok(vec![ + EnforcerSort::new(self.ordering.clone(), Arc::new(operator.clone())).into_operator(), + ]) + } +} + +impl EnforceTupleOrderingRule { + pub fn new(ordering: TupleOrdering) -> Self { + let pattern = OperatorPattern::with_top_matches(|_| true); + Self { ordering, pattern } + } +} diff --git a/optd/core/src/rules/implementations/mod.rs b/optd/core/src/rules/implementations/mod.rs index f8bba40..8665e24 100644 --- a/optd/core/src/rules/implementations/mod.rs +++ b/optd/core/src/rules/implementations/mod.rs @@ -1,2 +1,5 @@ mod nl_join; mod table_scan; + +pub use nl_join::LogicalJoinAsPhysicalNLJoinRule; +pub use 
table_scan::LogicalGetAsPhysicalTableScanRule; diff --git a/optd/core/src/rules/implementations/nl_join.rs b/optd/core/src/rules/implementations/nl_join.rs index c88b296..7dd6d2f 100644 --- a/optd/core/src/rules/implementations/nl_join.rs +++ b/optd/core/src/rules/implementations/nl_join.rs @@ -5,17 +5,17 @@ use crate::ir::{ rule::{OperatorPattern, Rule}, }; -pub struct LogicalJoinAsPhysicalNLJoin { +pub struct LogicalJoinAsPhysicalNLJoinRule { pattern: OperatorPattern, } -impl Default for LogicalJoinAsPhysicalNLJoin { +impl Default for LogicalJoinAsPhysicalNLJoinRule { fn default() -> Self { Self::new() } } -impl LogicalJoinAsPhysicalNLJoin { +impl LogicalJoinAsPhysicalNLJoinRule { pub fn new() -> Self { let pattern = OperatorPattern::with_top_matches(|kind| matches!(kind, OperatorKind::LogicalJoin(_))); @@ -23,7 +23,7 @@ impl LogicalJoinAsPhysicalNLJoin { } } -impl Rule for LogicalJoinAsPhysicalNLJoin { +impl Rule for LogicalJoinAsPhysicalNLJoinRule { fn name(&self) -> &'static str { "logical_join_as_physical_nl_join" } @@ -35,10 +35,11 @@ impl Rule for LogicalJoinAsPhysicalNLJoin { fn transform( &self, operator: &crate::ir::Operator, + _ctx: &crate::ir::IRContext, ) -> Result>, ()> { let join = operator.try_bind_ref::().unwrap(); let nl_join = PhysicalNLJoin::new( - join.join_type().clone(), + *join.join_type(), join.outer().clone(), join.inner().clone(), join.join_cond().clone(), @@ -50,7 +51,7 @@ impl Rule for LogicalJoinAsPhysicalNLJoin { #[cfg(test)] mod tests { use crate::ir::{ - ScalarValue, + IRContext, ScalarValue, convert::IntoScalar, operator::{MockScan, MockSpec, join::JoinType}, scalar::Literal, @@ -60,6 +61,7 @@ mod tests { #[test] fn logical_join_as_physical_nl_join_behavior() { + let ctx = IRContext::with_empty_magic(); let m_outer = MockScan::with_mock_spec(1, MockSpec::default()).into_operator(); let m_inner = MockScan::with_mock_spec(2, MockSpec::default()).into_operator(); let join_cond = Literal::boolean(true).into_scalar(); @@ -71,10 +73,10 
@@ mod tests { ) .into_operator(); - let rule = LogicalJoinAsPhysicalNLJoin::new(); + let rule = LogicalJoinAsPhysicalNLJoinRule::new(); assert!(rule.pattern.matches_without_expand(&inner_join)); let nl_join = rule - .transform(&inner_join) + .transform(&inner_join, &ctx) .unwrap() .pop() .unwrap() diff --git a/optd/core/src/rules/implementations/table_scan.rs b/optd/core/src/rules/implementations/table_scan.rs index d9fb942..0fabfe9 100644 --- a/optd/core/src/rules/implementations/table_scan.rs +++ b/optd/core/src/rules/implementations/table_scan.rs @@ -5,17 +5,17 @@ use crate::ir::{ rule::{OperatorPattern, Rule}, }; -pub struct LogicalGetAsPhysicalTableScan { +pub struct LogicalGetAsPhysicalTableScanRule { pattern: OperatorPattern, } -impl Default for LogicalGetAsPhysicalTableScan { +impl Default for LogicalGetAsPhysicalTableScanRule { fn default() -> Self { Self::new() } } -impl LogicalGetAsPhysicalTableScan { +impl LogicalGetAsPhysicalTableScanRule { pub fn new() -> Self { let pattern = OperatorPattern::with_top_matches(|kind| matches!(kind, OperatorKind::LogicalGet(_))); @@ -23,7 +23,7 @@ impl LogicalGetAsPhysicalTableScan { } } -impl Rule for LogicalGetAsPhysicalTableScan { +impl Rule for LogicalGetAsPhysicalTableScanRule { fn name(&self) -> &'static str { "logical_get_as_physical_table_scan" } @@ -35,6 +35,7 @@ impl Rule for LogicalGetAsPhysicalTableScan { fn transform( &self, operator: &crate::ir::Operator, + _ctx: &crate::ir::IRContext, ) -> Result>, ()> { let get = operator.try_bind_ref::().unwrap(); let table_scan = PhysicalTableScan::new(*get.table_id(), get.projection_list().clone()); diff --git a/optd/core/src/rules/logical_join_inner_assoc.rs b/optd/core/src/rules/logical_join_inner_assoc.rs index aeceee2..8d127bf 100644 --- a/optd/core/src/rules/logical_join_inner_assoc.rs +++ b/optd/core/src/rules/logical_join_inner_assoc.rs @@ -1,7 +1,8 @@ use crate::ir::{ - OperatorKind, + IRContext, OperatorKind, convert::IntoOperator, operator::{LogicalJoin, 
join::JoinType}, + properties::{GetProperty, OutputColumns}, rule::{OperatorPattern, Rule}, }; @@ -39,6 +40,7 @@ impl Rule for LogicalJoinInnerAssocRule { fn transform( &self, operator: &crate::ir::Operator, + ctx: &IRContext, ) -> Result>, ()> { // ((a JOIN b, cond_low) JOIN c, cond_up) → (a JOIN (b JOIN c, cond_up), cond_low) let join_upper = operator.try_bind_ref_experimental::().unwrap(); @@ -53,6 +55,16 @@ impl Rule for LogicalJoinInnerAssocRule { let b = join_lower.inner().clone(); let c = join_upper.inner().clone(); + let new_lower_columns = + b.get_property::(ctx).set() & c.get_property::(ctx).set(); + if join_upper + .join_cond() + .used_columns() + .is_superset(&new_lower_columns) + { + return Ok(vec![]); + } + let new_join_upper = LogicalJoin::new( JoinType::Inner, a, @@ -95,9 +107,10 @@ mod tests { ) .into_operator(); + let ctx = IRContext::with_empty_magic(); let rule = LogicalJoinInnerAssocRule::new(); assert!(rule.pattern.matches_without_expand(&inner_joins)); - let res = rule.transform(&inner_joins).unwrap().pop().unwrap(); + let res = rule.transform(&inner_joins, &ctx).unwrap().pop().unwrap(); let new_upper = res.try_bind_ref_experimental::().unwrap(); let a_ref = new_upper .outer() diff --git a/optd/core/src/rules/logical_join_inner_commute.rs b/optd/core/src/rules/logical_join_inner_commute.rs index fdf1965..a17c896 100644 --- a/optd/core/src/rules/logical_join_inner_commute.rs +++ b/optd/core/src/rules/logical_join_inner_commute.rs @@ -41,6 +41,7 @@ impl Rule for LogicalJoinInnerCommuteRule { fn transform( &self, operator: &crate::ir::Operator, + _ctx: &crate::ir::IRContext, ) -> Result>, ()> { let join = operator.try_bind_ref::().unwrap(); assert_eq!(join.join_type(), &JoinType::Inner); @@ -60,6 +61,7 @@ impl Rule for LogicalJoinInnerCommuteRule { #[cfg(test)] mod tests { use crate::ir::{ + IRContext, convert::IntoScalar, operator::{MockScan, MockSpec}, scalar::Literal, @@ -82,7 +84,8 @@ mod tests { let rule = 
LogicalJoinInnerCommuteRule::new(); assert!(rule.pattern.matches_without_expand(&inner_join)); - let res = rule.transform(&inner_join).unwrap().pop().unwrap(); + let ctx = IRContext::with_empty_magic(); + let res = rule.transform(&inner_join, &ctx).unwrap().pop().unwrap(); let commuted = res.try_bind_ref_experimental::().unwrap(); let new_outer = commuted diff --git a/optd/core/src/rules/mod.rs b/optd/core/src/rules/mod.rs index 0c08925..72d09d6 100644 --- a/optd/core/src/rules/mod.rs +++ b/optd/core/src/rules/mod.rs @@ -1,3 +1,9 @@ +mod enforcers; mod implementations; mod logical_join_inner_assoc; mod logical_join_inner_commute; + +pub use enforcers::*; +pub use implementations::*; +pub use logical_join_inner_assoc::LogicalJoinInnerAssocRule; +pub use logical_join_inner_commute::LogicalJoinInnerCommuteRule; diff --git a/optd/core/src/utility/union_find.rs b/optd/core/src/utility/union_find.rs index b3f802e..1a0d234 100644 --- a/optd/core/src/utility/union_find.rs +++ b/optd/core/src/utility/union_find.rs @@ -201,8 +201,7 @@ mod tests { assert_eq!( repr.find(&i), 1, - "Element {} should have representative 4", - i + "Element {i} should have representative 4" ); } @@ -218,8 +217,7 @@ mod tests { assert_eq!( repr.find(&i), 1, - "After final merge, element {} should have representative 8", - i + "After final merge, element {i} should have representative 8" ); } } diff --git a/optd/core/tests/it_works.rs b/optd/core/tests/it_works.rs index deab76b..dd40ac1 100644 --- a/optd/core/tests/it_works.rs +++ b/optd/core/tests/it_works.rs @@ -1,24 +1,92 @@ use std::sync::Arc; -use optd_core::ir::{ - Column, - catalog::DataSourceId, - convert::{IntoOperator, IntoScalar}, - operator::LogicalGet, - scalar::*, +use itertools::Itertools; + +use optd_core::{ + cascades::Cascades, + ir::{ + Column, IRContext, Operator, + builder::*, + explain::quick_explain, + operator::join::JoinType, + properties::{Required, TupleOrdering, TupleOrderingDirection}, + rule::RuleSet, + }, + rules, }; 
-#[test] -fn it_works() { - let op = LogicalGet::new( - DataSourceId(1), - ProjectionList::new(Arc::new([ - Assign::new(Column(0), ColumnRef::new(Column(0)).into_scalar()).into_scalar(), - Assign::new(Column(1), ColumnRef::new(Column(1)).into_scalar()).into_scalar(), - ])) - .into_scalar(), - ) - .into_operator(); - assert_eq!(op.input_operators().len(), 0); - assert_eq!(op.input_scalars().len(), 1); +async fn optimize_plan( + opt: Arc, + initial_plan: &Arc, + required: Arc, +) -> Option> { + println!("available rules:"); + for rule in opt.rule_set.iter() { + println!("- {}", rule.name()); + } + println!("\n MEMO BEFORE OPT"); + opt.memo.read().await.dump(); + let optimized = opt.optimize(initial_plan, required.clone()).await; + let initial_explained = quick_explain(initial_plan, &opt.ctx); + let optimized = optimized.unwrap(); + let optimized_explained = quick_explain(&optimized, &opt.ctx); + + let initial_explained = initial_explained.split('\n').collect::>(); + let optimized_explained = optimized_explained.split('\n').collect::>(); + let initial_len = initial_explained[0].len(); + + println!("\nMEMO AFTER OPT"); + opt.memo.read().await.dump(); + + println!("\nEXPLAIN (root_requirement: {}):", required); + std::iter::once(format!("{: println!("{l} {r}"), + itertools::EitherOrBoth::Left(l) => println!("{l}"), + itertools::EitherOrBoth::Right(r) => { + println!("{} {r}", " ".repeat(initial_len)) + } + }); + Some(optimized) +} + +#[tokio::test] +async fn integration() -> Result<(), Box> { + // console_subscriber::init(); + tracing_subscriber::fmt() + .without_time() + .with_max_level(tracing::Level::INFO) + // .with_target(false) // Optional: also remove target + .compact() // Optional: use compact format + .init(); + let m1 = mock_scan(1, vec![1, 2, 3], 10.); + let m2 = mock_scan(2, vec![4, 5], 20.); + let m3 = mock_scan(3, vec![6, 7], 30.); + let required = Arc::new(Required { + tuple_ordering: TupleOrdering::from_iter([(Column(4), TupleOrderingDirection::Asc)]), 
+ }); + let join_m1_m2 = m1 + .logical_join( + m2, + column_ref(Column(1)).equal(column_ref(Column(4))), + JoinType::Inner, + ) + .logical_join( + m3, + column_ref(Column(2)).equal(column_ref(Column(6))), + JoinType::Inner, + ); + + let ctx = IRContext::with_empty_magic(); + let rule_set = RuleSet::builder() + .add_rule(rules::LogicalJoinAsPhysicalNLJoinRule::new()) + .add_rule(rules::LogicalJoinInnerCommuteRule::new()) + .add_rule(rules::LogicalJoinInnerAssocRule::new()) + .build(); + let opt = Arc::new(Cascades::new(ctx, rule_set)); + + optimize_plan(opt, &join_m1_m2, required).await.unwrap(); + Ok(()) } From 8716478f132a891c6a0bb8dc40c9be6b93f46029 Mon Sep 17 00:00:00 2001 From: Yuchen Liang <70461588+yliang412@users.noreply.github.com> Date: Fri, 8 Aug 2025 13:07:20 -0700 Subject: [PATCH 03/40] setup CLI and connectors skeleton (#4) Signed-off-by: Yuchen Liang --- .gitignore | 10 +- Cargo.lock | 5396 +++++++++++++++++++++--- Cargo.toml | 14 +- README.md | 16 + cli/Cargo.toml | 21 + cli/src/lib.rs | 87 + cli/src/main.rs | 394 ++ connectors/datafusion/Cargo.toml | 7 + connectors/datafusion/src/extension.rs | 2 + connectors/datafusion/src/lib.rs | 5 + connectors/datafusion/src/planner.rs | 43 + optd/core/Cargo.toml | 3 +- 12 files changed, 5358 insertions(+), 640 deletions(-) create mode 100644 cli/Cargo.toml create mode 100644 cli/src/lib.rs create mode 100644 cli/src/main.rs create mode 100644 connectors/datafusion/Cargo.toml create mode 100644 connectors/datafusion/src/extension.rs create mode 100644 connectors/datafusion/src/lib.rs create mode 100644 connectors/datafusion/src/planner.rs diff --git a/.gitignore b/.gitignore index 5fcadb3..958f6c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,11 @@ + +# rust /target -*.db + +# optd memo dump *.memo + +# datafusion +.history + +*.db \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 0154c51..4a596af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,26 @@ version = "2.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.3", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -27,919 +47,4464 @@ dependencies = [ ] [[package]] -name = "anyhow" -version = "1.0.98" +name = "alloc-no-stdlib" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" [[package]] -name = "async-stream" -version = "0.3.6" +name = "alloc-stdlib" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" dependencies = [ - "async-stream-impl", - "futures-core", - "pin-project-lite", + "alloc-no-stdlib", ] [[package]] -name = "async-stream-impl" -version = "0.3.6" +name = "allocator-api2" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" dependencies = [ - "proc-macro2", - "quote", - "syn", + "libc", ] [[package]] -name = "async-trait" -version = "0.1.88" +name = "anstream" +version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" dependencies = [ - "proc-macro2", - "quote", - "syn", + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", ] [[package]] -name = "atomic-waker" -version = "1.1.2" +name = "anstyle" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" [[package]] -name = "autocfg" -version = "1.5.0" +name = "anstyle-parse" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] [[package]] -name = "axum" -version = "0.7.9" +name = "anstyle-query" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" dependencies = [ - "async-trait", - "axum-core", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "itoa", - "matchit", - "memchr", - "mime", - 
"percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower 0.5.2", - "tower-layer", - "tower-service", + "windows-sys 0.60.2", ] [[package]] -name = "axum-core" -version = "0.4.5" +name = "anstyle-wincon" +version = "3.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", ] [[package]] -name = "backtrace" -version = "0.3.75" +name = "anyhow" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "apache-avro" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aef82843a0ec9f8b19567445ad2421ceeb1d711514384bdd3d49fe37102ee13" dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets", + "bigdecimal", + "bzip2 0.4.4", + "crc32fast", + "digest", + "libflate", + "log", + "num-bigint", + "quad-rand", + "rand 0.8.5", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "snap", + "strum", + "strum_macros", + "thiserror 1.0.69", + "typed-builder", + "uuid", + "xz2", + "zstd", ] [[package]] -name = "base64" -version = "0.21.7" +name = "arrayref" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +checksum = 
"76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" [[package]] -name = "base64" -version = "0.22.1" +name = "arrayvec" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] -name = "bitflags" -version = "2.9.1" +name = "arrow" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] [[package]] -name = "bitvec" -version = "1.0.1" +name = "arrow-arith" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" dependencies = [ - "funty", - "radium", - "tap", - "wyz", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num", ] [[package]] -name = "byteorder" -version = "1.5.0" +name = "arrow-array" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.5", + "num", +] [[package]] -name = "bytes" -version = "1.10.1" +name = "arrow-buffer" +version = "55.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" +dependencies = [ + "bytes", + "half", + "num", +] [[package]] -name = "cfg-if" -version = "1.0.1" +name = "arrow-cast" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64 0.22.1", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] [[package]] -name = "console-api" -version = "0.8.1" +name = "arrow-csv" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8030735ecb0d128428b64cd379809817e620a40e5001c54465b99ec5feec2857" +checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b" dependencies = [ - "futures-core", - "prost", - "prost-types", - "tonic", - "tracing-core", + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "regex", ] [[package]] -name = "console-subscriber" -version = "0.4.1" +name = "arrow-data" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6539aa9c6a4cd31f4b1c040f860a1eac9aa80e7df6b05d506a6e7179936d6a01" +checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" dependencies = [ - "console-api", - "crossbeam-channel", - "crossbeam-utils", - "futures-task", - "hdrhistogram", - "humantime", - "hyper-util", - "prost", - "prost-types", - "serde", - "serde_json", - "thread_local", - "tokio", - "tokio-stream", - "tonic", - "tracing", - "tracing-core", - "tracing-subscriber", + "arrow-buffer", + "arrow-schema", + "half", + "num", ] 
[[package]] -name = "crc32fast" -version = "1.5.0" +name = "arrow-ipc" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" dependencies = [ - "cfg-if", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", + "zstd", ] [[package]] -name = "crossbeam-channel" -version = "0.5.15" +name = "arrow-json" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" dependencies = [ - "crossbeam-utils", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap 2.10.0", + "lexical-core", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", ] [[package]] -name = "crossbeam-utils" -version = "0.8.21" +name = "arrow-ord" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] [[package]] -name = "either" -version = "1.15.0" +name = "arrow-row" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] [[package]] -name = "equivalent" -version = "1.0.2" +name = "arrow-schema" +version = "55.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" +dependencies = [ + "serde", + "serde_json", +] [[package]] -name = "flate2" -version = "1.1.2" +name = "arrow-select" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" dependencies = [ - "crc32fast", - "miniz_oxide", + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", ] [[package]] -name = "fnv" -version = "1.0.7" +name = "arrow-string" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax 0.8.5", +] [[package]] -name = "funty" -version = "2.0.0" +name = "async-compression" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +dependencies = [ + "bzip2 0.5.2", + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] [[package]] -name = "futures-channel" -version = "0.3.31" +name = "async-stream" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" dependencies = [ + 
"async-stream-impl", "futures-core", + "pin-project-lite", ] [[package]] -name = "futures-core" -version = "0.3.31" +name = "async-stream-impl" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "futures-sink" -version = "0.3.31" +name = "async-trait" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "futures-task" -version = "0.3.31" +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "aws-config" +version = "1.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "483020b893cdef3d89637e428d588650c71cfae7ea2e6ecbaee4de4ff99fb2dd" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + 
"http 1.3.1", + "ring", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1541072f81945fa1251f8795ef6c92c4282d74d59f88498ae7d4bf00f0ebdad9" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-lc-rs" +version = "1.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbfd150b5dbdb988bcc8fb1fe787eb6b7ee6180ca24da683b61ea5405f3d43ff" +dependencies = [ + "bindgen", + "cc", + "cmake", + "dunce", + "fs_extra", +] + +[[package]] +name = "aws-runtime" +version = "1.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c034a1bc1d70e16e7f4e4caf7e9f7693e4c9c24cd91cf17c2a0b21abaebc7c8b" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.79.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a847168f15b46329fa32c7aca4e4f1a2e072f9b422f0adb19756f2e1457f111" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = 
"1.80.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b654dd24d65568738593e8239aef279a86a15374ec926ae8714e2d7245f34149" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.81.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c92ea8a7602321c83615c82b408820ad54280fb026e92de0eeea937342fafa24" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" +dependencies = [ + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.3.1", + "percent-encoding", + "sha2", + "time", + "tracing", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-http" +version = "0.62.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", 
+ "futures-core", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f108f1ca850f3feef3009bdcc977be201bca9a91058864d9de0684e64514bee0" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2", + "http 1.3.1", + "hyper", + "hyper-rustls", + "hyper-util", + "pin-project-lite", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tower 0.5.2", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.61.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a16e040799d29c17412943bdbf488fd75db04112d0c0d4b9290bacf5ae0014b9" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e107ce0783019dbff59b3a244aa0c114e4a8c9d93498af9162608cd5474e796" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "http-body 1.0.1", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = 
"aws-smithy-runtime-api" +version = "1.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75d52251ed4b9776a3e8487b2a01ac915f73b2da3af8fc1e77e0fce697a550d4" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.3.1", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower 0.5.2", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", +] + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "bigdecimal" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", +] + +[[package]] +name = "bindgen" +version = "0.69.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.12.1", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + 
"quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn", + "which", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "8.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff" +dependencies = [ + "libbz2-rs-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2352e5597e9c544d5e6d9c95190d5d27738ade584fa8db0a16e130e5c2b5296e" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "serde", + "windows-link", +] + +[[package]] +name = "chrono-tz" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +dependencies = [ + "chrono", + "phf", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "4.5.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50fd97c9dc2399518aa331917ac6f274280ec5eb34e555dd291899745c48ec6f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c35b5830294e1fa0462034af85cc95225a4cb07092c088c55bda3147cfcd8f65" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.41" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "clipboard-win" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" +dependencies = [ + "error-code", +] + +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "comfy-table" +version = "7.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" +dependencies = [ + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "console-api" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8030735ecb0d128428b64cd379809817e620a40e5001c54465b99ec5feec2857" +dependencies = [ + "futures-core", + "prost", + "prost-types", + "tonic", + "tracing-core", +] + +[[package]] +name = "console-subscriber" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6539aa9c6a4cd31f4b1c040f860a1eac9aa80e7df6b05d506a6e7179936d6a01" +dependencies = [ + "console-api", + "crossbeam-channel", + "crossbeam-utils", + "futures-task", + "hdrhistogram", + "humantime", + "hyper-util", + "prost", + 
"prost-types", + "serde", + "serde_json", + "thread_local", + "tokio", + "tokio-stream", + "tonic", + "tracing", + "tracing-core", + "tracing-subscriber", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +dependencies = [ + "memchr", +] + +[[package]] +name = "dary_heap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + 
"parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f47772c28553d837e12cdcc0fb04c2a0fe8eca8b704a30f721d076f32407435" +dependencies = [ + "arrow", + "arrow-ipc", + "arrow-schema", + "async-trait", + "bytes", + "bzip2 0.6.0", + "chrono", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-avro", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "flate2", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet", + "rand 0.9.2", + "regex", + "sqlparser", + "tempfile", + "tokio", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b6b29c9c922959285fac53139e12c81014e2ca54704f20355edd7e9d11fd773" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "7313553e4c01d184dd49183afdfa22f23204a10a26dd12e6f799203d8fdb95c2" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "log", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-cli" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db88b7c2988301968b5234be0011ae73c67559b6f62d771393bb442d16213c60" +dependencies = [ + "arrow", + "async-trait", + "aws-config", + "aws-credential-types", + "clap", + "datafusion", + "dirs", + "env_logger", + "futures", + "log", + "mimalloc", + "object_store", + "parking_lot", + "parquet", + "regex", + "rustyline", + "tokio", + "url", +] + +[[package]] +name = "datafusion-common" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d66104731b7476a8c86fbe7a6fd741e6329791166ac89a91fcd8336a560ddaf" +dependencies = [ + "ahash", + "apache-avro", + "arrow", + "arrow-ipc", + "base64 0.22.1", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap 2.10.0", + "libc", + "log", + "object_store", + "parquet", + "paste", + "recursive", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7527ecdfeae6961a8564d3b036507a67bd467fd36a9f10cf8ad7a99db1f1bc" +dependencies = [ + "futures", + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40e5076be33d8eb9f4d99858e5f3477b36c07e61eee8eb93c4320428d9e1e344" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.6.0", + "chrono", + "datafusion-common", + 
"datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "parquet", + "rand 0.9.2", + "tempfile", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-datasource-avro" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831cfe556658133ea4270d616164ce27f737e9e4d5e359e1b1b269e0bf767cef" +dependencies = [ + "apache-avro", + "arrow", + "async-trait", + "bytes", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "num-traits", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "785518d0f2f136c19b9389a10762c01a5aeb5fcdebdb244297bb656b2862dc88" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71cb7c3bad0951bf5c52505d0e6d87e6c0098156d2a195924cbcdc82238d29ba" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + 
"datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-datasource-parquet" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea76ad2c5189c98a6b1d4bdf6c3b3caacc9701c417af6661597c946a201bc328" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-pruning", + "datafusion-session", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet", + "rand 0.9.2", + "tokio", +] + +[[package]] +name = "datafusion-doc" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bcc45e380db5c6033c3f39e765a3d752679f14315060a7f4030a60066a36946" + +[[package]] +name = "datafusion-execution" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8209805fdce3d5c6e1625f674d3e4ce93e995a56d3709a0bb8d4361062652596" +dependencies = [ + "arrow", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "log", + "object_store", + "parking_lot", + "rand 0.9.2", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7879a845e72a00cacffacbdf5f40626049cb9584d2ba8aa0b9172f09833110ab" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + 
"datafusion-physical-expr-common", + "indexmap 2.10.0", + "paste", + "recursive", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr-common" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6da7e47e70ef2c7678735c82c392bd74687004043f5fc8072ab8678dc6fa459d" +dependencies = [ + "arrow", + "datafusion-common", + "indexmap 2.10.0", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e7b92b04c5c3b1151f055251b36e272071f9088d9701826a533cb4f764af1c8" +dependencies = [ + "arrow", + "arrow-buffer", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", + "hex", + "itertools 0.14.0", + "log", + "md-5", + "rand 0.9.2", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f16cb922b62e535a4d484961ac2c1c6d188dbe02e85e026c05f0fabbc8f814e" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f71bb59dc8b4dc985c911f2e0d8cf426c21f565b56dca4b852c244101a1a7a2" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-functions-nested" +version = "49.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "27eb3b98a2eb02a8af4ef19cc793cac21fc98d8720b987f15d7d25b8cc875f4d" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr-common", + "itertools 0.14.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-table" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e0940fc3e2fa4645a4d323f9ebf9258b2d7fdad12013a471cae4ae5568683" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df03c6c62039578fd110b327c474846fdf3d9077a568f1e8706e585ed30cb98d" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "083659a95914bf3ca568a72b085cb8654576fef1236b260dc2379cb8e5f922b2" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cabe1f32daa2fa54e6b20d14a13a9e85bef97c4161fe8a90d76b6d9693a5ac4" +dependencies = [ + "datafusion-expr", + "quote", + "syn", +] + +[[package]] +name = "datafusion-optimizer" +version = "49.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e12a97dcb0ccc569798be1289c744829cce5f18cc9b037054f8d7f93e1d57be" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "indexmap 2.10.0", + "itertools 0.14.0", + "log", + "recursive", + "regex", + "regex-syntax 0.8.5", +] + +[[package]] +name = "datafusion-physical-expr" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41312712b8659a82b4e9faa8d97a018e7f2ccbdedf2f7cb93ecf256e39858c86" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap 2.10.0", + "itertools 0.14.0", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1649a60ea0319496d616ae3554e84dfcc262c201ab4439abcd83cca989b85b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea3f5b8ba6122426774aaaf11325740b8e5d3afaab9ab39dc63423adca554748" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-pruning", + "itertools 0.14.0", + "log", + "recursive", +] + +[[package]] +name = "datafusion-physical-plan" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a595f296929d6cffa12b993ea53e9fe8215fada050d78626c5cf0e2f02b0205" 
+dependencies = [ + "ahash", + "arrow", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap 2.10.0", + "itertools 0.14.0", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-pruning" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-datasource", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", +] + +[[package]] +name = "datafusion-session" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd5f2fe790f43839c70fb9604c4f9b59ad290ef64e1d2f927925dd34a9245406" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ebebb82fda37f62f06fe14339f4faa9f197a0320cc4d26ce2a5fd53a5ccd27c" +dependencies = [ + "arrow", + "bigdecimal", + "datafusion-common", + "datafusion-expr", + "indexmap 2.10.0", + "log", + "recursive", + "regex", + "sqlparser", +] + +[[package]] +name = "deranged" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.60.2", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = 
"env_logger" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "error-code" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fd-lock" +version = "4.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" +dependencies = [ + "cfg-if", + "rustix 1.0.8", + "windows-sys 0.59.0", +] + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +dependencies = [ + "bitflags", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +dependencies = [ + "crc32fast", + "libz-rs-sys", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] 
+name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies 
= [ + "cfg-if", + "js-sys", + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", + "wasm-bindgen", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "glob" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" + +[[package]] +name = "h2" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.3.1", + "indexmap 2.10.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hdrhistogram" +version = "7.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "base64 0.21.7", + "byteorder", + "flate2", + "nom", + "num-traits", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.3.1", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.3.1", + "http-body 1.0.1", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" + +[[package]] +name = "hyper" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2", + "http 1.3.1", + "http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http 1.3.1", + "hyper", + "hyper-util", + "rustls", + 
"rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2 0.6.0", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" 
+dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" + +[[package]] +name = "icu_properties" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "potential_utf", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" + +[[package]] +name = "icu_provider" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +dependencies = [ + "displaydoc", + "icu_locale_core", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown 0.15.5", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "io-uring" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "iri-string" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] 
+name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "jobserver" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +dependencies = [ + "getrandom 0.3.3", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "lexical-core" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libbz2-rs-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "libflate" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e" +dependencies = [ + "adler32", + "core2", + "crc32fast", + "dary_heap", + "libflate_lz77", +] + +[[package]] +name = "libflate_lz77" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d" +dependencies = [ + "core2", + "hashbrown 0.14.5", + "rle-decode-fast", +] + +[[package]] +name = "libloading" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" +dependencies = [ + "cfg-if", + "windows-targets 0.53.3", +] + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = "libmimalloc-sys" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf88cd67e9de251c1781dbe2f641a1a3ad66eaae831b8a2c38fbdc5ddae16d4d" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "libredox" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" +dependencies = [ + "bitflags", + "libc", +] + +[[package]] +name = "libz-rs-sys" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" +dependencies = [ + "zlib-rs", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + +[[package]] +name = "litemap" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "mimalloc" +version = "0.1.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1791cbe101e95af5764f06f20f6760521f7158f69dbf9d6baf941ee1bf6bc40" +dependencies = [ + "libmimalloc-sys", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys 0.59.0", +] + +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + +[[package]] +name = "nix" +version = "0.30.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", + "serde", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" 
+version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "object_store" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "chrono", + "form_urlencoded", + "futures", + "http 1.3.1", + "http-body-util", + "humantime", + "hyper", + "itertools 0.14.0", + "md-5", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand 0.9.2", + "reqwest", + "ring", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "thiserror 2.0.12", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "optd-catalog" +version = "0.1.0" + +[[package]] +name = "optd-cli" +version = "0.1.0" +dependencies = [ + "clap", + "datafusion", + "datafusion-cli", + "dirs", + "object_store", + "regex", + "tokio", + "url", +] + +[[package]] +name = "optd-core" +version = "0.1.0" +dependencies = [ + "anyhow", + "bitvec", + "console-subscriber", + "itertools 0.14.0", + "pretty-xmlish", + "tokio", + "tracing", + "tracing-subscriber", + "tracing-test", +] + +[[package]] +name = "optd-datafusion" +version = "0.1.0" +dependencies = [ + "datafusion", +] + +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "parquet" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.22.1", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.5", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "ring", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "petgraph" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.5", + "indexmap 2.10.0", + "serde", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" 
+dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "potential_utf" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +dependencies = [ + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "pretty-xmlish" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b8aab53732b7a9c5c39bb0e130f85671b48b188ef258c3b9f7f5da1877382a" + +[[package]] +name = "prettyplease" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +dependencies = [ + "prost", +] + +[[package]] +name = "psm" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +dependencies = [ + "cc", +] + 
+[[package]] +name = "quad-rand" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" + +[[package]] +name = "quick-xml" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9845d9dccf565065824e69f9f235fafba1587031eda353c1f1561cd6a6be78f4" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quinn" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash 2.1.1", + "rustls", + "socket2 0.5.10", + "thiserror 2.0.12", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e" +dependencies = [ + "bytes", + "getrandom 0.3.3", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash 2.1.1", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.12", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2 0.5.10", + "tracing", + "windows-sys 0.59.0", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] [[package]] -name = "futures-util" -version = "0.3.31" +name = "rand_chacha" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ - "futures-core", - "futures-task", - "pin-project-lite", - "pin-utils", + "ppv-lite86", + "rand_core 0.9.3", ] [[package]] -name = "getrandom" -version = "0.2.16" +name = "rand_core" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "cfg-if", - "libc", - "wasi", + "getrandom 0.2.16", ] [[package]] -name = "gimli" -version = "0.31.1" +name = "rand_core" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.3", +] [[package]] -name = "h2" -version = "0.4.12" +name = "recursive" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" dependencies = [ - "atomic-waker", - "bytes", - "fnv", - "futures-core", - "futures-sink", - "http", - "indexmap 2.10.0", - "slab", - "tokio", - "tokio-util", - "tracing", + "recursive-proc-macro-impl", + "stacker", ] [[package]] -name = "hashbrown" -version = "0.12.3" +name = "recursive-proc-macro-impl" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn", +] [[package]] -name = "hashbrown" -version = "0.15.4" +name = "redox_syscall" +version = "0.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] [[package]] -name = "hdrhistogram" -version = "7.5.4" +name = "redox_users" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ - "base64 0.21.7", - "byteorder", - "flate2", - "nom", - "num-traits", + "getrandom 0.2.16", + "libredox", + "thiserror 2.0.12", ] [[package]] -name = "http" -version = "1.3.1" +name = "regex" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ - "bytes", - "fnv", - "itoa", + "aho-corasick", + "memchr", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", ] [[package]] -name = "http-body" -version = "1.0.1" +name = "regex-automata" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" dependencies = [ - "bytes", - "http", + "regex-syntax 0.6.29", ] [[package]] -name = "http-body-util" -version = "0.1.3" +name = "regex-automata" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ - "bytes", - "futures-core", - "http", - "http-body", - "pin-project-lite", + "aho-corasick", + "memchr", + "regex-syntax 0.8.5", ] [[package]] -name = "httparse" -version = "1.10.1" +name = "regex-lite" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" [[package]] -name = "httpdate" -version = "1.0.3" +name = "regex-syntax" +version = "0.6.29" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] -name = "humantime" -version = "2.2.0" +name = "regex-syntax" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] -name = "hyper" -version = "1.6.0" +name = "reqwest" +version = "0.12.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" +checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531" dependencies = [ + "base64 0.22.1", "bytes", - "futures-channel", + "futures-core", "futures-util", "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "smallvec", - "tokio", - "want", -] - -[[package]] -name = "hyper-timeout" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" -dependencies = [ + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", "hyper", + "hyper-rustls", "hyper-util", + "js-sys", + "log", + "percent-encoding", "pin-project-lite", + "quinn", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", "tokio", + "tokio-rustls", + "tokio-util", + "tower 0.5.2", + "tower-http", "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", ] [[package]] -name = "hyper-util" -version = "0.1.16" +name = "ring" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" 
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "http", - "http-body", - "hyper", + "cc", + "cfg-if", + "getrandom 0.2.16", "libc", - "pin-project-lite", - "socket2 0.6.0", - "tokio", - "tower-service", - "tracing", + "untrusted", + "windows-sys 0.52.0", ] [[package]] -name = "indexmap" -version = "1.9.3" +name = "rle-decode-fast" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ - "autocfg", - "hashbrown 0.12.3", + "semver", ] [[package]] -name = "indexmap" -version = "2.10.0" +name = "rustix" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "equivalent", - "hashbrown 0.15.4", + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + 
"windows-sys 0.59.0", ] [[package]] -name = "io-uring" -version = "0.7.9" +name = "rustix" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" +checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" dependencies = [ "bitflags", - "cfg-if", + "errno", "libc", + "linux-raw-sys 0.9.4", + "windows-sys 0.60.2", ] [[package]] -name = "itertools" -version = "0.14.0" +name = "rustls" +version = "0.23.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc" dependencies = [ - "either", + "aws-lc-rs", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", ] [[package]] -name = "itoa" -version = "1.0.15" +name = "rustls-native-certs" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] [[package]] -name = "lazy_static" -version = "1.5.0" +name = "rustls-pemfile" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] [[package]] -name = "libc" -version = "0.2.174" +name = "rustls-pki-types" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" 
+dependencies = [ + "web-time", + "zeroize", +] [[package]] -name = "lock_api" -version = "0.4.13" +name = "rustls-webpki" +version = "0.103.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc" dependencies = [ - "autocfg", - "scopeguard", + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", ] [[package]] -name = "log" -version = "0.4.27" +name = "rustversion" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] -name = "matchers" -version = "0.1.0" +name = "rustyline" +version = "16.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +checksum = "62fd9ca5ebc709e8535e8ef7c658eb51457987e48c98ead2be482172accc408d" dependencies = [ - "regex-automata 0.1.10", + "bitflags", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix", + "radix_trie", + "unicode-segmentation", + "unicode-width", + "utf8parse", + "windows-sys 0.59.0", ] [[package]] -name = "matchit" -version = "0.7.3" +name = "ryu" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] -name = "memchr" -version = "2.7.5" +name = "same-file" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + 
"winapi-util", +] [[package]] -name = "mime" -version = "0.3.17" +name = "schannel" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +dependencies = [ + "windows-sys 0.59.0", +] [[package]] -name = "minimal-lexical" -version = "0.2.1" +name = "scopeguard" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "miniz_oxide" -version = "0.8.9" +name = "security-framework" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c" dependencies = [ - "adler2", + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", ] [[package]] -name = "mio" -version = "1.0.4" +name = "security-framework-sys" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" dependencies = [ + "core-foundation-sys", "libc", - "wasi", - "windows-sys 0.59.0", ] [[package]] -name = "nom" -version = "7.1.3" +name = "semver" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] -name = "nu-ansi-term" -version = "0.46.0" +name = "seq-macro" +version = 
"0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" -dependencies = [ - "overload", - "winapi", -] +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] -name = "num-traits" -version = "0.2.19" +name = "serde" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ - "autocfg", + "serde_derive", ] [[package]] -name = "object" -version = "0.36.7" +name = "serde_bytes" +version = "0.11.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" dependencies = [ - "memchr", + "serde", ] [[package]] -name = "once_cell" -version = "1.21.3" +name = "serde_derive" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "optd-catalog" -version = "0.1.0" - -[[package]] -name = "optd-core" -version = "0.1.0" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ - "anyhow", - "bitvec", - "console-subscriber", - "itertools", - "pretty-xmlish", - "tokio", - "tracing", - "tracing-subscriber", - "tracing-test", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "overload" -version = "0.1.1" +name = "serde_json" +version = "1.0.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" +dependencies = [ + "itoa", + "memchr", + "ryu", + 
"serde", +] [[package]] -name = "parking_lot" -version = "0.12.4" +name = "serde_urlencoded" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ - "lock_api", - "parking_lot_core", + "form_urlencoded", + "itoa", + "ryu", + "serde", ] [[package]] -name = "parking_lot_core" -version = "0.9.11" +name = "sha2" +version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets", + "cpufeatures", + "digest", ] [[package]] -name = "percent-encoding" -version = "2.3.1" +name = "sharded-slab" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] [[package]] -name = "pin-project" -version = "1.1.10" +name = "shlex" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" -dependencies = [ - "pin-project-internal", -] +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] -name = "pin-project-internal" -version = "1.1.10" +name = "signal-hook-registry" +version = "1.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" dependencies = [ - "proc-macro2", - "quote", - "syn", + "libc", 
] [[package]] -name = "pin-project-lite" -version = "0.2.16" +name = "simdutf8" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] -name = "pin-utils" -version = "0.1.0" +name = "siphasher" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] -name = "ppv-lite86" -version = "0.2.21" +name = "slab" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] -name = "pretty-xmlish" -version = "0.1.13" +name = "smallvec" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b8aab53732b7a9c5c39bb0e130f85671b48b188ef258c3b9f7f5da1877382a" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] -name = "proc-macro2" -version = "1.0.95" +name = "snap" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" -dependencies = [ - "unicode-ident", -] +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] -name = "prost" -version = "0.13.5" +name = "socket2" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" dependencies = [ - "bytes", - 
"prost-derive", + "libc", + "windows-sys 0.52.0", ] [[package]] -name = "prost-derive" -version = "0.13.5" +name = "socket2" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" dependencies = [ - "anyhow", - "itertools", - "proc-macro2", - "quote", - "syn", + "libc", + "windows-sys 0.59.0", ] [[package]] -name = "prost-types" -version = "0.13.5" +name = "sqlparser" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" dependencies = [ - "prost", + "log", + "recursive", + "sqlparser_derive", ] [[package]] -name = "quote" -version = "1.0.40" +name = "sqlparser_derive" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", + "quote", + "syn", ] [[package]] -name = "radium" -version = "0.7.0" +name = "stable_deref_trait" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] -name = "rand" -version = "0.8.5" +name = "stacker" +version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" dependencies = [ + "cc", + "cfg-if", "libc", - "rand_chacha", - "rand_core", + "psm", + "windows-sys 0.59.0", ] [[package]] 
-name = "rand_chacha" -version = "0.3.1" +name = "static_assertions" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] -name = "rand_core" -version = "0.6.4" +name = "strsim" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] -name = "redox_syscall" -version = "0.5.17" +name = "strum" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" -dependencies = [ - "bitflags", -] +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" [[package]] -name = "regex" -version = "1.11.1" +name = "strum_macros" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "aho-corasick", - "memchr", - "regex-automata 0.4.9", - "regex-syntax 0.8.5", + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", ] [[package]] -name = "regex-automata" -version = "0.1.10" +name = "subtle" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", -] +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] -name = "regex-automata" -version = "0.4.9" +name = "syn" +version = "2.0.104" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.8.5", + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] -name = "regex-syntax" -version = "0.6.29" +name = "sync_wrapper" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] [[package]] -name = "regex-syntax" -version = "0.8.5" +name = "synstructure" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "rustc-demangle" -version = "0.1.26" +name = "tap" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] -name = "rustversion" -version = "1.0.21" +name = "tempfile" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix 1.0.8", + "windows-sys 0.59.0", +] [[package]] -name = "ryu" -version = "1.0.20" +name = "thiserror" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] [[package]] -name = "scopeguard" -version = "1.2.0" +name = "thiserror" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl 2.0.12", +] [[package]] -name = "serde" -version = "1.0.219" +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "serde_derive", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "serde_derive" -version = "1.0.219" +name = "thiserror-impl" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", @@ -947,89 +4512,88 @@ dependencies = [ ] [[package]] -name = "serde_json" -version = "1.0.142" +name = "thread_local" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", + "cfg-if", ] [[package]] -name = "sharded-slab" -version = "0.1.7" +name = "thrift" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +checksum = 
"7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ - "lazy_static", + "byteorder", + "integer-encoding", + "ordered-float", ] [[package]] -name = "slab" -version = "0.4.10" +name = "time" +version = "0.3.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +dependencies = [ + "deranged", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] [[package]] -name = "smallvec" -version = "1.15.1" +name = "time-core" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" [[package]] -name = "socket2" -version = "0.5.10" +name = "time-macros" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" dependencies = [ - "libc", - "windows-sys 0.52.0", + "num-conv", + "time-core", ] [[package]] -name = "socket2" -version = "0.6.0" +name = "tiny-keccak" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" dependencies = [ - "libc", - "windows-sys 0.59.0", + "crunchy", ] [[package]] -name = "syn" -version = "2.0.104" +name = "tinystr" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" dependencies = [ - "proc-macro2", 
- "quote", - "unicode-ident", + "displaydoc", + "zerovec", ] [[package]] -name = "sync_wrapper" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" - -[[package]] -name = "tap" -version = "1.0.1" +name = "tinyvec" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" +dependencies = [ + "tinyvec_macros", +] [[package]] -name = "thread_local" -version = "1.1.9" +name = "tinyvec_macros" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" -dependencies = [ - "cfg-if", -] +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" @@ -1044,6 +4608,7 @@ dependencies = [ "mio", "parking_lot", "pin-project-lite", + "signal-hook-registry", "slab", "socket2 0.6.0", "tokio-macros", @@ -1062,6 +4627,16 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-rustls" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" +dependencies = [ + "rustls", + "tokio", +] + [[package]] name = "tokio-stream" version = "0.1.17" @@ -1098,8 +4673,8 @@ dependencies = [ "base64 0.22.1", "bytes", "h2", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", "hyper", "hyper-timeout", @@ -1127,7 +4702,7 @@ dependencies = [ "indexmap 1.9.3", "pin-project", "pin-project-lite", - "rand", + "rand 0.8.5", "slab", "tokio", "tokio-util", @@ -1146,6 +4721,25 @@ dependencies = [ "futures-util", "pin-project-lite", "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version 
= "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +dependencies = [ + "bitflags", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "iri-string", + "pin-project-lite", + "tower 0.5.2", "tower-layer", "tower-service", ] @@ -1166,116 +4760,354 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" name = "tracing" version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "tracing-test" 
+version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "557b891436fe0d5e0e363427fc7f217abf9ccd510d5136549847bdcbcd011d68" +dependencies = [ + "tracing-core", + "tracing-subscriber", + "tracing-test-macro", +] + +[[package]] +name = "tracing-test-macro" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "twox-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" + +[[package]] +name = "typed-builder" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06fbd5b8de54c5f7c91f6fe4cebb949be2125d7758e630bb58b1d831dbce600" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "typenum" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +dependencies = [ + "getrandom 0.3.3", + "js-sys", + "serde", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", + "try-lock", ] [[package]] -name = "tracing-attributes" -version = "0.1.30" +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ - "proc-macro2", - "quote", - "syn", + "wit-bindgen-rt", ] [[package]] -name = "tracing-core" -version = "0.1.34" +name = "wasm-bindgen" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ + "cfg-if", "once_cell", - "valuable", + "rustversion", + "wasm-bindgen-macro", ] [[package]] -name = "tracing-log" -version = "0.2.0" +name = "wasm-bindgen-backend" +version = "0.2.100" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ + "bumpalo", "log", - "once_cell", - "tracing-core", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", ] [[package]] -name = "tracing-subscriber" -version = "0.3.19" +name = "wasm-bindgen-futures" +version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ - "matchers", - "nu-ansi-term", + "cfg-if", + "js-sys", "once_cell", - "regex", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", + "wasm-bindgen", + "web-sys", ] [[package]] -name = "tracing-test" -version = "0.2.5" +name = "wasm-bindgen-macro" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "557b891436fe0d5e0e363427fc7f217abf9ccd510d5136549847bdcbcd011d68" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ - "tracing-core", - "tracing-subscriber", - "tracing-test-macro", + "quote", + "wasm-bindgen-macro-support", ] [[package]] -name = "tracing-test-macro" -version = "0.2.5" +name = "wasm-bindgen-macro-support" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ + "proc-macro2", "quote", "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", ] [[package]] -name = "try-lock" -version = "0.2.5" +name = "wasm-bindgen-shared" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] -name = "unicode-ident" -version = "1.0.18" +name = "wasm-streams" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] [[package]] -name = "valuable" -version = "0.1.1" +name = "web-sys" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] [[package]] -name = "want" -version = "0.3.1" +name = "web-time" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" dependencies = [ - "try-lock", + "js-sys", + "wasm-bindgen", ] [[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" +name = "which" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] [[package]] name = "winapi" @@ -1293,19 +5125,87 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = 
"winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + [[package]] 
name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -1314,7 +5214,16 @@ version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", ] [[package]] @@ -1323,14 +5232,31 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", ] [[package]] @@ -1339,48 +5265,111 @@ version = 
"0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + +[[package]] +name = "writeable" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" + [[package]] name = "wyz" version = "0.5.1" @@ -1390,6 +5379,45 @@ dependencies = [ "tap", ] +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "yoke" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.8.26" @@ -1409,3 +5437,97 @@ dependencies = [ "quote", "syn", ] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.4" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zlib-rs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.15+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index 8728eae..5d9005b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,15 @@ [workspace] resolver = "3" -members = [ "optd/catalog","optd/core"] +members = ["cli", "connectors/datafusion", "optd/catalog", "optd/core"] + +# By default, only compiles the `optd-core` crate. 
+default-members = ["optd/core"] + +[workspace.dependencies] + +optd-datafusion = { path = "connectors/datafusion" } + +tokio = { version = "1.47", features = ["macros", "rt", "sync"] } + +# DataFusion dependencies +datafusion = { version = "49.0.0", default-features = false } diff --git a/README.md b/README.md index 825da11..4d9b06b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,19 @@ # optd Query Optimizer Service. + + +## Get Started + +To interact with the CLI, run + +```bash +cargo run -p optd-cli +``` + +## Structure + +- `optd/core`: The core optimizer implementation (IR, properties, rules, cost model, cardinality estimation). +- `optd/catalog`: A persistent catalog implementation. +- `connectors/datafusion`: Utilities needed to use optd in DataFusion. +- `cli`: command line interface based on [`datafusion-cli`](https://datafusion.apache.org/user-guide/cli/index.html). \ No newline at end of file diff --git a/cli/Cargo.toml b/cli/Cargo.toml new file mode 100644 index 0000000..ac946c8 --- /dev/null +++ b/cli/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "optd-cli" +version = "0.1.0" +edition = "2024" + +[dependencies] +clap = { version = "4.5.41", features = ["derive", "cargo"] } +datafusion = { workspace = true } +datafusion-cli = "49.0.0" +tokio = { workspace = true, features = [ + "macros", + "rt", + "rt-multi-thread", + "sync", + "parking_lot", + "signal", +] } +dirs = "6.0.0" +regex = "1.8" +object_store = "0.12.3" +url = "2.5.4" diff --git a/cli/src/lib.rs b/cli/src/lib.rs new file mode 100644 index 0000000..a498eca --- /dev/null +++ b/cli/src/lib.rs @@ -0,0 +1,87 @@ +use datafusion::prelude::{DataFrame, SessionContext}; +use datafusion_cli::cli_context::CliSessionContext; + +pub struct OptdCliSessionContext { + inner: SessionContext, +} + +impl OptdCliSessionContext { + pub fn new(inner: SessionContext) -> Self { + Self { inner } + } + + pub fn inner(&self) -> &SessionContext { + &self.inner + } + + pub fn return_empty_dataframe(&self) -> 
datafusion::common::Result { + let plan = datafusion::logical_expr::LogicalPlanBuilder::empty(false).build()?; + Ok(DataFrame::new(self.inner.state(), plan)) + } +} + +impl CliSessionContext for OptdCliSessionContext { + fn task_ctx(&self) -> std::sync::Arc { + self.inner().task_ctx() + } + + fn session_state(&self) -> datafusion::execution::SessionState { + self.inner().state() + } + + fn register_object_store( + &self, + url: &url::Url, + object_store: std::sync::Arc, + ) -> Option> { + self.inner().register_object_store(url, object_store) + } + + fn register_table_options_extension_from_scheme(&self, scheme: &str) { + self.inner() + .register_table_options_extension_from_scheme(scheme); + } + + fn execute_logical_plan<'life0, 'async_trait>( + &'life0 self, + plan: datafusion::logical_expr::LogicalPlan, + ) -> ::core::pin::Pin< + Box< + dyn ::core::future::Future< + Output = Result< + datafusion::prelude::DataFrame, + datafusion::common::DataFusionError, + >, + > + ::core::marker::Send + + 'async_trait, + >, + > + where + 'life0: 'async_trait, + Self: 'async_trait, + { + let fut = async { + if let datafusion::logical_expr::LogicalPlan::Statement(stmt) = &plan { + match stmt { + datafusion::logical_expr::Statement::TransactionStart(_) => { + println!("START TRANSACTION"); + return self.return_empty_dataframe(); + } + datafusion::logical_expr::Statement::TransactionEnd(transaction_end) => { + use datafusion::logical_expr::TransactionConclusion; + match transaction_end.conclusion { + TransactionConclusion::Commit => println!("COMMIT"), + TransactionConclusion::Rollback => println!("ROLLBACK"), + } + return self.return_empty_dataframe(); + } + _ => (), + } + } + + self.inner.execute_logical_plan(plan).await + }; + + Box::pin(fut) + } +} diff --git a/cli/src/main.rs b/cli/src/main.rs new file mode 100644 index 0000000..d53e66d --- /dev/null +++ b/cli/src/main.rs @@ -0,0 +1,394 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor 
license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::HashMap; +use std::env; +use std::num::NonZeroUsize; +use std::path::Path; +use std::process::ExitCode; +use std::sync::{Arc, LazyLock}; + +use datafusion::error::{DataFusionError, Result}; +use datafusion::execution::context::SessionConfig; +use datafusion::execution::memory_pool::{ + FairSpillPool, GreedyMemoryPool, MemoryPool, TrackConsumersPool, +}; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::prelude::SessionContext; +use datafusion_cli::catalog::DynamicObjectStoreCatalog; +use datafusion_cli::functions::ParquetMetadataFunc; +use datafusion_cli::{ + DATAFUSION_CLI_VERSION, exec, + pool_type::PoolType, + print_format::PrintFormat, + print_options::{MaxRows, PrintOptions}, +}; + +use clap::Parser; +use datafusion::common::config_err; +use datafusion::config::ConfigOptions; +use datafusion::execution::disk_manager::{DiskManagerBuilder, DiskManagerMode}; + +use optd_cli::OptdCliSessionContext; + +#[derive(Debug, Parser, PartialEq)] +#[clap(author, version, about, long_about= None)] +struct Args { + #[clap( + short = 'p', + long, + help = "Path to your data, default to current directory", + value_parser(parse_valid_data_dir) + )] + data_path: Option, + + #[clap( + short = 'b', + long, + help = "The 
batch size of each query, or use DataFusion default", + value_parser(parse_batch_size) + )] + batch_size: Option, + + #[clap( + short = 'c', + long, + num_args = 0.., + help = "Execute the given command string(s), then exit. Commands are expected to be non empty.", + value_parser(parse_command) + )] + command: Vec, + + #[clap( + short = 'm', + long, + help = "The memory pool limitation (e.g. '10g'), default to None (no limit)", + value_parser(extract_memory_pool_size) + )] + memory_limit: Option, + + #[clap( + short, + long, + num_args = 0.., + help = "Execute commands from file(s), then exit", + value_parser(parse_valid_file) + )] + file: Vec, + + #[clap( + short = 'r', + long, + num_args = 0.., + help = "Run the provided files on startup instead of ~/.datafusionrc", + value_parser(parse_valid_file), + conflicts_with = "file" + )] + rc: Option>, + + #[clap(long, value_enum, default_value_t = PrintFormat::Automatic)] + format: PrintFormat, + + #[clap( + short, + long, + help = "Reduce printing other than the results and work quietly" + )] + quiet: bool, + + #[clap( + long, + help = "Specify the memory pool type 'greedy' or 'fair'", + default_value_t = PoolType::Greedy + )] + mem_pool_type: PoolType, + + #[clap( + long, + help = "The number of top memory consumers to display when query fails due to memory exhaustion. To disable memory consumer tracking, set this value to 0", + default_value = "3" + )] + top_memory_consumers: usize, + + #[clap( + long, + help = "The max number of rows to display for 'Table' format\n[possible values: numbers(0/10/...), inf(no limit)]", + default_value = "40" + )] + maxrows: MaxRows, + + #[clap(long, help = "Enables console syntax highlighting")] + color: bool, + + #[clap( + short = 'd', + long, + help = "Available disk space for spilling queries (e.g. 
'10g'), default to None (uses DataFusion's default value of '100g')", + value_parser(extract_disk_limit) + )] + disk_limit: Option, +} + +#[tokio::main] +/// Calls [`main_inner`], then handles printing errors and returning the correct exit code +pub async fn main() -> ExitCode { + if let Err(e) = main_inner().await { + println!("Error: {e}"); + return ExitCode::FAILURE; + } + + ExitCode::SUCCESS +} + +pub const OPTD_CLI_VERSION: &str = env!("CARGO_PKG_VERSION"); + +/// Main CLI entrypoint +async fn main_inner() -> Result<()> { + let args = Args::parse(); + + if !args.quiet { + println!("DataFusion CLI v{DATAFUSION_CLI_VERSION} (optd's edition v{OPTD_CLI_VERSION})"); + } + + if let Some(ref path) = args.data_path { + let p = Path::new(path); + env::set_current_dir(p).unwrap(); + }; + + let session_config = get_session_config(&args)?; + + let mut rt_builder = RuntimeEnvBuilder::new(); + // set memory pool size + if let Some(memory_limit) = args.memory_limit { + // set memory pool type + let pool: Arc = match args.mem_pool_type { + PoolType::Fair if args.top_memory_consumers == 0 => { + Arc::new(FairSpillPool::new(memory_limit)) + } + PoolType::Fair => Arc::new(TrackConsumersPool::new( + FairSpillPool::new(memory_limit), + NonZeroUsize::new(args.top_memory_consumers).unwrap(), + )), + PoolType::Greedy if args.top_memory_consumers == 0 => { + Arc::new(GreedyMemoryPool::new(memory_limit)) + } + PoolType::Greedy => Arc::new(TrackConsumersPool::new( + GreedyMemoryPool::new(memory_limit), + NonZeroUsize::new(args.top_memory_consumers).unwrap(), + )), + }; + + rt_builder = rt_builder.with_memory_pool(pool) + } + + // set disk limit + if let Some(disk_limit) = args.disk_limit { + let builder = DiskManagerBuilder::default() + .with_mode(DiskManagerMode::OsTmpDirectory) + .with_max_temp_directory_size(disk_limit.try_into().unwrap()); + rt_builder = rt_builder.with_disk_manager_builder(builder); + } + + let runtime_env = rt_builder.build_arc()?; + + // enable dynamic file query 
+ let ctx = SessionContext::new_with_config_rt(session_config, runtime_env).enable_url_table(); + + ctx.refresh_catalogs().await?; + // install dynamic catalog provider that can register required object stores + ctx.register_catalog_list(Arc::new(DynamicObjectStoreCatalog::new( + ctx.state().catalog_list().clone(), + ctx.state_weak_ref(), + ))); + + // register `parquet_metadata` table function to get metadata from parquet files + ctx.register_udtf("parquet_metadata", Arc::new(ParquetMetadataFunc {})); + + let ctx = OptdCliSessionContext::new(ctx); + + let mut print_options = PrintOptions { + format: args.format, + quiet: args.quiet, + maxrows: args.maxrows, + color: args.color, + }; + + let commands = args.command; + let files = args.file; + let rc = match args.rc { + Some(file) => file, + None => { + let mut files = Vec::new(); + let home = dirs::home_dir(); + if let Some(p) = home { + let home_rc = p.join(".datafusionrc"); + if home_rc.exists() { + files.push(home_rc.into_os_string().into_string().unwrap()); + } + } + files + } + }; + + if commands.is_empty() && files.is_empty() { + if !rc.is_empty() { + exec::exec_from_files(&ctx, rc, &print_options).await?; + } + // TODO maybe we can have thiserror for cli but for now let's keep it simple + return exec::exec_from_repl(&ctx, &mut print_options) + .await + .map_err(|e| DataFusionError::External(Box::new(e))); + } + + if !files.is_empty() { + exec::exec_from_files(&ctx, files, &print_options).await?; + } + + if !commands.is_empty() { + exec::exec_from_commands(&ctx, commands, &print_options).await?; + } + + Ok(()) +} + +/// Get the session configuration based on the provided arguments +/// and environment settings. 
+fn get_session_config(args: &Args) -> Result { + // Read options from environment variables and merge with command line options + let mut config_options = ConfigOptions::from_env()?; + + if let Some(batch_size) = args.batch_size { + if batch_size == 0 { + return config_err!("batch_size must be greater than 0"); + } + config_options.execution.batch_size = batch_size; + }; + + // use easier to understand "tree" mode by default + // if the user hasn't specified an explain format in the environment + if env::var_os("DATAFUSION_EXPLAIN_FORMAT").is_none() { + config_options.explain.format = String::from("tree"); + } + + // in the CLI, we want to show NULL values rather the empty strings + if env::var_os("DATAFUSION_FORMAT_NULL").is_none() { + config_options.format.null = String::from("NULL"); + } + + let session_config = SessionConfig::from(config_options).with_information_schema(true); + Ok(session_config) +} + +fn parse_valid_file(dir: &str) -> Result { + if Path::new(dir).is_file() { + Ok(dir.to_string()) + } else { + Err(format!("Invalid file '{dir}'")) + } +} + +fn parse_valid_data_dir(dir: &str) -> Result { + if Path::new(dir).is_dir() { + Ok(dir.to_string()) + } else { + Err(format!("Invalid data directory '{dir}'")) + } +} + +fn parse_batch_size(size: &str) -> Result { + match size.parse::() { + Ok(size) if size > 0 => Ok(size), + _ => Err(format!("Invalid batch size '{size}'")), + } +} + +fn parse_command(command: &str) -> Result { + if !command.is_empty() { + Ok(command.to_string()) + } else { + Err("-c flag expects only non empty commands".to_string()) + } +} + +#[derive(Debug, Clone, Copy)] +enum ByteUnit { + Byte, + KiB, + MiB, + GiB, + TiB, +} + +impl ByteUnit { + fn multiplier(&self) -> u64 { + match self { + ByteUnit::Byte => 1, + ByteUnit::KiB => 1 << 10, + ByteUnit::MiB => 1 << 20, + ByteUnit::GiB => 1 << 30, + ByteUnit::TiB => 1 << 40, + } + } +} + +fn parse_size_string(size: &str, label: &str) -> Result { + static BYTE_SUFFIXES: LazyLock> = 
LazyLock::new(|| { + let mut m = HashMap::new(); + m.insert("b", ByteUnit::Byte); + m.insert("k", ByteUnit::KiB); + m.insert("kb", ByteUnit::KiB); + m.insert("m", ByteUnit::MiB); + m.insert("mb", ByteUnit::MiB); + m.insert("g", ByteUnit::GiB); + m.insert("gb", ByteUnit::GiB); + m.insert("t", ByteUnit::TiB); + m.insert("tb", ByteUnit::TiB); + m + }); + + static SUFFIX_REGEX: LazyLock = + LazyLock::new(|| regex::Regex::new(r"^(-?[0-9]+)([a-z]+)?$").unwrap()); + + let lower = size.to_lowercase(); + if let Some(caps) = SUFFIX_REGEX.captures(&lower) { + let num_str = caps.get(1).unwrap().as_str(); + let num = num_str + .parse::() + .map_err(|_| format!("Invalid numeric value in {label} '{size}'"))?; + + let suffix = caps.get(2).map(|m| m.as_str()).unwrap_or("b"); + let unit = BYTE_SUFFIXES + .get(suffix) + .ok_or_else(|| format!("Invalid {label} '{size}'"))?; + let total_bytes = usize::try_from(unit.multiplier()) + .ok() + .and_then(|multiplier| num.checked_mul(multiplier)) + .ok_or_else(|| format!("{label} '{size}' is too large"))?; + + Ok(total_bytes) + } else { + Err(format!("Invalid {label} '{size}'")) + } +} + +pub fn extract_memory_pool_size(size: &str) -> Result { + parse_size_string(size, "memory pool size") +} + +pub fn extract_disk_limit(size: &str) -> Result { + parse_size_string(size, "disk limit") +} diff --git a/connectors/datafusion/Cargo.toml b/connectors/datafusion/Cargo.toml new file mode 100644 index 0000000..aa1ad5e --- /dev/null +++ b/connectors/datafusion/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "optd-datafusion" +version = "0.1.0" +edition = "2024" + +[dependencies] +datafusion = { workspace = true } diff --git a/connectors/datafusion/src/extension.rs b/connectors/datafusion/src/extension.rs new file mode 100644 index 0000000..141234f --- /dev/null +++ b/connectors/datafusion/src/extension.rs @@ -0,0 +1,2 @@ +/// The optd datafusion extension used to store shared state. 
+pub struct OptdExtension; diff --git a/connectors/datafusion/src/lib.rs b/connectors/datafusion/src/lib.rs new file mode 100644 index 0000000..739cc12 --- /dev/null +++ b/connectors/datafusion/src/lib.rs @@ -0,0 +1,5 @@ +mod extension; +mod planner; + +pub use extension::OptdExtension; +pub use planner::OptdQueryPlanner; diff --git a/connectors/datafusion/src/planner.rs b/connectors/datafusion/src/planner.rs new file mode 100644 index 0000000..bead438 --- /dev/null +++ b/connectors/datafusion/src/planner.rs @@ -0,0 +1,43 @@ +use std::sync::Arc; + +use datafusion::{ + execution::{SessionState, context::QueryPlanner}, + logical_expr::LogicalPlan, + physical_plan::ExecutionPlan, +}; + +#[derive(Debug)] +pub struct OptdQueryPlanner; + +impl OptdQueryPlanner { + /// Optd's actual implementation of [`QueryPlanner::create_physical_plan`]. + async fn create_physical_plan_inner( + &self, + _logical_plan: &LogicalPlan, + _session_state: &SessionState, + ) -> datafusion::common::Result> { + todo!() + } +} + +impl QueryPlanner for OptdQueryPlanner { + fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>( + &'life0 self, + logical_plan: &'life1 LogicalPlan, + session_state: &'life2 SessionState, + ) -> ::core::pin::Pin< + Box< + dyn Future>> + + Send + + 'async_trait, + >, + > + where + 'life0: 'async_trait, + 'life1: 'async_trait, + 'life2: 'async_trait, + Self: 'async_trait, + { + Box::pin(self.create_physical_plan_inner(logical_plan, session_state)) + } +} diff --git a/optd/core/Cargo.toml b/optd/core/Cargo.toml index 62ead8d..ff2ca53 100644 --- a/optd/core/Cargo.toml +++ b/optd/core/Cargo.toml @@ -8,13 +8,14 @@ anyhow = "1.0.98" bitvec = "1.0.1" itertools = "0.14.0" pretty-xmlish = "0.1.13" -tokio = { version = "1.47.0", features = [ +tokio = { workspace = true, features = [ "rt-multi-thread", "sync", "macros", "parking_lot", "tracing", ] } + tracing = "0.1.41" [dev-dependencies] From deeac21d4a44556c4ca6a185a58f10da5c8b77cd Mon Sep 17 00:00:00 2001 From: Yuchen 
Liang <70461588+yliang412@users.noreply.github.com> Date: Fri, 8 Aug 2025 14:08:22 -0700 Subject: [PATCH 04/40] setup ci (#5) Signed-off-by: Yuchen Liang --- .github/DOCS.md | 23 +++ .github/dependabot.yml | 19 +++ .github/pull_request_template.md | 3 + .github/workflows/check.yml | 116 ++++++++++++++ .github/workflows/test.yml | 141 ++++++++++++++++++ Cargo.toml | 7 +- cli/Cargo.toml | 5 +- connectors/datafusion/Cargo.toml | 5 +- optd/catalog/Cargo.toml | 5 +- optd/core/Cargo.toml | 5 +- .../src/rules/logical_join_inner_assoc.rs | 4 +- optd/core/src/utility/union_find.rs | 6 +- 12 files changed, 323 insertions(+), 16 deletions(-) create mode 100644 .github/DOCS.md create mode 100644 .github/dependabot.yml create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/check.yml create mode 100644 .github/workflows/test.yml diff --git a/.github/DOCS.md b/.github/DOCS.md new file mode 100644 index 0000000..e932784 --- /dev/null +++ b/.github/DOCS.md @@ -0,0 +1,23 @@ +# Github config and workflows + +In this folder there is configuration for code coverage, dependabot, and ci +workflows that check the library more deeply than the default configurations. + +This folder can be or was merged using a --allow-unrelated-histories merge +strategy from <https://github.com/jonhoo/rust-ci-conf/> which provides a +reasonably sensible base for writing your own ci on. By using this strategy +the history of the CI repo is included in your repo, and future updates to +the CI can be merged later. + +To perform this merge run: + +```shell +git remote add ci https://github.com/jonhoo/rust-ci-conf.git +git fetch ci +git merge --allow-unrelated-histories ci/main +``` + +An overview of the files in this project is available at: +<https://www.youtube.com/watch?v=xUH-4y92jPg&t=491s>, which contains some +rationale for decisions and runs through an example of solving minimal version +and OpenSSL issues. 
diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..d0f091e --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,19 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + schedule: + interval: daily + - package-ecosystem: cargo + directory: / + schedule: + interval: daily + ignore: + - dependency-name: "*" + # patch and minor updates don't matter for libraries as consumers of this library build + # with their own lockfile, rather than the version specified in this library's lockfile + # remove this ignore rule if your package has binaries to ensure that the binaries are + # built with the exact set of dependencies and those are up to date. + update-types: + - "version-update:semver-patch" + - "version-update:semver-minor" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..89328f2 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,3 @@ +## Problem + +## Summary of changes diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml new file mode 100644 index 0000000..b2da25e --- /dev/null +++ b/.github/workflows/check.yml @@ -0,0 +1,116 @@ +# Taken from https://github.com/jonhoo/rust-ci-conf/blob/main/.github/workflows/check.yml + +# This workflow runs whenever a PR is opened or updated, or a commit is pushed to main. It runs +# several checks: +# - fmt: checks that the code is formatted according to rustfmt +# - clippy: checks that the code does not contain any clippy warnings +# - doc: checks that the code can be documented without errors +# - hack: check combinations of feature flags +# - msrv: check that the msrv specified in the crate is correct +permissions: + contents: read +# This configuration allows maintainers of this repo to create a branch and pull request based on +# the new branch. Restricting the push trigger to the main branch ensures that the PR only gets +# built once. 
+on: + push: + branches: [main] + pull_request: +# If new code is pushed to a PR branch, then cancel in progress workflows for that PR. Ensures that +# we don't waste CI time, and returns results quicker https://github.com/jonhoo/rust-ci-conf/pull/5 +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true +name: check +jobs: + fmt: + runs-on: ubuntu-latest + name: stable / fmt + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install stable + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + - name: cargo fmt --check + run: cargo fmt --check + clippy: + runs-on: ubuntu-latest + name: ${{ matrix.toolchain }} / clippy + permissions: + contents: read + checks: write + strategy: + fail-fast: false + matrix: + # Get early warning of new lints which are regularly introduced in beta channels. + toolchain: [stable, beta] + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install ${{ matrix.toolchain }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.toolchain }} + components: clippy + - name: cargo clippy + uses: giraffate/clippy-action@v1 + with: + clippy_flags: --all-targets -- Dwarnings + reporter: github-pr-check + fail_on_error: true + github_token: ${{ secrets.GITHUB_TOKEN }} + # # Since this is not a proper library on crates.io we do not need this. 
+ # semver: + # runs-on: ubuntu-latest + # name: semver + # steps: + # - uses: actions/checkout@v4 + # with: + # submodules: true + # - name: Install stable + # uses: dtolnay/rust-toolchain@stable + # with: + # components: rustfmt + # - name: cargo-semver-checks + # uses: obi1kenobi/cargo-semver-checks-action@v2 + hack: + # cargo-hack checks combinations of feature flags to ensure that features are all additive + # which is required for feature unification + runs-on: ubuntu-latest + name: ubuntu / stable / features + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install stable + uses: dtolnay/rust-toolchain@stable + - name: cargo install cargo-hack + uses: taiki-e/install-action@cargo-hack + # intentionally no target specifier; see https://github.com/jonhoo/rust-ci-conf/pull/4 + # --feature-powerset runs for every combination of features + - name: cargo hack + run: cargo hack --feature-powerset check + msrv: + # check that we can build using the minimal rust version that is specified by this crate + runs-on: ubuntu-latest + # we use a matrix here just because env can't be used in job names + # https://docs.github.com/en/actions/learn-github-actions/contexts#context-availability + strategy: + matrix: + # Rust 2024 Edition MSRV + msrv: ["1.85.0"] + name: ubuntu / ${{ matrix.msrv }} + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install ${{ matrix.msrv }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.msrv }} + - name: cargo +${{ matrix.msrv }} check + run: cargo check \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..05ed8f5 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,141 @@ +# Taken from https://github.com/jonhoo/rust-ci-conf/blob/main/.github/workflows/test.yml + +# This is the main CI workflow that runs the test suite on all pushes to main and all pull requests. 
+# It runs the following jobs: +# - required: runs the test suite on ubuntu with stable and beta rust toolchains +# - minimal: runs the test suite with the minimal versions of the dependencies that satisfy the +# requirements of this crate, and its dependencies +# - os-check: runs the test suite on mac and windows +# - coverage: runs the test suite and collects coverage information +# See check.yml for information about how the concurrency cancellation and workflow triggering works +permissions: + contents: read +on: + push: + branches: [main] + pull_request: +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true +name: test +jobs: + required: + runs-on: ubuntu-latest + name: ubuntu / ${{ matrix.toolchain }} + strategy: + matrix: + # run on stable and beta to ensure that tests won't break on the next version of the rust + # toolchain + toolchain: [stable, beta] + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install ${{ matrix.toolchain }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.toolchain }} + - name: cargo generate-lockfile + # enable this ci template to run regardless of whether the lockfile is checked in or not + if: hashFiles('Cargo.lock') == '' + run: cargo generate-lockfile + # https://twitter.com/jonhoo/status/1571290371124260865 + - name: cargo test --locked + run: cargo test --locked --all-features --all-targets + # https://github.com/rust-lang/cargo/issues/6669 + - name: cargo test --doc + run: cargo test --locked --all-features --doc + minimal: + # This action chooses the oldest version of the dependencies permitted by Cargo.toml to ensure + # that this crate is compatible with the minimal version that this crate and its dependencies + # require. 
This will pick up issues where this crate relies on functionality that was introduced + # later than the actual version specified (e.g., when we choose just a major version, but a + # method was added after this version). + runs-on: ubuntu-latest + name: ubuntu / stable / minimal-versions + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install stable + uses: dtolnay/rust-toolchain@stable + - name: Install nightly for -Z direct-minimal-versions + uses: dtolnay/rust-toolchain@nightly + - name: rustup default stable + run: rustup default stable + - name: cargo update -Z direct-minimal-versions + run: cargo +nightly update -Z direct-minimal-versions + - name: cargo test + run: cargo test --locked --all-features --all-targets + os-check: + # run cargo test on mac and windows + runs-on: ${{ matrix.os }} + name: ${{ matrix.os }} / stable + strategy: + fail-fast: false + matrix: + os: [macos-latest, windows-latest] + steps: + # if your project needs OpenSSL, uncomment this to fix Windows builds. + # it's commented out by default as the install command takes 5-10m. 
+ # - run: echo "VCPKG_ROOT=$env:VCPKG_INSTALLATION_ROOT" | Out-File -FilePath $env:GITHUB_ENV -Append + # if: runner.os == Windows + # - run: vcpkg install openssl:x64-windows-static-md + # if: runner.os == Windows + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install stable + uses: dtolnay/rust-toolchain@stable + - name: cargo generate-lockfile + if: hashFiles('Cargo.lock') == '' + run: cargo generate-lockfile + - name: cargo test + run: cargo test --locked --all-features --all-targets + # coverage: + # # use llvm-cov to build and collect coverage and outputs in a format that + # # is compatible with codecov.io + # # + # # note that codecov as of v4 requires that CODECOV_TOKEN from + # # + # # https://app.codecov.io/gh/<user>/<project>/settings + # # + # # is set in two places on your repo: + # # + # # - https://github.com/jonhoo/guardian/settings/secrets/actions + # # - https://github.com/jonhoo/guardian/settings/secrets/dependabot + # # + # # (the former is needed for codecov uploads to work with Dependabot PRs) + # # + # # PRs coming from forks of your repo will not have access to the token, but + # # for those, codecov allows uploading coverage reports without a token. + # # it's all a little weird and inconvenient. 
see + # # + # # https://github.com/codecov/feedback/issues/112 + # # + # # for lots of more discussion + # runs-on: ubuntu-latest + # name: ubuntu / stable / coverage + # steps: + # - uses: actions/checkout@v4 + # with: + # submodules: true + # - name: Install stable + # uses: dtolnay/rust-toolchain@stable + # with: + # components: llvm-tools-preview + # - name: cargo install cargo-llvm-cov + # uses: taiki-e/install-action@cargo-llvm-cov + # - name: cargo generate-lockfile + # if: hashFiles('Cargo.lock') == '' + # run: cargo generate-lockfile + # - name: cargo llvm-cov + # run: cargo llvm-cov --locked --all-features --lcov --output-path lcov.info + # - name: Record Rust version + # run: echo "RUST=$(rustc --version)" >> "$GITHUB_ENV" + # - name: Upload to codecov.io + # uses: codecov/codecov-action@v5 + # with: + # fail_ci_if_error: true + # token: ${{ secrets.CODECOV_TOKEN }} + # env_vars: OS,RUST \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 5d9005b..4e9730b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -resolver = "3" +resolver = "2" members = ["cli", "connectors/datafusion", "optd/catalog", "optd/core"] # By default, only compiles the `optd-core` crate. 
@@ -13,3 +13,8 @@ tokio = { version = "1.47", features = ["macros", "rt", "sync"] } # DataFusion dependencies datafusion = { version = "49.0.0", default-features = false } + +[workspace.package] +version = "0.1.0" +edition = "2024" +repository = "https://github.com/yliang412/optd" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index ac946c8..a489eb5 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,8 @@ [package] name = "optd-cli" -version = "0.1.0" -edition = "2024" +version.workspace = true +edition.workspace = true +repository.workspace = true [dependencies] clap = { version = "4.5.41", features = ["derive", "cargo"] } diff --git a/connectors/datafusion/Cargo.toml b/connectors/datafusion/Cargo.toml index aa1ad5e..3f0f863 100644 --- a/connectors/datafusion/Cargo.toml +++ b/connectors/datafusion/Cargo.toml @@ -1,7 +1,8 @@ [package] name = "optd-datafusion" -version = "0.1.0" -edition = "2024" +version.workspace = true +edition.workspace = true +repository.workspace = true [dependencies] datafusion = { workspace = true } diff --git a/optd/catalog/Cargo.toml b/optd/catalog/Cargo.toml index 172fb96..bec95c2 100644 --- a/optd/catalog/Cargo.toml +++ b/optd/catalog/Cargo.toml @@ -1,7 +1,8 @@ [package] name = "optd-catalog" -version = "0.1.0" -edition = "2024" +version.workspace = true +edition.workspace = true +repository.workspace = true [dependencies] datafusion = "=49.0.0" diff --git a/optd/core/Cargo.toml b/optd/core/Cargo.toml index ff2ca53..2d1681d 100644 --- a/optd/core/Cargo.toml +++ b/optd/core/Cargo.toml @@ -1,7 +1,8 @@ [package] name = "optd-core" -version = "0.1.0" -edition = "2024" +version.workspace = true +edition.workspace = true +repository.workspace = true [dependencies] anyhow = "1.0.98" diff --git a/optd/core/src/rules/logical_join_inner_assoc.rs b/optd/core/src/rules/logical_join_inner_assoc.rs index 8d127bf..5c36418 100644 --- a/optd/core/src/rules/logical_join_inner_assoc.rs +++ b/optd/core/src/rules/logical_join_inner_assoc.rs @@ -57,10 
+57,10 @@ impl Rule for LogicalJoinInnerAssocRule { let new_lower_columns = b.get_property::(ctx).set() & c.get_property::(ctx).set(); - if join_upper + if !join_upper .join_cond() .used_columns() - .is_superset(&new_lower_columns) + .is_subset(&new_lower_columns) { return Ok(vec![]); } diff --git a/optd/core/src/utility/union_find.rs b/optd/core/src/utility/union_find.rs index 1a0d234..8f8d668 100644 --- a/optd/core/src/utility/union_find.rs +++ b/optd/core/src/utility/union_find.rs @@ -198,11 +198,7 @@ mod tests { // All should now point to 1 for i in 1..=6 { - assert_eq!( - repr.find(&i), - 1, - "Element {i} should have representative 4" - ); + assert_eq!(repr.find(&i), 1, "Element {i} should have representative 4"); } // Another element joins From 746468c4f64aa6af6bfbc4872719d73d42740eab Mon Sep 17 00:00:00 2001 From: Yuchen Liang <70461588+yliang412@users.noreply.github.com> Date: Mon, 11 Aug 2025 11:02:53 -0700 Subject: [PATCH 05/40] core: clean up methods on the IR (#6) ## Problem Some methods were added to the IR as experimental features. We also got feedback from the dev meeting that the rule seems hard to read (or long). We would like to clean up these rough edges. ## Summary of changes - eliminate `try_bind_ref_xxx` and use `try_borrow` - add `borrow_raw_parts` so we always refer to `$node_name` instead of `$ref_name`. - Plumb through property methods to use shorthand. **_TODO:_** Pattern builder can also be generated by macros. 
--------- Signed-off-by: Yuchen Liang --- optd/core/src/ir/convert.rs | 12 +--- optd/core/src/ir/macros.rs | 22 +++---- optd/core/src/ir/operator/logical/get.rs | 20 ++----- optd/core/src/ir/operator/logical/order_by.rs | 4 +- optd/core/src/ir/operator/mod.rs | 16 ++--- .../src/ir/operator/physical/table_scan.rs | 12 +--- optd/core/src/ir/properties/cardinality.rs | 21 +++++-- optd/core/src/ir/properties/mod.rs | 14 +++-- optd/core/src/ir/properties/output_columns.rs | 46 ++++++++++----- optd/core/src/ir/properties/required.rs | 4 +- optd/core/src/ir/properties/tuple_ordering.rs | 21 ++++--- optd/core/src/ir/scalar/assign.rs | 2 +- optd/core/src/ir/scalar/mod.rs | 6 +- optd/core/src/ir/scalar/projection_list.rs | 4 +- optd/core/src/magic/card.rs | 38 ++++++------ optd/core/src/magic/cm.rs | 15 +++-- .../core/src/rules/implementations/nl_join.rs | 30 +++------- .../src/rules/implementations/table_scan.rs | 2 +- .../src/rules/logical_join_inner_assoc.rs | 58 +++++-------------- .../src/rules/logical_join_inner_commute.rs | 14 ++--- optd/core/tests/it_works.rs | 2 +- 21 files changed, 162 insertions(+), 201 deletions(-) diff --git a/optd/core/src/ir/convert.rs b/optd/core/src/ir/convert.rs index a5ff717..a61c0e1 100644 --- a/optd/core/src/ir/convert.rs +++ b/optd/core/src/ir/convert.rs @@ -26,11 +26,7 @@ impl Operator { T::try_from_operator(self) } - pub fn try_bind_ref(&self) -> Result { - T::try_from_operator(self.clone()) - } - - pub fn try_bind_ref_experimental( + pub fn try_borrow( &self, ) -> Result<>::BorrowedType, &OperatorKind> where @@ -64,11 +60,7 @@ impl Scalar { T::try_from_scalar(self) } - pub fn try_bind_ref(&self) -> Result { - T::try_from_scalar(self.clone()) - } - - pub fn try_bind_ref_experimental( + pub fn try_borrow( &self, ) -> Result<>::BorrowedType, &ScalarKind> where diff --git a/optd/core/src/ir/macros.rs b/optd/core/src/ir/macros.rs index a886917..315b1a4 100644 --- a/optd/core/src/ir/macros.rs +++ b/optd/core/src/ir/macros.rs @@ -133,6 
+133,14 @@ macro_rules! generate_common_node { } } + /// Constructs the operator from raw metadata and IR inputs. + pub fn borrow_raw_parts<'ir>(meta: &'ir $metadata_type, common: &'ir crate::ir::IRCommon<$props_type>) -> $ref_name<'ir> { + $ref_name { + meta, + common, + } + } + /// Gets a slice to the input operators. pub fn input_operators(&self) -> &[std::sync::Arc] { &self.common.input_operators @@ -144,15 +152,7 @@ macro_rules! generate_common_node { } } - impl<'ir> $ref_name<'ir> { - /// Constructs the operator from raw metadata and IR inputs. - pub fn from_raw_parts(meta: &'ir $metadata_type, common: &'ir crate::ir::IRCommon<$props_type>) -> Self { - Self { - meta, - common, - } - } - } + }; } @@ -264,7 +264,7 @@ macro_rules! impl_scalar_conversion { ) -> Result { match &scalar.kind { crate::ir::ScalarKind::$node_name(meta) => { - Ok($ref_name::from_raw_parts(meta, &scalar.common)) + Ok($node_name::borrow_raw_parts(meta, &scalar.common)) } other_kind => Err(other_kind), } @@ -308,7 +308,7 @@ macro_rules! 
impl_operator_conversion { ) -> Result { match &operator.kind { crate::ir::OperatorKind::$node_name(meta) => { - Ok($ref_name::from_raw_parts(meta, &operator.common)) + Ok($node_name::borrow_raw_parts(meta, &operator.common)) } other_kind => Err(other_kind), } diff --git a/optd/core/src/ir/operator/logical/get.rs b/optd/core/src/ir/operator/logical/get.rs index ac0fb25..5610d44 100644 --- a/optd/core/src/ir/operator/logical/get.rs +++ b/optd/core/src/ir/operator/logical/get.rs @@ -31,22 +31,14 @@ impl LogicalGet { } } -impl Derive for LogicalGet { +impl Derive for LogicalGetBorrowed<'_> { fn derive_by_compute(&self, _ctx: &crate::ir::context::IRContext) -> OutputColumns { let projections = self .projection_list() - .try_bind_ref::() + .try_borrow::() .expect("projection_list should typecheck"); OutputColumns::from_column_set(projections.get_all_assignees().collect()) } - - fn derive(&self, ctx: &crate::ir::context::IRContext) -> OutputColumns { - self.common - .properties - .output_columns - .get_or_init(|| self.derive_by_compute(ctx)) - .clone() - } } #[cfg(test)] @@ -68,7 +60,7 @@ impl LogicalGet { #[cfg(test)] mod tests { - use crate::ir::{Column, context::IRContext, convert::IntoOperator, properties::GetProperty}; + use crate::ir::{Column, context::IRContext, convert::IntoOperator}; use super::*; @@ -76,11 +68,11 @@ mod tests { fn logical_get_construct_and_access() { let ctx = IRContext::with_course_tables(); let op = LogicalGet::mock(vec![0, 1]).into_operator(); - let output_columns = op.get_property::(&ctx); - let set = output_columns.set(); + let output_columns = op.output_columns(&ctx); + let set = &*output_columns; assert_eq!(set.len(), 2); assert!(set.contains(&Column(0))); assert!(set.contains(&Column(1))); - assert!(op.try_bind_ref::().is_ok()); + assert!(op.try_borrow::().is_ok()); } } diff --git a/optd/core/src/ir/operator/logical/order_by.rs b/optd/core/src/ir/operator/logical/order_by.rs index abdc898..cd33040 100644 --- 
a/optd/core/src/ir/operator/logical/order_by.rs +++ b/optd/core/src/ir/operator/logical/order_by.rs @@ -47,7 +47,7 @@ impl LogicalOrderBy { .exprs() .iter() .map(|expr| { - expr.try_bind_ref::() + expr.try_borrow::() .map(|column_ref| *column_ref.column()) .map_err(|_| expr.clone()) }) @@ -131,6 +131,6 @@ mod tests { let res = order_by.try_extract_tuple_ordering().unwrap_err(); assert_eq!(res.len(), 1); - res[0].try_bind_ref::().unwrap(); + assert!(res[0].try_borrow::().is_ok()) } } diff --git a/optd/core/src/ir/operator/mod.rs b/optd/core/src/ir/operator/mod.rs index d32aa9e..4fe4ecb 100644 --- a/optd/core/src/ir/operator/mod.rs +++ b/optd/core/src/ir/operator/mod.rs @@ -21,7 +21,7 @@ pub use physical::mock_scan::*; use crate::ir::explain::Explain; use crate::ir::properties::OperatorProperties; -use crate::ir::{GroupBorrowed, GroupId, GroupMetadata, IRCommon, Scalar}; +use crate::ir::{Group, GroupId, GroupMetadata, IRCommon, Scalar}; /// The operator type and its associated metadata. #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -124,27 +124,27 @@ impl Explain for Operator { ) -> pretty_xmlish::Pretty<'a> { match &self.kind { OperatorKind::Group(meta) => { - GroupBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + Group::borrow_raw_parts(meta, &self.common).explain(ctx, option) } OperatorKind::MockScan(meta) => { - MockScanBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + MockScan::borrow_raw_parts(meta, &self.common).explain(ctx, option) } OperatorKind::LogicalGet(_) => todo!(), OperatorKind::LogicalJoin(meta) => { - LogicalJoinBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + LogicalJoin::borrow_raw_parts(meta, &self.common).explain(ctx, option) } OperatorKind::LogicalSelect(meta) => { - LogicalSelectBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + LogicalSelect::borrow_raw_parts(meta, &self.common).explain(ctx, option) } OperatorKind::EnforcerSort(meta) => { - 
EnforcerSortBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + EnforcerSort::borrow_raw_parts(meta, &self.common).explain(ctx, option) } OperatorKind::PhysicalTableScan(_) => todo!(), OperatorKind::PhysicalNLJoin(meta) => { - PhysicalNLJoinBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + PhysicalNLJoin::borrow_raw_parts(meta, &self.common).explain(ctx, option) } OperatorKind::PhysicalFilter(meta) => { - PhysicalFilterBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + PhysicalFilter::borrow_raw_parts(meta, &self.common).explain(ctx, option) } } } diff --git a/optd/core/src/ir/operator/physical/table_scan.rs b/optd/core/src/ir/operator/physical/table_scan.rs index 52afa27..0fac9de 100644 --- a/optd/core/src/ir/operator/physical/table_scan.rs +++ b/optd/core/src/ir/operator/physical/table_scan.rs @@ -31,22 +31,14 @@ impl PhysicalTableScan { } } -impl Derive for PhysicalTableScan { +impl Derive for PhysicalTableScanBorrowed<'_> { fn derive_by_compute(&self, _ctx: &crate::ir::context::IRContext) -> OutputColumns { let projections = self .projection_list() - .try_bind_ref::() + .try_borrow::() .unwrap(); OutputColumns::from_column_set(projections.get_all_assignees().collect()) } - - fn derive(&self, ctx: &crate::ir::context::IRContext) -> OutputColumns { - self.common - .properties - .output_columns - .get_or_init(|| self.derive_by_compute(ctx)) - .clone() - } } #[cfg(test)] diff --git a/optd/core/src/ir/properties/cardinality.rs b/optd/core/src/ir/properties/cardinality.rs index 2fb38e2..1059102 100644 --- a/optd/core/src/ir/properties/cardinality.rs +++ b/optd/core/src/ir/properties/cardinality.rs @@ -1,7 +1,7 @@ use crate::ir::{ Operator, context::IRContext, - properties::{Derive, PropertyMarker}, + properties::{Derive, GetProperty, PropertyMarker}, }; /// The number of tuples that can be produced by an [`Operator`]. 
@@ -36,19 +36,30 @@ impl Cardinality { } } -impl PropertyMarker for Cardinality {} +impl PropertyMarker for Cardinality { + type Output = Self; +} impl Derive for crate::ir::Operator { - fn derive_by_compute(&self, ctx: &crate::ir::context::IRContext) -> Cardinality { + fn derive_by_compute(&self, ctx: &IRContext) -> ::Output { ctx.card.estimate(self, ctx) } - fn derive(&self, ctx: &crate::ir::context::IRContext) -> Cardinality { + fn derive( + &self, + ctx: &crate::ir::context::IRContext, + ) -> ::Output { *self .common .properties .cardinality - .get_or_init(|| self.derive_by_compute(ctx)) + .get_or_init(|| >::derive_by_compute(self, ctx)) + } +} + +impl crate::ir::Operator { + pub fn cardinality(&self, ctx: &crate::ir::context::IRContext) -> Cardinality { + self.get_property::(ctx) } } diff --git a/optd/core/src/ir/properties/mod.rs b/optd/core/src/ir/properties/mod.rs index fba1850..612e7b0 100644 --- a/optd/core/src/ir/properties/mod.rs +++ b/optd/core/src/ir/properties/mod.rs @@ -21,18 +21,20 @@ pub struct OperatorProperties { #[derive(Debug, Default)] pub struct ScalarProperties; -pub trait PropertyMarker {} +pub trait PropertyMarker { + type Output; +} pub trait Derive { - fn derive_by_compute(&self, ctx: &IRContext) -> P; + fn derive_by_compute(&self, ctx: &IRContext) -> P::Output; - fn derive(&self, ctx: &IRContext) -> P { + fn derive(&self, ctx: &IRContext) -> P::Output { self.derive_by_compute(ctx) } } pub trait GetProperty { - fn get_property

(&self, ctx: &IRContext) -> P + fn get_property

(&self, ctx: &IRContext) -> P::Output where Self: Derive

, P: PropertyMarker, @@ -41,6 +43,8 @@ pub trait GetProperty { } } -pub trait TrySatisfy { +impl GetProperty for crate::ir::Operator {} + +pub trait TrySatisfy

{ fn try_satisfy(&self, property: &P, ctx: &IRContext) -> Option>; } diff --git a/optd/core/src/ir/properties/output_columns.rs b/optd/core/src/ir/properties/output_columns.rs index 2711816..2288619 100644 --- a/optd/core/src/ir/properties/output_columns.rs +++ b/optd/core/src/ir/properties/output_columns.rs @@ -1,4 +1,4 @@ -use std::{collections::HashSet, sync::Arc}; +use std::{collections::HashSet, ops::Deref, sync::Arc}; use itertools::Itertools; @@ -16,32 +16,44 @@ impl OutputColumns { Self(Arc::new(set)) } - pub fn set(&self) -> &ColumnSet { - &self.0 + // pub fn set(&self) -> &ColumnSet { + // &self.0 + // } +} + +impl std::ops::Deref for OutputColumns { + type Target = ColumnSet; + + fn deref(&self) -> &Self::Target { + self.0.deref() } } impl std::fmt::Display for OutputColumns { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_set().entries(self.set().iter().sorted()).finish() + f.debug_set().entries(self.deref().iter().sorted()).finish() } } -impl PropertyMarker for OutputColumns {} +impl PropertyMarker for OutputColumns { + type Output = Self; +} impl Derive for crate::ir::Operator { - fn derive_by_compute(&self, ctx: &crate::ir::context::IRContext) -> OutputColumns { + fn derive_by_compute( + &self, + ctx: &crate::ir::IRContext, + ) -> ::Output { match &self.kind { OperatorKind::Group(_) => { // Always derive a placeholder using the normalized expression. 
panic!("Right now group's properties should always be set.") } OperatorKind::LogicalGet(meta) => { - LogicalGet::from_raw_parts(meta.clone(), self.common.clone()).derive_by_compute(ctx) + LogicalGet::borrow_raw_parts(meta, &self.common).derive(ctx) } OperatorKind::PhysicalTableScan(meta) => { - PhysicalTableScan::from_raw_parts(meta.clone(), self.common.clone()) - .derive_by_compute(ctx) + PhysicalTableScan::borrow_raw_parts(meta, &self.common).derive(ctx) } OperatorKind::LogicalJoin(_) | OperatorKind::PhysicalNLJoin(_) @@ -52,8 +64,7 @@ impl Derive for crate::ir::Operator { .input_operators() .iter() .fold(HashSet::new(), |mut set, op| { - let output_from_child: OutputColumns = op.derive(ctx); - set.extend(output_from_child.set()); + set.extend(&*op.output_columns(ctx)); set }); OutputColumns::from_column_set(set) @@ -62,13 +73,20 @@ impl Derive for crate::ir::Operator { } } - fn derive(&self, ctx: &crate::ir::context::IRContext) -> OutputColumns { + fn derive( + &self, + ctx: &crate::ir::context::IRContext, + ) -> ::Output { self.common .properties .output_columns - .get_or_init(|| self.derive_by_compute(ctx)) + .get_or_init(|| >::derive_by_compute(self, ctx)) .clone() } } -impl GetProperty for crate::ir::Operator {} +impl crate::ir::Operator { + pub fn output_columns(&self, ctx: &crate::ir::context::IRContext) -> OutputColumns { + self.get_property::(ctx) + } +} diff --git a/optd/core/src/ir/properties/required.rs b/optd/core/src/ir/properties/required.rs index 369ab95..cb66ce2 100644 --- a/optd/core/src/ir/properties/required.rs +++ b/optd/core/src/ir/properties/required.rs @@ -19,7 +19,9 @@ impl std::fmt::Display for Required { } } -impl PropertyMarker for Arc {} +impl PropertyMarker for Arc { + type Output = Self; +} impl crate::ir::properties::TrySatisfy> for Operator { fn try_satisfy( diff --git a/optd/core/src/ir/properties/tuple_ordering.rs b/optd/core/src/ir/properties/tuple_ordering.rs index ec3c912..ada7b03 100644 --- 
a/optd/core/src/ir/properties/tuple_ordering.rs +++ b/optd/core/src/ir/properties/tuple_ordering.rs @@ -3,10 +3,7 @@ use std::sync::Arc; use bitvec::{boxed::BitBox, vec::BitVec}; use crate::ir::operator::*; -use crate::ir::{ - Column, Operator, OperatorCategory, OperatorKind, - properties::{GetProperty, OutputColumns}, -}; +use crate::ir::{Column, Operator, OperatorCategory, OperatorKind}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum TupleOrderingDirection { @@ -137,7 +134,9 @@ impl<'a> Iterator for Iter<'a> { } } -impl crate::ir::properties::PropertyMarker for TupleOrdering {} +impl crate::ir::properties::PropertyMarker for TupleOrdering { + type Output = Self; +} impl crate::ir::properties::TrySatisfy for Operator { fn try_satisfy( @@ -161,20 +160,20 @@ impl crate::ir::properties::TrySatisfy for Operator { ordering.is_empty().then_some(Arc::new([])) } OperatorKind::PhysicalNLJoin(meta) => { - let join = PhysicalNLJoinBorrowed::from_raw_parts(meta, &self.common); + let join = PhysicalNLJoin::borrow_raw_parts(meta, &self.common); - let output_from_outer = join.outer().get_property::(ctx); + let output_from_outer = join.outer().output_columns(ctx); ordering .iter_columns() - .all(|col| output_from_outer.set().contains(col)) + .all(|col| (*output_from_outer).contains(col)) .then(|| vec![ordering.clone(), TupleOrdering::default()].into()) } OperatorKind::PhysicalFilter(meta) => { - let filter = PhysicalFilterBorrowed::from_raw_parts(meta, &self.common); - let output_from_input = filter.input().get_property::(ctx); + let filter = PhysicalFilter::borrow_raw_parts(meta, &self.common); + let output_from_input = filter.input().output_columns(ctx); ordering .iter_columns() - .all(|col| output_from_input.set().contains(col)) + .all(|col| (*output_from_input).contains(col)) .then(|| vec![ordering.clone()].into()) } OperatorKind::MockScan(meta) => { diff --git a/optd/core/src/ir/scalar/assign.rs b/optd/core/src/ir/scalar/assign.rs index f7ab78b..083ab0d 100644 
--- a/optd/core/src/ir/scalar/assign.rs +++ b/optd/core/src/ir/scalar/assign.rs @@ -31,7 +31,7 @@ impl Assign { pub fn is_passthrough(&self) -> bool { self.expr() - .try_bind_ref::() + .try_borrow::() .is_ok_and(|column_ref| column_ref.column() == self.assignee()) } } diff --git a/optd/core/src/ir/scalar/mod.rs b/optd/core/src/ir/scalar/mod.rs index 61d523b..ec8be60 100644 --- a/optd/core/src/ir/scalar/mod.rs +++ b/optd/core/src/ir/scalar/mod.rs @@ -78,15 +78,15 @@ impl Explain for Scalar { ) -> pretty_xmlish::Pretty<'a> { match &self.kind { ScalarKind::Literal(meta) => { - LiteralBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + Literal::borrow_raw_parts(meta, &self.common).explain(ctx, option) } ScalarKind::ColumnRef(meta) => { - ColumnRefBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + ColumnRef::borrow_raw_parts(meta, &self.common).explain(ctx, option) } ScalarKind::Assign(_) => todo!(), ScalarKind::ProjectionList(_) => todo!(), ScalarKind::BinaryOp(meta) => { - BinaryOpBorrowed::from_raw_parts(meta, &self.common).explain(ctx, option) + BinaryOp::borrow_raw_parts(meta, &self.common).explain(ctx, option) } } } diff --git a/optd/core/src/ir/scalar/projection_list.rs b/optd/core/src/ir/scalar/projection_list.rs index 03cb95d..ea70e62 100644 --- a/optd/core/src/ir/scalar/projection_list.rs +++ b/optd/core/src/ir/scalar/projection_list.rs @@ -26,10 +26,12 @@ impl ProjectionList { common: IRCommon::with_input_scalars_only(members), } } +} +impl ProjectionListBorrowed<'_> { pub fn get_all_assignees(&self) -> impl Iterator { self.members().iter().map(|member| { - let assign = member.try_bind_ref::().unwrap(); + let assign = member.try_borrow::().unwrap(); *assign.assignee() }) } diff --git a/optd/core/src/magic/card.rs b/optd/core/src/magic/card.rs index fc7446b..f266408 100644 --- a/optd/core/src/magic/card.rs +++ b/optd/core/src/magic/card.rs @@ -1,6 +1,6 @@ use crate::ir::{ operator::*, - properties::{Cardinality, 
CardinalityEstimator, GetProperty}, + properties::{Cardinality, CardinalityEstimator}, scalar::*, }; @@ -33,8 +33,8 @@ impl CardinalityEstimator for MagicCardinalityEstimator { Cardinality::with_count_lossy(exact_row_count) } OperatorKind::LogicalJoin(meta) => { - let join = LogicalJoinBorrowed::from_raw_parts(meta, &op.common); - let selectivity = if let Ok(literal) = join.join_cond().try_bind_ref::() { + let join = LogicalJoin::borrow_raw_parts(meta, &op.common); + let selectivity = if let Ok(literal) = join.join_cond().try_borrow::() { match literal.value() { crate::ir::ScalarValue::Boolean(Some(true)) => 1., crate::ir::ScalarValue::Boolean(_) => 0., @@ -43,13 +43,13 @@ impl CardinalityEstimator for MagicCardinalityEstimator { } else { MagicCardinalityEstimator::MAGIC_JOIN_COND_SELECTIVITY }; - let left_card = join.outer().get_property::(ctx); - let right_card = join.inner().get_property::(ctx); + let left_card = join.outer().cardinality(ctx); + let right_card = join.inner().cardinality(ctx); selectivity * left_card * right_card } OperatorKind::PhysicalNLJoin(meta) => { - let join = PhysicalNLJoinBorrowed::from_raw_parts(meta, &op.common); - let selectivity = if let Ok(literal) = join.join_cond().try_bind_ref::() { + let join = PhysicalNLJoin::borrow_raw_parts(meta, &op.common); + let selectivity = if let Ok(literal) = join.join_cond().try_borrow::() { match literal.value() { crate::ir::ScalarValue::Boolean(Some(true)) => 1., crate::ir::ScalarValue::Boolean(_) => 0., @@ -58,14 +58,13 @@ impl CardinalityEstimator for MagicCardinalityEstimator { } else { MagicCardinalityEstimator::MAGIC_JOIN_COND_SELECTIVITY }; - let left_card = join.outer().get_property::(ctx); - let right_card = join.inner().get_property::(ctx); + let left_card = join.outer().cardinality(ctx); + let right_card = join.inner().cardinality(ctx); selectivity * left_card * right_card } OperatorKind::LogicalSelect(meta) => { - let filter = LogicalSelectBorrowed::from_raw_parts(meta, &op.common); - let 
selectivity = if let Ok(literal) = filter.predicate().try_bind_ref::() - { + let filter = LogicalSelect::borrow_raw_parts(meta, &op.common); + let selectivity = if let Ok(literal) = filter.predicate().try_borrow::() { match literal.value() { crate::ir::ScalarValue::Boolean(Some(true)) => 1., crate::ir::ScalarValue::Boolean(_) => 0., @@ -75,12 +74,11 @@ impl CardinalityEstimator for MagicCardinalityEstimator { Self::MAGIC_PREDICATE_SELECTIVITY }; - selectivity * op.input_operators()[0].get_property::(ctx) + selectivity * filter.input().cardinality(ctx) } OperatorKind::PhysicalFilter(meta) => { - let filter = PhysicalFilterBorrowed::from_raw_parts(meta, &op.common); - let selectivity = if let Ok(literal) = filter.predicate().try_bind_ref::() - { + let filter = PhysicalFilter::borrow_raw_parts(meta, &op.common); + let selectivity = if let Ok(literal) = filter.predicate().try_borrow::() { match literal.value() { crate::ir::ScalarValue::Boolean(Some(true)) => 1., crate::ir::ScalarValue::Boolean(_) => 0., @@ -90,11 +88,11 @@ impl CardinalityEstimator for MagicCardinalityEstimator { Self::MAGIC_PREDICATE_SELECTIVITY }; - selectivity * op.input_operators()[0].get_property::(ctx) - } - OperatorKind::EnforcerSort(_) => { - op.input_operators()[0].get_property::(ctx) + selectivity * filter.input().cardinality(ctx) } + OperatorKind::EnforcerSort(meta) => EnforcerSort::borrow_raw_parts(meta, &op.common) + .input() + .cardinality(ctx), OperatorKind::MockScan(meta) => meta.spec.mocked_card, } } diff --git a/optd/core/src/magic/cm.rs b/optd/core/src/magic/cm.rs index 5a7aa40..e14108c 100644 --- a/optd/core/src/magic/cm.rs +++ b/optd/core/src/magic/cm.rs @@ -1,7 +1,6 @@ use crate::ir::{ cost::{Cost, CostModel}, operator::*, - properties::{Cardinality, GetProperty}, }; pub struct MagicCostModel; @@ -23,18 +22,18 @@ impl CostModel for MagicCostModel { OperatorKind::LogicalJoin(_) => None, OperatorKind::LogicalSelect(_) => None, OperatorKind::EnforcerSort(_) => { - let input_card = 
op.input_operators()[0].get_property::(ctx); + let input_card = op.input_operators()[0].cardinality(ctx); let cost = Cost::UNIT * input_card.as_f64() * input_card.as_f64().ln_1p().max(1.0); Some(cost) } OperatorKind::PhysicalTableScan(_) => { - let card = op.get_property::(ctx); + let card = op.cardinality(ctx); Some(Cost::UNIT * card * 2f64) } OperatorKind::PhysicalNLJoin(meta) => { - let join = PhysicalNLJoinBorrowed::from_raw_parts(meta, &op.common); - let outer_card = join.outer().get_property::(ctx); - let inner_card = join.inner().get_property::(ctx); + let join = PhysicalNLJoin::borrow_raw_parts(meta, &op.common); + let outer_card = join.outer().cardinality(ctx); + let inner_card = join.inner().cardinality(ctx); let cost = outer_card.as_f64() * inner_card.as_f64() * MagicCostModel::MAGIC_COMPUTATION_FACTOR @@ -43,8 +42,8 @@ impl CostModel for MagicCostModel { Some(cost) } OperatorKind::PhysicalFilter(meta) => { - let filter = PhysicalFilterBorrowed::from_raw_parts(meta, &op.common); - let input_card = filter.input().get_property::(ctx); + let filter = PhysicalFilter::borrow_raw_parts(meta, &op.common); + let input_card = filter.input().cardinality(ctx); let cost = input_card.as_f64() * Self::MAGIC_COMPUTATION_FACTOR * Cost::UNIT; Some(cost) } diff --git a/optd/core/src/rules/implementations/nl_join.rs b/optd/core/src/rules/implementations/nl_join.rs index 7dd6d2f..5485f00 100644 --- a/optd/core/src/rules/implementations/nl_join.rs +++ b/optd/core/src/rules/implementations/nl_join.rs @@ -37,7 +37,7 @@ impl Rule for LogicalJoinAsPhysicalNLJoinRule { operator: &crate::ir::Operator, _ctx: &crate::ir::IRContext, ) -> Result>, ()> { - let join = operator.try_bind_ref::().unwrap(); + let join = operator.try_borrow::().unwrap(); let nl_join = PhysicalNLJoin::new( *join.join_type(), join.outer().clone(), @@ -75,38 +75,22 @@ mod tests { let rule = LogicalJoinAsPhysicalNLJoinRule::new(); assert!(rule.pattern.matches_without_expand(&inner_join)); - let nl_join = rule - 
.transform(&inner_join, &ctx) - .unwrap() - .pop() - .unwrap() - .try_bind_ref::() - .unwrap(); + let after = rule.transform(&inner_join, &ctx).unwrap().pop().unwrap(); + + let nl_join = after.try_borrow::().unwrap(); assert_eq!( &1, - nl_join - .outer() - .try_bind_ref::() - .unwrap() - .mock_id() + nl_join.outer().try_borrow::().unwrap().mock_id() ); assert_eq!( &2, - nl_join - .inner() - .try_bind_ref::() - .unwrap() - .mock_id() + nl_join.inner().try_borrow::().unwrap().mock_id() ); assert_eq!(&JoinType::Inner, nl_join.join_type()); assert_eq!( &ScalarValue::Boolean(Some(true)), - nl_join - .join_cond() - .try_bind_ref::() - .unwrap() - .value() + nl_join.join_cond().try_borrow::().unwrap().value() ); } } diff --git a/optd/core/src/rules/implementations/table_scan.rs b/optd/core/src/rules/implementations/table_scan.rs index 0fabfe9..6f38cd4 100644 --- a/optd/core/src/rules/implementations/table_scan.rs +++ b/optd/core/src/rules/implementations/table_scan.rs @@ -37,7 +37,7 @@ impl Rule for LogicalGetAsPhysicalTableScanRule { operator: &crate::ir::Operator, _ctx: &crate::ir::IRContext, ) -> Result>, ()> { - let get = operator.try_bind_ref::().unwrap(); + let get = operator.try_borrow::().unwrap(); let table_scan = PhysicalTableScan::new(*get.table_id(), get.projection_list().clone()); Ok(vec![table_scan.into_operator()]) } diff --git a/optd/core/src/rules/logical_join_inner_assoc.rs b/optd/core/src/rules/logical_join_inner_assoc.rs index 5c36418..385bec3 100644 --- a/optd/core/src/rules/logical_join_inner_assoc.rs +++ b/optd/core/src/rules/logical_join_inner_assoc.rs @@ -1,8 +1,6 @@ use crate::ir::{ IRContext, OperatorKind, - convert::IntoOperator, operator::{LogicalJoin, join::JoinType}, - properties::{GetProperty, OutputColumns}, rule::{OperatorPattern, Rule}, }; @@ -43,39 +41,29 @@ impl Rule for LogicalJoinInnerAssocRule { ctx: &IRContext, ) -> Result>, ()> { // ((a JOIN b, cond_low) JOIN c, cond_up) → (a JOIN (b JOIN c, cond_up), cond_low) - let join_upper = 
operator.try_bind_ref_experimental::().unwrap(); + let join_upper = operator.try_borrow::().unwrap(); assert_eq!(join_upper.join_type(), &JoinType::Inner); - let join_lower = join_upper - .outer() - .try_bind_ref_experimental::() - .unwrap(); + let join_lower = join_upper.outer().try_borrow::().unwrap(); assert_eq!(join_lower.join_type(), &JoinType::Inner); let a = join_lower.outer().clone(); let b = join_lower.inner().clone(); let c = join_upper.inner().clone(); - let new_lower_columns = - b.get_property::(ctx).set() & c.get_property::(ctx).set(); if !join_upper .join_cond() .used_columns() - .is_subset(&new_lower_columns) + .is_subset(&(&*b.output_columns(ctx) & &*c.output_columns(ctx))) { return Ok(vec![]); } - let new_join_upper = LogicalJoin::new( - JoinType::Inner, - a, - { - LogicalJoin::new(JoinType::Inner, b, c, join_upper.join_cond().clone()) - .into_operator() - }, + let new_join_upper = a.logical_join( + b.logical_join(c, join_upper.join_cond().clone(), JoinType::Inner), join_lower.join_cond().clone(), + JoinType::Inner, ); - - Ok(vec![new_join_upper.into_operator()]) + Ok(vec![new_join_upper]) } } @@ -83,7 +71,7 @@ impl Rule for LogicalJoinInnerAssocRule { mod tests { use crate::ir::{ ScalarValue, - convert::IntoScalar, + convert::{IntoOperator, IntoScalar}, operator::{MockScan, MockSpec}, scalar::Literal, }; @@ -111,26 +99,12 @@ mod tests { let rule = LogicalJoinInnerAssocRule::new(); assert!(rule.pattern.matches_without_expand(&inner_joins)); let res = rule.transform(&inner_joins, &ctx).unwrap().pop().unwrap(); - let new_upper = res.try_bind_ref_experimental::().unwrap(); - let a_ref = new_upper - .outer() - .try_bind_ref_experimental::() - .unwrap(); - - let new_lower = new_upper - .inner() - .try_bind_ref_experimental::() - .unwrap(); - - let b_ref = new_lower - .outer() - .try_bind_ref_experimental::() - .unwrap(); - - let c_ref = new_lower - .inner() - .try_bind_ref_experimental::() - .unwrap(); + let new_upper = res.try_borrow::().unwrap(); + 
let a_ref = new_upper.outer().try_borrow::().unwrap(); + + let new_lower = new_upper.inner().try_borrow::().unwrap(); + let b_ref = new_lower.outer().try_borrow::().unwrap(); + let c_ref = new_lower.inner().try_borrow::().unwrap(); assert_eq!(&1, a_ref.mock_id()); assert_eq!(&2, b_ref.mock_id()); @@ -139,7 +113,7 @@ mod tests { &ScalarValue::Boolean(Some(false)), new_upper .join_cond() - .try_bind_ref_experimental::() + .try_borrow::() .unwrap() .value() ); @@ -148,7 +122,7 @@ mod tests { &ScalarValue::Boolean(Some(true)), new_lower .join_cond() - .try_bind_ref_experimental::() + .try_borrow::() .unwrap() .value() ); diff --git a/optd/core/src/rules/logical_join_inner_commute.rs b/optd/core/src/rules/logical_join_inner_commute.rs index a17c896..cd97127 100644 --- a/optd/core/src/rules/logical_join_inner_commute.rs +++ b/optd/core/src/rules/logical_join_inner_commute.rs @@ -43,7 +43,7 @@ impl Rule for LogicalJoinInnerCommuteRule { operator: &crate::ir::Operator, _ctx: &crate::ir::IRContext, ) -> Result>, ()> { - let join = operator.try_bind_ref::().unwrap(); + let join = operator.try_borrow::().unwrap(); assert_eq!(join.join_type(), &JoinType::Inner); let new_outer = join.inner().clone(); @@ -86,16 +86,10 @@ mod tests { assert!(rule.pattern.matches_without_expand(&inner_join)); let ctx = IRContext::with_empty_magic(); let res = rule.transform(&inner_join, &ctx).unwrap().pop().unwrap(); - let commuted = res.try_bind_ref_experimental::().unwrap(); + let commuted = res.try_borrow::().unwrap(); - let new_outer = commuted - .outer() - .try_bind_ref_experimental::() - .unwrap(); - let new_inner = commuted - .inner() - .try_bind_ref_experimental::() - .unwrap(); + let new_outer = commuted.outer().try_borrow::().unwrap(); + let new_inner = commuted.inner().try_borrow::().unwrap(); assert_eq!(new_outer.mock_id(), &2); assert_eq!(new_inner.mock_id(), &1); diff --git a/optd/core/tests/it_works.rs b/optd/core/tests/it_works.rs index dd40ac1..791c633 100644 --- 
a/optd/core/tests/it_works.rs +++ b/optd/core/tests/it_works.rs @@ -38,7 +38,7 @@ async fn optimize_plan( println!("\nMEMO AFTER OPT"); opt.memo.read().await.dump(); - println!("\nEXPLAIN (root_requirement: {}):", required); + println!("\nEXPLAIN (root_requirement: {required}):"); std::iter::once(format!("{: Date: Tue, 12 Aug 2025 12:36:36 -0400 Subject: [PATCH 06/40] cargo.lock update --- Cargo.lock | 2278 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 2165 insertions(+), 113 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4a596af..a1b5444 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + [[package]] name = "ahash" version = "0.8.12" @@ -132,6 +138,56 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "anstream" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", +] + [[package]] name = "anyhow" version = "1.0.98" @@ -167,6 +223,35 @@ dependencies = [ "zstd", ] +[[package]] +name = "apache-avro" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aef82843a0ec9f8b19567445ad2421ceeb1d711514384bdd3d49fe37102ee13" +dependencies = [ + "bigdecimal", + "bzip2 0.4.4", + "crc32fast", + "digest", + "libflate", + "log", + "num-bigint", + "quad-rand", + "rand 0.8.5", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "snap", + "strum", + "strum_macros", + "thiserror 1.0.69", + "typed-builder", + "uuid", + "xz2", + "zstd", +] + [[package]] name = "arrayref" version = "0.3.9" @@ -255,6 +340,7 @@ dependencies = [ "arrow-select", "atoi", "base64 0.22.1", + "base64 0.22.1", "chrono", "comfy-table", "half", @@ -319,6 +405,7 @@ dependencies = [ "chrono", "half", "indexmap 2.10.0", + "indexmap 2.10.0", "lexical-core", "memchr", "num", @@ -433,6 +520,28 @@ dependencies = [ "syn", ] +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + 
[[package]] name = "async-trait" version = "0.1.88" @@ -459,6 +568,12 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.5.0" @@ -860,107 +975,537 @@ dependencies = [ ] [[package]] -name = "backtrace" -version = "0.3.75" +name = "aws-config" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +checksum = "483020b893cdef3d89637e428d588650c71cfae7ea2e6ecbaee4de4ff99fb2dd" dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http 1.3.1", + "ring", + "time", + "tokio", + "tracing", + "url", + "zeroize", ] [[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "base64-simd" -version = "0.8.0" +name = "aws-credential-types" +version = "1.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +checksum = 
"1541072f81945fa1251f8795ef6c92c4282d74d59f88498ae7d4bf00f0ebdad9" dependencies = [ - "outref", - "vsimd", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", ] [[package]] -name = "bigdecimal" -version = "0.4.8" +name = "aws-lc-rs" +version = "1.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba" dependencies = [ - "autocfg", - "libm", - "num-bigint", - "num-integer", - "num-traits", - "serde", + "aws-lc-sys", + "zeroize", ] [[package]] -name = "bindgen" -version = "0.69.5" +name = "aws-lc-sys" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +checksum = "dbfd150b5dbdb988bcc8fb1fe787eb6b7ee6180ca24da683b61ea5405f3d43ff" dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn", - "which", + "bindgen", + "cc", + "cmake", + "dunce", + "fs_extra", ] [[package]] -name = "bitflags" -version = "2.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" - -[[package]] -name = "bitvec" -version = "1.0.1" +name = "aws-runtime" +version = "1.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +checksum = "c034a1bc1d70e16e7f4e4caf7e9f7693e4c9c24cd91cf17c2a0b21abaebc7c8b" dependencies = [ - "funty", - "radium", - "tap", - "wyz", + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + 
"fastrand", + "http 0.2.12", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", ] [[package]] -name = "blake2" -version = "0.10.6" +name = "aws-sdk-sso" +version = "1.79.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +checksum = "0a847168f15b46329fa32c7aca4e4f1a2e072f9b422f0adb19756f2e1457f111" dependencies = [ - "digest", -] - -[[package]] + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = "1.80.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b654dd24d65568738593e8239aef279a86a15374ec926ae8714e2d7245f34149" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.81.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c92ea8a7602321c83615c82b408820ad54280fb026e92de0eeea937342fafa24" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" +dependencies = [ + 
"aws-credential-types", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.3.1", + "percent-encoding", + "sha2", + "time", + "tracing", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-http" +version = "0.62.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f108f1ca850f3feef3009bdcc977be201bca9a91058864d9de0684e64514bee0" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2", + "http 1.3.1", + "hyper", + "hyper-rustls", + "hyper-util", + "pin-project-lite", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tower 0.5.2", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.61.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a16e040799d29c17412943bdbf488fd75db04112d0c0d4b9290bacf5ae0014b9" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = 
"aws-smithy-query" +version = "0.60.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e107ce0783019dbff59b3a244aa0c114e4a8c9d93498af9162608cd5474e796" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "http-body 1.0.1", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75d52251ed4b9776a3e8487b2a01ac915f73b2da3af8fc1e77e0fce697a550d4" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.3.1", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower 0.5.2", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", +] + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "base64" +version = "0.22.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" + +[[package]] +name = "bigdecimal" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", +] + +[[package]] +name = "bindgen" +version = "0.69.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.12.1", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn", + "which", + "serde", +] + +[[package]] +name = "bindgen" +version = "0.69.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.12.1", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn", + "which", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bitvec" +version = 
"1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] name = "blake3" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1041,6 +1586,26 @@ dependencies = [ "libc", ] +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + [[package]] name = "bzip2" version = "0.5.2" @@ -1089,6 +1654,15 @@ dependencies = [ "nom", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.1" @@ -1101,6 +1675,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.41" @@ -1111,6 +1691,7 @@ dependencies = [ "iana-time-zone", "num-traits", "serde", + "serde", "windows-link", 
] @@ -1137,9 +1718,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.43" +version = "4.5.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50fd97c9dc2399518aa331917ac6f274280ec5eb34e555dd291899745c48ec6f" +checksum = "1c1f056bae57e3e54c3375c41ff79619ddd13460a17d7438712bd0d83fda4ff8" dependencies = [ "clap_builder", "clap_derive", @@ -1147,9 +1728,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.43" +version = "4.5.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c35b5830294e1fa0462034af85cc95225a4cb07092c088c55bda3147cfcd8f65" +checksum = "b3e7f4214277f3c7aa526a59dd3fbe306a370daee1f8b7b8c987069cd8e888a8" dependencies = [ "anstream", "anstyle", @@ -1209,6 +1790,15 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "console-api" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8030735ecb0d128428b64cd379809817e620a40e5001c54465b99ec5feec2857" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "console-api" version = "0.8.1" @@ -1248,6 +1838,32 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "console-subscriber" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6539aa9c6a4cd31f4b1c040f860a1eac9aa80e7df6b05d506a6e7179936d6a01" +dependencies = [ + "console-api", + "crossbeam-channel", + "crossbeam-utils", + "futures-task", + "hdrhistogram", + "humantime", + "hyper-util", + "prost", + "prost-types", + "serde", + "serde_json", + "thread_local", + "tokio", + "tokio-stream", + "tonic", + "tracing", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "const-random" version = "0.1.18" @@ -1284,6 +1900,16 @@ dependencies = [ "libc", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -1300,14 +1926,29 @@ dependencies = [ ] [[package]] -name = "cpufeatures" -version = "0.2.17" +name = "core2" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" dependencies = [ - "libc", + "memchr", +] + +[[package]] +name = "crc" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +dependencies = [ + "crc-catalog", ] +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.5.0" @@ -1326,6 +1967,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -1375,6 +2025,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" +[[package]] +name = "dary_heap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" + [[package]] name = "dashmap" version = "6.1.0" @@ -1408,6 +2064,7 @@ dependencies = [ "datafusion-common-runtime", "datafusion-datasource", "datafusion-datasource-avro", + 
"datafusion-datasource-avro", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", @@ -1428,6 +2085,7 @@ dependencies = [ "datafusion-sql", "flate2", "futures", + "hex", "itertools 0.14.0", "log", "object_store", @@ -1464,6 +2122,7 @@ dependencies = [ "datafusion-sql", "futures", "itertools 0.14.0", + "itertools 0.14.0", "log", "object_store", "parking_lot", @@ -1519,6 +2178,32 @@ dependencies = [ "url", ] +[[package]] +name = "datafusion-cli" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db88b7c2988301968b5234be0011ae73c67559b6f62d771393bb442d16213c60" +dependencies = [ + "arrow", + "async-trait", + "aws-config", + "aws-credential-types", + "clap", + "datafusion", + "dirs", + "env_logger", + "futures", + "log", + "mimalloc", + "object_store", + "parking_lot", + "parquet", + "regex", + "rustyline", + "tokio", + "url", +] + [[package]] name = "datafusion-common" version = "49.0.0" @@ -1527,12 +2212,15 @@ checksum = "3d66104731b7476a8c86fbe7a6fd741e6329791166ac89a91fcd8336a560ddaf" dependencies = [ "ahash", "apache-avro", + "apache-avro", "arrow", "arrow-ipc", "base64 0.22.1", + "base64 0.22.1", "chrono", "half", "hashbrown 0.14.5", + "hex", "indexmap 2.10.0", "libc", "log", @@ -1580,16 +2268,42 @@ dependencies = [ "futures", "glob", "itertools 0.14.0", + "itertools 0.14.0", "log", "object_store", - "parquet", - "rand 0.9.2", - "tempfile", + "parquet", + "rand 0.9.2", + "tempfile", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-datasource-avro" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831cfe556658133ea4270d616164ce27f737e9e4d5e359e1b1b269e0bf767cef" +dependencies = [ + "apache-avro", + "arrow", + "async-trait", + "bytes", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-physical-expr", + 
"datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "num-traits", + "object_store", "tokio", - "tokio-util", - "url", - "xz2", - "zstd", ] [[package]] @@ -1690,6 +2404,7 @@ dependencies = [ "datafusion-pruning", "datafusion-session", "futures", + "hex", "itertools 0.14.0", "log", "object_store", @@ -1740,6 +2455,7 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap 2.10.0", + "indexmap 2.10.0", "paste", "recursive", "serde_json", @@ -1756,6 +2472,8 @@ dependencies = [ "datafusion-common", "indexmap 2.10.0", "itertools 0.14.0", + "indexmap 2.10.0", + "itertools 0.14.0", "paste", ] @@ -1768,6 +2486,7 @@ dependencies = [ "arrow", "arrow-buffer", "base64 0.22.1", + "base64 0.22.1", "blake2", "blake3", "chrono", @@ -1779,6 +2498,7 @@ dependencies = [ "datafusion-macros", "hex", "itertools 0.14.0", + "itertools 0.14.0", "log", "md-5", "rand 0.9.2", @@ -1840,6 +2560,7 @@ dependencies = [ "datafusion-macros", "datafusion-physical-expr-common", "itertools 0.14.0", + "itertools 0.14.0", "log", "paste", ] @@ -1913,6 +2634,8 @@ dependencies = [ "datafusion-physical-expr", "indexmap 2.10.0", "itertools 0.14.0", + "indexmap 2.10.0", + "itertools 0.14.0", "log", "recursive", "regex", @@ -1936,6 +2659,8 @@ dependencies = [ "hashbrown 0.14.5", "indexmap 2.10.0", "itertools 0.14.0", + "indexmap 2.10.0", + "itertools 0.14.0", "log", "paste", "petgraph", @@ -1953,6 +2678,7 @@ dependencies = [ "datafusion-expr-common", "hashbrown 0.14.5", "itertools 0.14.0", + "itertools 0.14.0", ] [[package]] @@ -1971,6 +2697,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", + "itertools 0.14.0", "log", "recursive", ] @@ -1999,6 +2726,8 @@ dependencies = [ "hashbrown 0.14.5", "indexmap 2.10.0", "itertools 0.14.0", + "indexmap 2.10.0", + "itertools 0.14.0", "log", "parking_lot", "pin-project-lite", @@ -2020,6 +2749,7 @@ dependencies = [ 
"datafusion-physical-expr-common", "datafusion-physical-plan", "itertools 0.14.0", + "itertools 0.14.0", "log", ] @@ -2041,6 +2771,7 @@ dependencies = [ "datafusion-sql", "futures", "itertools 0.14.0", + "itertools 0.14.0", "log", "object_store", "parking_lot", @@ -2058,12 +2789,24 @@ dependencies = [ "datafusion-common", "datafusion-expr", "indexmap 2.10.0", + "indexmap 2.10.0", "log", "recursive", "regex", "sqlparser", ] +[[package]] +name = "deranged" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + [[package]] name = "deranged" version = "0.4.0" @@ -2102,7 +2845,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -2116,6 +2859,12 @@ dependencies = [ "syn", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + [[package]] name = "dunce" version = "1.0.5" @@ -2134,6 +2883,21 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" +[[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "serde", +] + +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + [[package]] name = "env_filter" version = "0.1.3" @@ -2179,6 +2943,28 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "error-code" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -2196,6 +2982,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "fd-lock" +version = "4.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" +dependencies = [ + "cfg-if", + "rustix 1.0.8", + "windows-sys 0.59.0", +] + [[package]] name = "fixedbitset" version = "0.5.7" @@ -2250,6 +3047,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "funty" version = "2.0.0" @@ -2376,10 +3179,12 @@ checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "js-sys", + "js-sys", "libc", "r-efi", "wasi 0.14.2+wasi-0.2.4", "wasm-bindgen", + "wasm-bindgen", ] [[package]] @@ -2413,6 +3218,25 @@ dependencies = [ "tracing", ] +[[package]] +name = "h2" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +dependencies = [ + "atomic-waker", 
+ "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.3.1", + "indexmap 2.10.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "half" version = "2.6.0" @@ -2430,6 +3254,12 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.14.5" @@ -2451,6 +3281,15 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hdrhistogram" +version = "7.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "hashbrown 0.15.4", +] + [[package]] name = "hdrhistogram" version = "7.5.4" @@ -2505,6 +3344,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.3.1" @@ -2562,6 +3412,52 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + 
"http 1.3.1", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.3.1", + "http-body 1.0.1", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "humantime" version = "2.2.0" @@ -2643,6 +3539,81 @@ dependencies = [ "tracing", ] +[[package]] +name = "hyper" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2", + "http 1.3.1", + "http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http 1.3.1", + "hyper", + "hyper-util", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.16" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2 0.6.0", + "tokio", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.63" @@ -2784,6 +3755,16 @@ dependencies = [ "hashbrown 0.12.3", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + [[package]] name = "indexmap" version = "2.10.0" @@ -2842,6 +3823,37 @@ dependencies = [ "either", ] +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "iri-string" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -2881,6 +3893,30 @@ dependencies = [ "syn", ] +[[package]] +name = "jiff" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "jobserver" version = "0.1.33" @@ -2906,6 +3942,9 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] [[package]] name = "lazycell" @@ -3020,7 +4059,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-targets 0.53.3", + "windows-targets 0.52.6", ] [[package]] @@ -3049,6 +4088,17 @@ dependencies = [ "libc", ] +[[package]] +name = "libmimalloc-sys" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf88cd67e9de251c1781dbe2f641a1a3ad66eaae831b8a2c38fbdc5ddae16d4d" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "libz-rs-sys" version = "0.5.1" @@ -3064,6 +4114,12 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.9.4" @@ -3098,6 +4154,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + [[package]] name = "lz4_flex" version = "0.11.5" @@ -3133,6 +4195,12 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "md-5" version = "0.10.6" @@ -3170,6 +4238,27 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +[[package]] +name = "mimalloc" +version = "0.1.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1791cbe101e95af5764f06f20f6760521f7158f69dbf9d6baf941ee1bf6bc40" +dependencies = [ + "libmimalloc-sys", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -3221,6 +4310,37 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + +[[package]] +name = "nix" +version = "0.30.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -3256,6 +4376,23 @@ dependencies = [ "serde", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -3271,6 +4408,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-integer" version = "0.1.46" @@ -3329,16 +4472,23 @@ checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" dependencies = [ "async-trait", "base64 0.22.1", + "base64 0.22.1", "bytes", "chrono", "form_urlencoded", + "form_urlencoded", "futures", "http 1.3.1", "http-body-util", + "http 1.3.1", + "http-body-util", "humantime", "hyper", "itertools 0.14.0", "md-5", + "hyper", + "itertools 0.14.0", + "md-5", "parking_lot", "percent-encoding", "quick-xml", @@ -3350,6 +4500,15 @@ dependencies = [ "serde_json", "serde_urlencoded", "thiserror 2.0.12", + "quick-xml", + "rand 0.9.2", + "reqwest", + 
"ring", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "thiserror 2.0.12", "tokio", "tracing", "url", @@ -3376,10 +4535,37 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + [[package]] name = "optd-catalog" version = "0.1.0" +[[package]] +name = "optd-cli" +version = "0.1.0" +dependencies = [ + "clap", + "datafusion", + "datafusion-cli", + "dirs", + "object_store", + "regex", + "tokio", + "url", + "uuid", +] + [[package]] name = "optd-cli" version = "0.1.0" @@ -3402,10 +4588,13 @@ dependencies = [ "bitvec", "console-subscriber", "itertools 0.14.0", + "console-subscriber", + "itertools 0.14.0", "pretty-xmlish", "tokio", "tracing", "tracing-subscriber", + "tracing-subscriber", "tracing-test", ] @@ -3422,6 +4611,19 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "optd-datafusion" +version = "0.1.0" +dependencies = [ + "datafusion", +] + +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "ordered-float" version = "2.10.1" @@ -3437,6 +4639,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" +[[package]] +name = 
"outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "overload" version = "0.1.1" @@ -3481,6 +4689,7 @@ dependencies = [ "arrow-schema", "arrow-select", "base64 0.22.1", + "base64 0.22.1", "brotli", "bytes", "chrono", @@ -3522,7 +4731,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" dependencies = [ "fixedbitset", - "hashbrown 0.15.5", + "hashbrown 0.15.4", "indexmap 2.10.0", "serde", ] @@ -3537,12 +4746,32 @@ dependencies = [ ] [[package]] -name = "phf_shared" -version = "0.12.1" +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ - "siphasher", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -3589,6 +4818,18 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = 
"portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + [[package]] name = "portable-atomic-util" version = "0.2.4" @@ -3613,6 +4854,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -3638,6 +4885,16 @@ dependencies = [ "syn", ] +[[package]] +name = "prettyplease" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -3679,6 +4936,38 @@ dependencies = [ "prost", ] +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +dependencies = [ + "prost", +] + [[package]] name = "psm" version = "0.1.26" @@ -3759,6 +5048,77 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "quad-rand" 
+version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" + +[[package]] +name = "quick-xml" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9845d9dccf565065824e69f9f235fafba1587031eda353c1f1561cd6a6be78f4" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quinn" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash 2.1.1", + "rustls", + "socket2 0.5.10", + "thiserror 2.0.12", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e" +dependencies = [ + "bytes", + "getrandom 0.3.3", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash 2.1.1", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.12", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2 0.5.10", + "tracing", + "windows-sys 0.59.0", +] + [[package]] name = "quote" version = "1.0.40" @@ -3790,6 +5150,16 @@ dependencies = [ "nibble_vec", ] +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + [[package]] name = "rand" version = "0.8.5" @@ -3889,6 +5259,17 @@ 
dependencies = [ "thiserror 2.0.12", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 2.0.12", +] + [[package]] name = "regex" version = "1.11.1" @@ -3927,6 +5308,12 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + [[package]] name = "regex-syntax" version = "0.6.29" @@ -3941,9 +5328,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.22" +version = "0.12.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531" +checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" dependencies = [ "base64 0.22.1", "bytes", @@ -4001,6 +5388,26 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" +[[package]] +name = "rle-decode-fast" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "signature", + "spki", + "subtle", + "zeroize", +] + [[package]] name = "rustc-demangle" version = "0.1.26" @@ -4019,6 +5426,18 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -4041,6 +5460,19 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.0.8" @@ -4051,7 +5483,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.9.4", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -4116,7 +5548,7 @@ dependencies = [ name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" [[package]] name = "rustyline" @@ -4164,6 +5596,15 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "schannel" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -4193,6 +5634,29 @@ dependencies = [ "libc", ] +[[package]] +name = "security-framework" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" version = "1.0.26" @@ -4223,6 +5687,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_bytes" +version = "0.11.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" +dependencies = [ + "serde", +] + [[package]] name = "serde_derive" version = "1.0.219" @@ -4290,7 +5763,17 @@ version = "1.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" dependencies = [ - "libc", + "libc", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +dependencies = [ + "digest", + "rand_core 0.6.4", ] [[package]] @@ -4343,6 +5826,26 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "sqlparser" version = "0.55.0" @@ -4365,6 +5868,191 @@ dependencies = [ "syn", 
] +[[package]] +name = "sqlx" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" +dependencies = [ + "base64 0.22.1", + "bytes", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashbrown 0.15.4", + "hashlink", + "indexmap 2.10.0", + "log", + "memchr", + "once_cell", + "percent-encoding", + "serde", + "serde_json", + "sha2", + "smallvec", + "thiserror 2.0.12", + "tracing", + "url", +] + +[[package]] +name = "sqlx-macros" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" +dependencies = [ + "dotenvy", + "either", + "heck", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" +dependencies = [ + "atoi", + "base64 0.22.1", + "bitflags", + "byteorder", + "bytes", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", 
+ "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.5", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.12", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" +dependencies = [ + "atoi", + "base64 0.22.1", + "bitflags", + "byteorder", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand 0.8.5", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.12", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" +dependencies = [ + "atoi", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "serde_urlencoded", + "sqlx-core", + "thiserror 2.0.12", + "tracing", + "url", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -4402,6 +6090,29 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + +[[package]] +name = "strsim" +version = 
"0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + [[package]] name = "strum_macros" version = "0.26.4" @@ -4441,6 +6152,15 @@ dependencies = [ "futures-core", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + [[package]] name = "synstructure" version = "0.13.2" @@ -4468,6 +6188,7 @@ dependencies = [ "getrandom 0.3.3", "once_cell", "rustix 1.0.8", + "rustix 1.0.8", "windows-sys 0.59.0", ] @@ -4480,6 +6201,15 @@ dependencies = [ "thiserror-impl 1.0.69", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.12" @@ -4489,6 +6219,18 @@ dependencies = [ "thiserror-impl 2.0.12", ] +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "thiserror-impl 2.0.12", +] + [[package]] name = "thiserror-impl" version = "1.0.69" @@ -4561,6 +6303,36 @@ dependencies = [ "time-core", ] +[[package]] +name = "time" +version = "0.3.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +dependencies = [ + "deranged", + "num-conv", + "powerfmt", + "serde", + "time-core", + 
"time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" + +[[package]] +name = "time-macros" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -4609,22 +6381,47 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", + "signal-hook-registry", "slab", "socket2 0.6.0", + "socket2 0.6.0", "tokio-macros", "tracing", "windows-sys 0.59.0", + "tracing", + "windows-sys 0.59.0", ] [[package]] name = "tokio-macros" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" dependencies = [ - "proc-macro2", - "quote", - "syn", + "futures-core", + "pin-project-lite", + "tokio", ] [[package]] @@ -4756,6 +6553,101 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.22.1", + "bytes", + "h2", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "socket2 0.5.10", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand 0.8.5", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +dependencies = [ + "bitflags", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "iri-string", + "pin-project-lite", + "tower 0.5.2", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + 
[[package]] name = "tracing" version = "0.1.41" @@ -4844,6 +6736,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + [[package]] name = "twox-hash" version = "2.1.1" @@ -4871,17 +6769,41 @@ dependencies = [ ] [[package]] -name = "typenum" -version = "1.18.0" +name = "typed-builder" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06fbd5b8de54c5f7c91f6fe4cebb949be2125d7758e630bb58b1d831dbce600" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] -name = "unicode-ident" -version = "1.0.18" +name = "typed-builder" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-normalization" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" + [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -4917,6 +6839,12 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -4929,6 +6857,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "uuid" version = "1.17.0" @@ -4938,6 +6872,7 @@ dependencies = [ "getrandom 0.3.3", "js-sys", "serde", + "serde", "wasm-bindgen", ] @@ -4959,6 +6894,12 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "walkdir" version = "2.5.0" @@ -4978,6 +6919,15 @@ dependencies = [ "try-lock", ] +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -5077,6 +7027,19 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + 
[[package]] name = "web-sys" version = "0.3.77" @@ -5109,6 +7072,16 @@ dependencies = [ "rustix 0.38.44", ] +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "redox_syscall", + "wasite", +] + [[package]] name = "winapi" version = "0.3.9" @@ -5201,11 +7174,11 @@ dependencies = [ [[package]] name = "windows-sys" -version = "0.52.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -5226,6 +7199,24 @@ dependencies = [ "windows-targets 0.53.3", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -5236,6 +7227,7 @@ dependencies = [ "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", "windows_i686_gnullvm 0.52.6", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", @@ -5259,6 +7251,12 @@ dependencies = [ "windows_x86_64_msvc 0.53.0", ] +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = 
"windows_aarch64_gnullvm" version = "0.52.6" @@ -5271,6 +7269,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -5283,6 +7287,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -5295,6 +7305,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" @@ -5307,6 +7323,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -5319,6 +7341,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -5331,6 +7359,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -5343,6 +7377,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -5355,6 +7395,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "wit-bindgen-rt" version = "0.39.0" @@ -5385,6 +7431,12 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" +[[package]] +name = "xmlparser" +version = "0.13.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "xz2" version = "0.1.7" From e1bf28c466fb7cfbb801ef0985befcd7858ec706 Mon Sep 17 00:00:00 2001 From: Sirui Huang Date: Thu, 14 Aug 2025 15:34:46 -0400 Subject: [PATCH 07/40] lib.rs intercept create external table --- Cargo.lock | 2088 +++++++----------------------------------------- cli/src/lib.rs | 12 +- 2 files changed, 319 insertions(+), 1781 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a1b5444..7d28d57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,12 +23,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" -[[package]] -name = "adler32" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" - [[package]] name = "ahash" version = "0.8.12" @@ -138,90 +132,11 @@ dependencies = [ "windows-sys 0.60.2", ] -[[package]] -name = "anstream" -version = "0.6.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "is_terminal_polyfill", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" - -[[package]] -name = "anstyle-parse" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.1.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" -dependencies = [ - "windows-sys 0.60.2", -] - -[[package]] -name = "anstyle-wincon" -version = "3.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" -dependencies = [ - "anstyle", - "once_cell_polyfill", - "windows-sys 0.60.2", -] - [[package]] name = "anyhow" -version = "1.0.98" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" - -[[package]] -name = "apache-avro" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aef82843a0ec9f8b19567445ad2421ceeb1d711514384bdd3d49fe37102ee13" -dependencies = [ - "bigdecimal", - "bzip2 0.4.4", - "crc32fast", - "digest", - "libflate", - "log", - "num-bigint", - "quad-rand", - "rand 0.8.5", - "regex-lite", - "serde", - "serde_bytes", - "serde_json", - "snap", - "strum", - "strum_macros", - "thiserror 1.0.69", - "typed-builder", - "uuid", - "xz2", - "zstd", -] +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" [[package]] name = "apache-avro" @@ -340,7 +255,6 @@ dependencies = [ "arrow-select", "atoi", "base64 0.22.1", - "base64 0.22.1", "chrono", "comfy-table", "half", @@ -405,7 +319,6 @@ dependencies = [ "chrono", "half", "indexmap 2.10.0", - "indexmap 2.10.0", "lexical-core", "memchr", "num", @@ -520,28 +433,6 @@ dependencies = [ "syn", ] -[[package]] -name = "async-stream" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" -dependencies = [ - "async-stream-impl", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-stream-impl" -version = "0.3.6" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "async-trait" version = "0.1.88" @@ -568,12 +459,6 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" -[[package]] -name = "atomic-waker" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" - [[package]] name = "autocfg" version = "1.5.0" @@ -582,9 +467,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.4" +version = "1.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483020b893cdef3d89637e428d588650c71cfae7ea2e6ecbaee4de4ff99fb2dd" +checksum = "c478f5b10ce55c9a33f87ca3404ca92768b144fc1bfdede7c0121214a8283a25" dependencies = [ "aws-credential-types", "aws-runtime", @@ -671,9 +556,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.79.0" +version = "1.80.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a847168f15b46329fa32c7aca4e4f1a2e072f9b422f0adb19756f2e1457f111" +checksum = "e822be5d4ed48fa7adc983de1b814dea33a5460c7e0e81b053b8d2ca3b14c354" dependencies = [ "aws-credential-types", "aws-runtime", @@ -693,9 +578,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.80.0" +version = "1.81.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b654dd24d65568738593e8239aef279a86a15374ec926ae8714e2d7245f34149" +checksum = "66aa7b30f1fac6e02ca26e3839fa78db3b94f6298a6e7a6208fb59071d93a87e" dependencies = [ "aws-credential-types", "aws-runtime", @@ -715,9 +600,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.81.0" 
+version = "1.82.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c92ea8a7602321c83615c82b408820ad54280fb026e92de0eeea937342fafa24" +checksum = "2194426df72592f91df0cda790cb1e571aa87d66cecfea59a64031b58145abe3" dependencies = [ "aws-credential-types", "aws-runtime", @@ -975,514 +860,93 @@ dependencies = [ ] [[package]] -name = "aws-config" -version = "1.8.4" +name = "backtrace" +version = "0.3.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483020b893cdef3d89637e428d588650c71cfae7ea2e6ecbaee4de4ff99fb2dd" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-sdk-sso", - "aws-sdk-ssooidc", - "aws-sdk-sts", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "hex", - "http 1.3.1", - "ring", - "time", - "tokio", - "tracing", - "url", - "zeroize", + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] -name = "aws-credential-types" -version = "1.2.5" +name = "base64" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1541072f81945fa1251f8795ef6c92c4282d74d59f88498ae7d4bf00f0ebdad9" -dependencies = [ - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "zeroize", -] +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] -name = "aws-lc-rs" -version = "1.13.3" +name = "base64" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba" -dependencies = [ - "aws-lc-sys", - "zeroize", -] +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] -name = "aws-lc-sys" -version = "0.30.0" 
+name = "base64-simd" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbfd150b5dbdb988bcc8fb1fe787eb6b7ee6180ca24da683b61ea5405f3d43ff" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" dependencies = [ - "bindgen", - "cc", - "cmake", - "dunce", - "fs_extra", + "outref", + "vsimd", ] [[package]] -name = "aws-runtime" -version = "1.5.10" +name = "base64ct" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c034a1bc1d70e16e7f4e4caf7e9f7693e4c9c24cd91cf17c2a0b21abaebc7c8b" +checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" + +[[package]] +name = "bigdecimal" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" dependencies = [ - "aws-credential-types", - "aws-sigv4", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "http-body 0.4.6", - "percent-encoding", - "pin-project-lite", - "tracing", - "uuid", + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", ] [[package]] -name = "aws-sdk-sso" -version = "1.79.0" +name = "bindgen" +version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a847168f15b46329fa32c7aca4e4f1a2e072f9b422f0adb19756f2e1457f111" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.12.1", + "lazy_static", + "lazycell", + "log", + 
"prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn", + "which", ] [[package]] -name = "aws-sdk-ssooidc" -version = "1.80.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b654dd24d65568738593e8239aef279a86a15374ec926ae8714e2d7245f34149" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sdk-sts" -version = "1.81.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c92ea8a7602321c83615c82b408820ad54280fb026e92de0eeea937342fafa24" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-query", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-smithy-xml", - "aws-types", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sigv4" -version = "1.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" -dependencies = [ - "aws-credential-types", - "aws-smithy-http", - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "form_urlencoded", - "hex", - "hmac", - "http 0.2.12", - "http 1.3.1", - "percent-encoding", - "sha2", - "time", - "tracing", -] - -[[package]] -name = "aws-smithy-async" -version = "1.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c" -dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "aws-smithy-http" -version = "0.62.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" -dependencies = [ - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "bytes-utils", - "futures-core", - "http 0.2.12", - "http 1.3.1", - "http-body 0.4.6", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tracing", -] - -[[package]] -name = "aws-smithy-http-client" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f108f1ca850f3feef3009bdcc977be201bca9a91058864d9de0684e64514bee0" -dependencies = [ - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "h2", - "http 1.3.1", - "hyper", - "hyper-rustls", - "hyper-util", - "pin-project-lite", - "rustls", - "rustls-native-certs", - "rustls-pki-types", - "tokio", - "tower 0.5.2", - "tracing", -] - -[[package]] -name = "aws-smithy-json" -version = "0.61.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a16e040799d29c17412943bdbf488fd75db04112d0c0d4b9290bacf5ae0014b9" -dependencies = [ - "aws-smithy-types", -] - -[[package]] -name = "aws-smithy-observability" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393" -dependencies = [ - "aws-smithy-runtime-api", -] - -[[package]] -name = "aws-smithy-query" -version = "0.60.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" -dependencies = [ - "aws-smithy-types", - "urlencoding", -] - -[[package]] -name = "aws-smithy-runtime" -version = "1.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e107ce0783019dbff59b3a244aa0c114e4a8c9d93498af9162608cd5474e796" -dependencies = [ - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-http-client", - "aws-smithy-observability", - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "fastrand", - "http 
0.2.12", - "http 1.3.1", - "http-body 0.4.6", - "http-body 1.0.1", - "pin-project-lite", - "pin-utils", - "tokio", - "tracing", -] - -[[package]] -name = "aws-smithy-runtime-api" -version = "1.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75d52251ed4b9776a3e8487b2a01ac915f73b2da3af8fc1e77e0fce697a550d4" -dependencies = [ - "aws-smithy-async", - "aws-smithy-types", - "bytes", - "http 0.2.12", - "http 1.3.1", - "pin-project-lite", - "tokio", - "tracing", - "zeroize", -] - -[[package]] -name = "aws-smithy-types" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8" -dependencies = [ - "base64-simd", - "bytes", - "bytes-utils", - "http 0.2.12", - "http 1.3.1", - "http-body 0.4.6", - "http-body 1.0.1", - "http-body-util", - "itoa", - "num-integer", - "pin-project-lite", - "pin-utils", - "ryu", - "serde", - "time", -] - -[[package]] -name = "aws-smithy-xml" -version = "0.60.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728" -dependencies = [ - "xmlparser", -] - -[[package]] -name = "aws-types" -version = "1.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" -dependencies = [ - "aws-credential-types", - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "rustc_version", - "tracing", -] - -[[package]] -name = "axum" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" -dependencies = [ - "async-trait", - "axum-core", - "bytes", - "futures-util", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "itoa", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", 
- "serde", - "sync_wrapper", - "tower 0.5.2", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", -] - -[[package]] -name = "backtrace" -version = "0.3.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", -] - -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "base64-simd" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" -dependencies = [ - "outref", - "vsimd", -] - -[[package]] -name = "base64-simd" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" - -[[package]] -name = "bigdecimal" -version = "0.4.8" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" -dependencies = [ - "autocfg", - "libm", - "num-bigint", - "num-integer", - "num-traits", - "serde", -] - -[[package]] -name = "bindgen" -version = "0.69.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" -dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn", - "which", - "serde", -] - -[[package]] -name = "bindgen" -version = "0.69.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" -dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn", - "which", -] - -[[package]] -name = "bitflags" -version = "2.9.1" +name = "bitflags" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +dependencies = [ + "serde", +] [[package]] name = "bitvec" @@ -1586,26 +1050,6 @@ dependencies = [ "libc", ] -[[package]] -name = "bytes-utils" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" -dependencies = [ - "bytes", - "either", -] - -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - [[package]] name = "bzip2" version = "0.5.2" @@ -1654,15 
+1098,6 @@ dependencies = [ "nom", ] -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - [[package]] name = "cfg-if" version = "1.0.1" @@ -1675,12 +1110,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" -[[package]] -name = "cfg_aliases" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" - [[package]] name = "chrono" version = "0.4.41" @@ -1691,7 +1120,6 @@ dependencies = [ "iana-time-zone", "num-traits", "serde", - "serde", "windows-link", ] @@ -1718,9 +1146,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.44" +version = "4.5.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c1f056bae57e3e54c3375c41ff79619ddd13460a17d7438712bd0d83fda4ff8" +checksum = "1fc0e74a703892159f5ae7d3aac52c8e6c392f5ae5f359c70b5881d60aaac318" dependencies = [ "clap_builder", "clap_derive", @@ -1740,9 +1168,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.41" +version = "4.5.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" +checksum = "14cb31bb0a7d536caef2639baa7fad459e15c3144efefa6dbd1c84562c4739f6" dependencies = [ "heck", "proc-macro2", @@ -1791,10 +1219,10 @@ dependencies = [ ] [[package]] -name = "console-api" -version = "0.8.1" +name = "concurrent-queue" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8030735ecb0d128428b64cd379809817e620a40e5001c54465b99ec5feec2857" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" dependencies = [ 
"crossbeam-utils", ] @@ -1839,30 +1267,10 @@ dependencies = [ ] [[package]] -name = "console-subscriber" -version = "0.4.1" +name = "const-oid" +version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6539aa9c6a4cd31f4b1c040f860a1eac9aa80e7df6b05d506a6e7179936d6a01" -dependencies = [ - "console-api", - "crossbeam-channel", - "crossbeam-utils", - "futures-task", - "hdrhistogram", - "humantime", - "hyper-util", - "prost", - "prost-types", - "serde", - "serde_json", - "thread_local", - "tokio", - "tokio-stream", - "tonic", - "tracing", - "tracing-core", - "tracing-subscriber", -] +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" [[package]] name = "const-random" @@ -1900,16 +1308,6 @@ dependencies = [ "libc", ] -[[package]] -name = "core-foundation" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -1926,12 +1324,12 @@ dependencies = [ ] [[package]] -name = "core2" -version = "0.4.0" +name = "cpufeatures" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" dependencies = [ - "memchr", + "libc", ] [[package]] @@ -1968,8 +1366,8 @@ dependencies = [ ] [[package]] -name = "crossbeam-channel" -version = "0.5.15" +name = "crossbeam-queue" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" dependencies = [ @@ -2025,12 +1423,6 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" -[[package]] -name = "dary_heap" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" - [[package]] name = "dashmap" version = "6.1.0" @@ -2064,7 +1456,6 @@ dependencies = [ "datafusion-common-runtime", "datafusion-datasource", "datafusion-datasource-avro", - "datafusion-datasource-avro", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", @@ -2122,7 +1513,6 @@ dependencies = [ "datafusion-sql", "futures", "itertools 0.14.0", - "itertools 0.14.0", "log", "object_store", "parking_lot", @@ -2178,32 +1568,6 @@ dependencies = [ "url", ] -[[package]] -name = "datafusion-cli" -version = "49.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db88b7c2988301968b5234be0011ae73c67559b6f62d771393bb442d16213c60" -dependencies = [ - "arrow", - "async-trait", - "aws-config", - "aws-credential-types", - "clap", - "datafusion", - "dirs", - "env_logger", - "futures", - "log", - "mimalloc", - "object_store", - "parking_lot", - "parquet", - "regex", - "rustyline", - "tokio", - "url", -] - [[package]] name = "datafusion-common" version = "49.0.0" @@ -2212,11 +1576,9 @@ checksum = "3d66104731b7476a8c86fbe7a6fd741e6329791166ac89a91fcd8336a560ddaf" dependencies = [ "ahash", "apache-avro", - "apache-avro", "arrow", "arrow-ipc", "base64 0.22.1", - "base64 0.22.1", "chrono", "half", "hashbrown 0.14.5", @@ -2268,7 +1630,6 @@ dependencies = [ "futures", "glob", "itertools 0.14.0", - "itertools 0.14.0", "log", "object_store", "parquet", @@ -2306,31 +1667,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "datafusion-datasource-avro" -version = "49.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "831cfe556658133ea4270d616164ce27f737e9e4d5e359e1b1b269e0bf767cef" -dependencies = [ - "apache-avro", - "arrow", - 
"async-trait", - "bytes", - "chrono", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - "datafusion-execution", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "num-traits", - "object_store", - "tokio", -] - [[package]] name = "datafusion-datasource-csv" version = "49.0.0" @@ -2455,7 +1791,6 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap 2.10.0", - "indexmap 2.10.0", "paste", "recursive", "serde_json", @@ -2472,8 +1807,6 @@ dependencies = [ "datafusion-common", "indexmap 2.10.0", "itertools 0.14.0", - "indexmap 2.10.0", - "itertools 0.14.0", "paste", ] @@ -2486,7 +1819,6 @@ dependencies = [ "arrow", "arrow-buffer", "base64 0.22.1", - "base64 0.22.1", "blake2", "blake3", "chrono", @@ -2498,7 +1830,6 @@ dependencies = [ "datafusion-macros", "hex", "itertools 0.14.0", - "itertools 0.14.0", "log", "md-5", "rand 0.9.2", @@ -2560,7 +1891,6 @@ dependencies = [ "datafusion-macros", "datafusion-physical-expr-common", "itertools 0.14.0", - "itertools 0.14.0", "log", "paste", ] @@ -2634,8 +1964,6 @@ dependencies = [ "datafusion-physical-expr", "indexmap 2.10.0", "itertools 0.14.0", - "indexmap 2.10.0", - "itertools 0.14.0", "log", "recursive", "regex", @@ -2659,8 +1987,6 @@ dependencies = [ "hashbrown 0.14.5", "indexmap 2.10.0", "itertools 0.14.0", - "indexmap 2.10.0", - "itertools 0.14.0", "log", "paste", "petgraph", @@ -2678,7 +2004,6 @@ dependencies = [ "datafusion-expr-common", "hashbrown 0.14.5", "itertools 0.14.0", - "itertools 0.14.0", ] [[package]] @@ -2697,7 +2022,6 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "itertools 0.14.0", "log", "recursive", ] @@ -2726,8 +2050,6 @@ dependencies = [ "hashbrown 0.14.5", "indexmap 2.10.0", "itertools 0.14.0", - "indexmap 2.10.0", - "itertools 0.14.0", "log", "parking_lot", "pin-project-lite", @@ -2749,7 +2071,6 @@ 
dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-plan", "itertools 0.14.0", - "itertools 0.14.0", "log", ] @@ -2771,7 +2092,6 @@ dependencies = [ "datafusion-sql", "futures", "itertools 0.14.0", - "itertools 0.14.0", "log", "object_store", "parking_lot", @@ -2789,7 +2109,6 @@ dependencies = [ "datafusion-common", "datafusion-expr", "indexmap 2.10.0", - "indexmap 2.10.0", "log", "recursive", "regex", @@ -2797,10 +2116,10 @@ dependencies = [ ] [[package]] -name = "deranged" -version = "0.4.0" +name = "der" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ "const-oid", "pem-rfc7468", @@ -2823,6 +2142,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", + "const-oid", "crypto-common", "subtle", ] @@ -2845,7 +2165,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -2860,8 +2180,8 @@ dependencies = [ ] [[package]] -name = "dunce" -version = "1.0.5" +name = "dotenvy" +version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" @@ -2876,18 +2196,6 @@ name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "endian-type" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" - -[[package]] -name = "env_filter" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" dependencies = [ "serde", ] @@ -2944,8 +2252,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" [[package]] -name = "error-code" -version = "3.3.2" +name = "etcetera" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" dependencies = [ @@ -2982,17 +2290,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "fd-lock" -version = "4.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" -dependencies = [ - "cfg-if", - "rustix 1.0.8", - "windows-sys 0.59.0", -] - [[package]] name = "fixedbitset" version = "0.5.7" @@ -3020,6 +2317,17 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "futures-core", + "futures-sink", + "spin", +] + [[package]] name = "fnv" version = "1.0.7" @@ -3047,12 +2355,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - [[package]] name = "funty" version = "2.0.0" @@ -3101,6 +2403,17 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + 
"lock_api", + "parking_lot", +] + [[package]] name = "futures-io" version = "0.3.31" @@ -3179,12 +2492,10 @@ checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "js-sys", - "js-sys", "libc", "r-efi", "wasi 0.14.2+wasi-0.2.4", "wasm-bindgen", - "wasm-bindgen", ] [[package]] @@ -3195,28 +2506,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" - -[[package]] -name = "h2" -version = "0.4.12" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" -dependencies = [ - "atomic-waker", - "bytes", - "fnv", - "futures-core", - "futures-sink", - "http 1.3.1", - "indexmap 2.10.0", - "slab", - "tokio", - "tokio-util", - "tracing", -] +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "h2" @@ -3254,12 +2546,6 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.5" @@ -3281,13 +2567,13 @@ dependencies = [ "foldhash", ] -[[package]] -name = "hdrhistogram" -version = "7.5.4" +[[package]] +name = "hashlink" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" dependencies = [ - "hashbrown 0.15.4", + "hashbrown 0.15.5", ] 
[[package]] @@ -3315,6 +2601,15 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + [[package]] name = "hmac" version = "0.12.1" @@ -3344,17 +2639,6 @@ dependencies = [ "itoa", ] -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - [[package]] name = "http" version = "1.3.1" @@ -3412,52 +2696,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http 0.2.12", - "pin-project-lite", -] - -[[package]] -name = "http-body" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" -dependencies = [ - "bytes", - "http 1.3.1", -] - -[[package]] -name = "http-body-util" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" -dependencies = [ - "bytes", - "futures-core", - "http 1.3.1", - "http-body 1.0.1", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" - -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - [[package]] name = "humantime" version = "2.2.0" @@ -3539,81 +2777,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "hyper" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" -dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "h2", - "http 1.3.1", - "http-body 1.0.1", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "smallvec", - "tokio", - "want", -] - -[[package]] -name = "hyper-rustls" -version = "0.27.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" -dependencies = [ - "http 1.3.1", - "hyper", - "hyper-util", - "rustls", - "rustls-native-certs", - "rustls-pki-types", - "tokio", - "tokio-rustls", - "tower-service", -] - -[[package]] -name = "hyper-timeout" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" -dependencies = [ - "hyper", - "hyper-util", - "pin-project-lite", - "tokio", - "tower-service", -] - -[[package]] -name = "hyper-util" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" -dependencies = [ - "base64 0.22.1", - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "http 1.3.1", - "http-body 1.0.1", - "hyper", - "ipnet", - "libc", - "percent-encoding", - "pin-project-lite", - "socket2 0.6.0", - "tokio", - "tower-service", - "tracing", -] - [[package]] name = "iana-time-zone" version = "0.1.63" @@ 
-3755,16 +2918,6 @@ dependencies = [ "hashbrown 0.12.3", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "2.10.0" @@ -3823,37 +2976,6 @@ dependencies = [ "either", ] -[[package]] -name = "ipnet" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" - -[[package]] -name = "iri-string" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" -dependencies = [ - "memchr", - "serde", -] - -[[package]] -name = "is_terminal_polyfill" -version = "1.70.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" - -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.14.0" @@ -3893,30 +3015,6 @@ dependencies = [ "syn", ] -[[package]] -name = "jiff" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" -dependencies = [ - "jiff-static", - "log", - "portable-atomic", - "portable-atomic-util", - "serde", -] - -[[package]] -name = "jiff-static" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "jobserver" version = 
"0.1.33" @@ -4024,9 +3122,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.174" +version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" [[package]] name = "libflate" @@ -4059,7 +3157,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.53.3", ] [[package]] @@ -4086,13 +3184,14 @@ checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" dependencies = [ "bitflags", "libc", + "redox_syscall", ] [[package]] -name = "libmimalloc-sys" -version = "0.1.43" +name = "libsqlite3-sys" +version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf88cd67e9de251c1781dbe2f641a1a3ad66eaae831b8a2c38fbdc5ddae16d4d" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" dependencies = [ "cc", "pkg-config", @@ -4114,12 +3213,6 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" -[[package]] -name = "linux-raw-sys" -version = "0.4.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" - [[package]] name = "linux-raw-sys" version = "0.9.4" @@ -4154,12 +3247,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" -[[package]] -name = "lru-slab" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" - [[package]] name = "lz4_flex" version = "0.11.5" @@ -4195,48 +3282,21 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" -[[package]] -name = "matchit" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" - [[package]] name = "md-5" version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ - "cfg-if", - "digest", -] - -[[package]] -name = "memchr" -version = "2.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" - -[[package]] -name = "mimalloc" -version = "0.1.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1791cbe101e95af5764f06f20f6760521f7158f69dbf9d6baf941ee1bf6bc40" -dependencies = [ - "libmimalloc-sys", -] - -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + "cfg-if", + "digest", +] [[package]] -name = "minimal-lexical" -version = "0.2.1" +name = "memchr" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "mimalloc" @@ -4310,37 +3370,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "nibble_vec" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" 
-dependencies = [ - "smallvec", -] - -[[package]] -name = "nix" -version = "0.30.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" -dependencies = [ - "bitflags", - "cfg-if", - "cfg_aliases", - "libc", -] - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -4408,12 +3437,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" -[[package]] -name = "num-conv" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" - [[package]] name = "num-integer" version = "0.1.46" @@ -4472,23 +3495,16 @@ checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" dependencies = [ "async-trait", "base64 0.22.1", - "base64 0.22.1", "bytes", "chrono", "form_urlencoded", - "form_urlencoded", "futures", "http 1.3.1", "http-body-util", - "http 1.3.1", - "http-body-util", "humantime", "hyper", "itertools 0.14.0", "md-5", - "hyper", - "itertools 0.14.0", - "md-5", "parking_lot", "percent-encoding", "quick-xml", @@ -4499,16 +3515,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "thiserror 2.0.12", - "quick-xml", - "rand 0.9.2", - "reqwest", - "ring", - "rustls-pemfile", - "serde", - "serde_json", - "serde_urlencoded", - "thiserror 2.0.12", + "thiserror 2.0.14", "tokio", "tracing", "url", @@ -4535,33 +3542,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" -[[package]] -name = 
"once_cell_polyfill" -version = "1.70.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" - -[[package]] -name = "openssl-probe" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" - [[package]] name = "optd-catalog" version = "0.1.0" - -[[package]] -name = "optd-cli" -version = "0.1.0" dependencies = [ - "clap", + "async-trait", "datafusion", - "datafusion-cli", - "dirs", - "object_store", - "regex", - "tokio", + "glob", + "parking_lot", + "sqlx", "url", "uuid", ] @@ -4588,13 +3577,10 @@ dependencies = [ "bitvec", "console-subscriber", "itertools 0.14.0", - "console-subscriber", - "itertools 0.14.0", "pretty-xmlish", "tokio", "tracing", "tracing-subscriber", - "tracing-subscriber", "tracing-test", ] @@ -4611,19 +3597,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" -[[package]] -name = "optd-datafusion" -version = "0.1.0" -dependencies = [ - "datafusion", -] - -[[package]] -name = "option-ext" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" - [[package]] name = "ordered-float" version = "2.10.1" @@ -4639,18 +3612,18 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" -[[package]] -name = "outref" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" - [[package]] name = "overload" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + [[package]] name = "parking_lot" version = "0.12.4" @@ -4689,7 +3662,6 @@ dependencies = [ "arrow-schema", "arrow-select", "base64 0.22.1", - "base64 0.22.1", "brotli", "bytes", "chrono", @@ -4718,6 +3690,15 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -4731,7 +3712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" dependencies = [ "fixedbitset", - "hashbrown 0.15.4", + "hashbrown 0.15.5", "indexmap 2.10.0", "serde", ] @@ -4774,26 +3755,6 @@ dependencies = [ "syn", ] -[[package]] -name = "pin-project" -version = "1.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "pin-project-lite" version = "0.2.16" @@ -4807,20 +3768,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 
[[package]] -name = "pkg-config" -version = "0.3.32" +name = "pkcs1" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] [[package]] -name = "portable-atomic" -version = "1.11.1" +name = "pkcs8" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] [[package]] -name = "portable-atomic-util" -version = "0.2.4" +name = "pkg-config" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" @@ -4854,12 +3824,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - [[package]] name = "ppv-lite86" version = "0.2.21" @@ -4885,21 +3849,11 @@ dependencies = [ "syn", ] -[[package]] -name = "prettyplease" -version = "0.2.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" -dependencies = [ - "proc-macro2", - "syn", -] - [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" 
dependencies = [ "unicode-ident", ] @@ -4911,141 +3865,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", - "prost-derive", -] - -[[package]] -name = "prost-derive" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" -dependencies = [ - "anyhow", - "itertools 0.14.0", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "prost-types" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" -dependencies = [ - "prost", -] - -[[package]] -name = "prost" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-derive" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" -dependencies = [ - "anyhow", - "itertools 0.14.0", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "prost-types" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" -dependencies = [ - "prost", -] - -[[package]] -name = "psm" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" -dependencies = [ - "cc", -] - -[[package]] -name = "quad-rand" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" - -[[package]] -name = 
"quick-xml" -version = "0.38.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9845d9dccf565065824e69f9f235fafba1587031eda353c1f1561cd6a6be78f4" -dependencies = [ - "memchr", - "serde", + "prost-derive", ] [[package]] -name = "quinn" -version = "0.11.8" +name = "prost-derive" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ - "bytes", - "cfg_aliases", - "pin-project-lite", - "quinn-proto", - "quinn-udp", - "rustc-hash 2.1.1", - "rustls", - "socket2 0.5.10", - "thiserror 2.0.12", - "tokio", - "tracing", - "web-time", + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "quinn-proto" -version = "0.11.12" +name = "prost-types" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ - "bytes", - "getrandom 0.3.3", - "lru-slab", - "rand 0.9.2", - "ring", - "rustc-hash 2.1.1", - "rustls", - "rustls-pki-types", - "slab", - "thiserror 2.0.12", - "tinyvec", - "tracing", - "web-time", + "prost", ] [[package]] -name = "quinn-udp" -version = "0.5.13" +name = "psm" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970" +checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" dependencies = [ - "cfg_aliases", - "libc", - "once_cell", - "socket2 0.5.10", - "tracing", - "windows-sys 0.59.0", + "cc", ] [[package]] @@ -5078,7 +3929,7 @@ dependencies = [ "rustc-hash 2.1.1", "rustls", "socket2 0.5.10", - "thiserror 2.0.12", + "thiserror 2.0.14", "tokio", "tracing", "web-time", 
@@ -5099,7 +3950,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.12", + "thiserror 2.0.14", "tinyvec", "tracing", "web-time", @@ -5150,16 +4001,6 @@ dependencies = [ "nibble_vec", ] -[[package]] -name = "radix_trie" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" -dependencies = [ - "endian-type", - "nibble_vec", -] - [[package]] name = "rand" version = "0.8.5" @@ -5256,18 +4097,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.16", "libredox", - "thiserror 2.0.12", -] - -[[package]] -name = "redox_users" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" -dependencies = [ - "getrandom 0.2.16", - "libredox", - "thiserror 2.0.12", + "thiserror 2.0.14", ] [[package]] @@ -5308,12 +4138,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" -[[package]] -name = "regex-lite" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" - [[package]] name = "regex-syntax" version = "0.6.29" @@ -5389,8 +4213,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" [[package]] -name = "rle-decode-fast" -version = "1.0.3" +name = "rsa" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" dependencies = [ @@ -5426,18 +4250,6 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - [[package]] name = "rustc_version" version = "0.4.1" @@ -5460,19 +4272,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "rustix" -version = "0.38.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - [[package]] name = "rustix" version = "1.0.8" @@ -5483,7 +4282,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.9.4", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -5548,7 +4347,7 @@ dependencies = [ name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rustyline" @@ -5596,15 +4395,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "schannel" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" -dependencies = [ - "windows-sys 0.59.0", -] - [[package]] name = "scopeguard" version = "1.2.0" @@ -5634,29 +4424,6 @@ dependencies = [ "libc", ] -[[package]] -name = "security-framework" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c" -dependencies = [ - "bitflags", - "core-foundation", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "semver" version = "1.0.26" @@ -5687,15 +4454,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_bytes" -version = "0.11.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" -dependencies = [ - "serde", -] - [[package]] name = "serde_derive" version = "1.0.219" @@ -5731,6 +4489,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sha2" version = "0.10.9" @@ -5767,10 +4536,10 @@ dependencies = [ ] [[package]] -name = "signal-hook-registry" -version = "1.4.6" +name = "signature" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ "digest", "rand_core 0.6.4", @@ -5799,6 +4568,9 @@ name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] [[package]] name = "snap" @@ -5827,20 +4599,19 @@ dependencies = [ ] [[package]] -name = "socket2" -version = "0.5.10" +name = "spin" +version = "0.9.8" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" dependencies = [ - "libc", - "windows-sys 0.52.0", + "lock_api", ] [[package]] -name = "socket2" -version = "0.6.0" +name = "spki" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" dependencies = [ "base64ct", "der", @@ -5897,7 +4668,7 @@ dependencies = [ "futures-intrusive", "futures-io", "futures-util", - "hashbrown 0.15.4", + "hashbrown 0.15.5", "hashlink", "indexmap 2.10.0", "log", @@ -5908,7 +4679,7 @@ dependencies = [ "serde_json", "sha2", "smallvec", - "thiserror 2.0.12", + "thiserror 2.0.14", "tracing", "url", ] @@ -5987,7 +4758,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.12", + "thiserror 2.0.14", "tracing", "whoami", ] @@ -6024,7 +4795,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.12", + "thiserror 2.0.14", "tracing", "whoami", ] @@ -6048,7 +4819,7 @@ dependencies = [ "serde", "serde_urlencoded", "sqlx-core", - "thiserror 2.0.12", + "thiserror 2.0.14", "tracing", "url", ] @@ -6079,22 +4850,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] -name = "strsim" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" - -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - -[[package]] -name = "strum_macros" -version = "0.26.4" +name = 
"stringprep" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -6152,15 +4911,6 @@ dependencies = [ "futures-core", ] -[[package]] -name = "sync_wrapper" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" -dependencies = [ - "futures-core", -] - [[package]] name = "synstructure" version = "0.13.2" @@ -6188,7 +4938,6 @@ dependencies = [ "getrandom 0.3.3", "once_cell", "rustix 1.0.8", - "rustix 1.0.8", "windows-sys 0.59.0", ] @@ -6203,32 +4952,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl 1.0.69", -] - -[[package]] -name = "thiserror" -version = "2.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" -dependencies = [ - "thiserror-impl 2.0.12", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" dependencies = [ - "proc-macro2", - "quote", - "syn", - "thiserror-impl 2.0.12", + "thiserror-impl 2.0.14", ] [[package]] @@ -6244,9 +4972,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.12" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +checksum = "cc5b44b4ab9c2fdd0e0512e6bece8388e214c0749f5862b114cc5b7a25daf227" dependencies = [ "proc-macro2", "quote", @@ -6260,47 +4988,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ "cfg-if", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float", -] - -[[package]] -name = "time" -version = "0.3.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" -dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" +] [[package]] -name = "time-macros" -version = "0.2.22" +name = "thrift" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ - "num-conv", - "time-core", + "byteorder", + "integer-encoding", + "ordered-float", ] [[package]] @@ -6381,15 +5079,11 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "signal-hook-registry", "slab", "socket2 0.6.0", - "socket2 0.6.0", "tokio-macros", "tracing", "windows-sys 0.59.0", - "tracing", - "windows-sys 0.59.0", ] [[package]] @@ -6424,27 +5118,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-rustls" -version = "0.26.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" -dependencies = [ - "rustls", - "tokio", -] - -[[package]] -name = "tokio-stream" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-util" version = "0.7.16" @@ -6553,107 +5226,13 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" -[[package]] -name = "tonic" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" -dependencies = [ - "async-stream", - "async-trait", - "axum", - "base64 0.22.1", - "bytes", - "h2", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "hyper", - "hyper-timeout", - "hyper-util", - "percent-encoding", - "pin-project", - "prost", - "socket2 0.5.10", - "tokio", - "tokio-stream", - "tower 0.4.13", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "indexmap 1.9.3", - "pin-project", - "pin-project-lite", - "rand 0.8.5", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tower" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" -dependencies = [ - "futures-core", - "futures-util", - "pin-project-lite", - "sync_wrapper", - "tokio", - "tower-layer", - 
"tower-service", -] - -[[package]] -name = "tower-http" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" -dependencies = [ - "bitflags", - "bytes", - "futures-util", - "http 1.3.1", - "http-body 1.0.1", - "iri-string", - "pin-project-lite", - "tower 0.5.2", - "tower-layer", - "tower-service", -] - -[[package]] -name = "tower-layer" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" - -[[package]] -name = "tower-service" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" - [[package]] name = "tracing" version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -6736,12 +5315,6 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - [[package]] name = "twox-hash" version = "2.1.1" @@ -6769,23 +5342,20 @@ dependencies = [ ] [[package]] -name = "typed-builder" -version = "0.19.1" +name = "typenum" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06fbd5b8de54c5f7c91f6fe4cebb949be2125d7758e630bb58b1d831dbce600" -dependencies = [ - "typed-builder-macro", -] +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] -name = "typed-builder-macro" -version = "0.19.1" +name = 
"unicode-bidi" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] -name = "typed-builder" -version = "0.19.1" +name = "unicode-ident" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" @@ -6839,12 +5409,6 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" -[[package]] -name = "urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -6857,22 +5421,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" -[[package]] -name = "utf8parse" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" - [[package]] name = "uuid" -version = "1.17.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +checksum = "f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be" dependencies = [ "getrandom 0.3.3", "js-sys", "serde", - "serde", "wasm-bindgen", ] @@ -6883,16 +5440,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" [[package]] -name = "version_check" -version = "0.9.5" +name = "vcpkg" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] -name = "vsimd" -version = "0.8.0" +name = "version_check" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "vsimd" @@ -6919,15 +5476,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -6943,6 +5491,12 @@ dependencies = [ "wit-bindgen-rt", ] +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -7027,19 +5581,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "wasm-streams" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "web-sys" version = "0.3.77" @@ -7073,12 +5614,12 @@ dependencies = [ ] [[package]] -name = "which" -version = "4.4.2" +name = "whoami" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" dependencies = [ - "redox_syscall", + "libredox", "wasite", ] @@ -7183,20 +5724,20 @@ 
dependencies = [ [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ "windows-targets 0.52.6", ] [[package]] name = "windows-sys" -version = "0.60.2" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets 0.53.3", + "windows-targets 0.52.6", ] [[package]] @@ -7209,12 +5750,18 @@ dependencies = [ ] [[package]] -name = "windows-sys" -version = "0.60.2" +name = "windows-targets" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows-targets 0.53.3", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -7227,7 +5774,6 @@ dependencies = [ "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", "windows_i686_gnullvm 0.52.6", - "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", @@ -7252,8 +5798,8 @@ dependencies = [ ] [[package]] -name = "windows-targets" -version = "0.53.3" +name = "windows_aarch64_gnullvm" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" @@ -7270,8 +5816,8 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" [[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.0" +name = "windows_aarch64_msvc" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" @@ -7288,8 +5834,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" [[package]] -name = "windows_aarch64_msvc" -version = "0.53.0" +name = "windows_i686_gnu" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" @@ -7305,12 +5851,6 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" -[[package]] -name = "windows_i686_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" @@ -7324,8 +5864,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" [[package]] -name = "windows_i686_gnullvm" -version = "0.53.0" +name = "windows_i686_msvc" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" @@ -7342,8 +5882,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" [[package]] -name = "windows_i686_msvc" -version = "0.53.0" +name = "windows_x86_64_gnu" +version = "0.48.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" @@ -7360,8 +5900,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" [[package]] -name = "windows_x86_64_gnu" -version = "0.53.0" +name = "windows_x86_64_gnullvm" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" @@ -7378,8 +5918,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.0" +name = "windows_x86_64_msvc" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" @@ -7395,12 +5935,6 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" - [[package]] name = "wit-bindgen-rt" version = "0.39.0" @@ -7431,12 +5965,6 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" -[[package]] -name = "xmlparser" -version = "0.13.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" - [[package]] name = "xz2" version = "0.1.7" diff --git a/cli/src/lib.rs b/cli/src/lib.rs index a498eca..af4af7d 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -1,3 +1,4 @@ +use 
datafusion::logical_expr::DdlStatement; use datafusion::prelude::{DataFrame, SessionContext}; use datafusion_cli::cli_context::CliSessionContext; @@ -77,8 +78,17 @@ impl CliSessionContext for OptdCliSessionContext { } _ => (), } - } + } else if let datafusion::logical_expr::LogicalPlan::Ddl(ddl) = &plan { + // match ddl { + // DdlStatement::CreateExternalTable(create_table) => { + // println!("Creating external table"); + // let _ = create_table.clone(); + // return self.return_empty_dataframe(); + // } + // _ => (), + // } + } self.inner.execute_logical_plan(plan).await }; From 762fae7f4dd08d9b67dfdcb314e304eef4b5494a Mon Sep 17 00:00:00 2001 From: HFFuture Date: Wed, 20 Aug 2025 12:14:04 -0400 Subject: [PATCH 08/40] optclicontext create external table impl attempt --- .gitignore | 5 +- Cargo.lock | 583 +++++++++++++++++++++++++++++++ Cargo.toml | 2 - cli/Cargo.toml | 2 + cli/src/lib.rs | 106 +++++- optd/catalog/Cargo.toml | 1 + optd/catalog/src/lib.rs | 7 + optd/catalog/src/optd_catalog.rs | 276 ++++++++------- optd/core/Cargo.toml | 8 +- 9 files changed, 834 insertions(+), 156 deletions(-) create mode 100644 optd/catalog/src/lib.rs diff --git a/.gitignore b/.gitignore index 958f6c7..4ee1a43 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,7 @@ # datafusion .history -*.db \ No newline at end of file +*.db + +# configuration +.vscode/launch.json diff --git a/Cargo.lock b/Cargo.lock index 4a596af..05eeb64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -896,6 +896,12 @@ dependencies = [ "vsimd", ] +[[package]] +name = "base64ct" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" + [[package]] name = "bigdecimal" version = "0.4.8" @@ -938,6 +944,9 @@ name = "bitflags" version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +dependencies = [ + "serde", 
+] [[package]] name = "bitvec" @@ -1209,6 +1218,15 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "console-api" version = "0.8.1" @@ -1248,6 +1266,12 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "const-random" version = "0.1.18" @@ -1308,6 +1332,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.5.0" @@ -1326,6 +1365,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -1428,6 +1476,7 @@ dependencies = [ "datafusion-sql", "flate2", "futures", + "hex", "itertools 0.14.0", "log", "object_store", @@ -1533,6 +1582,7 @@ dependencies = [ "chrono", "half", "hashbrown 0.14.5", + "hex", "indexmap 2.10.0", "libc", "log", @@ -1690,6 +1740,7 @@ dependencies = [ "datafusion-pruning", "datafusion-session", "futures", + "hex", "itertools 0.14.0", "log", "object_store", @@ 
-2064,6 +2115,17 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + [[package]] name = "deranged" version = "0.4.0" @@ -2080,6 +2142,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", + "const-oid", "crypto-common", "subtle", ] @@ -2116,6 +2179,12 @@ dependencies = [ "syn", ] +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + [[package]] name = "dunce" version = "1.0.5" @@ -2127,6 +2196,9 @@ name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +dependencies = [ + "serde", +] [[package]] name = "endian-type" @@ -2179,6 +2251,28 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -2223,6 +2317,17 @@ dependencies = [ "miniz_oxide", ] 
+[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "futures-core", + "futures-sink", + "spin", +] + [[package]] name = "fnv" version = "1.0.7" @@ -2298,6 +2403,17 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + [[package]] name = "futures-io" version = "0.3.31" @@ -2451,6 +2567,15 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "hdrhistogram" version = "7.5.4" @@ -2476,6 +2601,15 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + [[package]] name = "hmac" version = "0.12.1" @@ -2906,6 +3040,9 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] [[package]] name = "lazycell" @@ -3047,6 +3184,18 @@ checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" dependencies = [ "bitflags", "libc", + "redox_syscall", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", ] [[package]] @@ -3256,6 +3405,23 @@ dependencies = [ "serde", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -3379,6 +3545,16 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "optd-catalog" version = "0.1.0" +dependencies = [ + "async-trait", + "datafusion", + "glob", + "parking_lot", + "sqlx", + "tokio", + "url", + "uuid", +] [[package]] name = "optd-cli" @@ -3388,7 +3564,9 @@ dependencies = [ "datafusion", "datafusion-cli", "dirs", + "futures", "object_store", + "optd-catalog", "regex", "tokio", "url", @@ -3443,6 +3621,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + [[package]] name = "parking_lot" version = "0.12.4" @@ -3509,6 +3693,15 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = 
"percent-encoding" version = "2.3.1" @@ -3577,6 +3770,27 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -4001,6 +4215,26 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" +[[package]] +name = "rsa" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "signature", + "spki", + "subtle", + "zeroize", +] + [[package]] name = "rustc-demangle" version = "0.1.26" @@ -4258,6 +4492,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sha2" version = "0.10.9" @@ -4293,6 +4538,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + 
"digest", + "rand_core 0.6.4", +] + [[package]] name = "simdutf8" version = "0.1.5" @@ -4316,6 +4571,9 @@ name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] [[package]] name = "snap" @@ -4343,6 +4601,25 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "sqlparser" version = "0.55.0" @@ -4365,6 +4642,191 @@ dependencies = [ "syn", ] +[[package]] +name = "sqlx" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" +dependencies = [ + "base64 0.22.1", + "bytes", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashbrown 0.15.5", + "hashlink", + "indexmap 2.10.0", + "log", + "memchr", + "once_cell", + "percent-encoding", + "serde", + "serde_json", + "sha2", + "smallvec", + "thiserror 2.0.12", + "tracing", + "url", +] + +[[package]] +name = "sqlx-macros" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" +dependencies = [ + "dotenvy", + "either", + "heck", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" +dependencies = [ + "atoi", + "base64 0.22.1", + "bitflags", + "byteorder", + "bytes", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.5", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.12", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" +dependencies = [ + "atoi", + "base64 0.22.1", + "bitflags", + "byteorder", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand 0.8.5", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.12", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.8.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" +dependencies = [ + "atoi", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "serde_urlencoded", + "sqlx-core", + "thiserror 2.0.12", + "tracing", + "url", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -4390,6 +4852,17 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + [[package]] name = "strsim" version = "0.11.1" @@ -4762,6 +5235,7 @@ version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -4876,12 +5350,33 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + [[package]] name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-normalization" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" + [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -4947,6 +5442,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -4993,6 +5494,12 @@ dependencies = [ "wit-bindgen-rt", ] +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -5109,6 +5616,16 @@ dependencies = [ "rustix 0.38.44", ] +[[package]] +name = "whoami" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +dependencies = [ + "libredox", + "wasite", +] + [[package]] name = "winapi" version = "0.3.9" @@ -5199,6 +5716,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -5226,6 +5752,21 @@ dependencies = [ "windows-targets 0.53.3", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -5259,6 +5800,12 @@ dependencies = [ "windows_x86_64_msvc 0.53.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -5271,6 +5818,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -5283,6 +5836,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -5307,6 +5866,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -5319,6 +5884,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -5331,6 +5902,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -5343,6 +5920,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" diff --git a/Cargo.toml b/Cargo.toml index 4e9730b..6c77c8b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,6 @@ default-members = ["optd/core"] [workspace.dependencies] -optd-datafusion = { path = "connectors/datafusion" } - tokio = { version = "1.47", features = ["macros", "rt", "sync"] } # DataFusion dependencies diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a489eb5..98d9793 100644 --- a/cli/Cargo.toml +++ 
b/cli/Cargo.toml @@ -20,3 +20,5 @@ dirs = "6.0.0" regex = "1.8" object_store = "0.12.3" url = "2.5.4" +futures = "0.3.31" +optd-catalog = { path = "../optd/catalog", version = "0.1" } \ No newline at end of file diff --git a/cli/src/lib.rs b/cli/src/lib.rs index af4af7d..5141e12 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -1,6 +1,14 @@ -use datafusion::logical_expr::DdlStatement; -use datafusion::prelude::{DataFrame, SessionContext}; +use std::sync::Arc; +use datafusion::{ + common::{exec_err, not_impl_err, DataFusionError, Result}, + datasource::TableProvider, + logical_expr::{CreateExternalTable, LogicalPlanBuilder}, + prelude::{DataFrame, SessionContext}, + sql::TableReference +}; use datafusion_cli::cli_context::CliSessionContext; +use tokio::sync::RwLock; +use optd_catalog::OptdSchemaProvider; pub struct OptdCliSessionContext { inner: SessionContext, @@ -15,10 +23,78 @@ impl OptdCliSessionContext { &self.inner } - pub fn return_empty_dataframe(&self) -> datafusion::common::Result { - let plan = datafusion::logical_expr::LogicalPlanBuilder::empty(false).build()?; + #[ignore = "not yet fully implemented"] + // pub fn register_optd_catalog(&self, optd_catalog: Arc) -> Result<()> { + // let state = self.inner.state_ref().read().clone(); + // state.register_catalog( + // "ducklake", + // Arc::new(datafusion_ducklake::DuckLakeCatalogProvider::new()), + // ) + // } + + pub fn return_empty_dataframe(&self) -> Result { + let plan = LogicalPlanBuilder::empty(false).build()?; Ok(DataFrame::new(self.inner.state(), plan)) } + + async fn create_external_table( + &self, + cmd: &CreateExternalTable, + ) -> Result { + let exist = self.inner.table_exist(cmd.name.clone())?; + + if cmd.temporary { + return not_impl_err!("Temporary tables not supported"); + } + + if exist { + match cmd.if_not_exists { + true => return self.return_empty_dataframe(), + false => { + return exec_err!("Table '{}' already exists", cmd.name); + } + } + } + + let table_provider: Arc = + 
self.create_custom_table(cmd).await?; + self.register_table(cmd.name.clone(), table_provider)?; + + self.return_empty_dataframe() + } + + async fn create_custom_table( + &self, + cmd: &CreateExternalTable, + ) -> Result> { + let state = self.inner.state_ref().read().clone(); + let file_type = cmd.file_type.to_uppercase(); + let factory = + state + .table_factories() + .get(file_type.as_str()) + .ok_or_else(|| { + DataFusionError::Execution(format!( + "Unable to find factory for {}", + cmd.file_type + )) + })?; + let table = (*factory).create(&state, cmd).await?; + Ok(table) + } + + pub fn register_table( + &self, + table_ref: impl Into, + provider: Arc, + ) -> Result>> { + let table_ref: TableReference = table_ref.into(); + let table = table_ref.table().to_owned(); + self.inner.state_ref() + .read() + .schema_for_ref(table_ref)? + .register_table(table, provider) + } } impl CliSessionContext for OptdCliSessionContext { @@ -49,10 +125,7 @@ impl CliSessionContext for OptdCliSessionContext { ) -> ::core::pin::Pin< Box< dyn ::core::future::Future< - Output = Result< - datafusion::prelude::DataFrame, - datafusion::common::DataFusionError, - >, + Output = Result, > + ::core::marker::Send + 'async_trait, >, @@ -79,19 +152,16 @@ impl CliSessionContext for OptdCliSessionContext { _ => (), } } else if let datafusion::logical_expr::LogicalPlan::Ddl(ddl) = &plan { - // match ddl { - // DdlStatement::CreateExternalTable(create_table) => { - // println!("Creating external table"); - - // let _ = create_table.clone(); - // return self.return_empty_dataframe(); - // } - // _ => (), - // } + match ddl { + datafusion::logical_expr::DdlStatement::CreateExternalTable(create_table) => { + return self.create_external_table(&create_table).await; + } + _ => (), + } } self.inner.execute_logical_plan(plan).await }; Box::pin(fut) } -} +} \ No newline at end of file diff --git a/optd/catalog/Cargo.toml b/optd/catalog/Cargo.toml index bec95c2..0951418 100644 --- a/optd/catalog/Cargo.toml +++ 
b/optd/catalog/Cargo.toml @@ -11,6 +11,7 @@ parking_lot = "=0.12.4" glob = "0.3.2" url = "2.5.4" async-trait = "0.1.88" +tokio = { workspace = true, features = ["full"] } [dependencies.uuid] version = "1.17.0" diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs new file mode 100644 index 0000000..a11cff1 --- /dev/null +++ b/optd/catalog/src/lib.rs @@ -0,0 +1,7 @@ + +mod optd_catalog; +mod optd_table; + +pub use optd_catalog::*; +pub use optd_table::*; + diff --git a/optd/catalog/src/optd_catalog.rs b/optd/catalog/src/optd_catalog.rs index 51c48ce..ff57663 100644 --- a/optd/catalog/src/optd_catalog.rs +++ b/optd/catalog/src/optd_catalog.rs @@ -1,128 +1,148 @@ -// use datafusion::{ -// catalog::{CatalogProvider, CatalogProviderList, SchemaProvider, TableProvider}, -// error::Result, -// execution::SessionState, -// }; -// use parking_lot::RwLock; -// use std::any::Any; -// use std::sync::{Arc, Weak}; - -// #[derive(Debug)] -// pub struct OptdCatalogProviderList { -// inner: Arc, -// state: Weak>, -// } - -// impl OptdCatalogProviderList { -// pub fn new(inner: Arc, state: Weak>) -> Self { -// Self { inner, state } -// } -// } - -// impl CatalogProviderList for OptdCatalogProviderList { -// fn as_any(&self) -> &dyn Any { -// self -// } - -// fn register_catalog( -// &self, -// name: String, -// catalog: Arc, -// ) -> Option> { -// self.inner.register_catalog(name, catalog) -// } - -// fn catalog_names(&self) -> Vec { -// self.inner.catalog_names() -// } - -// fn catalog(&self, name: &str) -> Option> { -// let state = self.state.clone(); -// self.inner -// .catalog(name) -// .map(|catalog| Arc::new(OptdCatalogProvider::new(catalog, state)) as _) -// } -// } - -// #[derive(Debug)] -// struct OptdCatalogProvider { -// inner: Arc, -// state: Weak>, -// } - -// impl OptdCatalogProvider { -// pub fn new(inner: Arc, state: Weak>) -> Self { -// Self { inner, state } -// } -// } - -// impl CatalogProvider for OptdCatalogProvider { -// fn as_any(&self) -> &dyn Any { 
-// self -// } - -// fn schema_names(&self) -> Vec { -// self.inner.schema_names() -// } - -// fn schema(&self, name: &str) -> Option> { -// let state = self.state.clone(); -// self.inner -// .schema(name) -// .map(|schema| Arc::new(OptdSchemaProvider::new(schema, state)) as _) -// } - -// fn register_schema( -// &self, -// name: &str, -// schema: Arc, -// ) -> Result>> { -// self.inner.register_schema(name, schema) -// } -// } - -// #[derive(Debug)] -// pub struct OptdSchemaProvider { -// inner: Arc, -// state: Weak>, -// } - -// impl OptdSchemaProvider { -// pub fn new(inner: Arc, state: Weak>) -> Self { -// Self { inner, state } -// } -// } - -// #[async_trait] -// impl SchemaProvider for OptdSchemaProvider { -// fn as_any(&self) -> &(dyn std::any::Any + 'static) { -// self -// } - -// async fn table(&self, name: &str) -> Result>> { -// self.inner.table(name) -// } - -// fn table_names(&self) -> Vec { -// self.inner.table_names() -// } - -// fn register_table( -// &self, -// name: String, -// table: Arc, -// ) -> Result>> { -// self.inner.register_table(name, table) -// } - -// fn deregister_table( -// &self, -// name: &str, -// ) -> Result>> { -// self.inner.deregister_table(name) -// } - -// fn table_exist(&self, name: &str) -> bool { -// self.inner.table_exist(name) -// } -// } +use datafusion::{ + catalog::{CatalogProvider, CatalogProviderList, SchemaProvider, TableProvider}, + error::Result, + execution::SessionState, + common::DataFusionError +}; +use parking_lot::RwLock; +use std::any::Any; +use std::sync::{Arc, Weak}; +use async_trait::async_trait; + +#[derive(Debug)] +pub struct OptdCatalogProviderList { + inner: Arc, + state: Weak>, +} + +impl OptdCatalogProviderList { + pub fn new(inner: Arc, state: Weak>) -> Self { + Self { inner, state } + } + + // pub fn new_from_location(path: &str) -> Result { + // let url = url::Url::parse(path)?; + // let state = Arc::downgrade(&SessionState::new()); + // let inner = Arc::new(); + // Ok(Self { inner, state }) + 
// } +} + +impl CatalogProviderList for OptdCatalogProviderList { + fn as_any(&self) -> &dyn Any { + self + } + + fn register_catalog( + &self, + name: String, + catalog: Arc, + ) -> Option> { + self.inner.register_catalog(name, catalog) + } + + fn catalog_names(&self) -> Vec { + self.inner.catalog_names() + } + + fn catalog(&self, name: &str) -> Option> { + let state = self.state.clone(); + self.inner + .catalog(name) + .map(|catalog| Arc::new(OptdCatalogProvider::new(catalog, state)) as _) + } +} + +#[derive(Debug)] +struct OptdCatalogProvider { + inner: Arc, + state: Weak>, +} + +impl OptdCatalogProvider { + pub fn new(inner: Arc, state: Weak>) -> Self { + Self { inner, state } + } +} + +impl CatalogProvider for OptdCatalogProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema_names(&self) -> Vec { + self.inner.schema_names() + } + + fn schema(&self, name: &str) -> Option> { + let state = self.state.clone(); + self.inner + .schema(name) + .map(|schema| Arc::new(OptdSchemaProvider::new(schema, state)) as _) + } + + fn register_schema( + &self, + name: &str, + schema: Arc, + ) -> Result>> { + self.inner.register_schema(name, schema) + } +} + +#[derive(Debug)] +pub struct OptdSchemaProvider { + inner: Arc, + state: Weak>, +} + +impl OptdSchemaProvider { + pub fn new(inner: Arc, state: Weak>) -> Self { + Self { inner, state } + } +} + +#[async_trait] +impl SchemaProvider for OptdSchemaProvider { + fn as_any(&self) -> &(dyn std::any::Any + 'static) { + self + } + + async fn table(&self, name: &str) -> Result>, DataFusionError> { + return self.inner.table(name).await; + } + + fn table_names(&self) -> Vec { + self.inner.table_names() + } + + fn register_table( + &self, + name: String, + table: Arc, + ) -> Result>> { + self.inner.register_table(name, table) + } + + fn deregister_table( + &self, + name: &str, + ) -> Result>> { + self.inner.deregister_table(name) + } + + fn table_exist(&self, name: &str) -> bool { + self.inner.table_exist(name) + } +} + 
+#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_getters() { + // let catalog = OptdCatalogProviderList::new(); + // assert_eq!(catalog.catalog_names(), Vec::::new()); + } +} \ No newline at end of file diff --git a/optd/core/Cargo.toml b/optd/core/Cargo.toml index 2d1681d..cb3bb2d 100644 --- a/optd/core/Cargo.toml +++ b/optd/core/Cargo.toml @@ -9,13 +9,7 @@ anyhow = "1.0.98" bitvec = "1.0.1" itertools = "0.14.0" pretty-xmlish = "0.1.13" -tokio = { workspace = true, features = [ - "rt-multi-thread", - "sync", - "macros", - "parking_lot", - "tracing", -] } +tokio = { workspace = true, features = ["rt-multi-thread", "sync", "macros", "parking_lot", "tracing", "full"] } tracing = "0.1.41" From 40274ab409f87f32c3b740fd95919f8cc750651a Mon Sep 17 00:00:00 2001 From: HFFuture Date: Sat, 18 Oct 2025 16:45:46 -0400 Subject: [PATCH 09/40] Tentative statistics retriver structs and fetch_table_statistics implementation --- optd/catalog/Cargo.toml | 16 +- optd/catalog/src/ducklake_connection.rs | 392 ++++++++++++++++++++++ optd/catalog/src/lib.rs | 14 +- optd/catalog/src/main.rs | 96 ++---- optd/catalog/src/statistics.rs | 419 ++++++++++++++++++++++++ 5 files changed, 848 insertions(+), 89 deletions(-) create mode 100644 optd/catalog/src/ducklake_connection.rs create mode 100644 optd/catalog/src/statistics.rs diff --git a/optd/catalog/Cargo.toml b/optd/catalog/Cargo.toml index 0951418..5a97ff9 100644 --- a/optd/catalog/Cargo.toml +++ b/optd/catalog/Cargo.toml @@ -5,15 +5,15 @@ edition.workspace = true repository.workspace = true [dependencies] -datafusion = "=49.0.0" -sqlx = { version = "=0.8.6", features = ["sqlite"] } -parking_lot = "=0.12.4" +datafusion = "=50.1.0" +parking_lot = "=0.12.5" glob = "0.3.2" url = "2.5.4" async-trait = "0.1.88" tokio = { workspace = true, features = ["full"] } - -[dependencies.uuid] -version = "1.17.0" -# Lets you generate random UUIDs -features = ["v4"] +duckdb = { features = ["r2d2"], version = "1.4.0" } +r2d2 = "0.8.10" 
+snafu = "0.8.6" +serde_json = "1.0" +futures = "0.3.31" +serde = "1.0.228" diff --git a/optd/catalog/src/ducklake_connection.rs b/optd/catalog/src/ducklake_connection.rs new file mode 100644 index 0000000..f7f53a4 --- /dev/null +++ b/optd/catalog/src/ducklake_connection.rs @@ -0,0 +1,392 @@ +use std::sync::Arc; + +use datafusion::{execution::SendableRecordBatchStream, physical_plan::memory::MemoryStream}; +use duckdb::{ + Connection, DuckdbConnectionManager as DuckDBManager, + arrow::{array::RecordBatch, datatypes::SchemaRef}, +}; +use r2d2::ManageConnection; +use snafu::{ResultExt, prelude::*}; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Connection to DuckDB failed: {}", source))] + ConnectionError { source: duckdb::Error }, + #[snafu(display("Failed to get connection from pool: {}", source))] + PoolError { source: r2d2::Error }, + #[snafu(display("Invalid database path: {}", path))] + InvalidPathError { path: Arc }, + #[snafu(display("Arrow query failed: {}", source))] + ArrowError { + source: duckdb::arrow::error::ArrowError, + }, + #[snafu(display("DataFusion error: {}", source))] + DataFusionError { + source: datafusion::error::DataFusionError, + }, + #[snafu(display("Other error: {}", details))] + Other { details: Arc }, +} + +impl From for Error { + fn from(err: datafusion::error::DataFusionError) -> Self { + Error::DataFusionError { source: err } + } +} + +impl From for Error { + fn from(err: duckdb::Error) -> Self { + Error::ConnectionError { source: err } + } +} + +#[derive(Debug)] +pub enum ConnectionMode { + Memory, + File, +} + +pub struct DuckLakeConnectionBuilder { + meta_name: Arc, + path: Arc, + mode: ConnectionMode, + manager: DuckDBManager, +} + +impl std::fmt::Debug for DuckLakeConnectionBuilder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "DuckLakeConnectionBuilder {{ name: {}, path: {}, mode: {:?} }}", + self.meta_name, self.path, self.mode + ) + } +} + +impl 
DuckLakeConnectionBuilder { + // Default constants + pub const DEFAULT_LAKE_NAME: &str = "meta_lake"; + + pub fn memory() -> Result { + let manager = DuckDBManager::memory().context(ConnectionSnafu)?; + Ok(Self { + meta_name: Arc::from(Self::DEFAULT_LAKE_NAME), + path: Arc::from(":memory:"), + mode: ConnectionMode::Memory, + manager, + }) + } + + pub fn file(path: &str) -> Result { + let manager = DuckDBManager::file(path).context(ConnectionSnafu)?; + Ok(Self { + meta_name: Arc::from(Self::DEFAULT_LAKE_NAME), + path: Arc::from(path), + mode: ConnectionMode::File, + manager, + }) + } + + pub fn meta_name(&mut self, name: &str) -> &Self { + self.meta_name = Arc::from(name); + self + } + + pub fn get_meta_name(&self) -> &str { + self.meta_name.as_ref() + } + + pub fn get_path(&self) -> &str { + self.path.as_ref() + } + + pub fn get_mode(&self) -> &ConnectionMode { + &self.mode + } + + pub fn connect(&self) -> Result { + let mut connection = self.manager.connect().context(ConnectionSnafu)?; + self.ducklake(&mut connection)?; + Ok(connection) + } + + fn ducklake(&self, connection: &mut Connection) -> Result<(), Error> { + let setup_query = match self.mode { + ConnectionMode::Memory => format!( + r#" + INSTALL ducklake; + LOAD ducklake; + ATTACH 'ducklake:metadata.ducklake' AS {name}; + USE {name}; + "#, + name = self.meta_name.as_ref(), + ), + ConnectionMode::File => format!( + r#" + INSTALL ducklake; + LOAD ducklake; + ATTACH 'ducklake:metadata.ducklake' AS {name} (DATA_PATH '{path}'); + USE {name}; + "#, + name = self.meta_name.as_ref(), + path = self.path.as_ref() + ), + }; + + connection + .execute_batch(setup_query.as_str()) + .context(ConnectionSnafu)?; + Ok(()) + } + + pub fn initialize_schema(&self, connection: &Connection) -> Result<(), Error> { + // Create tables for storing stats metadata + connection.execute_batch( + format!( + r#" + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.ducklake_table_column_adv_stats ( + column_id BIGINT, + 
begin_snapshot BIGINT, + end_snapshot BIGINT, + table_id BIGINT, + stats_type VARCHAR, + payload TEXT, + PRIMARY KEY (column_id, begin_snapshot, stats_type) + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query ( + query_id BIGINT PRIMARY KEY, + query_string TEXT, + root_group_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query_instance ( + query_instance_id BIGINT PRIMARY KEY, + query_id BIGINT, + creation_time BIGINT, + snapshot_id BIGINT, + FOREIGN KEY (query_id) REFERENCES __ducklake_metadata_{name}.main.optd_query(query_id) + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + PRIMARY KEY (group_id, begin_snapshot) + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group_stats ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload TEXT, + PRIMARY KEY (group_id, begin_snapshot, stats_type) + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_execution_subplan_feedback ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload TEXT, + PRIMARY KEY (group_id, begin_snapshot, stats_type) + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_subplan_scalar_feedback ( + scalar_id BIGINT, + group_id BIGINT, + stats_type VARCHAR, + payload TEXT, + query_instance_id BIGINT, + PRIMARY KEY (scalar_id, group_id, stats_type, query_instance_id) + ); + + CREATE INDEX IF NOT EXISTS idx_table_stats_snapshot + ON __ducklake_metadata_{name}.main.ducklake_table_column_adv_stats(begin_snapshot, end_snapshot); + "#, + name = self.meta_name.as_ref() + ) + .as_str(), + ) + .context(ConnectionSnafu)?; + + Ok(()) + } +} + +impl Default for DuckLakeConnectionBuilder { + fn default() -> Self { + Self::memory().expect("Failed to create default DuckLakeConnectionBuilder") + } +} + +pub 
async fn query( + connection: &Connection, + sql: &str, + schema: SchemaRef, + projection: Option>, +) -> Result { + let mut stmt = connection.prepare(sql).context(ConnectionSnafu)?; + + let rbs = stmt.query_arrow([])?.collect::>(); + let stream = MemoryStream::try_new(rbs, schema, projection)?; + Ok(Box::pin(stream)) +} + +#[cfg(test)] +mod tests { + use super::*; + use futures::TryStreamExt; + + #[tokio::test] + async fn test_ducklake_connection() { + let path = "./test_ducklake.db"; + let ducklake_conn = + DuckLakeConnectionBuilder::file(path).expect("Failed to create DuckLakeConnection"); + assert_eq!(ducklake_conn.get_path(), path); + + assert_eq!( + matches!(ducklake_conn.get_mode(), ConnectionMode::File), + true + ); + + assert_eq!(ducklake_conn.get_meta_name(), "meta_lake"); + + { + let conn = ducklake_conn.connect().expect("Failed to get connection"); + assert!(conn.execute_batch("SELECT 1;").is_ok()); + + let mut stmt = conn + .prepare("select name from (show all tables);") + .expect("Failed to prepare show tables statement"); + + let rows = stmt + .query_map([], |row| row.get::(0)) + .expect("Failed to execute show tables query"); + + let expected = vec![ + "ducklake_column", + "ducklake_column_mapping", + "ducklake_column_tag", + "ducklake_data_file", + "ducklake_delete_file", + "ducklake_file_column_stats", + "ducklake_file_partition_value", + "ducklake_files_scheduled_for_deletion", + "ducklake_inlined_data_tables", + "ducklake_metadata", + "ducklake_name_mapping", + "ducklake_partition_column", + "ducklake_partition_info", + "ducklake_schema", + "ducklake_schema_versions", + "ducklake_snapshot", + "ducklake_snapshot_changes", + "ducklake_table", + "ducklake_table_column_stats", + "ducklake_table_stats", + "ducklake_tag", + "ducklake_view", + ]; + + for (i, row) in rows.enumerate() { + let table_name = row.expect("Failed to get table name"); + assert_eq!(table_name, expected[i]); + } + } + + { + let conn = ducklake_conn.connect().expect("Failed to get 
connection"); + ducklake_conn + .initialize_schema(&conn) + .expect("Failed to initialize schema"); + + let mut stmt = conn + .prepare("select name from (show all tables);") + .expect("Failed to prepare show tables statement"); + + let rows = stmt + .query_map([], |row| row.get::(0)) + .expect("Failed to execute show tables query"); + + let expected = vec![ + "ducklake_column", + "ducklake_column_mapping", + "ducklake_column_tag", + "ducklake_data_file", + "ducklake_delete_file", + "ducklake_file_column_stats", + "ducklake_file_partition_value", + "ducklake_files_scheduled_for_deletion", + "ducklake_inlined_data_tables", + "ducklake_metadata", + "ducklake_name_mapping", + "ducklake_partition_column", + "ducklake_partition_info", + "ducklake_schema", + "ducklake_schema_versions", + "ducklake_snapshot", + "ducklake_snapshot_changes", + "ducklake_table", + "ducklake_table_column_adv_stats", + "ducklake_table_column_stats", + "ducklake_table_stats", + "ducklake_tag", + "ducklake_view", + "optd_execution_subplan_feedback", + "optd_group", + "optd_group_stats", + "optd_query", + "optd_subplan_scalar_feedback", + ]; + + for (i, row) in rows.enumerate() { + let table_name = row.expect("Failed to get table name"); + assert_eq!(table_name, expected[i]); + } + } + + { + let conn = ducklake_conn.connect().expect("Failed to get connection"); + conn.execute_batch("CREATE TABLE IF NOT EXISTS test (id INTEGER, name VARCHAR);") + .expect("Failed to create table"); + conn.execute_batch("INSERT INTO test (id, name) VALUES (1, 'Alice'), (2, 'Bob');") + .expect("Failed to insert data"); + } + + { + let conn = ducklake_conn.connect().expect("Failed to get connection"); + let schema = Arc::new(datafusion::arrow::datatypes::Schema::new(vec![ + datafusion::arrow::datatypes::Field::new( + "id", + datafusion::arrow::datatypes::DataType::Int32, + false, + ), + datafusion::arrow::datatypes::Field::new( + "name", + datafusion::arrow::datatypes::DataType::Utf8, + false, + ), + ])); + + let rbs = 
query(&conn, "SELECT * FROM test;", schema, None) + .await + .expect("Failed to execute query"); + + let schema_ref = rbs.schema(); + assert_eq!(schema_ref.fields().len(), 2); + assert_eq!(schema_ref.field(0).name(), "id"); + assert_eq!(schema_ref.field(1).name(), "name"); + + let batches: Vec<_> = rbs + .try_collect() + .await + .expect("Failed to collect record batches"); + + assert_eq!(batches.len(), 1); + let batch = &batches[0]; + assert_eq!(batch.num_rows(), 2); + } + } +} diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs index a11cff1..4a76625 100644 --- a/optd/catalog/src/lib.rs +++ b/optd/catalog/src/lib.rs @@ -1,7 +1,9 @@ +// mod optd_catalog; +// mod optd_table; +mod ducklake_connection; +mod statistics; -mod optd_catalog; -mod optd_table; - -pub use optd_catalog::*; -pub use optd_table::*; - +// pub use optd_catalog::*; +// pub use optd_table::*; +pub use ducklake_connection::{ConnectionMode, DuckLakeConnectionBuilder, query}; +pub use statistics::{DuckLakeStatisticsProvider, Error as InterfaceError, StatisticsProvider}; diff --git a/optd/catalog/src/main.rs b/optd/catalog/src/main.rs index 8f7c3b9..3b4fbd6 100644 --- a/optd/catalog/src/main.rs +++ b/optd/catalog/src/main.rs @@ -1,81 +1,27 @@ mod optd_catalog; mod optd_table; -use sqlx::{SqlitePool, sqlite::SqliteConnectOptions}; -use tokio; -use uuid::Uuid; - -#[tokio::main] -async fn main() -> Result<(), sqlx::Error> { - // Create Sqlite database file to hold the catalog - const SQLITE_DB_PATH: &str = "catalog.db"; - - // Set connect options - let connect_options = SqliteConnectOptions::new() - .filename(SQLITE_DB_PATH) - .create_if_missing(true); - - // Connect with SqlX - let pool = SqlitePool::connect_with(connect_options) - .await - .expect("Failed to connect to the SQLite database"); - - // Set the metadata catalog name - const METADATA_CATALOG: &str = "catalog"; - - // Execute the given Sql queries to create the catalog - let mut create_catalog_queries = vec![ - // "CREATE TABLE () 
IF NOT EXISTS {METADATA_CATALOG};", - "CREATE TABLE {METADATA_CATALOG}_metadata(key VARCHAR NOT NULL, value VARCHAR NOT NULL, scope VARCHAR, scope_id BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_snapshot(snapshot_id BIGINT PRIMARY KEY, snapshot_time TIMESTAMPTZ, schema_version BIGINT, next_catalog_id BIGINT, next_file_id BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_snapshot_changes(snapshot_id BIGINT PRIMARY KEY, changes_made VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_schema(schema_id BIGINT PRIMARY KEY, schema_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_name VARCHAR, path VARCHAR, path_is_relative BOOLEAN);", - "CREATE TABLE {METADATA_CATALOG}_table(table_id BIGINT, table_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_id BIGINT, table_name VARCHAR, path VARCHAR, path_is_relative BOOLEAN);", - "CREATE TABLE {METADATA_CATALOG}_view(view_id BIGINT, view_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_id BIGINT, view_name VARCHAR, dialect VARCHAR, sql VARCHAR, column_aliases VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_tag(object_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_column_tag(table_id BIGINT, column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_data_file(data_file_id BIGINT PRIMARY KEY, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, file_order BIGINT, path VARCHAR, path_is_relative BOOLEAN, file_format VARCHAR, record_count BIGINT, file_size_bytes BIGINT, footer_size BIGINT, row_id_start BIGINT, partition_id BIGINT, encryption_key VARCHAR, partial_file_info VARCHAR, mapping_id BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_file_column_statistics(data_file_id BIGINT, table_id BIGINT, column_id BIGINT, column_size_bytes BIGINT, value_count BIGINT, null_count BIGINT, min_value VARCHAR, max_value VARCHAR, contains_nan BOOLEAN);", - "CREATE TABLE 
{METADATA_CATALOG}_delete_file(delete_file_id BIGINT PRIMARY KEY, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, data_file_id BIGINT, path VARCHAR, path_is_relative BOOLEAN, format VARCHAR, delete_count BIGINT, file_size_bytes BIGINT, footer_size BIGINT, encryption_key VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_column(column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, table_id BIGINT, column_order BIGINT, column_name VARCHAR, column_type VARCHAR, initial_default VARCHAR, default_value VARCHAR, nulls_allowed BOOLEAN, parent_column BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_table_stats(table_id BIGINT, record_count BIGINT, next_row_id BIGINT, file_size_bytes BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_table_column_stats(table_id BIGINT, column_id BIGINT, contains_null BOOLEAN, contains_nan BOOLEAN, min_value VARCHAR, max_value VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_partition_info(partition_id BIGINT, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_partition_column(partition_id BIGINT, table_id BIGINT, partition_key_index BIGINT, column_id BIGINT, transform VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_file_partition_value(data_file_id BIGINT, table_id BIGINT, partition_key_index BIGINT, partition_value VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_files_scheduled_for_deletion(data_file_id BIGINT, path VARCHAR, path_is_relative BOOLEAN, schedule_start TIMESTAMPTZ);", - "CREATE TABLE {METADATA_CATALOG}_inlined_data_tables(table_id BIGINT, table_name VARCHAR, schema_version BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_column_mapping(mapping_id BIGINT, table_id BIGINT, type VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_name_mapping(mapping_id BIGINT, column_id BIGINT, source_name VARCHAR, target_field_id BIGINT, parent_column BIGINT);", - "INSERT INTO {METADATA_CATALOG}_snapshot VALUES (0, current_timestamp, 0, 1, 0);", - "INSERT INTO {METADATA_CATALOG}_snapshot_changes 
VALUES (0, 'created_schema:\"main\"');", - //"INSERT INTO {METADATA_CATALOG}_metadata (key, value) VALUES ('version', '0.2'), ('created_by', 'DuckDB %s'), ('data_path', %s), ('encrypted', '%s');" - ]; - - let set_uuid_query = format!( - "UPDATE {METADATA_CATALOG}_schema SET schema_uuid = '{}' WHERE schema_id = 0;", - Uuid::new_v4() - ); - - create_catalog_queries.push(set_uuid_query.as_str()); - - // Format the queries with the metadata catalog name - let formatted_query = create_catalog_queries - .iter() - .map(|query| query.replace("{METADATA_CATALOG}", METADATA_CATALOG)); - - for query in formatted_query { - println!("Executing query: {}", query); - sqlx::query(&query) - .execute(&pool) - .await - .expect("Failed to execute query"); - - println!("Query executed successfully."); - } - - // Close the connection - pool.close().await; +use duckdb::{ + Connection, Error, Result, + arrow::{ + array::{Int32Array, RecordBatch, StringArray}, + datatypes::{DataType, Field, SchemaBuilder}, + }, + params, +}; + +fn main() -> Result<(), Error> { + let conn = Connection::open_in_memory()?; + + conn.execute_batch( + r#" + INSTALL ducklake; + LOAD ducklake; + + ATTACH 'ducklake:metadata.ducklake' AS meta_lake (DATA_PATH 'data_files'); + USE meta_lake; + "#, + )?; Ok(()) } diff --git a/optd/catalog/src/statistics.rs b/optd/catalog/src/statistics.rs new file mode 100644 index 0000000..092bb37 --- /dev/null +++ b/optd/catalog/src/statistics.rs @@ -0,0 +1,419 @@ +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use snafu::{ResultExt, prelude::*}; +use std::sync::Arc; + +use crate::ducklake_connection::{DuckLakeConnectionBuilder, Error as ConnectionError}; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Database connection error: {}", source))] + Connection { source: ConnectionError }, + #[snafu(display("Query execution failed: {}", source))] + QueryExecution { source: duckdb::Error }, + #[snafu(display("JSON serialization error: {}", source))] + 
JsonSerialization { source: serde_json::Error }, + #[snafu(display( + "Statistics not found for table: {}, column: {}, snapshot: {}", + table, + column, + snapshot + ))] + StatsNotFound { + table: String, + column: String, + snapshot: i64, + }, + #[snafu(display( + "Group statistics not found for group_id: {}, stats_type: {}, snapshot: {}", + group_id, + stats_type, + snapshot + ))] + GroupStatsNotFound { + group_id: i64, + stats_type: String, + snapshot: i64, + }, +} + +impl From for Error { + fn from(err: ConnectionError) -> Self { + Error::Connection { source: err } + } +} + +/** Packaged Statistics Objects */ +/** Table statistics -- Contains overall row count and per-column statistics */ +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TableStatistics { + row_count: usize, + column_statistics: Vec, +} + +impl TableStatistics { + fn new(rows: I) -> Self + where + I: IntoIterator< + Item = Result< + ( + i64, + i64, + String, + String, + i64, + i64, + i64, + String, + String, + String, + String, + String, + ), + duckdb::Error, + >, + >, + { + let mut row_count = 0; + let mut column_statistics = Vec::new(); + + for row_result in rows { + if let Ok(( + _table_id, + column_id, + column_name, + column_type, + record_count, + _next_row_id, + _file_size_bytes, + contains_null, + contains_nan, + min_value, + max_value, + _extra_stats_json, + )) = row_result + { + row_count = record_count as usize; // Assuming all rows have the same record_count + + let actual_contains_null = match contains_null.as_str() { + "TRUE" => Some(true), + "FALSE" => Some(false), + _ => None, + }; + + let actual_contains_nan = match contains_nan.as_str() { + "TRUE" => Some(true), + "FALSE" => Some(false), + _ => None, + }; + + let actual_min_value = if min_value == "NULL" { + None + } else { + Some(min_value) + }; + + let actual_max_value = if max_value == "NULL" { + None + } else { + Some(max_value) + }; + + let column_stats = ColumnStatistics::new( + column_id, + column_type, + 
column_name.clone(), + actual_min_value, + actual_max_value, + actual_contains_null, + actual_contains_nan, + vec![], // Advanced stats can be populated later + ); + + column_statistics.push(column_stats); + } + } + + TableStatistics { + row_count, + column_statistics, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ColumnStatistics { + id: i64, + column_type: String, + name: String, + min: Option, + max: Option, + contains_null: Option, + contains_nan: Option, + advanced_stats: Vec, // TODO, e.g. histogram, ndv, etc. +} + +impl ColumnStatistics { + fn new( + id: i64, + column_type: String, + name: String, + min: Option, + max: Option, + contains_null: Option, + contains_nan: Option, + advanced_stats: Vec, + ) -> Self { + ColumnStatistics { + id, + column_type, + name, + min, + max, + contains_null, + contains_nan, + advanced_stats, + } + } + + #[allow(dead_code)] + fn add_advanced_stat(&mut self, stat: AdvanceColumnStatistics) { + self.advanced_stats.push(stat); + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct AdvanceColumnStatistics { + stats_type: String, + data: Value, +} + +pub trait StatisticsProvider { + /// Create a new memory-based StatisticsProvider + fn memory() -> Result, Error>; + + /// Create a new file-based StatisticsProvider + fn file(path: &str) -> Result, Error>; + + fn get_connection(&self) -> Result; + + fn current_snapshot(&self, connection: &duckdb::Connection) -> Result; + + /// Retrieve table and column statistics at specific snapshot + fn fetch_table_statistics( + &self, + table_name: &str, + snapshot: i64, + connection: &duckdb::Connection, + ) -> Result, Error>; +} + +/// DuckLake-based implementation of StatisticsProvider +pub struct DuckLakeStatisticsProvider { + connection_builder: Arc, +} + +impl DuckLakeStatisticsProvider { + /// Create a new DuckLakeStatisticsProvider with memory-based DuckDB + pub fn memory() -> Result { + let connection_builder = 
Arc::new(DuckLakeConnectionBuilder::memory()?); + Ok(Self { connection_builder }) + } + + /// Create a new DuckLakeStatisticsProvider with file-based DuckDB + pub fn file(path: &str) -> Result { + let connection_builder = Arc::new(DuckLakeConnectionBuilder::file(path)?); + Ok(Self { connection_builder }) + } + + /// Insert table column statistics + pub fn insert_table_stats( + &self, + column_id: i64, + begin_snapshot: i64, + end_snapshot: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error> { + let conn = self.connection_builder.connect()?; + let mut stmt = conn + .prepare( + "INSERT OR REPLACE INTO ducklake_table_column_adv_stats + (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) + VALUES (?, ?, ?, ?, ?, ?)", + ) + .context(QueryExecutionSnafu)?; + + stmt.execute([ + &column_id.to_string(), + &begin_snapshot.to_string(), + &end_snapshot.to_string(), + &table_id.to_string(), + stats_type, + payload, + ]) + .context(QueryExecutionSnafu)?; + + Ok(()) + } +} + +impl StatisticsProvider for DuckLakeStatisticsProvider { + fn memory() -> Result, Error> { + let connection_builder = Arc::new(DuckLakeConnectionBuilder::memory()?); + Ok(Box::new(Self { connection_builder })) + } + + /// Create a new DuckLakeStatisticsProvider with file-based DuckDB + fn file(path: &str) -> Result, Error> { + let connection_builder = Arc::new(DuckLakeConnectionBuilder::file(path)?); + Ok(Box::new(Self { connection_builder })) + } + + /// Get a connection to the DuckDB instance and initialize the DuckLake-Optd schema + fn get_connection(&self) -> Result { + let conn = self.connection_builder.connect()?; + self.connection_builder.initialize_schema(&conn)?; + Ok(conn) + } + + fn current_snapshot(&self, conn: &duckdb::Connection) -> Result { + let mut stmt = conn + .prepare( + format!( + r#" + SELECT snapshot_id, schema_version, next_catalog_id, next_file_id + FROM __ducklake_metadata_{name}.main.ducklake_snapshot + WHERE snapshot_id = 
(SELECT MAX(snapshot_id) + FROM __ducklake_metadata_{name}.main.ducklake_snapshot); + "#, + name = self.connection_builder.get_meta_name() + ) + .as_str(), + ) + .context(QueryExecutionSnafu)?; + + let row = stmt + .query_row([], |row| { + Ok(( + row.get::(0)?, // snapshot_id + row.get::(1)?, // schema_version + row.get::(2)?, // next_catalog_id + row.get::(3)?, // next_file_id + )) + }) + .context(QueryExecutionSnafu)?; + + Ok(row.0) + } + + fn fetch_table_statistics( + &self, + table: &str, + snapshot: i64, + conn: &duckdb::Connection, + ) -> Result, Error> { + // Query for table statistics within the snapshot range + let mut stmt = conn + .prepare( + format!( + r#" + SELECT table_id, column_id, column_name, column_type, record_count, next_row_id, file_size_bytes, contains_null, contains_nan, min_value, max_value, extra_stats + FROM __ducklake_metadata_{name}.main.ducklake_table_stats + LEFT JOIN __ducklake_metadata_{name}.main.ducklake_table_column_stats USING (table_id) + LEFT JOIN __ducklake_metadata_{name}.main.ducklake_column col USING (table_id, column_id) + WHERE record_count IS NOT NULL AND file_size_bytes IS NOT NULL AND + table_id = (SELECT table_id FROM __ducklake_metadata_{name}.main.ducklake_table WHERE table_name = ?) + AND ? >= begin_snapshot AND (? 
< end_snapshot OR end_snapshot IS NULL) + ORDER BY table_id, column_id; + "#, + name = self.connection_builder.get_meta_name() + ).as_str() + ) + .context(QueryExecutionSnafu)?; + + let rows = stmt + .query_map([table, snapshot.to_string().as_str()], |row| { + Ok(( + row.get::(0)?, // table_id + row.get::(1)?, // column_id + row.get::(2)?, // column_name + row.get::(3)?, // column_type + row.get::(4)?, // record_count + row.get::(5)?, // next_row_id + row.get::(6)?, // file_size_bytes + row.get::(7)?, // contains_null + row.get::(8)?, // contains_nan + row.get::(9)?, // min_value + row.get::(10)?, // max_value + row.get::(11)?, // extra_stats (JSON) + )) + }) + .context(QueryExecutionSnafu)?; + + let table_stats: TableStatistics = TableStatistics::new(rows); + + Ok(Some(table_stats)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn test_ducklake_statistics_provider_creation() { + { + // Test memory-based provider + let memory_provider = DuckLakeStatisticsProvider::memory(); + assert!(memory_provider.is_ok()); + } + + { + // Test file-based provider + let file_provider = DuckLakeStatisticsProvider::file("./test_stats.db"); + assert!(file_provider.is_ok()); + } + } + + #[test] + fn test_table_stats_insertion() { + let provider = DuckLakeStatisticsProvider::memory().unwrap(); + + // Insert table statistics + let result = + provider.insert_table_stats(1, 1, 100, 1, "ndv", r#"{"distinct_count": 1000}"#); + assert!(result.is_ok()); + } + + #[test] + fn test_json_payload_handling() { + let payload = json!({ + "distinct_count": 1000, + "null_count": 50, + "min_value": 1, + "max_value": 999999 + }); + + let payload_str = serde_json::to_string(&payload).unwrap(); + let parsed_back: serde_json::Value = serde_json::from_str(&payload_str).unwrap(); + + assert_eq!(parsed_back["distinct_count"], 1000); + assert_eq!(parsed_back["null_count"], 50); + } + + #[test] + fn test_table_stats_insertion_and_retrieval() { + let provider = 
DuckLakeStatisticsProvider::memory().unwrap(); + + // Insert table statistics + let result = + provider.insert_table_stats(1, 1, 100, 1, "ndv", r#"{"distinct_count": 1000}"#); + assert!(result.is_ok()); + + // Note: Actual retrieval would require setting up the table_metadata + // and column_metadata tables, which would be done by the DuckLake extension + } +} From bad8c8fc363e4ad58d3f0c33421b1fbc86b4aadc Mon Sep 17 00:00:00 2001 From: HFFuture Date: Sat, 18 Oct 2025 22:50:42 -0400 Subject: [PATCH 10/40] fetch stats, update stats, and preliminary tests impl --- Cargo.lock | 475 ++++++++++++++++++--- Cargo.toml | 1 + optd/statistics/Cargo.toml | 19 + optd/statistics/metadata.ducklake | Bin 0 -> 3944448 bytes optd/statistics/src/ducklake_connection.rs | 399 +++++++++++++++++ optd/statistics/src/lib.rs | 5 + optd/statistics/src/statistics.rs | 448 +++++++++++++++++++ optd/statistics/test_stats.db | Bin 0 -> 12288 bytes 8 files changed, 1287 insertions(+), 60 deletions(-) create mode 100644 optd/statistics/Cargo.toml create mode 100644 optd/statistics/metadata.ducklake create mode 100644 optd/statistics/src/ducklake_connection.rs create mode 100644 optd/statistics/src/lib.rs create mode 100644 optd/statistics/src/statistics.rs create mode 100644 optd/statistics/test_stats.db diff --git a/Cargo.lock b/Cargo.lock index 8886726..a9fd1a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,17 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "version_check", +] + [[package]] name = "ahash" version = "0.8.12" @@ -214,7 +225,7 @@ version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" dependencies = [ - "ahash", + "ahash 0.8.12", "arrow-buffer", "arrow-data", "arrow-schema", @@ -354,6 +365,7 @@ version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" dependencies = [ + "bitflags", "serde", "serde_json", ] @@ -364,7 +376,7 @@ version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" dependencies = [ - "ahash", + "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", @@ -425,7 +437,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -436,7 +448,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -925,7 +937,7 @@ dependencies = [ "regex", "rustc-hash 1.1.0", "shlex", - "syn", + "syn 2.0.106", "which", ] @@ -1000,7 +1012,30 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 2.0.106", +] + +[[package]] +name = "borsh" +version = "1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd1d3c0c2f5833f22386f252fe8ed005c7f59fdcddeef025c01b4c3b9fd9ac3" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] @@ -1030,6 +1065,28 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -1080,6 +1137,12 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.32" @@ -1122,7 +1185,7 @@ dependencies = [ "iana-time-zone", "num-traits", "serde", - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -1177,7 +1240,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -1393,7 +1456,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn", + "syn 2.0.106", ] [[package]] @@ -1404,7 +1467,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -1559,7 +1622,7 @@ version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "054873d5563f115f83ef4270b560ac2ce4de713905e825a40cac49d6ff348254" dependencies = [ - "ahash", + "ahash 0.8.12", "apache-avro", "arrow", "arrow-ipc", @@ -1834,7 +1897,7 @@ version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05d47426645aef1e73b1a034c75ab2401bc504175feb191accbe211ec24a342" dependencies = [ - "ahash", + "ahash 0.8.12", 
"arrow", "datafusion-common", "datafusion-doc", @@ -1855,7 +1918,7 @@ version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05c99f648b2b1743de0c1c19eef07e8cc5a085237f172b2e20bf6934e0a804e4" dependencies = [ - "ahash", + "ahash 0.8.12", "arrow", "datafusion-common", "datafusion-expr-common", @@ -1936,7 +1999,7 @@ checksum = "07c9faa0cdefb6e6e756482b846397b5c2d84d369e30b009472b9ab9b1430fbd" dependencies = [ "datafusion-expr", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -1965,7 +2028,7 @@ version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10bb87a605d8ce9672d5347c0293c12211b0c03923fc12fbdc665fe76e6f9e01" dependencies = [ - "ahash", + "ahash 0.8.12", "arrow", "datafusion-common", "datafusion-expr", @@ -2003,7 +2066,7 @@ version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "845eb44ef1e04d2a15c6d955cb146b40a41814a7be4377f0a541857d3e257d6f" dependencies = [ - "ahash", + "ahash 0.8.12", "arrow", "datafusion-common", "datafusion-expr-common", @@ -2037,7 +2100,7 @@ version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e6688d17b78104e169d7069749832c20ff50f112be853d2c058afe46c889064" dependencies = [ - "ahash", + "ahash 0.8.12", "arrow", "arrow-ord", "arrow-schema", @@ -2170,7 +2233,25 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", +] + +[[package]] +name = "duckdb" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a093eed1c714143b257b95fa323e38527fabf05fbf02bb0d5d2045275ffdaef" +dependencies = [ + "arrow", + "cast", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libduckdb-sys", + "num-integer", + "r2d2", + "rust_decimal", + "strum 0.27.2", ] [[package]] @@ -2236,6 +2317,18 @@ version = "3.3.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "2.3.0" @@ -2253,6 +2346,18 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + [[package]] name = "fixedbitset" version = "0.5.7" @@ -2369,7 +2474,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -2486,6 +2591,9 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] [[package]] name = "hashbrown" @@ -2493,7 +2601,7 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "ahash", + "ahash 0.8.12", "allocator-api2", ] @@ -2514,6 +2622,15 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +[[package]] +name = "hashlink" +version = "0.10.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "hdrhistogram" version = "7.5.4" @@ -2947,7 +3064,7 @@ checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -3058,6 +3175,21 @@ version = "0.2.176" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +[[package]] +name = "libduckdb-sys" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b93c3ff279601516f01531cadf2ccba50394fbb5f7bf685c6e6b9b07c8dca6f" +dependencies = [ + "cc", + "flate2", + "pkg-config", + "serde", + "serde_json", + "tar", + "vcpkg", +] + [[package]] name = "libloading" version = "0.8.8" @@ -3092,6 +3224,7 @@ checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" dependencies = [ "bitflags", "libc", + "redox_syscall", ] [[package]] @@ -3123,11 +3256,10 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] @@ -3468,6 +3600,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "optd-statistics" +version = "0.1.0" +dependencies = [ + "async-trait", + "datafusion", + "duckdb", + "futures", + "glob", + "parking_lot", + "r2d2", + "serde", + "serde_json", + "snafu", + "tokio", + "url", +] + [[package]] name = "optd-storage" version = "0.1.0" @@ -3501,9 +3651,9 @@ checksum = 
"b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "parking_lot" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -3511,15 +3661,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.11" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-link 0.2.1", ] [[package]] @@ -3528,7 +3678,7 @@ version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" dependencies = [ - "ahash", + "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-cast", @@ -3618,7 +3768,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -3691,7 +3841,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.106", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", ] [[package]] @@ -3723,7 +3882,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -3744,6 +3903,26 @@ dependencies = [ "cc", ] 
+[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "quad-rand" version = "0.2.3" @@ -3830,6 +4009,17 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r2d2" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" +dependencies = [ + "log", + "parking_lot", + "scheduled-thread-pool", +] + [[package]] name = "radium" version = "0.7.0" @@ -3922,7 +4112,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -3995,6 +4185,15 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + [[package]] name = "reqwest" version = "0.12.22" @@ -4051,6 +4250,51 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rkyv" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "rust_decimal" +version = "1.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand 0.8.5", + "rkyv", + "serde", + "serde_json", +] + [[package]] name = "rustc-demangle" version = "0.1.26" @@ -4214,12 +4458,27 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "scheduled-thread-pool" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" +dependencies = [ + "parking_lot", +] + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "security-framework" version = "3.3.0" @@ -4257,10 +4516,11 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = 
"9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] @@ -4273,15 +4533,24 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -4385,7 +4654,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -4433,7 +4702,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -4478,6 +4747,9 @@ name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros 0.27.2", +] [[package]] name = "strum_macros" @@ -4489,7 +4761,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 2.0.106", ] [[package]] @@ -4501,7 +4773,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -4510,6 +4782,17 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + 
"proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.106" @@ -4538,7 +4821,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -4547,6 +4830,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tar" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" version = "3.20.0" @@ -4577,7 +4871,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -4693,7 +4987,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -4730,6 +5024,36 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml_datetime" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +dependencies = [ + "indexmap 2.11.4", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +dependencies = [ + "winnow", +] + [[package]] name = "tonic" version = "0.12.3" @@ -4845,7 
+5169,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -4905,7 +5229,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" dependencies = [ "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -4998,6 +5322,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -5066,7 +5396,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn", + "syn 2.0.106", "wasm-bindgen-shared", ] @@ -5101,7 +5431,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -5199,7 +5529,7 @@ checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement", "windows-interface", - "windows-link", + "windows-link 0.1.3", "windows-result", "windows-strings", ] @@ -5212,7 +5542,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -5223,7 +5553,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -5232,13 +5562,19 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-link" 
+version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-result" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -5247,7 +5583,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -5299,7 +5635,7 @@ version = "0.53.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" dependencies = [ - "windows-link", + "windows-link 0.1.3", "windows_aarch64_gnullvm 0.53.0", "windows_aarch64_msvc 0.53.0", "windows_i686_gnu 0.53.0", @@ -5406,6 +5742,15 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +[[package]] +name = "winnow" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen-rt" version = "0.39.0" @@ -5430,6 +5775,16 @@ dependencies = [ "tap", ] +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix 1.0.8", +] + [[package]] name = "xmlparser" version = "0.13.6" @@ -5465,7 +5820,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 
2.0.106", "synstructure", ] @@ -5486,7 +5841,7 @@ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] @@ -5506,7 +5861,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", "synstructure", ] @@ -5546,7 +5901,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 38a35c1..76e022e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "optd/catalog", "optd/core", "optd/storage", + "optd/statistics", ] # By default, only compiles the `optd-core` crate. diff --git a/optd/statistics/Cargo.toml b/optd/statistics/Cargo.toml new file mode 100644 index 0000000..a5357e7 --- /dev/null +++ b/optd/statistics/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "optd-statistics" +version.workspace = true +edition.workspace = true +repository.workspace = true + +[dependencies] +datafusion = { workspace = true } +parking_lot = "=0.12.5" +glob = "0.3.2" +url = "2.5.4" +async-trait = "0.1.88" +tokio = { workspace = true, features = ["full"] } +duckdb = { features = ["r2d2", "bundled"], version = "1.4.0" } +r2d2 = "0.8.10" +snafu = "0.8.6" +serde_json = "1.0" +futures = "0.3.31" +serde = "1.0.228" diff --git a/optd/statistics/metadata.ducklake b/optd/statistics/metadata.ducklake new file mode 100644 index 0000000000000000000000000000000000000000..1266bc56f460edc0e80c5429b70ab10f96d80971 GIT binary patch literal 3944448 zcmeF)S&Up+dKl;vWRgsh$)$QL)?VsTOTDQURc|G!wb*T-8bb|)1_lCOCJ@ZHNv10^ zlFE!^7kdmaEeTTF#u!}|8kV6aL6VKO$K!zkJqWO2YhaB@R?WbGfqAh!9s}cU!O%Qd z9tbvUb#ux$w2oNAZfB*pk1PBlyK;YyA{`-~I|MkEB zxtD+RY+sUB^WSsVy2I!4Z}a&_tvlQo0t5&UAV7cs0RjXF5FkKc+yypYn*U$^{jawE z{h1=8#pkCk<-b3Dxmy77f0x1+0t5&UAV7cs0RjXF5FjvN0uLU&{{Q~QfBM>2ifn(h 
z8D3fpXPTkb3}=?Y+-hUFwH!hxz82GA>AxhrP_H*P7vEWFypz;dlXj!oXg9Ji^>k2g zwnyPwce=&3mCe=GklF)mIz$?k%_Oq!o6y87>t+t7M0p&sB((dH%(%{BeXA_+`UHo?f}*GbZC8fqrRT3G}_Devu$R_MsYb?=F-~wYGZJF zTUbvPV{&ne*=!B&KCWf6ZY+Nzso#FkPB!|ssJM4pT8n#~^?EwVz{rL5wXa9K`bN94 zKDg|KxUIJ1&S0%o1$;GWEv`SfmyNDkL!Qd!jysYphs&*{wE>yoTVbd3B%Fym=d+JO z?QsZCLTyKFcRJ~g`%%6lPZwKmeOHt4_qto(-E{Z<{Lg*ut3UfQU%S=Y!}nI3*Ox-v zq~i3=@Ot|8$@J~lYw7zp&t>n|(^dL_yP%Ks*4$(zX(t&(y6!H-qe8Pcs0w%ZW`4M} zcZcG@>TuBY_x!L`wuHC@3z+)xk@^6+c{c)oqQq*|K#E!>*R%< zb<#UBWX0Ok3OKx4KHOVeJd)IxTg~L%ft}N}-b7WDWuc4Hu&ucy{O!XQ`uT35ms=~# zt+>@?$ByirJX;mxndIEC;=ak=DIAjGr)h$aI z;S`m&_tU#(IJdF5RuMDIXM+ciW942rKT_w1?09s|_ZVeGT};BCKARS9?%-$pAMF+{ z)!u%tACK?D&oV{^ZkIRKHrE%ExbanlP}kxK)19Op59!HDbFkpeFfz`GJFs{<*j=Cd zw(WkPg(UpiONXreXSzV$REzy;x)z*^8R`wM2x5n?_P3BnaZ`PD-2oNA}R04T$vB!oQXEoF~DtF5vK!5-N0t5&g zLg3;d<~1vUu@P{gV{EEIMhFlfK!5-N0t5(5ra*O2F%3OTU7E}ak!u132oNAZfB*pk z6E3i?o2Y6@m|KdE6^{?&xD#r3`yMd9xSllHNwXdwFn&K--&kI2bssQ(sZ1sdUb1C8 zOqX$H$1?JX()ZqEG0C5Sn6EZ&);=DK{;3p}3X9k=W8{oB7>Bv{?u-Y+5_k6hDqx;K)O^W6rO|&w{|hEEG~1w+VQH!!`_G zCtkpD^>nLnTZj=aaMZF(Dv!cmdJ>BdX>=9ibSqEl1PBlyK!Csu30#?>vMTC?3si>~ z*)3LkxE*U2E@n)417ixx( zUA35NhOOdtF$w?Z>9qOCwgG=9wsqMt;M>pi4fy3})21)6`AIXJ@4rK9AF<6+*iS2v zt7X}I5+Fc;009C72oNAJodS7ub7C>ZbRsGhfyouPHn~}sa{>ei5FkKcTm`CQjJ-4i z+1~|>aV@`m5gvW5`kjFJ;eHTMzS*F^18uV_)7TCP09|!2~{h zu-VK-fB*pk1WrL9j}=_UI0br15+Fc;009CMEAWwt&AE&dI0*qU#z`nkg8%^n1PBly zK!CvX3slDzC(-Hc$EF`w2?-D&K!5-N0t5)ml0dg1Mb^)4S@IpvBrgXx!Yg zB(3JKvFTncouONUPr{jqa`sWEJr3bXsP&50>7-hZQoEh7&m`dwV`n0<)m>JlajIv2 z?rUHD*`N8^tsZNk@Osj%j@U}?d^6mL&GkaoUT-eVMXh!>)c)wwavYeh7_X;qpG@C= zJtoi$-#nMSUr%#zz#6>KSA+HY$@)fITQ}0GJQG*UUDmj(MM;X;`qs!aTie&U7n1O= zVzuNn2h8zPy&M;tL7LTbY5f068`PA5-Y9;&G6jddQ=v9#F`XWl9O1Io*Af@`r_S2 z>rRrbO1st1LNRT9wb2~hde1jD+jrO22e#ji|8JzE_VtlN;^+vV> z?pwqGtLBv?{F}JdZQI0O8D2GI$wpMsg_EhGtKBNfjxgzVGe7EZb16R%9mBr5w;S!- zV>CmN=#lQlckYFLI0@gqlGffKj(jieq^vKlt!%Eg>W$|8`bN9a&er+3KfhQEKjLHg 
z$V$u7ueTrE8{8f)+-p2oS&LiP&ckE66{K-Xewa&l+S#l2B%JLPpzla($%u2C0P8}AMobvfVbW?3It;dZVTZzSP=``973$dB~N zb;;uS_HeR=^<;5vy&1RP&DP-g^yTcRzP|SLszd&@>}bES{Eei3`$0R|=riwM^*^74 z-~I3*%eb(ujCYFDo{MX@|6;P(uF7HGCfk?6SnDIgg}66(Cwb6UZ}B{X(x6 zXNRrME8U{rUrxTBmi1zcEz<8*CB2x9IN1YLo@V87>Oyn5u@Wm~fbZ9K7IE>b@#Nq`=@$QX@hF}Y$3NHSqvCt~mY%r9 zLB+YumQLWtvo__5009C7CPv`K#N<>)2@oK#Z-G3Jh<_jzW9-|`3>jAHFlkGZ*i3gTYb9F_%%+G< zZkyp@+O8)y>lwV}(Yu|hdy7?iw|cz2YS@f<2GaMDlNI-E{qkS?cTfKF@BG%IFaP!b z`rCi-Cr`ffH-9I#n<{SLsvMUu{z&t1+-2;F^b+d7H!W`oG^iSf#Fo(UHn;9FwyDP3 z_G4?TzFM14o8PB3{5W*KJ_+IdQ2QWcEzyQ0)xD!GO{!Q$p3Rf(Rpi+u#8qfWs;~7@ zO~00@#-_LF>qx7tx01wf`D-7`zfgI3uU9*TcDWIadUovR3Mn512oNAZfB*pk1PDyO zKpuNk#uJs{>u#a#USf{vSDmgBo|(AfJU4MUmSF+}2oNAZfWRyWjEFs4%b0}~Kp6=T zAaG;?&mY;6EQkOB0t6;cU|U>~-;uW8Zq~~C#qxU)-@Eblt(!L|k3o(J5FkK+009D1 zEbzheS72MD(QSV6sMl^~SQ8dENsVun${7Iy1PBlyFkIm5@KMGPAaF>55uwGX zcIvYgTD*8jWU~_>K!5-N0t5)mvOsOd3SS*!`Y~I{DEceCDhZs*Nkn`vr*iaW}}!&Nq_(W0t5&UIJm%wP)}TOa8`2?AV7e? z$q2lBGRa7V009C7b`!|sit1PBlyK!5;&sSp?u zPsGj`ipv#6w+wrUC#C{LA!bS7mE$O^>lMeLYMBHG5FkK+z&Hzxh$~#D80U(UHv$9* zOuxVnPJa=VkN^P!(<6|_73GzPvRotk065p{1K_13{Kw1N`v8ao{^dg7fZxB;H{dsS z^!^-Y`ODW%yxV~DX^OSGq3rWI+zIj7pNs2Bqn$MC@!6l{*C!#oA8H?jkCsyVZgYP= z37@P?Eni}KRcOA1FH?&#UHVF5{sy}Sh(p3sh&j$DO5b~v#nirZqN%-o{Hf_(pjsU7#EF4GRFvKv~xUdqFnMNeH+#?j)3@L4W`O z0t5)80zZUdq` zvLYu02oNAZfB*pkMh$_q+bUlV zyG2+EapriN*LyeKzIF3vSr(rJ2oNAZfB=Ds6!_Fc=31r+jF&(jE9@F$6!AvyX)$H6 zvGuUqE12fz<5jhCLx2DQ0t5&UAVA=>1ojwa6py0p1sB^cW#n1^v`ZSNRfp0gK!5-N z0t5&UATUk>M-XGUU22>vR9*;_0-q_r_#{Ap009C72%M~d7~^EUrAB}N0Rl5B@YxwH zw_+0@Fp~n+F~)p8c;q8GwD+FL6-J>65FkK+009EiAn+s8P>3-sgWzHeD@B$F5FkK+ z009C72uzYdbx@IYJ=^Oss^}*w>q$2oRhDo^-@4_%eCDOl3=cb?uf}A6-!Akwv;Iy!}9;1Ugy2(*JDVbr+*ls%V`bN z`f6al)x4io*5lCq`Xq$+L+yib_8>*cYyQ==iksnb60TRK`agWUxxCR{ZpBo~uVtz? 
z_MPha%6`uV(8|uBb#PX0ojwWd6sYa|=@$V41PBlyKw#G4&O#fX5>6+K{D+ zD~7-rOn?9Z0t5(5t-y%5qT=$AD-~0_A}AOE0tBW^;PojhkKzy@Fb)F#ayYscMno3w zfErC_EP(+5`z#DzP{hSto>bRoYMr2rXk#V?m;S_FKRzl?Bq3tJEKk+Dpk4^vjCPXYu^P~goIOiNM(2oNAZfWZC)@>oHPu|G9a z6Cgl<009Eq1wOa^hwlUk%)G#e7^Bzny!$|u%2;F%)6M6}q|b1fFUiYCg=7nNL8N5y zNq_(W0t5&UAVA=d0-rzR{9`kFb+9717@G=`5ds7V5FkK+009EyE+D8F_ZpJFg9&`$ zV6&Nv009C72oNAZ;NSvza51t8d%5stn5(?Ks=RedbMjKkCjkNk2viGvv3jtf1PBly zFq;C^7do1umUP>9h%sh!)lg~z1PBlyKwwe@zBH*Bm+xs25MxYBO)3Wg0t5&UAV7cs zfzuTbTMY4kYshee2@oJafB*pk1PBlqDUin)du*?9PJ4}!m_`yHaC`zkc6^JrWC8>T z5FkL{bOc=QI30JX5+Fc;0D-X-`0=sKl}r&JFku2>j0vkS*(E@L009C72oRVZfuESn zqE^Qkp4vB=wJFyG2oNAZfB*pk1WrLfP;mrXXF2LSkhWte{ipzG@FffqkgZ^zPq@dG~)02 z?FaXh^^N7VR-7t?H{$=V#{XyI|BuA~pN;?1VM~pbjbt@xH`4iEZEh~Uv(k7cd39-d zCE0lGEAi)7{@lyA-+b%!#updg{Nfj0fA!5cPx-o0e(_0w009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ 
z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5)0s6cZ6E8qX~PyXzGJ(H(eYlh45 ze=~$6)RscH6CTAMwIp=PSN!%kAGB4z9_Hh+IpWN5>i2HEee34U6D^3u2oNAZfB*pk z1PBlqAAvkZ5L=8-jmQasP{jSk2M_`T2oNAZfWU+ah%qLt!eo~K0RjY0TA&E{&z^L; z(j!2Cz{CoOF($U^WSjs20t5&UATWypMch9(i)EjfGK(=LrmAF=009C72oNAZfWTM_ zRL2(c`QWr|_k5ZCCn3BaY9EBG?YG9=CCg-f7IFW%$;`c66Cgl<009C72#lwI;9@-M zMXm@Cm_>mi?ms_^Wmjeb1PBnA3IV~zR8*rv5FkK+009EWB~Zlu7mjQBmP=ro1Oyk; zRGZ2|fB*pk1PBlyK;RSvs)LH`33;{dWAwU@);p4CV-#`!yyx$oLVZh;009C72oNAZ zfWWv2#+`*9Wv)7%IOI{-x}J_M2R6gmBZ)=CAf>2J0>>**#QhhKHx)}KK!5-N z0t5&gP+-J$k$hAdjC3!MY?ZHv`AA(+9gy8@1PBlyFg*fA+<$3$N~9zN2oN}=Kprdf zU&qkT?IBssPJjRb0t5&UAaFVYMcjY+bW)%7RJ#`<_Hr}T`Tigzn<-yJe0QN^*4HNG zCqRGz0RjXF5FoI-K=lOWDd6KNA5K!Ct>2o!PuQ`1ozr653nz}^MK7<;!e zB>@5i2oNAZfWYJm6mkEDCbtBWlXEe~Ga$*%FAp!&l5FkKcQUr>)|D%(VU-=|(@&Y4*kTUY<#2BQA zMz+e=!+d1yEq5uz$fTBp_ins>>*mdqFMt#Y5FkK+009C72oM+xfzydCin#w{V?iJj z1PBlyK!5-N0t5&gp}^_H7)MwDiy}ba2n33_|LG%GltmCAK!5-N0y8fl#+dn400033 z1PF|sKoR%f7`u$g5CH-N_9P(2*b|e92oNAZfB*pk1ZGyCi2I+JS%7^PygJ6vk<`8+ z%|L(v0RjXF5FkKcRs{qVvs&|~RPG|~fA*A;mn;DS1PBlyK!Ct83RDLdS%0(nsQ9jT z+GDH$izYyTz!(S=asP8;kQG@VK!5;&DG-5FkK+009C72<%0mi2GmO z%d}H5O?7aQbtF~mbRLJiAF6z+PN%;yxFBRIRsw|}K!5-N0t5&UAaD`_BZ7}I;_yU` zlcbBJThAF)Z352@oJaVCn_L z7*k)x3P^we0RjXFOuRr5_rEgn1yO(*5vYzaG{BgV>Q_tx1PBlyK!5-N0*4pKgNh`? 
zcHOn4ySPp#4tW%|uBW5Rq0KPY-}*b7DPKf7VYj*XR{46EkBm!ODw25b#@n}U-Yl1S z@}EWA|AQx=vJ?prAV7cs0RjXF5ZES=#|r&1h6^0qh)I4 zF=l-|P<{dg2oNAZfB*pkhZPW899Gc$`xPkS{!i|AKGPE*K!5-N0t5(5hJYAjGU`b# z2@oJ~%mTGzUcSW>AV7e?3<-!aW~j0il>h+(1PBlqdx7~phda)?hP=yVpdCl6$;W?!h+o12U8tTf(9>Z?h+(QLFE*_V1cs5jfAaIHJt;@ZmQ zYU^=mhRe-xG5=LxZYFWS)lTP52zS$D;*Z#ad)jfFn%FvHHVL#Z#QnQ zBvs2@D3V-TZzk&lOTAo-Z8cVtbmTMJvs8|3Ke#tI@>&FHFE>`|&19*uxsqjg$J|$n z5%v3xmCeEVue3H-RyOL5m6f%xCmF}3?P*`R*H}+l?RuWSzI>$hOX<<&`9@!|Tnk&>gV9T0GpA zc`407y?X1LyVq#n9iY0N4y`Y5)Yp@hMtk{ww(acLI4)<)Tv}USZ47RA3+u^ZOfGIY zo2|j!$hB%_VtKY-)J}12baANx7l{w zDXg`sfUhR4#q|gGve8v*$Wz(eafg!SaJjX#HXt*6E9`Whgfns9efCkPJr3bXsO_lj zPA86wPo&9q=;>n1J;#@l@b|h~-`#Xi|NPH=?W;fgGhe&a+tc?}o7a~@+@#|4&G35q z_Q~|^*K6tfH_v77*%NOLxC{D7Z_Q0sl6I0or0ecNJUTRcgQ{?cZ{~+vdv_=ftPTfV zf6otFb&FCS$ErJe^&4D&`oss;pl=q(p1CCa(v@!+S zsBbLZO`4l4NwXfw%Ht#xudH17qfoRXrL)OU)JL{x(WXOuO zrxkE`wS2g@xOgO~FSnY>y8}C?YrTo8DEkZju_XNM!xsAaZlRZ3E6c68)n&(y?A$zC z72}!Y@KRmM&*3Xi#j}lSwNbyHtZ&2(xUaDNWqBqE-&;6j1wPd+OBv}DmA3cOyJk4I zvA9+dJIrT;2ajXrUN}Ee=ZEZgbj?rp%RR<(N%+%e)56Ui{A~ZD-NL2X+t2mm@qPGN z#>fEf^2XZc`eG6{zKT%lT0CL8leFU@Jy~fE7Q7io#yfEb7EcGe>vP|>-4FC)5`OKa zL)QK?U7&8N#eOwi3(mz1^@djjv%^>NJ#D%pcDonjPOP<&okafKq1IS15M|_lFikiLRx3tJ{P;tQ5S?}2YQAY*|5FkK+009C72%NM) z9zBRHPTF011V#uHasOvWOkxxP0t5&UAV6Sz1;iNRTQ_n>fB*pkVBT009C72oNAZfWQO^RL2&v zyRiKAI21j=rJV+a_e1T2vXh|NQr7WX65hM<_N|*YC#cA>8bT5Gzs7<90RjXF5FkK+ zz|jc^E{@L9k_ZqWa5#Y??tlGoOEEVA0t5&UI5h#m#i`Ltm;eC+1PBnA1c4&%e`6Bz zDvtzCTR?De+BG130t5&UAV7cs0Rj^)P#sic56_E_aV#FA*L}3!kvtoti2Hw36O9R9 zcVwRc0RjXF5FkK+z+nXPSR_8xI}1O`C8dv&k58B1x}J_M2R6gmBZ)=CAf>2J0>>** z#Qkp`Zz`5ffB*pk1PBl~pumXhBKfE^80lUh*(zTT^N}%P(G}GJ<;_Nb009D1B2dKr zpPQ26CrVKX|7#mBS3%v0RjZ}FHpq&U)uki z<{&^I7pRUgGzPbR5FkK+009C72oNB!M}cDvDvG%Otv%+&WCREhAV7cs0RjXFOt3&6 zTF;n#5OVT0CPe}S2oNAZV9W)Ixc{fd zJe#sd;1mT$1R-VQ(TOog5shq>uZQ`_*jw&Wh>=Mx3Gdx_`_|2yr&tn66Cgl<009C7 z2oNA}ngXX2Tg;zkk)=(5009C72oNAZfWR>doKB2!j0Li20t5&U*rULoymkJ+{k^~P zKmU(&ar*1=fBL!FSZ1X=;wfN~pfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF 
z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF%!a_t@6~_)t%sZ6{eH)b009C74lhs(TRRUQ z^Tp2#I|utkfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 
z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV6Te z1#W(?{_}4=-2CqMJ6;3`5Fl`Pfm+zwx!a#FeqPu)*e?PE2oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk 
z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0^=?4EARY`-`n`q!yo@>%;pco|LN!AdeUeo z&HBdT-DI`#V;^5_EVn+str!d2zNcSz``tIWp9BaHAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U 
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfWVjw-27hs=ihp``Q7h#ya*5=K;ZBKwXn5w zzdv96ys&eyUjzscAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z##`XG{_j8j?N|QnFSO?KY}Up*w{k~-009C72uy}RCwtYJp&3FFYD*#936J8BS`zY6 z@jZTfoDbS6Uk~$<*&K1^IQ4rs-oAD7=42FFE(s7IK!Ctl3lwqxnX%5K%n=|!V5mSI zD~K_M0vSPo009C72oNAJ;{rw8e|E+h_EG%m7-K#koL;{uNJDrM!uz52LCCIexPY+_ zW0MjfK!5-N0t5&Um}&t*#Z*`C_!X`QQs>4mXL3Y<009C72oNA}Sb^%`Ld%Q8x|*K= 
z0RodGP{jS`CMnPIN`L?X0^=)?#~AS@r!1@}^V#XdA&lPOuo)J#&?qr{5+Fc; z009C7j!U43`_CWO@-3IZxCpr3F)lSB9|Q;xAV7cs0RjXjTVT>+iz4p7Fxl#IPk;ac z0t5&UAV6Sx1tu-VnBJODG6It+P{jSk$7Gg4t_ctzK!5;&qZJTi9IdCN5gzv%3~L4f0t5&UAV7csfx`+EasQ>mE@xuquMRG<-luBmQ~2_J zsPd^go&FA|1R)bs(K1Sa009C72oNAZ;Isus1RrI@(TPv}E~1dF^7Swu=~3XPU4PS) zei8Ryo}LmZ2>}8G2oNAZU{(dh7_(aQ%1wX(fl&fQ+`lkt8si8MAV7dXwSX9-+Qm=; z1PBlyK!Cvb2o!PumGQ}uoDdi#P#t4vfHBI>4cNeP@{3qNq_(W0t5&UAV7e?sR-nOM1PFo z0>`OThC~TWlRy#oUz?^fDGLDt1PBlyFj}BGxY%nqx7l>xFRyfLyU_7ZAOG?1roB=< z%*W}GYIH3aOMn0Y0t8M*posggpG;~}AwYn@Xn}65u$P;zF7;m!nZTQ_MspZTfB*pk z1PBlyK;X~*;F~*cvv*HmTK!5-N0t5&UICcTC#jz7hpiiKP`#;n- z*iQlk2oNAZfB=Ch5fEccNku9K0RjXLC{V=xA3oshW+Om=009C60%D8-5(5YjAV7cs z0RpEcP{jQoIkgN(n7|PTh%t@;(IN;CAV7cs0RjXF5QyE?rmy}s-jn@cjUw*l?yqFaZJt2oNAZ zfB=C(fghm5!&l8=h-vAxIRkcAyv#x37v3PG=t}4)jyJYi(s6 zAkSqLp7(^T3$e1|$L4eHm~(RG$d+GhG%p<40*`e;$L)HonYVBP1PBlyut$N~9;Y)I zfhvJKTIp|LGrwDiHVYw!O{;2!G3>YmjxJ<7t{htK3<(r*|BEwJWJOH{&SM}jMFNhi zr(2oZLX3DxB|g_Azvfh4CmD8GW)?z>)2$e(6Cgl<009C7a)Bc5e~AZyNfD?HG4|2i zrWxEXH7S)Xp9BaHAV7cs0RjY$T_BGzvTG7)yO83QHsq*13R~CH(dGR@S<{->bY-p? 
zzF9oh{kbIkqo>p6Bijc2o!Hi8$AE7?(>LIkpG_N?#AYPTaK8Tzt$oBcdwh_55%<4b zmdz&t0t5&UAV7cs0Rq!0uzuo`Eji;`657o0DT)>!_6+l4=5FkK+009C7h6xmL z|0~1B8$Lupj4=epU;+dP5FkK+009C7_9ZYPwulckEFL4+eGuWuX6R+Q&2aAc8;zHV z`5b$JBJTgevCpUs5+Fc;009C72#lw|h?pfG72k(H13kttd+eBMJWDTE1PBlqdx0YE z|HRm5R0atUAV7dXDImruC43SfK!5-N0t9vyDB}Jf+I2z`5SV2FF~%&{zOoY_K!5-N z0t5&UI68sq*y30_y)EMYPaa(%EQtUC0t5&UAV7csfzubrgNr>j2A|Uy{Pb%~4hS5V zz@>b_&n~{R(s(DSuO{tAv(av3U+U?gr7*YJSZ*zc@F+ap+$^TM*H~{ax0lyi_2t&m zTH4dZTr(_e8`@0bu&bTUoe=JZ*q*By=G%?iD@lEMaLm=)$(`j^eWTU5w{dr^Juv4& z(rOMHo9@lh8NRudzkVnQ-@ei5EQJtL?sOJI+<~S4lJG*k-p&5#VqWV`x3PFPS#6}n zyU+|5^IugZxLl0d+{}yjh3y4DO!1nbTy(3knxqANX8WS&?lszX2Zmlxht`)j>g&l$ zqrH4To9|L@zE0;!I1^FMJ_@zRAv_7SUeP+8RO?Y{w-fd=N%+IqsYq;nmsM$;>Y1PW z+E;(}XTEl;$66@7o`j+{wqk3&^UZK09&JL_Zf`ElMXh!>)c)wwavYeh7_X;qpG@C= zJth#V@?7?QJl<-x-AJqQOk6Q{S>vu2B`Id>TO-qKZC~eJOTxd3 z)soX3Fvm~za$IZ%X;#mr^x*%FUs=0T zwfu!_f~DoE?RIc2+Md;+>(LuY__xYtqG&S(Ba_sF(WU-A3z9 zlC4U+)z3mPZGE-T9Nc=(H#XaM*VYHN->ZvjtEtHfd?9%r%U%E8 z7I47B(sR`-M0aI~r$xih>w3xqo5PjGwN^WxR&LZ=n=32n=xygG@l3Key3raKdvP_M zE#7aeY!0pp7grna4jFYh-(6-|A6QfJ`uwZKY0KFp{H>?T`aGcANBZQtWbs6KIN8E_ zvbeV1j2qr&Yw$$)a&|agU;BF1vHDtenBG|aMpD21pq*^=nV0KqS;h-V_+~smKh*AG zVOtsR6z3}!*KYsCWU*b9!@g~;FN3kxN1W%!&G?<lJGAMTb)XV`F&lBRN0>a#$^*=W=5k{tR>}b1*^OVwHohG}MLhk;_xdZ# zjg5Rg+BUu)_i7UU^w5W$wR`PmJ$W}-+{|Lijm_KlRvNAPQj#=pHx}P1R;~Fv>ua0$ zs=N=kW^LQ)4+!o5*wW2qU#7ODQXWWR_|tc2Ior?s(Ios{mp40I?jBlpK^uK&DX#Z_ z>bU-{V7UYc5FkK+0D+wX=~+&8?$Zn*>(VX$&Eio!r;dN7&qu}g_$@u}i-XEPCT~5= zN9qJ_r>0*72oNAZU^jsx?*H^|(-}{I0D;LB$YX{0_f;{*)3K&Hbrb| z+YArWCUiL#VWnoJ>Fh5Y{onT>HEmZiu<;H`LF%EC;$0(e(TYf|N4LZ z?LYXFC*S#-zZ2U_6*q8Ij>{K+qg9te(H8&wyl4v zyc4MsU`Lm1C0+T@nG4PE&7zO6PbcADKbLl=kaH`O3b!4?9!Y9DiV$dkOV)?^Uj@KBqyTi2L7|hO#IF z0RjXF5FkL{I0Z(8dg6-Xgtu%01PBl~lt2;p|LCC>VrBva2oN|jfjq7#uSArK9eHW! 
zOz%q2Yf1Qx8{04A!~y>$4j7y3IKTJ@fB#p${SUqsA5>HHd)ymPmVD&prhKCGedJ`t zvxEjBv3*@N8hrT9wkAo#FY1lV*h%L9oU2~t_T!TP0RjXF%(g%g_rE#Y3QA9a!2SjD z@TD@Y=mm0>kzE}8G2oNA}ngT`KKM~DW zMno9{S9JCgS4>2qPjlsvHURlEW$aq>oh0D&nMDB}JvOmPtvjQ{}xQzwwe z73GzPvUnr=065p{1K^D${Kw1N`v8ao{^dg7fZxB;H{dsS^!^-Y`ODW%yxV~DX^OSG zq3rWI+zIj7pNs2Bqn$MC@!6l{*C!#oA8H?jkCsyVZgYP=37@P?Eni}KRj4?EhES#! zW4iQ}#QY6*4G@Qfr4VzRPn5p*CX1N7ceDTyX zA_D{n5ZH%69y;~+F3^qoh6R9SpsZ_weOR0HSOwe~cdWb?PJjRb0t5)80!7^aB^o0I zwuKn^HJtn!Q$Be3RUaY7NL3>V5FkK+009C72+W*-&|>DQd<+U)#QkrLK~`je009C7 z2oNAZ;HU+J7)LE?c?1X$m^gtV?*Flg%eV{^AV7e?SPJAJMtQ$enZ>>KJ@EM?JlWCJ zKpgOQcXTxn2Yh=+R|9dtFU76~di@u~0so=b_rQFAbC9f6eR;oDr`ONgR{46^ErLf) zmi6b8009C72oRWLfg&CkcAPGy4t z0RjXF5FkK+z$pvtG0Z3)McE53dY3Yaxc?_Yj8$UG_4jVPee34UQ!bU{2@oJafB*pk z1PBl~ErG+w7-Ealsypcts1_*V{x4ULHIx7W0t5&UAaL9QVvOS!wtNBv2oRWUfgib z5gsC*J2K!5-N0t5(5q`-)nrM%P1!!;(dW@VZH0RjZZS>WdP>OcS1!_Du0zk9Ux zg#ZBp1P(4x3tKyP`o-;Z;lj?rei0x*fB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U 
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&U7;l09<8Pe(y%+w|*OxBk*{qFsZsm>u0RjXF>|LOfy=u+S3?T`% zr4a6fNAX833Hhk_9=|=#2W^$Fhxy2CjyQ9i`n?-(-@18o?+Kfd009C72oNAJh5|+0 ze`XA`CQAe+Mj($B#26D(VKPd9009C72oNAZU`z$7V~hEGaC+ThzRdWO5Z({94?=cb zW8{U4BJMw1&gqi?0RjXF5FkK+0D&10IF8_A2G|slz$6M3asS*T=3Sl%5FkK+0D(ym z$b*Y`AtGx&QRcYQi9;TRt?TLNa$qxDaQ9S+;gbLX0t5&gjX)9ipF5hRSqcFH1SUzK z`a*|R;FDB`@=AaJ0RjXF5FoIdKoR$!-)%bMk3wKXj8W@$FxBmJs`7fr9;Q27%a5Zd zr)3ZzK!5-N0t5&UATS{U)j^7&VnV8KZ`my3{tJ7X+EfGx5FkK+009C74kjSDIGCHc z2oNA}Sb-w$@Ae3I*yWj@009C72%Ne=9$f6PIr&`W?N#|A;=2d_oqBc3009C72oNA} zIDsPWzj(N%{QvFUTaO#peE{&oz1XsB%X00yYsZ(Zo47XMx=K*DO`J9mYz0=K$bqZ} z>xrP2v+_nO?q-*3SSSMRV-OSt{FWBz2Ppbp6v!7?Nd|(v<|WNj^9|Y{L8f!1$R$_e zYL_B8{1FmoIG1zzox_>=?{H?UO@P2q0)tOWt*ezeT^7Zfg{J&0KMhFlfK!5-N0t5&UsJ_4*57&4E zttATr1PBlyK!5-N0#hN7#|Vcxl~Kg~&t`$ft$ckk##A)6LJ}ZAfB*pk1PBm#1OkVT zF~k;+pjl)=;1B{u-2X?1SjD0Q2oNAZfB=CJ3WzaAh%0FV1PBnAL4hLfe{Ke=uF3=m z5GVu&#~5yGtAh|AK!5-N0t5(DL7<5HpRYn)rl1ajiz#R%MI=Cg009C72oNAJ$pV6k zNp79VEL#!x|M6s&PN@hGAV7cs0RjZZCLp*N8@-GP5Fl`z0!7^a!f~c4Edm4x5Fl_k zfjqbv@=<(?hq-^PdrKXzB`i&V009C72vkp?i2MJfdNorc0>>dR_(VtFqxkwBz}Jak zRznh?)S~dk(uY?rTsV#*NrM0Z0t5&UAV7csfhiOiUu;pt{Yz7brpN>c5FkK+009C7 z2plLdIL2__+5^GNAW&I>BJO{&vK3S@0t5&UAV6T$0)mTC3rn8>0RjXF96_Ln`@eF8 zB&2!8Eu^HxF1zutp5+Fc;009C72+XcPehi(y zH^Yy!L*_VM$u3CF4ys?N*PA;l*Vh}@qxy}g-Do!2jclZzPFf8MHyUfLwGj5g$&Jl+ zvwm%BV`nqH)Oo%cPGleIYt1P6d@7Ewh45(zX-226M%UI__3c(;bNkbccHjJ`qE_>u zxp90aJC>i{3$@!xYQ?K|C(Ly++?ReFrx#f7rFrKgwREL4*Pmp!d)nQ7F$%wWB92!> z$O@S(_5AlQo&Vn1OIPams#QO;8XjwgrL+cT(ziF$x1T17C3K6~`%wT-cjdI*ZnU>k z#>G)FHp9s*lY0B+W|UIA*xf*nZ#HhOZ#1%&>za|;FC;kq_VWkXcjZ#*cQ-@c9`pO# 
znr}8=5?X9)ewLa6Qw>CD~`4^;r zcy|&S+x7TiD{j+BS@?^Q`=ufp@vcDh)lD-~=|NKf1=_@_`ZeNu~ zYx{C$yS;ODbG?ylELR%qjjj4>6g96lR<0KthbM~312(dqZ)6{C*LSk7mmcuVAEG%^ z`1DqL{Tqjcd!_6Ys;dn5ZJ&Gtk!)l0k-2hYVgDxMYf<>at36HkMo-fn;CPsgdeW(r zQY_rt>-`|yt(`o%yzlk?rE_O5pIbhE@q=@pET8}G^2LuXFQ5D5{KuC+US4Z{UT-(9 zu1EEJ+f{bszBJM#Kf1Vl-@WfU>Av@!&h9x+cV%OJ=SHjEXnvMw@n-L?_U+~FUF%!# z^<=ykmJTZ5?eN+IGP)bWm)Rz4Axc#C!rX&a7_^ffcHN!>ggE|rlJVRx!u;7wm(y-> z@6Ud-dt9gH1PBlyKwywSalZfMLGw%-ULZf-NDd2|AtdJ=vtVp5`HN z`~U9VBZ{eP~- z2oNAZfB*pk1PBnQjDYhTm1#;9A#ikoBJO|l=qX8o009C72oRWIfx$7xiF|S#-wr2A zoI4@h4Ye=xQzXd^tyYT%1Aeje;gt&)%B2hm5FkK+0D)r?DB}LNjww0G5FkKc zY=EfcqZBCP{<(dbE*78Xi(#$n^JB&MLNYApr|XyFo!{{l%N4&V$UpsG9G7`zd6x+n z%Nh_MK!5-N0t5&UAV7cs0RqP-P{ibKA76@6B0zuu0RjX@Bry1wXs)3j5w~Os5FkK+ zKs5x4xc@JzQ5&@&K;XCp1Q*98DIEd?2oNAZfB*pk;}t04{&&V!3c#ifXi@T8v7di2MI?ROw5X009C72oNApA%Q%&80yYk&9GQ&hGq!a!wSQ- zu$O$OMIlcjc~8b}=aY8J*R4+a9&cEoUZW}m2oM;vKoR$!8FMjYPk;ac0!I_bV+AqB z(G;ysfB*pk1PBnQpgD=gh%rX%D|G?{2oNAZfB*pkvm!7!ws7CtV{88+ z?*GQIB`6^R1PBlyK!5-N0uw4AxR}s}Q7i%kMk-Lm{eLymOe9W#009C7W=$XuE{41c zeBKorv(`Vor|Sh&$xYpsRkHa91;?5}NY>};ktxS4N;6WNFQS~E&MpNivaA$%H=``;uV zu143^k`lHWo7EQrFrKg zNuA?3&Gje6J$Swmg@1Y`j#oo+$DX8+$x_dM|I+#IoxN13<~12D;OTpS_3d_}y`3^H zj*77vPG*_Z+c!6(l;Xwi26}w6adUm6k+odcjMRQ%p#A(o_FcJ@`rXZtx5xbc_INi6 zze)1#YLAx&v`1cn(Y8w7t$(-^ZQU%pb@pLUw|*+0v)$fWYhCLzeR^wSqg_AXJCVxH zb!5YxP&-j()5X@?X}=kTe}16{skhVS35=HKxvbL`MQtSw(prg9r-f#Cws-0vuudKO zwg6ABY(4ak$_rlUXAe#!oV97y`@C~WtLzS7g}_SI;#v@d70TJx5ZNn4ApM4E^+xlvJc~Db_q1;>ckfx>daozry|8pp0dI%b9+1)95WdW| zU<*;AvKQtaw8Ef$^swvp93RB-&y$Sjei7!+Ub>uii{vJIUIYjbAV8p^0>%0McPm;w z)gnM(zd(L?n?xb*Fuk9~X95HW5FkK+009D36e!~UzaClL#26!^l`sJU1PBlyK!5;& znG(ojiy^O}@+8ff>KiJ1Ux6a-|C{?RWF`Ru1PBlyK!Cu(0?u>mCsC0kgTRp98YF_vGn1U3l|QRw}lB1AV7cs0RjXF5SSc+BJTgY zqn4->D`bH}nOYnt(ZgQYJ)O=jCpN=^M~9afh6D%@AV7cs0RjXFJe)utgNQ92PS0Ai zBv8cte?LogR#yT92oNAZfWX5Dh%p{U%{l}K5Fjvafgezy0OS|9bm3@6RXoe_vb80Rl%CsD<5pUr{TD&!it~$-i2w`B^?IdtWTZ zAMcuGlmGz&1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N 
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7e?#0b3f z%U9osuPnX$``C*B0RjY$E>H`*`4+xb44+9q){=kCe0uqwOv+x5mlF&L5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ 
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PDx$z^~8#+m-+Q(eM7_R9>js zB$Z8B2oNAZfB*pk1PF{uAkJR3W@v^Gh1zNe*TP;nkx!1}+u=l+^PLdxhT4}Qr1P>x zleKD5_+sh9D;F-5OBoU%Kw#1ZinxDn(#xrQ1PBlyKw#^M1PBlyK!5-N z0<$Dgh6IPJb0=2FVu*6x?B`E=VfS=8yPRl9fB*pk1PBlyK!5-N0t5(*O`weX%OCoe zmoq(4lxU$CmOltcmwU39mX1GGjHk<$Hwj3WE!Qi4PZ0jQ$f(RGTkQ}1e;znlq?axJ zc=5r2009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs 
z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs o0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAdm?BKUDjY^8f$< literal 0 HcmV?d00001 diff --git a/optd/statistics/src/ducklake_connection.rs b/optd/statistics/src/ducklake_connection.rs new file mode 100644 index 0000000..6fa8deb --- /dev/null +++ b/optd/statistics/src/ducklake_connection.rs @@ -0,0 +1,399 @@ +use std::sync::Arc; + +use datafusion::{execution::SendableRecordBatchStream, physical_plan::memory::MemoryStream}; +use duckdb::{ + Connection, DuckdbConnectionManager as DuckDBManager, + arrow::{array::RecordBatch, datatypes::SchemaRef}, +}; +use r2d2::ManageConnection; +use snafu::{ResultExt, prelude::*}; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Connection to DuckDB failed: {}", source))] + ConnectionError { source: duckdb::Error }, + #[snafu(display("Failed to get connection from pool: {}", source))] + PoolError { source: r2d2::Error }, + #[snafu(display("Invalid database path: {}", path))] + InvalidPathError { path: Arc }, + #[snafu(display("Arrow query failed: {}", source))] + ArrowError { + source: duckdb::arrow::error::ArrowError, + }, + #[snafu(display("DataFusion error: {}", source))] + DataFusionError { + source: datafusion::error::DataFusionError, + }, + #[snafu(display("Other error: {}", details))] + Other { details: Arc }, +} + +impl From for Error { + fn from(err: datafusion::error::DataFusionError) -> Self { + Error::DataFusionError { 
source: err } + } +} + +impl From for Error { + fn from(err: duckdb::Error) -> Self { + Error::ConnectionError { source: err } + } +} + +#[derive(Debug)] +pub enum ConnectionMode { + Memory, + File, +} + +pub struct DuckLakeConnectionBuilder { + meta_name: Arc, + path: Arc, + mode: ConnectionMode, + manager: DuckDBManager, +} + +impl std::fmt::Debug for DuckLakeConnectionBuilder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "DuckLakeConnectionBuilder {{ name: {}, path: {}, mode: {:?} }}", + self.meta_name, self.path, self.mode + ) + } +} + +impl DuckLakeConnectionBuilder { + // Default constants + pub const DEFAULT_LAKE_NAME: &str = "meta_lake"; + + pub fn memory() -> Result { + let manager = DuckDBManager::memory().context(ConnectionSnafu)?; + Ok(Self { + meta_name: Arc::from(Self::DEFAULT_LAKE_NAME), + path: Arc::from(":memory:"), + mode: ConnectionMode::Memory, + manager, + }) + } + + pub fn file(path: &str) -> Result { + let manager = DuckDBManager::file(path).context(ConnectionSnafu)?; + Ok(Self { + meta_name: Arc::from(Self::DEFAULT_LAKE_NAME), + path: Arc::from(path), + mode: ConnectionMode::File, + manager, + }) + } + + pub fn meta_name(&mut self, name: &str) -> &Self { + self.meta_name = Arc::from(name); + self + } + + pub fn get_meta_name(&self) -> &str { + self.meta_name.as_ref() + } + + pub fn get_path(&self) -> &str { + self.path.as_ref() + } + + pub fn get_mode(&self) -> &ConnectionMode { + &self.mode + } + + pub fn connect(&self) -> Result { + let mut connection = self.manager.connect().context(ConnectionSnafu)?; + self.ducklake(&mut connection)?; + Ok(connection) + } + + fn ducklake(&self, connection: &mut Connection) -> Result<(), Error> { + let setup_query = match self.mode { + ConnectionMode::Memory => format!( + r#" + INSTALL ducklake; + LOAD ducklake; + ATTACH 'ducklake:metadata.ducklake' AS {name}; + USE {name}; + "#, + name = self.meta_name.as_ref(), + ), + ConnectionMode::File => format!( + r#" + 
INSTALL ducklake; + LOAD ducklake; + ATTACH 'ducklake:metadata.ducklake' AS {name} (DATA_PATH '{path}'); + USE {name}; + "#, + name = self.meta_name.as_ref(), + path = self.path.as_ref() + ), + }; + + // Try to execute the setup query, but ignore "already attached" errors + match connection.execute_batch(setup_query.as_str()) { + Ok(_) => Ok(()), + Err(duckdb::Error::DuckDBFailure(_, Some(msg))) if msg.contains("already attached") => { + // Database is already attached, just use it + connection + .execute_batch(&format!("USE {}", self.meta_name.as_ref())) + .context(ConnectionSnafu)?; + Ok(()) + } + Err(e) => Err(Error::ConnectionError { source: e }), + } + } + + pub fn initialize_schema(&self, connection: &Connection) -> Result<(), Error> { + // Create tables for storing stats metadata + connection.execute_batch( + format!( + r#" + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.ducklake_table_column_adv_stats ( + column_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + table_id BIGINT, + stats_type VARCHAR, + payload TEXT, + PRIMARY KEY (column_id, begin_snapshot, stats_type) + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query ( + query_id BIGINT PRIMARY KEY, + query_string TEXT, + root_group_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query_instance ( + query_instance_id BIGINT PRIMARY KEY, + query_id BIGINT, + creation_time BIGINT, + snapshot_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + PRIMARY KEY (group_id, begin_snapshot) + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group_stats ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload TEXT, + PRIMARY KEY (group_id, begin_snapshot, stats_type) + ); + + CREATE TABLE IF NOT EXISTS 
__ducklake_metadata_{name}.main.optd_execution_subplan_feedback ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload TEXT, + PRIMARY KEY (group_id, begin_snapshot, stats_type) + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_subplan_scalar_feedback ( + scalar_id BIGINT, + group_id BIGINT, + stats_type VARCHAR, + payload TEXT, + query_instance_id BIGINT, + PRIMARY KEY (scalar_id, group_id, stats_type, query_instance_id) + ); + + CREATE INDEX IF NOT EXISTS idx_table_stats_snapshot + ON __ducklake_metadata_{name}.main.ducklake_table_column_adv_stats(begin_snapshot, end_snapshot); + "#, + name = self.meta_name.as_ref() + ) + .as_str(), + ) + .context(ConnectionSnafu)?; + + Ok(()) + } +} + +impl Default for DuckLakeConnectionBuilder { + fn default() -> Self { + Self::memory().expect("Failed to create default DuckLakeConnectionBuilder") + } +} + +pub async fn query( + connection: &Connection, + sql: &str, + schema: SchemaRef, + projection: Option>, +) -> Result { + let mut stmt = connection.prepare(sql).context(ConnectionSnafu)?; + + let rbs = stmt.query_arrow([])?.collect::>(); + let stream = MemoryStream::try_new(rbs, schema, projection)?; + Ok(Box::pin(stream)) +} + +#[cfg(test)] +mod tests { + use super::*; + use futures::TryStreamExt; + + #[tokio::test] + async fn test_ducklake_connection() { + let path = "./test_ducklake.db"; + let ducklake_conn = + DuckLakeConnectionBuilder::file(path).expect("Failed to create DuckLakeConnection"); + assert_eq!(ducklake_conn.get_path(), path); + + assert_eq!( + matches!(ducklake_conn.get_mode(), ConnectionMode::File), + true + ); + + assert_eq!(ducklake_conn.get_meta_name(), "meta_lake"); + + { + let conn = ducklake_conn.connect().expect("Failed to get connection"); + assert!(conn.execute_batch("SELECT 1;").is_ok()); + + let mut stmt = conn + .prepare("select name from (show all tables);") + .expect("Failed to prepare show tables statement"); + + let rows = 
stmt + .query_map([], |row| row.get::(0)) + .expect("Failed to execute show tables query"); + + let expected = vec![ + "ducklake_column", + "ducklake_column_mapping", + "ducklake_column_tag", + "ducklake_data_file", + "ducklake_delete_file", + "ducklake_file_column_stats", + "ducklake_file_partition_value", + "ducklake_files_scheduled_for_deletion", + "ducklake_inlined_data_tables", + "ducklake_metadata", + "ducklake_name_mapping", + "ducklake_partition_column", + "ducklake_partition_info", + "ducklake_schema", + "ducklake_schema_versions", + "ducklake_snapshot", + "ducklake_snapshot_changes", + "ducklake_table", + "ducklake_table_column_stats", + "ducklake_table_stats", + "ducklake_tag", + "ducklake_view", + ]; + + for (i, row) in rows.enumerate() { + let table_name = row.expect("Failed to get table name"); + assert_eq!(table_name, expected[i]); + } + } + + { + let conn = ducklake_conn.connect().expect("Failed to get connection"); + ducklake_conn + .initialize_schema(&conn) + .expect("Failed to initialize schema"); + + let mut stmt = conn + .prepare("select name from (show all tables);") + .expect("Failed to prepare show tables statement"); + + let rows = stmt + .query_map([], |row| row.get::(0)) + .expect("Failed to execute show tables query"); + + let expected = vec![ + "ducklake_column", + "ducklake_column_mapping", + "ducklake_column_tag", + "ducklake_data_file", + "ducklake_delete_file", + "ducklake_file_column_stats", + "ducklake_file_partition_value", + "ducklake_files_scheduled_for_deletion", + "ducklake_inlined_data_tables", + "ducklake_metadata", + "ducklake_name_mapping", + "ducklake_partition_column", + "ducklake_partition_info", + "ducklake_schema", + "ducklake_schema_versions", + "ducklake_snapshot", + "ducklake_snapshot_changes", + "ducklake_table", + "ducklake_table_column_adv_stats", + "ducklake_table_column_stats", + "ducklake_table_stats", + "ducklake_tag", + "ducklake_view", + "optd_execution_subplan_feedback", + "optd_group", + 
"optd_group_stats", + "optd_query", + "optd_subplan_scalar_feedback", + ]; + + for (i, row) in rows.enumerate() { + let table_name = row.expect("Failed to get table name"); + assert_eq!(table_name, expected[i]); + } + } + + { + let conn = ducklake_conn.connect().expect("Failed to get connection"); + conn.execute_batch("CREATE TABLE IF NOT EXISTS test (id INTEGER, name VARCHAR);") + .expect("Failed to create table"); + conn.execute_batch("INSERT INTO test (id, name) VALUES (1, 'Alice'), (2, 'Bob');") + .expect("Failed to insert data"); + } + + { + let conn = ducklake_conn.connect().expect("Failed to get connection"); + let schema = Arc::new(datafusion::arrow::datatypes::Schema::new(vec![ + datafusion::arrow::datatypes::Field::new( + "id", + datafusion::arrow::datatypes::DataType::Int32, + false, + ), + datafusion::arrow::datatypes::Field::new( + "name", + datafusion::arrow::datatypes::DataType::Utf8, + false, + ), + ])); + + let rbs = query(&conn, "SELECT * FROM test;", schema, None) + .await + .expect("Failed to execute query"); + + let schema_ref = rbs.schema(); + assert_eq!(schema_ref.fields().len(), 2); + assert_eq!(schema_ref.field(0).name(), "id"); + assert_eq!(schema_ref.field(1).name(), "name"); + + let batches: Vec<_> = rbs + .try_collect() + .await + .expect("Failed to collect record batches"); + + assert_eq!(batches.len(), 1); + let batch = &batches[0]; + assert_eq!(batch.num_rows(), 2); + } + } +} diff --git a/optd/statistics/src/lib.rs b/optd/statistics/src/lib.rs new file mode 100644 index 0000000..a196ba3 --- /dev/null +++ b/optd/statistics/src/lib.rs @@ -0,0 +1,5 @@ +mod ducklake_connection; +mod statistics; + +pub use ducklake_connection::{ConnectionMode, DuckLakeConnectionBuilder, query}; +pub use statistics::{DuckLakeStatisticsProvider, Error as InterfaceError, StatisticsProvider}; diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs new file mode 100644 index 0000000..d901e57 --- /dev/null +++ 
b/optd/statistics/src/statistics.rs @@ -0,0 +1,448 @@ +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use snafu::{ResultExt, prelude::*}; +use std::sync::Arc; + +use crate::ducklake_connection::{DuckLakeConnectionBuilder, Error as ConnectionError}; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Database connection error: {}", source))] + Connection { source: ConnectionError }, + #[snafu(display("Query execution failed: {}", source))] + QueryExecution { source: duckdb::Error }, + #[snafu(display("JSON serialization error: {}", source))] + JsonSerialization { source: serde_json::Error }, + #[snafu(display( + "Statistics not found for table: {}, column: {}, snapshot: {}", + table, + column, + snapshot + ))] + StatsNotFound { + table: String, + column: String, + snapshot: i64, + }, + #[snafu(display( + "Group statistics not found for group_id: {}, stats_type: {}, snapshot: {}", + group_id, + stats_type, + snapshot + ))] + GroupStatsNotFound { + group_id: i64, + stats_type: String, + snapshot: i64, + }, +} + +impl From for Error { + fn from(err: ConnectionError) -> Self { + Error::Connection { source: err } + } +} + +/** Packaged Statistics Objects */ +/** Table statistics -- Contains overall row count and per-column statistics */ +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TableStatistics { + row_count: usize, + column_statistics: Vec, +} + +impl TableStatistics { + fn new(rows: I) -> Self + where + I: IntoIterator< + Item = Result< + ( + i64, + i64, + String, + String, + i64, + i64, + i64, + String, + String, + String, + String, + String, + ), + duckdb::Error, + >, + >, + { + let mut row_count = 0; + let mut column_statistics = Vec::new(); + + for row_result in rows { + if let Ok(( + _table_id, + column_id, + column_name, + column_type, + record_count, + _next_row_id, + _file_size_bytes, + contains_null, + contains_nan, + min_value, + max_value, + _extra_stats_json, + )) = row_result + { + row_count = record_count as 
usize; // Assuming all rows have the same record_count + + let actual_contains_null = match contains_null.as_str() { + "TRUE" => Some(true), + "FALSE" => Some(false), + _ => None, + }; + + let actual_contains_nan = match contains_nan.as_str() { + "TRUE" => Some(true), + "FALSE" => Some(false), + _ => None, + }; + + let actual_min_value = if min_value == "NULL" { + None + } else { + Some(min_value) + }; + + let actual_max_value = if max_value == "NULL" { + None + } else { + Some(max_value) + }; + + let column_stats = ColumnStatistics::new( + column_id, + column_type, + column_name.clone(), + actual_min_value, + actual_max_value, + actual_contains_null, + actual_contains_nan, + vec![], // Advanced stats can be populated later + ); + + column_statistics.push(column_stats); + } + } + + TableStatistics { + row_count, + column_statistics, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ColumnStatistics { + id: i64, + column_type: String, + name: String, + min: Option, + max: Option, + contains_null: Option, + contains_nan: Option, + advanced_stats: Vec, // TODO, e.g. histogram, ndv, etc. 
+} + +impl ColumnStatistics { + fn new( + id: i64, + column_type: String, + name: String, + min: Option, + max: Option, + contains_null: Option, + contains_nan: Option, + advanced_stats: Vec, + ) -> Self { + ColumnStatistics { + id, + column_type, + name, + min, + max, + contains_null, + contains_nan, + advanced_stats, + } + } + + #[allow(dead_code)] + fn add_advanced_stat(&mut self, stat: AdvanceColumnStatistics) { + self.advanced_stats.push(stat); + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct AdvanceColumnStatistics { + stats_type: String, + data: Value, +} + +pub trait StatisticsProvider { + /// Create a new memory-based StatisticsProvider + fn memory() -> Result, Error>; + + /// Create a new file-based StatisticsProvider + fn file(path: &str) -> Result, Error>; + + fn get_connection(&self) -> Result; + + fn current_snapshot(&self, connection: &duckdb::Connection) -> Result; + + /// Retrieve table and column statistics at specific snapshot + fn fetch_table_statistics( + &self, + table_name: &str, + snapshot: i64, + connection: &duckdb::Connection, + ) -> Result, Error>; + + /// Insert table column statistics + fn insert_table_stats( + &self, + column_id: i64, + begin_snapshot: i64, + end_snapshot: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error>; +} + +/// DuckLake-based implementation of StatisticsProvider +pub struct DuckLakeStatisticsProvider { + connection_builder: Arc, +} + +impl DuckLakeStatisticsProvider { + /// Create a new DuckLakeStatisticsProvider with memory-based DuckDB + pub fn memory() -> Result { + let connection_builder = Arc::new(DuckLakeConnectionBuilder::memory()?); + Ok(Self { connection_builder }) + } + + /// Create a new DuckLakeStatisticsProvider with file-based DuckDB + pub fn file(path: &str) -> Result { + let connection_builder = Arc::new(DuckLakeConnectionBuilder::file(path)?); + Ok(Self { connection_builder }) + } +} + +impl StatisticsProvider for DuckLakeStatisticsProvider { + 
fn memory() -> Result, Error> { + let connection_builder = Arc::new(DuckLakeConnectionBuilder::memory()?); + Ok(Box::new(Self { connection_builder })) + } + + /// Create a new DuckLakeStatisticsProvider with file-based DuckDB + fn file(path: &str) -> Result, Error> { + let connection_builder = Arc::new(DuckLakeConnectionBuilder::file(path)?); + Ok(Box::new(Self { connection_builder })) + } + + /// Get a connection to the DuckDB instance and initialize the DuckLake-Optd schema + fn get_connection(&self) -> Result { + let conn = self.connection_builder.connect()?; + self.connection_builder.initialize_schema(&conn)?; + Ok(conn) + } + + fn current_snapshot(&self, conn: &duckdb::Connection) -> Result { + let mut stmt = conn + .prepare( + format!( + r#" + SELECT snapshot_id, schema_version, next_catalog_id, next_file_id + FROM __ducklake_metadata_{name}.main.ducklake_snapshot + WHERE snapshot_id = (SELECT MAX(snapshot_id) + FROM __ducklake_metadata_{name}.main.ducklake_snapshot); + "#, + name = self.connection_builder.get_meta_name() + ) + .as_str(), + ) + .context(QueryExecutionSnafu)?; + + let row = stmt + .query_row([], |row| { + Ok(( + row.get::(0)?, // snapshot_id + row.get::(1)?, // schema_version + row.get::(2)?, // next_catalog_id + row.get::(3)?, // next_file_id + )) + }) + .context(QueryExecutionSnafu)?; + + Ok(row.0) + } + + fn fetch_table_statistics( + &self, + table: &str, + snapshot: i64, + conn: &duckdb::Connection, + ) -> Result, Error> { + // Query for table statistics within the snapshot range + let mut stmt = conn + .prepare( + format!( + r#" + SELECT table_id, column_id, column_name, column_type, record_count, next_row_id, file_size_bytes, contains_null, contains_nan, min_value, max_value, extra_stats + FROM __ducklake_metadata_{name}.main.ducklake_table_stats + LEFT JOIN __ducklake_metadata_{name}.main.ducklake_table_column_stats USING (table_id) + LEFT JOIN __ducklake_metadata_{name}.main.ducklake_column col USING (table_id, column_id) + WHERE 
record_count IS NOT NULL AND file_size_bytes IS NOT NULL AND + table_id = (SELECT table_id FROM __ducklake_metadata_{name}.main.ducklake_table WHERE table_name = ?) + AND ? >= begin_snapshot AND (? < end_snapshot OR end_snapshot IS NULL) + ORDER BY table_id, column_id; + "#, + name = self.connection_builder.get_meta_name() + ).as_str() + ) + .context(QueryExecutionSnafu)?; + + let rows = stmt + .query_map([table, snapshot.to_string().as_str()], |row| { + Ok(( + row.get::(0)?, // table_id + row.get::(1)?, // column_id + row.get::(2)?, // column_name + row.get::(3)?, // column_type + row.get::(4)?, // record_count + row.get::(5)?, // next_row_id + row.get::(6)?, // file_size_bytes + row.get::(7)?, // contains_null + row.get::(8)?, // contains_nan + row.get::(9)?, // min_value + row.get::(10)?, // max_value + row.get::(11)?, // extra_stats (JSON) + )) + }) + .context(QueryExecutionSnafu)?; + + let table_stats: TableStatistics = TableStatistics::new(rows); + + Ok(Some(table_stats)) + } + + /// Insert table column statistics + fn insert_table_stats( + &self, + column_id: i64, + begin_snapshot: i64, + end_snapshot: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error> { + let conn = self.connection_builder.connect()?; + let table_name = format!( + "__ducklake_metadata_{}.main.ducklake_table_column_adv_stats", + self.connection_builder.get_meta_name() + ); + let query = format!( + "INSERT OR REPLACE INTO {} + (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) + VALUES (?, ?, ?, ?, ?, ?)", + table_name + ); + let mut stmt = conn.prepare(&query).context(QueryExecutionSnafu)?; + + stmt.execute([ + &column_id.to_string(), + &begin_snapshot.to_string(), + &end_snapshot.to_string(), + &table_id.to_string(), + stats_type, + payload, + ]) + .context(QueryExecutionSnafu)?; + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn test_ducklake_statistics_provider_creation() { + { + // 
Test memory-based provider + let memory_provider = DuckLakeStatisticsProvider::memory(); + assert!(memory_provider.is_ok()); + } + + { + // Test file-based provider + let file_provider = DuckLakeStatisticsProvider::file("./test_stats.db"); + assert!(file_provider.is_ok()); + } + } + + #[test] + fn test_table_stats_insertion() { + let provider = DuckLakeStatisticsProvider::memory().unwrap(); + + // Initialize the schema first + let _conn = provider.get_connection().unwrap(); + + // Insert table statistics + let result = + provider.insert_table_stats(1, 1, 100, 1, "ndv", r#"{"distinct_count": 1000}"#); + match &result { + Ok(_) => println!("Table stats insertion successful"), + Err(e) => println!("Table stats insertion failed: {}", e), + } + assert!(result.is_ok()); + } + + #[test] + fn test_json_payload_handling() { + let payload = json!({ + "distinct_count": 1000, + "null_count": 50, + "min_value": 1, + "max_value": 999999 + }); + + let payload_str = serde_json::to_string(&payload).unwrap(); + let parsed_back: serde_json::Value = serde_json::from_str(&payload_str).unwrap(); + + assert_eq!(parsed_back["distinct_count"], 1000); + assert_eq!(parsed_back["null_count"], 50); + } + + #[test] + fn test_table_stats_insertion_and_retrieval() { + let provider = DuckLakeStatisticsProvider::memory().unwrap(); + + // Initialize the schema first + let _conn = provider.get_connection().unwrap(); + + // Insert table statistics + let result = + provider.insert_table_stats(1, 1, 100, 1, "ndv", r#"{"distinct_count": 1000}"#); + match &result { + Ok(_) => println!("Table stats insertion successful"), + Err(e) => println!("Table stats insertion failed: {}", e), + } + assert!(result.is_ok()); + + // Note: Actual retrieval would require setting up the table_metadata + // and column_metadata tables, which would be done by the DuckLake extension + } +} diff --git a/optd/statistics/test_stats.db b/optd/statistics/test_stats.db new file mode 100644 index 
0000000000000000000000000000000000000000..88ec05a2717ff0b85415f32f058b8d67d4e1929f GIT binary patch literal 12288 zcmeI#u?fOZ5CG6Gh`5A}Z6F#VhExjL33k?^?qKB#LYA;|4>xcS7tkUE!4R-Yns?0| zcLxWryTRx&iJtdjy<2UT;lDU1@hnc7caLd4J!Gden>X*65FkK+009C72oNAZfB*pk z?Frmo*UNq}-c}diKT-8V=$GZNOug@*R{8&HBpL(=5FkK+009C72oNAZpi=?=f2aJ8 V6Cgl<009C72oNAZfB=D31U|{RHAesd literal 0 HcmV?d00001 From af832ba06fac84471d154c346431a95b5d194447 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Sat, 18 Oct 2025 23:09:41 -0400 Subject: [PATCH 11/40] update dependencies --- optd/statistics/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optd/statistics/Cargo.toml b/optd/statistics/Cargo.toml index a5357e7..a775026 100644 --- a/optd/statistics/Cargo.toml +++ b/optd/statistics/Cargo.toml @@ -8,8 +8,8 @@ repository.workspace = true datafusion = { workspace = true } parking_lot = "=0.12.5" glob = "0.3.2" -url = "2.5.4" -async-trait = "0.1.88" +url = { version = "^2.5.7" } +async-trait = { version = "^0.1.89" } tokio = { workspace = true, features = ["full"] } duckdb = { features = ["r2d2", "bundled"], version = "1.4.0" } r2d2 = "0.8.10" From a610854bbd34aa3bdb7e4e242d3073cf4ec3eb9a Mon Sep 17 00:00:00 2001 From: Yuchen Liang Date: Mon, 20 Oct 2025 17:11:43 -0400 Subject: [PATCH 12/40] add some notes to refactoring Signed-off-by: Yuchen Liang --- optd/statistics/src/statistics.rs | 72 +++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs index d901e57..e5f0585 100644 --- a/optd/statistics/src/statistics.rs +++ b/optd/statistics/src/statistics.rs @@ -93,7 +93,7 @@ impl TableStatistics { _extra_stats_json, )) = row_result { - row_count = record_count as usize; // Assuming all rows have the same record_count + row_count = record_count as usize; // Assuming all columns have the same record_count let actual_contains_null = match contains_null.as_str() { "TRUE" => 
Some(true), @@ -150,7 +150,7 @@ pub struct ColumnStatistics { max: Option, contains_null: Option, contains_nan: Option, - advanced_stats: Vec, // TODO, e.g. histogram, ndv, etc. + advanced_stats: Vec, // TODO, e.g. histogram, number of distinct values (set cardinality), etc. } impl ColumnStatistics { @@ -197,7 +197,7 @@ pub trait StatisticsProvider { fn get_connection(&self) -> Result; - fn current_snapshot(&self, connection: &duckdb::Connection) -> Result; + fn current_snapshot(&self, connection: &duckdb::Connection) -> Result; /// Retrieve table and column statistics at specific snapshot fn fetch_table_statistics( @@ -238,6 +238,15 @@ impl DuckLakeStatisticsProvider { } } +pub struct SnapshotId(i64); + +pub struct CurrentSnapshot { + snapshot_id: SnapshotId, + schema_version: i64, + next_catalog_id: i64, + next_file_id: i64, +} + impl StatisticsProvider for DuckLakeStatisticsProvider { fn memory() -> Result, Error> { let connection_builder = Arc::new(DuckLakeConnectionBuilder::memory()?); @@ -257,7 +266,7 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { Ok(conn) } - fn current_snapshot(&self, conn: &duckdb::Connection) -> Result { + fn current_snapshot(&self, conn: &duckdb::Connection) -> Result { let mut stmt = conn .prepare( format!( @@ -273,18 +282,18 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { ) .context(QueryExecutionSnafu)?; - let row = stmt + let current_snapshot = stmt .query_row([], |row| { - Ok(( - row.get::(0)?, // snapshot_id - row.get::(1)?, // schema_version - row.get::(2)?, // next_catalog_id - row.get::(3)?, // next_file_id - )) + Ok(CurrentSnapshot { + snapshot_id: SnapshotId(row.get("snapshot_id")?), + schema_version: row.get("schema_version")?, + next_catalog_id: row.get("next_catalog_id")?, + next_file_id: row.get("next_file_id")?, + }) }) .context(QueryExecutionSnafu)?; - Ok(row.0) + Ok(current_snapshot) } fn fetch_table_statistics( @@ -346,24 +355,49 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { 
stats_type: &str, payload: &str, ) -> Result<(), Error> { - let conn = self.connection_builder.connect()?; + let mut conn = self.connection_builder.connect()?; + // let mut txn = conn.transaction().unwrap(); + + let current_snapshot = self.current_snapshot(&conn)?; + + // Parameters: column_id, table_id, (stats_type: &str, payload: &str) + // 1. Get the current snapshot id + // 2. insert with begin_snapshot = current snapshot + 1; + // 3. do an update query, UPDATE end_snapshot = current_snapshot WHERE column_id= ? and table_id= ? and stats_type = ?; + // 4. increment next snapshot id + + // 3. check how duckdb do snapshot id increments (might need update `ducklake_snapshot` table). + + // R"(INSERT INTO {METADATA_CATALOG}.ducklake_snapshot VALUES ({SNAPSHOT_ID}, NOW(), {SCHEMA_VERSION}, {NEXT_CATALOG_ID}, {NEXT_FILE_ID});)"); + // 4. Update snapshot_changes (MIGHT BE optional). + // auto query = StringUtil::Format( + // R"(INSERT INTO {METADATA_CATALOG}.ducklake_snapshot_changes VALUES ({SNAPSHOT_ID}, %s, %s, %s, %s);)", + // SQLStringOrNull(change_info.changes_made), commit_info.author.ToSQLString(), + // commit_info.commit_message.ToSQLString(), commit_info.commit_extra_info.ToSQLString()); + // auto result = transaction.Query(commit_snapshot, query); + // if (result->HasError()) { + // result->GetErrorObject().Throw("Failed to write new snapshot to DuckLake:"); + // } + // Commit + let table_name = format!( "__ducklake_metadata_{}.main.ducklake_table_column_adv_stats", self.connection_builder.get_meta_name() ); + let query = format!( - "INSERT OR REPLACE INTO {} + "INSERT INTO {} (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) VALUES (?, ?, ?, ?, ?, ?)", table_name ); let mut stmt = conn.prepare(&query).context(QueryExecutionSnafu)?; - stmt.execute([ - &column_id.to_string(), - &begin_snapshot.to_string(), - &end_snapshot.to_string(), - &table_id.to_string(), + stmt.execute(duckdb::params![ + &column_id, + &begin_snapshot, + 
&end_snapshot, + table_id, stats_type, payload, ]) From c114adfc6c6b771925b8cefb4abe9d94794ce720 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Tue, 21 Oct 2025 22:11:45 -0400 Subject: [PATCH 13/40] fix non unique table name query, use fromIterator, and remove connection pool --- Cargo.lock | 23 -- optd/statistics/Cargo.toml | 6 +- optd/statistics/src/ducklake_connection.rs | 399 --------------------- optd/statistics/src/lib.rs | 2 - optd/statistics/src/statistics.rs | 369 ++++++++++--------- 5 files changed, 182 insertions(+), 617 deletions(-) delete mode 100644 optd/statistics/src/ducklake_connection.rs diff --git a/Cargo.lock b/Cargo.lock index a9fd1a2..5d05031 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2249,7 +2249,6 @@ dependencies = [ "hashlink", "libduckdb-sys", "num-integer", - "r2d2", "rust_decimal", "strum 0.27.2", ] @@ -3605,12 +3604,10 @@ name = "optd-statistics" version = "0.1.0" dependencies = [ "async-trait", - "datafusion", "duckdb", "futures", "glob", "parking_lot", - "r2d2", "serde", "serde_json", "snafu", @@ -4009,17 +4006,6 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" -[[package]] -name = "r2d2" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" -dependencies = [ - "log", - "parking_lot", - "scheduled-thread-pool", -] - [[package]] name = "radium" version = "0.7.0" @@ -4458,15 +4444,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "scheduled-thread-pool" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" -dependencies = [ - "parking_lot", -] - [[package]] name = "scopeguard" version = "1.2.0" diff --git a/optd/statistics/Cargo.toml b/optd/statistics/Cargo.toml index a775026..5595bbf 
100644 --- a/optd/statistics/Cargo.toml +++ b/optd/statistics/Cargo.toml @@ -5,15 +5,13 @@ edition.workspace = true repository.workspace = true [dependencies] -datafusion = { workspace = true } parking_lot = "=0.12.5" glob = "0.3.2" url = { version = "^2.5.7" } async-trait = { version = "^0.1.89" } tokio = { workspace = true, features = ["full"] } -duckdb = { features = ["r2d2", "bundled"], version = "1.4.0" } -r2d2 = "0.8.10" +duckdb = { features = ["bundled"], version = "1.4.0" } snafu = "0.8.6" serde_json = "1.0" futures = "0.3.31" -serde = "1.0.228" +serde = "1.0.0" diff --git a/optd/statistics/src/ducklake_connection.rs b/optd/statistics/src/ducklake_connection.rs deleted file mode 100644 index 6fa8deb..0000000 --- a/optd/statistics/src/ducklake_connection.rs +++ /dev/null @@ -1,399 +0,0 @@ -use std::sync::Arc; - -use datafusion::{execution::SendableRecordBatchStream, physical_plan::memory::MemoryStream}; -use duckdb::{ - Connection, DuckdbConnectionManager as DuckDBManager, - arrow::{array::RecordBatch, datatypes::SchemaRef}, -}; -use r2d2::ManageConnection; -use snafu::{ResultExt, prelude::*}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Connection to DuckDB failed: {}", source))] - ConnectionError { source: duckdb::Error }, - #[snafu(display("Failed to get connection from pool: {}", source))] - PoolError { source: r2d2::Error }, - #[snafu(display("Invalid database path: {}", path))] - InvalidPathError { path: Arc }, - #[snafu(display("Arrow query failed: {}", source))] - ArrowError { - source: duckdb::arrow::error::ArrowError, - }, - #[snafu(display("DataFusion error: {}", source))] - DataFusionError { - source: datafusion::error::DataFusionError, - }, - #[snafu(display("Other error: {}", details))] - Other { details: Arc }, -} - -impl From for Error { - fn from(err: datafusion::error::DataFusionError) -> Self { - Error::DataFusionError { source: err } - } -} - -impl From for Error { - fn from(err: duckdb::Error) -> Self { - 
Error::ConnectionError { source: err } - } -} - -#[derive(Debug)] -pub enum ConnectionMode { - Memory, - File, -} - -pub struct DuckLakeConnectionBuilder { - meta_name: Arc, - path: Arc, - mode: ConnectionMode, - manager: DuckDBManager, -} - -impl std::fmt::Debug for DuckLakeConnectionBuilder { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "DuckLakeConnectionBuilder {{ name: {}, path: {}, mode: {:?} }}", - self.meta_name, self.path, self.mode - ) - } -} - -impl DuckLakeConnectionBuilder { - // Default constants - pub const DEFAULT_LAKE_NAME: &str = "meta_lake"; - - pub fn memory() -> Result { - let manager = DuckDBManager::memory().context(ConnectionSnafu)?; - Ok(Self { - meta_name: Arc::from(Self::DEFAULT_LAKE_NAME), - path: Arc::from(":memory:"), - mode: ConnectionMode::Memory, - manager, - }) - } - - pub fn file(path: &str) -> Result { - let manager = DuckDBManager::file(path).context(ConnectionSnafu)?; - Ok(Self { - meta_name: Arc::from(Self::DEFAULT_LAKE_NAME), - path: Arc::from(path), - mode: ConnectionMode::File, - manager, - }) - } - - pub fn meta_name(&mut self, name: &str) -> &Self { - self.meta_name = Arc::from(name); - self - } - - pub fn get_meta_name(&self) -> &str { - self.meta_name.as_ref() - } - - pub fn get_path(&self) -> &str { - self.path.as_ref() - } - - pub fn get_mode(&self) -> &ConnectionMode { - &self.mode - } - - pub fn connect(&self) -> Result { - let mut connection = self.manager.connect().context(ConnectionSnafu)?; - self.ducklake(&mut connection)?; - Ok(connection) - } - - fn ducklake(&self, connection: &mut Connection) -> Result<(), Error> { - let setup_query = match self.mode { - ConnectionMode::Memory => format!( - r#" - INSTALL ducklake; - LOAD ducklake; - ATTACH 'ducklake:metadata.ducklake' AS {name}; - USE {name}; - "#, - name = self.meta_name.as_ref(), - ), - ConnectionMode::File => format!( - r#" - INSTALL ducklake; - LOAD ducklake; - ATTACH 'ducklake:metadata.ducklake' AS {name} 
(DATA_PATH '{path}'); - USE {name}; - "#, - name = self.meta_name.as_ref(), - path = self.path.as_ref() - ), - }; - - // Try to execute the setup query, but ignore "already attached" errors - match connection.execute_batch(setup_query.as_str()) { - Ok(_) => Ok(()), - Err(duckdb::Error::DuckDBFailure(_, Some(msg))) if msg.contains("already attached") => { - // Database is already attached, just use it - connection - .execute_batch(&format!("USE {}", self.meta_name.as_ref())) - .context(ConnectionSnafu)?; - Ok(()) - } - Err(e) => Err(Error::ConnectionError { source: e }), - } - } - - pub fn initialize_schema(&self, connection: &Connection) -> Result<(), Error> { - // Create tables for storing stats metadata - connection.execute_batch( - format!( - r#" - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.ducklake_table_column_adv_stats ( - column_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - table_id BIGINT, - stats_type VARCHAR, - payload TEXT, - PRIMARY KEY (column_id, begin_snapshot, stats_type) - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query ( - query_id BIGINT PRIMARY KEY, - query_string TEXT, - root_group_id BIGINT - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query_instance ( - query_instance_id BIGINT PRIMARY KEY, - query_id BIGINT, - creation_time BIGINT, - snapshot_id BIGINT - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - PRIMARY KEY (group_id, begin_snapshot) - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group_stats ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - stats_type VARCHAR, - payload TEXT, - PRIMARY KEY (group_id, begin_snapshot, stats_type) - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_execution_subplan_feedback ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - stats_type 
VARCHAR, - payload TEXT, - PRIMARY KEY (group_id, begin_snapshot, stats_type) - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_subplan_scalar_feedback ( - scalar_id BIGINT, - group_id BIGINT, - stats_type VARCHAR, - payload TEXT, - query_instance_id BIGINT, - PRIMARY KEY (scalar_id, group_id, stats_type, query_instance_id) - ); - - CREATE INDEX IF NOT EXISTS idx_table_stats_snapshot - ON __ducklake_metadata_{name}.main.ducklake_table_column_adv_stats(begin_snapshot, end_snapshot); - "#, - name = self.meta_name.as_ref() - ) - .as_str(), - ) - .context(ConnectionSnafu)?; - - Ok(()) - } -} - -impl Default for DuckLakeConnectionBuilder { - fn default() -> Self { - Self::memory().expect("Failed to create default DuckLakeConnectionBuilder") - } -} - -pub async fn query( - connection: &Connection, - sql: &str, - schema: SchemaRef, - projection: Option>, -) -> Result { - let mut stmt = connection.prepare(sql).context(ConnectionSnafu)?; - - let rbs = stmt.query_arrow([])?.collect::>(); - let stream = MemoryStream::try_new(rbs, schema, projection)?; - Ok(Box::pin(stream)) -} - -#[cfg(test)] -mod tests { - use super::*; - use futures::TryStreamExt; - - #[tokio::test] - async fn test_ducklake_connection() { - let path = "./test_ducklake.db"; - let ducklake_conn = - DuckLakeConnectionBuilder::file(path).expect("Failed to create DuckLakeConnection"); - assert_eq!(ducklake_conn.get_path(), path); - - assert_eq!( - matches!(ducklake_conn.get_mode(), ConnectionMode::File), - true - ); - - assert_eq!(ducklake_conn.get_meta_name(), "meta_lake"); - - { - let conn = ducklake_conn.connect().expect("Failed to get connection"); - assert!(conn.execute_batch("SELECT 1;").is_ok()); - - let mut stmt = conn - .prepare("select name from (show all tables);") - .expect("Failed to prepare show tables statement"); - - let rows = stmt - .query_map([], |row| row.get::(0)) - .expect("Failed to execute show tables query"); - - let expected = vec![ - "ducklake_column", - 
"ducklake_column_mapping", - "ducklake_column_tag", - "ducklake_data_file", - "ducklake_delete_file", - "ducklake_file_column_stats", - "ducklake_file_partition_value", - "ducklake_files_scheduled_for_deletion", - "ducklake_inlined_data_tables", - "ducklake_metadata", - "ducklake_name_mapping", - "ducklake_partition_column", - "ducklake_partition_info", - "ducklake_schema", - "ducklake_schema_versions", - "ducklake_snapshot", - "ducklake_snapshot_changes", - "ducklake_table", - "ducklake_table_column_stats", - "ducklake_table_stats", - "ducklake_tag", - "ducklake_view", - ]; - - for (i, row) in rows.enumerate() { - let table_name = row.expect("Failed to get table name"); - assert_eq!(table_name, expected[i]); - } - } - - { - let conn = ducklake_conn.connect().expect("Failed to get connection"); - ducklake_conn - .initialize_schema(&conn) - .expect("Failed to initialize schema"); - - let mut stmt = conn - .prepare("select name from (show all tables);") - .expect("Failed to prepare show tables statement"); - - let rows = stmt - .query_map([], |row| row.get::(0)) - .expect("Failed to execute show tables query"); - - let expected = vec![ - "ducklake_column", - "ducklake_column_mapping", - "ducklake_column_tag", - "ducklake_data_file", - "ducklake_delete_file", - "ducklake_file_column_stats", - "ducklake_file_partition_value", - "ducklake_files_scheduled_for_deletion", - "ducklake_inlined_data_tables", - "ducklake_metadata", - "ducklake_name_mapping", - "ducklake_partition_column", - "ducklake_partition_info", - "ducklake_schema", - "ducklake_schema_versions", - "ducklake_snapshot", - "ducklake_snapshot_changes", - "ducklake_table", - "ducklake_table_column_adv_stats", - "ducklake_table_column_stats", - "ducklake_table_stats", - "ducklake_tag", - "ducklake_view", - "optd_execution_subplan_feedback", - "optd_group", - "optd_group_stats", - "optd_query", - "optd_subplan_scalar_feedback", - ]; - - for (i, row) in rows.enumerate() { - let table_name = row.expect("Failed to 
get table name"); - assert_eq!(table_name, expected[i]); - } - } - - { - let conn = ducklake_conn.connect().expect("Failed to get connection"); - conn.execute_batch("CREATE TABLE IF NOT EXISTS test (id INTEGER, name VARCHAR);") - .expect("Failed to create table"); - conn.execute_batch("INSERT INTO test (id, name) VALUES (1, 'Alice'), (2, 'Bob');") - .expect("Failed to insert data"); - } - - { - let conn = ducklake_conn.connect().expect("Failed to get connection"); - let schema = Arc::new(datafusion::arrow::datatypes::Schema::new(vec![ - datafusion::arrow::datatypes::Field::new( - "id", - datafusion::arrow::datatypes::DataType::Int32, - false, - ), - datafusion::arrow::datatypes::Field::new( - "name", - datafusion::arrow::datatypes::DataType::Utf8, - false, - ), - ])); - - let rbs = query(&conn, "SELECT * FROM test;", schema, None) - .await - .expect("Failed to execute query"); - - let schema_ref = rbs.schema(); - assert_eq!(schema_ref.fields().len(), 2); - assert_eq!(schema_ref.field(0).name(), "id"); - assert_eq!(schema_ref.field(1).name(), "name"); - - let batches: Vec<_> = rbs - .try_collect() - .await - .expect("Failed to collect record batches"); - - assert_eq!(batches.len(), 1); - let batch = &batches[0]; - assert_eq!(batch.num_rows(), 2); - } - } -} diff --git a/optd/statistics/src/lib.rs b/optd/statistics/src/lib.rs index a196ba3..18783a1 100644 --- a/optd/statistics/src/lib.rs +++ b/optd/statistics/src/lib.rs @@ -1,5 +1,3 @@ -mod ducklake_connection; mod statistics; -pub use ducklake_connection::{ConnectionMode, DuckLakeConnectionBuilder, query}; pub use statistics::{DuckLakeStatisticsProvider, Error as InterfaceError, StatisticsProvider}; diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs index e5f0585..66aab81 100644 --- a/optd/statistics/src/statistics.rs +++ b/optd/statistics/src/statistics.rs @@ -1,25 +1,24 @@ +use duckdb::{Connection, Error as DuckDBError, params}; + use serde::{Deserialize, Serialize}; use 
serde_json::Value; use snafu::{ResultExt, prelude::*}; -use std::sync::Arc; - -use crate::ducklake_connection::{DuckLakeConnectionBuilder, Error as ConnectionError}; #[derive(Debug, Snafu)] pub enum Error { #[snafu(display("Database connection error: {}", source))] - Connection { source: ConnectionError }, + Connection { source: DuckDBError }, #[snafu(display("Query execution failed: {}", source))] - QueryExecution { source: duckdb::Error }, + QueryExecution { source: DuckDBError }, #[snafu(display("JSON serialization error: {}", source))] JsonSerialization { source: serde_json::Error }, #[snafu(display( - "Statistics not found for table: {}, column: {}, snapshot: {}", + "Get statistics failed for table: {}, column: {}, snapshot: {}", table, column, snapshot ))] - StatsNotFound { + GetStatsFailed { table: String, column: String, snapshot: i64, @@ -37,12 +36,6 @@ pub enum Error { }, } -impl From for Error { - fn from(err: ConnectionError) -> Self { - Error::Connection { source: err } - } -} - /** Packaged Statistics Objects */ /** Table statistics -- Contains overall row count and per-column statistics */ #[derive(Debug, Clone, Serialize, Deserialize)] @@ -51,82 +44,37 @@ pub struct TableStatistics { column_statistics: Vec, } -impl TableStatistics { - fn new(rows: I) -> Self - where - I: IntoIterator< - Item = Result< - ( - i64, - i64, - String, - String, - i64, - i64, - i64, - String, - String, - String, - String, - String, - ), - duckdb::Error, - >, - >, - { +impl FromIterator> for TableStatistics { + fn from_iter>>(iter: T) -> Self { let mut row_count = 0; let mut column_statistics = Vec::new(); - for row_result in rows { - if let Ok(( - _table_id, + for row_result in iter { + if let Ok(StatisticsEntry { + table_id: _, column_id, column_name, column_type, record_count, - _next_row_id, - _file_size_bytes, + next_row_id: _, + file_size_bytes: _, contains_null, contains_nan, min_value, max_value, - _extra_stats_json, - )) = row_result + extra_stats: _, + }) = 
row_result { row_count = record_count as usize; // Assuming all columns have the same record_count - let actual_contains_null = match contains_null.as_str() { - "TRUE" => Some(true), - "FALSE" => Some(false), - _ => None, - }; - - let actual_contains_nan = match contains_nan.as_str() { - "TRUE" => Some(true), - "FALSE" => Some(false), - _ => None, - }; - - let actual_min_value = if min_value == "NULL" { - None - } else { - Some(min_value) - }; - - let actual_max_value = if max_value == "NULL" { - None - } else { - Some(max_value) - }; - let column_stats = ColumnStatistics::new( column_id, column_type, column_name.clone(), - actual_min_value, - actual_max_value, - actual_contains_null, - actual_contains_nan, + min_value, + max_value, + contains_null, + contains_nan, vec![], // Advanced stats can be populated later ); @@ -188,23 +136,32 @@ struct AdvanceColumnStatistics { data: Value, } -pub trait StatisticsProvider { - /// Create a new memory-based StatisticsProvider - fn memory() -> Result, Error>; - - /// Create a new file-based StatisticsProvider - fn file(path: &str) -> Result, Error>; +pub struct SnapshotId(i64); - fn get_connection(&self) -> Result; +struct StatisticsEntry { + table_id: i64, + column_id: i64, + column_name: String, + column_type: String, + record_count: i64, + next_row_id: i64, + file_size_bytes: i64, + contains_null: Option, + contains_nan: Option, + min_value: Option, + max_value: Option, + extra_stats: Option, +} - fn current_snapshot(&self, connection: &duckdb::Connection) -> Result; +pub trait StatisticsProvider { + fn current_snapshot(&self) -> Result; /// Retrieve table and column statistics at specific snapshot fn fetch_table_statistics( &self, table_name: &str, snapshot: i64, - connection: &duckdb::Connection, + connection: &Connection, ) -> Result, Error>; /// Insert table column statistics @@ -221,67 +178,99 @@ pub trait StatisticsProvider { /// DuckLake-based implementation of StatisticsProvider pub struct 
DuckLakeStatisticsProvider { - connection_builder: Arc, + conn: Connection, } impl DuckLakeStatisticsProvider { /// Create a new DuckLakeStatisticsProvider with memory-based DuckDB - pub fn memory() -> Result { - let connection_builder = Arc::new(DuckLakeConnectionBuilder::memory()?); - Ok(Self { connection_builder }) + fn try_new(location: Option<&str>) -> Result { + let conn = if let Some(path) = location { + Connection::open(path).context(ConnectionSnafu)? + } else { + Connection::open_in_memory().context(ConnectionSnafu)? + }; + + let setup_query = r#" + INSTALL ducklake; + LOAD ducklake; + ATTACH 'ducklake:metadata.ducklake' AS metalake; + USE metalake; + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.ducklake_table_column_adv_stats ( + column_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + table_id BIGINT, + stats_type VARCHAR, + payload TEXT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query ( + query_id BIGINT, + query_string TEXT, + root_group_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query_instance ( + query_instance_id BIGINT PRIMARY KEY, + query_id BIGINT, + creation_time BIGINT, + snapshot_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group_stats ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload TEXT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_execution_subplan_feedback ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload TEXT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_subplan_scalar_feedback ( + scalar_id BIGINT, + group_id BIGINT, + stats_type VARCHAR, + payload TEXT, + query_instance_id BIGINT + ); + "#; + 
conn.execute_batch(setup_query).context(ConnectionSnafu)?; + Ok(Self { conn }) } - /// Create a new DuckLakeStatisticsProvider with file-based DuckDB - pub fn file(path: &str) -> Result { - let connection_builder = Arc::new(DuckLakeConnectionBuilder::file(path)?); - Ok(Self { connection_builder }) + fn get_connection(&self) -> &Connection { + &self.conn } } -pub struct SnapshotId(i64); - -pub struct CurrentSnapshot { - snapshot_id: SnapshotId, - schema_version: i64, - next_catalog_id: i64, - next_file_id: i64, -} - impl StatisticsProvider for DuckLakeStatisticsProvider { - fn memory() -> Result, Error> { - let connection_builder = Arc::new(DuckLakeConnectionBuilder::memory()?); - Ok(Box::new(Self { connection_builder })) - } - - /// Create a new DuckLakeStatisticsProvider with file-based DuckDB - fn file(path: &str) -> Result, Error> { - let connection_builder = Arc::new(DuckLakeConnectionBuilder::file(path)?); - Ok(Box::new(Self { connection_builder })) - } - - /// Get a connection to the DuckDB instance and initialize the DuckLake-Optd schema - fn get_connection(&self) -> Result { - let conn = self.connection_builder.connect()?; - self.connection_builder.initialize_schema(&conn)?; - Ok(conn) - } - - fn current_snapshot(&self, conn: &duckdb::Connection) -> Result { - let mut stmt = conn - .prepare( - format!( - r#" - SELECT snapshot_id, schema_version, next_catalog_id, next_file_id - FROM __ducklake_metadata_{name}.main.ducklake_snapshot - WHERE snapshot_id = (SELECT MAX(snapshot_id) - FROM __ducklake_metadata_{name}.main.ducklake_snapshot); - "#, - name = self.connection_builder.get_meta_name() - ) - .as_str(), - ) + fn current_snapshot(&self) -> Result { + let mut stmt = self + .conn + .prepare("FROM snapshot_test.current_snapshot();") .context(QueryExecutionSnafu)?; + struct CurrentSnapshot { + snapshot_id: SnapshotId, + schema_version: i64, + next_catalog_id: i64, + next_file_id: i64, + } + let current_snapshot = stmt .query_row([], |row| { Ok(CurrentSnapshot 
{ @@ -293,54 +282,70 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { }) .context(QueryExecutionSnafu)?; - Ok(current_snapshot) + Ok(current_snapshot.snapshot_id) } fn fetch_table_statistics( &self, table: &str, snapshot: i64, - conn: &duckdb::Connection, + conn: &Connection, ) -> Result, Error> { // Query for table statistics within the snapshot range let mut stmt = conn .prepare( - format!( r#" - SELECT table_id, column_id, column_name, column_type, record_count, next_row_id, file_size_bytes, contains_null, contains_nan, min_value, max_value, extra_stats - FROM __ducklake_metadata_{name}.main.ducklake_table_stats - LEFT JOIN __ducklake_metadata_{name}.main.ducklake_table_column_stats USING (table_id) - LEFT JOIN __ducklake_metadata_{name}.main.ducklake_column col USING (table_id, column_id) - WHERE record_count IS NOT NULL AND file_size_bytes IS NOT NULL AND - table_id = (SELECT table_id FROM __ducklake_metadata_{name}.main.ducklake_table WHERE table_name = ?) - AND ? >= begin_snapshot AND (? < end_snapshot OR end_snapshot IS NULL) - ORDER BY table_id, column_id; - "#, - name = self.connection_builder.get_meta_name() - ).as_str() + SELECT + ts.table_id, + tcs.column_id, + dc.column_name, + dc.column_type, + ts.record_count, + ts.next_row_id, + ts.file_size_bytes, + tcs.contains_null, + tcs.contains_nan, + tcs.min_value, + tcs.max_value, + tcs.extra_stats + FROM __ducklake_metadata_metalake.main.ducklake_table_stats ts + LEFT JOIN __ducklake_metadata_metalake.main.ducklake_table_column_stats tcs USING (table_id) + LEFT JOIN __ducklake_metadata_metalake.main.ducklake_column dc USING (table_id, column_id) + INNER JOIN __ducklake_metadata_metalake.main.ducklake_table dt ON ts.table_id = dt.table_id + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE + ds.schema_name = current_schema() + AND dt.table_name = ? + AND ts.record_count IS NOT NULL + AND ts.file_size_bytes IS NOT NULL + AND ? 
>= dc.begin_snapshot + AND (? < dc.end_snapshot OR dc.end_snapshot IS NULL) + ORDER BY ts.table_id, tcs.column_id; + "# ) .context(QueryExecutionSnafu)?; - let rows = stmt - .query_map([table, snapshot.to_string().as_str()], |row| { - Ok(( - row.get::(0)?, // table_id - row.get::(1)?, // column_id - row.get::(2)?, // column_name - row.get::(3)?, // column_type - row.get::(4)?, // record_count - row.get::(5)?, // next_row_id - row.get::(6)?, // file_size_bytes - row.get::(7)?, // contains_null - row.get::(8)?, // contains_nan - row.get::(9)?, // min_value - row.get::(10)?, // max_value - row.get::(11)?, // extra_stats (JSON) - )) + let entries = stmt + .query_map([snapshot.to_string().as_str(), table], |row| { + Ok(StatisticsEntry { + table_id: row.get("column_id")?, + column_id: row.get("column_id")?, + column_name: row.get("column_name")?, + column_type: row.get("column_type")?, + record_count: row.get("record_count")?, + next_row_id: row.get("next_row_id")?, + file_size_bytes: row.get("file_size_bytes")?, + contains_null: row.get("contains_null")?, + contains_nan: row.get("contains_nan")?, + min_value: row.get("min_value")?, + max_value: row.get("max_value")?, + extra_stats: row.get("extra_stats")?, + }) }) - .context(QueryExecutionSnafu)?; + .context(QueryExecutionSnafu)? + .map(|result| result.context(QueryExecutionSnafu)); - let table_stats: TableStatistics = TableStatistics::new(rows); + let table_stats: TableStatistics = TableStatistics::from_iter(entries); Ok(Some(table_stats)) } @@ -355,10 +360,9 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { stats_type: &str, payload: &str, ) -> Result<(), Error> { - let mut conn = self.connection_builder.connect()?; // let mut txn = conn.transaction().unwrap(); - let current_snapshot = self.current_snapshot(&conn)?; + let current_snapshot = self.current_snapshot()?; // Parameters: column_id, table_id, (stats_type: &str, payload: &str) // 1. 
Get the current snapshot id @@ -380,23 +384,16 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { // } // Commit - let table_name = format!( - "__ducklake_metadata_{}.main.ducklake_table_column_adv_stats", - self.connection_builder.get_meta_name() - ); - - let query = format!( - "INSERT INTO {} + let query = "INSERT INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) - VALUES (?, ?, ?, ?, ?, ?)", - table_name - ); - let mut stmt = conn.prepare(&query).context(QueryExecutionSnafu)?; - - stmt.execute(duckdb::params![ - &column_id, - &begin_snapshot, - &end_snapshot, + VALUES (?, ?, ?, ?, ?, ?)"; + + let mut stmt = self.conn.prepare(&query).context(QueryExecutionSnafu)?; + + stmt.execute(params![ + column_id, + begin_snapshot, + end_snapshot, table_id, stats_type, payload, @@ -416,23 +413,20 @@ mod tests { fn test_ducklake_statistics_provider_creation() { { // Test memory-based provider - let memory_provider = DuckLakeStatisticsProvider::memory(); + let memory_provider = DuckLakeStatisticsProvider::try_new(None); assert!(memory_provider.is_ok()); } { // Test file-based provider - let file_provider = DuckLakeStatisticsProvider::file("./test_stats.db"); + let file_provider = DuckLakeStatisticsProvider::try_new(Some("./test_stats.db")); assert!(file_provider.is_ok()); } } #[test] fn test_table_stats_insertion() { - let provider = DuckLakeStatisticsProvider::memory().unwrap(); - - // Initialize the schema first - let _conn = provider.get_connection().unwrap(); + let provider = DuckLakeStatisticsProvider::try_new(None).unwrap(); // Insert table statistics let result = @@ -462,10 +456,7 @@ mod tests { #[test] fn test_table_stats_insertion_and_retrieval() { - let provider = DuckLakeStatisticsProvider::memory().unwrap(); - - // Initialize the schema first - let _conn = provider.get_connection().unwrap(); + let provider = DuckLakeStatisticsProvider::try_new(None).unwrap(); // 
Insert table statistics let result = From b0e5329aa694e55c86b650ad1b06e456b64c9ea8 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Wed, 22 Oct 2025 12:55:27 -0400 Subject: [PATCH 14/40] make tests use tempdir --- Cargo.lock | 1 + optd/statistics/Cargo.toml | 3 ++ optd/statistics/src/statistics.rs | 67 +++++++++++++++---------------- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5d05031..57feee1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3611,6 +3611,7 @@ dependencies = [ "serde", "serde_json", "snafu", + "tempfile", "tokio", "url", ] diff --git a/optd/statistics/Cargo.toml b/optd/statistics/Cargo.toml index 5595bbf..62b2e1c 100644 --- a/optd/statistics/Cargo.toml +++ b/optd/statistics/Cargo.toml @@ -15,3 +15,6 @@ snafu = "0.8.6" serde_json = "1.0" futures = "0.3.31" serde = "1.0.0" + +[dev-dependencies] +tempfile = "3.8" diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs index 66aab81..628535e 100644 --- a/optd/statistics/src/statistics.rs +++ b/optd/statistics/src/statistics.rs @@ -154,7 +154,7 @@ struct StatisticsEntry { } pub trait StatisticsProvider { - fn current_snapshot(&self) -> Result; + fn fetch_current_snapshot(&self) -> Result; /// Retrieve table and column statistics at specific snapshot fn fetch_table_statistics( @@ -183,7 +183,7 @@ pub struct DuckLakeStatisticsProvider { impl DuckLakeStatisticsProvider { /// Create a new DuckLakeStatisticsProvider with memory-based DuckDB - fn try_new(location: Option<&str>) -> Result { + pub fn try_new(location: Option<&str>) -> Result { let conn = if let Some(path) = location { Connection::open(path).context(ConnectionSnafu)? 
} else { @@ -196,7 +196,7 @@ impl DuckLakeStatisticsProvider { ATTACH 'ducklake:metadata.ducklake' AS metalake; USE metalake; - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.ducklake_table_column_adv_stats ( + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats ( column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, @@ -205,26 +205,26 @@ impl DuckLakeStatisticsProvider { payload TEXT ); - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query ( + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query ( query_id BIGINT, query_string TEXT, root_group_id BIGINT ); - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query_instance ( + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query_instance ( query_instance_id BIGINT PRIMARY KEY, query_id BIGINT, creation_time BIGINT, snapshot_id BIGINT ); - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group ( + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group ( group_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT ); - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group_stats ( + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group_stats ( group_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, @@ -232,7 +232,7 @@ impl DuckLakeStatisticsProvider { payload TEXT ); - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_execution_subplan_feedback ( + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_execution_subplan_feedback ( group_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, @@ -240,7 +240,7 @@ impl DuckLakeStatisticsProvider { payload TEXT ); - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_subplan_scalar_feedback ( + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_subplan_scalar_feedback ( scalar_id BIGINT, group_id BIGINT, stats_type VARCHAR, 
@@ -252,37 +252,23 @@ impl DuckLakeStatisticsProvider { Ok(Self { conn }) } - fn get_connection(&self) -> &Connection { + pub fn get_connection(&self) -> &Connection { &self.conn } } impl StatisticsProvider for DuckLakeStatisticsProvider { - fn current_snapshot(&self) -> Result { + fn fetch_current_snapshot(&self) -> Result { let mut stmt = self .conn - .prepare("FROM snapshot_test.current_snapshot();") + .prepare("FROM ducklake_current_snapshot('metalake');") .context(QueryExecutionSnafu)?; - struct CurrentSnapshot { - snapshot_id: SnapshotId, - schema_version: i64, - next_catalog_id: i64, - next_file_id: i64, - } - - let current_snapshot = stmt - .query_row([], |row| { - Ok(CurrentSnapshot { - snapshot_id: SnapshotId(row.get("snapshot_id")?), - schema_version: row.get("schema_version")?, - next_catalog_id: row.get("next_catalog_id")?, - next_file_id: row.get("next_file_id")?, - }) - }) + let snapshot_id = stmt + .query_row([], |row| Ok(SnapshotId(row.get(0)?))) .context(QueryExecutionSnafu)?; - Ok(current_snapshot.snapshot_id) + Ok(snapshot_id) } fn fetch_table_statistics( @@ -362,7 +348,7 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { ) -> Result<(), Error> { // let mut txn = conn.transaction().unwrap(); - let current_snapshot = self.current_snapshot()?; + let current_snapshot = self.fetch_current_snapshot()?; // Parameters: column_id, table_id, (stats_type: &str, payload: &str) // 1. 
Get the current snapshot id @@ -384,7 +370,7 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { // } // Commit - let query = "INSERT INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + let query = "INSERT OR REPLACE INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) VALUES (?, ?, ?, ?, ?, ?)"; @@ -408,6 +394,7 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { mod tests { use super::*; use serde_json::json; + use tempfile::TempDir; #[test] fn test_ducklake_statistics_provider_creation() { @@ -418,15 +405,21 @@ mod tests { } { - // Test file-based provider - let file_provider = DuckLakeStatisticsProvider::try_new(Some("./test_stats.db")); + // Test file-based provider with temporary directory + let temp_dir = TempDir::new().unwrap(); + let db_path = temp_dir.path().join("test_stats.db"); + let file_provider = + DuckLakeStatisticsProvider::try_new(Some(db_path.to_str().unwrap())); assert!(file_provider.is_ok()); } } #[test] fn test_table_stats_insertion() { - let provider = DuckLakeStatisticsProvider::try_new(None).unwrap(); + let temp_dir = TempDir::new().unwrap(); + let db_path = temp_dir.path().join("test_insertion.db"); + let provider = + DuckLakeStatisticsProvider::try_new(Some(db_path.to_str().unwrap())).unwrap(); // Insert table statistics let result = @@ -436,6 +429,7 @@ mod tests { Err(e) => println!("Table stats insertion failed: {}", e), } assert!(result.is_ok()); + // temp_dir is automatically cleaned up when it goes out of scope } #[test] @@ -456,7 +450,10 @@ mod tests { #[test] fn test_table_stats_insertion_and_retrieval() { - let provider = DuckLakeStatisticsProvider::try_new(None).unwrap(); + let temp_dir = TempDir::new().unwrap(); + let db_path = temp_dir.path().join("test_retrieval.db"); + let provider = + DuckLakeStatisticsProvider::try_new(Some(db_path.to_str().unwrap())).unwrap(); // Insert table statistics let 
result = From a85ad15c08fb6db971e8559de2c0991188d227d5 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Fri, 24 Oct 2025 13:07:31 -0400 Subject: [PATCH 15/40] attempted update_stats impl --- optd/statistics/src/statistics.rs | 160 +++++++++++++++++++++--------- 1 file changed, 111 insertions(+), 49 deletions(-) diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs index 628535e..9a6f007 100644 --- a/optd/statistics/src/statistics.rs +++ b/optd/statistics/src/statistics.rs @@ -1,4 +1,4 @@ -use duckdb::{Connection, Error as DuckDBError, params}; +use duckdb::{Connection, Error as DuckDBError, params, types::Null}; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -138,6 +138,13 @@ struct AdvanceColumnStatistics { pub struct SnapshotId(i64); +pub struct CurrentSchema { + schema_name: String, + schema_id: i64, + begin_snapshot: i64, + end_snapshot: i64, +} + struct StatisticsEntry { table_id: i64, column_id: i64, @@ -156,6 +163,8 @@ struct StatisticsEntry { pub trait StatisticsProvider { fn fetch_current_snapshot(&self) -> Result; + fn fetch_current_schema(&self) -> Result; + /// Retrieve table and column statistics at specific snapshot fn fetch_table_statistics( &self, @@ -165,11 +174,9 @@ pub trait StatisticsProvider { ) -> Result, Error>; /// Insert table column statistics - fn insert_table_stats( + fn update_table_column_stats( &self, column_id: i64, - begin_snapshot: i64, - end_snapshot: i64, table_id: i64, stats_type: &str, payload: &str, @@ -271,6 +278,32 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { Ok(snapshot_id) } + fn fetch_current_schema(&self) -> Result { + let mut stmt = self + .conn + .prepare( + r#" + SELECT ds.schema_id, ds.schema_name, ds.begin_snapshot, ds.end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_schema ds + WHERE ds.schema_name = current_schema(); + "#, + ) + .context(QueryExecutionSnafu)?; + + let snapshot_id = stmt + .query_row([], |row| { + Ok(CurrentSchema { + 
schema_name: row.get("schema_name")?, + schema_id: row.get("schema_id")?, + begin_snapshot: row.get("begin_snapshot")?, + end_snapshot: row.get("end_snapshot")?, + }) + }) + .context(QueryExecutionSnafu)?; + + Ok(snapshot_id) + } + fn fetch_table_statistics( &self, table: &str, @@ -280,34 +313,34 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { // Query for table statistics within the snapshot range let mut stmt = conn .prepare( - r#" - SELECT - ts.table_id, - tcs.column_id, - dc.column_name, - dc.column_type, - ts.record_count, - ts.next_row_id, - ts.file_size_bytes, - tcs.contains_null, - tcs.contains_nan, - tcs.min_value, - tcs.max_value, - tcs.extra_stats - FROM __ducklake_metadata_metalake.main.ducklake_table_stats ts - LEFT JOIN __ducklake_metadata_metalake.main.ducklake_table_column_stats tcs USING (table_id) - LEFT JOIN __ducklake_metadata_metalake.main.ducklake_column dc USING (table_id, column_id) - INNER JOIN __ducklake_metadata_metalake.main.ducklake_table dt ON ts.table_id = dt.table_id - INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id - WHERE - ds.schema_name = current_schema() - AND dt.table_name = ? - AND ts.record_count IS NOT NULL - AND ts.file_size_bytes IS NOT NULL - AND ? >= dc.begin_snapshot - AND (? 
< dc.end_snapshot OR dc.end_snapshot IS NULL) - ORDER BY ts.table_id, tcs.column_id; - "# + r#" + SELECT + ts.table_id, + tcs.column_id, + dc.column_name, + dc.column_type, + ts.record_count, + ts.next_row_id, + ts.file_size_bytes, + tcs.contains_null, + tcs.contains_nan, + tcs.min_value, + tcs.max_value, + tcs.extra_stats + FROM __ducklake_metadata_metalake.main.ducklake_table_stats ts + LEFT JOIN __ducklake_metadata_metalake.main.ducklake_table_column_stats tcs USING (table_id) + LEFT JOIN __ducklake_metadata_metalake.main.ducklake_column dc USING (table_id, column_id) + INNER JOIN __ducklake_metadata_metalake.main.ducklake_table dt ON ts.table_id = dt.table_id + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE + ds.schema_name = current_schema() + AND dt.table_name = ? + AND ts.record_count IS NOT NULL + AND ts.file_size_bytes IS NOT NULL + AND ? >= dc.begin_snapshot + AND (? < dc.end_snapshot OR dc.end_snapshot IS NULL) + ORDER BY ts.table_id, tcs.column_id; + "# ) .context(QueryExecutionSnafu)?; @@ -336,19 +369,30 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { Ok(Some(table_stats)) } - /// Insert table column statistics - fn insert_table_stats( + /// Update table column statistics + fn update_table_column_stats( &self, column_id: i64, - begin_snapshot: i64, - end_snapshot: i64, table_id: i64, stats_type: &str, payload: &str, ) -> Result<(), Error> { - // let mut txn = conn.transaction().unwrap(); + // Start transaction + let mut begin_txn_stmt = self + .conn + .prepare("BEGIN TRANSACTION;") + .context(QueryExecutionSnafu)?; + begin_txn_stmt.execute([]).context(QueryExecutionSnafu)?; - let current_snapshot = self.fetch_current_snapshot()?; + let mut snapshot_stmt = self + .conn + .prepare("FROM ducklake_current_snapshot('metalake');") + .context(QueryExecutionSnafu)?; + + let current_snapshot = snapshot_stmt + .query_row([], |row| Ok(SnapshotId(row.get(0)?))) + .context(QueryExecutionSnafu)? 
+ .0; // Parameters: column_id, table_id, (stats_type: &str, payload: &str) // 1. Get the current snapshot id @@ -370,22 +414,42 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { // } // Commit - let query = "INSERT OR REPLACE INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) - VALUES (?, ?, ?, ?, ?, ?)"; - - let mut stmt = self.conn.prepare(&query).context(QueryExecutionSnafu)?; + // Upsert matching past snapshot and insert new snapshot + let mut stmt = self + .conn + .prepare( + r#" + MERGE INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats adv_stats + USING (SELECT ? AS snapshot_update) as end_snapshot_update + ON (adv_stats.end_snapshot IS ? AND adv_stats.stats_type = ?) + WHEN MATCHED THEN UPDATE SET end_snapshot = end_snapshot_update.snapshot_update; + INSERT INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) + VALUES (?, ?, ?, ?, ?, ?); + "#, + ) + .context(QueryExecutionSnafu)?; stmt.execute(params![ + current_snapshot, + Null, + stats_type, column_id, - begin_snapshot, - end_snapshot, + current_snapshot + 1, + Null, table_id, stats_type, payload, ]) .context(QueryExecutionSnafu)?; + // Commit transaction + let mut begin_txn_stmt = self + .conn + .prepare("COMMIT TRANSACTION;") + .context(QueryExecutionSnafu)?; + begin_txn_stmt.execute([]).context(QueryExecutionSnafu)?; + Ok(()) } } @@ -422,8 +486,7 @@ mod tests { DuckLakeStatisticsProvider::try_new(Some(db_path.to_str().unwrap())).unwrap(); // Insert table statistics - let result = - provider.insert_table_stats(1, 1, 100, 1, "ndv", r#"{"distinct_count": 1000}"#); + let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); match &result { Ok(_) => println!("Table stats insertion successful"), Err(e) => println!("Table stats insertion failed: {}", e), @@ 
-456,8 +519,7 @@ mod tests { DuckLakeStatisticsProvider::try_new(Some(db_path.to_str().unwrap())).unwrap(); // Insert table statistics - let result = - provider.insert_table_stats(1, 1, 100, 1, "ndv", r#"{"distinct_count": 1000}"#); + let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); match &result { Ok(_) => println!("Table stats insertion successful"), Err(e) => println!("Table stats insertion failed: {}", e), From ff7ce20243e4ed0308133e1df80060a428be46c7 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Fri, 24 Oct 2025 17:37:18 -0400 Subject: [PATCH 16/40] add new snapshot logic --- optd/statistics/src/statistics.rs | 115 +++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 26 deletions(-) diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs index 9a6f007..8523344 100644 --- a/optd/statistics/src/statistics.rs +++ b/optd/statistics/src/statistics.rs @@ -1,3 +1,5 @@ +use std::thread::current; + use duckdb::{Connection, Error as DuckDBError, params, types::Null}; use serde::{Deserialize, Serialize}; @@ -160,6 +162,12 @@ struct StatisticsEntry { extra_stats: Option, } +#[derive(Debug, Serialize, Deserialize)] +struct StatisticsUpdate { + stats_type: String, + payload: String, +} + pub trait StatisticsProvider { fn fetch_current_snapshot(&self) -> Result; @@ -384,21 +392,30 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { .context(QueryExecutionSnafu)?; begin_txn_stmt.execute([]).context(QueryExecutionSnafu)?; + struct SnapshotInfo { + snapshot_id: i64, + schema_version: i64, + next_catalog_id: i64, + next_file_id: i64, + } + let mut snapshot_stmt = self .conn .prepare("FROM ducklake_current_snapshot('metalake');") .context(QueryExecutionSnafu)?; let current_snapshot = snapshot_stmt - .query_row([], |row| Ok(SnapshotId(row.get(0)?))) - .context(QueryExecutionSnafu)? 
- .0; + .query_row([], |row| { + Ok(SnapshotInfo { + snapshot_id: row.get("snapshot_id")?, + schema_version: row.get("schema_version")?, + next_catalog_id: row.get("next_catalog_id")?, + next_file_id: row.get("next_file_id")?, + }) + }) + .context(QueryExecutionSnafu)?; - // Parameters: column_id, table_id, (stats_type: &str, payload: &str) - // 1. Get the current snapshot id - // 2. insert with begin_snapshot = current snapshot + 1; - // 3. do an update query, UPDATE end_snapshot = current_snapshot WHERE column_id= ? and table_id= ? and stats_type = ?; - // 4. increment next snapshot id + let current_snapshot_id = current_snapshot.snapshot_id; // 3. check how duckdb do snapshot id increments (might need update `ducklake_snapshot` table). @@ -414,15 +431,17 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { // } // Commit - // Upsert matching past snapshot and insert new snapshot - let mut stmt = self + // Update matching past snapshot and insert new snapshot + let mut update_stmt = self .conn .prepare( r#" - MERGE INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats adv_stats - USING (SELECT ? AS snapshot_update) as end_snapshot_update - ON (adv_stats.end_snapshot IS ? AND adv_stats.stats_type = ?) - WHEN MATCHED THEN UPDATE SET end_snapshot = end_snapshot_update.snapshot_update; + UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats adv_stats + SET end_snapshot_update = ? + WHERE (adv_stats.end_snapshot IS ? + AND adv_stats.stats_type = ? + AND adv_stats.column_id = ? + AND adv_stats.table_id = ?) 
INSERT INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) VALUES (?, ?, ?, ?, ?, ?); @@ -430,18 +449,62 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { ) .context(QueryExecutionSnafu)?; - stmt.execute(params![ - current_snapshot, - Null, - stats_type, - column_id, - current_snapshot + 1, - Null, - table_id, - stats_type, - payload, - ]) - .context(QueryExecutionSnafu)?; + update_stmt + .execute(params![ + current_snapshot_id, + Null, + stats_type, + column_id, + table_id, + column_id, + current_snapshot_id + 1, + Null, + table_id, + stats_type, + payload, + ]) + .context(QueryExecutionSnafu)?; + + let mut new_snap_stmt = self + .conn + .prepare( + r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot + (snapshot_id, snapshot_time, schema_version, next_catalog_id, next_file_id) + VALUES (?, NOW(), ?, ?, ?); + "#, + ) + .context(QueryExecutionSnafu)?; + + new_snap_stmt + .execute(params![ + current_snapshot_id + 1, + current_snapshot.schema_version, + current_snapshot.next_catalog_id, + current_snapshot.next_file_id, + ]) + .context(QueryExecutionSnafu)?; + + // let mut new_snap_change_stmt = self + // .conn + // .prepare( + // r#" + // INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot_changes + // (snapshot_id, changes_made, author, commit_message, commit_extra_info) + // VALUES (?, ?, ?, ?, ?); + // "#, + // ) + // .context(QueryExecutionSnafu)?; + + // new_snap_change_stmt + // .execute(params![ + // current_snapshot_id + 1, + // format!("updated_stats"), + // Null, + // Null, + // Null, + // ]) + // .context(QueryExecutionSnafu)?; // Commit transaction let mut begin_txn_stmt = self From 1952737766496c39358fc6f43c689835ed444fba Mon Sep 17 00:00:00 2001 From: HFFuture Date: Tue, 28 Oct 2025 13:08:39 -0400 Subject: [PATCH 17/40] more rubust tests for stats --- .gitignore | 4 +- optd/statistics/metadata.ducklake | Bin 3944448 -> 0 
bytes optd/statistics/src/statistics.rs | 461 +++++++++++++++++++++++------- optd/statistics/test_stats.db | Bin 12288 -> 0 bytes 4 files changed, 362 insertions(+), 103 deletions(-) delete mode 100644 optd/statistics/metadata.ducklake delete mode 100644 optd/statistics/test_stats.db diff --git a/.gitignore b/.gitignore index 52bfa81..10db29f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ - # rust /target @@ -11,3 +10,6 @@ data/ # datafusion .history +.DS_Store +optd/.DS_Store +optd/statistics/.DS_Store diff --git a/optd/statistics/metadata.ducklake b/optd/statistics/metadata.ducklake deleted file mode 100644 index 1266bc56f460edc0e80c5429b70ab10f96d80971..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3944448 zcmeF)S&Up+dKl;vWRgsh$)$QL)?VsTOTDQURc|G!wb*T-8bb|)1_lCOCJ@ZHNv10^ zlFE!^7kdmaEeTTF#u!}|8kV6aL6VKO$K!zkJqWO2YhaB@R?WbGfqAh!9s}cU!O%Qd z9tbvUb#ux$w2oNAZfB*pk1PBlyK;YyA{`-~I|MkEB zxtD+RY+sUB^WSsVy2I!4Z}a&_tvlQo0t5&UAV7cs0RjXF5FkKc+yypYn*U$^{jawE z{h1=8#pkCk<-b3Dxmy77f0x1+0t5&UAV7cs0RjXF5FjvN0uLU&{{Q~QfBM>2ifn(h z8D3fpXPTkb3}=?Y+-hUFwH!hxz82GA>AxhrP_H*P7vEWFypz;dlXj!oXg9Ji^>k2g zwnyPwce=&3mCe=GklF)mIz$?k%_Oq!o6y87>t+t7M0p&sB((dH%(%{BeXA_+`UHo?f}*GbZC8fqrRT3G}_Devu$R_MsYb?=F-~wYGZJF zTUbvPV{&ne*=!B&KCWf6ZY+Nzso#FkPB!|ssJM4pT8n#~^?EwVz{rL5wXa9K`bN94 zKDg|KxUIJ1&S0%o1$;GWEv`SfmyNDkL!Qd!jysYphs&*{wE>yoTVbd3B%Fym=d+JO z?QsZCLTyKFcRJ~g`%%6lPZwKmeOHt4_qto(-E{Z<{Lg*ut3UfQU%S=Y!}nI3*Ox-v zq~i3=@Ot|8$@J~lYw7zp&t>n|(^dL_yP%Ks*4$(zX(t&(y6!H-qe8Pcs0w%ZW`4M} zcZcG@>TuBY_x!L`wuHC@3z+)xk@^6+c{c)oqQq*|K#E!>*R%< zb<#UBWX0Ok3OKx4KHOVeJd)IxTg~L%ft}N}-b7WDWuc4Hu&ucy{O!XQ`uT35ms=~# zt+>@?$ByirJX;mxndIEC;=ak=DIAjGr)h$aI z;S`m&_tU#(IJdF5RuMDIXM+ciW942rKT_w1?09s|_ZVeGT};BCKARS9?%-$pAMF+{ z)!u%tACK?D&oV{^ZkIRKHrE%ExbanlP}kxK)19Op59!HDbFkpeFfz`GJFs{<*j=Cd zw(WkPg(UpiONXreXSzV$REzy;x)z*^8R`wM2x5n?_P3BnaZ`PD-2oNA}R04T$vB!oQXEoF~DtF5vK!5-N0t5&g zLg3;d<~1vUu@P{gV{EEIMhFlfK!5-N0t5(5ra*O2F%3OTU7E}ak!u132oNAZfB*pk 
z6E3i?o2Y6@m|KdE6^{?&xD#r3`yMd9xSllHNwXdwFn&K--&kI2bssQ(sZ1sdUb1C8 zOqX$H$1?JX()ZqEG0C5Sn6EZ&);=DK{;3p}3X9k=W8{oB7>Bv{?u-Y+5_k6hDqx;K)O^W6rO|&w{|hEEG~1w+VQH!!`_G zCtkpD^>nLnTZj=aaMZF(Dv!cmdJ>BdX>=9ibSqEl1PBlyK!Csu30#?>vMTC?3si>~ z*)3LkxE*U2E@n)417ixx( zUA35NhOOdtF$w?Z>9qOCwgG=9wsqMt;M>pi4fy3})21)6`AIXJ@4rK9AF<6+*iS2v zt7X}I5+Fc;009C72oNAJodS7ub7C>ZbRsGhfyouPHn~}sa{>ei5FkKcTm`CQjJ-4i z+1~|>aV@`m5gvW5`kjFJ;eHTMzS*F^18uV_)7TCP09|!2~{h zu-VK-fB*pk1WrL9j}=_UI0br15+Fc;009CMEAWwt&AE&dI0*qU#z`nkg8%^n1PBly zK!CvX3slDzC(-Hc$EF`w2?-D&K!5-N0t5)ml0dg1Mb^)4S@IpvBrgXx!Yg zB(3JKvFTncouONUPr{jqa`sWEJr3bXsP&50>7-hZQoEh7&m`dwV`n0<)m>JlajIv2 z?rUHD*`N8^tsZNk@Osj%j@U}?d^6mL&GkaoUT-eVMXh!>)c)wwavYeh7_X;qpG@C= zJtoi$-#nMSUr%#zz#6>KSA+HY$@)fITQ}0GJQG*UUDmj(MM;X;`qs!aTie&U7n1O= zVzuNn2h8zPy&M;tL7LTbY5f068`PA5-Y9;&G6jddQ=v9#F`XWl9O1Io*Af@`r_S2 z>rRrbO1st1LNRT9wb2~hde1jD+jrO22e#ji|8JzE_VtlN;^+vV> z?pwqGtLBv?{F}JdZQI0O8D2GI$wpMsg_EhGtKBNfjxgzVGe7EZb16R%9mBr5w;S!- zV>CmN=#lQlckYFLI0@gqlGffKj(jieq^vKlt!%Eg>W$|8`bN9a&er+3KfhQEKjLHg z$V$u7ueTrE8{8f)+-p2oS&LiP&ckE66{K-Xewa&l+S#l2B%JLPpzla($%u2C0P8}AMobvfVbW?3It;dZVTZzSP=``973$dB~N zb;;uS_HeR=^<;5vy&1RP&DP-g^yTcRzP|SLszd&@>}bES{Eei3`$0R|=riwM^*^74 z-~I3*%eb(ujCYFDo{MX@|6;P(uF7HGCfk?6SnDIgg}66(Cwb6UZ}B{X(x6 zXNRrME8U{rUrxTBmi1zcEz<8*CB2x9IN1YLo@V87>Oyn5u@Wm~fbZ9K7IE>b@#Nq`=@$QX@hF}Y$3NHSqvCt~mY%r9 zLB+YumQLWtvo__5009C7CPv`K#N<>)2@oK#Z-G3Jh<_jzW9-|`3>jAHFlkGZ*i3gTYb9F_%%+G< zZkyp@+O8)y>lwV}(Yu|hdy7?iw|cz2YS@f<2GaMDlNI-E{qkS?cTfKF@BG%IFaP!b z`rCi-Cr`ffH-9I#n<{SLsvMUu{z&t1+-2;F^b+d7H!W`oG^iSf#Fo(UHn;9FwyDP3 z_G4?TzFM14o8PB3{5W*KJ_+IdQ2QWcEzyQ0)xD!GO{!Q$p3Rf(Rpi+u#8qfWs;~7@ zO~00@#-_LF>qx7tx01wf`D-7`zfgI3uU9*TcDWIadUovR3Mn512oNAZfB*pk1PDyO zKpuNk#uJs{>u#a#USf{vSDmgBo|(AfJU4MUmSF+}2oNAZfWRyWjEFs4%b0}~Kp6=T zAaG;?&mY;6EQkOB0t6;cU|U>~-;uW8Zq~~C#qxU)-@Eblt(!L|k3o(J5FkK+009D1 zEbzheS72MD(QSV6sMl^~SQ8dENsVun${7Iy1PBlyFkIm5@KMGPAaF>55uwGX zcIvYgTD*8jWU~_>K!5-N0t5)mvOsOd3SS*!`Y~I{DEceCDhZs*Nkn`vr*iaW}}!&Nq_(W0t5&UIJm%wP)}TOa8`2?AV7e? 
z$q2lBGRa7V009C7b`!|sit1PBlyK!5;&sSp?u zPsGj`ipv#6w+wrUC#C{LA!bS7mE$O^>lMeLYMBHG5FkK+z&Hzxh$~#D80U(UHv$9* zOuxVnPJa=VkN^P!(<6|_73GzPvRotk065p{1K_13{Kw1N`v8ao{^dg7fZxB;H{dsS z^!^-Y`ODW%yxV~DX^OSGq3rWI+zIj7pNs2Bqn$MC@!6l{*C!#oA8H?jkCsyVZgYP= z37@P?Eni}KRcOA1FH?&#UHVF5{sy}Sh(p3sh&j$DO5b~v#nirZqN%-o{Hf_(pjsU7#EF4GRFvKv~xUdqFnMNeH+#?j)3@L4W`O z0t5)80zZUdq` zvLYu02oNAZfB*pkMh$_q+bUlV zyG2+EapriN*LyeKzIF3vSr(rJ2oNAZfB=Ds6!_Fc=31r+jF&(jE9@F$6!AvyX)$H6 zvGuUqE12fz<5jhCLx2DQ0t5&UAVA=>1ojwa6py0p1sB^cW#n1^v`ZSNRfp0gK!5-N z0t5&UATUk>M-XGUU22>vR9*;_0-q_r_#{Ap009C72%M~d7~^EUrAB}N0Rl5B@YxwH zw_+0@Fp~n+F~)p8c;q8GwD+FL6-J>65FkK+009EiAn+s8P>3-sgWzHeD@B$F5FkK+ z009C72uzYdbx@IYJ=^Oss^}*w>q$2oRhDo^-@4_%eCDOl3=cb?uf}A6-!Akwv;Iy!}9;1Ugy2(*JDVbr+*ls%V`bN z`f6al)x4io*5lCq`Xq$+L+yib_8>*cYyQ==iksnb60TRK`agWUxxCR{ZpBo~uVtz? z_MPha%6`uV(8|uBb#PX0ojwWd6sYa|=@$V41PBlyKw#G4&O#fX5>6+K{D+ zD~7-rOn?9Z0t5(5t-y%5qT=$AD-~0_A}AOE0tBW^;PojhkKzy@Fb)F#ayYscMno3w zfErC_EP(+5`z#DzP{hSto>bRoYMr2rXk#V?m;S_FKRzl?Bq3tJEKk+Dpk4^vjCPXYu^P~goIOiNM(2oNAZfWZC)@>oHPu|G9a z6Cgl<009Eq1wOa^hwlUk%)G#e7^Bzny!$|u%2;F%)6M6}q|b1fFUiYCg=7nNL8N5y zNq_(W0t5&UAVA=d0-rzR{9`kFb+9717@G=`5ds7V5FkK+009EyE+D8F_ZpJFg9&`$ zV6&Nv009C72oNAZ;NSvza51t8d%5stn5(?Ks=RedbMjKkCjkNk2viGvv3jtf1PBly zFq;C^7do1umUP>9h%sh!)lg~z1PBlyKwwe@zBH*Bm+xs25MxYBO)3Wg0t5&UAV7cs zfzuTbTMY4kYshee2@oJafB*pk1PBlqDUin)du*?9PJ4}!m_`yHaC`zkc6^JrWC8>T z5FkL{bOc=QI30JX5+Fc;0D-X-`0=sKl}r&JFku2>j0vkS*(E@L009C72oRVZfuESn zqE^Qkp4vB=wJFyG2oNAZfB*pk1WrLfP;mrXXF2LSkhWte{ipzG@FffqkgZ^zPq@dG~)02 z?FaXh^^N7VR-7t?H{$=V#{XyI|BuA~pN;?1VM~pbjbt@xH`4iEZEh~Uv(k7cd39-d zCE0lGEAi)7{@lyA-+b%!#updg{Nfj0fA!5cPx-o0e(_0w009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U 
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5)0s6cZ6E8qX~PyXzGJ(H(eYlh45 ze=~$6)RscH6CTAMwIp=PSN!%kAGB4z9_Hh+IpWN5>i2HEee34U6D^3u2oNAZfB*pk z1PBlqAAvkZ5L=8-jmQasP{jSk2M_`T2oNAZfWU+ah%qLt!eo~K0RjY0TA&E{&z^L; z(j!2Cz{CoOF($U^WSjs20t5&UATWypMch9(i)EjfGK(=LrmAF=009C72oNAZfWTM_ zRL2(c`QWr|_k5ZCCn3BaY9EBG?YG9=CCg-f7IFW%$;`c66Cgl<009C72#lwI;9@-M zMXm@Cm_>mi?ms_^Wmjeb1PBnA3IV~zR8*rv5FkK+009EWB~Zlu7mjQBmP=ro1Oyk; zRGZ2|fB*pk1PBlyK;RSvs)LH`33;{dWAwU@);p4CV-#`!yyx$oLVZh;009C72oNAZ zfWWv2#+`*9Wv)7%IOI{-x}J_M2R6gmBZ)=CAf>2J0>>**#QhhKHx)}KK!5-N z0t5&gP+-J$k$hAdjC3!MY?ZHv`AA(+9gy8@1PBlyFg*fA+<$3$N~9zN2oN}=Kprdf zU&qkT?IBssPJjRb0t5&UAaFVYMcjY+bW)%7RJ#`<_Hr}T`Tigzn<-yJe0QN^*4HNG zCqRGz0RjXF5FoI-K=lOWDd6KNA5K!Ct>2o!PuQ`1ozr653nz}^MK7<;!e zB>@5i2oNAZfWYJm6mkEDCbtBWlXEe~Ga$*%FAp!&l5FkKcQUr>)|D%(VU-=|(@&Y4*kTUY<#2BQA zMz+e=!+d1yEq5uz$fTBp_ins>>*mdqFMt#Y5FkK+009C72oM+xfzydCin#w{V?iJj 
z1PBlyK!5-N0t5&gp}^_H7)MwDiy}ba2n33_|LG%GltmCAK!5-N0y8fl#+dn400033 z1PF|sKoR%f7`u$g5CH-N_9P(2*b|e92oNAZfB*pk1ZGyCi2I+JS%7^PygJ6vk<`8+ z%|L(v0RjXF5FkKcRs{qVvs&|~RPG|~fA*A;mn;DS1PBlyK!Ct83RDLdS%0(nsQ9jT z+GDH$izYyTz!(S=asP8;kQG@VK!5;&DG-5FkK+009C72<%0mi2GmO z%d}H5O?7aQbtF~mbRLJiAF6z+PN%;yxFBRIRsw|}K!5-N0t5&UAaD`_BZ7}I;_yU` zlcbBJThAF)Z352@oJaVCn_L z7*k)x3P^we0RjXFOuRr5_rEgn1yO(*5vYzaG{BgV>Q_tx1PBlyK!5-N0*4pKgNh`? zcHOn4ySPp#4tW%|uBW5Rq0KPY-}*b7DPKf7VYj*XR{46EkBm!ODw25b#@n}U-Yl1S z@}EWA|AQx=vJ?prAV7cs0RjXF5ZES=#|r&1h6^0qh)I4 zF=l-|P<{dg2oNAZfB*pkhZPW899Gc$`xPkS{!i|AKGPE*K!5-N0t5(5hJYAjGU`b# z2@oJ~%mTGzUcSW>AV7e?3<-!aW~j0il>h+(1PBlqdx7~phda)?hP=yVpdCl6$;W?!h+o12U8tTf(9>Z?h+(QLFE*_V1cs5jfAaIHJt;@ZmQ zYU^=mhRe-xG5=LxZYFWS)lTP52zS$D;*Z#ad)jfFn%FvHHVL#Z#QnQ zBvs2@D3V-TZzk&lOTAo-Z8cVtbmTMJvs8|3Ke#tI@>&FHFE>`|&19*uxsqjg$J|$n z5%v3xmCeEVue3H-RyOL5m6f%xCmF}3?P*`R*H}+l?RuWSzI>$hOX<<&`9@!|Tnk&>gV9T0GpA zc`407y?X1LyVq#n9iY0N4y`Y5)Yp@hMtk{ww(acLI4)<)Tv}USZ47RA3+u^ZOfGIY zo2|j!$hB%_VtKY-)J}12baANx7l{w zDXg`sfUhR4#q|gGve8v*$Wz(eafg!SaJjX#HXt*6E9`Whgfns9efCkPJr3bXsO_lj zPA86wPo&9q=;>n1J;#@l@b|h~-`#Xi|NPH=?W;fgGhe&a+tc?}o7a~@+@#|4&G35q z_Q~|^*K6tfH_v77*%NOLxC{D7Z_Q0sl6I0or0ecNJUTRcgQ{?cZ{~+vdv_=ftPTfV zf6otFb&FCS$ErJe^&4D&`oss;pl=q(p1CCa(v@!+S zsBbLZO`4l4NwXfw%Ht#xudH17qfoRXrL)OU)JL{x(WXOuO zrxkE`wS2g@xOgO~FSnY>y8}C?YrTo8DEkZju_XNM!xsAaZlRZ3E6c68)n&(y?A$zC z72}!Y@KRmM&*3Xi#j}lSwNbyHtZ&2(xUaDNWqBqE-&;6j1wPd+OBv}DmA3cOyJk4I zvA9+dJIrT;2ajXrUN}Ee=ZEZgbj?rp%RR<(N%+%e)56Ui{A~ZD-NL2X+t2mm@qPGN z#>fEf^2XZc`eG6{zKT%lT0CL8leFU@Jy~fE7Q7io#yfEb7EcGe>vP|>-4FC)5`OKa zL)QK?U7&8N#eOwi3(mz1^@djjv%^>NJ#D%pcDonjPOP<&okafKq1IS15M|_lFikiLRx3tJ{P;tQ5S?}2YQAY*|5FkK+009C72%NM) z9zBRHPTF011V#uHasOvWOkxxP0t5&UAV6Sz1;iNRTQ_n>fB*pkVBT009C72oNAZfWQO^RL2&v zyRiKAI21j=rJV+a_e1T2vXh|NQr7WX65hM<_N|*YC#cA>8bT5Gzs7<90RjXF5FkK+ zz|jc^E{@L9k_ZqWa5#Y??tlGoOEEVA0t5&UI5h#m#i`Ltm;eC+1PBnA1c4&%e`6Bz zDvtzCTR?De+BG130t5&UAV7cs0Rj^)P#sic56_E_aV#FA*L}3!kvtoti2Hw36O9R9 
zcVwRc0RjXF5FkK+z+nXPSR_8xI}1O`C8dv&k58B1x}J_M2R6gmBZ)=CAf>2J0>>** z#Qkp`Zz`5ffB*pk1PBl~pumXhBKfE^80lUh*(zTT^N}%P(G}GJ<;_Nb009D1B2dKr zpPQ26CrVKX|7#mBS3%v0RjZ}FHpq&U)uki z<{&^I7pRUgGzPbR5FkK+009C72oNB!M}cDvDvG%Otv%+&WCREhAV7cs0RjXFOt3&6 zTF;n#5OVT0CPe}S2oNAZV9W)Ixc{fd zJe#sd;1mT$1R-VQ(TOog5shq>uZQ`_*jw&Wh>=Mx3Gdx_`_|2yr&tn66Cgl<009C7 z2oNA}ngXX2Tg;zkk)=(5009C72oNAZfWR>doKB2!j0Li20t5&U*rULoymkJ+{k^~P zKmU(&ar*1=fBL!FSZ1X=;wfN~pfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF%!a_t@6~_)t%sZ6{eH)b009C74lhs(TRRUQ 
z^Tp2#I|utkfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV6Te z1#W(?{_}4=-2CqMJ6;3`5Fl`Pfm+zwx!a#FeqPu)*e?PE2oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ 
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0^=?4EARY`-`n`q!yo@>%;pco|LN!AdeUeo z&HBdT-DI`#V;^5_EVn+str!d2zNcSz``tIWp9BaHAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N 
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfWVjw-27hs=ihp``Q7h#ya*5=K;ZBKwXn5w zzdv96ys&eyUjzscAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs 
z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z##`XG{_j8j?N|QnFSO?KY}Up*w{k~-009C72uy}RCwtYJp&3FFYD*#936J8BS`zY6 z@jZTfoDbS6Uk~$<*&K1^IQ4rs-oAD7=42FFE(s7IK!Ctl3lwqxnX%5K%n=|!V5mSI zD~K_M0vSPo009C72oNAJ;{rw8e|E+h_EG%m7-K#koL;{uNJDrM!uz52LCCIexPY+_ zW0MjfK!5-N0t5&Um}&t*#Z*`C_!X`QQs>4mXL3Y<009C72oNA}Sb^%`Ld%Q8x|*K= z0RodGP{jS`CMnPIN`L?X0^=)?#~AS@r!1@}^V#XdA&lPOuo)J#&?qr{5+Fc; z009C7j!U43`_CWO@-3IZxCpr3F)lSB9|Q;xAV7cs0RjXjTVT>+iz4p7Fxl#IPk;ac z0t5&UAV6Sx1tu-VnBJODG6It+P{jSk$7Gg4t_ctzK!5;&qZJTi9IdCN5gzv%3~L4f0t5&UAV7csfx`+EasQ>mE@xuquMRG<-luBmQ~2_J zsPd^go&FA|1R)bs(K1Sa009C72oNAZ;Isus1RrI@(TPv}E~1dF^7Swu=~3XPU4PS) zei8Ryo}LmZ2>}8G2oNAZU{(dh7_(aQ%1wX(fl&fQ+`lkt8si8MAV7dXwSX9-+Qm=; z1PBlyK!Cvb2o!PumGQ}uoDdi#P#t4vfHBI>4cNeP@{3qNq_(W0t5&UAV7e?sR-nOM1PFo z0>`OThC~TWlRy#oUz?^fDGLDt1PBlyFj}BGxY%nqx7l>xFRyfLyU_7ZAOG?1roB=< z%*W}GYIH3aOMn0Y0t8M*posggpG;~}AwYn@Xn}65u$P;zF7;m!nZTQ_MspZTfB*pk z1PBlyK;X~*;F~*cvv*HmTK!5-N0t5&UICcTC#jz7hpiiKP`#;n- z*iQlk2oNAZfB=Ch5fEccNku9K0RjXLC{V=xA3oshW+Om=009C60%D8-5(5YjAV7cs z0RpEcP{jQoIkgN(n7|PTh%t@;(IN;CAV7cs0RjXF5QyE?rmy}s-jn@cjUw*l?yqFaZJt2oNAZ zfB=C(fghm5!&l8=h-vAxIRkcAyv#x37v3PG=t}4)jyJYi(s6 zAkSqLp7(^T3$e1|$L4eHm~(RG$d+GhG%p<40*`e;$L)HonYVBP1PBlyut$N~9;Y)I zfhvJKTIp|LGrwDiHVYw!O{;2!G3>YmjxJ<7t{htK3<(r*|BEwJWJOH{&SM}jMFNhi zr(2oZLX3DxB|g_Azvfh4CmD8GW)?z>)2$e(6Cgl<009C7a)Bc5e~AZyNfD?HG4|2i zrWxEXH7S)Xp9BaHAV7cs0RjY$T_BGzvTG7)yO83QHsq*13R~CH(dGR@S<{->bY-p? 
zzF9oh{kbIkqo>p6Bijc2o!Hi8$AE7?(>LIkpG_N?#AYPTaK8Tzt$oBcdwh_55%<4b zmdz&t0t5&UAV7cs0Rq!0uzuo`Eji;`657o0DT)>!_6+l4=5FkK+009C7h6xmL z|0~1B8$Lupj4=epU;+dP5FkK+009C7_9ZYPwulckEFL4+eGuWuX6R+Q&2aAc8;zHV z`5b$JBJTgevCpUs5+Fc;009C72#lw|h?pfG72k(H13kttd+eBMJWDTE1PBlqdx0YE z|HRm5R0atUAV7dXDImruC43SfK!5-N0t9vyDB}Jf+I2z`5SV2FF~%&{zOoY_K!5-N z0t5&UI68sq*y30_y)EMYPaa(%EQtUC0t5&UAV7csfzubrgNr>j2A|Uy{Pb%~4hS5V zz@>b_&n~{R(s(DSuO{tAv(av3U+U?gr7*YJSZ*zc@F+ap+$^TM*H~{ax0lyi_2t&m zTH4dZTr(_e8`@0bu&bTUoe=JZ*q*By=G%?iD@lEMaLm=)$(`j^eWTU5w{dr^Juv4& z(rOMHo9@lh8NRudzkVnQ-@ei5EQJtL?sOJI+<~S4lJG*k-p&5#VqWV`x3PFPS#6}n zyU+|5^IugZxLl0d+{}yjh3y4DO!1nbTy(3knxqANX8WS&?lszX2Zmlxht`)j>g&l$ zqrH4To9|L@zE0;!I1^FMJ_@zRAv_7SUeP+8RO?Y{w-fd=N%+IqsYq;nmsM$;>Y1PW z+E;(}XTEl;$66@7o`j+{wqk3&^UZK09&JL_Zf`ElMXh!>)c)wwavYeh7_X;qpG@C= zJth#V@?7?QJl<-x-AJqQOk6Q{S>vu2B`Id>TO-qKZC~eJOTxd3 z)soX3Fvm~za$IZ%X;#mr^x*%FUs=0T zwfu!_f~DoE?RIc2+Md;+>(LuY__xYtqG&S(Ba_sF(WU-A3z9 zlC4U+)z3mPZGE-T9Nc=(H#XaM*VYHN->ZvjtEtHfd?9%r%U%E8 z7I47B(sR`-M0aI~r$xih>w3xqo5PjGwN^WxR&LZ=n=32n=xygG@l3Key3raKdvP_M zE#7aeY!0pp7grna4jFYh-(6-|A6QfJ`uwZKY0KFp{H>?T`aGcANBZQtWbs6KIN8E_ zvbeV1j2qr&Yw$$)a&|agU;BF1vHDtenBG|aMpD21pq*^=nV0KqS;h-V_+~smKh*AG zVOtsR6z3}!*KYsCWU*b9!@g~;FN3kxN1W%!&G?<lJGAMTb)XV`F&lBRN0>a#$^*=W=5k{tR>}b1*^OVwHohG}MLhk;_xdZ# zjg5Rg+BUu)_i7UU^w5W$wR`PmJ$W}-+{|Lijm_KlRvNAPQj#=pHx}P1R;~Fv>ua0$ zs=N=kW^LQ)4+!o5*wW2qU#7ODQXWWR_|tc2Ior?s(Ios{mp40I?jBlpK^uK&DX#Z_ z>bU-{V7UYc5FkK+0D+wX=~+&8?$Zn*>(VX$&Eio!r;dN7&qu}g_$@u}i-XEPCT~5= zN9qJ_r>0*72oNAZU^jsx?*H^|(-}{I0D;LB$YX{0_f;{*)3K&Hbrb| z+YArWCUiL#VWnoJ>Fh5Y{onT>HEmZiu<;H`LF%EC;$0(e(TYf|N4LZ z?LYXFC*S#-zZ2U_6*q8Ij>{K+qg9te(H8&wyl4v zyc4MsU`Lm1C0+T@nG4PE&7zO6PbcADKbLl=kaH`O3b!4?9!Y9DiV$dkOV)?^Uj@KBqyTi2L7|hO#IF z0RjXF5FkL{I0Z(8dg6-Xgtu%01PBl~lt2;p|LCC>VrBva2oN|jfjq7#uSArK9eHW! 
zOz%q2Yf1Qx8{04A!~y>$4j7y3IKTJ@fB#p${SUqsA5>HHd)ymPmVD&prhKCGedJ`t zvxEjBv3*@N8hrT9wkAo#FY1lV*h%L9oU2~t_T!TP0RjXF%(g%g_rE#Y3QA9a!2SjD z@TD@Y=mm0>kzE}8G2oNA}ngT`KKM~DW zMno9{S9JCgS4>2qPjlsvHURlEW$aq>oh0D&nMDB}JvOmPtvjQ{}xQzwwe z73GzPvUnr=065p{1K^D${Kw1N`v8ao{^dg7fZxB;H{dsS^!^-Y`ODW%yxV~DX^OSG zq3rWI+zIj7pNs2Bqn$MC@!6l{*C!#oA8H?jkCsyVZgYP=37@P?Eni}KRj4?EhES#! zW4iQ}#QY6*4G@Qfr4VzRPn5p*CX1N7ceDTyX zA_D{n5ZH%69y;~+F3^qoh6R9SpsZ_weOR0HSOwe~cdWb?PJjRb0t5)80!7^aB^o0I zwuKn^HJtn!Q$Be3RUaY7NL3>V5FkK+009C72+W*-&|>DQd<+U)#QkrLK~`je009C7 z2oNAZ;HU+J7)LE?c?1X$m^gtV?*Flg%eV{^AV7e?SPJAJMtQ$enZ>>KJ@EM?JlWCJ zKpgOQcXTxn2Yh=+R|9dtFU76~di@u~0so=b_rQFAbC9f6eR;oDr`ONgR{46^ErLf) zmi6b8009C72oRWLfg&CkcAPGy4t z0RjXF5FkK+z$pvtG0Z3)McE53dY3Yaxc?_Yj8$UG_4jVPee34UQ!bU{2@oJafB*pk z1PBl~ErG+w7-Ealsypcts1_*V{x4ULHIx7W0t5&UAaL9QVvOS!wtNBv2oRWUfgib z5gsC*J2K!5-N0t5(5q`-)nrM%P1!!;(dW@VZH0RjZZS>WdP>OcS1!_Du0zk9Ux zg#ZBp1P(4x3tKyP`o-;Z;lj?rei0x*fB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U 
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&U7;l09<8Pe(y%+w|*OxBk*{qFsZsm>u0RjXF>|LOfy=u+S3?T`% zr4a6fNAX833Hhk_9=|=#2W^$Fhxy2CjyQ9i`n?-(-@18o?+Kfd009C72oNAJh5|+0 ze`XA`CQAe+Mj($B#26D(VKPd9009C72oNAZU`z$7V~hEGaC+ThzRdWO5Z({94?=cb zW8{U4BJMw1&gqi?0RjXF5FkK+0D&10IF8_A2G|slz$6M3asS*T=3Sl%5FkK+0D(ym z$b*Y`AtGx&QRcYQi9;TRt?TLNa$qxDaQ9S+;gbLX0t5&gjX)9ipF5hRSqcFH1SUzK z`a*|R;FDB`@=AaJ0RjXF5FoIdKoR$!-)%bMk3wKXj8W@$FxBmJs`7fr9;Q27%a5Zd zr)3ZzK!5-N0t5&UATS{U)j^7&VnV8KZ`my3{tJ7X+EfGx5FkK+009C74kjSDIGCHc z2oNA}Sb-w$@Ae3I*yWj@009C72%Ne=9$f6PIr&`W?N#|A;=2d_oqBc3009C72oNA} zIDsPWzj(N%{QvFUTaO#peE{&oz1XsB%X00yYsZ(Zo47XMx=K*DO`J9mYz0=K$bqZ} z>xrP2v+_nO?q-*3SSSMRV-OSt{FWBz2Ppbp6v!7?Nd|(v<|WNj^9|Y{L8f!1$R$_e zYL_B8{1FmoIG1zzox_>=?{H?UO@P2q0)tOWt*ezeT^7Zfg{J&0KMhFlfK!5-N0t5&UsJ_4*57&4E zttATr1PBlyK!5-N0#hN7#|Vcxl~Kg~&t`$ft$ckk##A)6LJ}ZAfB*pk1PBm#1OkVT zF~k;+pjl)=;1B{u-2X?1SjD0Q2oNAZfB=CJ3WzaAh%0FV1PBnAL4hLfe{Ke=uF3=m z5GVu&#~5yGtAh|AK!5-N0t5(DL7<5HpRYn)rl1ajiz#R%MI=Cg009C72oNAJ$pV6k zNp79VEL#!x|M6s&PN@hGAV7cs0RjZZCLp*N8@-GP5Fl`z0!7^a!f~c4Edm4x5Fl_k zfjqbv@=<(?hq-^PdrKXzB`i&V009C72vkp?i2MJfdNorc0>>dR_(VtFqxkwBz}Jak zRznh?)S~dk(uY?rTsV#*NrM0Z0t5&UAV7csfhiOiUu;pt{Yz7brpN>c5FkK+009C7 z2plLdIL2__+5^GNAW&I>BJO{&vK3S@0t5&UAV6T$0)mTC3rn8>0RjXF96_Ln`@eF8 zB&2!8Eu^HxF1zutp5+Fc;009C72+XcPehi(y zH^Yy!L*_VM$u3CF4ys?N*PA;l*Vh}@qxy}g-Do!2jclZzPFf8MHyUfLwGj5g$&Jl+ zvwm%BV`nqH)Oo%cPGleIYt1P6d@7Ewh45(zX-226M%UI__3c(;bNkbccHjJ`qE_>u zxp90aJC>i{3$@!xYQ?K|C(Ly++?ReFrx#f7rFrKgwREL4*Pmp!d)nQ7F$%wWB92!> z$O@S(_5AlQo&Vn1OIPams#QO;8XjwgrL+cT(ziF$x1T17C3K6~`%wT-cjdI*ZnU>k z#>G)FHp9s*lY0B+W|UIA*xf*nZ#HhOZ#1%&>za|;FC;kq_VWkXcjZ#*cQ-@c9`pO# 
znr}8=5?X9)ewLa6Qw>CD~`4^;r zcy|&S+x7TiD{j+BS@?^Q`=ufp@vcDh)lD-~=|NKf1=_@_`ZeNu~ zYx{C$yS;ODbG?ylELR%qjjj4>6g96lR<0KthbM~312(dqZ)6{C*LSk7mmcuVAEG%^ z`1DqL{Tqjcd!_6Ys;dn5ZJ&Gtk!)l0k-2hYVgDxMYf<>at36HkMo-fn;CPsgdeW(r zQY_rt>-`|yt(`o%yzlk?rE_O5pIbhE@q=@pET8}G^2LuXFQ5D5{KuC+US4Z{UT-(9 zu1EEJ+f{bszBJM#Kf1Vl-@WfU>Av@!&h9x+cV%OJ=SHjEXnvMw@n-L?_U+~FUF%!# z^<=ykmJTZ5?eN+IGP)bWm)Rz4Axc#C!rX&a7_^ffcHN!>ggE|rlJVRx!u;7wm(y-> z@6Ud-dt9gH1PBlyKwywSalZfMLGw%-ULZf-NDd2|AtdJ=vtVp5`HN z`~U9VBZ{eP~- z2oNAZfB*pk1PBnQjDYhTm1#;9A#ikoBJO|l=qX8o009C72oRWIfx$7xiF|S#-wr2A zoI4@h4Ye=xQzXd^tyYT%1Aeje;gt&)%B2hm5FkK+0D)r?DB}LNjww0G5FkKc zY=EfcqZBCP{<(dbE*78Xi(#$n^JB&MLNYApr|XyFo!{{l%N4&V$UpsG9G7`zd6x+n z%Nh_MK!5-N0t5&UAV7cs0RqP-P{ibKA76@6B0zuu0RjX@Bry1wXs)3j5w~Os5FkK+ zKs5x4xc@JzQ5&@&K;XCp1Q*98DIEd?2oNAZfB*pk;}t04{&&V!3c#ifXi@T8v7di2MI?ROw5X009C72oNApA%Q%&80yYk&9GQ&hGq!a!wSQ- zu$O$OMIlcjc~8b}=aY8J*R4+a9&cEoUZW}m2oM;vKoR$!8FMjYPk;ac0!I_bV+AqB z(G;ysfB*pk1PBnQpgD=gh%rX%D|G?{2oNAZfB*pkvm!7!ws7CtV{88+ z?*GQIB`6^R1PBlyK!5-N0uw4AxR}s}Q7i%kMk-Lm{eLymOe9W#009C7W=$XuE{41c zeBKorv(`Vor|Sh&$xYpsRkHa91;?5}NY>};ktxS4N;6WNFQS~E&MpNivaA$%H=``;uV zu143^k`lHWo7EQrFrKg zNuA?3&Gje6J$Swmg@1Y`j#oo+$DX8+$x_dM|I+#IoxN13<~12D;OTpS_3d_}y`3^H zj*77vPG*_Z+c!6(l;Xwi26}w6adUm6k+odcjMRQ%p#A(o_FcJ@`rXZtx5xbc_INi6 zze)1#YLAx&v`1cn(Y8w7t$(-^ZQU%pb@pLUw|*+0v)$fWYhCLzeR^wSqg_AXJCVxH zb!5YxP&-j()5X@?X}=kTe}16{skhVS35=HKxvbL`MQtSw(prg9r-f#Cws-0vuudKO zwg6ABY(4ak$_rlUXAe#!oV97y`@C~WtLzS7g}_SI;#v@d70TJx5ZNn4ApM4E^+xlvJc~Db_q1;>ckfx>daozry|8pp0dI%b9+1)95WdW| zU<*;AvKQtaw8Ef$^swvp93RB-&y$Sjei7!+Ub>uii{vJIUIYjbAV8p^0>%0McPm;w z)gnM(zd(L?n?xb*Fuk9~X95HW5FkK+009D36e!~UzaClL#26!^l`sJU1PBlyK!5;& znG(ojiy^O}@+8ff>KiJ1Ux6a-|C{?RWF`Ru1PBlyK!Cu(0?u>mCsC0kgTRp98YF_vGn1U3l|QRw}lB1AV7cs0RjXF5SSc+BJTgY zqn4->D`bH}nOYnt(ZgQYJ)O=jCpN=^M~9afh6D%@AV7cs0RjXFJe)utgNQ92PS0Ai zBv8cte?LogR#yT92oNAZfWX5Dh%p{U%{l}K5Fjvafgezy0OS|9bm3@6RXoe_vb80Rl%CsD<5pUr{TD&!it~$-i2w`B^?IdtWTZ zAMcuGlmGz&1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N 
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7e?#0b3f z%U9osuPnX$``C*B0RjY$E>H`*`4+xb44+9q){=kCe0uqwOv+x5mlF&L5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ 
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PDx$z^~8#+m-+Q(eM7_R9>js zB$Z8B2oNAZfB*pk1PF{uAkJR3W@v^Gh1zNe*TP;nkx!1}+u=l+^PLdxhT4}Qr1P>x zleKD5_+sh9D;F-5OBoU%Kw#1ZinxDn(#xrQ1PBlyKw#^M1PBlyK!5-N z0<$Dgh6IPJb0=2FVu*6x?B`E=VfS=8yPRl9fB*pk1PBlyK!5-N0t5(*O`weX%OCoe zmoq(4lxU$CmOltcmwU39mX1GGjHk<$Hwj3WE!Qi4PZ0jQ$f(RGTkQ}1e;znlq?axJ zc=5r2009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs 
z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs o0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAdm?BKUDjY^8f$< diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs index 8523344..2e0ee7f 100644 --- a/optd/statistics/src/statistics.rs +++ b/optd/statistics/src/statistics.rs @@ -1,11 +1,11 @@ -use std::thread::current; - use duckdb::{Connection, Error as DuckDBError, params, types::Null}; use serde::{Deserialize, Serialize}; use serde_json::Value; use snafu::{ResultExt, prelude::*}; +const DEFAULT_METADATA_FILE: &str = "metadata.ducklake"; + #[derive(Debug, Snafu)] pub enum Error { #[snafu(display("Database connection error: {}", source))] @@ -140,6 +140,13 @@ struct AdvanceColumnStatistics { pub struct SnapshotId(i64); +pub struct SnapshotInfo { + snapshot_id: i64, + schema_version: i64, + next_catalog_id: i64, + next_file_id: i64, +} + pub struct CurrentSchema { schema_name: String, schema_id: i64, @@ -171,6 +178,8 @@ struct StatisticsUpdate { pub trait StatisticsProvider { fn fetch_current_snapshot(&self) -> Result; + fn fetch_current_snapshot_info(&self) -> Result; + fn fetch_current_schema(&self) -> Result; /// Retrieve table and column statistics at specific snapshot @@ -198,17 +207,23 @@ pub struct DuckLakeStatisticsProvider { impl DuckLakeStatisticsProvider { /// Create a new DuckLakeStatisticsProvider with memory-based DuckDB - pub fn try_new(location: Option<&str>) -> Result { + /// 
Parameters: + /// - location: Optional path to database file + /// - metadata_path: Optional path to ducklake metadata file + pub fn try_new(location: Option<&str>, metadata_path: Option<&str>) -> Result { let conn = if let Some(path) = location { Connection::open(path).context(ConnectionSnafu)? } else { Connection::open_in_memory().context(ConnectionSnafu)? }; - let setup_query = r#" + // Use provided metadata path or default to DEFAULT_METADATA_FILE + let metadata_file = metadata_path.unwrap_or(DEFAULT_METADATA_FILE); + let setup_query = format!( + r#" INSTALL ducklake; LOAD ducklake; - ATTACH 'ducklake:metadata.ducklake' AS metalake; + ATTACH 'ducklake:{}' AS metalake; USE metalake; CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats ( @@ -262,14 +277,34 @@ impl DuckLakeStatisticsProvider { payload TEXT, query_instance_id BIGINT ); - "#; - conn.execute_batch(setup_query).context(ConnectionSnafu)?; + "#, + metadata_file + ); + conn.execute_batch(&setup_query).context(ConnectionSnafu)?; Ok(Self { conn }) } pub fn get_connection(&self) -> &Connection { &self.conn } + + fn begin_transaction(&self) -> Result<(), Error> { + let mut begin_txn_stmt = self + .conn + .prepare("BEGIN TRANSACTION;") + .context(QueryExecutionSnafu)?; + begin_txn_stmt.execute([]).context(QueryExecutionSnafu)?; + Ok(()) + } + + fn commit_transaction(&self) -> Result<(), Error> { + let mut commit_txn_stmt = self + .conn + .prepare("COMMIT TRANSACTION;") + .context(QueryExecutionSnafu)?; + commit_txn_stmt.execute([]).context(QueryExecutionSnafu)?; + Ok(()) + } } impl StatisticsProvider for DuckLakeStatisticsProvider { @@ -286,6 +321,33 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { Ok(snapshot_id) } + fn fetch_current_snapshot_info(&self) -> Result { + let mut snapshot_stmt = self + .conn + .prepare( + r#" + SELECT snapshot_id, schema_version, next_catalog_id, next_file_id + FROM __ducklake_metadata_metalake.main.ducklake_snapshot + WHERE 
snapshot_id = (SELECT MAX(snapshot_id) + FROM __ducklake_metadata_metalake.main.ducklake_snapshot); + "#, + ) + .context(QueryExecutionSnafu)?; + + let current_snapshot_info = snapshot_stmt + .query_row([], |row| { + Ok(SnapshotInfo { + snapshot_id: row.get("snapshot_id")?, + schema_version: row.get("schema_version")?, + next_catalog_id: row.get("next_catalog_id")?, + next_file_id: row.get("next_file_id")?, + }) + }) + .context(QueryExecutionSnafu)?; + + Ok(current_snapshot_info) + } + fn fetch_current_schema(&self) -> Result { let mut stmt = self .conn @@ -318,7 +380,7 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { snapshot: i64, conn: &Connection, ) -> Result, Error> { - // Query for table statistics within the snapshot range + // Query for table statistics at the snapshot let mut stmt = conn .prepare( r#" @@ -386,62 +448,41 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { payload: &str, ) -> Result<(), Error> { // Start transaction - let mut begin_txn_stmt = self - .conn - .prepare("BEGIN TRANSACTION;") - .context(QueryExecutionSnafu)?; - begin_txn_stmt.execute([]).context(QueryExecutionSnafu)?; + self.begin_transaction()?; - struct SnapshotInfo { - snapshot_id: i64, - schema_version: i64, - next_catalog_id: i64, - next_file_id: i64, - } + // Fetch current snapshot info + let current_snapshot = self.fetch_current_snapshot_info()?; + let current_snapshot_id = current_snapshot.snapshot_id; - let mut snapshot_stmt = self + // Update matching past snapshot to close it + let mut update_stmt = self .conn - .prepare("FROM ducklake_current_snapshot('metalake');") + .prepare( + r#" + UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + SET end_snapshot = ? + WHERE end_snapshot IS NULL + AND stats_type = ? + AND column_id = ? 
+ AND table_id = ?; + "#, + ) .context(QueryExecutionSnafu)?; - let current_snapshot = snapshot_stmt - .query_row([], |row| { - Ok(SnapshotInfo { - snapshot_id: row.get("snapshot_id")?, - schema_version: row.get("schema_version")?, - next_catalog_id: row.get("next_catalog_id")?, - next_file_id: row.get("next_file_id")?, - }) - }) + update_stmt + .execute(params![ + current_snapshot_id, + stats_type, + column_id, + table_id, + ]) .context(QueryExecutionSnafu)?; - let current_snapshot_id = current_snapshot.snapshot_id; - - // 3. check how duckdb do snapshot id increments (might need update `ducklake_snapshot` table). - - // R"(INSERT INTO {METADATA_CATALOG}.ducklake_snapshot VALUES ({SNAPSHOT_ID}, NOW(), {SCHEMA_VERSION}, {NEXT_CATALOG_ID}, {NEXT_FILE_ID});)"); - // 4. Update snapshot_changes (MIGHT BE optional). - // auto query = StringUtil::Format( - // R"(INSERT INTO {METADATA_CATALOG}.ducklake_snapshot_changes VALUES ({SNAPSHOT_ID}, %s, %s, %s, %s);)", - // SQLStringOrNull(change_info.changes_made), commit_info.author.ToSQLString(), - // commit_info.commit_message.ToSQLString(), commit_info.commit_extra_info.ToSQLString()); - // auto result = transaction.Query(commit_snapshot, query); - // if (result->HasError()) { - // result->GetErrorObject().Throw("Failed to write new snapshot to DuckLake:"); - // } - // Commit - - // Update matching past snapshot and insert new snapshot - let mut update_stmt = self + // Insert new snapshot + let mut insert_stmt = self .conn .prepare( r#" - UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats adv_stats - SET end_snapshot_update = ? - WHERE (adv_stats.end_snapshot IS ? - AND adv_stats.stats_type = ? - AND adv_stats.column_id = ? - AND adv_stats.table_id = ?) 
INSERT INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) VALUES (?, ?, ?, ?, ?, ?); @@ -449,13 +490,8 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { ) .context(QueryExecutionSnafu)?; - update_stmt + insert_stmt .execute(params![ - current_snapshot_id, - Null, - stats_type, - column_id, - table_id, column_id, current_snapshot_id + 1, Null, @@ -485,33 +521,32 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { ]) .context(QueryExecutionSnafu)?; - // let mut new_snap_change_stmt = self - // .conn - // .prepare( - // r#" - // INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot_changes - // (snapshot_id, changes_made, author, commit_message, commit_extra_info) - // VALUES (?, ?, ?, ?, ?); - // "#, - // ) - // .context(QueryExecutionSnafu)?; - - // new_snap_change_stmt - // .execute(params![ - // current_snapshot_id + 1, - // format!("updated_stats"), - // Null, - // Null, - // Null, - // ]) - // .context(QueryExecutionSnafu)?; - - // Commit transaction - let mut begin_txn_stmt = self + let mut new_snap_change_stmt = self .conn - .prepare("COMMIT TRANSACTION;") + .prepare( + r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot_changes + (snapshot_id, changes_made, author, commit_message, commit_extra_info) + VALUES (?, ?, ?, ?, ?); + "#, + ) + .context(QueryExecutionSnafu)?; + + new_snap_change_stmt + .execute(params![ + current_snapshot_id + 1, + format!( + r#"updated_stats:"main"."ducklake_table_column_adv_stats",{}:{}"#, + stats_type, payload + ), + Null, + Null, + Null, + ]) .context(QueryExecutionSnafu)?; - begin_txn_stmt.execute([]).context(QueryExecutionSnafu)?; + + // Commit transaction + self.commit_transaction()?; Ok(()) } @@ -521,32 +556,60 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { mod tests { use super::*; use serde_json::json; + use std::sync::atomic::{AtomicU64, Ordering}; + use 
std::time::{SystemTime, UNIX_EPOCH}; use tempfile::TempDir; + // Counter to ensure unique database names + static TEST_COUNTER: AtomicU64 = AtomicU64::new(0); + + fn create_test_provider(for_file: bool) -> (TempDir, DuckLakeStatisticsProvider) { + // Create a unique subdirectory to separate DuckLake metadata for each test + let temp_dir = TempDir::new().unwrap(); + let counter = TEST_COUNTER.fetch_add(1, Ordering::SeqCst); + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let unique_dir = temp_dir + .path() + .join(format!("db_{}_{}", timestamp, counter)); + std::fs::create_dir_all(&unique_dir).unwrap(); + let metadata_path = unique_dir.join("metadata.ducklake"); + if !for_file { + let provider = + DuckLakeStatisticsProvider::try_new(None, Some(metadata_path.to_str().unwrap())) + .unwrap(); + (temp_dir, provider) + } else { + let db_path = unique_dir.join("test.db"); + let provider = DuckLakeStatisticsProvider::try_new( + Some(db_path.to_str().unwrap()), + Some(metadata_path.to_str().unwrap()), + ) + .unwrap(); + (temp_dir, provider) + } + } + #[test] fn test_ducklake_statistics_provider_creation() { { // Test memory-based provider - let memory_provider = DuckLakeStatisticsProvider::try_new(None); - assert!(memory_provider.is_ok()); + let _memory_provider = create_test_provider(false); + // The provider creation is already asserted in create_test_provider } { - // Test file-based provider with temporary directory - let temp_dir = TempDir::new().unwrap(); - let db_path = temp_dir.path().join("test_stats.db"); - let file_provider = - DuckLakeStatisticsProvider::try_new(Some(db_path.to_str().unwrap())); - assert!(file_provider.is_ok()); + // Test file-based provider with unique temporary database + let (_temp_dir, _provider) = create_test_provider(true); + // The provider creation is already asserted in create_test_provider } } #[test] fn test_table_stats_insertion() { - let temp_dir = TempDir::new().unwrap(); - let db_path 
= temp_dir.path().join("test_insertion.db"); - let provider = - DuckLakeStatisticsProvider::try_new(Some(db_path.to_str().unwrap())).unwrap(); + let (_temp_dir, provider) = create_test_provider(true); // Insert table statistics let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); @@ -555,7 +618,6 @@ mod tests { Err(e) => println!("Table stats insertion failed: {}", e), } assert!(result.is_ok()); - // temp_dir is automatically cleaned up when it goes out of scope } #[test] @@ -576,10 +638,7 @@ mod tests { #[test] fn test_table_stats_insertion_and_retrieval() { - let temp_dir = TempDir::new().unwrap(); - let db_path = temp_dir.path().join("test_retrieval.db"); - let provider = - DuckLakeStatisticsProvider::try_new(Some(db_path.to_str().unwrap())).unwrap(); + let (_temp_dir, provider) = create_test_provider(true); // Insert table statistics let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); @@ -592,4 +651,202 @@ mod tests { // Note: Actual retrieval would require setting up the table_metadata // and column_metadata tables, which would be done by the DuckLake extension } + + #[test] + fn test_snapshot_versioning_and_stats_types() { + let (_temp_dir, provider) = create_test_provider(true); + let conn = provider.get_connection(); + + // Test 1: Multiple columns with sequential snapshots + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) + .unwrap(); + provider + .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) + .unwrap(); + provider + .update_table_column_stats(3, 1, "histogram", r#"{"buckets": [1,2,3]}"#) + .unwrap(); + + // Verify different columns have sequential snapshots + let mut stmt = conn + .prepare( + r#" + SELECT column_id, begin_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 + ORDER BY begin_snapshot; + "#, + ) + .unwrap(); + let snapshots: Vec<(i64, i64)> = stmt + 
.query_map([], |row| Ok((row.get(0)?, row.get(1)?))) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + assert_eq!(snapshots.len(), 3); + assert!(snapshots[1].1 > snapshots[0].1); + assert!(snapshots[2].1 > snapshots[1].1); + + // Test 2: Update same column multiple times - verify snapshot continuity + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1500}"#) + .unwrap(); + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 2000}"#) + .unwrap(); + + let mut version_stmt = conn + .prepare( + r#" + SELECT begin_snapshot, end_snapshot, payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 AND column_id = 1 AND stats_type = 'ndv' + ORDER BY begin_snapshot; + "#, + ) + .unwrap(); + let versions: Vec<(i64, Option, String)> = version_stmt + .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?))) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + + // Should have 3 versions (original + 2 updates) + assert_eq!(versions.len(), 3); + + // First two closed, last one current + assert!(versions[0].1.is_some()); + assert!(versions[1].1.is_some()); + assert!(versions[2].1.is_none()); + + // Verify snapshot continuity + assert_eq!(versions[0].1.unwrap() + 1, versions[1].0); + assert_eq!(versions[1].1.unwrap() + 1, versions[2].0); + + // Verify payloads updated correctly + assert!(versions[0].2.contains("1000")); + assert!(versions[1].2.contains("1500")); + assert!(versions[2].2.contains("2000")); + + // Test 3: Multiple stat types for same column coexist + provider + .update_table_column_stats(1, 1, "histogram", r#"{"buckets": [1,2,3,4,5]}"#) + .unwrap(); + provider + .update_table_column_stats(1, 1, "minmax", r#"{"min": 0, "max": 100}"#) + .unwrap(); + + let type_count: i64 = conn + .query_row( + r#" + SELECT COUNT(DISTINCT stats_type) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 AND column_id = 1 AND end_snapshot IS NULL + "#, + 
[], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(type_count, 3); // ndv, histogram, minmax + } + + #[test] + fn test_snapshot_tracking_and_multi_table_stats() { + let (_temp_dir, provider) = create_test_provider(true); + let conn = provider.get_connection(); + + // Get initial snapshot count + let initial_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", + [], + |row| row.get(0), + ) + .unwrap(); + + // Test 1: Snapshot creation tracking - insert stats for 3 columns + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) + .unwrap(); + provider + .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) + .unwrap(); + provider + .update_table_column_stats(3, 1, "ndv", r#"{"distinct_count": 3000}"#) + .unwrap(); + + let after_table1_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(after_table1_count - initial_count, 3); + + // Verify snapshot_changes were recorded + let changes_count: i64 = conn + .query_row( + r#" + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_snapshot_changes + WHERE changes_made LIKE 'updated_stats:%' + "#, + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(changes_count, 3); + + // Test 2: Multiple tables with independent tracking + + // Test 2: Multiple tables with independent tracking + provider + .update_table_column_stats(1, 2, "ndv", r#"{"distinct_count": 5000}"#) + .unwrap(); + provider + .update_table_column_stats(2, 2, "ndv", r#"{"distinct_count": 6000}"#) + .unwrap(); + + // Verify each table has correct number of stats + let table1_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 1", + [], + |row| row.get(0), + ) + .unwrap(); + let table2_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM 
__ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 2", + [], + |row| row.get(0), + ) + .unwrap(); + + assert_eq!(table1_count, 3); // 3 columns from table 1 + assert_eq!(table2_count, 2); // 2 columns from table 2 + + // Verify all snapshots are sequential across tables + let mut snapshot_stmt = conn + .prepare( + r#" + SELECT table_id, column_id, begin_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + ORDER BY begin_snapshot + "#, + ) + .unwrap(); + let all_snapshots: Vec = snapshot_stmt + .query_map([], |row| row.get(2)) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + + // All 5 snapshots should be increasing + for i in 1..all_snapshots.len() { + assert!(all_snapshots[i] > all_snapshots[i - 1]); + } + } } diff --git a/optd/statistics/test_stats.db b/optd/statistics/test_stats.db deleted file mode 100644 index 88ec05a2717ff0b85415f32f058b8d67d4e1929f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI#u?fOZ5CG6Gh`5A}Z6F#VhExjL33k?^?qKB#LYA;|4>xcS7tkUE!4R-Yns?0| zcLxWryTRx&iJtdjy<2UT;lDU1@hnc7caLd4J!Gden>X*65FkK+009C72oNAZfB*pk z?Frmo*UNq}-c}diKT-8V=$GZNOug@*R{8&HBpL(=5FkK+009C72oNAZpi=?=f2aJ8 V6Cgl<009C72oNAZfB=D31U|{RHAesd From 86dcfa1935e0b047bd8ca2d1edf3afda7a4f785f Mon Sep 17 00:00:00 2001 From: HFFuture Date: Tue, 28 Oct 2025 14:31:03 -0400 Subject: [PATCH 18/40] move tests to directory and add basic stats update --- optd/statistics/src/statistics.rs | 404 ++++------------- optd/statistics/tests/statistics_tests.rs | 518 ++++++++++++++++++++++ 2 files changed, 598 insertions(+), 324 deletions(-) create mode 100644 optd/statistics/tests/statistics_tests.rs diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs index 2e0ee7f..7d7c902 100644 --- a/optd/statistics/src/statistics.rs +++ b/optd/statistics/src/statistics.rs @@ -138,20 +138,20 @@ struct AdvanceColumnStatistics { data: Value, } -pub struct SnapshotId(i64); 
+pub struct SnapshotId(pub i64); pub struct SnapshotInfo { - snapshot_id: i64, - schema_version: i64, - next_catalog_id: i64, - next_file_id: i64, + pub snapshot_id: i64, + pub schema_version: i64, + pub next_catalog_id: i64, + pub next_file_id: i64, } pub struct CurrentSchema { - schema_name: String, - schema_id: i64, - begin_snapshot: i64, - end_snapshot: i64, + pub schema_name: String, + pub schema_id: i64, + pub begin_snapshot: i64, + pub end_snapshot: Option, } struct StatisticsEntry { @@ -305,6 +305,49 @@ impl DuckLakeStatisticsProvider { commit_txn_stmt.execute([]).context(QueryExecutionSnafu)?; Ok(()) } + + fn update_regular_column_stats( + &self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error> { + // Column name must be part of the query string, not a parameter + // Only min_value and max_value are supported for regular updates + let query = match stats_type { + "min_value" => { + r#" + UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_stats + SET min_value = ? + WHERE column_id = ? AND table_id = ?; + "# + } + "max_value" => { + r#" + UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_stats + SET max_value = ? + WHERE column_id = ? AND table_id = ?; + "# + } + _ => { + return Err(Error::QueryExecution { + source: DuckDBError::InvalidParameterName(format!( + "Unsupported regular stats type: {}. 
Only min_value and max_value are supported.", + stats_type + )), + }); + } + }; + + let mut update_regular_stmt = self.conn.prepare(query).context(QueryExecutionSnafu)?; + + update_regular_stmt + .execute(params![payload, column_id, table_id]) + .context(QueryExecutionSnafu)?; + + Ok(()) + } } impl StatisticsProvider for DuckLakeStatisticsProvider { @@ -415,22 +458,25 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { .context(QueryExecutionSnafu)?; let entries = stmt - .query_map([snapshot.to_string().as_str(), table], |row| { - Ok(StatisticsEntry { - table_id: row.get("column_id")?, - column_id: row.get("column_id")?, - column_name: row.get("column_name")?, - column_type: row.get("column_type")?, - record_count: row.get("record_count")?, - next_row_id: row.get("next_row_id")?, - file_size_bytes: row.get("file_size_bytes")?, - contains_null: row.get("contains_null")?, - contains_nan: row.get("contains_nan")?, - min_value: row.get("min_value")?, - max_value: row.get("max_value")?, - extra_stats: row.get("extra_stats")?, - }) - }) + .query_map( + [table, &snapshot.to_string(), &snapshot.to_string()], + |row| { + Ok(StatisticsEntry { + table_id: row.get("column_id")?, + column_id: row.get("column_id")?, + column_name: row.get("column_name")?, + column_type: row.get("column_type")?, + record_count: row.get("record_count")?, + next_row_id: row.get("next_row_id")?, + file_size_bytes: row.get("file_size_bytes")?, + contains_null: row.get("contains_null")?, + contains_nan: row.get("contains_nan")?, + min_value: row.get("min_value")?, + max_value: row.get("max_value")?, + extra_stats: row.get("extra_stats")?, + }) + }, + ) .context(QueryExecutionSnafu)? 
.map(|result| result.context(QueryExecutionSnafu)); @@ -454,6 +500,15 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { let current_snapshot = self.fetch_current_snapshot_info()?; let current_snapshot_id = current_snapshot.snapshot_id; + // match the stats_type and see if it's in the regular column stats + match stats_type { + "min_value" | "max_value" => { + self.update_regular_column_stats(column_id, table_id, stats_type, payload)?; + } + // Still update the advanced stats for these types + _ => {} + } + // Update matching past snapshot to close it let mut update_stmt = self .conn @@ -551,302 +606,3 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { Ok(()) } } - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - use std::sync::atomic::{AtomicU64, Ordering}; - use std::time::{SystemTime, UNIX_EPOCH}; - use tempfile::TempDir; - - // Counter to ensure unique database names - static TEST_COUNTER: AtomicU64 = AtomicU64::new(0); - - fn create_test_provider(for_file: bool) -> (TempDir, DuckLakeStatisticsProvider) { - // Create a unique subdirectory to separate DuckLake metadata for each test - let temp_dir = TempDir::new().unwrap(); - let counter = TEST_COUNTER.fetch_add(1, Ordering::SeqCst); - let timestamp = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_nanos(); - let unique_dir = temp_dir - .path() - .join(format!("db_{}_{}", timestamp, counter)); - std::fs::create_dir_all(&unique_dir).unwrap(); - let metadata_path = unique_dir.join("metadata.ducklake"); - if !for_file { - let provider = - DuckLakeStatisticsProvider::try_new(None, Some(metadata_path.to_str().unwrap())) - .unwrap(); - (temp_dir, provider) - } else { - let db_path = unique_dir.join("test.db"); - let provider = DuckLakeStatisticsProvider::try_new( - Some(db_path.to_str().unwrap()), - Some(metadata_path.to_str().unwrap()), - ) - .unwrap(); - (temp_dir, provider) - } - } - - #[test] - fn test_ducklake_statistics_provider_creation() { - { - // Test 
memory-based provider - let _memory_provider = create_test_provider(false); - // The provider creation is already asserted in create_test_provider - } - - { - // Test file-based provider with unique temporary database - let (_temp_dir, _provider) = create_test_provider(true); - // The provider creation is already asserted in create_test_provider - } - } - - #[test] - fn test_table_stats_insertion() { - let (_temp_dir, provider) = create_test_provider(true); - - // Insert table statistics - let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); - match &result { - Ok(_) => println!("Table stats insertion successful"), - Err(e) => println!("Table stats insertion failed: {}", e), - } - assert!(result.is_ok()); - } - - #[test] - fn test_json_payload_handling() { - let payload = json!({ - "distinct_count": 1000, - "null_count": 50, - "min_value": 1, - "max_value": 999999 - }); - - let payload_str = serde_json::to_string(&payload).unwrap(); - let parsed_back: serde_json::Value = serde_json::from_str(&payload_str).unwrap(); - - assert_eq!(parsed_back["distinct_count"], 1000); - assert_eq!(parsed_back["null_count"], 50); - } - - #[test] - fn test_table_stats_insertion_and_retrieval() { - let (_temp_dir, provider) = create_test_provider(true); - - // Insert table statistics - let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); - match &result { - Ok(_) => println!("Table stats insertion successful"), - Err(e) => println!("Table stats insertion failed: {}", e), - } - assert!(result.is_ok()); - - // Note: Actual retrieval would require setting up the table_metadata - // and column_metadata tables, which would be done by the DuckLake extension - } - - #[test] - fn test_snapshot_versioning_and_stats_types() { - let (_temp_dir, provider) = create_test_provider(true); - let conn = provider.get_connection(); - - // Test 1: Multiple columns with sequential snapshots - provider - 
.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) - .unwrap(); - provider - .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) - .unwrap(); - provider - .update_table_column_stats(3, 1, "histogram", r#"{"buckets": [1,2,3]}"#) - .unwrap(); - - // Verify different columns have sequential snapshots - let mut stmt = conn - .prepare( - r#" - SELECT column_id, begin_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = 1 - ORDER BY begin_snapshot; - "#, - ) - .unwrap(); - let snapshots: Vec<(i64, i64)> = stmt - .query_map([], |row| Ok((row.get(0)?, row.get(1)?))) - .unwrap() - .map(|r| r.unwrap()) - .collect(); - assert_eq!(snapshots.len(), 3); - assert!(snapshots[1].1 > snapshots[0].1); - assert!(snapshots[2].1 > snapshots[1].1); - - // Test 2: Update same column multiple times - verify snapshot continuity - provider - .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1500}"#) - .unwrap(); - provider - .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 2000}"#) - .unwrap(); - - let mut version_stmt = conn - .prepare( - r#" - SELECT begin_snapshot, end_snapshot, payload - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = 1 AND column_id = 1 AND stats_type = 'ndv' - ORDER BY begin_snapshot; - "#, - ) - .unwrap(); - let versions: Vec<(i64, Option, String)> = version_stmt - .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?))) - .unwrap() - .map(|r| r.unwrap()) - .collect(); - - // Should have 3 versions (original + 2 updates) - assert_eq!(versions.len(), 3); - - // First two closed, last one current - assert!(versions[0].1.is_some()); - assert!(versions[1].1.is_some()); - assert!(versions[2].1.is_none()); - - // Verify snapshot continuity - assert_eq!(versions[0].1.unwrap() + 1, versions[1].0); - assert_eq!(versions[1].1.unwrap() + 1, versions[2].0); - - // Verify payloads updated correctly - 
assert!(versions[0].2.contains("1000")); - assert!(versions[1].2.contains("1500")); - assert!(versions[2].2.contains("2000")); - - // Test 3: Multiple stat types for same column coexist - provider - .update_table_column_stats(1, 1, "histogram", r#"{"buckets": [1,2,3,4,5]}"#) - .unwrap(); - provider - .update_table_column_stats(1, 1, "minmax", r#"{"min": 0, "max": 100}"#) - .unwrap(); - - let type_count: i64 = conn - .query_row( - r#" - SELECT COUNT(DISTINCT stats_type) - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = 1 AND column_id = 1 AND end_snapshot IS NULL - "#, - [], - |row| row.get(0), - ) - .unwrap(); - assert_eq!(type_count, 3); // ndv, histogram, minmax - } - - #[test] - fn test_snapshot_tracking_and_multi_table_stats() { - let (_temp_dir, provider) = create_test_provider(true); - let conn = provider.get_connection(); - - // Get initial snapshot count - let initial_count: i64 = conn - .query_row( - "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", - [], - |row| row.get(0), - ) - .unwrap(); - - // Test 1: Snapshot creation tracking - insert stats for 3 columns - provider - .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) - .unwrap(); - provider - .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) - .unwrap(); - provider - .update_table_column_stats(3, 1, "ndv", r#"{"distinct_count": 3000}"#) - .unwrap(); - - let after_table1_count: i64 = conn - .query_row( - "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", - [], - |row| row.get(0), - ) - .unwrap(); - assert_eq!(after_table1_count - initial_count, 3); - - // Verify snapshot_changes were recorded - let changes_count: i64 = conn - .query_row( - r#" - SELECT COUNT(*) - FROM __ducklake_metadata_metalake.main.ducklake_snapshot_changes - WHERE changes_made LIKE 'updated_stats:%' - "#, - [], - |row| row.get(0), - ) - .unwrap(); - assert_eq!(changes_count, 3); - - // Test 2: 
Multiple tables with independent tracking - - // Test 2: Multiple tables with independent tracking - provider - .update_table_column_stats(1, 2, "ndv", r#"{"distinct_count": 5000}"#) - .unwrap(); - provider - .update_table_column_stats(2, 2, "ndv", r#"{"distinct_count": 6000}"#) - .unwrap(); - - // Verify each table has correct number of stats - let table1_count: i64 = conn - .query_row( - "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 1", - [], - |row| row.get(0), - ) - .unwrap(); - let table2_count: i64 = conn - .query_row( - "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 2", - [], - |row| row.get(0), - ) - .unwrap(); - - assert_eq!(table1_count, 3); // 3 columns from table 1 - assert_eq!(table2_count, 2); // 2 columns from table 2 - - // Verify all snapshots are sequential across tables - let mut snapshot_stmt = conn - .prepare( - r#" - SELECT table_id, column_id, begin_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - ORDER BY begin_snapshot - "#, - ) - .unwrap(); - let all_snapshots: Vec = snapshot_stmt - .query_map([], |row| row.get(2)) - .unwrap() - .map(|r| r.unwrap()) - .collect(); - - // All 5 snapshots should be increasing - for i in 1..all_snapshots.len() { - assert!(all_snapshots[i] > all_snapshots[i - 1]); - } - } -} diff --git a/optd/statistics/tests/statistics_tests.rs b/optd/statistics/tests/statistics_tests.rs new file mode 100644 index 0000000..cc36a9c --- /dev/null +++ b/optd/statistics/tests/statistics_tests.rs @@ -0,0 +1,518 @@ +use optd_statistics::{DuckLakeStatisticsProvider, StatisticsProvider}; +use serde_json::json; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; +use tempfile::TempDir; + +// Counter to ensure unique database names +static TEST_COUNTER: AtomicU64 = AtomicU64::new(0); + +fn create_test_statistics_provider(for_file: bool) -> (TempDir, 
DuckLakeStatisticsProvider) { + // Create a unique subdirectory to separate DuckLake metadata for each test + let temp_dir = TempDir::new().unwrap(); + let counter = TEST_COUNTER.fetch_add(1, Ordering::SeqCst); + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let unique_dir = temp_dir + .path() + .join(format!("db_{}_{}", timestamp, counter)); + std::fs::create_dir_all(&unique_dir).unwrap(); + let metadata_path = unique_dir.join("metadata.ducklake"); + if !for_file { + let provider = + DuckLakeStatisticsProvider::try_new(None, Some(metadata_path.to_str().unwrap())) + .unwrap(); + (temp_dir, provider) + } else { + let db_path = unique_dir.join("test.db"); + let provider = DuckLakeStatisticsProvider::try_new( + Some(db_path.to_str().unwrap()), + Some(metadata_path.to_str().unwrap()), + ) + .unwrap(); + (temp_dir, provider) + } +} + +#[test] +fn test_ducklake_statistics_provider_creation() { + { + // Test memory-based provider + let _memory_provider = create_test_statistics_provider(false); + // The provider creation is already asserted in create_test_provider + } + + { + // Test file-based provider with unique temporary database + let (_temp_dir, _provider) = create_test_statistics_provider(true); + // The provider creation is already asserted in create_test_provider + } +} + +#[test] +fn test_table_stats_insertion() { + let (_temp_dir, provider) = create_test_statistics_provider(true); + + // Insert table statistics + let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); + match &result { + Ok(_) => println!("Table stats insertion successful"), + Err(e) => println!("Table stats insertion failed: {}", e), + } + assert!(result.is_ok()); +} + +#[test] +fn test_table_stats_insertion_and_retrieval() { + let (_temp_dir, provider) = create_test_statistics_provider(true); + + // Insert table statistics + let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 
1000}"#); + match &result { + Ok(_) => println!("Table stats insertion successful"), + Err(e) => println!("Table stats insertion failed: {}", e), + } + assert!(result.is_ok()); + + // Note: Actual retrieval would require setting up the table_metadata + // TODO +} + +#[test] +fn test_fetch_current_schema() { + let (_temp_dir, provider) = create_test_statistics_provider(true); + + // Fetch the current schema + let result = provider.fetch_current_schema(); + + // Print error if it fails + if let Err(ref e) = result { + println!("Error fetching current schema: {}", e); + } + + // The result should be Ok since DuckLake creates a default 'main' schema + assert!( + result.is_ok(), + "Expected fetch_current_schema to succeed, got error: {:?}", + result.err() + ); + + let schema = result.unwrap(); + + // Verify the schema has valid snapshot information + println!( + "Schema name: {}, Schema ID: {}, Begin snapshot: {}, End snapshot: {:?}", + schema.schema_name, schema.schema_id, schema.begin_snapshot, schema.end_snapshot + ); + + // The schema should have a begin_snapshot value (0 for initial schema in DuckLake) + assert_eq!( + schema.schema_name, "main", + "Expected default schema to be 'main'" + ); + assert_eq!( + schema.schema_id, 0, + "Expected schema_id to be 0 for default schema" + ); + assert!( + schema.begin_snapshot >= 0, + "Schema should have a valid begin_snapshot" + ); + + // End snapshot should be None for current active schema + assert!( + schema.end_snapshot.is_none(), + "Current schema should have no end_snapshot (should be None)" + ); +} + +#[test] +fn test_snapshot_versioning_and_stats_types() { + let (_temp_dir, provider) = create_test_statistics_provider(true); + let conn = provider.get_connection(); + + // Test 1: Multiple columns with sequential snapshots + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) + .unwrap(); + provider + .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) + .unwrap(); + 
provider + .update_table_column_stats(3, 1, "histogram", r#"{"buckets": [1,2,3]}"#) + .unwrap(); + + // Verify different columns have sequential snapshots + let mut stmt = conn + .prepare( + r#" + SELECT column_id, begin_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 + ORDER BY begin_snapshot; + "#, + ) + .unwrap(); + let snapshots: Vec<(i64, i64)> = stmt + .query_map([], |row| Ok((row.get(0)?, row.get(1)?))) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + assert_eq!(snapshots.len(), 3); + assert!(snapshots[1].1 > snapshots[0].1); + assert!(snapshots[2].1 > snapshots[1].1); + + // Test 2: Update same column multiple times - verify snapshot continuity + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1500}"#) + .unwrap(); + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 2000}"#) + .unwrap(); + + let mut version_stmt = conn + .prepare( + r#" + SELECT begin_snapshot, end_snapshot, payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 AND column_id = 1 AND stats_type = 'ndv' + ORDER BY begin_snapshot; + "#, + ) + .unwrap(); + let versions: Vec<(i64, Option, String)> = version_stmt + .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?))) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + + // Should have 3 versions (original + 2 updates) + assert_eq!(versions.len(), 3); + + // First two closed, last one current + assert!(versions[0].1.is_some()); + assert!(versions[1].1.is_some()); + assert!(versions[2].1.is_none()); + + // Verify snapshot continuity + assert_eq!(versions[0].1.unwrap() + 1, versions[1].0); + assert_eq!(versions[1].1.unwrap() + 1, versions[2].0); + + // Verify payloads updated correctly + assert!(versions[0].2.contains("1000")); + assert!(versions[1].2.contains("1500")); + assert!(versions[2].2.contains("2000")); + + // Test 3: Multiple stat types for same column coexist + provider + 
.update_table_column_stats(1, 1, "histogram", r#"{"buckets": [1,2,3,4,5]}"#) + .unwrap(); + provider + .update_table_column_stats(1, 1, "minmax", r#"{"min": 0, "max": 100}"#) + .unwrap(); + + let type_count: i64 = conn + .query_row( + r#" + SELECT COUNT(DISTINCT stats_type) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 AND column_id = 1 AND end_snapshot IS NULL + "#, + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(type_count, 3); // ndv, histogram, minmax +} + +#[test] +fn test_snapshot_tracking_and_multi_table_stats() { + let (_temp_dir, provider) = create_test_statistics_provider(true); + let conn = provider.get_connection(); + + // Get initial snapshot count + let initial_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", + [], + |row| row.get(0), + ) + .unwrap(); + + // Test 1: Snapshot creation tracking - insert stats for 3 columns + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) + .unwrap(); + provider + .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) + .unwrap(); + provider + .update_table_column_stats(3, 1, "ndv", r#"{"distinct_count": 3000}"#) + .unwrap(); + + let after_table1_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(after_table1_count - initial_count, 3); + + // Verify snapshot_changes were recorded + let changes_count: i64 = conn + .query_row( + r#" + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_snapshot_changes + WHERE changes_made LIKE 'updated_stats:%' + "#, + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(changes_count, 3); + + // Test 2: Multiple tables with independent tracking + provider + .update_table_column_stats(1, 2, "ndv", r#"{"distinct_count": 5000}"#) + .unwrap(); + provider + .update_table_column_stats(2, 2, "ndv", 
r#"{"distinct_count": 6000}"#) + .unwrap(); + + // Verify each table has correct number of stats + let table1_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 1", + [], + |row| row.get(0), + ) + .unwrap(); + let table2_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 2", + [], + |row| row.get(0), + ) + .unwrap(); + + assert_eq!(table1_count, 3); // 3 columns from table 1 + assert_eq!(table2_count, 2); // 2 columns from table 2 + + // Verify all snapshots are sequential across tables + let mut snapshot_stmt = conn + .prepare( + r#" + SELECT table_id, column_id, begin_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + ORDER BY begin_snapshot + "#, + ) + .unwrap(); + let all_snapshots: Vec = snapshot_stmt + .query_map([], |row| row.get(2)) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + + // All 5 snapshots should be increasing + for i in 1..all_snapshots.len() { + assert!(all_snapshots[i] > all_snapshots[i - 1]); + } +} + +/// Helper function to create a test provider with sample table data +fn create_test_provider_with_data() -> (TempDir, DuckLakeStatisticsProvider, i64, i64) { + let (_temp_dir, provider) = create_test_statistics_provider(false); + let conn = provider.get_connection(); + + // Create a sample table with data + conn.execute_batch( + r#" + CREATE TABLE test_table ( + id INTEGER, + name VARCHAR, + age INTEGER + ); + + INSERT INTO test_table VALUES + (1, 'Alice', 30), + (2, 'Bob', 25), + (3, 'Charlie', 35); + "#, + ) + .unwrap(); + + // Get table_id and column_ids + let mut table_id_stmt = conn + .prepare( + r#" + SELECT table_id + FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name 
= 'test_table'; + "#, + ) + .unwrap(); + let table_id: i64 = table_id_stmt.query_row([], |row| row.get(0)).unwrap(); + + // Get the column_id for 'age' column (we'll update stats for this) + let mut column_id_stmt = conn + .prepare( + r#" + SELECT column_id + FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'age'; + "#, + ) + .unwrap(); + let age_column_id: i64 = column_id_stmt + .query_row([table_id], |row| row.get(0)) + .unwrap(); + + (_temp_dir, provider, table_id, age_column_id) +} + +#[test] +fn test_update_and_fetch_table_column_stats() { + let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let conn = provider.get_connection(); + + // Get initial snapshot + let initial_snapshot = provider.fetch_current_snapshot().unwrap(); + println!("Initial snapshot ID: {}", initial_snapshot.0); + + // Fetch initial statistics (should have default values from table creation) + let initial_stats = provider + .fetch_table_statistics("test_table", initial_snapshot.0, conn) + .unwrap(); + assert!(initial_stats.is_some()); + + // Update min_value for age column + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + + let snapshot_after_min = provider.fetch_current_snapshot().unwrap(); + assert_eq!(snapshot_after_min.0, initial_snapshot.0 + 1); + + // Update max_value for age column + provider + .update_table_column_stats(age_column_id, table_id, "max_value", "35") + .unwrap(); + + let snapshot_after_max = provider.fetch_current_snapshot().unwrap(); + assert_eq!(snapshot_after_max.0, initial_snapshot.0 + 2); + + // Verify the regular column stats were updated + let mut verify_stmt = conn + .prepare( + r#" + SELECT min_value, max_value + FROM __ducklake_metadata_metalake.main.ducklake_table_column_stats + WHERE table_id = ? 
AND column_id = ?; + "#, + ) + .unwrap(); + + let (min_val, max_val): (Option, Option) = verify_stmt + .query_row([table_id, age_column_id], |row| { + Ok((row.get(0)?, row.get(1)?)) + }) + .unwrap(); + + assert_eq!(min_val, Some("25".to_string())); + assert_eq!(max_val, Some("35".to_string())); + + // Verify advanced stats were also created in ducklake_table_column_adv_stats + let mut adv_stats_stmt = conn + .prepare( + r#" + SELECT stats_type, payload, begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = ? AND column_id = ? + ORDER BY stats_type, begin_snapshot; + "#, + ) + .unwrap(); + + let adv_stats: Vec<(String, String, i64, Option)> = adv_stats_stmt + .query_map([table_id, age_column_id], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)) + }) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + + assert_eq!(adv_stats.len(), 2); + assert_eq!(adv_stats[0].0, "max_value"); + assert_eq!(adv_stats[1].0, "min_value"); + assert_eq!(adv_stats[0].1, "35"); + assert_eq!(adv_stats[1].1, "25"); + assert_eq!(adv_stats[0].2, initial_snapshot.0 + 2); + assert_eq!(adv_stats[1].2, initial_snapshot.0 + 1); + assert!(adv_stats[0].3.is_none()); + assert!(adv_stats[1].3.is_none()); + + let max_value_entry = adv_stats + .iter() + .find(|(stats_type, _, _, _)| stats_type == "max_value") + .expect("max_value entry should exist"); + assert_eq!(max_value_entry.1, "35"); + assert_eq!(max_value_entry.2, initial_snapshot.0 + 2); + assert!(max_value_entry.3.is_none()); + + let min_value_entry = adv_stats + .iter() + .find(|(stats_type, _, _, _)| stats_type == "min_value") + .expect("min_value entry should exist"); + assert_eq!(min_value_entry.1, "25"); + assert_eq!(min_value_entry.2, initial_snapshot.0 + 1); + assert!(min_value_entry.3.is_none()); + + // Test updating an advanced stat type (histogram) + let histogram_data = json!({ + "buckets": [ + {"min": 20, "max": 30, "count": 2}, + {"min": 30, "max": 
40, "count": 1} + ] + }); + + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + &histogram_data.to_string(), + ) + .unwrap(); + + let snapshot_after_histogram = provider.fetch_current_snapshot().unwrap(); + assert_eq!(snapshot_after_histogram.0, initial_snapshot.0 + 3); + + // Verify histogram was added to advanced stats + let mut histogram_stmt = conn + .prepare( + r#" + SELECT payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = ? AND column_id = ? AND stats_type = 'histogram' AND end_snapshot IS NULL; + "#, + ) + .unwrap(); + + let histogram_payload: String = histogram_stmt + .query_row([table_id, age_column_id], |row| row.get(0)) + .unwrap(); + + assert_eq!(histogram_payload, histogram_data.to_string()); + + println!("✓ All update and fetch operations completed successfully"); + println!(" - Initial snapshot: {}", initial_snapshot.0); + println!(" - After min_value update: {}", snapshot_after_min.0); + println!(" - After max_value update: {}", snapshot_after_max.0); + println!(" - After histogram update: {}", snapshot_after_histogram.0); +} From c2f31c2c931eb829ee04545cb7970ac311edaa7e Mon Sep 17 00:00:00 2001 From: HFFuture Date: Thu, 30 Oct 2025 13:01:47 -0400 Subject: [PATCH 19/40] Upgrade fetch and update functions to use adv stats --- optd/statistics/src/statistics.rs | 187 +++++----- optd/statistics/tests/statistics_tests.rs | 412 +++++++++++++++++++++- 2 files changed, 499 insertions(+), 100 deletions(-) diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs index 7d7c902..6b37f3c 100644 --- a/optd/statistics/src/statistics.rs +++ b/optd/statistics/src/statistics.rs @@ -38,21 +38,37 @@ pub enum Error { }, } +struct TableColumnStatisticsEntry { + table_id: i64, + column_id: i64, + column_name: String, + column_type: String, + record_count: i64, + next_row_id: i64, + file_size_bytes: i64, + stats_type: Option, + payload: Option, +} + /** Packaged 
Statistics Objects */ /** Table statistics -- Contains overall row count and per-column statistics */ #[derive(Debug, Clone, Serialize, Deserialize)] pub struct TableStatistics { - row_count: usize, - column_statistics: Vec, + pub row_count: usize, + pub column_statistics: Vec, } -impl FromIterator> for TableStatistics { - fn from_iter>>(iter: T) -> Self { +impl FromIterator> for TableStatistics { + fn from_iter>>( + iter: T, + ) -> Self { + let mut row_flag = false; let mut row_count = 0; let mut column_statistics = Vec::new(); + // Stats will be ordered by table_id then column_id for row_result in iter { - if let Ok(StatisticsEntry { + if let Ok(TableColumnStatisticsEntry { table_id: _, column_id, column_name, @@ -60,27 +76,47 @@ impl FromIterator> for TableStatistics { record_count, next_row_id: _, file_size_bytes: _, - contains_null, - contains_nan, - min_value, - max_value, - extra_stats: _, + stats_type, + payload, }) = row_result { - row_count = record_count as usize; // Assuming all columns have the same record_count - - let column_stats = ColumnStatistics::new( - column_id, - column_type, - column_name.clone(), - min_value, - max_value, - contains_null, - contains_nan, - vec![], // Advanced stats can be populated later + // Check if unique table/column combination + if column_statistics + .last() + .map_or(true, |last: &ColumnStatistics| last.column_id != column_id) + { + // New column encountered + column_statistics.push(ColumnStatistics::new( + column_id, + column_type.clone(), + column_name.clone(), + Vec::new(), + )); + } + + assert!( + !column_statistics.is_empty() + && column_statistics.last().unwrap().column_id == column_id, + "Column statistics should not be empty and last column_id should match current column_id" ); - column_statistics.push(column_stats); + // Add advanced statistics + if let Some(last_column_stat) = column_statistics.last_mut() { + if stats_type.is_some() && payload.is_some() { + let advanced_stat = AdvanceColumnStatistics { + 
stats_type: stats_type.clone().unwrap(), + data: serde_json::from_str(&payload.clone().unwrap()) + .unwrap_or(Value::Null), + }; + last_column_stat.add_advanced_stat(advanced_stat); + } + } + + // Assuming all columns have the same record_count, only need to set once + if !row_flag { + row_count = record_count as usize; + row_flag = true; + } } } @@ -93,35 +129,23 @@ impl FromIterator> for TableStatistics { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ColumnStatistics { - id: i64, - column_type: String, - name: String, - min: Option, - max: Option, - contains_null: Option, - contains_nan: Option, - advanced_stats: Vec, // TODO, e.g. histogram, number of distinct values (set cardinality), etc. + pub column_id: i64, + pub column_type: String, + pub name: String, + pub advanced_stats: Vec, } impl ColumnStatistics { fn new( - id: i64, + column_id: i64, column_type: String, name: String, - min: Option, - max: Option, - contains_null: Option, - contains_nan: Option, advanced_stats: Vec, ) -> Self { ColumnStatistics { - id, + column_id, column_type, name, - min, - max, - contains_null, - contains_nan, advanced_stats, } } @@ -133,9 +157,9 @@ impl ColumnStatistics { } #[derive(Debug, Clone, Serialize, Deserialize)] -struct AdvanceColumnStatistics { - stats_type: String, - data: Value, +pub struct AdvanceColumnStatistics { + pub stats_type: String, + pub data: Value, } pub struct SnapshotId(pub i64); @@ -154,21 +178,6 @@ pub struct CurrentSchema { pub end_snapshot: Option, } -struct StatisticsEntry { - table_id: i64, - column_id: i64, - column_name: String, - column_type: String, - record_count: i64, - next_row_id: i64, - file_size_bytes: i64, - contains_null: Option, - contains_nan: Option, - min_value: Option, - max_value: Option, - extra_stats: Option, -} - #[derive(Debug, Serialize, Deserialize)] struct StatisticsUpdate { stats_type: String, @@ -232,12 +241,12 @@ impl DuckLakeStatisticsProvider { end_snapshot BIGINT, table_id BIGINT, stats_type VARCHAR, - 
payload TEXT + payload VARCHAR ); CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query ( query_id BIGINT, - query_string TEXT, + query_string VARCHAR, root_group_id BIGINT ); @@ -259,7 +268,7 @@ impl DuckLakeStatisticsProvider { begin_snapshot BIGINT, end_snapshot BIGINT, stats_type VARCHAR, - payload TEXT + payload VARCHAR ); CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_execution_subplan_feedback ( @@ -267,14 +276,14 @@ impl DuckLakeStatisticsProvider { begin_snapshot BIGINT, end_snapshot BIGINT, stats_type VARCHAR, - payload TEXT + payload VARCHAR ); CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_subplan_scalar_feedback ( scalar_id BIGINT, group_id BIGINT, stats_type VARCHAR, - payload TEXT, + payload VARCHAR, query_instance_id BIGINT ); "#, @@ -428,23 +437,24 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { .prepare( r#" SELECT - ts.table_id, - tcs.column_id, - dc.column_name, - dc.column_type, - ts.record_count, - ts.next_row_id, - ts.file_size_bytes, - tcs.contains_null, - tcs.contains_nan, - tcs.min_value, - tcs.max_value, - tcs.extra_stats + ts.table_id, + dc.column_id, + dc.column_name, + dc.column_type, + ts.record_count, + ts.next_row_id, + ts.file_size_bytes, + tcas.stats_type, + tcas.payload FROM __ducklake_metadata_metalake.main.ducklake_table_stats ts - LEFT JOIN __ducklake_metadata_metalake.main.ducklake_table_column_stats tcs USING (table_id) - LEFT JOIN __ducklake_metadata_metalake.main.ducklake_column dc USING (table_id, column_id) INNER JOIN __ducklake_metadata_metalake.main.ducklake_table dt ON ts.table_id = dt.table_id INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + INNER JOIN __ducklake_metadata_metalake.main.ducklake_column dc ON dt.table_id = dc.table_id + LEFT JOIN __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats tcas + ON dc.table_id = tcas.table_id + AND dc.column_id = tcas.column_id + AND ? 
>= tcas.begin_snapshot + AND (? < tcas.end_snapshot OR tcas.end_snapshot IS NULL) WHERE ds.schema_name = current_schema() AND dt.table_name = ? @@ -452,28 +462,31 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { AND ts.file_size_bytes IS NOT NULL AND ? >= dc.begin_snapshot AND (? < dc.end_snapshot OR dc.end_snapshot IS NULL) - ORDER BY ts.table_id, tcs.column_id; + ORDER BY ts.table_id, dc.column_id, tcas.stats_type; "# ) .context(QueryExecutionSnafu)?; let entries = stmt .query_map( - [table, &snapshot.to_string(), &snapshot.to_string()], + [ + &snapshot.to_string(), + &snapshot.to_string(), + table, + &snapshot.to_string(), + &snapshot.to_string(), + ], |row| { - Ok(StatisticsEntry { - table_id: row.get("column_id")?, + Ok(TableColumnStatisticsEntry { + table_id: row.get("table_id")?, column_id: row.get("column_id")?, column_name: row.get("column_name")?, column_type: row.get("column_type")?, record_count: row.get("record_count")?, next_row_id: row.get("next_row_id")?, file_size_bytes: row.get("file_size_bytes")?, - contains_null: row.get("contains_null")?, - contains_nan: row.get("contains_nan")?, - min_value: row.get("min_value")?, - max_value: row.get("max_value")?, - extra_stats: row.get("extra_stats")?, + stats_type: row.get("stats_type")?, + payload: row.get("payload")?, }) }, ) @@ -526,7 +539,7 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { update_stmt .execute(params![ - current_snapshot_id, + current_snapshot_id + 1, stats_type, column_id, table_id, diff --git a/optd/statistics/tests/statistics_tests.rs b/optd/statistics/tests/statistics_tests.rs index cc36a9c..0b8c472 100644 --- a/optd/statistics/tests/statistics_tests.rs +++ b/optd/statistics/tests/statistics_tests.rs @@ -66,18 +66,80 @@ fn test_table_stats_insertion() { #[test] fn test_table_stats_insertion_and_retrieval() { - let (_temp_dir, provider) = create_test_statistics_provider(true); + let (_temp_dir, provider, table_id, age_column_id) = 
create_test_provider_with_data(); + let conn = provider.get_connection(); - // Insert table statistics - let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); - match &result { - Ok(_) => println!("Table stats insertion successful"), - Err(e) => println!("Table stats insertion failed: {}", e), - } - assert!(result.is_ok()); + // Insert some statistics for the age column + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + provider + .update_table_column_stats(age_column_id, table_id, "max_value", "35") + .unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"buckets": [{"min": 20, "max": 30, "count": 2}]}"#, + ) + .unwrap(); + + // Fetch statistics at the latest snapshot + let latest_snapshot = provider.fetch_current_snapshot().unwrap(); + let stats = provider + .fetch_table_statistics("test_table", latest_snapshot.0, conn) + .unwrap(); + + assert!(stats.is_some()); + let table_stats = stats.unwrap(); - // Note: Actual retrieval would require setting up the table_metadata - // TODO + // Verify we have statistics for all 3 columns (id, name, age) + assert_eq!(table_stats.column_statistics.len(), 3); + assert_eq!(table_stats.row_count, 3); // 3 rows in test_table + + // Find the age column statistics + let age_stats = table_stats + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .expect("Should have statistics for age column"); + + // Verify advanced stats were retrieved + assert_eq!(age_stats.advanced_stats.len(), 3); // min_value, max_value, histogram + + let min_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "min_value") + .expect("Should have min_value stat"); + // The value gets parsed as JSON, so "25" becomes the number 25 + assert!(min_stat.data == serde_json::json!(25) || min_stat.data == serde_json::json!("25")); + + let max_stat = age_stats + .advanced_stats + .iter() + .find(|s| 
s.stats_type == "max_value") + .expect("Should have max_value stat"); + assert!(max_stat.data == serde_json::json!(35) || max_stat.data == serde_json::json!("35")); + + let histogram_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "histogram") + .expect("Should have histogram stat"); + assert!(histogram_stat.data.to_string().contains("buckets")); + + println!("✓ Table stats insertion and retrieval successful"); + println!( + " - Columns retrieved: {}", + table_stats.column_statistics.len() + ); + println!(" - Row count: {}", table_stats.row_count); + println!( + " - Age column advanced stats: {}", + age_stats.advanced_stats.len() + ); } #[test] @@ -196,9 +258,9 @@ fn test_snapshot_versioning_and_stats_types() { assert!(versions[1].1.is_some()); assert!(versions[2].1.is_none()); - // Verify snapshot continuity - assert_eq!(versions[0].1.unwrap() + 1, versions[1].0); - assert_eq!(versions[1].1.unwrap() + 1, versions[2].0); + // Verify snapshot continuity - end_snapshot should equal next begin_snapshot + assert_eq!(versions[0].1.unwrap(), versions[1].0); + assert_eq!(versions[1].1.unwrap(), versions[2].0); // Verify payloads updated correctly assert!(versions[0].2.contains("1000")); @@ -516,3 +578,327 @@ fn test_update_and_fetch_table_column_stats() { println!(" - After max_value update: {}", snapshot_after_max.0); println!(" - After histogram update: {}", snapshot_after_histogram.0); } + +#[test] +fn test_fetch_table_stats_with_snapshot_time_travel() { + // Test that fetching statistics at different snapshots returns correct historical data + let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let conn = provider.get_connection(); + + let snapshot_0 = provider.fetch_current_snapshot().unwrap(); + println!("Snapshot 0: {}", snapshot_0.0); + + // Add first version of histogram + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"version": 1, "buckets": [1, 2, 3]}"#, + ) 
+ .unwrap(); + let snapshot_1 = provider.fetch_current_snapshot().unwrap(); + println!("Snapshot 1: {}", snapshot_1.0); + + // Add second version of histogram + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"version": 2, "buckets": [1, 2, 3, 4, 5]}"#, + ) + .unwrap(); + let snapshot_2 = provider.fetch_current_snapshot().unwrap(); + println!("Snapshot 2: {}", snapshot_2.0); + + // Add third version + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"version": 3, "buckets": [10, 20, 30]}"#, + ) + .unwrap(); + let snapshot_3 = provider.fetch_current_snapshot().unwrap(); + println!("Snapshot 3: {}", snapshot_3.0); + + // Check the database + let mut debug_stmt = conn + .prepare( + r#" + SELECT column_id, stats_type, begin_snapshot, end_snapshot, payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = ? AND column_id = ? + ORDER BY begin_snapshot; + "#, + ) + .unwrap(); + println!("\nAdvanced stats in database:"); + for row in debug_stmt + .query_map([table_id, age_column_id], |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1)?, + row.get::<_, i64>(2)?, + row.get::<_, Option>(3)?, + row.get::<_, String>(4)?, + )) + }) + .unwrap() + { + let (col_id, stats_type, begin, end, payload) = row.unwrap(); + println!( + " col={}, type={}, begin={}, end={:?}, payload={}", + col_id, stats_type, begin, end, payload + ); + } + + // Fetch at snapshot 0 - should have no advanced stats + let stats_at_0 = provider + .fetch_table_statistics("test_table", snapshot_0.0, conn) + .unwrap() + .unwrap(); + let age_stats_0 = stats_at_0 + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + assert_eq!(age_stats_0.advanced_stats.len(), 0); + + // Fetch at snapshot 1 - should have version 1 + let stats_at_1 = provider + .fetch_table_statistics("test_table", snapshot_1.0, conn) + .unwrap() + .unwrap(); + let age_stats_1 = stats_at_1 + 
.column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + assert_eq!(age_stats_1.advanced_stats.len(), 1); + let histogram_1 = &age_stats_1.advanced_stats[0]; + assert!(histogram_1.data.to_string().contains("\"version\":1")); + + // Fetch at snapshot 2 - should have version 2 + let stats_at_2 = provider + .fetch_table_statistics("test_table", snapshot_2.0, conn) + .unwrap() + .unwrap(); + let age_stats_2 = stats_at_2 + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + assert_eq!(age_stats_2.advanced_stats.len(), 1); + let histogram_2 = &age_stats_2.advanced_stats[0]; + assert!(histogram_2.data.to_string().contains("\"version\":2")); + + // Fetch at snapshot 3 - should have version 3 + let stats_at_3 = provider + .fetch_table_statistics("test_table", snapshot_3.0, conn) + .unwrap() + .unwrap(); + let age_stats_3 = stats_at_3 + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + assert_eq!(age_stats_3.advanced_stats.len(), 1); + let histogram_3 = &age_stats_3.advanced_stats[0]; + assert!(histogram_3.data.to_string().contains("\"version\":3")); + + println!("✓ Snapshot time-travel test passed"); + println!( + " - Snapshot 0: {} advanced stats", + age_stats_0.advanced_stats.len() + ); + println!(" - Snapshot 1: version 1 histogram"); + println!(" - Snapshot 2: version 2 histogram"); + println!(" - Snapshot 3: version 3 histogram"); +} + +#[test] +fn test_fetch_table_stats_multiple_stat_types() { + // Test fetching when multiple stat types exist for same column + let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let conn = provider.get_connection(); + + // Add multiple different stat types + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + provider + .update_table_column_stats(age_column_id, table_id, "max_value", "35") + .unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + 
r#"{"buckets": [20, 25, 30, 35]}"#, + ) + .unwrap(); + provider + .update_table_column_stats(age_column_id, table_id, "ndv", r#"{"distinct_count": 3}"#) + .unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "quantiles", + r#"{"p50": 30, "p95": 34, "p99": 35}"#, + ) + .unwrap(); + + let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let stats = provider + .fetch_table_statistics("test_table", current_snapshot.0, conn) + .unwrap() + .unwrap(); + + let age_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + + // Should have all 5 stat types + assert_eq!(age_stats.advanced_stats.len(), 5); + + // Verify all stat types are present + let stat_types: Vec<&str> = age_stats + .advanced_stats + .iter() + .map(|s| s.stats_type.as_str()) + .collect(); + assert!(stat_types.contains(&"min_value")); + assert!(stat_types.contains(&"max_value")); + assert!(stat_types.contains(&"histogram")); + assert!(stat_types.contains(&"ndv")); + assert!(stat_types.contains(&"quantiles")); + + println!("✓ Multiple stat types test passed"); + println!(" - Total stat types: {}", age_stats.advanced_stats.len()); + println!(" - Stat types: {:?}", stat_types); +} + +#[test] +fn test_fetch_table_stats_columns_without_stats() { + // Test that columns without advanced stats are still returned + let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let conn = provider.get_connection(); + + // Only add stats for age column, not for id or name + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + + let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let stats = provider + .fetch_table_statistics("test_table", current_snapshot.0, conn) + .unwrap() + .unwrap(); + + // Should have all 3 columns even though only age has stats + assert_eq!(stats.column_statistics.len(), 3); + + // Find each column + let id_stats = stats + 
.column_statistics + .iter() + .find(|cs| cs.name == "id") + .expect("Should have id column"); + let name_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "name") + .expect("Should have name column"); + let age_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .expect("Should have age column"); + + // id and name should have no advanced stats + assert_eq!(id_stats.advanced_stats.len(), 0); + assert_eq!(name_stats.advanced_stats.len(), 0); + + // age should have 1 advanced stat + assert_eq!(age_stats.advanced_stats.len(), 1); + + println!("✓ Columns without stats test passed"); + println!(" - Total columns: {}", stats.column_statistics.len()); + println!(" - id stats: {}", id_stats.advanced_stats.len()); + println!(" - name stats: {}", name_stats.advanced_stats.len()); + println!(" - age stats: {}", age_stats.advanced_stats.len()); +} + +#[test] +fn test_fetch_table_stats_row_count() { + // Test that row_count is correctly populated + let (_temp_dir, provider) = create_test_statistics_provider(false); + let conn = provider.get_connection(); + + // Create table with known row count + conn.execute_batch( + r#" + CREATE TABLE large_table ( + col1 INTEGER, + col2 VARCHAR + ); + + INSERT INTO large_table + SELECT i, 'value_' || i::VARCHAR + FROM range(1, 101) t(i); + "#, + ) + .unwrap(); + + // Get table_id + let mut table_id_stmt = conn + .prepare( + r#" + SELECT table_id + FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'large_table'; + "#, + ) + .unwrap(); + let table_id: i64 = table_id_stmt.query_row([], |row| row.get(0)).unwrap(); + + // Get column_id for col1 + let mut column_id_stmt = conn + .prepare( + r#" + SELECT column_id + FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'col1'; + "#, + ) + .unwrap(); + let col1_id: i64 = column_id_stmt + .query_row([table_id], |row| row.get(0)) + .unwrap(); + + // Add some stats + provider + .update_table_column_stats(col1_id, table_id, "ndv", r#"{"distinct_count": 100}"#) + .unwrap(); + + let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let stats = provider + .fetch_table_statistics("large_table", current_snapshot.0, conn) + .unwrap() + .unwrap(); + + // Verify row count + assert_eq!(stats.row_count, 100); + assert_eq!(stats.column_statistics.len(), 2); // col1 and col2 + + println!("✓ Row count test passed"); + println!(" - Row count: {}", stats.row_count); + println!(" - Column count: {}", stats.column_statistics.len()); +} From bf988e305dc006e8ca5335467ae9bbfab5b9b389 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Thu, 30 Oct 2025 14:49:54 -0400 Subject: [PATCH 20/40] get schema implementation update --- optd/statistics/src/statistics.rs | 198 ++++++++++---- optd/statistics/tests/statistics_tests.rs | 301 +++++++++++++++++++++- 2 files changed, 437 insertions(+), 62 deletions(-) diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs index 6b37f3c..e23bade 100644 --- a/optd/statistics/src/statistics.rs +++ b/optd/statistics/src/statistics.rs @@ -1,4 +1,11 @@ -use duckdb::{Connection, Error as DuckDBError, params, types::Null}; +use std::sync::Arc; + +use duckdb::{ + Connection, Error as DuckDBError, + arrow::datatypes::{DataType, Field, Schema, SchemaRef}, + params, + types::Null, +}; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -14,6 +21,8 @@ pub enum Error { QueryExecution { source: DuckDBError }, #[snafu(display("JSON serialization error: {}", source))] JsonSerialization { source: serde_json::Error }, + #[snafu(display("ARROW DataType conversion error: {}", source))] + ArrowDataTypeConversion { source: duckdb::Error }, #[snafu(display( "Get statistics failed for table: {}, column: {}, snapshot: {}", table, @@ 
-67,56 +76,53 @@ impl FromIterator> for TableStatistics let mut column_statistics = Vec::new(); // Stats will be ordered by table_id then column_id - for row_result in iter { - if let Ok(TableColumnStatisticsEntry { - table_id: _, - column_id, - column_name, - column_type, - record_count, - next_row_id: _, - file_size_bytes: _, - stats_type, - payload, - }) = row_result + for TableColumnStatisticsEntry { + table_id: _, + column_id, + column_name, + column_type, + record_count, + next_row_id: _, + file_size_bytes: _, + stats_type, + payload, + } in iter.into_iter().flatten() + { + // Check if unique table/column combination + if column_statistics + .last() + .is_none_or(|last: &ColumnStatistics| last.column_id != column_id) + { + // New column encountered + column_statistics.push(ColumnStatistics::new( + column_id, + column_type.clone(), + column_name.clone(), + Vec::new(), + )); + } + + assert!( + !column_statistics.is_empty() + && column_statistics.last().unwrap().column_id == column_id, + "Column statistics should not be empty and last column_id should match current column_id" + ); + + if let Some(last_column_stat) = column_statistics.last_mut() + && stats_type.is_some() + && payload.is_some() { - // Check if unique table/column combination - if column_statistics - .last() - .map_or(true, |last: &ColumnStatistics| last.column_id != column_id) - { - // New column encountered - column_statistics.push(ColumnStatistics::new( - column_id, - column_type.clone(), - column_name.clone(), - Vec::new(), - )); - } - - assert!( - !column_statistics.is_empty() - && column_statistics.last().unwrap().column_id == column_id, - "Column statistics should not be empty and last column_id should match current column_id" - ); - - // Add advanced statistics - if let Some(last_column_stat) = column_statistics.last_mut() { - if stats_type.is_some() && payload.is_some() { - let advanced_stat = AdvanceColumnStatistics { - stats_type: stats_type.clone().unwrap(), - data: 
serde_json::from_str(&payload.clone().unwrap()) - .unwrap_or(Value::Null), - }; - last_column_stat.add_advanced_stat(advanced_stat); - } - } - - // Assuming all columns have the same record_count, only need to set once - if !row_flag { - row_count = record_count as usize; - row_flag = true; - } + let advanced_stat = AdvanceColumnStatistics { + stats_type: stats_type.clone().unwrap(), + data: serde_json::from_str(&payload.clone().unwrap()).unwrap_or(Value::Null), + }; + last_column_stat.add_advanced_stat(advanced_stat); + } + + // Assuming all columns have the same record_count, only need to set once + if !row_flag { + row_count = record_count as usize; + row_flag = true; } } @@ -189,7 +195,9 @@ pub trait StatisticsProvider { fn fetch_current_snapshot_info(&self) -> Result; - fn fetch_current_schema(&self) -> Result; + fn fetch_current_schema(&self, schema: Option<&str>, table: &str) -> Result; + + fn fetch_current_schema_info(&self) -> Result; /// Retrieve table and column statistics at specific snapshot fn fetch_table_statistics( @@ -215,6 +223,43 @@ pub struct DuckLakeStatisticsProvider { } impl DuckLakeStatisticsProvider { + /// Convert DuckDB type string to Arrow DataType + fn duckdb_type_to_arrow(type_str: &str) -> Result { + // Handle common DuckDB types + let data_type = match type_str.to_uppercase().as_str() { + "INTEGER" | "INT" | "INT4" => DataType::Int32, + "BIGINT" | "INT8" | "LONG" => DataType::Int64, + "SMALLINT" | "INT2" | "SHORT" => DataType::Int16, + "TINYINT" | "INT1" => DataType::Int8, + "DOUBLE" | "FLOAT8" => DataType::Float64, + "FLOAT" | "REAL" | "FLOAT4" => DataType::Float32, + "BOOLEAN" | "BOOL" => DataType::Boolean, + "VARCHAR" | "TEXT" | "STRING" => DataType::Utf8, + "DATE" => DataType::Date32, + "TIMESTAMP" => { + DataType::Timestamp(duckdb::arrow::datatypes::TimeUnit::Microsecond, None) + } + "TIME" => DataType::Time64(duckdb::arrow::datatypes::TimeUnit::Microsecond), + "BLOB" | "BYTEA" | "BINARY" => DataType::Binary, + "DECIMAL" => 
DataType::Decimal128(38, 10), // Default precision and scale + _ => { + // For unsupported types, use Utf8 as fallback or you could error out + // Here we'll just return an error through the ArrowDataTypeConversion variant + return Err(Error::ArrowDataTypeConversion { + source: DuckDBError::FromSqlConversionFailure( + 0, + duckdb::types::Type::Text, + Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unsupported DuckDB type for Arrow conversion: {}", type_str), + )), + ), + }); + } + }; + Ok(data_type) + } + /// Create a new DuckLakeStatisticsProvider with memory-based DuckDB /// Parameters: /// - location: Optional path to database file @@ -400,7 +445,56 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { Ok(current_snapshot_info) } - fn fetch_current_schema(&self) -> Result { + fn fetch_current_schema(&self, schema: Option<&str>, table: &str) -> Result { + // Construct the table reference (schema.table or just table) + let table_ref = if let Some(s) = schema { + format!("{}.{}", s, table) + } else { + table.to_string() + }; + + let schema_query = format!("DESCRIBE {};", table_ref); + + let mut stmt = self + .conn + .prepare(&schema_query) + .context(QueryExecutionSnafu)?; + + let mut fields = Vec::new(); + let column_iter = stmt + .query_map([], |row| { + let column_name: String = row.get("column_name")?; + let column_type_str: String = row.get("column_type")?; + let null: String = row.get("null")?; + + // Convert DuckDB type to Arrow type + let column_type = Self::duckdb_type_to_arrow(&column_type_str).map_err(|_| { + DuckDBError::FromSqlConversionFailure( + 0, + duckdb::types::Type::Text, + Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!( + "Could not convert DuckDB type '{}' to Arrow type", + column_type_str + ), + )), + ) + })?; + + fields.push(Field::new(column_name, column_type, null == "YES")); + Ok(()) + }) + .context(QueryExecutionSnafu)?; + + for result in column_iter { + 
result.context(QueryExecutionSnafu)?; + } + let schema = Schema::new(fields); + Ok(Arc::new(schema)) + } + + fn fetch_current_schema_info(&self) -> Result { let mut stmt = self .conn .prepare( diff --git a/optd/statistics/tests/statistics_tests.rs b/optd/statistics/tests/statistics_tests.rs index 0b8c472..d3c02bc 100644 --- a/optd/statistics/tests/statistics_tests.rs +++ b/optd/statistics/tests/statistics_tests.rs @@ -147,14 +147,10 @@ fn test_fetch_current_schema() { let (_temp_dir, provider) = create_test_statistics_provider(true); // Fetch the current schema - let result = provider.fetch_current_schema(); - - // Print error if it fails + let result = provider.fetch_current_schema_info(); if let Err(ref e) = result { println!("Error fetching current schema: {}", e); } - - // The result should be Ok since DuckLake creates a default 'main' schema assert!( result.is_ok(), "Expected fetch_current_schema to succeed, got error: {:?}", @@ -572,11 +568,11 @@ fn test_update_and_fetch_table_column_stats() { assert_eq!(histogram_payload, histogram_data.to_string()); - println!("✓ All update and fetch operations completed successfully"); - println!(" - Initial snapshot: {}", initial_snapshot.0); - println!(" - After min_value update: {}", snapshot_after_min.0); - println!(" - After max_value update: {}", snapshot_after_max.0); - println!(" - After histogram update: {}", snapshot_after_histogram.0); + println!("All update and fetch operations completed"); + println!(" Initial snapshot: {}", initial_snapshot.0); + println!(" After min_value update: {}", snapshot_after_min.0); + println!(" After max_value update: {}", snapshot_after_max.0); + println!(" After histogram update: {}", snapshot_after_histogram.0); } #[test] @@ -902,3 +898,288 @@ fn test_fetch_table_stats_row_count() { println!(" - Row count: {}", stats.row_count); println!(" - Column count: {}", stats.column_statistics.len()); } + +#[test] +fn test_fetch_current_schema_arrow() { + let (_temp_dir, provider) = 
create_test_statistics_provider(false); + let conn = provider.get_connection(); + + // Create a test table + conn.execute_batch( + r#" + CREATE TABLE schema_test_table ( + id INTEGER, + name VARCHAR, + value DOUBLE, + active BOOLEAN + ); + "#, + ) + .unwrap(); + + // Fetch schema without specifying schema (default to current schema) + let schema = provider + .fetch_current_schema(None, "schema_test_table") + .unwrap(); + + assert_eq!(schema.fields().len(), 4); + + // Verify field names and types + let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect(); + assert!(field_names.contains(&"id")); + assert!(field_names.contains(&"name")); + assert!(field_names.contains(&"value")); + assert!(field_names.contains(&"active")); + + let id_field = schema.field_with_name("id").unwrap(); + assert!(matches!( + id_field.data_type(), + &duckdb::arrow::datatypes::DataType::Int32 + )); + let name_field = schema.field_with_name("name").unwrap(); + assert!(matches!( + name_field.data_type(), + &duckdb::arrow::datatypes::DataType::Utf8 + )); + let value_field = schema.field_with_name("value").unwrap(); + assert!(matches!( + value_field.data_type(), + &duckdb::arrow::datatypes::DataType::Float64 + )); + let active_field = schema.field_with_name("active").unwrap(); + assert!(matches!( + active_field.data_type(), + &duckdb::arrow::datatypes::DataType::Boolean + )); + + // Fetch schema with explicit schema name + let schema_explicit = provider + .fetch_current_schema(Some("main"), "schema_test_table") + .unwrap(); + + assert_eq!(schema_explicit.fields().len(), 4); + assert_eq!(schema.fields().len(), schema_explicit.fields().len()); + + println!("✓ Schema fetch test passed"); + println!(" - Fields: {}", schema.fields().len()); + println!(" - Field names: {:?}", field_names); +} + +#[test] +fn test_multiple_schemas_comprehensive() { + let (_temp_dir, provider) = create_test_statistics_provider(false); + let conn = provider.get_connection(); + + // Get 
initial schema info (should be 'main') + let initial_schema_info = provider.fetch_current_schema_info().unwrap(); + assert_eq!(initial_schema_info.schema_name, "main"); + assert_eq!(initial_schema_info.schema_id, 0); + assert!(initial_schema_info.end_snapshot.is_none()); + println!("✓ Initial schema: {}", initial_schema_info.schema_name); + + // Create additional schemas + conn.execute_batch( + r#" + CREATE SCHEMA analytics; + CREATE SCHEMA reporting; + "#, + ) + .unwrap(); + println!("✓ Created additional schemas: analytics, reporting"); + + // Create tables in different schemas + conn.execute_batch( + r#" + -- Table in main schema + CREATE TABLE main.users ( + user_id INTEGER, + username VARCHAR, + email VARCHAR, + created_at TIMESTAMP + ); + + -- Table in analytics schema + CREATE TABLE analytics.metrics ( + metric_id BIGINT, + metric_name VARCHAR, + value DOUBLE, + recorded_at DATE + ); + + -- Table in reporting schema + CREATE TABLE reporting.summary ( + report_id SMALLINT, + report_name TEXT, + data BLOB, + is_published BOOLEAN + ); + "#, + ) + .unwrap(); + println!("✓ Created tables in all schemas"); + + // Test 1: Fetch schema from main (without explicit schema parameter) + let main_users_schema = provider.fetch_current_schema(None, "users").unwrap(); + assert_eq!(main_users_schema.fields().len(), 4); + + let user_id_field = main_users_schema.field_with_name("user_id").unwrap(); + assert!(matches!( + user_id_field.data_type(), + &duckdb::arrow::datatypes::DataType::Int32 + )); + + let username_field = main_users_schema.field_with_name("username").unwrap(); + assert!(matches!( + username_field.data_type(), + &duckdb::arrow::datatypes::DataType::Utf8 + )); + + let created_at_field = main_users_schema.field_with_name("created_at").unwrap(); + assert!(matches!( + created_at_field.data_type(), + &duckdb::arrow::datatypes::DataType::Timestamp(_, _) + )); + + println!("✓ Fetched main.users schema (4 fields)"); + + // Test 2: Fetch schema from main (with explicit 
schema parameter) + let main_users_schema_explicit = provider + .fetch_current_schema(Some("main"), "users") + .unwrap(); + assert_eq!(main_users_schema_explicit.fields().len(), 4); + println!("✓ Fetched main.users schema explicitly"); + + // Test 3: Fetch schema from analytics schema + let analytics_metrics_schema = provider + .fetch_current_schema(Some("analytics"), "metrics") + .unwrap(); + assert_eq!(analytics_metrics_schema.fields().len(), 4); + + let metric_id_field = analytics_metrics_schema + .field_with_name("metric_id") + .unwrap(); + assert!(matches!( + metric_id_field.data_type(), + &duckdb::arrow::datatypes::DataType::Int64 + )); + + let value_field = analytics_metrics_schema.field_with_name("value").unwrap(); + assert!(matches!( + value_field.data_type(), + &duckdb::arrow::datatypes::DataType::Float64 + )); + + let recorded_at_field = analytics_metrics_schema + .field_with_name("recorded_at") + .unwrap(); + assert!(matches!( + recorded_at_field.data_type(), + &duckdb::arrow::datatypes::DataType::Date32 + )); + + println!("✓ Fetched analytics.metrics schema (4 fields)"); + + // Test 4: Fetch schema from reporting schema + let reporting_summary_schema = provider + .fetch_current_schema(Some("reporting"), "summary") + .unwrap(); + assert_eq!(reporting_summary_schema.fields().len(), 4); + + let report_id_field = reporting_summary_schema + .field_with_name("report_id") + .unwrap(); + assert!(matches!( + report_id_field.data_type(), + &duckdb::arrow::datatypes::DataType::Int16 + )); + + let report_name_field = reporting_summary_schema + .field_with_name("report_name") + .unwrap(); + assert!(matches!( + report_name_field.data_type(), + &duckdb::arrow::datatypes::DataType::Utf8 + )); + + let data_field = reporting_summary_schema.field_with_name("data").unwrap(); + assert!(matches!( + data_field.data_type(), + &duckdb::arrow::datatypes::DataType::Binary + )); + + let is_published_field = reporting_summary_schema + .field_with_name("is_published") + .unwrap(); 
+ assert!(matches!( + is_published_field.data_type(), + &duckdb::arrow::datatypes::DataType::Boolean + )); + + println!("✓ Fetched reporting.summary schema (4 fields)"); + + // Test 5: Verify schema_info still returns main (current schema) + let current_schema_info = provider.fetch_current_schema_info().unwrap(); + assert_eq!(current_schema_info.schema_name, "main"); + println!("✓ Current schema is still 'main'"); + + // Test 6: Switch to analytics schema and verify + conn.execute("USE analytics;", []).unwrap(); + let analytics_schema_info = provider.fetch_current_schema_info().unwrap(); + assert_eq!(analytics_schema_info.schema_name, "analytics"); + assert!(analytics_schema_info.end_snapshot.is_none()); + println!("✓ Switched to analytics schema"); + + // Test 7: Fetch table from current schema (analytics) without explicit schema + let metrics_schema_implicit = provider.fetch_current_schema(None, "metrics").unwrap(); + assert_eq!(metrics_schema_implicit.fields().len(), 4); + println!("✓ Fetched metrics from current schema (analytics) implicitly"); + + // Test 8: Can still access other schemas explicitly + let users_from_main = provider + .fetch_current_schema(Some("main"), "users") + .unwrap(); + assert_eq!(users_from_main.fields().len(), 4); + println!("✓ Can still access main.users from analytics schema"); + + // Test 9: Switch to reporting and verify + conn.execute("USE reporting;", []).unwrap(); + let reporting_schema_info = provider.fetch_current_schema_info().unwrap(); + assert_eq!(reporting_schema_info.schema_name, "reporting"); + println!("✓ Switched to reporting schema"); + + // Test 10: Verify all schemas exist in metadata + let mut schema_list_stmt = conn + .prepare( + r#" + SELECT schema_name, schema_id, begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_schema + ORDER BY schema_id; + "#, + ) + .unwrap(); + + let schemas: Vec<(String, i64, i64, Option)> = schema_list_stmt + .query_map([], |row| { + Ok((row.get(0)?, 
row.get(1)?, row.get(2)?, row.get(3)?)) + }) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + + // Should have at least 3 schemas: main, analytics, reporting + assert!(schemas.len() >= 3); + + let schema_names: Vec<&str> = schemas + .iter() + .map(|(name, _, _, _)| name.as_str()) + .collect(); + assert!(schema_names.contains(&"main")); + assert!(schema_names.contains(&"analytics")); + assert!(schema_names.contains(&"reporting")); + + // All schemas should be active (end_snapshot is None) + for (name, _, _, end_snapshot) in &schemas { + println!(" Schema: {}, end_snapshot: {:?}", name, end_snapshot); + assert!(end_snapshot.is_none(), "Schema {} should be active", name); + } +} From 23192a55bc273e59a8fc5aa20fc8502fda1cf9a5 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Thu, 30 Oct 2025 15:39:48 -0400 Subject: [PATCH 21/40] documentation & format --- optd/statistics/src/statistics.rs | 345 +++--- optd/statistics/tests/statistics_tests.rs | 1297 ++++++++++++--------- 2 files changed, 885 insertions(+), 757 deletions(-) diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs index e23bade..394eb53 100644 --- a/optd/statistics/src/statistics.rs +++ b/optd/statistics/src/statistics.rs @@ -13,6 +13,69 @@ use snafu::{ResultExt, prelude::*}; const DEFAULT_METADATA_FILE: &str = "metadata.ducklake"; +/// SQL query to fetch table statistics including column metadata and advanced stats at a specific snapshot. 
+const FETCH_TABLE_STATS_QUERY: &str = r#" + SELECT + ts.table_id, + dc.column_id, + dc.column_name, + dc.column_type, + ts.record_count, + ts.next_row_id, + ts.file_size_bytes, + tcas.stats_type, + tcas.payload + FROM __ducklake_metadata_metalake.main.ducklake_table_stats ts + INNER JOIN __ducklake_metadata_metalake.main.ducklake_table dt ON ts.table_id = dt.table_id + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + INNER JOIN __ducklake_metadata_metalake.main.ducklake_column dc ON dt.table_id = dc.table_id + LEFT JOIN __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats tcas + ON dc.table_id = tcas.table_id + AND dc.column_id = tcas.column_id + AND ? >= tcas.begin_snapshot + AND (? < tcas.end_snapshot OR tcas.end_snapshot IS NULL) + WHERE + ds.schema_name = current_schema() + AND dt.table_name = ? + AND ts.record_count IS NOT NULL + AND ts.file_size_bytes IS NOT NULL + AND ? >= dc.begin_snapshot + AND (? < dc.end_snapshot OR dc.end_snapshot IS NULL) + ORDER BY ts.table_id, dc.column_id, tcas.stats_type; +"#; + +/// SQL query to close an existing advanced statistics entry by setting its end_snapshot. +const UPDATE_ADV_STATS_QUERY: &str = r#" + UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + SET end_snapshot = ? + WHERE end_snapshot IS NULL + AND stats_type = ? + AND column_id = ? + AND table_id = ?; +"#; + +/// SQL query to insert a new advanced statistics entry. +const INSERT_ADV_STATS_QUERY: &str = r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) + VALUES (?, ?, ?, ?, ?, ?); +"#; + +/// SQL query to insert a new snapshot record. 
+const INSERT_SNAPSHOT_QUERY: &str = r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot + (snapshot_id, snapshot_time, schema_version, next_catalog_id, next_file_id) + VALUES (?, NOW(), ?, ?, ?); +"#; + +/// SQL query to record a snapshot change in the change log. +const INSERT_SNAPSHOT_CHANGE_QUERY: &str = r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot_changes + (snapshot_id, changes_made, author, commit_message, commit_extra_info) + VALUES (?, ?, ?, ?, ?); +"#; + +/// Error types for statistics operations. #[derive(Debug, Snafu)] pub enum Error { #[snafu(display("Database connection error: {}", source))] @@ -47,6 +110,8 @@ pub enum Error { }, } +/// Internal struct representing a row from the table statistics query. +/// Used for collecting data before aggregating into TableStatistics. struct TableColumnStatisticsEntry { table_id: i64, column_id: i64, @@ -59,8 +124,8 @@ struct TableColumnStatisticsEntry { payload: Option, } -/** Packaged Statistics Objects */ -/** Table statistics -- Contains overall row count and per-column statistics */ +/// Statistics for a table including row count and per-column statistics. +/// Main structure returned when querying table statistics. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct TableStatistics { pub row_count: usize, @@ -108,15 +173,14 @@ impl FromIterator> for TableStatistics "Column statistics should not be empty and last column_id should match current column_id" ); - if let Some(last_column_stat) = column_statistics.last_mut() - && stats_type.is_some() - && payload.is_some() - { - let advanced_stat = AdvanceColumnStatistics { - stats_type: stats_type.clone().unwrap(), - data: serde_json::from_str(&payload.clone().unwrap()).unwrap_or(Value::Null), - }; - last_column_stat.add_advanced_stat(advanced_stat); + if let Some(last_column_stat) = column_statistics.last_mut() { + if let (Some(st), Some(pl)) = (stats_type, payload) { + let data = serde_json::from_str(&pl).unwrap_or(Value::Null); + last_column_stat.add_advanced_stat(AdvanceColumnStatistics { + stats_type: st, + data, + }); + } } // Assuming all columns have the same record_count, only need to set once @@ -133,6 +197,7 @@ impl FromIterator> for TableStatistics } } +/// Statistics for a single column including type, name, and advanced statistics. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ColumnStatistics { pub column_id: i64, @@ -148,7 +213,7 @@ impl ColumnStatistics { name: String, advanced_stats: Vec, ) -> Self { - ColumnStatistics { + Self { column_id, column_type, name, @@ -156,20 +221,25 @@ impl ColumnStatistics { } } - #[allow(dead_code)] fn add_advanced_stat(&mut self, stat: AdvanceColumnStatistics) { self.advanced_stats.push(stat); } } +/// An advanced statistics entry with type and serialized data at a snapshot. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AdvanceColumnStatistics { pub stats_type: String, pub data: Value, } +/// Identifier for a snapshot in the statistics database. +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct SnapshotId(pub i64); +/// Snapshot metadata including schema version and next IDs. 
+#[derive(Debug, Clone, Serialize, Deserialize)] + pub struct SnapshotInfo { pub snapshot_id: i64, pub schema_version: i64, @@ -177,6 +247,9 @@ pub struct SnapshotInfo { pub next_file_id: i64, } +/// Schema information including name, ID, and valid snapshot range. +#[derive(Debug, Clone, Serialize, Deserialize)] + pub struct CurrentSchema { pub schema_name: String, pub schema_id: i64, @@ -190,16 +263,21 @@ struct StatisticsUpdate { payload: String, } +/// Trait defining operations for managing table statistics with snapshot-based time travel. pub trait StatisticsProvider { + /// Fetches the current (most recent) snapshot ID. fn fetch_current_snapshot(&self) -> Result; + /// Fetches complete metadata for the current snapshot. fn fetch_current_snapshot_info(&self) -> Result; + /// Fetches the Arrow schema for a table at the current snapshot. fn fetch_current_schema(&self, schema: Option<&str>, table: &str) -> Result; + /// Fetches schema information including name, ID, and snapshot range. fn fetch_current_schema_info(&self) -> Result; - /// Retrieve table and column statistics at specific snapshot + /// Retrieves table and column statistics at a specific snapshot. fn fetch_table_statistics( &self, table_name: &str, @@ -207,7 +285,7 @@ pub trait StatisticsProvider { connection: &Connection, ) -> Result, Error>; - /// Insert table column statistics + /// Updates or inserts advanced statistics for a table column. fn update_table_column_stats( &self, column_id: i64, @@ -217,13 +295,13 @@ pub trait StatisticsProvider { ) -> Result<(), Error>; } -/// DuckLake-based implementation of StatisticsProvider +/// DuckLake-based implementation of StatisticsProvider using DuckDB with snapshot management. pub struct DuckLakeStatisticsProvider { conn: Connection, } impl DuckLakeStatisticsProvider { - /// Convert DuckDB type string to Arrow DataType + /// Converts a DuckDB type string to an Arrow DataType. 
fn duckdb_type_to_arrow(type_str: &str) -> Result { // Handle common DuckDB types let data_type = match type_str.to_uppercase().as_str() { @@ -260,10 +338,8 @@ impl DuckLakeStatisticsProvider { Ok(data_type) } - /// Create a new DuckLakeStatisticsProvider with memory-based DuckDB - /// Parameters: - /// - location: Optional path to database file - /// - metadata_path: Optional path to ducklake metadata file + /// Creates a new DuckLakeStatisticsProvider with optional file paths. + /// If `location` is None, uses in-memory database. If `metadata_path` is None, uses default metadata file. pub fn try_new(location: Option<&str>, metadata_path: Option<&str>) -> Result { let conn = if let Some(path) = location { Connection::open(path).context(ConnectionSnafu)? @@ -338,100 +414,46 @@ impl DuckLakeStatisticsProvider { Ok(Self { conn }) } + /// Returns a reference to the underlying DuckDB connection. pub fn get_connection(&self) -> &Connection { &self.conn } + /// Begins a database transaction. fn begin_transaction(&self) -> Result<(), Error> { - let mut begin_txn_stmt = self - .conn - .prepare("BEGIN TRANSACTION;") - .context(QueryExecutionSnafu)?; - begin_txn_stmt.execute([]).context(QueryExecutionSnafu)?; - Ok(()) + self.conn + .execute_batch("BEGIN TRANSACTION;") + .context(QueryExecutionSnafu) } + /// Commits the current database transaction. 
fn commit_transaction(&self) -> Result<(), Error> { - let mut commit_txn_stmt = self - .conn - .prepare("COMMIT TRANSACTION;") - .context(QueryExecutionSnafu)?; - commit_txn_stmt.execute([]).context(QueryExecutionSnafu)?; - Ok(()) - } - - fn update_regular_column_stats( - &self, - column_id: i64, - table_id: i64, - stats_type: &str, - payload: &str, - ) -> Result<(), Error> { - // Column name must be part of the query string, not a parameter - // Only min_value and max_value are supported for regular updates - let query = match stats_type { - "min_value" => { - r#" - UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_stats - SET min_value = ? - WHERE column_id = ? AND table_id = ?; - "# - } - "max_value" => { - r#" - UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_stats - SET max_value = ? - WHERE column_id = ? AND table_id = ?; - "# - } - _ => { - return Err(Error::QueryExecution { - source: DuckDBError::InvalidParameterName(format!( - "Unsupported regular stats type: {}. Only min_value and max_value are supported.", - stats_type - )), - }); - } - }; - - let mut update_regular_stmt = self.conn.prepare(query).context(QueryExecutionSnafu)?; - - update_regular_stmt - .execute(params![payload, column_id, table_id]) - .context(QueryExecutionSnafu)?; - - Ok(()) + self.conn + .execute_batch("COMMIT TRANSACTION;") + .context(QueryExecutionSnafu) } } impl StatisticsProvider for DuckLakeStatisticsProvider { fn fetch_current_snapshot(&self) -> Result { - let mut stmt = self - .conn + self.conn .prepare("FROM ducklake_current_snapshot('metalake');") - .context(QueryExecutionSnafu)?; - - let snapshot_id = stmt + .context(QueryExecutionSnafu)? 
.query_row([], |row| Ok(SnapshotId(row.get(0)?))) - .context(QueryExecutionSnafu)?; - - Ok(snapshot_id) + .context(QueryExecutionSnafu) } fn fetch_current_snapshot_info(&self) -> Result { - let mut snapshot_stmt = self - .conn + self.conn .prepare( r#" - SELECT snapshot_id, schema_version, next_catalog_id, next_file_id - FROM __ducklake_metadata_metalake.main.ducklake_snapshot - WHERE snapshot_id = (SELECT MAX(snapshot_id) - FROM __ducklake_metadata_metalake.main.ducklake_snapshot); - "#, + SELECT snapshot_id, schema_version, next_catalog_id, next_file_id + FROM __ducklake_metadata_metalake.main.ducklake_snapshot + WHERE snapshot_id = (SELECT MAX(snapshot_id) + FROM __ducklake_metadata_metalake.main.ducklake_snapshot); + "#, ) - .context(QueryExecutionSnafu)?; - - let current_snapshot_info = snapshot_stmt + .context(QueryExecutionSnafu)? .query_row([], |row| { Ok(SnapshotInfo { snapshot_id: row.get("snapshot_id")?, @@ -440,18 +462,13 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { next_file_id: row.get("next_file_id")?, }) }) - .context(QueryExecutionSnafu)?; - - Ok(current_snapshot_info) + .context(QueryExecutionSnafu) } fn fetch_current_schema(&self, schema: Option<&str>, table: &str) -> Result { - // Construct the table reference (schema.table or just table) - let table_ref = if let Some(s) = schema { - format!("{}.{}", s, table) - } else { - table.to_string() - }; + let table_ref = schema + .map(|s| format!("{}.{}", s, table)) + .unwrap_or_else(|| table.to_string()); let schema_query = format!("DESCRIBE {};", table_ref); @@ -495,18 +512,15 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { } fn fetch_current_schema_info(&self) -> Result { - let mut stmt = self - .conn + self.conn .prepare( r#" SELECT ds.schema_id, ds.schema_name, ds.begin_snapshot, ds.end_snapshot FROM __ducklake_metadata_metalake.main.ducklake_schema ds WHERE ds.schema_name = current_schema(); - "#, + "#, ) - .context(QueryExecutionSnafu)?; - - let snapshot_id = stmt + 
.context(QueryExecutionSnafu)? .query_row([], |row| { Ok(CurrentSchema { schema_name: row.get("schema_name")?, @@ -515,9 +529,7 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { end_snapshot: row.get("end_snapshot")?, }) }) - .context(QueryExecutionSnafu)?; - - Ok(snapshot_id) + .context(QueryExecutionSnafu) } fn fetch_table_statistics( @@ -526,39 +538,8 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { snapshot: i64, conn: &Connection, ) -> Result, Error> { - // Query for table statistics at the snapshot let mut stmt = conn - .prepare( - r#" - SELECT - ts.table_id, - dc.column_id, - dc.column_name, - dc.column_type, - ts.record_count, - ts.next_row_id, - ts.file_size_bytes, - tcas.stats_type, - tcas.payload - FROM __ducklake_metadata_metalake.main.ducklake_table_stats ts - INNER JOIN __ducklake_metadata_metalake.main.ducklake_table dt ON ts.table_id = dt.table_id - INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id - INNER JOIN __ducklake_metadata_metalake.main.ducklake_column dc ON dt.table_id = dc.table_id - LEFT JOIN __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats tcas - ON dc.table_id = tcas.table_id - AND dc.column_id = tcas.column_id - AND ? >= tcas.begin_snapshot - AND (? < tcas.end_snapshot OR tcas.end_snapshot IS NULL) - WHERE - ds.schema_name = current_schema() - AND dt.table_name = ? - AND ts.record_count IS NOT NULL - AND ts.file_size_bytes IS NOT NULL - AND ? >= dc.begin_snapshot - AND (? < dc.end_snapshot OR dc.end_snapshot IS NULL) - ORDER BY ts.table_id, dc.column_id, tcas.stats_type; - "# - ) + .prepare(FETCH_TABLE_STATS_QUERY) .context(QueryExecutionSnafu)?; let entries = stmt @@ -587,9 +568,7 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { .context(QueryExecutionSnafu)? 
.map(|result| result.context(QueryExecutionSnafu)); - let table_stats: TableStatistics = TableStatistics::from_iter(entries); - - Ok(Some(table_stats)) + Ok(Some(TableStatistics::from_iter(entries))) } /// Update table column statistics @@ -607,31 +586,10 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { let current_snapshot = self.fetch_current_snapshot_info()?; let current_snapshot_id = current_snapshot.snapshot_id; - // match the stats_type and see if it's in the regular column stats - match stats_type { - "min_value" | "max_value" => { - self.update_regular_column_stats(column_id, table_id, stats_type, payload)?; - } - // Still update the advanced stats for these types - _ => {} - } - // Update matching past snapshot to close it - let mut update_stmt = self - .conn - .prepare( - r#" - UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - SET end_snapshot = ? - WHERE end_snapshot IS NULL - AND stats_type = ? - AND column_id = ? - AND table_id = ?; - "#, - ) - .context(QueryExecutionSnafu)?; - - update_stmt + self.conn + .prepare(UPDATE_ADV_STATS_QUERY) + .context(QueryExecutionSnafu)? .execute(params![ current_snapshot_id + 1, stats_type, @@ -641,18 +599,9 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { .context(QueryExecutionSnafu)?; // Insert new snapshot - let mut insert_stmt = self - .conn - .prepare( - r#" - INSERT INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) - VALUES (?, ?, ?, ?, ?, ?); - "#, - ) - .context(QueryExecutionSnafu)?; - - insert_stmt + self.conn + .prepare(INSERT_ADV_STATS_QUERY) + .context(QueryExecutionSnafu)? 
.execute(params![ column_id, current_snapshot_id + 1, @@ -663,18 +612,9 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { ]) .context(QueryExecutionSnafu)?; - let mut new_snap_stmt = self - .conn - .prepare( - r#" - INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot - (snapshot_id, snapshot_time, schema_version, next_catalog_id, next_file_id) - VALUES (?, NOW(), ?, ?, ?); - "#, - ) - .context(QueryExecutionSnafu)?; - - new_snap_stmt + self.conn + .prepare(INSERT_SNAPSHOT_QUERY) + .context(QueryExecutionSnafu)? .execute(params![ current_snapshot_id + 1, current_snapshot.schema_version, @@ -683,18 +623,9 @@ impl StatisticsProvider for DuckLakeStatisticsProvider { ]) .context(QueryExecutionSnafu)?; - let mut new_snap_change_stmt = self - .conn - .prepare( - r#" - INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot_changes - (snapshot_id, changes_made, author, commit_message, commit_extra_info) - VALUES (?, ?, ?, ?, ?); - "#, - ) - .context(QueryExecutionSnafu)?; - - new_snap_change_stmt + self.conn + .prepare(INSERT_SNAPSHOT_CHANGE_QUERY) + .context(QueryExecutionSnafu)? .execute(params![ current_snapshot_id + 1, format!( diff --git a/optd/statistics/tests/statistics_tests.rs b/optd/statistics/tests/statistics_tests.rs index d3c02bc..bfa20a8 100644 --- a/optd/statistics/tests/statistics_tests.rs +++ b/optd/statistics/tests/statistics_tests.rs @@ -4,11 +4,10 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; use tempfile::TempDir; -// Counter to ensure unique database names static TEST_COUNTER: AtomicU64 = AtomicU64::new(0); -fn create_test_statistics_provider(for_file: bool) -> (TempDir, DuckLakeStatisticsProvider) { - // Create a unique subdirectory to separate DuckLake metadata for each test +/// Creates a test statistics provider with isolated metadata directory. 
+fn create_test_provider(for_file: bool) -> (TempDir, DuckLakeStatisticsProvider) { let temp_dir = TempDir::new().unwrap(); let counter = TEST_COUNTER.fetch_add(1, Ordering::SeqCst); let timestamp = SystemTime::now() @@ -20,56 +19,83 @@ fn create_test_statistics_provider(for_file: bool) -> (TempDir, DuckLakeStatisti .join(format!("db_{}_{}", timestamp, counter)); std::fs::create_dir_all(&unique_dir).unwrap(); let metadata_path = unique_dir.join("metadata.ducklake"); - if !for_file { - let provider = - DuckLakeStatisticsProvider::try_new(None, Some(metadata_path.to_str().unwrap())) - .unwrap(); - (temp_dir, provider) - } else { + + let provider = if for_file { let db_path = unique_dir.join("test.db"); - let provider = DuckLakeStatisticsProvider::try_new( + DuckLakeStatisticsProvider::try_new( Some(db_path.to_str().unwrap()), Some(metadata_path.to_str().unwrap()), ) - .unwrap(); - (temp_dir, provider) + } else { + DuckLakeStatisticsProvider::try_new(None, Some(metadata_path.to_str().unwrap())) } + .unwrap(); + + (temp_dir, provider) +} + +/// Creates a test provider with a pre-populated test_table (id, name, age columns). 
+fn create_test_provider_with_data() -> (TempDir, DuckLakeStatisticsProvider, i64, i64) { + let (temp_dir, provider) = create_test_provider(false); + let conn = provider.get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE test_table (id INTEGER, name VARCHAR, age INTEGER); + INSERT INTO test_table VALUES (1, 'Alice', 30), (2, 'Bob', 25), (3, 'Charlie', 35); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'test_table'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let age_column_id: i64 = conn + .query_row( + r#" + SELECT column_id + FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'age'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + (temp_dir, provider, table_id, age_column_id) } #[test] fn test_ducklake_statistics_provider_creation() { - { - // Test memory-based provider - let _memory_provider = create_test_statistics_provider(false); - // The provider creation is already asserted in create_test_provider - } - - { - // Test file-based provider with unique temporary database - let (_temp_dir, _provider) = create_test_statistics_provider(true); - // The provider creation is already asserted in create_test_provider - } + // Test both memory-based and file-based provider creation. + let (_temp_dir, _provider) = create_test_provider(false); + let (_temp_dir, _provider) = create_test_provider(true); } #[test] fn test_table_stats_insertion() { - let (_temp_dir, provider) = create_test_statistics_provider(true); + // Test basic statistics insertion without errors. 
+ let (_temp_dir, provider) = create_test_provider(true); - // Insert table statistics let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); - match &result { - Ok(_) => println!("Table stats insertion successful"), - Err(e) => println!("Table stats insertion failed: {}", e), - } assert!(result.is_ok()); } #[test] fn test_table_stats_insertion_and_retrieval() { + // Test inserting and retrieving multiple statistics types for a column. let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); let conn = provider.get_connection(); - // Insert some statistics for the age column provider .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); @@ -85,113 +111,61 @@ fn test_table_stats_insertion_and_retrieval() { ) .unwrap(); - // Fetch statistics at the latest snapshot let latest_snapshot = provider.fetch_current_snapshot().unwrap(); let stats = provider .fetch_table_statistics("test_table", latest_snapshot.0, conn) + .unwrap() .unwrap(); - assert!(stats.is_some()); - let table_stats = stats.unwrap(); - - // Verify we have statistics for all 3 columns (id, name, age) - assert_eq!(table_stats.column_statistics.len(), 3); - assert_eq!(table_stats.row_count, 3); // 3 rows in test_table + assert_eq!(stats.column_statistics.len(), 3); + assert_eq!(stats.row_count, 3); - // Find the age column statistics - let age_stats = table_stats + let age_stats = stats .column_statistics .iter() .find(|cs| cs.name == "age") .expect("Should have statistics for age column"); - // Verify advanced stats were retrieved - assert_eq!(age_stats.advanced_stats.len(), 3); // min_value, max_value, histogram - - let min_stat = age_stats - .advanced_stats - .iter() - .find(|s| s.stats_type == "min_value") - .expect("Should have min_value stat"); - // The value gets parsed as JSON, so "25" becomes the number 25 - assert!(min_stat.data == serde_json::json!(25) || min_stat.data == serde_json::json!("25")); - - 
let max_stat = age_stats - .advanced_stats - .iter() - .find(|s| s.stats_type == "max_value") - .expect("Should have max_value stat"); - assert!(max_stat.data == serde_json::json!(35) || max_stat.data == serde_json::json!("35")); - - let histogram_stat = age_stats - .advanced_stats - .iter() - .find(|s| s.stats_type == "histogram") - .expect("Should have histogram stat"); - assert!(histogram_stat.data.to_string().contains("buckets")); - - println!("✓ Table stats insertion and retrieval successful"); - println!( - " - Columns retrieved: {}", - table_stats.column_statistics.len() + assert_eq!(age_stats.advanced_stats.len(), 3); + assert!( + age_stats + .advanced_stats + .iter() + .any(|s| s.stats_type == "min_value" && (s.data == json!(25) || s.data == json!("25"))) + ); + assert!( + age_stats + .advanced_stats + .iter() + .any(|s| s.stats_type == "max_value" && (s.data == json!(35) || s.data == json!("35"))) ); - println!(" - Row count: {}", table_stats.row_count); - println!( - " - Age column advanced stats: {}", - age_stats.advanced_stats.len() + assert!( + age_stats + .advanced_stats + .iter() + .any(|s| s.stats_type == "histogram" && s.data.to_string().contains("buckets")) ); } #[test] fn test_fetch_current_schema() { - let (_temp_dir, provider) = create_test_statistics_provider(true); - - // Fetch the current schema - let result = provider.fetch_current_schema_info(); - if let Err(ref e) = result { - println!("Error fetching current schema: {}", e); - } - assert!( - result.is_ok(), - "Expected fetch_current_schema to succeed, got error: {:?}", - result.err() - ); - - let schema = result.unwrap(); - - // Verify the schema has valid snapshot information - println!( - "Schema name: {}, Schema ID: {}, Begin snapshot: {}, End snapshot: {:?}", - schema.schema_name, schema.schema_id, schema.begin_snapshot, schema.end_snapshot - ); + // Test fetching current schema info returns valid metadata. 
+ let (_temp_dir, provider) = create_test_provider(true); - // The schema should have a begin_snapshot value (0 for initial schema in DuckLake) - assert_eq!( - schema.schema_name, "main", - "Expected default schema to be 'main'" - ); - assert_eq!( - schema.schema_id, 0, - "Expected schema_id to be 0 for default schema" - ); - assert!( - schema.begin_snapshot >= 0, - "Schema should have a valid begin_snapshot" - ); + let schema = provider.fetch_current_schema_info().unwrap(); - // End snapshot should be None for current active schema - assert!( - schema.end_snapshot.is_none(), - "Current schema should have no end_snapshot (should be None)" - ); + assert_eq!(schema.schema_name, "main"); + assert_eq!(schema.schema_id, 0); + assert!(schema.begin_snapshot >= 0); + assert!(schema.end_snapshot.is_none()); } #[test] fn test_snapshot_versioning_and_stats_types() { - let (_temp_dir, provider) = create_test_statistics_provider(true); + // Test snapshot creation, versioning, and continuity for multiple stats updates. 
+ let (_temp_dir, provider) = create_test_provider(true); let conn = provider.get_connection(); - // Test 1: Multiple columns with sequential snapshots provider .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) .unwrap(); @@ -202,18 +176,16 @@ fn test_snapshot_versioning_and_stats_types() { .update_table_column_stats(3, 1, "histogram", r#"{"buckets": [1,2,3]}"#) .unwrap(); - // Verify different columns have sequential snapshots - let mut stmt = conn + let snapshots: Vec<(i64, i64)> = conn .prepare( r#" - SELECT column_id, begin_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = 1 + SELECT column_id, begin_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 ORDER BY begin_snapshot; "#, ) - .unwrap(); - let snapshots: Vec<(i64, i64)> = stmt + .unwrap() .query_map([], |row| Ok((row.get(0)?, row.get(1)?))) .unwrap() .map(|r| r.unwrap()) @@ -222,7 +194,6 @@ fn test_snapshot_versioning_and_stats_types() { assert!(snapshots[1].1 > snapshots[0].1); assert!(snapshots[2].1 > snapshots[1].1); - // Test 2: Update same column multiple times - verify snapshot continuity provider .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1500}"#) .unwrap(); @@ -230,40 +201,29 @@ fn test_snapshot_versioning_and_stats_types() { .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 2000}"#) .unwrap(); - let mut version_stmt = conn + let versions: Vec<(i64, Option, String)> = conn .prepare( r#" - SELECT begin_snapshot, end_snapshot, payload - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = 1 AND column_id = 1 AND stats_type = 'ndv' + SELECT begin_snapshot, end_snapshot, payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 AND column_id = 1 AND stats_type = 'ndv' ORDER BY begin_snapshot; "#, ) - .unwrap(); - let versions: Vec<(i64, Option, String)> = version_stmt + 
.unwrap() .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?))) .unwrap() .map(|r| r.unwrap()) .collect(); - // Should have 3 versions (original + 2 updates) assert_eq!(versions.len(), 3); - - // First two closed, last one current - assert!(versions[0].1.is_some()); - assert!(versions[1].1.is_some()); - assert!(versions[2].1.is_none()); - - // Verify snapshot continuity - end_snapshot should equal next begin_snapshot + assert!(versions[0].1.is_some() && versions[1].1.is_some() && versions[2].1.is_none()); assert_eq!(versions[0].1.unwrap(), versions[1].0); assert_eq!(versions[1].1.unwrap(), versions[2].0); - - // Verify payloads updated correctly assert!(versions[0].2.contains("1000")); assert!(versions[1].2.contains("1500")); assert!(versions[2].2.contains("2000")); - // Test 3: Multiple stat types for same column coexist provider .update_table_column_stats(1, 1, "histogram", r#"{"buckets": [1,2,3,4,5]}"#) .unwrap(); @@ -274,23 +234,23 @@ fn test_snapshot_versioning_and_stats_types() { let type_count: i64 = conn .query_row( r#" - SELECT COUNT(DISTINCT stats_type) - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + SELECT COUNT(DISTINCT stats_type) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 1 AND column_id = 1 AND end_snapshot IS NULL - "#, + "#, [], |row| row.get(0), ) .unwrap(); - assert_eq!(type_count, 3); // ndv, histogram, minmax + assert_eq!(type_count, 3); } #[test] fn test_snapshot_tracking_and_multi_table_stats() { - let (_temp_dir, provider) = create_test_statistics_provider(true); + // Test snapshot creation tracking and statistics isolation across multiple tables. 
+ let (_temp_dir, provider) = create_test_provider(true); let conn = provider.get_connection(); - // Get initial snapshot count let initial_count: i64 = conn .query_row( "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", @@ -299,7 +259,6 @@ fn test_snapshot_tracking_and_multi_table_stats() { ) .unwrap(); - // Test 1: Snapshot creation tracking - insert stats for 3 columns provider .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) .unwrap(); @@ -319,21 +278,19 @@ fn test_snapshot_tracking_and_multi_table_stats() { .unwrap(); assert_eq!(after_table1_count - initial_count, 3); - // Verify snapshot_changes were recorded let changes_count: i64 = conn .query_row( r#" - SELECT COUNT(*) - FROM __ducklake_metadata_metalake.main.ducklake_snapshot_changes + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_snapshot_changes WHERE changes_made LIKE 'updated_stats:%' - "#, + "#, [], |row| row.get(0), ) .unwrap(); assert_eq!(changes_count, 3); - // Test 2: Multiple tables with independent tracking provider .update_table_column_stats(1, 2, "ndv", r#"{"distinct_count": 5000}"#) .unwrap(); @@ -341,163 +298,102 @@ fn test_snapshot_tracking_and_multi_table_stats() { .update_table_column_stats(2, 2, "ndv", r#"{"distinct_count": 6000}"#) .unwrap(); - // Verify each table has correct number of stats let table1_count: i64 = conn .query_row( - "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 1", + r#" + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 + "#, [], |row| row.get(0), ) .unwrap(); let table2_count: i64 = conn .query_row( - "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 2", + r#" + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 2 + "#, [], |row| row.get(0), ) .unwrap(); - 
assert_eq!(table1_count, 3); // 3 columns from table 1 - assert_eq!(table2_count, 2); // 2 columns from table 2 + assert_eq!(table1_count, 3); + assert_eq!(table2_count, 2); - // Verify all snapshots are sequential across tables - let mut snapshot_stmt = conn + let all_snapshots: Vec = conn .prepare( r#" - SELECT table_id, column_id, begin_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + SELECT begin_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats ORDER BY begin_snapshot - "#, + "#, ) - .unwrap(); - let all_snapshots: Vec = snapshot_stmt - .query_map([], |row| row.get(2)) + .unwrap() + .query_map([], |row| row.get(0)) .unwrap() .map(|r| r.unwrap()) .collect(); - // All 5 snapshots should be increasing for i in 1..all_snapshots.len() { assert!(all_snapshots[i] > all_snapshots[i - 1]); } } -/// Helper function to create a test provider with sample table data -fn create_test_provider_with_data() -> (TempDir, DuckLakeStatisticsProvider, i64, i64) { - let (_temp_dir, provider) = create_test_statistics_provider(false); - let conn = provider.get_connection(); - - // Create a sample table with data - conn.execute_batch( - r#" - CREATE TABLE test_table ( - id INTEGER, - name VARCHAR, - age INTEGER - ); - - INSERT INTO test_table VALUES - (1, 'Alice', 30), - (2, 'Bob', 25), - (3, 'Charlie', 35); - "#, - ) - .unwrap(); - - // Get table_id and column_ids - let mut table_id_stmt = conn - .prepare( - r#" - SELECT table_id - FROM __ducklake_metadata_metalake.main.ducklake_table dt - INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id - WHERE ds.schema_name = current_schema() AND dt.table_name = 'test_table'; - "#, - ) - .unwrap(); - let table_id: i64 = table_id_stmt.query_row([], |row| row.get(0)).unwrap(); - - // Get the column_id for 'age' column (we'll update stats for this) - let mut column_id_stmt = conn - .prepare( - r#" - SELECT column_id - FROM 
__ducklake_metadata_metalake.main.ducklake_column - WHERE table_id = ? AND column_name = 'age'; - "#, - ) - .unwrap(); - let age_column_id: i64 = column_id_stmt - .query_row([table_id], |row| row.get(0)) - .unwrap(); - - (_temp_dir, provider, table_id, age_column_id) -} - #[test] fn test_update_and_fetch_table_column_stats() { + // Test updating min/max values and advanced statistics with snapshot progression. let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); let conn = provider.get_connection(); - // Get initial snapshot let initial_snapshot = provider.fetch_current_snapshot().unwrap(); - println!("Initial snapshot ID: {}", initial_snapshot.0); - - // Fetch initial statistics (should have default values from table creation) - let initial_stats = provider - .fetch_table_statistics("test_table", initial_snapshot.0, conn) - .unwrap(); - assert!(initial_stats.is_some()); + assert!( + provider + .fetch_table_statistics("test_table", initial_snapshot.0, conn) + .unwrap() + .is_some() + ); - // Update min_value for age column provider .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); - let snapshot_after_min = provider.fetch_current_snapshot().unwrap(); assert_eq!(snapshot_after_min.0, initial_snapshot.0 + 1); - // Update max_value for age column provider .update_table_column_stats(age_column_id, table_id, "max_value", "35") .unwrap(); - let snapshot_after_max = provider.fetch_current_snapshot().unwrap(); assert_eq!(snapshot_after_max.0, initial_snapshot.0 + 2); - // Verify the regular column stats were updated - let mut verify_stmt = conn - .prepare( + let (min_val, max_val): (Option, Option) = conn + .query_row( r#" - SELECT min_value, max_value - FROM __ducklake_metadata_metalake.main.ducklake_table_column_stats - WHERE table_id = ? AND column_id = ?; + SELECT min_value, max_value + FROM __ducklake_metadata_metalake.main.ducklake_table_column_stats + WHERE table_id = ? 
AND column_id = ?; "#, + [table_id, age_column_id], + |row| Ok((row.get(0)?, row.get(1)?)), ) .unwrap(); - let (min_val, max_val): (Option, Option) = verify_stmt - .query_row([table_id, age_column_id], |row| { - Ok((row.get(0)?, row.get(1)?)) - }) - .unwrap(); - assert_eq!(min_val, Some("25".to_string())); assert_eq!(max_val, Some("35".to_string())); - // Verify advanced stats were also created in ducklake_table_column_adv_stats - let mut adv_stats_stmt = conn + let adv_stats: Vec<(String, String, i64, Option)> = conn .prepare( r#" - SELECT stats_type, payload, begin_snapshot, end_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = ? AND column_id = ? - ORDER BY stats_type, begin_snapshot; + SELECT stats_type, payload, begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = ? AND column_id = ? + ORDER BY stats_type, begin_snapshot; "#, ) - .unwrap(); - - let adv_stats: Vec<(String, String, i64, Option)> = adv_stats_stmt + .unwrap() .query_map([table_id, age_column_id], |row| { Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)) }) @@ -506,85 +402,38 @@ fn test_update_and_fetch_table_column_stats() { .collect(); assert_eq!(adv_stats.len(), 2); - assert_eq!(adv_stats[0].0, "max_value"); - assert_eq!(adv_stats[1].0, "min_value"); - assert_eq!(adv_stats[0].1, "35"); - assert_eq!(adv_stats[1].1, "25"); - assert_eq!(adv_stats[0].2, initial_snapshot.0 + 2); - assert_eq!(adv_stats[1].2, initial_snapshot.0 + 1); - assert!(adv_stats[0].3.is_none()); - assert!(adv_stats[1].3.is_none()); - - let max_value_entry = adv_stats - .iter() - .find(|(stats_type, _, _, _)| stats_type == "max_value") - .expect("max_value entry should exist"); - assert_eq!(max_value_entry.1, "35"); - assert_eq!(max_value_entry.2, initial_snapshot.0 + 2); - assert!(max_value_entry.3.is_none()); - - let min_value_entry = adv_stats - .iter() - .find(|(stats_type, _, _, _)| stats_type == 
"min_value") - .expect("min_value entry should exist"); - assert_eq!(min_value_entry.1, "25"); - assert_eq!(min_value_entry.2, initial_snapshot.0 + 1); - assert!(min_value_entry.3.is_none()); - - // Test updating an advanced stat type (histogram) - let histogram_data = json!({ - "buckets": [ - {"min": 20, "max": 30, "count": 2}, - {"min": 30, "max": 40, "count": 1} - ] - }); + assert!( + adv_stats + .iter() + .any(|(st, p, _, e)| st == "max_value" && p == "35" && e.is_none()) + ); + assert!( + adv_stats + .iter() + .any(|(st, p, _, e)| st == "min_value" && p == "25" && e.is_none()) + ); provider .update_table_column_stats( age_column_id, table_id, "histogram", - &histogram_data.to_string(), + &json!({"buckets": [{"min": 20, "max": 30, "count": 2}, {"min": 30, "max": 40, "count": 1}]}).to_string(), ) .unwrap(); let snapshot_after_histogram = provider.fetch_current_snapshot().unwrap(); assert_eq!(snapshot_after_histogram.0, initial_snapshot.0 + 3); - - // Verify histogram was added to advanced stats - let mut histogram_stmt = conn - .prepare( - r#" - SELECT payload - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = ? AND column_id = ? 
AND stats_type = 'histogram' AND end_snapshot IS NULL; - "#, - ) - .unwrap(); - - let histogram_payload: String = histogram_stmt - .query_row([table_id, age_column_id], |row| row.get(0)) - .unwrap(); - - assert_eq!(histogram_payload, histogram_data.to_string()); - - println!("All update and fetch operations completed"); - println!(" Initial snapshot: {}", initial_snapshot.0); - println!(" After min_value update: {}", snapshot_after_min.0); - println!(" After max_value update: {}", snapshot_after_max.0); - println!(" After histogram update: {}", snapshot_after_histogram.0); } #[test] fn test_fetch_table_stats_with_snapshot_time_travel() { - // Test that fetching statistics at different snapshots returns correct historical data + // Test time-travel capability by fetching statistics at different snapshot points. let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); let conn = provider.get_connection(); let snapshot_0 = provider.fetch_current_snapshot().unwrap(); - println!("Snapshot 0: {}", snapshot_0.0); - // Add first version of histogram provider .update_table_column_stats( age_column_id, @@ -594,9 +443,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { ) .unwrap(); let snapshot_1 = provider.fetch_current_snapshot().unwrap(); - println!("Snapshot 1: {}", snapshot_1.0); - // Add second version of histogram provider .update_table_column_stats( age_column_id, @@ -606,9 +453,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { ) .unwrap(); let snapshot_2 = provider.fetch_current_snapshot().unwrap(); - println!("Snapshot 2: {}", snapshot_2.0); - // Add third version provider .update_table_column_stats( age_column_id, @@ -618,40 +463,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { ) .unwrap(); let snapshot_3 = provider.fetch_current_snapshot().unwrap(); - println!("Snapshot 3: {}", snapshot_3.0); - // Check the database - let mut debug_stmt = conn - .prepare( - r#" - SELECT column_id, stats_type, 
begin_snapshot, end_snapshot, payload - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = ? AND column_id = ? - ORDER BY begin_snapshot; - "#, - ) - .unwrap(); - println!("\nAdvanced stats in database:"); - for row in debug_stmt - .query_map([table_id, age_column_id], |row| { - Ok(( - row.get::<_, i64>(0)?, - row.get::<_, String>(1)?, - row.get::<_, i64>(2)?, - row.get::<_, Option>(3)?, - row.get::<_, String>(4)?, - )) - }) - .unwrap() - { - let (col_id, stats_type, begin, end, payload) = row.unwrap(); - println!( - " col={}, type={}, begin={}, end={:?}, payload={}", - col_id, stats_type, begin, end, payload - ); - } - - // Fetch at snapshot 0 - should have no advanced stats let stats_at_0 = provider .fetch_table_statistics("test_table", snapshot_0.0, conn) .unwrap() @@ -663,7 +475,6 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { .unwrap(); assert_eq!(age_stats_0.advanced_stats.len(), 0); - // Fetch at snapshot 1 - should have version 1 let stats_at_1 = provider .fetch_table_statistics("test_table", snapshot_1.0, conn) .unwrap() @@ -674,10 +485,13 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { .find(|cs| cs.name == "age") .unwrap(); assert_eq!(age_stats_1.advanced_stats.len(), 1); - let histogram_1 = &age_stats_1.advanced_stats[0]; - assert!(histogram_1.data.to_string().contains("\"version\":1")); + assert!( + age_stats_1.advanced_stats[0] + .data + .to_string() + .contains("\"version\":1") + ); - // Fetch at snapshot 2 - should have version 2 let stats_at_2 = provider .fetch_table_statistics("test_table", snapshot_2.0, conn) .unwrap() @@ -688,10 +502,13 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { .find(|cs| cs.name == "age") .unwrap(); assert_eq!(age_stats_2.advanced_stats.len(), 1); - let histogram_2 = &age_stats_2.advanced_stats[0]; - assert!(histogram_2.data.to_string().contains("\"version\":2")); + assert!( + age_stats_2.advanced_stats[0] + .data + .to_string() + 
.contains("\"version\":2") + ); - // Fetch at snapshot 3 - should have version 3 let stats_at_3 = provider .fetch_table_statistics("test_table", snapshot_3.0, conn) .unwrap() @@ -702,26 +519,20 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { .find(|cs| cs.name == "age") .unwrap(); assert_eq!(age_stats_3.advanced_stats.len(), 1); - let histogram_3 = &age_stats_3.advanced_stats[0]; - assert!(histogram_3.data.to_string().contains("\"version\":3")); - - println!("✓ Snapshot time-travel test passed"); - println!( - " - Snapshot 0: {} advanced stats", - age_stats_0.advanced_stats.len() + assert!( + age_stats_3.advanced_stats[0] + .data + .to_string() + .contains("\"version\":3") ); - println!(" - Snapshot 1: version 1 histogram"); - println!(" - Snapshot 2: version 2 histogram"); - println!(" - Snapshot 3: version 3 histogram"); } #[test] fn test_fetch_table_stats_multiple_stat_types() { - // Test fetching when multiple stat types exist for same column + // Test fetching when multiple statistics types exist for the same column. 
let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); let conn = provider.get_connection(); - // Add multiple different stat types provider .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); @@ -760,33 +571,27 @@ fn test_fetch_table_stats_multiple_stat_types() { .find(|cs| cs.name == "age") .unwrap(); - // Should have all 5 stat types assert_eq!(age_stats.advanced_stats.len(), 5); - // Verify all stat types are present let stat_types: Vec<&str> = age_stats .advanced_stats .iter() .map(|s| s.stats_type.as_str()) .collect(); + assert!(stat_types.contains(&"min_value")); assert!(stat_types.contains(&"max_value")); assert!(stat_types.contains(&"histogram")); assert!(stat_types.contains(&"ndv")); assert!(stat_types.contains(&"quantiles")); - - println!("✓ Multiple stat types test passed"); - println!(" - Total stat types: {}", age_stats.advanced_stats.len()); - println!(" - Stat types: {:?}", stat_types); } #[test] fn test_fetch_table_stats_columns_without_stats() { - // Test that columns without advanced stats are still returned + // Test that columns without advanced statistics are still returned in fetch results. 
let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); let conn = provider.get_connection(); - // Only add stats for age column, not for id or name provider .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); @@ -797,10 +602,8 @@ fn test_fetch_table_stats_columns_without_stats() { .unwrap() .unwrap(); - // Should have all 3 columns even though only age has stats assert_eq!(stats.column_statistics.len(), 3); - // Find each column let id_stats = stats .column_statistics .iter() @@ -817,69 +620,52 @@ fn test_fetch_table_stats_columns_without_stats() { .find(|cs| cs.name == "age") .expect("Should have age column"); - // id and name should have no advanced stats assert_eq!(id_stats.advanced_stats.len(), 0); assert_eq!(name_stats.advanced_stats.len(), 0); - - // age should have 1 advanced stat assert_eq!(age_stats.advanced_stats.len(), 1); - - println!("✓ Columns without stats test passed"); - println!(" - Total columns: {}", stats.column_statistics.len()); - println!(" - id stats: {}", id_stats.advanced_stats.len()); - println!(" - name stats: {}", name_stats.advanced_stats.len()); - println!(" - age stats: {}", age_stats.advanced_stats.len()); } #[test] fn test_fetch_table_stats_row_count() { - // Test that row_count is correctly populated - let (_temp_dir, provider) = create_test_statistics_provider(false); + // Test that row_count is correctly populated from table statistics. 
+ let (_temp_dir, provider) = create_test_provider(false); let conn = provider.get_connection(); - // Create table with known row count conn.execute_batch( r#" - CREATE TABLE large_table ( - col1 INTEGER, - col2 VARCHAR - ); - - INSERT INTO large_table - SELECT i, 'value_' || i::VARCHAR - FROM range(1, 101) t(i); + CREATE TABLE large_table (col1 INTEGER, col2 VARCHAR); + INSERT INTO large_table SELECT i, 'value_' || i::VARCHAR FROM range(1, 101) t(i); "#, ) .unwrap(); - // Get table_id - let mut table_id_stmt = conn - .prepare( + let table_id: i64 = conn + .query_row( r#" SELECT table_id - FROM __ducklake_metadata_metalake.main.ducklake_table dt - INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id - WHERE ds.schema_name = current_schema() AND dt.table_name = 'large_table'; + FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds + ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() + AND dt.table_name = 'large_table'; "#, + [], + |row| row.get(0), ) .unwrap(); - let table_id: i64 = table_id_stmt.query_row([], |row| row.get(0)).unwrap(); - // Get column_id for col1 - let mut column_id_stmt = conn - .prepare( + let col1_id: i64 = conn + .query_row( r#" SELECT column_id - FROM __ducklake_metadata_metalake.main.ducklake_column - WHERE table_id = ? AND column_name = 'col1'; + FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'col1'; "#, + [table_id], + |row| row.get(0), ) .unwrap(); - let col1_id: i64 = column_id_stmt - .query_row([table_id], |row| row.get(0)) - .unwrap(); - // Add some stats provider .update_table_column_stats(col1_id, table_id, "ndv", r#"{"distinct_count": 100}"#) .unwrap(); @@ -890,21 +676,16 @@ fn test_fetch_table_stats_row_count() { .unwrap() .unwrap(); - // Verify row count assert_eq!(stats.row_count, 100); - assert_eq!(stats.column_statistics.len(), 2); // col1 and col2 - - println!("✓ Row count test passed"); - println!(" - Row count: {}", stats.row_count); - println!(" - Column count: {}", stats.column_statistics.len()); + assert_eq!(stats.column_statistics.len(), 2); } #[test] fn test_fetch_current_schema_arrow() { - let (_temp_dir, provider) = create_test_statistics_provider(false); + // Test fetching Arrow schema from DuckDB table with type conversions. + let (_temp_dir, provider) = create_test_provider(false); let conn = provider.get_connection(); - // Create a test table conn.execute_batch( r#" CREATE TABLE schema_test_table ( @@ -917,248 +698,168 @@ fn test_fetch_current_schema_arrow() { ) .unwrap(); - // Fetch schema without specifying schema (default to current schema) let schema = provider .fetch_current_schema(None, "schema_test_table") .unwrap(); assert_eq!(schema.fields().len(), 4); - // Verify field names and types let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect(); assert!(field_names.contains(&"id")); assert!(field_names.contains(&"name")); assert!(field_names.contains(&"value")); assert!(field_names.contains(&"active")); - let id_field = schema.field_with_name("id").unwrap(); assert!(matches!( - id_field.data_type(), + schema.field_with_name("id").unwrap().data_type(), &duckdb::arrow::datatypes::DataType::Int32 )); - let name_field = schema.field_with_name("name").unwrap(); assert!(matches!( - name_field.data_type(), + schema.field_with_name("name").unwrap().data_type(), 
&duckdb::arrow::datatypes::DataType::Utf8 )); - let value_field = schema.field_with_name("value").unwrap(); assert!(matches!( - value_field.data_type(), + schema.field_with_name("value").unwrap().data_type(), &duckdb::arrow::datatypes::DataType::Float64 )); - let active_field = schema.field_with_name("active").unwrap(); assert!(matches!( - active_field.data_type(), + schema.field_with_name("active").unwrap().data_type(), &duckdb::arrow::datatypes::DataType::Boolean )); - // Fetch schema with explicit schema name let schema_explicit = provider .fetch_current_schema(Some("main"), "schema_test_table") .unwrap(); - assert_eq!(schema_explicit.fields().len(), 4); - assert_eq!(schema.fields().len(), schema_explicit.fields().len()); - - println!("✓ Schema fetch test passed"); - println!(" - Fields: {}", schema.fields().len()); - println!(" - Field names: {:?}", field_names); } #[test] fn test_multiple_schemas_comprehensive() { - let (_temp_dir, provider) = create_test_statistics_provider(false); + // Test schema fetching and metadata tracking across multiple database schemas. 
+ let (_temp_dir, provider) = create_test_provider(false); let conn = provider.get_connection(); - // Get initial schema info (should be 'main') let initial_schema_info = provider.fetch_current_schema_info().unwrap(); assert_eq!(initial_schema_info.schema_name, "main"); assert_eq!(initial_schema_info.schema_id, 0); assert!(initial_schema_info.end_snapshot.is_none()); - println!("✓ Initial schema: {}", initial_schema_info.schema_name); - // Create additional schemas conn.execute_batch( r#" CREATE SCHEMA analytics; CREATE SCHEMA reporting; + CREATE TABLE main.users (user_id INTEGER, username VARCHAR, email VARCHAR, created_at TIMESTAMP); + CREATE TABLE analytics.metrics (metric_id BIGINT, metric_name VARCHAR, value DOUBLE, recorded_at DATE); + CREATE TABLE reporting.summary (report_id SMALLINT, report_name TEXT, data BLOB, is_published BOOLEAN); "#, ) .unwrap(); - println!("✓ Created additional schemas: analytics, reporting"); - - // Create tables in different schemas - conn.execute_batch( - r#" - -- Table in main schema - CREATE TABLE main.users ( - user_id INTEGER, - username VARCHAR, - email VARCHAR, - created_at TIMESTAMP - ); - - -- Table in analytics schema - CREATE TABLE analytics.metrics ( - metric_id BIGINT, - metric_name VARCHAR, - value DOUBLE, - recorded_at DATE - ); - - -- Table in reporting schema - CREATE TABLE reporting.summary ( - report_id SMALLINT, - report_name TEXT, - data BLOB, - is_published BOOLEAN - ); - "#, - ) - .unwrap(); - println!("✓ Created tables in all schemas"); - // Test 1: Fetch schema from main (without explicit schema parameter) let main_users_schema = provider.fetch_current_schema(None, "users").unwrap(); assert_eq!(main_users_schema.fields().len(), 4); - - let user_id_field = main_users_schema.field_with_name("user_id").unwrap(); assert!(matches!( - user_id_field.data_type(), + main_users_schema + .field_with_name("user_id") + .unwrap() + .data_type(), &duckdb::arrow::datatypes::DataType::Int32 )); - - let username_field = 
main_users_schema.field_with_name("username").unwrap(); assert!(matches!( - username_field.data_type(), + main_users_schema + .field_with_name("username") + .unwrap() + .data_type(), &duckdb::arrow::datatypes::DataType::Utf8 )); - - let created_at_field = main_users_schema.field_with_name("created_at").unwrap(); assert!(matches!( - created_at_field.data_type(), + main_users_schema + .field_with_name("created_at") + .unwrap() + .data_type(), &duckdb::arrow::datatypes::DataType::Timestamp(_, _) )); - println!("✓ Fetched main.users schema (4 fields)"); - - // Test 2: Fetch schema from main (with explicit schema parameter) - let main_users_schema_explicit = provider - .fetch_current_schema(Some("main"), "users") - .unwrap(); - assert_eq!(main_users_schema_explicit.fields().len(), 4); - println!("✓ Fetched main.users schema explicitly"); - - // Test 3: Fetch schema from analytics schema let analytics_metrics_schema = provider .fetch_current_schema(Some("analytics"), "metrics") .unwrap(); assert_eq!(analytics_metrics_schema.fields().len(), 4); - - let metric_id_field = analytics_metrics_schema - .field_with_name("metric_id") - .unwrap(); assert!(matches!( - metric_id_field.data_type(), + analytics_metrics_schema + .field_with_name("metric_id") + .unwrap() + .data_type(), &duckdb::arrow::datatypes::DataType::Int64 )); - - let value_field = analytics_metrics_schema.field_with_name("value").unwrap(); assert!(matches!( - value_field.data_type(), + analytics_metrics_schema + .field_with_name("value") + .unwrap() + .data_type(), &duckdb::arrow::datatypes::DataType::Float64 )); - - let recorded_at_field = analytics_metrics_schema - .field_with_name("recorded_at") - .unwrap(); assert!(matches!( - recorded_at_field.data_type(), + analytics_metrics_schema + .field_with_name("recorded_at") + .unwrap() + .data_type(), &duckdb::arrow::datatypes::DataType::Date32 )); - println!("✓ Fetched analytics.metrics schema (4 fields)"); - - // Test 4: Fetch schema from reporting schema let 
reporting_summary_schema = provider .fetch_current_schema(Some("reporting"), "summary") .unwrap(); assert_eq!(reporting_summary_schema.fields().len(), 4); - - let report_id_field = reporting_summary_schema - .field_with_name("report_id") - .unwrap(); assert!(matches!( - report_id_field.data_type(), + reporting_summary_schema + .field_with_name("report_id") + .unwrap() + .data_type(), &duckdb::arrow::datatypes::DataType::Int16 )); - - let report_name_field = reporting_summary_schema - .field_with_name("report_name") - .unwrap(); - assert!(matches!( - report_name_field.data_type(), - &duckdb::arrow::datatypes::DataType::Utf8 - )); - - let data_field = reporting_summary_schema.field_with_name("data").unwrap(); assert!(matches!( - data_field.data_type(), + reporting_summary_schema + .field_with_name("data") + .unwrap() + .data_type(), &duckdb::arrow::datatypes::DataType::Binary )); - - let is_published_field = reporting_summary_schema - .field_with_name("is_published") - .unwrap(); assert!(matches!( - is_published_field.data_type(), + reporting_summary_schema + .field_with_name("is_published") + .unwrap() + .data_type(), &duckdb::arrow::datatypes::DataType::Boolean )); - println!("✓ Fetched reporting.summary schema (4 fields)"); - - // Test 5: Verify schema_info still returns main (current schema) let current_schema_info = provider.fetch_current_schema_info().unwrap(); assert_eq!(current_schema_info.schema_name, "main"); - println!("✓ Current schema is still 'main'"); - // Test 6: Switch to analytics schema and verify conn.execute("USE analytics;", []).unwrap(); let analytics_schema_info = provider.fetch_current_schema_info().unwrap(); assert_eq!(analytics_schema_info.schema_name, "analytics"); assert!(analytics_schema_info.end_snapshot.is_none()); - println!("✓ Switched to analytics schema"); - // Test 7: Fetch table from current schema (analytics) without explicit schema let metrics_schema_implicit = provider.fetch_current_schema(None, "metrics").unwrap(); 
assert_eq!(metrics_schema_implicit.fields().len(), 4); - println!("✓ Fetched metrics from current schema (analytics) implicitly"); - // Test 8: Can still access other schemas explicitly let users_from_main = provider .fetch_current_schema(Some("main"), "users") .unwrap(); assert_eq!(users_from_main.fields().len(), 4); - println!("✓ Can still access main.users from analytics schema"); - // Test 9: Switch to reporting and verify conn.execute("USE reporting;", []).unwrap(); let reporting_schema_info = provider.fetch_current_schema_info().unwrap(); assert_eq!(reporting_schema_info.schema_name, "reporting"); - println!("✓ Switched to reporting schema"); - // Test 10: Verify all schemas exist in metadata - let mut schema_list_stmt = conn + let schemas: Vec<(String, i64, i64, Option)> = conn .prepare( r#" - SELECT schema_name, schema_id, begin_snapshot, end_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_schema - ORDER BY schema_id; + SELECT schema_name, schema_id, begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_schema + ORDER BY schema_id; "#, ) - .unwrap(); - - let schemas: Vec<(String, i64, i64, Option)> = schema_list_stmt + .unwrap() .query_map([], |row| { Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)) }) @@ -1166,7 +867,6 @@ fn test_multiple_schemas_comprehensive() { .map(|r| r.unwrap()) .collect(); - // Should have at least 3 schemas: main, analytics, reporting assert!(schemas.len() >= 3); let schema_names: Vec<&str> = schemas @@ -1177,9 +877,506 @@ fn test_multiple_schemas_comprehensive() { assert!(schema_names.contains(&"analytics")); assert!(schema_names.contains(&"reporting")); - // All schemas should be active (end_snapshot is None) for (name, _, _, end_snapshot) in &schemas { - println!(" Schema: {}, end_snapshot: {:?}", name, end_snapshot); assert!(end_snapshot.is_none(), "Schema {} should be active", name); } } + +#[test] +fn test_error_handling_edge_cases() { + // Test various error scenarios: non-existent 
tables, invalid snapshots, invalid IDs. + let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let conn = provider.get_connection(); + + // Non-existent table returns empty results + let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let stats = provider + .fetch_table_statistics("nonexistent_table", current_snapshot.0, conn) + .unwrap(); + assert!(stats.is_some()); + assert_eq!(stats.unwrap().column_statistics.len(), 0); + + // Invalid/future snapshot still returns data + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + let future_stats = provider + .fetch_table_statistics("test_table", 99999, conn) + .unwrap(); + assert!(future_stats.is_some()); + assert_eq!(future_stats.unwrap().column_statistics.len(), 3); + + // Updating with invalid IDs succeeds without error + let result = + provider.update_table_column_stats(9999, 9999, "ndv", r#"{"distinct_count": 100}"#); + assert!(result.is_ok()); + + // Fetching schema for non-existent table returns error + assert!( + provider + .fetch_current_schema(None, "nonexistent_table") + .is_err() + ); + + // Invalid schema name returns error + conn.execute_batch("CREATE TABLE test (id INTEGER);") + .unwrap(); + assert!( + provider + .fetch_current_schema(Some("nonexistent_schema"), "test") + .is_err() + ); +} + +#[test] +fn test_update_same_stat_rapidly() { + // Test updating the same statistic multiple times in rapid succession. 
+ let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let conn = provider.get_connection(); + + let initial_snapshot = provider.fetch_current_snapshot().unwrap(); + + for i in 1..=5 { + provider + .update_table_column_stats( + age_column_id, + table_id, + "ndv", + &format!(r#"{{"distinct_count": {}}}"#, i * 100), + ) + .unwrap(); + } + + let final_snapshot = provider.fetch_current_snapshot().unwrap(); + assert_eq!(final_snapshot.0, initial_snapshot.0 + 5); + + let versions: Vec<(i64, Option)> = conn + .prepare( + r#" + SELECT begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = ? AND column_id = ? AND stats_type = 'ndv' + ORDER BY begin_snapshot; + "#, + ) + .unwrap() + .query_map([table_id, age_column_id], |row| { + Ok((row.get(0)?, row.get(1)?)) + }) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + + assert_eq!(versions.len(), 5); + for i in 0..4 { + assert!(versions[i].1.is_some()); + assert_eq!(versions[i].1.unwrap(), versions[i + 1].0); + } + assert!(versions[4].1.is_none()); +} + +#[test] +fn test_data_edge_cases() { + // Test empty tables, single columns, special characters, and large payloads. 
+ let (_temp_dir, provider) = create_test_provider(false); + let conn = provider.get_connection(); + + // Empty table with zero rows + conn.execute_batch("CREATE TABLE empty_table (id INTEGER, name VARCHAR);") + .unwrap(); + let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let empty_stats = provider + .fetch_table_statistics("empty_table", current_snapshot.0, conn) + .unwrap() + .unwrap(); + assert_eq!(empty_stats.row_count, 0); + + // Single column table + conn.execute_batch( + r#" + CREATE TABLE single_col (value INTEGER); + INSERT INTO single_col VALUES (1), (2), (3); + "#, + ) + .unwrap(); + let single_snapshot = provider.fetch_current_snapshot().unwrap(); + let single_stats = provider + .fetch_table_statistics("single_col", single_snapshot.0, conn) + .unwrap() + .unwrap(); + assert_eq!(single_stats.column_statistics.len(), 1); + assert_eq!(single_stats.row_count, 3); + assert_eq!(single_stats.column_statistics[0].name, "value"); + + // Special characters in payload + conn.execute_batch( + r#" + CREATE TABLE test_table (id INTEGER, age INTEGER); + INSERT INTO test_table VALUES (1, 25), (2, 30); + "#, + ) + .unwrap(); + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'test_table'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + let age_column_id: i64 = conn + .query_row( + r#" + SELECT column_id + FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'age'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + let special_payload = + r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "empty": ""}"#; + provider + .update_table_column_stats(age_column_id, table_id, "special_test", special_payload) + .unwrap(); + let retrieved: String = conn + .query_row( + r#" + SELECT payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE column_id = ? AND table_id = ? AND stats_type = 'special_test' + AND end_snapshot IS NULL; + "#, + [age_column_id, table_id], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(retrieved, special_payload); + + // Large payload + let large_histogram: Vec = (0..1000).collect(); + let large_payload = json!({ + "buckets": large_histogram, + "metadata": "x".repeat(1000) + }) + .to_string(); + provider + .update_table_column_stats(age_column_id, table_id, "large_histogram", &large_payload) + .unwrap(); + let new_snapshot = provider.fetch_current_snapshot().unwrap(); + let large_stats = provider + .fetch_table_statistics("test_table", new_snapshot.0, conn) + .unwrap() + .unwrap(); + let age_stats = large_stats + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + let large_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "large_histogram") + .unwrap(); + assert!(large_stat.data.to_string().len() > 1000); +} + +#[test] +fn test_schema_edge_cases() { + // Test schema fetching with nullable/non-nullable columns and complex types. 
+ let (_temp_dir, provider) = create_test_provider(false); + let conn = provider.get_connection(); + + // Mixed nullable and non-nullable columns + conn.execute_batch( + r#" + CREATE TABLE mixed_nulls ( + id INTEGER NOT NULL, + optional_name VARCHAR, + required_age INTEGER NOT NULL, + optional_value DOUBLE + ); + "#, + ) + .unwrap(); + let mixed_schema = provider.fetch_current_schema(None, "mixed_nulls").unwrap(); + assert_eq!(mixed_schema.fields().len(), 4); + assert!(!mixed_schema.field_with_name("id").unwrap().is_nullable()); + assert!( + mixed_schema + .field_with_name("optional_name") + .unwrap() + .is_nullable() + ); + assert!( + !mixed_schema + .field_with_name("required_age") + .unwrap() + .is_nullable() + ); + assert!( + mixed_schema + .field_with_name("optional_value") + .unwrap() + .is_nullable() + ); + + // Complex types + conn.execute_batch( + r#" + CREATE TABLE complex_types ( + tiny_col TINYINT, + small_col SMALLINT, + int_col INTEGER, + big_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + time_col TIME, + timestamp_col TIMESTAMP, + blob_col BLOB, + bool_col BOOLEAN + ); + "#, + ) + .unwrap(); + let complex_schema = provider + .fetch_current_schema(None, "complex_types") + .unwrap(); + assert_eq!(complex_schema.fields().len(), 11); + assert!(matches!( + complex_schema + .field_with_name("tiny_col") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Int8 + )); + assert!(matches!( + complex_schema + .field_with_name("small_col") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Int16 + )); + assert!(matches!( + complex_schema + .field_with_name("float_col") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Float32 + )); + assert!(matches!( + complex_schema + .field_with_name("date_col") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Date32 + )); + assert!(matches!( + complex_schema + .field_with_name("time_col") + .unwrap() + .data_type(), + 
&duckdb::arrow::datatypes::DataType::Time64(_) + )); + assert!(matches!( + complex_schema + .field_with_name("blob_col") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Binary + )); +} + +#[test] +fn test_concurrent_snapshot_isolation() { + // Test statistics with special characters and edge case JSON values. + let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + + let special_payload = + r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "empty": ""}"#; + let result = provider.update_table_column_stats( + age_column_id, + table_id, + "special_test", + special_payload, + ); + + assert!(result.is_ok()); + + let conn = provider.get_connection(); + let retrieved_payload: String = conn + .query_row( + r#" + SELECT payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE column_id = ? AND table_id = ? AND stats_type = 'special_test' + AND end_snapshot IS NULL; + "#, + [age_column_id, table_id], + |row| row.get(0), + ) + .unwrap(); + + assert_eq!(retrieved_payload, special_payload); +} + +#[test] +fn test_large_statistics_payload() { + // Test handling of large statistics payloads. 
+ let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + + let large_histogram: Vec = (0..1000).collect(); + let large_payload = json!({ + "buckets": large_histogram, + "metadata": "x".repeat(1000) + }) + .to_string(); + + let result = provider.update_table_column_stats( + age_column_id, + table_id, + "large_histogram", + &large_payload, + ); + + assert!(result.is_ok()); + + let conn = provider.get_connection(); + let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let stats = provider + .fetch_table_statistics("test_table", current_snapshot.0, conn) + .unwrap() + .unwrap(); + + let age_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + + let large_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "large_histogram") + .expect("Should have large_histogram stat"); + + assert!(large_stat.data.to_string().len() > 1000); +} + +#[test] +fn test_mixed_null_and_non_null_columns() { + // Test schema fetching with mixed nullable and non-nullable columns. 
+ let (_temp_dir, provider) = create_test_provider(false); + let conn = provider.get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE mixed_nulls ( + id INTEGER NOT NULL, + optional_name VARCHAR, + required_age INTEGER NOT NULL, + optional_value DOUBLE + ); + "#, + ) + .unwrap(); + + let schema = provider.fetch_current_schema(None, "mixed_nulls").unwrap(); + + assert_eq!(schema.fields().len(), 4); + + let id_field = schema.field_with_name("id").unwrap(); + assert!(!id_field.is_nullable()); + + let optional_name_field = schema.field_with_name("optional_name").unwrap(); + assert!(optional_name_field.is_nullable()); + + let required_age_field = schema.field_with_name("required_age").unwrap(); + assert!(!required_age_field.is_nullable()); + + let optional_value_field = schema.field_with_name("optional_value").unwrap(); + assert!(optional_value_field.is_nullable()); +} + +#[test] +fn test_schema_with_complex_types() { + // Test schema fetching with various complex and edge case data types. 
+ let (_temp_dir, provider) = create_test_provider(false); + let conn = provider.get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE complex_types ( + tiny_col TINYINT, + small_col SMALLINT, + int_col INTEGER, + big_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + time_col TIME, + timestamp_col TIMESTAMP, + blob_col BLOB, + bool_col BOOLEAN + ); + "#, + ) + .unwrap(); + + let schema = provider + .fetch_current_schema(None, "complex_types") + .unwrap(); + + assert_eq!(schema.fields().len(), 11); + + assert!(matches!( + schema.field_with_name("tiny_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Int8 + )); + assert!(matches!( + schema.field_with_name("small_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Int16 + )); + assert!(matches!( + schema.field_with_name("int_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Int32 + )); + assert!(matches!( + schema.field_with_name("big_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Int64 + )); + assert!(matches!( + schema.field_with_name("float_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Float32 + )); + assert!(matches!( + schema.field_with_name("double_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Float64 + )); + assert!(matches!( + schema.field_with_name("date_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Date32 + )); + assert!(matches!( + schema.field_with_name("time_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Time64(_) + )); + assert!(matches!( + schema.field_with_name("timestamp_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Timestamp(_, _) + )); + assert!(matches!( + schema.field_with_name("blob_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Binary + )); + assert!(matches!( + schema.field_with_name("bool_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Boolean + )); +} 
From dd3f9eb7b7cfdb99f150a8a86250a6bf07866871 Mon Sep 17 00:00:00 2001 From: Yuchen Liang Date: Fri, 31 Oct 2025 00:52:45 -0400 Subject: [PATCH 22/40] refactor Signed-off-by: Yuchen Liang --- .gitignore | 4 +- Cargo.lock | 28 +- Cargo.toml | 9 +- optd/catalog/Cargo.toml | 7 + optd/catalog/src/lib.rs | 629 +++++++++++++++++ .../tests/statistics_tests.rs | 188 +++-- optd/statistics/Cargo.toml | 20 - optd/statistics/src/lib.rs | 3 - optd/statistics/src/statistics.rs | 646 ------------------ optd/storage/Cargo.toml | 7 - optd/storage/src/lib.rs | 269 -------- 11 files changed, 734 insertions(+), 1076 deletions(-) rename optd/{statistics => catalog}/tests/statistics_tests.rs (87%) delete mode 100644 optd/statistics/Cargo.toml delete mode 100644 optd/statistics/src/lib.rs delete mode 100644 optd/statistics/src/statistics.rs delete mode 100644 optd/storage/Cargo.toml delete mode 100644 optd/storage/src/lib.rs diff --git a/.gitignore b/.gitignore index 10db29f..c6c9fba 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,6 @@ data/ # datafusion .history + +# macOS .DS_Store -optd/.DS_Store -optd/statistics/.DS_Store diff --git a/Cargo.lock b/Cargo.lock index 57feee1..9fad2ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3555,6 +3555,13 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "optd-catalog" version = "0.1.0" +dependencies = [ + "duckdb", + "serde", + "serde_json", + "snafu", + "tempfile", +] [[package]] name = "optd-cli" @@ -3599,27 +3606,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "optd-statistics" -version = "0.1.0" -dependencies = [ - "async-trait", - "duckdb", - "futures", - "glob", - "parking_lot", - "serde", - "serde_json", - "snafu", - "tempfile", - "tokio", - "url", -] - -[[package]] -name = "optd-storage" -version = "0.1.0" - [[package]] name = "option-ext" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 76e022e..b43738c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,13 +1,6 @@ 
[workspace] resolver = "2" -members = [ - "cli", - "connectors/datafusion", - "optd/catalog", - "optd/core", - "optd/storage", - "optd/statistics", -] +members = ["cli", "connectors/datafusion", "optd/core", "optd/catalog"] # By default, only compiles the `optd-core` crate. default-members = ["optd/core"] diff --git a/optd/catalog/Cargo.toml b/optd/catalog/Cargo.toml index 332c535..f390741 100644 --- a/optd/catalog/Cargo.toml +++ b/optd/catalog/Cargo.toml @@ -5,3 +5,10 @@ edition.workspace = true repository.workspace = true [dependencies] +serde = { version = "1.0", features = ["derive"] } +duckdb = { version = "1.4.0", features = ["bundled"] } +snafu = "0.8.6" +serde_json = "1.0" + +[dev-dependencies] +tempfile = "3.8" diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs index 8b13789..aee6590 100644 --- a/optd/catalog/src/lib.rs +++ b/optd/catalog/src/lib.rs @@ -1 +1,630 @@ +use std::sync::Arc; +use duckdb::{ + Connection, Error as DuckDBError, + arrow::datatypes::{DataType, Field, Schema, SchemaRef}, + params, + types::Null, +}; + +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use snafu::{ResultExt, prelude::*}; + +/// Operations for managing table statistics with snapshot-based time travel. +pub trait Catalog { + /// Gets the current (most recent) snapshot ID. + fn current_snapshot(&self) -> Result; + + /// Gets complete metadata for the current snapshot. + fn current_snapshot_info(&self) -> Result; + + /// Gets the Arrow schema for a table at the current snapshot. + fn current_schema(&self, schema: Option<&str>, table: &str) -> Result; + + /// Gets schema information including name, ID, and snapshot range. + fn current_schema_info(&self) -> Result; + + /// Retrieves table and column statistics at a specific snapshot. + fn table_statistics( + &self, + table_name: &str, + snapshot: SnapshotId, + connection: &Connection, + ) -> Result, Error>; + + /// Updates or inserts advanced statistics for a table column. 
+ fn update_table_column_stats( + &self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error>; +} + +const DEFAULT_METADATA_FILE: &str = "metadata.ducklake"; + +/// SQL query to fetch table statistics including column metadata and advanced stats at a specific snapshot. +const FETCH_TABLE_STATS_QUERY: &str = r#" + SELECT + ts.table_id, + dc.column_id, + dc.column_name, + dc.column_type, + ts.record_count, + ts.next_row_id, + ts.file_size_bytes, + tcas.stats_type, + tcas.payload + FROM __ducklake_metadata_metalake.main.ducklake_table_stats ts + INNER JOIN __ducklake_metadata_metalake.main.ducklake_table dt ON ts.table_id = dt.table_id + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + INNER JOIN __ducklake_metadata_metalake.main.ducklake_column dc ON dt.table_id = dc.table_id + LEFT JOIN __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats tcas + ON dc.table_id = tcas.table_id + AND dc.column_id = tcas.column_id + AND ? >= tcas.begin_snapshot + AND (? < tcas.end_snapshot OR tcas.end_snapshot IS NULL) + WHERE + ds.schema_name = current_schema() + AND dt.table_name = ? + AND ts.record_count IS NOT NULL + AND ts.file_size_bytes IS NOT NULL + AND ? >= dc.begin_snapshot + AND (? < dc.end_snapshot OR dc.end_snapshot IS NULL) + ORDER BY ts.table_id, dc.column_id, tcas.stats_type; +"#; + +/// SQL query to close an existing advanced statistics entry by setting its end_snapshot. +const UPDATE_ADV_STATS_QUERY: &str = r#" + UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + SET end_snapshot = ? + WHERE end_snapshot IS NULL + AND stats_type = ? + AND column_id = ? + AND table_id = ?; +"#; + +/// SQL query to insert a new advanced statistics entry. 
+const INSERT_ADV_STATS_QUERY: &str = r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) + VALUES (?, ?, ?, ?, ?, ?); +"#; + +/// SQL query to insert a new snapshot record. +const INSERT_SNAPSHOT_QUERY: &str = r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot + (snapshot_id, snapshot_time, schema_version, next_catalog_id, next_file_id) + VALUES (?, NOW(), ?, ?, ?); +"#; + +/// SQL query to record a snapshot change in the change log. +const INSERT_SNAPSHOT_CHANGE_QUERY: &str = r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot_changes + (snapshot_id, changes_made, author, commit_message, commit_extra_info) + VALUES (?, ?, ?, ?, ?); +"#; + +/// Error types for statistics operations. +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Database connection error: {}", source))] + Connection { source: DuckDBError }, + #[snafu(display("Query execution failed: {}", source))] + QueryExecution { source: DuckDBError }, + #[snafu(display("JSON serialization error: {}", source))] + JsonSerialization { source: serde_json::Error }, + #[snafu(display("ARROW DataType conversion error: {}", source))] + ArrowDataTypeConversion { source: duckdb::Error }, + #[snafu(display( + "Get statistics failed for table: {}, column: {}, snapshot: {}", + table, + column, + snapshot + ))] + GetStatsFailed { + table: String, + column: String, + snapshot: i64, + }, + #[snafu(display( + "Group statistics not found for group_id: {}, stats_type: {}, snapshot: {}", + group_id, + stats_type, + snapshot + ))] + GroupStatsNotFound { + group_id: i64, + stats_type: String, + snapshot: i64, + }, +} + +/// Internal representation of a row from the table statistics query. +/// Used for collecting data before aggregating into TableStatistics. 
+struct TableColumnStatisticsEntry { + _table_id: i64, + column_id: i64, + column_name: String, + column_type: String, + record_count: i64, + _next_row_id: i64, + _file_size_bytes: i64, + stats_type: Option, + payload: Option, +} + +/// Statistics for a table including row count and per-column statistics. +/// Main structure returned when querying table statistics. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TableStatistics { + pub row_count: usize, + pub column_statistics: Vec, +} + +impl FromIterator> for TableStatistics { + fn from_iter>>( + iter: T, + ) -> Self { + let mut row_flag = false; + let mut row_count = 0; + let mut column_statistics = Vec::new(); + + // Stats will be ordered by table_id then column_id + for e in iter.into_iter().flatten() { + // Check if unique table/column combination + if column_statistics + .last() + .is_none_or(|last: &ColumnStatistics| last.column_id != e.column_id) + { + // New column encountered + column_statistics.push(ColumnStatistics::new( + e.column_id, + e.column_type.clone(), + e.column_name.clone(), + Vec::new(), + )); + } + + assert!( + !column_statistics.is_empty() + && column_statistics.last().unwrap().column_id == e.column_id, + "Column statistics should not be empty and last column_id should match current column_id" + ); + + if let Some(last_column_stat) = column_statistics.last_mut() { + if let (Some(stats_type), Some(payload)) = (e.stats_type, e.payload) { + let data = serde_json::from_str(&payload).unwrap_or(Value::Null); + last_column_stat + .add_advanced_stat(AdvanceColumnStatistics { stats_type, data }); + } + } + + // Assuming all columns have the same record_count, only need to set once + if !row_flag { + row_count = e.record_count as usize; + row_flag = true; + } + } + + TableStatistics { + row_count, + column_statistics, + } + } +} + +/// Statistics for a single column including type, name, and advanced statistics. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ColumnStatistics { + pub column_id: i64, + pub column_type: String, + pub name: String, + pub advanced_stats: Vec, +} + +impl ColumnStatistics { + fn new( + column_id: i64, + column_type: String, + name: String, + advanced_stats: Vec, + ) -> Self { + Self { + column_id, + column_type, + name, + advanced_stats, + } + } + + fn add_advanced_stat(&mut self, stat: AdvanceColumnStatistics) { + self.advanced_stats.push(stat); + } +} + +/// An advanced statistics entry with type and serialized data at a snapshot. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AdvanceColumnStatistics { + /// Type of the statistical summaries (e.g., histogram, distinct count). + pub stats_type: String, + /// Serialized data for the statistics at a snapshot. + pub data: Value, +} + +/// Identifier for a snapshot in the statistics database. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct SnapshotId(pub i64); + +/// Snapshot metadata including schema version and next IDs. +#[derive(Debug, Clone, Serialize, Deserialize)] + +pub struct SnapshotInfo { + pub id: SnapshotId, + pub schema_version: i64, + pub next_catalog_id: i64, + pub next_file_id: i64, +} + +/// Schema information including name, ID, and valid snapshot range. +#[derive(Debug, Clone, Serialize, Deserialize)] + +pub struct CurrentSchema { + pub schema_name: String, + pub schema_id: i64, + pub begin_snapshot: i64, + pub end_snapshot: Option, +} + +// TODO(ray): remove this once we have use. +#[allow(dead_code)] +#[derive(Debug, Serialize, Deserialize)] +struct StatisticsUpdate { + stats_type: String, + payload: String, +} + +/// A catalog implementation using DuckDB with snapshot management. +pub struct DuckLakeCatalog { + conn: Connection, +} + +impl Catalog for DuckLakeCatalog { + fn current_snapshot(&self) -> Result { + self.conn + .prepare("FROM ducklake_current_snapshot('metalake');") + .context(QueryExecutionSnafu)? 
+ .query_row([], |row| Ok(SnapshotId(row.get(0)?))) + .context(QueryExecutionSnafu) + } + + fn current_snapshot_info(&self) -> Result { + self.conn + .prepare( + r#" + SELECT snapshot_id, schema_version, next_catalog_id, next_file_id + FROM __ducklake_metadata_metalake.main.ducklake_snapshot + WHERE snapshot_id = (SELECT MAX(snapshot_id) + FROM __ducklake_metadata_metalake.main.ducklake_snapshot); + "#, + ) + .context(QueryExecutionSnafu)? + .query_row([], |row| { + Ok(SnapshotInfo { + id: SnapshotId(row.get("snapshot_id")?), + schema_version: row.get("schema_version")?, + next_catalog_id: row.get("next_catalog_id")?, + next_file_id: row.get("next_file_id")?, + }) + }) + .context(QueryExecutionSnafu) + } + + fn current_schema(&self, schema: Option<&str>, table: &str) -> Result { + let table_ref = schema + .map(|s| format!("{}.{}", s, table)) + .unwrap_or_else(|| table.to_string()); + + let schema_query = format!("DESCRIBE {};", table_ref); + + let mut stmt = self + .conn + .prepare(&schema_query) + .context(QueryExecutionSnafu)?; + + let mut fields = Vec::new(); + let column_iter = stmt + .query_map([], |row| { + let column_name: String = row.get("column_name")?; + let column_type_str: String = row.get("column_type")?; + let null: String = row.get("null")?; + + // Convert DuckDB type to Arrow type + let column_type = Self::duckdb_type_to_arrow(&column_type_str).map_err(|_| { + DuckDBError::FromSqlConversionFailure( + 0, + duckdb::types::Type::Text, + Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!( + "Could not convert DuckDB type '{}' to Arrow type", + column_type_str + ), + )), + ) + })?; + + fields.push(Field::new(column_name, column_type, null == "YES")); + Ok(()) + }) + .context(QueryExecutionSnafu)?; + + for result in column_iter { + result.context(QueryExecutionSnafu)?; + } + let schema = Schema::new(fields); + Ok(Arc::new(schema)) + } + + fn current_schema_info(&self) -> Result { + self.conn + .prepare( + r#" + SELECT 
ds.schema_id, ds.schema_name, ds.begin_snapshot, ds.end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_schema ds + WHERE ds.schema_name = current_schema(); + "#, + ) + .context(QueryExecutionSnafu)? + .query_row([], |row| { + Ok(CurrentSchema { + schema_name: row.get("schema_name")?, + schema_id: row.get("schema_id")?, + begin_snapshot: row.get("begin_snapshot")?, + end_snapshot: row.get("end_snapshot")?, + }) + }) + .context(QueryExecutionSnafu) + } + + fn table_statistics( + &self, + table: &str, + snapshot: SnapshotId, + conn: &Connection, + ) -> Result, Error> { + let mut stmt = conn + .prepare(FETCH_TABLE_STATS_QUERY) + .context(QueryExecutionSnafu)?; + + let entries = stmt + .query_map( + params![&snapshot.0, &snapshot.0, table, &snapshot.0, &snapshot.0,], + |row| { + Ok(TableColumnStatisticsEntry { + _table_id: row.get("table_id")?, + column_id: row.get("column_id")?, + column_name: row.get("column_name")?, + column_type: row.get("column_type")?, + record_count: row.get("record_count")?, + _next_row_id: row.get("next_row_id")?, + _file_size_bytes: row.get("file_size_bytes")?, + stats_type: row.get("stats_type")?, + payload: row.get("payload")?, + }) + }, + ) + .context(QueryExecutionSnafu)? + .map(|result| result.context(QueryExecutionSnafu)); + + Ok(Some(TableStatistics::from_iter(entries))) + } + + /// Update table column statistics + fn update_table_column_stats( + &self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error> { + // Start transaction + self.begin_transaction()?; + + // Fetch current snapshot info + let curr_snapshot = self.current_snapshot_info()?; + + // Update matching past snapshot to close it + self.conn + .prepare(UPDATE_ADV_STATS_QUERY) + .context(QueryExecutionSnafu)? 
+ .execute(params![ + curr_snapshot.id.0 + 1, + stats_type, + column_id, + table_id, + ]) + .context(QueryExecutionSnafu)?; + + // Insert new snapshot + self.conn + .prepare(INSERT_ADV_STATS_QUERY) + .context(QueryExecutionSnafu)? + .execute(params![ + column_id, + curr_snapshot.id.0 + 1, + Null, + table_id, + stats_type, + payload, + ]) + .context(QueryExecutionSnafu)?; + + self.conn + .prepare(INSERT_SNAPSHOT_QUERY) + .context(QueryExecutionSnafu)? + .execute(params![ + curr_snapshot.id.0 + 1, + curr_snapshot.schema_version, + curr_snapshot.next_catalog_id, + curr_snapshot.next_file_id, + ]) + .context(QueryExecutionSnafu)?; + + self.conn + .prepare(INSERT_SNAPSHOT_CHANGE_QUERY) + .context(QueryExecutionSnafu)? + .execute(params![ + curr_snapshot.id.0 + 1, + format!( + r#"updated_stats:"main"."ducklake_table_column_adv_stats",{}:{}"#, + stats_type, payload + ), + Null, + Null, + Null, + ]) + .context(QueryExecutionSnafu)?; + + // Commit transaction + self.commit_transaction()?; + + Ok(()) + } +} + +impl DuckLakeCatalog { + /// Creates a new DuckLakeStatisticsProvider with optional file paths. + /// If `location` is None, uses in-memory database. If `metadata_path` is None, uses default metadata file. + pub fn try_new(location: Option<&str>, metadata_path: Option<&str>) -> Result { + let conn = if let Some(path) = location { + Connection::open(path).context(ConnectionSnafu)? + } else { + Connection::open_in_memory().context(ConnectionSnafu)? 
+ }; + + // Use provided metadata path or default to DEFAULT_METADATA_FILE + let metadata_file = metadata_path.unwrap_or(DEFAULT_METADATA_FILE); + let setup_query = format!( + r#" + INSTALL ducklake; + LOAD ducklake; + ATTACH 'ducklake:{}' AS metalake; + USE metalake; + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats ( + column_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + table_id BIGINT, + stats_type VARCHAR, + payload VARCHAR + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query ( + query_id BIGINT, + query_string VARCHAR, + root_group_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query_instance ( + query_instance_id BIGINT PRIMARY KEY, + query_id BIGINT, + creation_time BIGINT, + snapshot_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group_stats ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload VARCHAR + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_execution_subplan_feedback ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload VARCHAR + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_subplan_scalar_feedback ( + scalar_id BIGINT, + group_id BIGINT, + stats_type VARCHAR, + payload VARCHAR, + query_instance_id BIGINT + ); + "#, + metadata_file + ); + conn.execute_batch(&setup_query).context(ConnectionSnafu)?; + Ok(Self { conn }) + } + + /// Returns a reference to the underlying DuckDB connection. + pub fn get_connection(&self) -> &Connection { + &self.conn + } + + /// Begins a database transaction. 
+ fn begin_transaction(&self) -> Result<(), Error> { + self.conn + .execute_batch("BEGIN TRANSACTION;") + .context(QueryExecutionSnafu) + } + + /// Commits the current database transaction. + fn commit_transaction(&self) -> Result<(), Error> { + self.conn + .execute_batch("COMMIT TRANSACTION;") + .context(QueryExecutionSnafu) + } + + /// Converts a DuckDB type string to an Arrow DataType. + fn duckdb_type_to_arrow(type_str: &str) -> Result { + // Handle common DuckDB types + let data_type = match type_str.to_uppercase().as_str() { + "INTEGER" | "INT" | "INT4" => DataType::Int32, + "BIGINT" | "INT8" | "LONG" => DataType::Int64, + "SMALLINT" | "INT2" | "SHORT" => DataType::Int16, + "TINYINT" | "INT1" => DataType::Int8, + "DOUBLE" | "FLOAT8" => DataType::Float64, + "FLOAT" | "REAL" | "FLOAT4" => DataType::Float32, + "BOOLEAN" | "BOOL" => DataType::Boolean, + "VARCHAR" | "TEXT" | "STRING" => DataType::Utf8, + "DATE" => DataType::Date32, + "TIMESTAMP" => { + DataType::Timestamp(duckdb::arrow::datatypes::TimeUnit::Microsecond, None) + } + "TIME" => DataType::Time64(duckdb::arrow::datatypes::TimeUnit::Microsecond), + "BLOB" | "BYTEA" | "BINARY" => DataType::Binary, + "DECIMAL" => DataType::Decimal128(38, 10), // Default precision and scale + _ => { + // For unsupported types, use Utf8 as fallback or you could error out + // Here we'll just return an error through the ArrowDataTypeConversion variant + return Err(Error::ArrowDataTypeConversion { + source: DuckDBError::FromSqlConversionFailure( + 0, + duckdb::types::Type::Text, + Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unsupported DuckDB type for Arrow conversion: {}", type_str), + )), + ), + }); + } + }; + Ok(data_type) + } +} diff --git a/optd/statistics/tests/statistics_tests.rs b/optd/catalog/tests/statistics_tests.rs similarity index 87% rename from optd/statistics/tests/statistics_tests.rs rename to optd/catalog/tests/statistics_tests.rs index bfa20a8..accc2fc 100644 --- 
a/optd/statistics/tests/statistics_tests.rs +++ b/optd/catalog/tests/statistics_tests.rs @@ -1,4 +1,4 @@ -use optd_statistics::{DuckLakeStatisticsProvider, StatisticsProvider}; +use optd_catalog::{Catalog, DuckLakeCatalog, SnapshotId}; use serde_json::json; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; @@ -6,8 +6,8 @@ use tempfile::TempDir; static TEST_COUNTER: AtomicU64 = AtomicU64::new(0); -/// Creates a test statistics provider with isolated metadata directory. -fn create_test_provider(for_file: bool) -> (TempDir, DuckLakeStatisticsProvider) { +/// Creates a test catalog with isolated metadata directory. +fn create_test_catalog(for_file: bool) -> (TempDir, DuckLakeCatalog) { let temp_dir = TempDir::new().unwrap(); let counter = TEST_COUNTER.fetch_add(1, Ordering::SeqCst); let timestamp = SystemTime::now() @@ -20,24 +20,24 @@ fn create_test_provider(for_file: bool) -> (TempDir, DuckLakeStatisticsProvider) std::fs::create_dir_all(&unique_dir).unwrap(); let metadata_path = unique_dir.join("metadata.ducklake"); - let provider = if for_file { + let catalog = if for_file { let db_path = unique_dir.join("test.db"); - DuckLakeStatisticsProvider::try_new( + DuckLakeCatalog::try_new( Some(db_path.to_str().unwrap()), Some(metadata_path.to_str().unwrap()), ) } else { - DuckLakeStatisticsProvider::try_new(None, Some(metadata_path.to_str().unwrap())) + DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap())) } .unwrap(); - (temp_dir, provider) + (temp_dir, catalog) } -/// Creates a test provider with a pre-populated test_table (id, name, age columns). -fn create_test_provider_with_data() -> (TempDir, DuckLakeStatisticsProvider, i64, i64) { - let (temp_dir, provider) = create_test_provider(false); - let conn = provider.get_connection(); +/// Creates a test catalog with a pre-populated test_table (id, name, age columns). 
+fn create_test_catalog_with_data() -> (TempDir, DuckLakeCatalog, i64, i64) { + let (temp_dir, catalog) = create_test_catalog(false); + let conn = catalog.get_connection(); conn.execute_batch( r#" @@ -71,20 +71,20 @@ fn create_test_provider_with_data() -> (TempDir, DuckLakeStatisticsProvider, i64 ) .unwrap(); - (temp_dir, provider, table_id, age_column_id) + (temp_dir, catalog, table_id, age_column_id) } #[test] fn test_ducklake_statistics_provider_creation() { // Test both memory-based and file-based provider creation. - let (_temp_dir, _provider) = create_test_provider(false); - let (_temp_dir, _provider) = create_test_provider(true); + let (_temp_dir, _provider) = create_test_catalog(false); + let (_temp_dir, _provider) = create_test_catalog(true); } #[test] fn test_table_stats_insertion() { // Test basic statistics insertion without errors. - let (_temp_dir, provider) = create_test_provider(true); + let (_temp_dir, provider) = create_test_catalog(true); let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); assert!(result.is_ok()); @@ -93,7 +93,7 @@ fn test_table_stats_insertion() { #[test] fn test_table_stats_insertion_and_retrieval() { // Test inserting and retrieving multiple statistics types for a column. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); provider @@ -111,9 +111,9 @@ fn test_table_stats_insertion_and_retrieval() { ) .unwrap(); - let latest_snapshot = provider.fetch_current_snapshot().unwrap(); + let latest_snapshot = provider.current_snapshot().unwrap(); let stats = provider - .fetch_table_statistics("test_table", latest_snapshot.0, conn) + .table_statistics("test_table", latest_snapshot, conn) .unwrap() .unwrap(); @@ -148,11 +148,11 @@ fn test_table_stats_insertion_and_retrieval() { } #[test] -fn test_fetch_current_schema() { +fn test_current_schema() { // Test fetching current schema info returns valid metadata. - let (_temp_dir, provider) = create_test_provider(true); + let (_temp_dir, provider) = create_test_catalog(true); - let schema = provider.fetch_current_schema_info().unwrap(); + let schema = provider.current_schema_info().unwrap(); assert_eq!(schema.schema_name, "main"); assert_eq!(schema.schema_id, 0); @@ -163,7 +163,7 @@ fn test_fetch_current_schema() { #[test] fn test_snapshot_versioning_and_stats_types() { // Test snapshot creation, versioning, and continuity for multiple stats updates. - let (_temp_dir, provider) = create_test_provider(true); + let (_temp_dir, provider) = create_test_catalog(true); let conn = provider.get_connection(); provider @@ -248,7 +248,7 @@ fn test_snapshot_versioning_and_stats_types() { #[test] fn test_snapshot_tracking_and_multi_table_stats() { // Test snapshot creation tracking and statistics isolation across multiple tables. 
- let (_temp_dir, provider) = create_test_provider(true); + let (_temp_dir, provider) = create_test_catalog(true); let conn = provider.get_connection(); let initial_count: i64 = conn @@ -346,13 +346,13 @@ fn test_snapshot_tracking_and_multi_table_stats() { #[test] fn test_update_and_fetch_table_column_stats() { // Test updating min/max values and advanced statistics with snapshot progression. - let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); - let initial_snapshot = provider.fetch_current_snapshot().unwrap(); + let initial_snapshot = provider.current_snapshot().unwrap(); assert!( provider - .fetch_table_statistics("test_table", initial_snapshot.0, conn) + .table_statistics("test_table", initial_snapshot, conn) .unwrap() .is_some() ); @@ -360,13 +360,13 @@ fn test_update_and_fetch_table_column_stats() { provider .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); - let snapshot_after_min = provider.fetch_current_snapshot().unwrap(); + let snapshot_after_min = provider.current_snapshot().unwrap(); assert_eq!(snapshot_after_min.0, initial_snapshot.0 + 1); provider .update_table_column_stats(age_column_id, table_id, "max_value", "35") .unwrap(); - let snapshot_after_max = provider.fetch_current_snapshot().unwrap(); + let snapshot_after_max = provider.current_snapshot().unwrap(); assert_eq!(snapshot_after_max.0, initial_snapshot.0 + 2); let (min_val, max_val): (Option, Option) = conn @@ -422,17 +422,17 @@ fn test_update_and_fetch_table_column_stats() { ) .unwrap(); - let snapshot_after_histogram = provider.fetch_current_snapshot().unwrap(); + let snapshot_after_histogram = provider.current_snapshot().unwrap(); assert_eq!(snapshot_after_histogram.0, initial_snapshot.0 + 3); } #[test] fn test_fetch_table_stats_with_snapshot_time_travel() { // Test time-travel capability by 
fetching statistics at different snapshot points. - let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); - let snapshot_0 = provider.fetch_current_snapshot().unwrap(); + let snapshot_0 = provider.current_snapshot().unwrap(); provider .update_table_column_stats( @@ -442,7 +442,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { r#"{"version": 1, "buckets": [1, 2, 3]}"#, ) .unwrap(); - let snapshot_1 = provider.fetch_current_snapshot().unwrap(); + let snapshot_1 = provider.current_snapshot().unwrap(); provider .update_table_column_stats( @@ -452,7 +452,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { r#"{"version": 2, "buckets": [1, 2, 3, 4, 5]}"#, ) .unwrap(); - let snapshot_2 = provider.fetch_current_snapshot().unwrap(); + let snapshot_2 = provider.current_snapshot().unwrap(); provider .update_table_column_stats( @@ -462,10 +462,10 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { r#"{"version": 3, "buckets": [10, 20, 30]}"#, ) .unwrap(); - let snapshot_3 = provider.fetch_current_snapshot().unwrap(); + let snapshot_3 = provider.current_snapshot().unwrap(); let stats_at_0 = provider - .fetch_table_statistics("test_table", snapshot_0.0, conn) + .table_statistics("test_table", snapshot_0, conn) .unwrap() .unwrap(); let age_stats_0 = stats_at_0 @@ -476,7 +476,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { assert_eq!(age_stats_0.advanced_stats.len(), 0); let stats_at_1 = provider - .fetch_table_statistics("test_table", snapshot_1.0, conn) + .table_statistics("test_table", snapshot_1, conn) .unwrap() .unwrap(); let age_stats_1 = stats_at_1 @@ -493,7 +493,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { ); let stats_at_2 = provider - .fetch_table_statistics("test_table", snapshot_2.0, conn) + .table_statistics("test_table", snapshot_2, conn) .unwrap() 
.unwrap(); let age_stats_2 = stats_at_2 @@ -510,7 +510,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { ); let stats_at_3 = provider - .fetch_table_statistics("test_table", snapshot_3.0, conn) + .table_statistics("test_table", snapshot_3, conn) .unwrap() .unwrap(); let age_stats_3 = stats_at_3 @@ -530,7 +530,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { #[test] fn test_fetch_table_stats_multiple_stat_types() { // Test fetching when multiple statistics types exist for the same column. - let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); provider @@ -559,9 +559,9 @@ fn test_fetch_table_stats_multiple_stat_types() { ) .unwrap(); - let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let current_snapshot = provider.current_snapshot().unwrap(); let stats = provider - .fetch_table_statistics("test_table", current_snapshot.0, conn) + .table_statistics("test_table", current_snapshot, conn) .unwrap() .unwrap(); @@ -589,16 +589,16 @@ fn test_fetch_table_stats_multiple_stat_types() { #[test] fn test_fetch_table_stats_columns_without_stats() { // Test that columns without advanced statistics are still returned in fetch results. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); provider .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); - let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let current_snapshot = provider.current_snapshot().unwrap(); let stats = provider - .fetch_table_statistics("test_table", current_snapshot.0, conn) + .table_statistics("test_table", current_snapshot, conn) .unwrap() .unwrap(); @@ -628,7 +628,7 @@ fn test_fetch_table_stats_columns_without_stats() { #[test] fn test_fetch_table_stats_row_count() { // Test that row_count is correctly populated from table statistics. - let (_temp_dir, provider) = create_test_provider(false); + let (_temp_dir, provider) = create_test_catalog(false); let conn = provider.get_connection(); conn.execute_batch( @@ -670,9 +670,9 @@ fn test_fetch_table_stats_row_count() { .update_table_column_stats(col1_id, table_id, "ndv", r#"{"distinct_count": 100}"#) .unwrap(); - let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let current_snapshot = provider.current_snapshot().unwrap(); let stats = provider - .fetch_table_statistics("large_table", current_snapshot.0, conn) + .table_statistics("large_table", current_snapshot, conn) .unwrap() .unwrap(); @@ -681,9 +681,9 @@ fn test_fetch_table_stats_row_count() { } #[test] -fn test_fetch_current_schema_arrow() { +fn test_current_schema_arrow() { // Test fetching Arrow schema from DuckDB table with type conversions. 
- let (_temp_dir, provider) = create_test_provider(false); + let (_temp_dir, provider) = create_test_catalog(false); let conn = provider.get_connection(); conn.execute_batch( @@ -698,9 +698,7 @@ fn test_fetch_current_schema_arrow() { ) .unwrap(); - let schema = provider - .fetch_current_schema(None, "schema_test_table") - .unwrap(); + let schema = provider.current_schema(None, "schema_test_table").unwrap(); assert_eq!(schema.fields().len(), 4); @@ -728,7 +726,7 @@ fn test_fetch_current_schema_arrow() { )); let schema_explicit = provider - .fetch_current_schema(Some("main"), "schema_test_table") + .current_schema(Some("main"), "schema_test_table") .unwrap(); assert_eq!(schema_explicit.fields().len(), 4); } @@ -736,10 +734,10 @@ fn test_fetch_current_schema_arrow() { #[test] fn test_multiple_schemas_comprehensive() { // Test schema fetching and metadata tracking across multiple database schemas. - let (_temp_dir, provider) = create_test_provider(false); + let (_temp_dir, provider) = create_test_catalog(false); let conn = provider.get_connection(); - let initial_schema_info = provider.fetch_current_schema_info().unwrap(); + let initial_schema_info = provider.current_schema_info().unwrap(); assert_eq!(initial_schema_info.schema_name, "main"); assert_eq!(initial_schema_info.schema_id, 0); assert!(initial_schema_info.end_snapshot.is_none()); @@ -755,7 +753,7 @@ fn test_multiple_schemas_comprehensive() { ) .unwrap(); - let main_users_schema = provider.fetch_current_schema(None, "users").unwrap(); + let main_users_schema = provider.current_schema(None, "users").unwrap(); assert_eq!(main_users_schema.fields().len(), 4); assert!(matches!( main_users_schema @@ -780,7 +778,7 @@ fn test_multiple_schemas_comprehensive() { )); let analytics_metrics_schema = provider - .fetch_current_schema(Some("analytics"), "metrics") + .current_schema(Some("analytics"), "metrics") .unwrap(); assert_eq!(analytics_metrics_schema.fields().len(), 4); assert!(matches!( @@ -806,7 +804,7 @@ fn 
test_multiple_schemas_comprehensive() { )); let reporting_summary_schema = provider - .fetch_current_schema(Some("reporting"), "summary") + .current_schema(Some("reporting"), "summary") .unwrap(); assert_eq!(reporting_summary_schema.fields().len(), 4); assert!(matches!( @@ -831,24 +829,22 @@ fn test_multiple_schemas_comprehensive() { &duckdb::arrow::datatypes::DataType::Boolean )); - let current_schema_info = provider.fetch_current_schema_info().unwrap(); + let current_schema_info = provider.current_schema_info().unwrap(); assert_eq!(current_schema_info.schema_name, "main"); conn.execute("USE analytics;", []).unwrap(); - let analytics_schema_info = provider.fetch_current_schema_info().unwrap(); + let analytics_schema_info = provider.current_schema_info().unwrap(); assert_eq!(analytics_schema_info.schema_name, "analytics"); assert!(analytics_schema_info.end_snapshot.is_none()); - let metrics_schema_implicit = provider.fetch_current_schema(None, "metrics").unwrap(); + let metrics_schema_implicit = provider.current_schema(None, "metrics").unwrap(); assert_eq!(metrics_schema_implicit.fields().len(), 4); - let users_from_main = provider - .fetch_current_schema(Some("main"), "users") - .unwrap(); + let users_from_main = provider.current_schema(Some("main"), "users").unwrap(); assert_eq!(users_from_main.fields().len(), 4); conn.execute("USE reporting;", []).unwrap(); - let reporting_schema_info = provider.fetch_current_schema_info().unwrap(); + let reporting_schema_info = provider.current_schema_info().unwrap(); assert_eq!(reporting_schema_info.schema_name, "reporting"); let schemas: Vec<(String, i64, i64, Option)> = conn @@ -885,13 +881,13 @@ fn test_multiple_schemas_comprehensive() { #[test] fn test_error_handling_edge_cases() { // Test various error scenarios: non-existent tables, invalid snapshots, invalid IDs. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); // Non-existent table returns empty results - let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let current_snapshot = provider.current_snapshot().unwrap(); let stats = provider - .fetch_table_statistics("nonexistent_table", current_snapshot.0, conn) + .table_statistics("nonexistent_table", current_snapshot, conn) .unwrap(); assert!(stats.is_some()); assert_eq!(stats.unwrap().column_statistics.len(), 0); @@ -901,7 +897,7 @@ fn test_error_handling_edge_cases() { .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); let future_stats = provider - .fetch_table_statistics("test_table", 99999, conn) + .table_statistics("test_table", SnapshotId(99999), conn) .unwrap(); assert!(future_stats.is_some()); assert_eq!(future_stats.unwrap().column_statistics.len(), 3); @@ -912,18 +908,14 @@ fn test_error_handling_edge_cases() { assert!(result.is_ok()); // Fetching schema for non-existent table returns error - assert!( - provider - .fetch_current_schema(None, "nonexistent_table") - .is_err() - ); + assert!(provider.current_schema(None, "nonexistent_table").is_err()); // Invalid schema name returns error conn.execute_batch("CREATE TABLE test (id INTEGER);") .unwrap(); assert!( provider - .fetch_current_schema(Some("nonexistent_schema"), "test") + .current_schema(Some("nonexistent_schema"), "test") .is_err() ); } @@ -931,10 +923,10 @@ fn test_error_handling_edge_cases() { #[test] fn test_update_same_stat_rapidly() { // Test updating the same statistic multiple times in rapid succession. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); - let initial_snapshot = provider.fetch_current_snapshot().unwrap(); + let initial_snapshot = provider.current_snapshot().unwrap(); for i in 1..=5 { provider @@ -947,7 +939,7 @@ fn test_update_same_stat_rapidly() { .unwrap(); } - let final_snapshot = provider.fetch_current_snapshot().unwrap(); + let final_snapshot = provider.current_snapshot().unwrap(); assert_eq!(final_snapshot.0, initial_snapshot.0 + 5); let versions: Vec<(i64, Option)> = conn @@ -978,15 +970,15 @@ fn test_update_same_stat_rapidly() { #[test] fn test_data_edge_cases() { // Test empty tables, single columns, special characters, and large payloads. - let (_temp_dir, provider) = create_test_provider(false); + let (_temp_dir, provider) = create_test_catalog(false); let conn = provider.get_connection(); // Empty table with zero rows conn.execute_batch("CREATE TABLE empty_table (id INTEGER, name VARCHAR);") .unwrap(); - let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let current_snapshot = provider.current_snapshot().unwrap(); let empty_stats = provider - .fetch_table_statistics("empty_table", current_snapshot.0, conn) + .table_statistics("empty_table", current_snapshot, conn) .unwrap() .unwrap(); assert_eq!(empty_stats.row_count, 0); @@ -999,9 +991,9 @@ fn test_data_edge_cases() { "#, ) .unwrap(); - let single_snapshot = provider.fetch_current_snapshot().unwrap(); + let single_snapshot = provider.current_snapshot().unwrap(); let single_stats = provider - .fetch_table_statistics("single_col", single_snapshot.0, conn) + .table_statistics("single_col", single_snapshot, conn) .unwrap() .unwrap(); assert_eq!(single_stats.column_statistics.len(), 1); @@ -1068,9 +1060,9 @@ fn test_data_edge_cases() { provider .update_table_column_stats(age_column_id, table_id, 
"large_histogram", &large_payload) .unwrap(); - let new_snapshot = provider.fetch_current_snapshot().unwrap(); + let new_snapshot = provider.current_snapshot().unwrap(); let large_stats = provider - .fetch_table_statistics("test_table", new_snapshot.0, conn) + .table_statistics("test_table", new_snapshot, conn) .unwrap() .unwrap(); let age_stats = large_stats @@ -1089,7 +1081,7 @@ fn test_data_edge_cases() { #[test] fn test_schema_edge_cases() { // Test schema fetching with nullable/non-nullable columns and complex types. - let (_temp_dir, provider) = create_test_provider(false); + let (_temp_dir, provider) = create_test_catalog(false); let conn = provider.get_connection(); // Mixed nullable and non-nullable columns @@ -1104,7 +1096,7 @@ fn test_schema_edge_cases() { "#, ) .unwrap(); - let mixed_schema = provider.fetch_current_schema(None, "mixed_nulls").unwrap(); + let mixed_schema = provider.current_schema(None, "mixed_nulls").unwrap(); assert_eq!(mixed_schema.fields().len(), 4); assert!(!mixed_schema.field_with_name("id").unwrap().is_nullable()); assert!( @@ -1145,9 +1137,7 @@ fn test_schema_edge_cases() { "#, ) .unwrap(); - let complex_schema = provider - .fetch_current_schema(None, "complex_types") - .unwrap(); + let complex_schema = provider.current_schema(None, "complex_types").unwrap(); assert_eq!(complex_schema.fields().len(), 11); assert!(matches!( complex_schema @@ -1196,7 +1186,7 @@ fn test_schema_edge_cases() { #[test] fn test_concurrent_snapshot_isolation() { // Test statistics with special characters and edge case JSON values. - let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let special_payload = r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "empty": ""}"#; @@ -1229,7 +1219,7 @@ fn test_concurrent_snapshot_isolation() { #[test] fn test_large_statistics_payload() { // Test handling of large statistics payloads. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_provider_with_data(); + let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let large_histogram: Vec = (0..1000).collect(); let large_payload = json!({ @@ -1248,9 +1238,9 @@ fn test_large_statistics_payload() { assert!(result.is_ok()); let conn = provider.get_connection(); - let current_snapshot = provider.fetch_current_snapshot().unwrap(); + let current_snapshot = provider.current_snapshot().unwrap(); let stats = provider - .fetch_table_statistics("test_table", current_snapshot.0, conn) + .table_statistics("test_table", current_snapshot, conn) .unwrap() .unwrap(); @@ -1272,7 +1262,7 @@ fn test_large_statistics_payload() { #[test] fn test_mixed_null_and_non_null_columns() { // Test schema fetching with mixed nullable and non-nullable columns. - let (_temp_dir, provider) = create_test_provider(false); + let (_temp_dir, provider) = create_test_catalog(false); let conn = provider.get_connection(); conn.execute_batch( @@ -1287,7 +1277,7 @@ fn test_mixed_null_and_non_null_columns() { ) .unwrap(); - let schema = provider.fetch_current_schema(None, "mixed_nulls").unwrap(); + let schema = provider.current_schema(None, "mixed_nulls").unwrap(); assert_eq!(schema.fields().len(), 4); @@ -1307,7 +1297,7 @@ fn test_mixed_null_and_non_null_columns() { #[test] fn test_schema_with_complex_types() { // Test schema fetching with various complex and edge case data types. 
- let (_temp_dir, provider) = create_test_provider(false); + let (_temp_dir, provider) = create_test_catalog(false); let conn = provider.get_connection(); conn.execute_batch( @@ -1329,9 +1319,7 @@ fn test_schema_with_complex_types() { ) .unwrap(); - let schema = provider - .fetch_current_schema(None, "complex_types") - .unwrap(); + let schema = provider.current_schema(None, "complex_types").unwrap(); assert_eq!(schema.fields().len(), 11); diff --git a/optd/statistics/Cargo.toml b/optd/statistics/Cargo.toml deleted file mode 100644 index 62b2e1c..0000000 --- a/optd/statistics/Cargo.toml +++ /dev/null @@ -1,20 +0,0 @@ -[package] -name = "optd-statistics" -version.workspace = true -edition.workspace = true -repository.workspace = true - -[dependencies] -parking_lot = "=0.12.5" -glob = "0.3.2" -url = { version = "^2.5.7" } -async-trait = { version = "^0.1.89" } -tokio = { workspace = true, features = ["full"] } -duckdb = { features = ["bundled"], version = "1.4.0" } -snafu = "0.8.6" -serde_json = "1.0" -futures = "0.3.31" -serde = "1.0.0" - -[dev-dependencies] -tempfile = "3.8" diff --git a/optd/statistics/src/lib.rs b/optd/statistics/src/lib.rs deleted file mode 100644 index 18783a1..0000000 --- a/optd/statistics/src/lib.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod statistics; - -pub use statistics::{DuckLakeStatisticsProvider, Error as InterfaceError, StatisticsProvider}; diff --git a/optd/statistics/src/statistics.rs b/optd/statistics/src/statistics.rs deleted file mode 100644 index 394eb53..0000000 --- a/optd/statistics/src/statistics.rs +++ /dev/null @@ -1,646 +0,0 @@ -use std::sync::Arc; - -use duckdb::{ - Connection, Error as DuckDBError, - arrow::datatypes::{DataType, Field, Schema, SchemaRef}, - params, - types::Null, -}; - -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use snafu::{ResultExt, prelude::*}; - -const DEFAULT_METADATA_FILE: &str = "metadata.ducklake"; - -/// SQL query to fetch table statistics including column metadata and advanced stats 
at a specific snapshot. -const FETCH_TABLE_STATS_QUERY: &str = r#" - SELECT - ts.table_id, - dc.column_id, - dc.column_name, - dc.column_type, - ts.record_count, - ts.next_row_id, - ts.file_size_bytes, - tcas.stats_type, - tcas.payload - FROM __ducklake_metadata_metalake.main.ducklake_table_stats ts - INNER JOIN __ducklake_metadata_metalake.main.ducklake_table dt ON ts.table_id = dt.table_id - INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id - INNER JOIN __ducklake_metadata_metalake.main.ducklake_column dc ON dt.table_id = dc.table_id - LEFT JOIN __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats tcas - ON dc.table_id = tcas.table_id - AND dc.column_id = tcas.column_id - AND ? >= tcas.begin_snapshot - AND (? < tcas.end_snapshot OR tcas.end_snapshot IS NULL) - WHERE - ds.schema_name = current_schema() - AND dt.table_name = ? - AND ts.record_count IS NOT NULL - AND ts.file_size_bytes IS NOT NULL - AND ? >= dc.begin_snapshot - AND (? < dc.end_snapshot OR dc.end_snapshot IS NULL) - ORDER BY ts.table_id, dc.column_id, tcas.stats_type; -"#; - -/// SQL query to close an existing advanced statistics entry by setting its end_snapshot. -const UPDATE_ADV_STATS_QUERY: &str = r#" - UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - SET end_snapshot = ? - WHERE end_snapshot IS NULL - AND stats_type = ? - AND column_id = ? - AND table_id = ?; -"#; - -/// SQL query to insert a new advanced statistics entry. -const INSERT_ADV_STATS_QUERY: &str = r#" - INSERT INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) - VALUES (?, ?, ?, ?, ?, ?); -"#; - -/// SQL query to insert a new snapshot record. 
-const INSERT_SNAPSHOT_QUERY: &str = r#" - INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot - (snapshot_id, snapshot_time, schema_version, next_catalog_id, next_file_id) - VALUES (?, NOW(), ?, ?, ?); -"#; - -/// SQL query to record a snapshot change in the change log. -const INSERT_SNAPSHOT_CHANGE_QUERY: &str = r#" - INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot_changes - (snapshot_id, changes_made, author, commit_message, commit_extra_info) - VALUES (?, ?, ?, ?, ?); -"#; - -/// Error types for statistics operations. -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Database connection error: {}", source))] - Connection { source: DuckDBError }, - #[snafu(display("Query execution failed: {}", source))] - QueryExecution { source: DuckDBError }, - #[snafu(display("JSON serialization error: {}", source))] - JsonSerialization { source: serde_json::Error }, - #[snafu(display("ARROW DataType conversion error: {}", source))] - ArrowDataTypeConversion { source: duckdb::Error }, - #[snafu(display( - "Get statistics failed for table: {}, column: {}, snapshot: {}", - table, - column, - snapshot - ))] - GetStatsFailed { - table: String, - column: String, - snapshot: i64, - }, - #[snafu(display( - "Group statistics not found for group_id: {}, stats_type: {}, snapshot: {}", - group_id, - stats_type, - snapshot - ))] - GroupStatsNotFound { - group_id: i64, - stats_type: String, - snapshot: i64, - }, -} - -/// Internal struct representing a row from the table statistics query. -/// Used for collecting data before aggregating into TableStatistics. -struct TableColumnStatisticsEntry { - table_id: i64, - column_id: i64, - column_name: String, - column_type: String, - record_count: i64, - next_row_id: i64, - file_size_bytes: i64, - stats_type: Option, - payload: Option, -} - -/// Statistics for a table including row count and per-column statistics. -/// Main structure returned when querying table statistics. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TableStatistics { - pub row_count: usize, - pub column_statistics: Vec, -} - -impl FromIterator> for TableStatistics { - fn from_iter>>( - iter: T, - ) -> Self { - let mut row_flag = false; - let mut row_count = 0; - let mut column_statistics = Vec::new(); - - // Stats will be ordered by table_id then column_id - for TableColumnStatisticsEntry { - table_id: _, - column_id, - column_name, - column_type, - record_count, - next_row_id: _, - file_size_bytes: _, - stats_type, - payload, - } in iter.into_iter().flatten() - { - // Check if unique table/column combination - if column_statistics - .last() - .is_none_or(|last: &ColumnStatistics| last.column_id != column_id) - { - // New column encountered - column_statistics.push(ColumnStatistics::new( - column_id, - column_type.clone(), - column_name.clone(), - Vec::new(), - )); - } - - assert!( - !column_statistics.is_empty() - && column_statistics.last().unwrap().column_id == column_id, - "Column statistics should not be empty and last column_id should match current column_id" - ); - - if let Some(last_column_stat) = column_statistics.last_mut() { - if let (Some(st), Some(pl)) = (stats_type, payload) { - let data = serde_json::from_str(&pl).unwrap_or(Value::Null); - last_column_stat.add_advanced_stat(AdvanceColumnStatistics { - stats_type: st, - data, - }); - } - } - - // Assuming all columns have the same record_count, only need to set once - if !row_flag { - row_count = record_count as usize; - row_flag = true; - } - } - - TableStatistics { - row_count, - column_statistics, - } - } -} - -/// Statistics for a single column including type, name, and advanced statistics. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ColumnStatistics { - pub column_id: i64, - pub column_type: String, - pub name: String, - pub advanced_stats: Vec, -} - -impl ColumnStatistics { - fn new( - column_id: i64, - column_type: String, - name: String, - advanced_stats: Vec, - ) -> Self { - Self { - column_id, - column_type, - name, - advanced_stats, - } - } - - fn add_advanced_stat(&mut self, stat: AdvanceColumnStatistics) { - self.advanced_stats.push(stat); - } -} - -/// An advanced statistics entry with type and serialized data at a snapshot. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct AdvanceColumnStatistics { - pub stats_type: String, - pub data: Value, -} - -/// Identifier for a snapshot in the statistics database. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SnapshotId(pub i64); - -/// Snapshot metadata including schema version and next IDs. -#[derive(Debug, Clone, Serialize, Deserialize)] - -pub struct SnapshotInfo { - pub snapshot_id: i64, - pub schema_version: i64, - pub next_catalog_id: i64, - pub next_file_id: i64, -} - -/// Schema information including name, ID, and valid snapshot range. -#[derive(Debug, Clone, Serialize, Deserialize)] - -pub struct CurrentSchema { - pub schema_name: String, - pub schema_id: i64, - pub begin_snapshot: i64, - pub end_snapshot: Option, -} - -#[derive(Debug, Serialize, Deserialize)] -struct StatisticsUpdate { - stats_type: String, - payload: String, -} - -/// Trait defining operations for managing table statistics with snapshot-based time travel. -pub trait StatisticsProvider { - /// Fetches the current (most recent) snapshot ID. - fn fetch_current_snapshot(&self) -> Result; - - /// Fetches complete metadata for the current snapshot. - fn fetch_current_snapshot_info(&self) -> Result; - - /// Fetches the Arrow schema for a table at the current snapshot. 
- fn fetch_current_schema(&self, schema: Option<&str>, table: &str) -> Result; - - /// Fetches schema information including name, ID, and snapshot range. - fn fetch_current_schema_info(&self) -> Result; - - /// Retrieves table and column statistics at a specific snapshot. - fn fetch_table_statistics( - &self, - table_name: &str, - snapshot: i64, - connection: &Connection, - ) -> Result, Error>; - - /// Updates or inserts advanced statistics for a table column. - fn update_table_column_stats( - &self, - column_id: i64, - table_id: i64, - stats_type: &str, - payload: &str, - ) -> Result<(), Error>; -} - -/// DuckLake-based implementation of StatisticsProvider using DuckDB with snapshot management. -pub struct DuckLakeStatisticsProvider { - conn: Connection, -} - -impl DuckLakeStatisticsProvider { - /// Converts a DuckDB type string to an Arrow DataType. - fn duckdb_type_to_arrow(type_str: &str) -> Result { - // Handle common DuckDB types - let data_type = match type_str.to_uppercase().as_str() { - "INTEGER" | "INT" | "INT4" => DataType::Int32, - "BIGINT" | "INT8" | "LONG" => DataType::Int64, - "SMALLINT" | "INT2" | "SHORT" => DataType::Int16, - "TINYINT" | "INT1" => DataType::Int8, - "DOUBLE" | "FLOAT8" => DataType::Float64, - "FLOAT" | "REAL" | "FLOAT4" => DataType::Float32, - "BOOLEAN" | "BOOL" => DataType::Boolean, - "VARCHAR" | "TEXT" | "STRING" => DataType::Utf8, - "DATE" => DataType::Date32, - "TIMESTAMP" => { - DataType::Timestamp(duckdb::arrow::datatypes::TimeUnit::Microsecond, None) - } - "TIME" => DataType::Time64(duckdb::arrow::datatypes::TimeUnit::Microsecond), - "BLOB" | "BYTEA" | "BINARY" => DataType::Binary, - "DECIMAL" => DataType::Decimal128(38, 10), // Default precision and scale - _ => { - // For unsupported types, use Utf8 as fallback or you could error out - // Here we'll just return an error through the ArrowDataTypeConversion variant - return Err(Error::ArrowDataTypeConversion { - source: DuckDBError::FromSqlConversionFailure( - 0, - 
duckdb::types::Type::Text, - Box::new(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Unsupported DuckDB type for Arrow conversion: {}", type_str), - )), - ), - }); - } - }; - Ok(data_type) - } - - /// Creates a new DuckLakeStatisticsProvider with optional file paths. - /// If `location` is None, uses in-memory database. If `metadata_path` is None, uses default metadata file. - pub fn try_new(location: Option<&str>, metadata_path: Option<&str>) -> Result { - let conn = if let Some(path) = location { - Connection::open(path).context(ConnectionSnafu)? - } else { - Connection::open_in_memory().context(ConnectionSnafu)? - }; - - // Use provided metadata path or default to DEFAULT_METADATA_FILE - let metadata_file = metadata_path.unwrap_or(DEFAULT_METADATA_FILE); - let setup_query = format!( - r#" - INSTALL ducklake; - LOAD ducklake; - ATTACH 'ducklake:{}' AS metalake; - USE metalake; - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats ( - column_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - table_id BIGINT, - stats_type VARCHAR, - payload VARCHAR - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query ( - query_id BIGINT, - query_string VARCHAR, - root_group_id BIGINT - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query_instance ( - query_instance_id BIGINT PRIMARY KEY, - query_id BIGINT, - creation_time BIGINT, - snapshot_id BIGINT - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group_stats ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - stats_type VARCHAR, - payload VARCHAR - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_execution_subplan_feedback ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, 
- stats_type VARCHAR, - payload VARCHAR - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_subplan_scalar_feedback ( - scalar_id BIGINT, - group_id BIGINT, - stats_type VARCHAR, - payload VARCHAR, - query_instance_id BIGINT - ); - "#, - metadata_file - ); - conn.execute_batch(&setup_query).context(ConnectionSnafu)?; - Ok(Self { conn }) - } - - /// Returns a reference to the underlying DuckDB connection. - pub fn get_connection(&self) -> &Connection { - &self.conn - } - - /// Begins a database transaction. - fn begin_transaction(&self) -> Result<(), Error> { - self.conn - .execute_batch("BEGIN TRANSACTION;") - .context(QueryExecutionSnafu) - } - - /// Commits the current database transaction. - fn commit_transaction(&self) -> Result<(), Error> { - self.conn - .execute_batch("COMMIT TRANSACTION;") - .context(QueryExecutionSnafu) - } -} - -impl StatisticsProvider for DuckLakeStatisticsProvider { - fn fetch_current_snapshot(&self) -> Result { - self.conn - .prepare("FROM ducklake_current_snapshot('metalake');") - .context(QueryExecutionSnafu)? - .query_row([], |row| Ok(SnapshotId(row.get(0)?))) - .context(QueryExecutionSnafu) - } - - fn fetch_current_snapshot_info(&self) -> Result { - self.conn - .prepare( - r#" - SELECT snapshot_id, schema_version, next_catalog_id, next_file_id - FROM __ducklake_metadata_metalake.main.ducklake_snapshot - WHERE snapshot_id = (SELECT MAX(snapshot_id) - FROM __ducklake_metadata_metalake.main.ducklake_snapshot); - "#, - ) - .context(QueryExecutionSnafu)? 
- .query_row([], |row| { - Ok(SnapshotInfo { - snapshot_id: row.get("snapshot_id")?, - schema_version: row.get("schema_version")?, - next_catalog_id: row.get("next_catalog_id")?, - next_file_id: row.get("next_file_id")?, - }) - }) - .context(QueryExecutionSnafu) - } - - fn fetch_current_schema(&self, schema: Option<&str>, table: &str) -> Result { - let table_ref = schema - .map(|s| format!("{}.{}", s, table)) - .unwrap_or_else(|| table.to_string()); - - let schema_query = format!("DESCRIBE {};", table_ref); - - let mut stmt = self - .conn - .prepare(&schema_query) - .context(QueryExecutionSnafu)?; - - let mut fields = Vec::new(); - let column_iter = stmt - .query_map([], |row| { - let column_name: String = row.get("column_name")?; - let column_type_str: String = row.get("column_type")?; - let null: String = row.get("null")?; - - // Convert DuckDB type to Arrow type - let column_type = Self::duckdb_type_to_arrow(&column_type_str).map_err(|_| { - DuckDBError::FromSqlConversionFailure( - 0, - duckdb::types::Type::Text, - Box::new(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!( - "Could not convert DuckDB type '{}' to Arrow type", - column_type_str - ), - )), - ) - })?; - - fields.push(Field::new(column_name, column_type, null == "YES")); - Ok(()) - }) - .context(QueryExecutionSnafu)?; - - for result in column_iter { - result.context(QueryExecutionSnafu)?; - } - let schema = Schema::new(fields); - Ok(Arc::new(schema)) - } - - fn fetch_current_schema_info(&self) -> Result { - self.conn - .prepare( - r#" - SELECT ds.schema_id, ds.schema_name, ds.begin_snapshot, ds.end_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_schema ds - WHERE ds.schema_name = current_schema(); - "#, - ) - .context(QueryExecutionSnafu)? 
- .query_row([], |row| { - Ok(CurrentSchema { - schema_name: row.get("schema_name")?, - schema_id: row.get("schema_id")?, - begin_snapshot: row.get("begin_snapshot")?, - end_snapshot: row.get("end_snapshot")?, - }) - }) - .context(QueryExecutionSnafu) - } - - fn fetch_table_statistics( - &self, - table: &str, - snapshot: i64, - conn: &Connection, - ) -> Result, Error> { - let mut stmt = conn - .prepare(FETCH_TABLE_STATS_QUERY) - .context(QueryExecutionSnafu)?; - - let entries = stmt - .query_map( - [ - &snapshot.to_string(), - &snapshot.to_string(), - table, - &snapshot.to_string(), - &snapshot.to_string(), - ], - |row| { - Ok(TableColumnStatisticsEntry { - table_id: row.get("table_id")?, - column_id: row.get("column_id")?, - column_name: row.get("column_name")?, - column_type: row.get("column_type")?, - record_count: row.get("record_count")?, - next_row_id: row.get("next_row_id")?, - file_size_bytes: row.get("file_size_bytes")?, - stats_type: row.get("stats_type")?, - payload: row.get("payload")?, - }) - }, - ) - .context(QueryExecutionSnafu)? - .map(|result| result.context(QueryExecutionSnafu)); - - Ok(Some(TableStatistics::from_iter(entries))) - } - - /// Update table column statistics - fn update_table_column_stats( - &self, - column_id: i64, - table_id: i64, - stats_type: &str, - payload: &str, - ) -> Result<(), Error> { - // Start transaction - self.begin_transaction()?; - - // Fetch current snapshot info - let current_snapshot = self.fetch_current_snapshot_info()?; - let current_snapshot_id = current_snapshot.snapshot_id; - - // Update matching past snapshot to close it - self.conn - .prepare(UPDATE_ADV_STATS_QUERY) - .context(QueryExecutionSnafu)? - .execute(params![ - current_snapshot_id + 1, - stats_type, - column_id, - table_id, - ]) - .context(QueryExecutionSnafu)?; - - // Insert new snapshot - self.conn - .prepare(INSERT_ADV_STATS_QUERY) - .context(QueryExecutionSnafu)? 
- .execute(params![ - column_id, - current_snapshot_id + 1, - Null, - table_id, - stats_type, - payload, - ]) - .context(QueryExecutionSnafu)?; - - self.conn - .prepare(INSERT_SNAPSHOT_QUERY) - .context(QueryExecutionSnafu)? - .execute(params![ - current_snapshot_id + 1, - current_snapshot.schema_version, - current_snapshot.next_catalog_id, - current_snapshot.next_file_id, - ]) - .context(QueryExecutionSnafu)?; - - self.conn - .prepare(INSERT_SNAPSHOT_CHANGE_QUERY) - .context(QueryExecutionSnafu)? - .execute(params![ - current_snapshot_id + 1, - format!( - r#"updated_stats:"main"."ducklake_table_column_adv_stats",{}:{}"#, - stats_type, payload - ), - Null, - Null, - Null, - ]) - .context(QueryExecutionSnafu)?; - - // Commit transaction - self.commit_transaction()?; - - Ok(()) - } -} diff --git a/optd/storage/Cargo.toml b/optd/storage/Cargo.toml deleted file mode 100644 index a23a2b5..0000000 --- a/optd/storage/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "optd-storage" -version.workspace = true -edition.workspace = true -repository.workspace = true - -[dependencies] diff --git a/optd/storage/src/lib.rs b/optd/storage/src/lib.rs deleted file mode 100644 index b4a7cf8..0000000 --- a/optd/storage/src/lib.rs +++ /dev/null @@ -1,269 +0,0 @@ -#![allow(dead_code)] - -use std::collections::BTreeSet; - -enum CompactionType { - MergeAdjacentTables, - RewriteDeletes, -} - -enum CleanupType { - OldFiles, - OrphanedFiles, -} - -struct DuckLakeTag { - key: String, - value: String, -} - -struct DuckLakeSchemaSetting { - schema_id: usize, - tag: DuckLakeTag, -} - -struct DuckLakeTableSetting { - schema_id: usize, - tag: DuckLakeTag, -} - -struct DuckLakeMetadata { - tags: Vec, - schema_settings: Vec, - table_settings: Vec, -} - -struct DuckLakeSchemaInfo { - id: usize, - uuid: String, - name: String, - path: String, - tags: Vec, -} - -struct DuckLakeColumnInfo { - /// Field index. - id: usize, - name: String, - typ: String, - // TODO: switch to value type? 
- initial_default: String, - default_value: String, - nulls_allowed: bool, - children: Vec, - tags: Vec, -} - -struct DuckLakeInlinedTableInfo { - table_name: String, - schema_version: usize, -} - -struct DuckLakeTableInfo { - /// Table index. - id: usize, - /// Schema index. - schema_id: usize, - uuid: String, - name: String, - columns: Vec, - tags: Vec, - inlined_data_tables: Vec, -} - -struct DuckLakeColumnStatsInfo { - column_id: usize, - value_count: String, - null_count: String, - column_size_bytes: String, - min_val: String, - max_val: String, - contains_nan: String, - extra_stats: String, -} - -struct DuckLakeFilePartitionInfo { - partition_column_index: usize, - partition_value: String, -} - -struct DuckLakePartialFileInfo { - snapshot_id: usize, - max_row_count: usize, -} - -struct DuckLakeFileInfo { - // DataFileIndex, - id: usize, - // TableIndex - table_id: usize, - file_name: String, - row_count: usize, - file_size_bytes: usize, - footer_size: Option, - row_id_start: Option, - partition_id: Option, - begin_snapshot: Option, - max_partial_file_snapshot: Option, - encryption_key: Option, - mapping_id: usize, - column_stats: Vec, - partition_values: Vec, - partial_file_info: Vec, -} - -// struct DuckLakeInlinedData { -// data: Box, -// column_stats: BTreeMap, -// } - -// struct DuckLakeInlinedDataDeletes { -// rows: BTreeSet, -// } - -// struct DuckLakeInlinedDataInfo { -// table_id: usize, -// row_id_start: usize, -// data: Option>, -// } - -struct DuckLakeDeletedInlinedDataInfo { - table_id: usize, - table_name: String, - deleted_row_ids: Vec, -} - -struct DuckLakeDeleteFileInfo { - id: usize, - table_id: usize, - data_file_id: usize, - path: String, - delete_count: usize, - file_size_bytes: usize, - footer_size: usize, - encryption_key: String, -} - -struct DuckLakePartitionFieldInfo { - // default = 0 - partition_key_index: usize, - field_id: usize, - transform: String, -} - -struct DuckLakePartitionInfo { - id: Option, - table_id: usize, - fields: 
Vec, -} - -struct DuckLakeGlobalColumnStatsInfo { - column_id: usize, - contains_null: bool, - has_contains_null: bool, - contains_nan: bool, - has_contains_nan: bool, - min_val: String, - has_min: bool, - // TODO(yuchen): should this be Option? - max_val: String, - has_max: bool, - extra_stats: String, - has_extra_stats: bool, -} - -struct DuckLakeGlobalStatsInfo { - table_id: usize, - initialized: bool, - record_count: usize, - next_row_id: usize, - table_size_bytes: usize, - column_stats: Vec, -} - -struct SnapshotChangeInfo { - changes_made: String, -} - -struct SnapshotDeletedFromFiles { - /// DataFileIndex - deleted_from_files: BTreeSet, -} - -struct DuckLakeSnapshotInfo { - id: usize, - // TODO: timestamp_tz_t - time: String, - schema_version: usize, - change_info: SnapshotChangeInfo, - author: String, - commit_message: String, - commit_extra_info: String, -} - -struct DuckLakeViewInfo { - id: usize, - schema_id: usize, - uuid: String, - name: String, - dialect: String, - column_aliases: Vec, - sql: String, - tags: Vec, -} - -struct DuckLakeTagInfo { - id: usize, - key: String, - value: String, -} - -struct DuckLakeColumnTagInfo { - table_id: usize, - field_index: usize, - key: String, - value: String, -} - -struct DuckLakeDroppedColumn { - table_id: usize, - field_id: usize, -} - -struct DuckLakeNewColumn { - table_id: usize, - column_info: DuckLakeColumnInfo, - parent_index: Option, -} - -struct DuckLakeCatalogInfo { - schemas: Vec, - tables: Vec, - views: Vec, - partitions: Vec, -} - -struct DuckLakeFileData { - path: String, - encryption_key: String, - file_size_bytes: usize, - footer_size: Option, -} - -enum DuckLakeDataType { - DataFile, - InlinedData, - TransactionLocalInlinedData, -} - -struct DuckLakeFileListEntry { - file: DuckLakeFileData, - delete_file: DuckLakeFileData, - row_id_start: Option, - snapshot_id: Option, - max_row_count: Option, - snapshot_filter: Option, - mapping_id: usize, - /// default: DuckLakeDataType::DataFile; - data_type: 
DuckLakeDataType, -} From 54731f624d8cdbd62e47215e2b7952dc4fb16875 Mon Sep 17 00:00:00 2001 From: Yuchen Liang Date: Fri, 31 Oct 2025 00:54:09 -0400 Subject: [PATCH 23/40] fix clippy Signed-off-by: Yuchen Liang --- optd/catalog/src/lib.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs index aee6590..4e46d12 100644 --- a/optd/catalog/src/lib.rs +++ b/optd/catalog/src/lib.rs @@ -194,12 +194,11 @@ impl FromIterator> for TableStatistics "Column statistics should not be empty and last column_id should match current column_id" ); - if let Some(last_column_stat) = column_statistics.last_mut() { - if let (Some(stats_type), Some(payload)) = (e.stats_type, e.payload) { - let data = serde_json::from_str(&payload).unwrap_or(Value::Null); - last_column_stat - .add_advanced_stat(AdvanceColumnStatistics { stats_type, data }); - } + if let Some(last_column_stat) = column_statistics.last_mut() + && let (Some(stats_type), Some(payload)) = (e.stats_type, e.payload) + { + let data = serde_json::from_str(&payload).unwrap_or(Value::Null); + last_column_stat.add_advanced_stat(AdvanceColumnStatistics { stats_type, data }); } // Assuming all columns have the same record_count, only need to set once From 17530801ae477473dc7c92f4c8b566c21ed0be56 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Mon, 3 Nov 2025 22:44:27 -0500 Subject: [PATCH 24/40] refactor methods as associated --- optd/catalog/src/lib.rs | 140 ++++---- optd/catalog/tests/statistics_tests.rs | 478 ++++++++++++------------- 2 files changed, 298 insertions(+), 320 deletions(-) diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs index 4e46d12..903a6c1 100644 --- a/optd/catalog/src/lib.rs +++ b/optd/catalog/src/lib.rs @@ -14,28 +14,31 @@ use snafu::{ResultExt, prelude::*}; /// Operations for managing table statistics with snapshot-based time travel. pub trait Catalog { /// Gets the current (most recent) snapshot ID. 
- fn current_snapshot(&self) -> Result; + fn current_snapshot(conn: &Connection) -> Result; /// Gets complete metadata for the current snapshot. - fn current_snapshot_info(&self) -> Result; + fn current_snapshot_info(conn: &Connection) -> Result; /// Gets the Arrow schema for a table at the current snapshot. - fn current_schema(&self, schema: Option<&str>, table: &str) -> Result; + fn current_schema( + conn: &Connection, + schema: Option<&str>, + table: &str, + ) -> Result; /// Gets schema information including name, ID, and snapshot range. - fn current_schema_info(&self) -> Result; + fn current_schema_info(conn: &Connection) -> Result; /// Retrieves table and column statistics at a specific snapshot. fn table_statistics( - &self, + connection: &Connection, table_name: &str, snapshot: SnapshotId, - connection: &Connection, ) -> Result, Error>; /// Updates or inserts advanced statistics for a table column. fn update_table_column_stats( - &self, + connection: &Connection, column_id: i64, table_id: i64, stats_type: &str, @@ -291,47 +294,46 @@ pub struct DuckLakeCatalog { } impl Catalog for DuckLakeCatalog { - fn current_snapshot(&self) -> Result { - self.conn - .prepare("FROM ducklake_current_snapshot('metalake');") + fn current_snapshot(conn: &Connection) -> Result { + conn.prepare("FROM ducklake_current_snapshot('metalake');") .context(QueryExecutionSnafu)? 
.query_row([], |row| Ok(SnapshotId(row.get(0)?))) .context(QueryExecutionSnafu) } - fn current_snapshot_info(&self) -> Result { - self.conn - .prepare( - r#" - SELECT snapshot_id, schema_version, next_catalog_id, next_file_id - FROM __ducklake_metadata_metalake.main.ducklake_snapshot + fn current_snapshot_info(conn: &Connection) -> Result { + conn.prepare( + r#" + SELECT snapshot_id, schema_version, next_catalog_id, next_file_id + FROM __ducklake_metadata_metalake.main.ducklake_snapshot WHERE snapshot_id = (SELECT MAX(snapshot_id) FROM __ducklake_metadata_metalake.main.ducklake_snapshot); "#, - ) - .context(QueryExecutionSnafu)? - .query_row([], |row| { - Ok(SnapshotInfo { - id: SnapshotId(row.get("snapshot_id")?), - schema_version: row.get("schema_version")?, - next_catalog_id: row.get("next_catalog_id")?, - next_file_id: row.get("next_file_id")?, - }) + ) + .context(QueryExecutionSnafu)? + .query_row([], |row| { + Ok(SnapshotInfo { + id: SnapshotId(row.get("snapshot_id")?), + schema_version: row.get("schema_version")?, + next_catalog_id: row.get("next_catalog_id")?, + next_file_id: row.get("next_file_id")?, }) - .context(QueryExecutionSnafu) + }) + .context(QueryExecutionSnafu) } - fn current_schema(&self, schema: Option<&str>, table: &str) -> Result { + fn current_schema( + conn: &Connection, + schema: Option<&str>, + table: &str, + ) -> Result { let table_ref = schema .map(|s| format!("{}.{}", s, table)) .unwrap_or_else(|| table.to_string()); let schema_query = format!("DESCRIBE {};", table_ref); - let mut stmt = self - .conn - .prepare(&schema_query) - .context(QueryExecutionSnafu)?; + let mut stmt = conn.prepare(&schema_query).context(QueryExecutionSnafu)?; let mut fields = Vec::new(); let column_iter = stmt @@ -367,32 +369,30 @@ impl Catalog for DuckLakeCatalog { Ok(Arc::new(schema)) } - fn current_schema_info(&self) -> Result { - self.conn - .prepare( - r#" - SELECT ds.schema_id, ds.schema_name, ds.begin_snapshot, ds.end_snapshot - FROM 
__ducklake_metadata_metalake.main.ducklake_schema ds - WHERE ds.schema_name = current_schema(); + fn current_schema_info(conn: &Connection) -> Result { + conn.prepare( + r#" + SELECT ds.schema_id, ds.schema_name, ds.begin_snapshot, ds.end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_schema ds + WHERE ds.schema_name = current_schema(); "#, - ) - .context(QueryExecutionSnafu)? - .query_row([], |row| { - Ok(CurrentSchema { - schema_name: row.get("schema_name")?, - schema_id: row.get("schema_id")?, - begin_snapshot: row.get("begin_snapshot")?, - end_snapshot: row.get("end_snapshot")?, - }) + ) + .context(QueryExecutionSnafu)? + .query_row([], |row| { + Ok(CurrentSchema { + schema_name: row.get("schema_name")?, + schema_id: row.get("schema_id")?, + begin_snapshot: row.get("begin_snapshot")?, + end_snapshot: row.get("end_snapshot")?, }) - .context(QueryExecutionSnafu) + }) + .context(QueryExecutionSnafu) } fn table_statistics( - &self, + conn: &Connection, table: &str, snapshot: SnapshotId, - conn: &Connection, ) -> Result, Error> { let mut stmt = conn .prepare(FETCH_TABLE_STATS_QUERY) @@ -423,21 +423,17 @@ impl Catalog for DuckLakeCatalog { /// Update table column statistics fn update_table_column_stats( - &self, + conn: &Connection, column_id: i64, table_id: i64, stats_type: &str, payload: &str, ) -> Result<(), Error> { - // Start transaction - self.begin_transaction()?; - // Fetch current snapshot info - let curr_snapshot = self.current_snapshot_info()?; + let curr_snapshot = Self::current_snapshot_info(conn)?; // Update matching past snapshot to close it - self.conn - .prepare(UPDATE_ADV_STATS_QUERY) + conn.prepare(UPDATE_ADV_STATS_QUERY) .context(QueryExecutionSnafu)? .execute(params![ curr_snapshot.id.0 + 1, @@ -448,8 +444,7 @@ impl Catalog for DuckLakeCatalog { .context(QueryExecutionSnafu)?; // Insert new snapshot - self.conn - .prepare(INSERT_ADV_STATS_QUERY) + conn.prepare(INSERT_ADV_STATS_QUERY) .context(QueryExecutionSnafu)? 
.execute(params![ column_id, @@ -461,8 +456,7 @@ impl Catalog for DuckLakeCatalog { ]) .context(QueryExecutionSnafu)?; - self.conn - .prepare(INSERT_SNAPSHOT_QUERY) + conn.prepare(INSERT_SNAPSHOT_QUERY) .context(QueryExecutionSnafu)? .execute(params![ curr_snapshot.id.0 + 1, @@ -472,8 +466,7 @@ impl Catalog for DuckLakeCatalog { ]) .context(QueryExecutionSnafu)?; - self.conn - .prepare(INSERT_SNAPSHOT_CHANGE_QUERY) + conn.prepare(INSERT_SNAPSHOT_CHANGE_QUERY) .context(QueryExecutionSnafu)? .execute(params![ curr_snapshot.id.0 + 1, @@ -487,9 +480,6 @@ impl Catalog for DuckLakeCatalog { ]) .context(QueryExecutionSnafu)?; - // Commit transaction - self.commit_transaction()?; - Ok(()) } } @@ -576,18 +566,10 @@ impl DuckLakeCatalog { &self.conn } - /// Begins a database transaction. - fn begin_transaction(&self) -> Result<(), Error> { - self.conn - .execute_batch("BEGIN TRANSACTION;") - .context(QueryExecutionSnafu) - } - - /// Commits the current database transaction. - fn commit_transaction(&self) -> Result<(), Error> { - self.conn - .execute_batch("COMMIT TRANSACTION;") - .context(QueryExecutionSnafu) + /// Returns a mutable reference to the underlying DuckDB connection. + /// Required for creating transactions. + pub fn get_connection_mut(&mut self) -> &mut Connection { + &mut self.conn } /// Converts a DuckDB type string to an Arrow DataType. 
@@ -610,8 +592,6 @@ impl DuckLakeCatalog { "BLOB" | "BYTEA" | "BINARY" => DataType::Binary, "DECIMAL" => DataType::Decimal128(38, 10), // Default precision and scale _ => { - // For unsupported types, use Utf8 as fallback or you could error out - // Here we'll just return an error through the ArrowDataTypeConversion variant return Err(Error::ArrowDataTypeConversion { source: DuckDBError::FromSqlConversionFailure( 0, diff --git a/optd/catalog/tests/statistics_tests.rs b/optd/catalog/tests/statistics_tests.rs index accc2fc..b8a114f 100644 --- a/optd/catalog/tests/statistics_tests.rs +++ b/optd/catalog/tests/statistics_tests.rs @@ -62,8 +62,8 @@ fn create_test_catalog_with_data() -> (TempDir, DuckLakeCatalog, i64, i64) { let age_column_id: i64 = conn .query_row( r#" - SELECT column_id - FROM __ducklake_metadata_metalake.main.ducklake_column + SELECT column_id + FROM __ducklake_metadata_metalake.main.ducklake_column WHERE table_id = ? AND column_name = 'age'; "#, [table_id], @@ -85,8 +85,15 @@ fn test_ducklake_statistics_provider_creation() { fn test_table_stats_insertion() { // Test basic statistics insertion without errors. 
let (_temp_dir, provider) = create_test_catalog(true); + let conn = provider.get_connection(); - let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); + let result = DuckLakeCatalog::update_table_column_stats( + conn, + 1, + 1, + "ndv", + r#"{"distinct_count": 1000}"#, + ); assert!(result.is_ok()); } @@ -96,24 +103,21 @@ fn test_table_stats_insertion_and_retrieval() { let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); - provider - .update_table_column_stats(age_column_id, table_id, "min_value", "25") + DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "min_value", "25") .unwrap(); - provider - .update_table_column_stats(age_column_id, table_id, "max_value", "35") - .unwrap(); - provider - .update_table_column_stats( - age_column_id, - table_id, - "histogram", - r#"{"buckets": [{"min": 20, "max": 30, "count": 2}]}"#, - ) + DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "max_value", "35") .unwrap(); + DuckLakeCatalog::update_table_column_stats( + conn, + age_column_id, + table_id, + "histogram", + r#"{"buckets": [{"min": 20, "max": 30, "count": 2}]}"#, + ) + .unwrap(); - let latest_snapshot = provider.current_snapshot().unwrap(); - let stats = provider - .table_statistics("test_table", latest_snapshot, conn) + let latest_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let stats = DuckLakeCatalog::table_statistics(conn, "test_table", latest_snapshot) .unwrap() .unwrap(); @@ -151,8 +155,9 @@ fn test_table_stats_insertion_and_retrieval() { fn test_current_schema() { // Test fetching current schema info returns valid metadata. 
let (_temp_dir, provider) = create_test_catalog(true); + let conn = provider.get_connection(); - let schema = provider.current_schema_info().unwrap(); + let schema = DuckLakeCatalog::current_schema_info(conn).unwrap(); assert_eq!(schema.schema_name, "main"); assert_eq!(schema.schema_id, 0); @@ -166,22 +171,19 @@ fn test_snapshot_versioning_and_stats_types() { let (_temp_dir, provider) = create_test_catalog(true); let conn = provider.get_connection(); - provider - .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) + DuckLakeCatalog::update_table_column_stats(conn, 1, 1, "ndv", r#"{"distinct_count": 1000}"#) .unwrap(); - provider - .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) + DuckLakeCatalog::update_table_column_stats(conn, 2, 1, "ndv", r#"{"distinct_count": 2000}"#) .unwrap(); - provider - .update_table_column_stats(3, 1, "histogram", r#"{"buckets": [1,2,3]}"#) + DuckLakeCatalog::update_table_column_stats(conn, 3, 1, "histogram", r#"{"buckets": [1,2,3]}"#) .unwrap(); let snapshots: Vec<(i64, i64)> = conn .prepare( r#" - SELECT column_id, begin_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = 1 + SELECT column_id, begin_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 ORDER BY begin_snapshot; "#, ) @@ -194,19 +196,17 @@ fn test_snapshot_versioning_and_stats_types() { assert!(snapshots[1].1 > snapshots[0].1); assert!(snapshots[2].1 > snapshots[1].1); - provider - .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1500}"#) + DuckLakeCatalog::update_table_column_stats(conn, 1, 1, "ndv", r#"{"distinct_count": 1500}"#) .unwrap(); - provider - .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 2000}"#) + DuckLakeCatalog::update_table_column_stats(conn, 1, 1, "ndv", r#"{"distinct_count": 2000}"#) .unwrap(); let versions: Vec<(i64, Option, String)> = conn .prepare( r#" - SELECT begin_snapshot, 
end_snapshot, payload - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = 1 AND column_id = 1 AND stats_type = 'ndv' + SELECT begin_snapshot, end_snapshot, payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 AND column_id = 1 AND stats_type = 'ndv' ORDER BY begin_snapshot; "#, ) @@ -224,18 +224,22 @@ fn test_snapshot_versioning_and_stats_types() { assert!(versions[1].2.contains("1500")); assert!(versions[2].2.contains("2000")); - provider - .update_table_column_stats(1, 1, "histogram", r#"{"buckets": [1,2,3,4,5]}"#) - .unwrap(); - provider - .update_table_column_stats(1, 1, "minmax", r#"{"min": 0, "max": 100}"#) + DuckLakeCatalog::update_table_column_stats( + conn, + 1, + 1, + "histogram", + r#"{"buckets": [1,2,3,4,5]}"#, + ) + .unwrap(); + DuckLakeCatalog::update_table_column_stats(conn, 1, 1, "minmax", r#"{"min": 0, "max": 100}"#) .unwrap(); let type_count: i64 = conn .query_row( r#" - SELECT COUNT(DISTINCT stats_type) - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + SELECT COUNT(DISTINCT stats_type) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 1 AND column_id = 1 AND end_snapshot IS NULL "#, [], @@ -259,14 +263,11 @@ fn test_snapshot_tracking_and_multi_table_stats() { ) .unwrap(); - provider - .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) + DuckLakeCatalog::update_table_column_stats(conn, 1, 1, "ndv", r#"{"distinct_count": 1000}"#) .unwrap(); - provider - .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) + DuckLakeCatalog::update_table_column_stats(conn, 2, 1, "ndv", r#"{"distinct_count": 2000}"#) .unwrap(); - provider - .update_table_column_stats(3, 1, "ndv", r#"{"distinct_count": 3000}"#) + DuckLakeCatalog::update_table_column_stats(conn, 3, 1, "ndv", r#"{"distinct_count": 3000}"#) .unwrap(); let after_table1_count: i64 = conn @@ -281,8 +282,8 @@ fn 
test_snapshot_tracking_and_multi_table_stats() { let changes_count: i64 = conn .query_row( r#" - SELECT COUNT(*) - FROM __ducklake_metadata_metalake.main.ducklake_snapshot_changes + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_snapshot_changes WHERE changes_made LIKE 'updated_stats:%' "#, [], @@ -291,18 +292,16 @@ fn test_snapshot_tracking_and_multi_table_stats() { .unwrap(); assert_eq!(changes_count, 3); - provider - .update_table_column_stats(1, 2, "ndv", r#"{"distinct_count": 5000}"#) + DuckLakeCatalog::update_table_column_stats(conn, 1, 2, "ndv", r#"{"distinct_count": 5000}"#) .unwrap(); - provider - .update_table_column_stats(2, 2, "ndv", r#"{"distinct_count": 6000}"#) + DuckLakeCatalog::update_table_column_stats(conn, 2, 2, "ndv", r#"{"distinct_count": 6000}"#) .unwrap(); let table1_count: i64 = conn .query_row( r#" - SELECT COUNT(*) - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 1 "#, [], @@ -312,8 +311,8 @@ fn test_snapshot_tracking_and_multi_table_stats() { let table2_count: i64 = conn .query_row( r#" - SELECT COUNT(*) - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats WHERE table_id = 2 "#, [], @@ -327,8 +326,8 @@ fn test_snapshot_tracking_and_multi_table_stats() { let all_snapshots: Vec = conn .prepare( r#" - SELECT begin_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + SELECT begin_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats ORDER BY begin_snapshot "#, ) @@ -349,31 +348,28 @@ fn test_update_and_fetch_table_column_stats() { let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); - let initial_snapshot = provider.current_snapshot().unwrap(); + let 
initial_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); assert!( - provider - .table_statistics("test_table", initial_snapshot, conn) + DuckLakeCatalog::table_statistics(conn, "test_table", initial_snapshot) .unwrap() .is_some() ); - provider - .update_table_column_stats(age_column_id, table_id, "min_value", "25") + DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "min_value", "25") .unwrap(); - let snapshot_after_min = provider.current_snapshot().unwrap(); + let snapshot_after_min = DuckLakeCatalog::current_snapshot(conn).unwrap(); assert_eq!(snapshot_after_min.0, initial_snapshot.0 + 1); - provider - .update_table_column_stats(age_column_id, table_id, "max_value", "35") + DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "max_value", "35") .unwrap(); - let snapshot_after_max = provider.current_snapshot().unwrap(); + let snapshot_after_max = DuckLakeCatalog::current_snapshot(conn).unwrap(); assert_eq!(snapshot_after_max.0, initial_snapshot.0 + 2); let (min_val, max_val): (Option, Option) = conn .query_row( r#" - SELECT min_value, max_value - FROM __ducklake_metadata_metalake.main.ducklake_table_column_stats + SELECT min_value, max_value + FROM __ducklake_metadata_metalake.main.ducklake_table_column_stats WHERE table_id = ? AND column_id = ?; "#, [table_id, age_column_id], @@ -387,9 +383,9 @@ fn test_update_and_fetch_table_column_stats() { let adv_stats: Vec<(String, String, i64, Option)> = conn .prepare( r#" - SELECT stats_type, payload, begin_snapshot, end_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = ? AND column_id = ? + SELECT stats_type, payload, begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = ? AND column_id = ? 
ORDER BY stats_type, begin_snapshot; "#, ) @@ -413,8 +409,7 @@ fn test_update_and_fetch_table_column_stats() { .any(|(st, p, _, e)| st == "min_value" && p == "25" && e.is_none()) ); - provider - .update_table_column_stats( + DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "histogram", @@ -422,7 +417,7 @@ fn test_update_and_fetch_table_column_stats() { ) .unwrap(); - let snapshot_after_histogram = provider.current_snapshot().unwrap(); + let snapshot_after_histogram = DuckLakeCatalog::current_snapshot(conn).unwrap(); assert_eq!(snapshot_after_histogram.0, initial_snapshot.0 + 3); } @@ -432,40 +427,39 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); - let snapshot_0 = provider.current_snapshot().unwrap(); + let snapshot_0 = DuckLakeCatalog::current_snapshot(conn).unwrap(); - provider - .update_table_column_stats( - age_column_id, - table_id, - "histogram", - r#"{"version": 1, "buckets": [1, 2, 3]}"#, - ) - .unwrap(); - let snapshot_1 = provider.current_snapshot().unwrap(); + DuckLakeCatalog::update_table_column_stats( + conn, + age_column_id, + table_id, + "histogram", + r#"{"version": 1, "buckets": [1, 2, 3]}"#, + ) + .unwrap(); + let snapshot_1 = DuckLakeCatalog::current_snapshot(conn).unwrap(); - provider - .update_table_column_stats( - age_column_id, - table_id, - "histogram", - r#"{"version": 2, "buckets": [1, 2, 3, 4, 5]}"#, - ) - .unwrap(); - let snapshot_2 = provider.current_snapshot().unwrap(); + DuckLakeCatalog::update_table_column_stats( + conn, + age_column_id, + table_id, + "histogram", + r#"{"version": 2, "buckets": [1, 2, 3, 4, 5]}"#, + ) + .unwrap(); + let snapshot_2 = DuckLakeCatalog::current_snapshot(conn).unwrap(); - provider - .update_table_column_stats( - age_column_id, - table_id, - "histogram", - r#"{"version": 3, "buckets": [10, 20, 30]}"#, - ) - .unwrap(); - let snapshot_3 = 
provider.current_snapshot().unwrap(); + DuckLakeCatalog::update_table_column_stats( + conn, + age_column_id, + table_id, + "histogram", + r#"{"version": 3, "buckets": [10, 20, 30]}"#, + ) + .unwrap(); + let snapshot_3 = DuckLakeCatalog::current_snapshot(conn).unwrap(); - let stats_at_0 = provider - .table_statistics("test_table", snapshot_0, conn) + let stats_at_0 = DuckLakeCatalog::table_statistics(conn, "test_table", snapshot_0) .unwrap() .unwrap(); let age_stats_0 = stats_at_0 @@ -475,8 +469,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { .unwrap(); assert_eq!(age_stats_0.advanced_stats.len(), 0); - let stats_at_1 = provider - .table_statistics("test_table", snapshot_1, conn) + let stats_at_1 = DuckLakeCatalog::table_statistics(conn, "test_table", snapshot_1) .unwrap() .unwrap(); let age_stats_1 = stats_at_1 @@ -492,8 +485,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { .contains("\"version\":1") ); - let stats_at_2 = provider - .table_statistics("test_table", snapshot_2, conn) + let stats_at_2 = DuckLakeCatalog::table_statistics(conn, "test_table", snapshot_2) .unwrap() .unwrap(); let age_stats_2 = stats_at_2 @@ -509,8 +501,7 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { .contains("\"version\":2") ); - let stats_at_3 = provider - .table_statistics("test_table", snapshot_3, conn) + let stats_at_3 = DuckLakeCatalog::table_statistics(conn, "test_table", snapshot_3) .unwrap() .unwrap(); let age_stats_3 = stats_at_3 @@ -533,35 +524,37 @@ fn test_fetch_table_stats_multiple_stat_types() { let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); - provider - .update_table_column_stats(age_column_id, table_id, "min_value", "25") - .unwrap(); - provider - .update_table_column_stats(age_column_id, table_id, "max_value", "35") + DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "min_value", "25") .unwrap(); - provider - 
.update_table_column_stats( - age_column_id, - table_id, - "histogram", - r#"{"buckets": [20, 25, 30, 35]}"#, - ) - .unwrap(); - provider - .update_table_column_stats(age_column_id, table_id, "ndv", r#"{"distinct_count": 3}"#) - .unwrap(); - provider - .update_table_column_stats( - age_column_id, - table_id, - "quantiles", - r#"{"p50": 30, "p95": 34, "p99": 35}"#, - ) + DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "max_value", "35") .unwrap(); + DuckLakeCatalog::update_table_column_stats( + conn, + age_column_id, + table_id, + "histogram", + r#"{"buckets": [20, 25, 30, 35]}"#, + ) + .unwrap(); + DuckLakeCatalog::update_table_column_stats( + conn, + age_column_id, + table_id, + "ndv", + r#"{"distinct_count": 3}"#, + ) + .unwrap(); + DuckLakeCatalog::update_table_column_stats( + conn, + age_column_id, + table_id, + "quantiles", + r#"{"p50": 30, "p95": 34, "p99": 35}"#, + ) + .unwrap(); - let current_snapshot = provider.current_snapshot().unwrap(); - let stats = provider - .table_statistics("test_table", current_snapshot, conn) + let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let stats = DuckLakeCatalog::table_statistics(conn, "test_table", current_snapshot) .unwrap() .unwrap(); @@ -592,13 +585,11 @@ fn test_fetch_table_stats_columns_without_stats() { let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); - provider - .update_table_column_stats(age_column_id, table_id, "min_value", "25") + DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "min_value", "25") .unwrap(); - let current_snapshot = provider.current_snapshot().unwrap(); - let stats = provider - .table_statistics("test_table", current_snapshot, conn) + let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let stats = DuckLakeCatalog::table_statistics(conn, "test_table", current_snapshot) .unwrap() .unwrap(); @@ -642,11 +633,11 @@ fn 
test_fetch_table_stats_row_count() { let table_id: i64 = conn .query_row( r#" - SELECT table_id - FROM __ducklake_metadata_metalake.main.ducklake_table dt - INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds - ON dt.schema_id = ds.schema_id - WHERE ds.schema_name = current_schema() + SELECT table_id + FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds + ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'large_table'; "#, [], @@ -657,8 +648,8 @@ fn test_fetch_table_stats_row_count() { let col1_id: i64 = conn .query_row( r#" - SELECT column_id - FROM __ducklake_metadata_metalake.main.ducklake_column + SELECT column_id + FROM __ducklake_metadata_metalake.main.ducklake_column WHERE table_id = ? AND column_name = 'col1'; "#, [table_id], @@ -666,13 +657,17 @@ fn test_fetch_table_stats_row_count() { ) .unwrap(); - provider - .update_table_column_stats(col1_id, table_id, "ndv", r#"{"distinct_count": 100}"#) - .unwrap(); + DuckLakeCatalog::update_table_column_stats( + conn, + col1_id, + table_id, + "ndv", + r#"{"distinct_count": 100}"#, + ) + .unwrap(); - let current_snapshot = provider.current_snapshot().unwrap(); - let stats = provider - .table_statistics("large_table", current_snapshot, conn) + let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let stats = DuckLakeCatalog::table_statistics(conn, "large_table", current_snapshot) .unwrap() .unwrap(); @@ -698,7 +693,7 @@ fn test_current_schema_arrow() { ) .unwrap(); - let schema = provider.current_schema(None, "schema_test_table").unwrap(); + let schema = DuckLakeCatalog::current_schema(conn, None, "schema_test_table").unwrap(); assert_eq!(schema.fields().len(), 4); @@ -725,9 +720,8 @@ fn test_current_schema_arrow() { &duckdb::arrow::datatypes::DataType::Boolean )); - let schema_explicit = provider - .current_schema(Some("main"), "schema_test_table") - .unwrap(); + let 
schema_explicit = + DuckLakeCatalog::current_schema(conn, Some("main"), "schema_test_table").unwrap(); assert_eq!(schema_explicit.fields().len(), 4); } @@ -737,7 +731,7 @@ fn test_multiple_schemas_comprehensive() { let (_temp_dir, provider) = create_test_catalog(false); let conn = provider.get_connection(); - let initial_schema_info = provider.current_schema_info().unwrap(); + let initial_schema_info = DuckLakeCatalog::current_schema_info(conn).unwrap(); assert_eq!(initial_schema_info.schema_name, "main"); assert_eq!(initial_schema_info.schema_id, 0); assert!(initial_schema_info.end_snapshot.is_none()); @@ -753,7 +747,7 @@ fn test_multiple_schemas_comprehensive() { ) .unwrap(); - let main_users_schema = provider.current_schema(None, "users").unwrap(); + let main_users_schema = DuckLakeCatalog::current_schema(conn, None, "users").unwrap(); assert_eq!(main_users_schema.fields().len(), 4); assert!(matches!( main_users_schema @@ -777,9 +771,8 @@ fn test_multiple_schemas_comprehensive() { &duckdb::arrow::datatypes::DataType::Timestamp(_, _) )); - let analytics_metrics_schema = provider - .current_schema(Some("analytics"), "metrics") - .unwrap(); + let analytics_metrics_schema = + DuckLakeCatalog::current_schema(conn, Some("analytics"), "metrics").unwrap(); assert_eq!(analytics_metrics_schema.fields().len(), 4); assert!(matches!( analytics_metrics_schema @@ -803,9 +796,8 @@ fn test_multiple_schemas_comprehensive() { &duckdb::arrow::datatypes::DataType::Date32 )); - let reporting_summary_schema = provider - .current_schema(Some("reporting"), "summary") - .unwrap(); + let reporting_summary_schema = + DuckLakeCatalog::current_schema(conn, Some("reporting"), "summary").unwrap(); assert_eq!(reporting_summary_schema.fields().len(), 4); assert!(matches!( reporting_summary_schema @@ -829,29 +821,29 @@ fn test_multiple_schemas_comprehensive() { &duckdb::arrow::datatypes::DataType::Boolean )); - let current_schema_info = provider.current_schema_info().unwrap(); + let 
current_schema_info = DuckLakeCatalog::current_schema_info(conn).unwrap(); assert_eq!(current_schema_info.schema_name, "main"); conn.execute("USE analytics;", []).unwrap(); - let analytics_schema_info = provider.current_schema_info().unwrap(); + let analytics_schema_info = DuckLakeCatalog::current_schema_info(conn).unwrap(); assert_eq!(analytics_schema_info.schema_name, "analytics"); assert!(analytics_schema_info.end_snapshot.is_none()); - let metrics_schema_implicit = provider.current_schema(None, "metrics").unwrap(); + let metrics_schema_implicit = DuckLakeCatalog::current_schema(conn, None, "metrics").unwrap(); assert_eq!(metrics_schema_implicit.fields().len(), 4); - let users_from_main = provider.current_schema(Some("main"), "users").unwrap(); + let users_from_main = DuckLakeCatalog::current_schema(conn, Some("main"), "users").unwrap(); assert_eq!(users_from_main.fields().len(), 4); conn.execute("USE reporting;", []).unwrap(); - let reporting_schema_info = provider.current_schema_info().unwrap(); + let reporting_schema_info = DuckLakeCatalog::current_schema_info(conn).unwrap(); assert_eq!(reporting_schema_info.schema_name, "reporting"); let schemas: Vec<(String, i64, i64, Option)> = conn .prepare( r#" - SELECT schema_name, schema_id, begin_snapshot, end_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_schema + SELECT schema_name, schema_id, begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_schema ORDER BY schema_id; "#, ) @@ -885,39 +877,37 @@ fn test_error_handling_edge_cases() { let conn = provider.get_connection(); // Non-existent table returns empty results - let current_snapshot = provider.current_snapshot().unwrap(); - let stats = provider - .table_statistics("nonexistent_table", current_snapshot, conn) - .unwrap(); + let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let stats = + DuckLakeCatalog::table_statistics(conn, "nonexistent_table", current_snapshot).unwrap(); 
assert!(stats.is_some()); assert_eq!(stats.unwrap().column_statistics.len(), 0); // Invalid/future snapshot still returns data - provider - .update_table_column_stats(age_column_id, table_id, "min_value", "25") - .unwrap(); - let future_stats = provider - .table_statistics("test_table", SnapshotId(99999), conn) + DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "min_value", "25") .unwrap(); + let future_stats = + DuckLakeCatalog::table_statistics(conn, "test_table", SnapshotId(99999)).unwrap(); assert!(future_stats.is_some()); assert_eq!(future_stats.unwrap().column_statistics.len(), 3); // Updating with invalid IDs succeeds without error - let result = - provider.update_table_column_stats(9999, 9999, "ndv", r#"{"distinct_count": 100}"#); + let result = DuckLakeCatalog::update_table_column_stats( + conn, + 9999, + 9999, + "ndv", + r#"{"distinct_count": 100}"#, + ); assert!(result.is_ok()); // Fetching schema for non-existent table returns error - assert!(provider.current_schema(None, "nonexistent_table").is_err()); + assert!(DuckLakeCatalog::current_schema(conn, None, "nonexistent_table").is_err()); // Invalid schema name returns error conn.execute_batch("CREATE TABLE test (id INTEGER);") .unwrap(); - assert!( - provider - .current_schema(Some("nonexistent_schema"), "test") - .is_err() - ); + assert!(DuckLakeCatalog::current_schema(conn, Some("nonexistent_schema"), "test").is_err()); } #[test] @@ -926,28 +916,28 @@ fn test_update_same_stat_rapidly() { let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); let conn = provider.get_connection(); - let initial_snapshot = provider.current_snapshot().unwrap(); + let initial_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); for i in 1..=5 { - provider - .update_table_column_stats( - age_column_id, - table_id, - "ndv", - &format!(r#"{{"distinct_count": {}}}"#, i * 100), - ) - .unwrap(); + DuckLakeCatalog::update_table_column_stats( + conn, + age_column_id, 
+ table_id, + "ndv", + &format!(r#"{{"distinct_count": {}}}"#, i * 100), + ) + .unwrap(); } - let final_snapshot = provider.current_snapshot().unwrap(); + let final_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); assert_eq!(final_snapshot.0, initial_snapshot.0 + 5); let versions: Vec<(i64, Option)> = conn .prepare( r#" - SELECT begin_snapshot, end_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = ? AND column_id = ? AND stats_type = 'ndv' + SELECT begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = ? AND column_id = ? AND stats_type = 'ndv' ORDER BY begin_snapshot; "#, ) @@ -976,9 +966,8 @@ fn test_data_edge_cases() { // Empty table with zero rows conn.execute_batch("CREATE TABLE empty_table (id INTEGER, name VARCHAR);") .unwrap(); - let current_snapshot = provider.current_snapshot().unwrap(); - let empty_stats = provider - .table_statistics("empty_table", current_snapshot, conn) + let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let empty_stats = DuckLakeCatalog::table_statistics(conn, "empty_table", current_snapshot) .unwrap() .unwrap(); assert_eq!(empty_stats.row_count, 0); @@ -991,9 +980,8 @@ fn test_data_edge_cases() { "#, ) .unwrap(); - let single_snapshot = provider.current_snapshot().unwrap(); - let single_stats = provider - .table_statistics("single_col", single_snapshot, conn) + let single_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let single_stats = DuckLakeCatalog::table_statistics(conn, "single_col", single_snapshot) .unwrap() .unwrap(); assert_eq!(single_stats.column_statistics.len(), 1); @@ -1022,8 +1010,8 @@ fn test_data_edge_cases() { let age_column_id: i64 = conn .query_row( r#" - SELECT column_id - FROM __ducklake_metadata_metalake.main.ducklake_column + SELECT column_id + FROM __ducklake_metadata_metalake.main.ducklake_column WHERE table_id = ? 
AND column_name = 'age'; "#, [table_id], @@ -1033,15 +1021,20 @@ fn test_data_edge_cases() { let special_payload = r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "empty": ""}"#; - provider - .update_table_column_stats(age_column_id, table_id, "special_test", special_payload) - .unwrap(); + DuckLakeCatalog::update_table_column_stats( + conn, + age_column_id, + table_id, + "special_test", + special_payload, + ) + .unwrap(); let retrieved: String = conn .query_row( r#" - SELECT payload - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE column_id = ? AND table_id = ? AND stats_type = 'special_test' + SELECT payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE column_id = ? AND table_id = ? AND stats_type = 'special_test' AND end_snapshot IS NULL; "#, [age_column_id, table_id], @@ -1057,12 +1050,16 @@ fn test_data_edge_cases() { "metadata": "x".repeat(1000) }) .to_string(); - provider - .update_table_column_stats(age_column_id, table_id, "large_histogram", &large_payload) - .unwrap(); - let new_snapshot = provider.current_snapshot().unwrap(); - let large_stats = provider - .table_statistics("test_table", new_snapshot, conn) + DuckLakeCatalog::update_table_column_stats( + conn, + age_column_id, + table_id, + "large_histogram", + &large_payload, + ) + .unwrap(); + let new_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let large_stats = DuckLakeCatalog::table_statistics(conn, "test_table", new_snapshot) .unwrap() .unwrap(); let age_stats = large_stats @@ -1096,7 +1093,7 @@ fn test_schema_edge_cases() { "#, ) .unwrap(); - let mixed_schema = provider.current_schema(None, "mixed_nulls").unwrap(); + let mixed_schema = DuckLakeCatalog::current_schema(conn, None, "mixed_nulls").unwrap(); assert_eq!(mixed_schema.fields().len(), 4); assert!(!mixed_schema.field_with_name("id").unwrap().is_nullable()); assert!( @@ -1137,7 +1134,7 @@ fn test_schema_edge_cases() { "#, ) .unwrap(); - let 
complex_schema = provider.current_schema(None, "complex_types").unwrap(); + let complex_schema = DuckLakeCatalog::current_schema(conn, None, "complex_types").unwrap(); assert_eq!(complex_schema.fields().len(), 11); assert!(matches!( complex_schema @@ -1187,10 +1184,12 @@ fn test_schema_edge_cases() { fn test_concurrent_snapshot_isolation() { // Test statistics with special characters and edge case JSON values. let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); + let conn = provider.get_connection(); let special_payload = r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "empty": ""}"#; - let result = provider.update_table_column_stats( + let result = DuckLakeCatalog::update_table_column_stats( + conn, age_column_id, table_id, "special_test", @@ -1199,13 +1198,12 @@ fn test_concurrent_snapshot_isolation() { assert!(result.is_ok()); - let conn = provider.get_connection(); let retrieved_payload: String = conn .query_row( r#" - SELECT payload - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE column_id = ? AND table_id = ? AND stats_type = 'special_test' + SELECT payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE column_id = ? AND table_id = ? AND stats_type = 'special_test' AND end_snapshot IS NULL; "#, [age_column_id, table_id], @@ -1220,6 +1218,7 @@ fn test_concurrent_snapshot_isolation() { fn test_large_statistics_payload() { // Test handling of large statistics payloads. 
let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); + let conn = provider.get_connection(); let large_histogram: Vec = (0..1000).collect(); let large_payload = json!({ @@ -1228,7 +1227,8 @@ fn test_large_statistics_payload() { }) .to_string(); - let result = provider.update_table_column_stats( + let result = DuckLakeCatalog::update_table_column_stats( + conn, age_column_id, table_id, "large_histogram", @@ -1237,10 +1237,8 @@ fn test_large_statistics_payload() { assert!(result.is_ok()); - let conn = provider.get_connection(); - let current_snapshot = provider.current_snapshot().unwrap(); - let stats = provider - .table_statistics("test_table", current_snapshot, conn) + let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let stats = DuckLakeCatalog::table_statistics(conn, "test_table", current_snapshot) .unwrap() .unwrap(); @@ -1277,7 +1275,7 @@ fn test_mixed_null_and_non_null_columns() { ) .unwrap(); - let schema = provider.current_schema(None, "mixed_nulls").unwrap(); + let schema = DuckLakeCatalog::current_schema(conn, None, "mixed_nulls").unwrap(); assert_eq!(schema.fields().len(), 4); @@ -1319,7 +1317,7 @@ fn test_schema_with_complex_types() { ) .unwrap(); - let schema = provider.current_schema(None, "complex_types").unwrap(); + let schema = DuckLakeCatalog::current_schema(conn, None, "complex_types").unwrap(); assert_eq!(schema.fields().len(), 11); From 7bf7e27046cae9f62b2f83fb28bfd01aa99638cf Mon Sep 17 00:00:00 2001 From: HFFuture Date: Tue, 4 Nov 2025 20:18:28 -0500 Subject: [PATCH 25/40] cleanup --- .DS_Store | Bin 0 -> 10244 bytes Cargo.lock | 965 ++++++++++-------------- optd/catalog/src/ducklake_connection.rs | 392 ---------- optd/catalog/src/lib.rs | 12 +- optd/catalog/src/main.rs | 96 ++- optd/catalog/src/statistics.rs | 419 ---------- optd/catalog/src/write.rs | 7 - 7 files changed, 472 insertions(+), 1419 deletions(-) create mode 100644 .DS_Store delete mode 100644 
optd/catalog/src/ducklake_connection.rs delete mode 100644 optd/catalog/src/statistics.rs delete mode 100644 optd/catalog/src/write.rs diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..0971cf0bd0150ee2ecfaa42761e5eb0a19902cf0 GIT binary patch literal 10244 zcmeHMYitx%6uxI#+8H{u(>go~g{~}Og%(PAwLFsT0}7?3mF>I5y1O&9L#H!!XLgH_ znkBv>YGUy57mP9Sk)TnCCJI4+D2W;$X^5gEiZL+bcG>Ae{}4ms++^;# z_ndRjIrp1$cIM6!LZBg`Y#<~+2;uRfR1&e9rBOM(W;D*_1dsx*C;IS)aLgPVpP}h9 zd<1+1d<1+1d<1+1ZWjdbnN1sC;84H!5%3Z45y&7we;)#PQ5te-%AxG113PvEK$?qe z)^IG*0n&g=LoQ7@6eZZeS&GzI3hogDoaM+5xOyR%rX1=lC*U4F!1WC72?dyUv>#yV z1VRq=dmjNGftd&dkd;f;kzt|{+jD+jbo%vd*_heX6xGcr^03rWUw0h*xZW#Q{!uvfB-{Vtfs>E5I`S@C$?Mw;vc9)yE9;1PvG;NA6>bEO6$T{z)8V_T#}i=jx{(nU!pG z_L{Rfz1J)0y$bUS#06q!S9g!t*Q>=$Bdi@&homA4H@p|89dR{@8|{gidScA5hNR+H zM2{u(h^^M^I#otqkrj)Zy53=0nyr~eU$3nmvD1b;qTYVXjN6UO*pMV8EcJkjwzQ<$ zgci|M3H8=KPR&@|YTQB^427Ic3ug)!m)&{Sn)RFNb~fy8**{ez6_?y0FBJO`UBlK6 zX==PLV#QQL(Tw4)2~Dv_dXk!@MRm2)h>WRxF;oguCB;&aTq-Z)r6Av%h-wccH2cA+ z5~;XImdgV?FP$4yFqk$|p%xY9Wu;s$4^VDnT*Y4DdB$EXSBRY!g1}1=fpOM~RbuBT z-hM%{98R@ZBX{YMsHzK+;w;-NZk2nI&h7+hpTpcHE|h!m8|WBr#S(ZHkSK@h1zx&z zA9dXtvF!G7)o?>#_AP? 
z+9{Sw+B;-iA!$ZZ!*oZyovfF>;&~zH08dCTROt+Z7nKwnF+c^`;bwaJGPazoCbc9) zI>{j{x8vj~a*CWK=g9@~8M#b;Ait1b$qn)+`3pG60TC8J5lB!9DEYPou@g=^>7F9W`irVFh5|f=%gV$R<>?xnUYlK5Ch8Ghz^at;ryT9zDjv&3n&(a{?!oCkDbJafRjs4tDRFEn zmep*;;)qB&Q_D)1F4N%lx*~+Cgef+ZHHi}1xg^}`{Qi<$ zA>Wgq$#rrA@}K~-x*RH?8n!_Qc0)5}^*(5WZs@_h?q^wj2!=t!%(fu`NjL&W;bC|L z9)-u@3C!;2-0VILFTzXkGMs@o;7xc7-h=aS0WQJ^@F9ExQ*afo!H-VXX0o`=o5gTF zJBza)xdY4JPe1-MS)TnOG@IP>Xw8?XtIXPDIBw{sF^oJ~QF-{djo|CRFuodOza|P# z5B=6WU7j5mA*uaM&TEOsMs{qdbFF?BoviqTdk4QjH-7UG@DcD4@DaEb2;?|M0`&X; zuDSpJzZI^08$JR)0=FpwklPe)YQW&qXQ%0La>xkabw6ITQN5Hy*%Iui3gxrU@i?KM o<11WVHKnwnJmk`pL)miVv*k$tqdx=u^S{Dw0P)ZN|JU>X- }, - #[snafu(display("Arrow query failed: {}", source))] - ArrowError { - source: duckdb::arrow::error::ArrowError, - }, - #[snafu(display("DataFusion error: {}", source))] - DataFusionError { - source: datafusion::error::DataFusionError, - }, - #[snafu(display("Other error: {}", details))] - Other { details: Arc }, -} - -impl From for Error { - fn from(err: datafusion::error::DataFusionError) -> Self { - Error::DataFusionError { source: err } - } -} - -impl From for Error { - fn from(err: duckdb::Error) -> Self { - Error::ConnectionError { source: err } - } -} - -#[derive(Debug)] -pub enum ConnectionMode { - Memory, - File, -} - -pub struct DuckLakeConnectionBuilder { - meta_name: Arc, - path: Arc, - mode: ConnectionMode, - manager: DuckDBManager, -} - -impl std::fmt::Debug for DuckLakeConnectionBuilder { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "DuckLakeConnectionBuilder {{ name: {}, path: {}, mode: {:?} }}", - self.meta_name, self.path, self.mode - ) - } -} - -impl DuckLakeConnectionBuilder { - // Default constants - pub const DEFAULT_LAKE_NAME: &str = "meta_lake"; - - pub fn memory() -> Result { - let manager = DuckDBManager::memory().context(ConnectionSnafu)?; - Ok(Self { - meta_name: Arc::from(Self::DEFAULT_LAKE_NAME), - path: Arc::from(":memory:"), - mode: ConnectionMode::Memory, - manager, 
- }) - } - - pub fn file(path: &str) -> Result { - let manager = DuckDBManager::file(path).context(ConnectionSnafu)?; - Ok(Self { - meta_name: Arc::from(Self::DEFAULT_LAKE_NAME), - path: Arc::from(path), - mode: ConnectionMode::File, - manager, - }) - } - - pub fn meta_name(&mut self, name: &str) -> &Self { - self.meta_name = Arc::from(name); - self - } - - pub fn get_meta_name(&self) -> &str { - self.meta_name.as_ref() - } - - pub fn get_path(&self) -> &str { - self.path.as_ref() - } - - pub fn get_mode(&self) -> &ConnectionMode { - &self.mode - } - - pub fn connect(&self) -> Result { - let mut connection = self.manager.connect().context(ConnectionSnafu)?; - self.ducklake(&mut connection)?; - Ok(connection) - } - - fn ducklake(&self, connection: &mut Connection) -> Result<(), Error> { - let setup_query = match self.mode { - ConnectionMode::Memory => format!( - r#" - INSTALL ducklake; - LOAD ducklake; - ATTACH 'ducklake:metadata.ducklake' AS {name}; - USE {name}; - "#, - name = self.meta_name.as_ref(), - ), - ConnectionMode::File => format!( - r#" - INSTALL ducklake; - LOAD ducklake; - ATTACH 'ducklake:metadata.ducklake' AS {name} (DATA_PATH '{path}'); - USE {name}; - "#, - name = self.meta_name.as_ref(), - path = self.path.as_ref() - ), - }; - - connection - .execute_batch(setup_query.as_str()) - .context(ConnectionSnafu)?; - Ok(()) - } - - pub fn initialize_schema(&self, connection: &Connection) -> Result<(), Error> { - // Create tables for storing stats metadata - connection.execute_batch( - format!( - r#" - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.ducklake_table_column_adv_stats ( - column_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - table_id BIGINT, - stats_type VARCHAR, - payload TEXT, - PRIMARY KEY (column_id, begin_snapshot, stats_type) - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_query ( - query_id BIGINT PRIMARY KEY, - query_string TEXT, - root_group_id BIGINT - ); - - CREATE TABLE IF NOT 
EXISTS __ducklake_metadata_{name}.main.optd_query_instance ( - query_instance_id BIGINT PRIMARY KEY, - query_id BIGINT, - creation_time BIGINT, - snapshot_id BIGINT, - FOREIGN KEY (query_id) REFERENCES __ducklake_metadata_{name}.main.optd_query(query_id) - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - PRIMARY KEY (group_id, begin_snapshot) - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_group_stats ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - stats_type VARCHAR, - payload TEXT, - PRIMARY KEY (group_id, begin_snapshot, stats_type) - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_execution_subplan_feedback ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - stats_type VARCHAR, - payload TEXT, - PRIMARY KEY (group_id, begin_snapshot, stats_type) - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_{name}.main.optd_subplan_scalar_feedback ( - scalar_id BIGINT, - group_id BIGINT, - stats_type VARCHAR, - payload TEXT, - query_instance_id BIGINT, - PRIMARY KEY (scalar_id, group_id, stats_type, query_instance_id) - ); - - CREATE INDEX IF NOT EXISTS idx_table_stats_snapshot - ON __ducklake_metadata_{name}.main.ducklake_table_column_adv_stats(begin_snapshot, end_snapshot); - "#, - name = self.meta_name.as_ref() - ) - .as_str(), - ) - .context(ConnectionSnafu)?; - - Ok(()) - } -} - -impl Default for DuckLakeConnectionBuilder { - fn default() -> Self { - Self::memory().expect("Failed to create default DuckLakeConnectionBuilder") - } -} - -pub async fn query( - connection: &Connection, - sql: &str, - schema: SchemaRef, - projection: Option>, -) -> Result { - let mut stmt = connection.prepare(sql).context(ConnectionSnafu)?; - - let rbs = stmt.query_arrow([])?.collect::>(); - let stream = MemoryStream::try_new(rbs, schema, projection)?; - Ok(Box::pin(stream)) -} - -#[cfg(test)] -mod 
tests { - use super::*; - use futures::TryStreamExt; - - #[tokio::test] - async fn test_ducklake_connection() { - let path = "./test_ducklake.db"; - let ducklake_conn = - DuckLakeConnectionBuilder::file(path).expect("Failed to create DuckLakeConnection"); - assert_eq!(ducklake_conn.get_path(), path); - - assert_eq!( - matches!(ducklake_conn.get_mode(), ConnectionMode::File), - true - ); - - assert_eq!(ducklake_conn.get_meta_name(), "meta_lake"); - - { - let conn = ducklake_conn.connect().expect("Failed to get connection"); - assert!(conn.execute_batch("SELECT 1;").is_ok()); - - let mut stmt = conn - .prepare("select name from (show all tables);") - .expect("Failed to prepare show tables statement"); - - let rows = stmt - .query_map([], |row| row.get::(0)) - .expect("Failed to execute show tables query"); - - let expected = vec![ - "ducklake_column", - "ducklake_column_mapping", - "ducklake_column_tag", - "ducklake_data_file", - "ducklake_delete_file", - "ducklake_file_column_stats", - "ducklake_file_partition_value", - "ducklake_files_scheduled_for_deletion", - "ducklake_inlined_data_tables", - "ducklake_metadata", - "ducklake_name_mapping", - "ducklake_partition_column", - "ducklake_partition_info", - "ducklake_schema", - "ducklake_schema_versions", - "ducklake_snapshot", - "ducklake_snapshot_changes", - "ducklake_table", - "ducklake_table_column_stats", - "ducklake_table_stats", - "ducklake_tag", - "ducklake_view", - ]; - - for (i, row) in rows.enumerate() { - let table_name = row.expect("Failed to get table name"); - assert_eq!(table_name, expected[i]); - } - } - - { - let conn = ducklake_conn.connect().expect("Failed to get connection"); - ducklake_conn - .initialize_schema(&conn) - .expect("Failed to initialize schema"); - - let mut stmt = conn - .prepare("select name from (show all tables);") - .expect("Failed to prepare show tables statement"); - - let rows = stmt - .query_map([], |row| row.get::(0)) - .expect("Failed to execute show tables query"); - - let 
expected = vec![ - "ducklake_column", - "ducklake_column_mapping", - "ducklake_column_tag", - "ducklake_data_file", - "ducklake_delete_file", - "ducklake_file_column_stats", - "ducklake_file_partition_value", - "ducklake_files_scheduled_for_deletion", - "ducklake_inlined_data_tables", - "ducklake_metadata", - "ducklake_name_mapping", - "ducklake_partition_column", - "ducklake_partition_info", - "ducklake_schema", - "ducklake_schema_versions", - "ducklake_snapshot", - "ducklake_snapshot_changes", - "ducklake_table", - "ducklake_table_column_adv_stats", - "ducklake_table_column_stats", - "ducklake_table_stats", - "ducklake_tag", - "ducklake_view", - "optd_execution_subplan_feedback", - "optd_group", - "optd_group_stats", - "optd_query", - "optd_subplan_scalar_feedback", - ]; - - for (i, row) in rows.enumerate() { - let table_name = row.expect("Failed to get table name"); - assert_eq!(table_name, expected[i]); - } - } - - { - let conn = ducklake_conn.connect().expect("Failed to get connection"); - conn.execute_batch("CREATE TABLE IF NOT EXISTS test (id INTEGER, name VARCHAR);") - .expect("Failed to create table"); - conn.execute_batch("INSERT INTO test (id, name) VALUES (1, 'Alice'), (2, 'Bob');") - .expect("Failed to insert data"); - } - - { - let conn = ducklake_conn.connect().expect("Failed to get connection"); - let schema = Arc::new(datafusion::arrow::datatypes::Schema::new(vec![ - datafusion::arrow::datatypes::Field::new( - "id", - datafusion::arrow::datatypes::DataType::Int32, - false, - ), - datafusion::arrow::datatypes::Field::new( - "name", - datafusion::arrow::datatypes::DataType::Utf8, - false, - ), - ])); - - let rbs = query(&conn, "SELECT * FROM test;", schema, None) - .await - .expect("Failed to execute query"); - - let schema_ref = rbs.schema(); - assert_eq!(schema_ref.fields().len(), 2); - assert_eq!(schema_ref.field(0).name(), "id"); - assert_eq!(schema_ref.field(1).name(), "name"); - - let batches: Vec<_> = rbs - .try_collect() - .await - 
.expect("Failed to collect record batches"); - - assert_eq!(batches.len(), 1); - let batch = &batches[0]; - assert_eq!(batch.num_rows(), 2); - } - } -} diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs index 4a76625..cf1844b 100644 --- a/optd/catalog/src/lib.rs +++ b/optd/catalog/src/lib.rs @@ -1,9 +1,5 @@ -// mod optd_catalog; -// mod optd_table; -mod ducklake_connection; -mod statistics; +mod optd_catalog; +mod optd_table; -// pub use optd_catalog::*; -// pub use optd_table::*; -pub use ducklake_connection::{ConnectionMode, DuckLakeConnectionBuilder, query}; -pub use statistics::{DuckLakeStatisticsProvider, Error as InterfaceError, StatisticsProvider}; +pub use optd_catalog::*; +pub use optd_table::*; diff --git a/optd/catalog/src/main.rs b/optd/catalog/src/main.rs index 3b4fbd6..8f7c3b9 100644 --- a/optd/catalog/src/main.rs +++ b/optd/catalog/src/main.rs @@ -1,27 +1,81 @@ mod optd_catalog; mod optd_table; -use duckdb::{ - Connection, Error, Result, - arrow::{ - array::{Int32Array, RecordBatch, StringArray}, - datatypes::{DataType, Field, SchemaBuilder}, - }, - params, -}; - -fn main() -> Result<(), Error> { - let conn = Connection::open_in_memory()?; - - conn.execute_batch( - r#" - INSTALL ducklake; - LOAD ducklake; - - ATTACH 'ducklake:metadata.ducklake' AS meta_lake (DATA_PATH 'data_files'); - USE meta_lake; - "#, - )?; +use sqlx::{SqlitePool, sqlite::SqliteConnectOptions}; +use tokio; +use uuid::Uuid; + +#[tokio::main] +async fn main() -> Result<(), sqlx::Error> { + // Create Sqlite database file to hold the catalog + const SQLITE_DB_PATH: &str = "catalog.db"; + + // Set connect options + let connect_options = SqliteConnectOptions::new() + .filename(SQLITE_DB_PATH) + .create_if_missing(true); + + // Connect with SqlX + let pool = SqlitePool::connect_with(connect_options) + .await + .expect("Failed to connect to the SQLite database"); + + // Set the metadata catalog name + const METADATA_CATALOG: &str = "catalog"; + + // Execute the given Sql 
queries to create the catalog + let mut create_catalog_queries = vec![ + // "CREATE TABLE () IF NOT EXISTS {METADATA_CATALOG};", + "CREATE TABLE {METADATA_CATALOG}_metadata(key VARCHAR NOT NULL, value VARCHAR NOT NULL, scope VARCHAR, scope_id BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_snapshot(snapshot_id BIGINT PRIMARY KEY, snapshot_time TIMESTAMPTZ, schema_version BIGINT, next_catalog_id BIGINT, next_file_id BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_snapshot_changes(snapshot_id BIGINT PRIMARY KEY, changes_made VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_schema(schema_id BIGINT PRIMARY KEY, schema_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_name VARCHAR, path VARCHAR, path_is_relative BOOLEAN);", + "CREATE TABLE {METADATA_CATALOG}_table(table_id BIGINT, table_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_id BIGINT, table_name VARCHAR, path VARCHAR, path_is_relative BOOLEAN);", + "CREATE TABLE {METADATA_CATALOG}_view(view_id BIGINT, view_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_id BIGINT, view_name VARCHAR, dialect VARCHAR, sql VARCHAR, column_aliases VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_tag(object_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_column_tag(table_id BIGINT, column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_data_file(data_file_id BIGINT PRIMARY KEY, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, file_order BIGINT, path VARCHAR, path_is_relative BOOLEAN, file_format VARCHAR, record_count BIGINT, file_size_bytes BIGINT, footer_size BIGINT, row_id_start BIGINT, partition_id BIGINT, encryption_key VARCHAR, partial_file_info VARCHAR, mapping_id BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_file_column_statistics(data_file_id BIGINT, table_id BIGINT, column_id BIGINT, column_size_bytes BIGINT, value_count BIGINT, 
null_count BIGINT, min_value VARCHAR, max_value VARCHAR, contains_nan BOOLEAN);", + "CREATE TABLE {METADATA_CATALOG}_delete_file(delete_file_id BIGINT PRIMARY KEY, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, data_file_id BIGINT, path VARCHAR, path_is_relative BOOLEAN, format VARCHAR, delete_count BIGINT, file_size_bytes BIGINT, footer_size BIGINT, encryption_key VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_column(column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, table_id BIGINT, column_order BIGINT, column_name VARCHAR, column_type VARCHAR, initial_default VARCHAR, default_value VARCHAR, nulls_allowed BOOLEAN, parent_column BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_table_stats(table_id BIGINT, record_count BIGINT, next_row_id BIGINT, file_size_bytes BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_table_column_stats(table_id BIGINT, column_id BIGINT, contains_null BOOLEAN, contains_nan BOOLEAN, min_value VARCHAR, max_value VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_partition_info(partition_id BIGINT, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_partition_column(partition_id BIGINT, table_id BIGINT, partition_key_index BIGINT, column_id BIGINT, transform VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_file_partition_value(data_file_id BIGINT, table_id BIGINT, partition_key_index BIGINT, partition_value VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_files_scheduled_for_deletion(data_file_id BIGINT, path VARCHAR, path_is_relative BOOLEAN, schedule_start TIMESTAMPTZ);", + "CREATE TABLE {METADATA_CATALOG}_inlined_data_tables(table_id BIGINT, table_name VARCHAR, schema_version BIGINT);", + "CREATE TABLE {METADATA_CATALOG}_column_mapping(mapping_id BIGINT, table_id BIGINT, type VARCHAR);", + "CREATE TABLE {METADATA_CATALOG}_name_mapping(mapping_id BIGINT, column_id BIGINT, source_name VARCHAR, target_field_id BIGINT, parent_column BIGINT);", + "INSERT INTO {METADATA_CATALOG}_snapshot 
VALUES (0, current_timestamp, 0, 1, 0);", + "INSERT INTO {METADATA_CATALOG}_snapshot_changes VALUES (0, 'created_schema:\"main\"');", + //"INSERT INTO {METADATA_CATALOG}_metadata (key, value) VALUES ('version', '0.2'), ('created_by', 'DuckDB %s'), ('data_path', %s), ('encrypted', '%s');" + ]; + + let set_uuid_query = format!( + "UPDATE {METADATA_CATALOG}_schema SET schema_uuid = '{}' WHERE schema_id = 0;", + Uuid::new_v4() + ); + + create_catalog_queries.push(set_uuid_query.as_str()); + + // Format the queries with the metadata catalog name + let formatted_query = create_catalog_queries + .iter() + .map(|query| query.replace("{METADATA_CATALOG}", METADATA_CATALOG)); + + for query in formatted_query { + println!("Executing query: {}", query); + sqlx::query(&query) + .execute(&pool) + .await + .expect("Failed to execute query"); + + println!("Query executed successfully."); + } + + // Close the connection + pool.close().await; Ok(()) } diff --git a/optd/catalog/src/statistics.rs b/optd/catalog/src/statistics.rs deleted file mode 100644 index 092bb37..0000000 --- a/optd/catalog/src/statistics.rs +++ /dev/null @@ -1,419 +0,0 @@ -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use snafu::{ResultExt, prelude::*}; -use std::sync::Arc; - -use crate::ducklake_connection::{DuckLakeConnectionBuilder, Error as ConnectionError}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Database connection error: {}", source))] - Connection { source: ConnectionError }, - #[snafu(display("Query execution failed: {}", source))] - QueryExecution { source: duckdb::Error }, - #[snafu(display("JSON serialization error: {}", source))] - JsonSerialization { source: serde_json::Error }, - #[snafu(display( - "Statistics not found for table: {}, column: {}, snapshot: {}", - table, - column, - snapshot - ))] - StatsNotFound { - table: String, - column: String, - snapshot: i64, - }, - #[snafu(display( - "Group statistics not found for group_id: {}, stats_type: {}, 
snapshot: {}", - group_id, - stats_type, - snapshot - ))] - GroupStatsNotFound { - group_id: i64, - stats_type: String, - snapshot: i64, - }, -} - -impl From for Error { - fn from(err: ConnectionError) -> Self { - Error::Connection { source: err } - } -} - -/** Packaged Statistics Objects */ -/** Table statistics -- Contains overall row count and per-column statistics */ -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TableStatistics { - row_count: usize, - column_statistics: Vec, -} - -impl TableStatistics { - fn new(rows: I) -> Self - where - I: IntoIterator< - Item = Result< - ( - i64, - i64, - String, - String, - i64, - i64, - i64, - String, - String, - String, - String, - String, - ), - duckdb::Error, - >, - >, - { - let mut row_count = 0; - let mut column_statistics = Vec::new(); - - for row_result in rows { - if let Ok(( - _table_id, - column_id, - column_name, - column_type, - record_count, - _next_row_id, - _file_size_bytes, - contains_null, - contains_nan, - min_value, - max_value, - _extra_stats_json, - )) = row_result - { - row_count = record_count as usize; // Assuming all rows have the same record_count - - let actual_contains_null = match contains_null.as_str() { - "TRUE" => Some(true), - "FALSE" => Some(false), - _ => None, - }; - - let actual_contains_nan = match contains_nan.as_str() { - "TRUE" => Some(true), - "FALSE" => Some(false), - _ => None, - }; - - let actual_min_value = if min_value == "NULL" { - None - } else { - Some(min_value) - }; - - let actual_max_value = if max_value == "NULL" { - None - } else { - Some(max_value) - }; - - let column_stats = ColumnStatistics::new( - column_id, - column_type, - column_name.clone(), - actual_min_value, - actual_max_value, - actual_contains_null, - actual_contains_nan, - vec![], // Advanced stats can be populated later - ); - - column_statistics.push(column_stats); - } - } - - TableStatistics { - row_count, - column_statistics, - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] 
-pub struct ColumnStatistics { - id: i64, - column_type: String, - name: String, - min: Option, - max: Option, - contains_null: Option, - contains_nan: Option, - advanced_stats: Vec, // TODO, e.g. histogram, ndv, etc. -} - -impl ColumnStatistics { - fn new( - id: i64, - column_type: String, - name: String, - min: Option, - max: Option, - contains_null: Option, - contains_nan: Option, - advanced_stats: Vec, - ) -> Self { - ColumnStatistics { - id, - column_type, - name, - min, - max, - contains_null, - contains_nan, - advanced_stats, - } - } - - #[allow(dead_code)] - fn add_advanced_stat(&mut self, stat: AdvanceColumnStatistics) { - self.advanced_stats.push(stat); - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct AdvanceColumnStatistics { - stats_type: String, - data: Value, -} - -pub trait StatisticsProvider { - /// Create a new memory-based StatisticsProvider - fn memory() -> Result, Error>; - - /// Create a new file-based StatisticsProvider - fn file(path: &str) -> Result, Error>; - - fn get_connection(&self) -> Result; - - fn current_snapshot(&self, connection: &duckdb::Connection) -> Result; - - /// Retrieve table and column statistics at specific snapshot - fn fetch_table_statistics( - &self, - table_name: &str, - snapshot: i64, - connection: &duckdb::Connection, - ) -> Result, Error>; -} - -/// DuckLake-based implementation of StatisticsProvider -pub struct DuckLakeStatisticsProvider { - connection_builder: Arc, -} - -impl DuckLakeStatisticsProvider { - /// Create a new DuckLakeStatisticsProvider with memory-based DuckDB - pub fn memory() -> Result { - let connection_builder = Arc::new(DuckLakeConnectionBuilder::memory()?); - Ok(Self { connection_builder }) - } - - /// Create a new DuckLakeStatisticsProvider with file-based DuckDB - pub fn file(path: &str) -> Result { - let connection_builder = Arc::new(DuckLakeConnectionBuilder::file(path)?); - Ok(Self { connection_builder }) - } - - /// Insert table column statistics - pub fn 
insert_table_stats( - &self, - column_id: i64, - begin_snapshot: i64, - end_snapshot: i64, - table_id: i64, - stats_type: &str, - payload: &str, - ) -> Result<(), Error> { - let conn = self.connection_builder.connect()?; - let mut stmt = conn - .prepare( - "INSERT OR REPLACE INTO ducklake_table_column_adv_stats - (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) - VALUES (?, ?, ?, ?, ?, ?)", - ) - .context(QueryExecutionSnafu)?; - - stmt.execute([ - &column_id.to_string(), - &begin_snapshot.to_string(), - &end_snapshot.to_string(), - &table_id.to_string(), - stats_type, - payload, - ]) - .context(QueryExecutionSnafu)?; - - Ok(()) - } -} - -impl StatisticsProvider for DuckLakeStatisticsProvider { - fn memory() -> Result, Error> { - let connection_builder = Arc::new(DuckLakeConnectionBuilder::memory()?); - Ok(Box::new(Self { connection_builder })) - } - - /// Create a new DuckLakeStatisticsProvider with file-based DuckDB - fn file(path: &str) -> Result, Error> { - let connection_builder = Arc::new(DuckLakeConnectionBuilder::file(path)?); - Ok(Box::new(Self { connection_builder })) - } - - /// Get a connection to the DuckDB instance and initialize the DuckLake-Optd schema - fn get_connection(&self) -> Result { - let conn = self.connection_builder.connect()?; - self.connection_builder.initialize_schema(&conn)?; - Ok(conn) - } - - fn current_snapshot(&self, conn: &duckdb::Connection) -> Result { - let mut stmt = conn - .prepare( - format!( - r#" - SELECT snapshot_id, schema_version, next_catalog_id, next_file_id - FROM __ducklake_metadata_{name}.main.ducklake_snapshot - WHERE snapshot_id = (SELECT MAX(snapshot_id) - FROM __ducklake_metadata_{name}.main.ducklake_snapshot); - "#, - name = self.connection_builder.get_meta_name() - ) - .as_str(), - ) - .context(QueryExecutionSnafu)?; - - let row = stmt - .query_row([], |row| { - Ok(( - row.get::(0)?, // snapshot_id - row.get::(1)?, // schema_version - row.get::(2)?, // next_catalog_id - row.get::(3)?, 
// next_file_id - )) - }) - .context(QueryExecutionSnafu)?; - - Ok(row.0) - } - - fn fetch_table_statistics( - &self, - table: &str, - snapshot: i64, - conn: &duckdb::Connection, - ) -> Result, Error> { - // Query for table statistics within the snapshot range - let mut stmt = conn - .prepare( - format!( - r#" - SELECT table_id, column_id, column_name, column_type, record_count, next_row_id, file_size_bytes, contains_null, contains_nan, min_value, max_value, extra_stats - FROM __ducklake_metadata_{name}.main.ducklake_table_stats - LEFT JOIN __ducklake_metadata_{name}.main.ducklake_table_column_stats USING (table_id) - LEFT JOIN __ducklake_metadata_{name}.main.ducklake_column col USING (table_id, column_id) - WHERE record_count IS NOT NULL AND file_size_bytes IS NOT NULL AND - table_id = (SELECT table_id FROM __ducklake_metadata_{name}.main.ducklake_table WHERE table_name = ?) - AND ? >= begin_snapshot AND (? < end_snapshot OR end_snapshot IS NULL) - ORDER BY table_id, column_id; - "#, - name = self.connection_builder.get_meta_name() - ).as_str() - ) - .context(QueryExecutionSnafu)?; - - let rows = stmt - .query_map([table, snapshot.to_string().as_str()], |row| { - Ok(( - row.get::(0)?, // table_id - row.get::(1)?, // column_id - row.get::(2)?, // column_name - row.get::(3)?, // column_type - row.get::(4)?, // record_count - row.get::(5)?, // next_row_id - row.get::(6)?, // file_size_bytes - row.get::(7)?, // contains_null - row.get::(8)?, // contains_nan - row.get::(9)?, // min_value - row.get::(10)?, // max_value - row.get::(11)?, // extra_stats (JSON) - )) - }) - .context(QueryExecutionSnafu)?; - - let table_stats: TableStatistics = TableStatistics::new(rows); - - Ok(Some(table_stats)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - - #[test] - fn test_ducklake_statistics_provider_creation() { - { - // Test memory-based provider - let memory_provider = DuckLakeStatisticsProvider::memory(); - assert!(memory_provider.is_ok()); - } - - 
{ - // Test file-based provider - let file_provider = DuckLakeStatisticsProvider::file("./test_stats.db"); - assert!(file_provider.is_ok()); - } - } - - #[test] - fn test_table_stats_insertion() { - let provider = DuckLakeStatisticsProvider::memory().unwrap(); - - // Insert table statistics - let result = - provider.insert_table_stats(1, 1, 100, 1, "ndv", r#"{"distinct_count": 1000}"#); - assert!(result.is_ok()); - } - - #[test] - fn test_json_payload_handling() { - let payload = json!({ - "distinct_count": 1000, - "null_count": 50, - "min_value": 1, - "max_value": 999999 - }); - - let payload_str = serde_json::to_string(&payload).unwrap(); - let parsed_back: serde_json::Value = serde_json::from_str(&payload_str).unwrap(); - - assert_eq!(parsed_back["distinct_count"], 1000); - assert_eq!(parsed_back["null_count"], 50); - } - - #[test] - fn test_table_stats_insertion_and_retrieval() { - let provider = DuckLakeStatisticsProvider::memory().unwrap(); - - // Insert table statistics - let result = - provider.insert_table_stats(1, 1, 100, 1, "ndv", r#"{"distinct_count": 1000}"#); - assert!(result.is_ok()); - - // Note: Actual retrieval would require setting up the table_metadata - // and column_metadata tables, which would be done by the DuckLake extension - } -} diff --git a/optd/catalog/src/write.rs b/optd/catalog/src/write.rs deleted file mode 100644 index 3edda94..0000000 --- a/optd/catalog/src/write.rs +++ /dev/null @@ -1,7 +0,0 @@ -use std::{any::Any, fmt, sync::Arc}; - -use async_trait::async_trait; - -use crate::sql::sql_provider_datafusion::{ - get_stream, to_execution_error, Result as SqlResult -} \ No newline at end of file From d25b78ead37c49b79176c6fdeb9e04404f840bf9 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Tue, 4 Nov 2025 22:17:13 -0500 Subject: [PATCH 26/40] modularity refactor + general cleanup --- optd/catalog/src/lib.rs | 446 +++++++++-------- optd/catalog/tests/statistics_tests.rs | 666 +++++++++++++------------ 2 files changed, 579 insertions(+), 
533 deletions(-) diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs index 903a6c1..21a60b6 100644 --- a/optd/catalog/src/lib.rs +++ b/optd/catalog/src/lib.rs @@ -1,8 +1,6 @@ -use std::sync::Arc; - use duckdb::{ Connection, Error as DuckDBError, - arrow::datatypes::{DataType, Field, Schema, SchemaRef}, + arrow::datatypes::{Field, Schema, SchemaRef}, params, types::Null, }; @@ -10,35 +8,32 @@ use duckdb::{ use serde::{Deserialize, Serialize}; use serde_json::Value; use snafu::{ResultExt, prelude::*}; +use std::{collections::HashMap, sync::Arc}; /// Operations for managing table statistics with snapshot-based time travel. pub trait Catalog { /// Gets the current (most recent) snapshot ID. - fn current_snapshot(conn: &Connection) -> Result; + fn current_snapshot(&mut self) -> Result; /// Gets complete metadata for the current snapshot. - fn current_snapshot_info(conn: &Connection) -> Result; + fn current_snapshot_info(&mut self) -> Result; /// Gets the Arrow schema for a table at the current snapshot. - fn current_schema( - conn: &Connection, - schema: Option<&str>, - table: &str, - ) -> Result; + fn current_schema(&mut self, schema: Option<&str>, table: &str) -> Result; /// Gets schema information including name, ID, and snapshot range. - fn current_schema_info(conn: &Connection) -> Result; + fn current_schema_info(&mut self) -> Result; /// Retrieves table and column statistics at a specific snapshot. fn table_statistics( - connection: &Connection, + &mut self, table_name: &str, snapshot: SnapshotId, ) -> Result, Error>; /// Updates or inserts advanced statistics for a table column. 
fn update_table_column_stats( - connection: &Connection, + &mut self, column_id: i64, table_id: i64, stats_type: &str, @@ -48,6 +43,75 @@ pub trait Catalog { const DEFAULT_METADATA_FILE: &str = "metadata.ducklake"; +const CREATE_EXTRA_TABLES_QUERY: &str = r#" + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats ( + column_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + table_id BIGINT, + stats_type VARCHAR, + payload VARCHAR + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query ( + query_id BIGINT, + query_string VARCHAR, + root_group_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query_instance ( + query_instance_id BIGINT PRIMARY KEY, + query_id BIGINT, + creation_time BIGINT, + snapshot_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group_stats ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload VARCHAR + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_execution_subplan_feedback ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload VARCHAR + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_subplan_scalar_feedback ( + scalar_id BIGINT, + group_id BIGINT, + stats_type VARCHAR, + payload VARCHAR, + query_instance_id BIGINT + ); +"#; + +// SQL query to fetch the latest snapshot information. 
+const SNAPSHOT_INFO_QUERY: &str = r#" + SELECT snapshot_id, schema_version, next_catalog_id, next_file_id + FROM __ducklake_metadata_metalake.main.ducklake_snapshot + WHERE snapshot_id = (SELECT MAX(snapshot_id) + FROM __ducklake_metadata_metalake.main.ducklake_snapshot); +"#; + +// SQL query to fetch schema information including name, ID, and snapshot valid range. +const SCHEMA_INFO_QUERY: &str = r#" + SELECT ds.schema_id, ds.schema_name, ds.begin_snapshot, ds.end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_schema ds + WHERE ds.schema_name = current_schema(); +"#; + /// SQL query to fetch table statistics including column metadata and advanced stats at a specific snapshot. const FETCH_TABLE_STATS_QUERY: &str = r#" SELECT @@ -117,6 +181,8 @@ pub enum Error { Connection { source: DuckDBError }, #[snafu(display("Query execution failed: {}", source))] QueryExecution { source: DuckDBError }, + #[snafu(display("Transaction error: {}", source))] + Transaction { source: DuckDBError }, #[snafu(display("JSON serialization error: {}", source))] JsonSerialization { source: serde_json::Error }, #[snafu(display("ARROW DataType conversion error: {}", source))] @@ -280,49 +346,120 @@ pub struct CurrentSchema { pub end_snapshot: Option, } -// TODO(ray): remove this once we have use. -#[allow(dead_code)] -#[derive(Debug, Serialize, Deserialize)] -struct StatisticsUpdate { - stats_type: String, - payload: String, -} - /// A catalog implementation using DuckDB with snapshot management. 
pub struct DuckLakeCatalog { conn: Connection, } impl Catalog for DuckLakeCatalog { - fn current_snapshot(conn: &Connection) -> Result { + fn current_snapshot(&mut self) -> Result { + let txn = self.conn.transaction().context(TransactionSnafu)?; + let result = Self::current_snapshot_inner(&txn); + txn.commit().context(TransactionSnafu)?; + result + } + + fn current_snapshot_info(&mut self) -> Result { + let txn = self.conn.transaction().context(TransactionSnafu)?; + let result = Self::current_snapshot_info_inner(&txn); + txn.commit().context(TransactionSnafu)?; + result + } + + fn current_schema(&mut self, schema: Option<&str>, table: &str) -> Result { + let txn = self.conn.transaction().context(TransactionSnafu)?; + let result = Self::current_schema_inner(&txn, schema, table); + txn.commit().context(TransactionSnafu)?; + result + } + + fn current_schema_info(&mut self) -> Result { + let txn = self.conn.transaction().context(TransactionSnafu)?; + let result = Self::current_schema_info_inner(&txn); + txn.commit().context(TransactionSnafu)?; + result + } + + fn table_statistics( + &mut self, + table: &str, + snapshot: SnapshotId, + ) -> Result, Error> { + let txn = self.conn.transaction().context(TransactionSnafu)?; + let result = Self::table_statistics_inner(&txn, table, snapshot); + txn.commit().context(TransactionSnafu)?; + result + } + + /// Update table column statistics + fn update_table_column_stats( + &mut self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error> { + let txn = self.conn.transaction().context(TransactionSnafu)?; + let result = + Self::update_table_column_stats_inner(&txn, column_id, table_id, stats_type, payload); + txn.commit().context(TransactionSnafu)?; + result + } +} + +impl DuckLakeCatalog { + /// Creates a new DuckLakeStatisticsProvider with optional file paths. + /// If `location` is None, uses in-memory database. If `metadata_path` is None, uses default metadata file. 
+ pub fn try_new(location: Option<&str>, metadata_path: Option<&str>) -> Result { + let conn = if let Some(path) = location { + Connection::open(path).context(ConnectionSnafu)? + } else { + Connection::open_in_memory().context(ConnectionSnafu)? + }; + + // Use provided metadata path or default to DEFAULT_METADATA_FILE + let metadata_file = metadata_path.unwrap_or(DEFAULT_METADATA_FILE); + let setup_query = format!( + r#" + INSTALL ducklake; + LOAD ducklake; + ATTACH 'ducklake:{metadata_file}' AS metalake; + USE metalake; + + {CREATE_EXTRA_TABLES_QUERY} + "# + ); + conn.execute_batch(&setup_query).context(ConnectionSnafu)?; + Ok(Self { conn }) + } + + /// Returns a reference to the underlying DuckDB connection. + pub fn get_connection(&self) -> &Connection { + &self.conn + } + + fn current_snapshot_inner(conn: &Connection) -> Result { conn.prepare("FROM ducklake_current_snapshot('metalake');") .context(QueryExecutionSnafu)? .query_row([], |row| Ok(SnapshotId(row.get(0)?))) .context(QueryExecutionSnafu) } - fn current_snapshot_info(conn: &Connection) -> Result { - conn.prepare( - r#" - SELECT snapshot_id, schema_version, next_catalog_id, next_file_id - FROM __ducklake_metadata_metalake.main.ducklake_snapshot - WHERE snapshot_id = (SELECT MAX(snapshot_id) - FROM __ducklake_metadata_metalake.main.ducklake_snapshot); - "#, - ) - .context(QueryExecutionSnafu)? - .query_row([], |row| { - Ok(SnapshotInfo { - id: SnapshotId(row.get("snapshot_id")?), - schema_version: row.get("schema_version")?, - next_catalog_id: row.get("next_catalog_id")?, - next_file_id: row.get("next_file_id")?, + fn current_snapshot_info_inner(conn: &Connection) -> Result { + conn.prepare(SNAPSHOT_INFO_QUERY) + .context(QueryExecutionSnafu)? 
+ .query_row([], |row| { + Ok(SnapshotInfo { + id: SnapshotId(row.get("snapshot_id")?), + schema_version: row.get("schema_version")?, + next_catalog_id: row.get("next_catalog_id")?, + next_file_id: row.get("next_file_id")?, + }) }) - }) - .context(QueryExecutionSnafu) + .context(QueryExecutionSnafu) } - fn current_schema( + fn current_schema_inner( conn: &Connection, schema: Option<&str>, table: &str, @@ -331,65 +468,60 @@ impl Catalog for DuckLakeCatalog { .map(|s| format!("{}.{}", s, table)) .unwrap_or_else(|| table.to_string()); - let schema_query = format!("DESCRIBE {};", table_ref); - + // Use SELECT * with LIMIT 0 to get schema with data types + let schema_query = format!("SELECT * FROM {table_ref} LIMIT 0;"); let mut stmt = conn.prepare(&schema_query).context(QueryExecutionSnafu)?; + let arrow_result = stmt.query_arrow([]).context(QueryExecutionSnafu)?; + let arrow_schema = arrow_result.get_schema(); + + // Get nullable info from DESCRIBE + // This is to fix Arrow API limitation with nullable info + let describe_query = format!("DESCRIBE {table_ref}"); + let mut stmt = conn.prepare(&describe_query).context(QueryExecutionSnafu)?; + let mut nullable_map = HashMap::new(); + let mut rows = stmt.query([]).context(QueryExecutionSnafu)?; + + while let Some(row) = rows.next().context(QueryExecutionSnafu)? 
{ + let col_name: String = row.get(0).context(QueryExecutionSnafu)?; + let null_str: String = row.get(2).context(QueryExecutionSnafu)?; + nullable_map.insert(col_name, null_str == "YES"); + } - let mut fields = Vec::new(); - let column_iter = stmt - .query_map([], |row| { - let column_name: String = row.get("column_name")?; - let column_type_str: String = row.get("column_type")?; - let null: String = row.get("null")?; - - // Convert DuckDB type to Arrow type - let column_type = Self::duckdb_type_to_arrow(&column_type_str).map_err(|_| { - DuckDBError::FromSqlConversionFailure( - 0, - duckdb::types::Type::Text, - Box::new(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!( - "Could not convert DuckDB type '{}' to Arrow type", - column_type_str - ), - )), - ) - })?; - - fields.push(Field::new(column_name, column_type, null == "YES")); - Ok(()) + // Rebuild schema with correct nullable flags + let fields: Vec<_> = arrow_schema + .fields() + .iter() + .map(|field| { + let nullable = nullable_map + .get(field.name().as_str()) + .copied() + .unwrap_or(true); + Arc::new(Field::new( + field.name().as_str(), + field.data_type().clone(), + nullable, + )) }) - .context(QueryExecutionSnafu)?; + .collect(); - for result in column_iter { - result.context(QueryExecutionSnafu)?; - } - let schema = Schema::new(fields); - Ok(Arc::new(schema)) + Ok(Arc::new(Schema::new(fields))) } - fn current_schema_info(conn: &Connection) -> Result { - conn.prepare( - r#" - SELECT ds.schema_id, ds.schema_name, ds.begin_snapshot, ds.end_snapshot - FROM __ducklake_metadata_metalake.main.ducklake_schema ds - WHERE ds.schema_name = current_schema(); - "#, - ) - .context(QueryExecutionSnafu)? 
- .query_row([], |row| { - Ok(CurrentSchema { - schema_name: row.get("schema_name")?, - schema_id: row.get("schema_id")?, - begin_snapshot: row.get("begin_snapshot")?, - end_snapshot: row.get("end_snapshot")?, + fn current_schema_info_inner(conn: &Connection) -> Result { + conn.prepare(SCHEMA_INFO_QUERY) + .context(QueryExecutionSnafu)? + .query_row([], |row| { + Ok(CurrentSchema { + schema_name: row.get("schema_name")?, + schema_id: row.get("schema_id")?, + begin_snapshot: row.get("begin_snapshot")?, + end_snapshot: row.get("end_snapshot")?, + }) }) - }) - .context(QueryExecutionSnafu) + .context(QueryExecutionSnafu) } - fn table_statistics( + fn table_statistics_inner( conn: &Connection, table: &str, snapshot: SnapshotId, @@ -421,8 +553,7 @@ impl Catalog for DuckLakeCatalog { Ok(Some(TableStatistics::from_iter(entries))) } - /// Update table column statistics - fn update_table_column_stats( + fn update_table_column_stats_inner( conn: &Connection, column_id: i64, table_id: i64, @@ -430,7 +561,7 @@ impl Catalog for DuckLakeCatalog { payload: &str, ) -> Result<(), Error> { // Fetch current snapshot info - let curr_snapshot = Self::current_snapshot_info(conn)?; + let curr_snapshot = Self::current_snapshot_info_inner(conn)?; // Update matching past snapshot to close it conn.prepare(UPDATE_ADV_STATS_QUERY) @@ -471,8 +602,7 @@ impl Catalog for DuckLakeCatalog { .execute(params![ curr_snapshot.id.0 + 1, format!( - r#"updated_stats:"main"."ducklake_table_column_adv_stats",{}:{}"#, - stats_type, payload + r#"updated_stats:"main"."ducklake_table_column_adv_stats",{stats_type}:{payload}"#, ), Null, Null, @@ -483,127 +613,3 @@ impl Catalog for DuckLakeCatalog { Ok(()) } } - -impl DuckLakeCatalog { - /// Creates a new DuckLakeStatisticsProvider with optional file paths. - /// If `location` is None, uses in-memory database. If `metadata_path` is None, uses default metadata file. 
- pub fn try_new(location: Option<&str>, metadata_path: Option<&str>) -> Result { - let conn = if let Some(path) = location { - Connection::open(path).context(ConnectionSnafu)? - } else { - Connection::open_in_memory().context(ConnectionSnafu)? - }; - - // Use provided metadata path or default to DEFAULT_METADATA_FILE - let metadata_file = metadata_path.unwrap_or(DEFAULT_METADATA_FILE); - let setup_query = format!( - r#" - INSTALL ducklake; - LOAD ducklake; - ATTACH 'ducklake:{}' AS metalake; - USE metalake; - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats ( - column_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - table_id BIGINT, - stats_type VARCHAR, - payload VARCHAR - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query ( - query_id BIGINT, - query_string VARCHAR, - root_group_id BIGINT - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query_instance ( - query_instance_id BIGINT PRIMARY KEY, - query_id BIGINT, - creation_time BIGINT, - snapshot_id BIGINT - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group_stats ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - stats_type VARCHAR, - payload VARCHAR - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_execution_subplan_feedback ( - group_id BIGINT, - begin_snapshot BIGINT, - end_snapshot BIGINT, - stats_type VARCHAR, - payload VARCHAR - ); - - CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_subplan_scalar_feedback ( - scalar_id BIGINT, - group_id BIGINT, - stats_type VARCHAR, - payload VARCHAR, - query_instance_id BIGINT - ); - "#, - metadata_file - ); - conn.execute_batch(&setup_query).context(ConnectionSnafu)?; - Ok(Self { conn }) - } - - /// Returns a reference to 
the underlying DuckDB connection. - pub fn get_connection(&self) -> &Connection { - &self.conn - } - - /// Returns a mutable reference to the underlying DuckDB connection. - /// Required for creating transactions. - pub fn get_connection_mut(&mut self) -> &mut Connection { - &mut self.conn - } - - /// Converts a DuckDB type string to an Arrow DataType. - fn duckdb_type_to_arrow(type_str: &str) -> Result { - // Handle common DuckDB types - let data_type = match type_str.to_uppercase().as_str() { - "INTEGER" | "INT" | "INT4" => DataType::Int32, - "BIGINT" | "INT8" | "LONG" => DataType::Int64, - "SMALLINT" | "INT2" | "SHORT" => DataType::Int16, - "TINYINT" | "INT1" => DataType::Int8, - "DOUBLE" | "FLOAT8" => DataType::Float64, - "FLOAT" | "REAL" | "FLOAT4" => DataType::Float32, - "BOOLEAN" | "BOOL" => DataType::Boolean, - "VARCHAR" | "TEXT" | "STRING" => DataType::Utf8, - "DATE" => DataType::Date32, - "TIMESTAMP" => { - DataType::Timestamp(duckdb::arrow::datatypes::TimeUnit::Microsecond, None) - } - "TIME" => DataType::Time64(duckdb::arrow::datatypes::TimeUnit::Microsecond), - "BLOB" | "BYTEA" | "BINARY" => DataType::Binary, - "DECIMAL" => DataType::Decimal128(38, 10), // Default precision and scale - _ => { - return Err(Error::ArrowDataTypeConversion { - source: DuckDBError::FromSqlConversionFailure( - 0, - duckdb::types::Type::Text, - Box::new(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Unsupported DuckDB type for Arrow conversion: {}", type_str), - )), - ), - }); - } - }; - Ok(data_type) - } -} diff --git a/optd/catalog/tests/statistics_tests.rs b/optd/catalog/tests/statistics_tests.rs index b8a114f..8cf7112 100644 --- a/optd/catalog/tests/statistics_tests.rs +++ b/optd/catalog/tests/statistics_tests.rs @@ -84,40 +84,35 @@ fn test_ducklake_statistics_provider_creation() { #[test] fn test_table_stats_insertion() { // Test basic statistics insertion without errors. 
- let (_temp_dir, provider) = create_test_catalog(true); - let conn = provider.get_connection(); + let (_temp_dir, mut provider) = create_test_catalog(true); - let result = DuckLakeCatalog::update_table_column_stats( - conn, - 1, - 1, - "ndv", - r#"{"distinct_count": 1000}"#, - ); + let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); assert!(result.is_ok()); } #[test] fn test_table_stats_insertion_and_retrieval() { // Test inserting and retrieving multiple statistics types for a column. - let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); - let conn = provider.get_connection(); + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); - DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "min_value", "25") + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); - DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "max_value", "35") + provider + .update_table_column_stats(age_column_id, table_id, "max_value", "35") + .unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"buckets": [{"min": 20, "max": 30, "count": 2}]}"#, + ) .unwrap(); - DuckLakeCatalog::update_table_column_stats( - conn, - age_column_id, - table_id, - "histogram", - r#"{"buckets": [{"min": 20, "max": 30, "count": 2}]}"#, - ) - .unwrap(); - let latest_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); - let stats = DuckLakeCatalog::table_statistics(conn, "test_table", latest_snapshot) + let latest_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("test_table", latest_snapshot) .unwrap() .unwrap(); @@ -154,10 +149,9 @@ fn test_table_stats_insertion_and_retrieval() { #[test] fn test_current_schema() { // Test fetching current schema info returns valid metadata. 
- let (_temp_dir, provider) = create_test_catalog(true); - let conn = provider.get_connection(); + let (_temp_dir, mut provider) = create_test_catalog(true); - let schema = DuckLakeCatalog::current_schema_info(conn).unwrap(); + let schema = provider.current_schema_info().unwrap(); assert_eq!(schema.schema_name, "main"); assert_eq!(schema.schema_id, 0); @@ -168,18 +162,21 @@ fn test_current_schema() { #[test] fn test_snapshot_versioning_and_stats_types() { // Test snapshot creation, versioning, and continuity for multiple stats updates. - let (_temp_dir, provider) = create_test_catalog(true); - let conn = provider.get_connection(); + let (_temp_dir, mut provider) = create_test_catalog(true); - DuckLakeCatalog::update_table_column_stats(conn, 1, 1, "ndv", r#"{"distinct_count": 1000}"#) + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) .unwrap(); - DuckLakeCatalog::update_table_column_stats(conn, 2, 1, "ndv", r#"{"distinct_count": 2000}"#) + provider + .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) .unwrap(); - DuckLakeCatalog::update_table_column_stats(conn, 3, 1, "histogram", r#"{"buckets": [1,2,3]}"#) + provider + .update_table_column_stats(3, 1, "histogram", r#"{"buckets": [1,2,3]}"#) .unwrap(); - let snapshots: Vec<(i64, i64)> = conn - .prepare( + let snapshots: Vec<(i64, i64)> = { + let conn = provider.get_connection(); + conn.prepare( r#" SELECT column_id, begin_snapshot FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats @@ -191,18 +188,22 @@ fn test_snapshot_versioning_and_stats_types() { .query_map([], |row| Ok((row.get(0)?, row.get(1)?))) .unwrap() .map(|r| r.unwrap()) - .collect(); + .collect() + }; assert_eq!(snapshots.len(), 3); assert!(snapshots[1].1 > snapshots[0].1); assert!(snapshots[2].1 > snapshots[1].1); - DuckLakeCatalog::update_table_column_stats(conn, 1, 1, "ndv", r#"{"distinct_count": 1500}"#) + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 
1500}"#) .unwrap(); - DuckLakeCatalog::update_table_column_stats(conn, 1, 1, "ndv", r#"{"distinct_count": 2000}"#) + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 2000}"#) .unwrap(); - let versions: Vec<(i64, Option, String)> = conn - .prepare( + let versions: Vec<(i64, Option, String)> = { + let conn = provider.get_connection(); + conn.prepare( r#" SELECT begin_snapshot, end_snapshot, payload FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats @@ -214,7 +215,8 @@ fn test_snapshot_versioning_and_stats_types() { .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?))) .unwrap() .map(|r| r.unwrap()) - .collect(); + .collect() + }; assert_eq!(versions.len(), 3); assert!(versions[0].1.is_some() && versions[1].1.is_some() && versions[2].1.is_none()); @@ -224,19 +226,16 @@ fn test_snapshot_versioning_and_stats_types() { assert!(versions[1].2.contains("1500")); assert!(versions[2].2.contains("2000")); - DuckLakeCatalog::update_table_column_stats( - conn, - 1, - 1, - "histogram", - r#"{"buckets": [1,2,3,4,5]}"#, - ) - .unwrap(); - DuckLakeCatalog::update_table_column_stats(conn, 1, 1, "minmax", r#"{"min": 0, "max": 100}"#) + provider + .update_table_column_stats(1, 1, "histogram", r#"{"buckets": [1,2,3,4,5]}"#) + .unwrap(); + provider + .update_table_column_stats(1, 1, "minmax", r#"{"min": 0, "max": 100}"#) .unwrap(); - let type_count: i64 = conn - .query_row( + let type_count: i64 = { + let conn = provider.get_connection(); + conn.query_row( r#" SELECT COUNT(DISTINCT stats_type) FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats @@ -245,42 +244,50 @@ fn test_snapshot_versioning_and_stats_types() { [], |row| row.get(0), ) - .unwrap(); + .unwrap() + }; assert_eq!(type_count, 3); } #[test] fn test_snapshot_tracking_and_multi_table_stats() { // Test snapshot creation tracking and statistics isolation across multiple tables. 
- let (_temp_dir, provider) = create_test_catalog(true); - let conn = provider.get_connection(); + let (_temp_dir, mut provider) = create_test_catalog(true); - let initial_count: i64 = conn - .query_row( + let initial_count: i64 = { + let conn = provider.get_connection(); + conn.query_row( "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", [], |row| row.get(0), ) - .unwrap(); + .unwrap() + }; - DuckLakeCatalog::update_table_column_stats(conn, 1, 1, "ndv", r#"{"distinct_count": 1000}"#) + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) .unwrap(); - DuckLakeCatalog::update_table_column_stats(conn, 2, 1, "ndv", r#"{"distinct_count": 2000}"#) + provider + .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) .unwrap(); - DuckLakeCatalog::update_table_column_stats(conn, 3, 1, "ndv", r#"{"distinct_count": 3000}"#) + provider + .update_table_column_stats(3, 1, "ndv", r#"{"distinct_count": 3000}"#) .unwrap(); - let after_table1_count: i64 = conn - .query_row( + let after_table1_count: i64 = { + let conn = provider.get_connection(); + conn.query_row( "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", [], |row| row.get(0), ) - .unwrap(); + .unwrap() + }; assert_eq!(after_table1_count - initial_count, 3); - let changes_count: i64 = conn - .query_row( + let changes_count: i64 = { + let conn = provider.get_connection(); + conn.query_row( r#" SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot_changes @@ -289,42 +296,50 @@ fn test_snapshot_tracking_and_multi_table_stats() { [], |row| row.get(0), ) - .unwrap(); + .unwrap() + }; assert_eq!(changes_count, 3); - DuckLakeCatalog::update_table_column_stats(conn, 1, 2, "ndv", r#"{"distinct_count": 5000}"#) + provider + .update_table_column_stats(1, 2, "ndv", r#"{"distinct_count": 5000}"#) .unwrap(); - DuckLakeCatalog::update_table_column_stats(conn, 2, 2, "ndv", r#"{"distinct_count": 6000}"#) + provider + 
.update_table_column_stats(2, 2, "ndv", r#"{"distinct_count": 6000}"#) .unwrap(); - let table1_count: i64 = conn - .query_row( - r#" - SELECT COUNT(*) - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = 1 - "#, - [], - |row| row.get(0), - ) - .unwrap(); - let table2_count: i64 = conn - .query_row( - r#" - SELECT COUNT(*) - FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats - WHERE table_id = 2 - "#, - [], - |row| row.get(0), - ) - .unwrap(); + let (table1_count, table2_count): (i64, i64) = { + let conn = provider.get_connection(); + let table1_count: i64 = conn + .query_row( + r#" + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 + "#, + [], + |row| row.get(0), + ) + .unwrap(); + let table2_count: i64 = conn + .query_row( + r#" + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 2 + "#, + [], + |row| row.get(0), + ) + .unwrap(); + (table1_count, table2_count) + }; assert_eq!(table1_count, 3); assert_eq!(table2_count, 2); - let all_snapshots: Vec = conn - .prepare( + let all_snapshots: Vec = { + let conn = provider.get_connection(); + conn.prepare( r#" SELECT begin_snapshot FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats @@ -335,7 +350,8 @@ fn test_snapshot_tracking_and_multi_table_stats() { .query_map([], |row| row.get(0)) .unwrap() .map(|r| r.unwrap()) - .collect(); + .collect() + }; for i in 1..all_snapshots.len() { assert!(all_snapshots[i] > all_snapshots[i - 1]); @@ -345,28 +361,31 @@ fn test_snapshot_tracking_and_multi_table_stats() { #[test] fn test_update_and_fetch_table_column_stats() { // Test updating min/max values and advanced statistics with snapshot progression. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); - let conn = provider.get_connection(); + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); - let initial_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let initial_snapshot = provider.current_snapshot().unwrap(); assert!( - DuckLakeCatalog::table_statistics(conn, "test_table", initial_snapshot) + provider + .table_statistics("test_table", initial_snapshot) .unwrap() .is_some() ); - DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "min_value", "25") + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); - let snapshot_after_min = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let snapshot_after_min = provider.current_snapshot().unwrap(); assert_eq!(snapshot_after_min.0, initial_snapshot.0 + 1); - DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "max_value", "35") + provider + .update_table_column_stats(age_column_id, table_id, "max_value", "35") .unwrap(); - let snapshot_after_max = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let snapshot_after_max = provider.current_snapshot().unwrap(); assert_eq!(snapshot_after_max.0, initial_snapshot.0 + 2); - let (min_val, max_val): (Option, Option) = conn - .query_row( + let (min_val, max_val): (Option, Option) = { + let conn = provider.get_connection(); + conn.query_row( r#" SELECT min_value, max_value FROM __ducklake_metadata_metalake.main.ducklake_table_column_stats @@ -375,13 +394,15 @@ fn test_update_and_fetch_table_column_stats() { [table_id, age_column_id], |row| Ok((row.get(0)?, row.get(1)?)), ) - .unwrap(); + .unwrap() + }; assert_eq!(min_val, Some("25".to_string())); assert_eq!(max_val, Some("35".to_string())); - let adv_stats: Vec<(String, String, i64, Option)> = conn - .prepare( + let adv_stats: Vec<(String, String, i64, Option)> = { + let conn = provider.get_connection(); 
+ conn.prepare( r#" SELECT stats_type, payload, begin_snapshot, end_snapshot FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats @@ -395,7 +416,8 @@ fn test_update_and_fetch_table_column_stats() { }) .unwrap() .map(|r| r.unwrap()) - .collect(); + .collect() + }; assert_eq!(adv_stats.len(), 2); assert!( @@ -409,57 +431,56 @@ fn test_update_and_fetch_table_column_stats() { .any(|(st, p, _, e)| st == "min_value" && p == "25" && e.is_none()) ); - DuckLakeCatalog::update_table_column_stats(conn, - age_column_id, + provider.update_table_column_stats(age_column_id, table_id, "histogram", &json!({"buckets": [{"min": 20, "max": 30, "count": 2}, {"min": 30, "max": 40, "count": 1}]}).to_string(), ) .unwrap(); - let snapshot_after_histogram = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let snapshot_after_histogram = provider.current_snapshot().unwrap(); assert_eq!(snapshot_after_histogram.0, initial_snapshot.0 + 3); } #[test] fn test_fetch_table_stats_with_snapshot_time_travel() { // Test time-travel capability by fetching statistics at different snapshot points. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); - let conn = provider.get_connection(); + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); - let snapshot_0 = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let snapshot_0 = provider.current_snapshot().unwrap(); - DuckLakeCatalog::update_table_column_stats( - conn, - age_column_id, - table_id, - "histogram", - r#"{"version": 1, "buckets": [1, 2, 3]}"#, - ) - .unwrap(); - let snapshot_1 = DuckLakeCatalog::current_snapshot(conn).unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"version": 1, "buckets": [1, 2, 3]}"#, + ) + .unwrap(); + let snapshot_1 = provider.current_snapshot().unwrap(); - DuckLakeCatalog::update_table_column_stats( - conn, - age_column_id, - table_id, - "histogram", - r#"{"version": 2, "buckets": [1, 2, 3, 4, 5]}"#, - ) - .unwrap(); - let snapshot_2 = DuckLakeCatalog::current_snapshot(conn).unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"version": 2, "buckets": [1, 2, 3, 4, 5]}"#, + ) + .unwrap(); + let snapshot_2 = provider.current_snapshot().unwrap(); - DuckLakeCatalog::update_table_column_stats( - conn, - age_column_id, - table_id, - "histogram", - r#"{"version": 3, "buckets": [10, 20, 30]}"#, - ) - .unwrap(); - let snapshot_3 = DuckLakeCatalog::current_snapshot(conn).unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"version": 3, "buckets": [10, 20, 30]}"#, + ) + .unwrap(); + let snapshot_3 = provider.current_snapshot().unwrap(); - let stats_at_0 = DuckLakeCatalog::table_statistics(conn, "test_table", snapshot_0) + let stats_at_0 = provider + .table_statistics("test_table", snapshot_0) .unwrap() .unwrap(); let age_stats_0 = stats_at_0 @@ -469,7 +490,8 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { .unwrap(); 
assert_eq!(age_stats_0.advanced_stats.len(), 0); - let stats_at_1 = DuckLakeCatalog::table_statistics(conn, "test_table", snapshot_1) + let stats_at_1 = provider + .table_statistics("test_table", snapshot_1) .unwrap() .unwrap(); let age_stats_1 = stats_at_1 @@ -485,7 +507,8 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { .contains("\"version\":1") ); - let stats_at_2 = DuckLakeCatalog::table_statistics(conn, "test_table", snapshot_2) + let stats_at_2 = provider + .table_statistics("test_table", snapshot_2) .unwrap() .unwrap(); let age_stats_2 = stats_at_2 @@ -501,7 +524,8 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { .contains("\"version\":2") ); - let stats_at_3 = DuckLakeCatalog::table_statistics(conn, "test_table", snapshot_3) + let stats_at_3 = provider + .table_statistics("test_table", snapshot_3) .unwrap() .unwrap(); let age_stats_3 = stats_at_3 @@ -521,40 +545,37 @@ fn test_fetch_table_stats_with_snapshot_time_travel() { #[test] fn test_fetch_table_stats_multiple_stat_types() { // Test fetching when multiple statistics types exist for the same column. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); - let conn = provider.get_connection(); + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); - DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "min_value", "25") + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); - DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "max_value", "35") + provider + .update_table_column_stats(age_column_id, table_id, "max_value", "35") + .unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"buckets": [20, 25, 30, 35]}"#, + ) + .unwrap(); + provider + .update_table_column_stats(age_column_id, table_id, "ndv", r#"{"distinct_count": 3}"#) + .unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "quantiles", + r#"{"p50": 30, "p95": 34, "p99": 35}"#, + ) .unwrap(); - DuckLakeCatalog::update_table_column_stats( - conn, - age_column_id, - table_id, - "histogram", - r#"{"buckets": [20, 25, 30, 35]}"#, - ) - .unwrap(); - DuckLakeCatalog::update_table_column_stats( - conn, - age_column_id, - table_id, - "ndv", - r#"{"distinct_count": 3}"#, - ) - .unwrap(); - DuckLakeCatalog::update_table_column_stats( - conn, - age_column_id, - table_id, - "quantiles", - r#"{"p50": 30, "p95": 34, "p99": 35}"#, - ) - .unwrap(); - let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); - let stats = DuckLakeCatalog::table_statistics(conn, "test_table", current_snapshot) + let current_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("test_table", current_snapshot) .unwrap() .unwrap(); @@ -582,14 +603,15 @@ fn test_fetch_table_stats_multiple_stat_types() { #[test] fn test_fetch_table_stats_columns_without_stats() { // Test that columns without advanced statistics are still returned in fetch results. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); - let conn = provider.get_connection(); + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); - DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "min_value", "25") + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") .unwrap(); - let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); - let stats = DuckLakeCatalog::table_statistics(conn, "test_table", current_snapshot) + let current_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("test_table", current_snapshot) .unwrap() .unwrap(); @@ -619,7 +641,7 @@ fn test_fetch_table_stats_columns_without_stats() { #[test] fn test_fetch_table_stats_row_count() { // Test that row_count is correctly populated from table statistics. - let (_temp_dir, provider) = create_test_catalog(false); + let (_temp_dir, mut provider) = create_test_catalog(false); let conn = provider.get_connection(); conn.execute_batch( @@ -657,17 +679,13 @@ fn test_fetch_table_stats_row_count() { ) .unwrap(); - DuckLakeCatalog::update_table_column_stats( - conn, - col1_id, - table_id, - "ndv", - r#"{"distinct_count": 100}"#, - ) - .unwrap(); + provider + .update_table_column_stats(col1_id, table_id, "ndv", r#"{"distinct_count": 100}"#) + .unwrap(); - let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); - let stats = DuckLakeCatalog::table_statistics(conn, "large_table", current_snapshot) + let current_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("large_table", current_snapshot) .unwrap() .unwrap(); @@ -678,7 +696,7 @@ fn test_fetch_table_stats_row_count() { #[test] fn test_current_schema_arrow() { // Test fetching Arrow schema from DuckDB table with type conversions. 
- let (_temp_dir, provider) = create_test_catalog(false); + let (_temp_dir, mut provider) = create_test_catalog(false); let conn = provider.get_connection(); conn.execute_batch( @@ -693,7 +711,7 @@ fn test_current_schema_arrow() { ) .unwrap(); - let schema = DuckLakeCatalog::current_schema(conn, None, "schema_test_table").unwrap(); + let schema = provider.current_schema(None, "schema_test_table").unwrap(); assert_eq!(schema.fields().len(), 4); @@ -720,34 +738,37 @@ fn test_current_schema_arrow() { &duckdb::arrow::datatypes::DataType::Boolean )); - let schema_explicit = - DuckLakeCatalog::current_schema(conn, Some("main"), "schema_test_table").unwrap(); + let schema_explicit = provider + .current_schema(Some("main"), "schema_test_table") + .unwrap(); assert_eq!(schema_explicit.fields().len(), 4); } #[test] fn test_multiple_schemas_comprehensive() { // Test schema fetching and metadata tracking across multiple database schemas. - let (_temp_dir, provider) = create_test_catalog(false); - let conn = provider.get_connection(); + let (_temp_dir, mut provider) = create_test_catalog(false); - let initial_schema_info = DuckLakeCatalog::current_schema_info(conn).unwrap(); + let initial_schema_info = provider.current_schema_info().unwrap(); assert_eq!(initial_schema_info.schema_name, "main"); assert_eq!(initial_schema_info.schema_id, 0); assert!(initial_schema_info.end_snapshot.is_none()); - conn.execute_batch( - r#" + { + let conn = provider.get_connection(); + conn.execute_batch( + r#" CREATE SCHEMA analytics; CREATE SCHEMA reporting; CREATE TABLE main.users (user_id INTEGER, username VARCHAR, email VARCHAR, created_at TIMESTAMP); CREATE TABLE analytics.metrics (metric_id BIGINT, metric_name VARCHAR, value DOUBLE, recorded_at DATE); CREATE TABLE reporting.summary (report_id SMALLINT, report_name TEXT, data BLOB, is_published BOOLEAN); "#, - ) - .unwrap(); + ) + .unwrap(); + } - let main_users_schema = DuckLakeCatalog::current_schema(conn, None, "users").unwrap(); + let 
main_users_schema = provider.current_schema(None, "users").unwrap(); assert_eq!(main_users_schema.fields().len(), 4); assert!(matches!( main_users_schema @@ -771,8 +792,9 @@ fn test_multiple_schemas_comprehensive() { &duckdb::arrow::datatypes::DataType::Timestamp(_, _) )); - let analytics_metrics_schema = - DuckLakeCatalog::current_schema(conn, Some("analytics"), "metrics").unwrap(); + let analytics_metrics_schema = provider + .current_schema(Some("analytics"), "metrics") + .unwrap(); assert_eq!(analytics_metrics_schema.fields().len(), 4); assert!(matches!( analytics_metrics_schema @@ -796,8 +818,9 @@ fn test_multiple_schemas_comprehensive() { &duckdb::arrow::datatypes::DataType::Date32 )); - let reporting_summary_schema = - DuckLakeCatalog::current_schema(conn, Some("reporting"), "summary").unwrap(); + let reporting_summary_schema = provider + .current_schema(Some("reporting"), "summary") + .unwrap(); assert_eq!(reporting_summary_schema.fields().len(), 4); assert!(matches!( reporting_summary_schema @@ -821,26 +844,33 @@ fn test_multiple_schemas_comprehensive() { &duckdb::arrow::datatypes::DataType::Boolean )); - let current_schema_info = DuckLakeCatalog::current_schema_info(conn).unwrap(); + let current_schema_info = provider.current_schema_info().unwrap(); assert_eq!(current_schema_info.schema_name, "main"); - conn.execute("USE analytics;", []).unwrap(); - let analytics_schema_info = DuckLakeCatalog::current_schema_info(conn).unwrap(); + { + let conn = provider.get_connection(); + conn.execute("USE analytics;", []).unwrap(); + } + let analytics_schema_info = provider.current_schema_info().unwrap(); assert_eq!(analytics_schema_info.schema_name, "analytics"); assert!(analytics_schema_info.end_snapshot.is_none()); - let metrics_schema_implicit = DuckLakeCatalog::current_schema(conn, None, "metrics").unwrap(); + let metrics_schema_implicit = provider.current_schema(None, "metrics").unwrap(); assert_eq!(metrics_schema_implicit.fields().len(), 4); - let users_from_main 
= DuckLakeCatalog::current_schema(conn, Some("main"), "users").unwrap(); + let users_from_main = provider.current_schema(Some("main"), "users").unwrap(); assert_eq!(users_from_main.fields().len(), 4); - conn.execute("USE reporting;", []).unwrap(); - let reporting_schema_info = DuckLakeCatalog::current_schema_info(conn).unwrap(); + { + let conn = provider.get_connection(); + conn.execute("USE reporting;", []).unwrap(); + } + let reporting_schema_info = provider.current_schema_info().unwrap(); assert_eq!(reporting_schema_info.schema_name, "reporting"); - let schemas: Vec<(String, i64, i64, Option)> = conn - .prepare( + let schemas: Vec<(String, i64, i64, Option)> = { + let conn = provider.get_connection(); + conn.prepare( r#" SELECT schema_name, schema_id, begin_snapshot, end_snapshot FROM __ducklake_metadata_metalake.main.ducklake_schema @@ -853,7 +883,8 @@ fn test_multiple_schemas_comprehensive() { }) .unwrap() .map(|r| r.unwrap()) - .collect(); + .collect() + }; assert!(schemas.len() >= 3); @@ -873,67 +904,71 @@ fn test_multiple_schemas_comprehensive() { #[test] fn test_error_handling_edge_cases() { // Test various error scenarios: non-existent tables, invalid snapshots, invalid IDs. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); - let conn = provider.get_connection(); + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); // Non-existent table returns empty results - let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); - let stats = - DuckLakeCatalog::table_statistics(conn, "nonexistent_table", current_snapshot).unwrap(); + let current_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("nonexistent_table", current_snapshot) + .unwrap(); assert!(stats.is_some()); assert_eq!(stats.unwrap().column_statistics.len(), 0); // Invalid/future snapshot still returns data - DuckLakeCatalog::update_table_column_stats(conn, age_column_id, table_id, "min_value", "25") + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + let future_stats = provider + .table_statistics("test_table", SnapshotId(99999)) .unwrap(); - let future_stats = - DuckLakeCatalog::table_statistics(conn, "test_table", SnapshotId(99999)).unwrap(); assert!(future_stats.is_some()); assert_eq!(future_stats.unwrap().column_statistics.len(), 3); // Updating with invalid IDs succeeds without error - let result = DuckLakeCatalog::update_table_column_stats( - conn, - 9999, - 9999, - "ndv", - r#"{"distinct_count": 100}"#, - ); + let result = + provider.update_table_column_stats(9999, 9999, "ndv", r#"{"distinct_count": 100}"#); assert!(result.is_ok()); // Fetching schema for non-existent table returns error - assert!(DuckLakeCatalog::current_schema(conn, None, "nonexistent_table").is_err()); + assert!(provider.current_schema(None, "nonexistent_table").is_err()); // Invalid schema name returns error - conn.execute_batch("CREATE TABLE test (id INTEGER);") - .unwrap(); - assert!(DuckLakeCatalog::current_schema(conn, Some("nonexistent_schema"), "test").is_err()); + { + let conn = provider.get_connection(); + 
conn.execute_batch("CREATE TABLE test (id INTEGER);") + .unwrap(); + } + assert!( + provider + .current_schema(Some("nonexistent_schema"), "test") + .is_err() + ); } #[test] fn test_update_same_stat_rapidly() { // Test updating the same statistic multiple times in rapid succession. - let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); - let conn = provider.get_connection(); + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); - let initial_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let initial_snapshot = provider.current_snapshot().unwrap(); for i in 1..=5 { - DuckLakeCatalog::update_table_column_stats( - conn, - age_column_id, - table_id, - "ndv", - &format!(r#"{{"distinct_count": {}}}"#, i * 100), - ) - .unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "ndv", + &format!(r#"{{"distinct_count": {}}}"#, i * 100), + ) + .unwrap(); } - let final_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); + let final_snapshot = provider.current_snapshot().unwrap(); assert_eq!(final_snapshot.0, initial_snapshot.0 + 5); - let versions: Vec<(i64, Option)> = conn - .prepare( + let versions: Vec<(i64, Option)> = { + let conn = provider.get_connection(); + conn.prepare( r#" SELECT begin_snapshot, end_snapshot FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats @@ -947,7 +982,8 @@ fn test_update_same_stat_rapidly() { }) .unwrap() .map(|r| r.unwrap()) - .collect(); + .collect() + }; assert_eq!(versions.len(), 5); for i in 0..4 { @@ -960,19 +996,22 @@ fn test_update_same_stat_rapidly() { #[test] fn test_data_edge_cases() { // Test empty tables, single columns, special characters, and large payloads. 
- let (_temp_dir, provider) = create_test_catalog(false); + let (_temp_dir, mut provider) = create_test_catalog(false); let conn = provider.get_connection(); // Empty table with zero rows conn.execute_batch("CREATE TABLE empty_table (id INTEGER, name VARCHAR);") .unwrap(); - let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); - let empty_stats = DuckLakeCatalog::table_statistics(conn, "empty_table", current_snapshot) + + let current_snapshot = provider.current_snapshot().unwrap(); + let empty_stats = provider + .table_statistics("empty_table", current_snapshot) .unwrap() .unwrap(); assert_eq!(empty_stats.row_count, 0); // Single column table + let conn = provider.get_connection(); conn.execute_batch( r#" CREATE TABLE single_col (value INTEGER); @@ -980,8 +1019,10 @@ fn test_data_edge_cases() { "#, ) .unwrap(); - let single_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); - let single_stats = DuckLakeCatalog::table_statistics(conn, "single_col", single_snapshot) + + let single_snapshot = provider.current_snapshot().unwrap(); + let single_stats = provider + .table_statistics("single_col", single_snapshot) .unwrap() .unwrap(); assert_eq!(single_stats.column_statistics.len(), 1); @@ -989,48 +1030,48 @@ fn test_data_edge_cases() { assert_eq!(single_stats.column_statistics[0].name, "value"); // Special characters in payload - conn.execute_batch( - r#" + let (table_id, age_column_id) = { + let conn = provider.get_connection(); + conn.execute_batch( + r#" CREATE TABLE test_table (id INTEGER, age INTEGER); INSERT INTO test_table VALUES (1, 25), (2, 30); "#, - ) - .unwrap(); - let table_id: i64 = conn - .query_row( - r#" + ) + .unwrap(); + let table_id: i64 = conn + .query_row( + r#" SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id WHERE ds.schema_name = current_schema() AND dt.table_name = 'test_table'; "#, - [], - |row| 
row.get(0), - ) - .unwrap(); - let age_column_id: i64 = conn - .query_row( - r#" + [], + |row| row.get(0), + ) + .unwrap(); + let age_column_id: i64 = conn + .query_row( + r#" SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column WHERE table_id = ? AND column_name = 'age'; "#, - [table_id], - |row| row.get(0), - ) - .unwrap(); + [table_id], + |row| row.get(0), + ) + .unwrap(); + (table_id, age_column_id) + }; let special_payload = r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "empty": ""}"#; - DuckLakeCatalog::update_table_column_stats( - conn, - age_column_id, - table_id, - "special_test", - special_payload, - ) - .unwrap(); - let retrieved: String = conn - .query_row( + provider + .update_table_column_stats(age_column_id, table_id, "special_test", special_payload) + .unwrap(); + let retrieved: String = { + let conn = provider.get_connection(); + conn.query_row( r#" SELECT payload FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats @@ -1040,7 +1081,8 @@ fn test_data_edge_cases() { [age_column_id, table_id], |row| row.get(0), ) - .unwrap(); + .unwrap() + }; assert_eq!(retrieved, special_payload); // Large payload @@ -1050,16 +1092,12 @@ fn test_data_edge_cases() { "metadata": "x".repeat(1000) }) .to_string(); - DuckLakeCatalog::update_table_column_stats( - conn, - age_column_id, - table_id, - "large_histogram", - &large_payload, - ) - .unwrap(); - let new_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); - let large_stats = DuckLakeCatalog::table_statistics(conn, "test_table", new_snapshot) + provider + .update_table_column_stats(age_column_id, table_id, "large_histogram", &large_payload) + .unwrap(); + let new_snapshot = provider.current_snapshot().unwrap(); + let large_stats = provider + .table_statistics("test_table", new_snapshot) .unwrap() .unwrap(); let age_stats = large_stats @@ -1078,7 +1116,7 @@ fn test_data_edge_cases() { #[test] fn test_schema_edge_cases() { // Test schema fetching with 
nullable/non-nullable columns and complex types. - let (_temp_dir, provider) = create_test_catalog(false); + let (_temp_dir, mut provider) = create_test_catalog(false); let conn = provider.get_connection(); // Mixed nullable and non-nullable columns @@ -1093,7 +1131,8 @@ fn test_schema_edge_cases() { "#, ) .unwrap(); - let mixed_schema = DuckLakeCatalog::current_schema(conn, None, "mixed_nulls").unwrap(); + + let mixed_schema = provider.current_schema(None, "mixed_nulls").unwrap(); assert_eq!(mixed_schema.fields().len(), 4); assert!(!mixed_schema.field_with_name("id").unwrap().is_nullable()); assert!( @@ -1116,6 +1155,7 @@ fn test_schema_edge_cases() { ); // Complex types + let conn = provider.get_connection(); conn.execute_batch( r#" CREATE TABLE complex_types ( @@ -1134,7 +1174,8 @@ fn test_schema_edge_cases() { "#, ) .unwrap(); - let complex_schema = DuckLakeCatalog::current_schema(conn, None, "complex_types").unwrap(); + + let complex_schema = provider.current_schema(None, "complex_types").unwrap(); assert_eq!(complex_schema.fields().len(), 11); assert!(matches!( complex_schema @@ -1183,13 +1224,11 @@ fn test_schema_edge_cases() { #[test] fn test_concurrent_snapshot_isolation() { // Test statistics with special characters and edge case JSON values. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); - let conn = provider.get_connection(); + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); let special_payload = r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "empty": ""}"#; - let result = DuckLakeCatalog::update_table_column_stats( - conn, + let result = provider.update_table_column_stats( age_column_id, table_id, "special_test", @@ -1198,8 +1237,9 @@ fn test_concurrent_snapshot_isolation() { assert!(result.is_ok()); - let retrieved_payload: String = conn - .query_row( + let retrieved_payload: String = { + let conn = provider.get_connection(); + conn.query_row( r#" SELECT payload FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats @@ -1209,7 +1249,8 @@ fn test_concurrent_snapshot_isolation() { [age_column_id, table_id], |row| row.get(0), ) - .unwrap(); + .unwrap() + }; assert_eq!(retrieved_payload, special_payload); } @@ -1217,8 +1258,7 @@ fn test_concurrent_snapshot_isolation() { #[test] fn test_large_statistics_payload() { // Test handling of large statistics payloads. 
- let (_temp_dir, provider, table_id, age_column_id) = create_test_catalog_with_data(); - let conn = provider.get_connection(); + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); let large_histogram: Vec = (0..1000).collect(); let large_payload = json!({ @@ -1227,8 +1267,7 @@ fn test_large_statistics_payload() { }) .to_string(); - let result = DuckLakeCatalog::update_table_column_stats( - conn, + let result = provider.update_table_column_stats( age_column_id, table_id, "large_histogram", @@ -1237,8 +1276,9 @@ fn test_large_statistics_payload() { assert!(result.is_ok()); - let current_snapshot = DuckLakeCatalog::current_snapshot(conn).unwrap(); - let stats = DuckLakeCatalog::table_statistics(conn, "test_table", current_snapshot) + let current_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("test_table", current_snapshot) .unwrap() .unwrap(); @@ -1260,7 +1300,7 @@ fn test_large_statistics_payload() { #[test] fn test_mixed_null_and_non_null_columns() { // Test schema fetching with mixed nullable and non-nullable columns. - let (_temp_dir, provider) = create_test_catalog(false); + let (_temp_dir, mut provider) = create_test_catalog(false); let conn = provider.get_connection(); conn.execute_batch( @@ -1275,7 +1315,7 @@ fn test_mixed_null_and_non_null_columns() { ) .unwrap(); - let schema = DuckLakeCatalog::current_schema(conn, None, "mixed_nulls").unwrap(); + let schema = provider.current_schema(None, "mixed_nulls").unwrap(); assert_eq!(schema.fields().len(), 4); @@ -1295,7 +1335,7 @@ fn test_mixed_null_and_non_null_columns() { #[test] fn test_schema_with_complex_types() { // Test schema fetching with various complex and edge case data types. 
- let (_temp_dir, provider) = create_test_catalog(false); + let (_temp_dir, mut provider) = create_test_catalog(false); let conn = provider.get_connection(); conn.execute_batch( @@ -1317,7 +1357,7 @@ fn test_schema_with_complex_types() { ) .unwrap(); - let schema = DuckLakeCatalog::current_schema(conn, None, "complex_types").unwrap(); + let schema = provider.current_schema(None, "complex_types").unwrap(); assert_eq!(schema.fields().len(), 11); From 83e4cb4bac348381b23b94f2ed88209e1cba8405 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Tue, 11 Nov 2025 13:02:29 -0500 Subject: [PATCH 27/40] move to connectors --- .../optd_catalog.rs => connectors/datafusion/src/catalog.rs | 0 connectors/datafusion/src/lib.rs | 2 ++ .../src/optd_table.rs => connectors/datafusion/src/table.rs | 0 optd/catalog/src/lib.rs | 5 ----- 4 files changed, 2 insertions(+), 5 deletions(-) rename optd/catalog/src/optd_catalog.rs => connectors/datafusion/src/catalog.rs (100%) rename optd/catalog/src/optd_table.rs => connectors/datafusion/src/table.rs (100%) delete mode 100644 optd/catalog/src/lib.rs diff --git a/optd/catalog/src/optd_catalog.rs b/connectors/datafusion/src/catalog.rs similarity index 100% rename from optd/catalog/src/optd_catalog.rs rename to connectors/datafusion/src/catalog.rs diff --git a/connectors/datafusion/src/lib.rs b/connectors/datafusion/src/lib.rs index 9c50034..7f316c0 100644 --- a/connectors/datafusion/src/lib.rs +++ b/connectors/datafusion/src/lib.rs @@ -1,5 +1,7 @@ +mod catalog; mod extension; mod planner; +mod table; use std::sync::Arc; diff --git a/optd/catalog/src/optd_table.rs b/connectors/datafusion/src/table.rs similarity index 100% rename from optd/catalog/src/optd_table.rs rename to connectors/datafusion/src/table.rs diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs deleted file mode 100644 index cf1844b..0000000 --- a/optd/catalog/src/lib.rs +++ /dev/null @@ -1,5 +0,0 @@ -mod optd_catalog; -mod optd_table; - -pub use optd_catalog::*; -pub use 
optd_table::*; From e6e92530d09d498d3c6ca41dcfd468d6c7d293e0 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Tue, 18 Nov 2025 14:14:25 -0500 Subject: [PATCH 28/40] service implementation and test suite --- optd/catalog/src/lib.rs | 3 + optd/catalog/src/service.rs | 345 +++++++++ optd/catalog/tests/service_tests.rs | 1046 +++++++++++++++++++++++++++ 3 files changed, 1394 insertions(+) create mode 100644 optd/catalog/src/service.rs create mode 100644 optd/catalog/tests/service_tests.rs diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs index 21a60b6..0dcec8f 100644 --- a/optd/catalog/src/lib.rs +++ b/optd/catalog/src/lib.rs @@ -10,6 +10,9 @@ use serde_json::Value; use snafu::{ResultExt, prelude::*}; use std::{collections::HashMap, sync::Arc}; +mod service; +pub use service::{CatalogBackend, CatalogRequest, CatalogService, CatalogServiceHandle}; + /// Operations for managing table statistics with snapshot-based time travel. pub trait Catalog { /// Gets the current (most recent) snapshot ID. diff --git a/optd/catalog/src/service.rs b/optd/catalog/src/service.rs new file mode 100644 index 0000000..2904580 --- /dev/null +++ b/optd/catalog/src/service.rs @@ -0,0 +1,345 @@ +use crate::{ + Catalog, CatalogError, CurrentSchema, DuckLakeCatalog, SchemaRef, SnapshotId, SnapshotInfo, + TableStatistics, +}; +use tokio::sync::{mpsc, oneshot}; + +/// Max pending requests +const CHANNEL_BUFFER_SIZE: usize = 1000; + +/// Trait defining the catalog backend that can be used with the service. 
+pub trait CatalogBackend: Send + 'static { + fn current_snapshot(&mut self) -> Result; + fn current_snapshot_info(&mut self) -> Result; + fn current_schema( + &mut self, + schema: Option<&str>, + table: &str, + ) -> Result; + fn current_schema_info(&mut self) -> Result; + fn table_statistics( + &mut self, + table_name: &str, + snapshot: SnapshotId, + ) -> Result, CatalogError>; + fn update_table_column_stats( + &mut self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), CatalogError>; +} + +/// Implement CatalogBackend for any type that implements Catalog +impl CatalogBackend for T { + fn current_snapshot(&mut self) -> Result { + Catalog::current_snapshot(self) + } + + fn current_snapshot_info(&mut self) -> Result { + Catalog::current_snapshot_info(self) + } + + fn current_schema( + &mut self, + schema: Option<&str>, + table: &str, + ) -> Result { + Catalog::current_schema(self, schema, table) + } + + fn current_schema_info(&mut self) -> Result { + Catalog::current_schema_info(self) + } + + fn table_statistics( + &mut self, + table_name: &str, + snapshot: SnapshotId, + ) -> Result, CatalogError> { + Catalog::table_statistics(self, table_name, snapshot) + } + + fn update_table_column_stats( + &mut self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), CatalogError> { + Catalog::update_table_column_stats(self, column_id, table_id, stats_type, payload) + } +} + +#[derive(Debug)] +pub enum CatalogRequest { + CurrentSnapshot { + respond_to: oneshot::Sender>, + }, + + CurrentSnapshotInfo { + respond_to: oneshot::Sender>, + }, + + CurrentSchema { + schema: Option, + table: String, + respond_to: oneshot::Sender>, + }, + + CurrentSchemaInfo { + respond_to: oneshot::Sender>, + }, + + TableStatistics { + table_name: String, + snapshot: SnapshotId, + respond_to: oneshot::Sender, CatalogError>>, + }, + + UpdateTableColumnStats { + column_id: i64, + table_id: i64, + stats_type: String, + payload: 
String, + respond_to: oneshot::Sender>, + }, + + Shutdown, +} + +/// Handle for catalog service interaction +#[derive(Clone)] +pub struct CatalogServiceHandle { + sender: mpsc::Sender, +} + +impl CatalogServiceHandle { + pub async fn current_snapshot(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::CurrentSnapshot { respond_to: tx }) + .await + .map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? + } + + pub async fn current_snapshot_info(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::CurrentSnapshotInfo { respond_to: tx }) + .await + .map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? + } + + pub async fn current_schema( + &self, + schema: Option<&str>, + table: &str, + ) -> Result { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::CurrentSchema { + schema: schema.map(|s| s.to_string()), + table: table.to_string(), + respond_to: tx, + }) + .await + .map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? + } + + pub async fn current_schema_info(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::CurrentSchemaInfo { respond_to: tx }) + .await + .map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? 
+ } + + pub async fn table_statistics( + &self, + table_name: &str, + snapshot: SnapshotId, + ) -> Result, CatalogError> { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::TableStatistics { + table_name: table_name.to_string(), + snapshot, + respond_to: tx, + }) + .await + .map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? + } + + pub async fn update_table_column_stats( + &self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), CatalogError> { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::UpdateTableColumnStats { + column_id, + table_id, + stats_type: stats_type.to_string(), + payload: payload.to_string(), + respond_to: tx, + }) + .await + .map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? 
+ } + + pub async fn shutdown(&self) -> Result<(), CatalogError> { + self.sender + .send(CatalogRequest::Shutdown) + .await + .map_err(|_| CatalogError::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + }) + } +} + +/// The catalog service that processes requests in the background +pub struct CatalogService { + backend: B, + receiver: mpsc::Receiver, +} + +impl CatalogService { + /// Create service with provided backend catalog + pub fn new(backend: B) -> (Self, CatalogServiceHandle) { + let (sender, receiver) = mpsc::channel(CHANNEL_BUFFER_SIZE); + + let service = CatalogService { backend, receiver }; + let handle = CatalogServiceHandle { sender }; + + (service, handle) + } + + /// Run the service, processing requests until shutdown + /// + /// Spawn with tokio: + /// ```ignore + /// tokio::spawn(async move { + /// service.run().await; + /// }); + /// ``` + pub async fn run(mut self) { + while let Some(request) = self.receiver.recv().await { + match request { + CatalogRequest::CurrentSnapshot { respond_to } => { + let result = self.backend.current_snapshot(); + let _ = respond_to.send(result); + } + + CatalogRequest::CurrentSnapshotInfo { respond_to } => { + let result = self.backend.current_snapshot_info(); + let _ = respond_to.send(result); + } + + CatalogRequest::CurrentSchema { + schema, + table, + respond_to, + } => { + let result = self.backend.current_schema(schema.as_deref(), &table); + let _ = respond_to.send(result); + } + + CatalogRequest::CurrentSchemaInfo { respond_to } => { + let result = self.backend.current_schema_info(); + let _ = respond_to.send(result); + } + + CatalogRequest::TableStatistics { + table_name, + snapshot, + respond_to, + } => { + let result = self.backend.table_statistics(&table_name, snapshot); + let _ = respond_to.send(result); + } + + CatalogRequest::UpdateTableColumnStats { + column_id, + table_id, + stats_type, + payload, + respond_to, + } => { + let result = self.backend.update_table_column_stats( + 
column_id, + table_id, + &stats_type, + &payload, + ); + let _ = respond_to.send(result); + } + + CatalogRequest::Shutdown => { + // drop the receiver to stop accepting new requests + break; + } + } + } + } +} + +// Convenience methods for creating service with DuckLakeCatalog +impl CatalogService { + /// Create service from location paths using DuckLakeCatalog backend + pub fn try_new_from_location( + location: Option<&str>, + metadata_path: Option<&str>, + ) -> Result<(Self, CatalogServiceHandle), CatalogError> { + let catalog = DuckLakeCatalog::try_new(location, metadata_path)?; + Ok(Self::new(catalog)) + } + + /// Get a reference to the underlying DuckLakeCatalog for test setup only. + /// Only available in test/debug builds and should + /// only be used for setting up test fixtures. + #[cfg(any(test, debug_assertions))] + pub fn catalog_for_setup(&self) -> &DuckLakeCatalog { + &self.backend + } +} diff --git a/optd/catalog/tests/service_tests.rs b/optd/catalog/tests/service_tests.rs new file mode 100644 index 0000000..b47cf70 --- /dev/null +++ b/optd/catalog/tests/service_tests.rs @@ -0,0 +1,1046 @@ +use optd_catalog::{CatalogService, CatalogServiceHandle, DuckLakeCatalog}; +use std::time::Duration; +use tempfile::TempDir; + +/// Helper to create a test catalog service +fn create_test_service() -> ( + TempDir, + CatalogService, + CatalogServiceHandle, +) { + let temp_dir = TempDir::new().unwrap(); + let metadata_path = temp_dir.path().join("metadata.ducklake"); + + let (service, handle) = + CatalogService::try_new_from_location(None, Some(metadata_path.to_str().unwrap())).unwrap(); + + (temp_dir, service, handle) +} + +// ============================================================================ +// Basic Functionality Tests +// ============================================================================ + +#[tokio::test] +async fn test_service_creation_and_shutdown() { + let (_temp_dir, service, handle) = create_test_service(); + + // Verify handle is 
cloneable (multi-producer capability) + let handle_clone = handle.clone(); + + let service_handle = tokio::spawn(async move { + service.run().await; + }); + + // Both handles should work + let snapshot1 = handle.current_snapshot().await.unwrap(); + let snapshot2 = handle_clone.current_snapshot().await.unwrap(); + assert_eq!( + snapshot1.0, snapshot2.0, + "Cloned handles should access same service" + ); + + // Shutdown should complete gracefully + handle.shutdown().await.unwrap(); + + // Service task should complete + tokio::time::timeout(Duration::from_secs(1), service_handle) + .await + .expect("Service should shutdown within timeout") + .unwrap(); + + // Verify shutdown is idempotent + let result = handle_clone.shutdown().await; + assert!(result.is_err(), "Second shutdown should fail gracefully"); +} + +#[tokio::test] +async fn test_current_snapshot_basic() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + let snapshot = handle.current_snapshot().await.unwrap(); + assert_eq!(snapshot.0, 0, "Initial snapshot should be 0"); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_current_snapshot_info() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + let info = handle.current_snapshot_info().await.unwrap(); + assert_eq!(info.id.0, 0); + assert_eq!(info.schema_version, 0); + assert!(info.next_catalog_id > 0); + assert_eq!(info.next_file_id, 0); + + // Verify snapshot info is consistent with current_snapshot + let snapshot = handle.current_snapshot().await.unwrap(); + assert_eq!( + info.id.0, snapshot.0, + "Snapshot info ID should match current snapshot" + ); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_current_schema_info() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + let schema_info = 
handle.current_schema_info().await.unwrap(); + assert_eq!(schema_info.schema_name, "main"); + assert_eq!(schema_info.schema_id, 0); + assert_eq!(schema_info.begin_snapshot, 0); + assert!(schema_info.end_snapshot.is_none()); + + handle.shutdown().await.unwrap(); +} + +// ============================================================================ +// Table and Schema Tests +// ============================================================================ + +#[tokio::test] +async fn test_current_schema_with_table() { + let (_temp_dir, service, handle) = create_test_service(); + + // Get the catalog to create a test table BEFORE spawning service + let conn = service.catalog_for_setup().get_connection(); + conn.execute_batch( + r#" + CREATE TABLE test_table ( + id INTEGER NOT NULL, + name VARCHAR, + age INTEGER + ); + "#, + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + // Fetch schema + let schema = handle.current_schema(None, "test_table").await.unwrap(); + + assert_eq!(schema.fields().len(), 3); + assert!(schema.field_with_name("id").is_ok()); + assert!(schema.field_with_name("name").is_ok()); + assert!(schema.field_with_name("age").is_ok()); + + // Check nullable constraints + let id_field = schema.field_with_name("id").unwrap(); + assert!(!id_field.is_nullable(), "id should not be nullable"); + + let name_field = schema.field_with_name("name").unwrap(); + assert!(name_field.is_nullable(), "name should be nullable"); + + // Verify data types are correctly mapped + use duckdb::arrow::datatypes::DataType; + assert!( + matches!(id_field.data_type(), DataType::Int32), + "id should be Int32" + ); + assert!( + matches!(name_field.data_type(), DataType::Utf8), + "name should be Utf8/String" + ); + + // Verify field order matches CREATE TABLE order + assert_eq!(schema.fields()[0].name(), "id"); + assert_eq!(schema.fields()[1].name(), "name"); + assert_eq!(schema.fields()[2].name(), "age"); + + handle.shutdown().await.unwrap(); +} + 
+#[tokio::test] +async fn test_table_statistics_empty_table() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + conn.execute_batch( + r#" + CREATE TABLE empty_table (id INTEGER, name VARCHAR); + "#, + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + let snapshot = handle.current_snapshot().await.unwrap(); + let stats = handle + .table_statistics("empty_table", snapshot) + .await + .unwrap(); + + assert!(stats.is_some()); + let stats = stats.unwrap(); + assert_eq!(stats.row_count, 0); + + // For empty tables with no data, the statistics system may not return column metadata + // This is expected behavior - verify it's empty or has minimal stats + assert_eq!( + stats.column_statistics.len(), + 0, + "Empty table with no data should have 0 column statistics" + ); + + // If there were column statistics, verify no advanced stats would be present + for col_stat in &stats.column_statistics { + assert_eq!( + col_stat.advanced_stats.len(), + 0, + "Empty table should have no advanced stats for {}", + col_stat.name + ); + } + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_table_statistics_nonexistent_table() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + let snapshot = handle.current_snapshot().await.unwrap(); + let stats = handle + .table_statistics("nonexistent_table", snapshot) + .await + .unwrap(); + + assert!(stats.is_some()); + assert_eq!(stats.unwrap().column_statistics.len(), 0); + + handle.shutdown().await.unwrap(); +} + +// ============================================================================ +// Statistics Update Tests +// ============================================================================ + +#[tokio::test] +async fn test_update_and_retrieve_statistics() { + let (_temp_dir, service, handle) = 
create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + // Create table and get IDs + conn.execute_batch( + r#" + CREATE TABLE stats_test (id INTEGER, value DOUBLE); + INSERT INTO stats_test VALUES (1, 10.5), (2, 20.5); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'stats_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let value_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'value'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + // Update statistics + handle + .update_table_column_stats(value_column_id, table_id, "min_value", "10.5") + .await + .unwrap(); + + handle + .update_table_column_stats(value_column_id, table_id, "max_value", "20.5") + .await + .unwrap(); + + // Retrieve and verify + let snapshot = handle.current_snapshot().await.unwrap(); + // Table creation creates initial snapshots, then 2 updates create 2 more + assert!( + snapshot.0 >= 2, + "Should have at least 2 snapshots after updates" + ); + + let stats = handle + .table_statistics("stats_test", snapshot) + .await + .unwrap() + .unwrap(); + + let value_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "value") + .expect("Should have stats for value column"); + + assert_eq!(value_stats.advanced_stats.len(), 2); + assert!( + value_stats + .advanced_stats + .iter() + .any(|s| s.stats_type == "min_value") + ); + assert!( + value_stats + .advanced_stats + .iter() + .any(|s| s.stats_type == "max_value") + ); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async 
fn test_statistics_versioning() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE version_test (id INTEGER, count INTEGER); + INSERT INTO version_test VALUES (1, 100); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'version_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let count_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'count'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + // Take snapshot before updates + let snapshot_0 = handle.current_snapshot().await.unwrap(); + + // Update 1 + handle + .update_table_column_stats( + count_column_id, + table_id, + "ndv", + r#"{"distinct_count": 100}"#, + ) + .await + .unwrap(); + + let snapshot_1 = handle.current_snapshot().await.unwrap(); + assert_eq!(snapshot_1.0, snapshot_0.0 + 1); + + // Update 2 (new value) + handle + .update_table_column_stats( + count_column_id, + table_id, + "ndv", + r#"{"distinct_count": 150}"#, + ) + .await + .unwrap(); + + let snapshot_2 = handle.current_snapshot().await.unwrap(); + assert_eq!(snapshot_2.0, snapshot_1.0 + 1); + + // Verify stats at snapshot_1 + let stats_1 = handle + .table_statistics("version_test", snapshot_1) + .await + .unwrap() + .unwrap(); + + let count_stats_1 = stats_1 + .column_statistics + .iter() + .find(|cs| cs.name == "count") + .unwrap(); + + assert_eq!(count_stats_1.advanced_stats.len(), 1); + assert!( + count_stats_1.advanced_stats[0] + .data + .to_string() + 
.contains("100") + ); + + // Verify stats at snapshot_2 + let stats_2 = handle + .table_statistics("version_test", snapshot_2) + .await + .unwrap() + .unwrap(); + + let count_stats_2 = stats_2 + .column_statistics + .iter() + .find(|cs| cs.name == "count") + .unwrap(); + + assert_eq!(count_stats_2.advanced_stats.len(), 1); + assert!( + count_stats_2.advanced_stats[0] + .data + .to_string() + .contains("150") + ); + + // Verify snapshot_1 still returns old value + let stats_1_again = handle + .table_statistics("version_test", snapshot_1) + .await + .unwrap() + .unwrap(); + + let count_stats_1_again = stats_1_again + .column_statistics + .iter() + .find(|cs| cs.name == "count") + .unwrap(); + + assert!( + count_stats_1_again.advanced_stats[0] + .data + .to_string() + .contains("100"), + "Time-travel query should return historical value, not current value" + ); + + // Verify snapshot_0 has no stats (before any updates) + let stats_0 = handle + .table_statistics("version_test", snapshot_0) + .await + .unwrap() + .unwrap(); + + let count_stats_0 = stats_0 + .column_statistics + .iter() + .find(|cs| cs.name == "count") + .unwrap(); + + assert_eq!( + count_stats_0.advanced_stats.len(), + 0, + "Snapshot before updates should have no advanced stats" + ); + + handle.shutdown().await.unwrap(); +} + +// ============================================================================ +// Concurrency Tests +// ============================================================================ + +#[tokio::test] +async fn test_concurrent_read_operations() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + // Spawn multiple concurrent snapshot requests + let mut tasks = vec![]; + for _ in 0..50 { + let handle_clone = handle.clone(); + tasks.push(tokio::spawn(async move { + handle_clone.current_snapshot().await.unwrap() + })); + } + + // All should succeed with same snapshot ID + for task in tasks { + let snapshot = 
task.await.unwrap(); + assert_eq!(snapshot.0, 0); + } + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_concurrent_mixed_operations() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE concurrent_test (id INTEGER, data VARCHAR); + INSERT INTO concurrent_test VALUES (1, 'test'); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'concurrent_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let id_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'id'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + let initial_snapshot = handle.current_snapshot().await.unwrap(); + + let mut tasks = vec![]; + + // Mix of reads and writes + for i in 0..20 { + let handle_clone = handle.clone(); + + if i % 2 == 0 { + // Read operation + tasks.push(tokio::spawn(async move { + let _ = handle_clone.current_snapshot().await; + })); + } else { + // Write operation + tasks.push(tokio::spawn(async move { + let _ = handle_clone + .update_table_column_stats( + id_column_id, + table_id, + &format!("stat_{}", i), + &format!(r#"{{"value": {}}}"#, i), + ) + .await; + })); + } + } + + // Wait for all + for task in tasks { + task.await.unwrap(); + } + + // Verify final snapshot progressed + let final_snapshot = handle.current_snapshot().await.unwrap(); + assert!(final_snapshot.0 >= 10, "Should have progressed snapshots"); + + // Verify all writes succeeded by checking stats + let stats = handle 
+ .table_statistics("concurrent_test", final_snapshot) + .await + .unwrap() + .unwrap(); + + let id_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .expect("Should have stats for id column"); + + // Should have 10 stats (one for each odd i: 1,3,5,7,9,11,13,15,17,19) + assert_eq!( + id_stats.advanced_stats.len(), + 10, + "Should have 10 write operations worth of stats" + ); + + // Verify no stats were lost (check for specific stat names) + let stat_names: Vec<&str> = id_stats + .advanced_stats + .iter() + .map(|s| s.stats_type.as_str()) + .collect(); + for i in (1..20).step_by(2) { + let expected_name = format!("stat_{}", i); + assert!( + stat_names.contains(&expected_name.as_str()), + "Should have stat_{} but got {:?}", + i, + stat_names + ); + } + + // Verify snapshot progression matches write count + let snapshot_diff = final_snapshot.0 - initial_snapshot.0; + assert_eq!( + snapshot_diff, 10, + "Snapshot should have advanced by exactly 10 (one per write)" + ); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_multiple_handles_same_service() { + let (_temp_dir, service, handle1) = create_test_service(); + + // Clone handles + let handle2 = handle1.clone(); + let handle3 = handle1.clone(); + + tokio::spawn(async move { + service.run().await; + }); + + // All handles should work independently + let snapshot1 = handle1.current_snapshot().await.unwrap(); + let snapshot2 = handle2.current_snapshot().await.unwrap(); + let snapshot3 = handle3.current_snapshot().await.unwrap(); + + assert_eq!(snapshot1.0, snapshot2.0); + assert_eq!(snapshot2.0, snapshot3.0); + + handle1.shutdown().await.unwrap(); +} + +// ============================================================================ +// Edge Cases and Error Handling +// ============================================================================ + +#[tokio::test] +async fn test_operations_after_shutdown() { + let (_temp_dir, service, handle) = create_test_service(); + + 
tokio::spawn(async move { + service.run().await; + }); + + // Shutdown the service + handle.shutdown().await.unwrap(); + tokio::time::sleep(Duration::from_millis(100)).await; + + // Operations after shutdown should fail + let result = handle.current_snapshot().await; + assert!(result.is_err(), "Operations after shutdown should fail"); + + // Verify multiple operations fail consistently + assert!(handle.current_snapshot_info().await.is_err()); + assert!(handle.current_schema_info().await.is_err()); + assert!( + handle + .table_statistics("any_table", optd_catalog::SnapshotId(0)) + .await + .is_err() + ); + + // Verify error type is consistent (channel closed) + match result { + Err(e) => { + let err_msg = format!("{:?}", e); + assert!( + err_msg.contains("ExecuteReturnedResults") || err_msg.contains("channel"), + "Error should indicate channel/connection issue, got: {}", + err_msg + ); + } + Ok(_) => panic!("Expected error after shutdown"), + } +} + +#[tokio::test] +async fn test_invalid_table_schema_request() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + // Request schema for non-existent table + let result = handle.current_schema(None, "does_not_exist").await; + assert!(result.is_err(), "Should error for non-existent table"); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_large_json_statistics() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE large_stats_test (id INTEGER); + INSERT INTO large_stats_test VALUES (1); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND 
dt.table_name = 'large_stats_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let id_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'id'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + // Create large histogram data + let large_histogram: Vec = (0..1000).collect(); + let large_payload = serde_json::json!({ + "buckets": large_histogram, + "metadata": "x".repeat(1000) + }) + .to_string(); + + // Should handle large payloads + let result = handle + .update_table_column_stats(id_column_id, table_id, "large_histogram", &large_payload) + .await; + + assert!(result.is_ok(), "Should handle large statistics payloads"); + + // Verify retrieval + let snapshot = handle.current_snapshot().await.unwrap(); + let stats = handle + .table_statistics("large_stats_test", snapshot) + .await + .unwrap() + .unwrap(); + + let id_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .unwrap(); + + let large_stat = id_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "large_histogram") + .unwrap(); + + assert!(large_stat.data.to_string().len() > 1000); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_special_characters_in_statistics() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE special_chars_test (id INTEGER); + INSERT INTO special_chars_test VALUES (1); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'special_chars_test'; + "#, + [], 
+ |row| row.get(0), + ) + .unwrap(); + + let id_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'id'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + // Statistics with special characters + let special_payload = + r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "emoji": "🚀"}"#; + + handle + .update_table_column_stats(id_column_id, table_id, "special_test", special_payload) + .await + .unwrap(); + + // Retrieve and verify + let snapshot = handle.current_snapshot().await.unwrap(); + let stats = handle + .table_statistics("special_chars_test", snapshot) + .await + .unwrap() + .unwrap(); + + let id_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .unwrap(); + + let special_stat = id_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "special_test") + .unwrap(); + + let data_str = special_stat.data.to_string(); + assert!(data_str.contains("测试")); + assert!(data_str.contains("🚀")); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_rapid_sequential_updates() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE rapid_test (id INTEGER); + INSERT INTO rapid_test VALUES (1); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'rapid_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let id_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = 
? AND column_name = 'id'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + let initial_snapshot = handle.current_snapshot().await.unwrap(); + + // Perform 10 rapid updates + for i in 0..10 { + handle + .update_table_column_stats( + id_column_id, + table_id, + "counter", + &format!(r#"{{"count": {}}}"#, i), + ) + .await + .unwrap(); + } + + let final_snapshot = handle.current_snapshot().await.unwrap(); + assert_eq!( + final_snapshot.0, + initial_snapshot.0 + 10, + "Should have 10 new snapshots" + ); + + // Verify the final value is the last update + let final_stats = handle + .table_statistics("rapid_test", final_snapshot) + .await + .unwrap() + .unwrap(); + + let id_stats = final_stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .expect("Should have stats for id column"); + + // Should have only 1 stat since same stat_type was updated + assert_eq!(id_stats.advanced_stats.len(), 1); + + let counter_stat = id_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "counter") + .expect("Should have counter stat"); + + // Final value should be 9 (last iteration) + assert!( + counter_stat.data.to_string().contains("9"), + "Final counter value should be 9, got: {}", + counter_stat.data + ); + + // Verify we can query intermediate snapshots + let mid_snapshot = optd_catalog::SnapshotId(initial_snapshot.0 + 5); + let mid_stats = handle + .table_statistics("rapid_test", mid_snapshot) + .await + .unwrap() + .unwrap(); + + let mid_id_stats = mid_stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .expect("Should have stats for id column at mid snapshot"); + + if let Some(mid_counter) = mid_id_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "counter") + { + // Mid-point should have value 4 (5th update, 0-indexed) + assert!( + mid_counter.data.to_string().contains("4"), + "Mid-point counter should be 4, got: {}", + mid_counter.data + ); + } + + 
handle.shutdown().await.unwrap(); +} + +// ============================================================================ +// Performance and Stress Tests +// ============================================================================ + +#[tokio::test] +async fn test_high_concurrency_stress() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + // Spawn 100 concurrent tasks + let mut tasks = vec![]; + for i in 0..100 { + let handle_clone = handle.clone(); + tasks.push(tokio::spawn(async move { + if i % 3 == 0 { + let _ = handle_clone.current_snapshot().await; + } else if i % 3 == 1 { + let _ = handle_clone.current_snapshot_info().await; + } else { + let _ = handle_clone.current_schema_info().await; + } + })); + } + + // Should complete without errors + let results: Vec<_> = futures::future::join_all(tasks).await; + for result in results { + assert!(result.is_ok(), "All concurrent operations should succeed"); + } + + handle.shutdown().await.unwrap(); +} From ba0bfa0cd40bfacbc4a4c5100d596a483520075f Mon Sep 17 00:00:00 2001 From: HFFuture Date: Tue, 18 Nov 2025 14:16:55 -0500 Subject: [PATCH 29/40] rename error --- optd/catalog/src/service.rs | 88 +++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 48 deletions(-) diff --git a/optd/catalog/src/service.rs b/optd/catalog/src/service.rs index 2904580..83b326c 100644 --- a/optd/catalog/src/service.rs +++ b/optd/catalog/src/service.rs @@ -1,5 +1,5 @@ use crate::{ - Catalog, CatalogError, CurrentSchema, DuckLakeCatalog, SchemaRef, SnapshotId, SnapshotInfo, + Catalog, CurrentSchema, DuckLakeCatalog, Error, SchemaRef, SnapshotId, SnapshotInfo, TableStatistics, }; use tokio::sync::{mpsc, oneshot}; @@ -9,47 +9,39 @@ const CHANNEL_BUFFER_SIZE: usize = 1000; /// Trait defining the catalog backend that can be used with the service. 
pub trait CatalogBackend: Send + 'static { - fn current_snapshot(&mut self) -> Result; - fn current_snapshot_info(&mut self) -> Result; - fn current_schema( - &mut self, - schema: Option<&str>, - table: &str, - ) -> Result; - fn current_schema_info(&mut self) -> Result; + fn current_snapshot(&mut self) -> Result; + fn current_snapshot_info(&mut self) -> Result; + fn current_schema(&mut self, schema: Option<&str>, table: &str) -> Result; + fn current_schema_info(&mut self) -> Result; fn table_statistics( &mut self, table_name: &str, snapshot: SnapshotId, - ) -> Result, CatalogError>; + ) -> Result, Error>; fn update_table_column_stats( &mut self, column_id: i64, table_id: i64, stats_type: &str, payload: &str, - ) -> Result<(), CatalogError>; + ) -> Result<(), Error>; } /// Implement CatalogBackend for any type that implements Catalog impl CatalogBackend for T { - fn current_snapshot(&mut self) -> Result { + fn current_snapshot(&mut self) -> Result { Catalog::current_snapshot(self) } - fn current_snapshot_info(&mut self) -> Result { + fn current_snapshot_info(&mut self) -> Result { Catalog::current_snapshot_info(self) } - fn current_schema( - &mut self, - schema: Option<&str>, - table: &str, - ) -> Result { + fn current_schema(&mut self, schema: Option<&str>, table: &str) -> Result { Catalog::current_schema(self, schema, table) } - fn current_schema_info(&mut self) -> Result { + fn current_schema_info(&mut self) -> Result { Catalog::current_schema_info(self) } @@ -57,7 +49,7 @@ impl CatalogBackend for T { &mut self, table_name: &str, snapshot: SnapshotId, - ) -> Result, CatalogError> { + ) -> Result, Error> { Catalog::table_statistics(self, table_name, snapshot) } @@ -67,7 +59,7 @@ impl CatalogBackend for T { table_id: i64, stats_type: &str, payload: &str, - ) -> Result<(), CatalogError> { + ) -> Result<(), Error> { Catalog::update_table_column_stats(self, column_id, table_id, stats_type, payload) } } @@ -75,27 +67,27 @@ impl CatalogBackend for T { #[derive(Debug)] 
pub enum CatalogRequest { CurrentSnapshot { - respond_to: oneshot::Sender>, + respond_to: oneshot::Sender>, }, CurrentSnapshotInfo { - respond_to: oneshot::Sender>, + respond_to: oneshot::Sender>, }, CurrentSchema { schema: Option, table: String, - respond_to: oneshot::Sender>, + respond_to: oneshot::Sender>, }, CurrentSchemaInfo { - respond_to: oneshot::Sender>, + respond_to: oneshot::Sender>, }, TableStatistics { table_name: String, snapshot: SnapshotId, - respond_to: oneshot::Sender, CatalogError>>, + respond_to: oneshot::Sender, Error>>, }, UpdateTableColumnStats { @@ -103,7 +95,7 @@ pub enum CatalogRequest { table_id: i64, stats_type: String, payload: String, - respond_to: oneshot::Sender>, + respond_to: oneshot::Sender>, }, Shutdown, @@ -116,30 +108,30 @@ pub struct CatalogServiceHandle { } impl CatalogServiceHandle { - pub async fn current_snapshot(&self) -> Result { + pub async fn current_snapshot(&self) -> Result { let (tx, rx) = oneshot::channel(); self.sender .send(CatalogRequest::CurrentSnapshot { respond_to: tx }) .await - .map_err(|_| CatalogError::QueryExecution { + .map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })?; - rx.await.map_err(|_| CatalogError::QueryExecution { + rx.await.map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })? } - pub async fn current_snapshot_info(&self) -> Result { + pub async fn current_snapshot_info(&self) -> Result { let (tx, rx) = oneshot::channel(); self.sender .send(CatalogRequest::CurrentSnapshotInfo { respond_to: tx }) .await - .map_err(|_| CatalogError::QueryExecution { + .map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })?; - rx.await.map_err(|_| CatalogError::QueryExecution { + rx.await.map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })? 
} @@ -148,7 +140,7 @@ impl CatalogServiceHandle { &self, schema: Option<&str>, table: &str, - ) -> Result { + ) -> Result { let (tx, rx) = oneshot::channel(); self.sender .send(CatalogRequest::CurrentSchema { @@ -157,25 +149,25 @@ impl CatalogServiceHandle { respond_to: tx, }) .await - .map_err(|_| CatalogError::QueryExecution { + .map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })?; - rx.await.map_err(|_| CatalogError::QueryExecution { + rx.await.map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })? } - pub async fn current_schema_info(&self) -> Result { + pub async fn current_schema_info(&self) -> Result { let (tx, rx) = oneshot::channel(); self.sender .send(CatalogRequest::CurrentSchemaInfo { respond_to: tx }) .await - .map_err(|_| CatalogError::QueryExecution { + .map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })?; - rx.await.map_err(|_| CatalogError::QueryExecution { + rx.await.map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })? } @@ -184,7 +176,7 @@ impl CatalogServiceHandle { &self, table_name: &str, snapshot: SnapshotId, - ) -> Result, CatalogError> { + ) -> Result, Error> { let (tx, rx) = oneshot::channel(); self.sender .send(CatalogRequest::TableStatistics { @@ -193,11 +185,11 @@ impl CatalogServiceHandle { respond_to: tx, }) .await - .map_err(|_| CatalogError::QueryExecution { + .map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })?; - rx.await.map_err(|_| CatalogError::QueryExecution { + rx.await.map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })? 
} @@ -208,7 +200,7 @@ impl CatalogServiceHandle { table_id: i64, stats_type: &str, payload: &str, - ) -> Result<(), CatalogError> { + ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); self.sender .send(CatalogRequest::UpdateTableColumnStats { @@ -219,20 +211,20 @@ impl CatalogServiceHandle { respond_to: tx, }) .await - .map_err(|_| CatalogError::QueryExecution { + .map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })?; - rx.await.map_err(|_| CatalogError::QueryExecution { + rx.await.map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, })? } - pub async fn shutdown(&self) -> Result<(), CatalogError> { + pub async fn shutdown(&self) -> Result<(), Error> { self.sender .send(CatalogRequest::Shutdown) .await - .map_err(|_| CatalogError::QueryExecution { + .map_err(|_| Error::QueryExecution { source: duckdb::Error::ExecuteReturnedResults, }) } @@ -330,7 +322,7 @@ impl CatalogService { pub fn try_new_from_location( location: Option<&str>, metadata_path: Option<&str>, - ) -> Result<(Self, CatalogServiceHandle), CatalogError> { + ) -> Result<(Self, CatalogServiceHandle), Error> { let catalog = DuckLakeCatalog::try_new(location, metadata_path)?; Ok(Self::new(catalog)) } From 740e1f3cd793f20378248db6f2e1bef1cb22d184 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Tue, 18 Nov 2025 14:18:08 -0500 Subject: [PATCH 30/40] update lock and remove superfluous main --- Cargo.lock | 27 ++++---------- optd/catalog/Cargo.toml | 3 ++ optd/catalog/src/main.rs | 81 ---------------------------------------- 3 files changed, 10 insertions(+), 101 deletions(-) delete mode 100644 optd/catalog/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 4db8e80..280456d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3557,10 +3557,12 @@ name = "optd-catalog" version = "0.1.0" dependencies = [ "duckdb", + "futures", "serde", "serde_json", "snafu", "tempfile", + "tokio", ] [[package]] @@ -3602,10 +3604,15 @@ dependencies = [ name = 
"optd-datafusion" version = "0.1.0" dependencies = [ + "async-trait", "datafusion", "itertools 0.14.0", + "optd-catalog", "optd-core", + "parking_lot", + "tokio", "tracing", + "url", ] [[package]] @@ -3995,17 +4002,6 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" -[[package]] -name = "r2d2" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" -dependencies = [ - "log", - "parking_lot", - "scheduled-thread-pool", -] - [[package]] name = "radium" version = "0.7.0" @@ -4444,15 +4440,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "scheduled-thread-pool" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" -dependencies = [ - "parking_lot", -] - [[package]] name = "scopeguard" version = "1.2.0" diff --git a/optd/catalog/Cargo.toml b/optd/catalog/Cargo.toml index f390741..cd4008f 100644 --- a/optd/catalog/Cargo.toml +++ b/optd/catalog/Cargo.toml @@ -9,6 +9,9 @@ serde = { version = "1.0", features = ["derive"] } duckdb = { version = "1.4.0", features = ["bundled"] } snafu = "0.8.6" serde_json = "1.0" +tokio = { workspace = true, features = ["sync", "rt"] } [dev-dependencies] tempfile = "3.8" +tokio = { workspace = true, features = ["full", "test-util"] } +futures = "0.3" diff --git a/optd/catalog/src/main.rs b/optd/catalog/src/main.rs deleted file mode 100644 index 8f7c3b9..0000000 --- a/optd/catalog/src/main.rs +++ /dev/null @@ -1,81 +0,0 @@ -mod optd_catalog; -mod optd_table; - -use sqlx::{SqlitePool, sqlite::SqliteConnectOptions}; -use tokio; -use uuid::Uuid; - -#[tokio::main] -async fn main() -> Result<(), sqlx::Error> { - // Create Sqlite database file to hold the catalog - const SQLITE_DB_PATH: &str = 
"catalog.db"; - - // Set connect options - let connect_options = SqliteConnectOptions::new() - .filename(SQLITE_DB_PATH) - .create_if_missing(true); - - // Connect with SqlX - let pool = SqlitePool::connect_with(connect_options) - .await - .expect("Failed to connect to the SQLite database"); - - // Set the metadata catalog name - const METADATA_CATALOG: &str = "catalog"; - - // Execute the given Sql queries to create the catalog - let mut create_catalog_queries = vec![ - // "CREATE TABLE () IF NOT EXISTS {METADATA_CATALOG};", - "CREATE TABLE {METADATA_CATALOG}_metadata(key VARCHAR NOT NULL, value VARCHAR NOT NULL, scope VARCHAR, scope_id BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_snapshot(snapshot_id BIGINT PRIMARY KEY, snapshot_time TIMESTAMPTZ, schema_version BIGINT, next_catalog_id BIGINT, next_file_id BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_snapshot_changes(snapshot_id BIGINT PRIMARY KEY, changes_made VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_schema(schema_id BIGINT PRIMARY KEY, schema_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_name VARCHAR, path VARCHAR, path_is_relative BOOLEAN);", - "CREATE TABLE {METADATA_CATALOG}_table(table_id BIGINT, table_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_id BIGINT, table_name VARCHAR, path VARCHAR, path_is_relative BOOLEAN);", - "CREATE TABLE {METADATA_CATALOG}_view(view_id BIGINT, view_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_id BIGINT, view_name VARCHAR, dialect VARCHAR, sql VARCHAR, column_aliases VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_tag(object_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_column_tag(table_id BIGINT, column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_data_file(data_file_id BIGINT PRIMARY KEY, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, file_order BIGINT, path 
VARCHAR, path_is_relative BOOLEAN, file_format VARCHAR, record_count BIGINT, file_size_bytes BIGINT, footer_size BIGINT, row_id_start BIGINT, partition_id BIGINT, encryption_key VARCHAR, partial_file_info VARCHAR, mapping_id BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_file_column_statistics(data_file_id BIGINT, table_id BIGINT, column_id BIGINT, column_size_bytes BIGINT, value_count BIGINT, null_count BIGINT, min_value VARCHAR, max_value VARCHAR, contains_nan BOOLEAN);", - "CREATE TABLE {METADATA_CATALOG}_delete_file(delete_file_id BIGINT PRIMARY KEY, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, data_file_id BIGINT, path VARCHAR, path_is_relative BOOLEAN, format VARCHAR, delete_count BIGINT, file_size_bytes BIGINT, footer_size BIGINT, encryption_key VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_column(column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, table_id BIGINT, column_order BIGINT, column_name VARCHAR, column_type VARCHAR, initial_default VARCHAR, default_value VARCHAR, nulls_allowed BOOLEAN, parent_column BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_table_stats(table_id BIGINT, record_count BIGINT, next_row_id BIGINT, file_size_bytes BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_table_column_stats(table_id BIGINT, column_id BIGINT, contains_null BOOLEAN, contains_nan BOOLEAN, min_value VARCHAR, max_value VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_partition_info(partition_id BIGINT, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_partition_column(partition_id BIGINT, table_id BIGINT, partition_key_index BIGINT, column_id BIGINT, transform VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_file_partition_value(data_file_id BIGINT, table_id BIGINT, partition_key_index BIGINT, partition_value VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_files_scheduled_for_deletion(data_file_id BIGINT, path VARCHAR, path_is_relative BOOLEAN, schedule_start TIMESTAMPTZ);", - "CREATE TABLE 
{METADATA_CATALOG}_inlined_data_tables(table_id BIGINT, table_name VARCHAR, schema_version BIGINT);", - "CREATE TABLE {METADATA_CATALOG}_column_mapping(mapping_id BIGINT, table_id BIGINT, type VARCHAR);", - "CREATE TABLE {METADATA_CATALOG}_name_mapping(mapping_id BIGINT, column_id BIGINT, source_name VARCHAR, target_field_id BIGINT, parent_column BIGINT);", - "INSERT INTO {METADATA_CATALOG}_snapshot VALUES (0, current_timestamp, 0, 1, 0);", - "INSERT INTO {METADATA_CATALOG}_snapshot_changes VALUES (0, 'created_schema:\"main\"');", - //"INSERT INTO {METADATA_CATALOG}_metadata (key, value) VALUES ('version', '0.2'), ('created_by', 'DuckDB %s'), ('data_path', %s), ('encrypted', '%s');" - ]; - - let set_uuid_query = format!( - "UPDATE {METADATA_CATALOG}_schema SET schema_uuid = '{}' WHERE schema_id = 0;", - Uuid::new_v4() - ); - - create_catalog_queries.push(set_uuid_query.as_str()); - - // Format the queries with the metadata catalog name - let formatted_query = create_catalog_queries - .iter() - .map(|query| query.replace("{METADATA_CATALOG}", METADATA_CATALOG)); - - for query in formatted_query { - println!("Executing query: {}", query); - sqlx::query(&query) - .execute(&pool) - .await - .expect("Failed to execute query"); - - println!("Query executed successfully."); - } - - // Close the connection - pool.close().await; - - Ok(()) -} From 4b579beeb3427de1fc41681e735871983caed6f9 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Wed, 19 Nov 2025 00:25:23 -0500 Subject: [PATCH 31/40] catalog implementation and test suite + datafusion version bump --- Cargo.lock | 128 +-- Cargo.toml | 5 +- cli/src/lib.rs | 1 + connectors/datafusion/Cargo.toml | 7 + connectors/datafusion/src/catalog.rs | 111 +- connectors/datafusion/src/lib.rs | 2 + connectors/datafusion/src/table.rs | 81 +- .../datafusion/tests/integration_test.rs | 975 ++++++++++++++++++ optd/catalog/src/service.rs | 2 +- 9 files changed, 1163 insertions(+), 149 deletions(-) create mode 100644 
connectors/datafusion/tests/integration_test.rs diff --git a/Cargo.lock b/Cargo.lock index 280456d..e602557 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1486,9 +1486,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.1.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4016a135c11820d9c9884a1f7924d5456c563bd3657b7d691a6e7b937a452df7" +checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" dependencies = [ "arrow", "arrow-ipc", @@ -1543,9 +1543,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d70327e81ab3a1f5832d8b372d55fa607851d7cea6d1f8e65ff0c98fcc32d222" +checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" dependencies = [ "arrow", "async-trait", @@ -1569,9 +1569,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "268819e6bb20ba70a664abddc20deac604f30d3267f8c91847064542a8c0720c" +checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" dependencies = [ "arrow", "async-trait", @@ -1618,9 +1618,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054873d5563f115f83ef4270b560ac2ce4de713905e825a40cac49d6ff348254" +checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" dependencies = [ "ahash 0.8.12", "apache-avro", @@ -1645,9 +1645,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a1d1bc69aaaadb8008b65329ed890b33e845dc063225c190f77b20328fbe1d" +checksum = 
"5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" dependencies = [ "futures", "log", @@ -1656,9 +1656,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d855160469020982880fd9bd0962e033d2f4728f56f85a83d8c90785638b6519" +checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" dependencies = [ "arrow", "async-compression", @@ -1693,9 +1693,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8cfac9188138eb326f60eab872cb92a89a301f0a3f9fd1f24004325fd2741d" +checksum = "10d40b6953ebc9099b37adfd12fde97eb73ff0cee44355c6dea64b8a4537d561" dependencies = [ "apache-avro", "arrow", @@ -1718,9 +1718,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ec3aa7575378d23aae96b955b5233bea6f9d461648174f6ccc8f3c160f2b7a7" +checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" dependencies = [ "arrow", "async-trait", @@ -1743,9 +1743,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00cfb8f33e2864eeb3188b6818acf5546d56a5a487d423cce9b684a554caabfa" +checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" dependencies = [ "arrow", "async-trait", @@ -1768,9 +1768,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3bfb48fb4ff42ac1485a12ea56434eaab53f7da8f00b2443b1a3d35a0b6d10" +checksum = 
"09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" dependencies = [ "arrow", "async-trait", @@ -1802,15 +1802,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fbf41013cf55c2369b5229594898e8108c8a1beeb49d97feb5e0cce9933eb8f" +checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" [[package]] name = "datafusion-execution" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fd0c1ffe3885687758f985ed548184bf63b17b2a7a5ae695de422ad6432118" +checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" dependencies = [ "arrow", "async-trait", @@ -1829,9 +1829,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c4fe6411218a9dab656437b1e69b00a470a7a2d7db087867a366c145eb164a7" +checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" dependencies = [ "arrow", "async-trait", @@ -1851,9 +1851,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a45bee7d2606bfb41ceb1d904ba7cecf69bd5a6f8f3e6c57c3f5a83d84bdd97" +checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" dependencies = [ "arrow", "datafusion-common", @@ -1864,9 +1864,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c7e1c532ff9d14f291160bca23e55ffd4899800301dd2389786c2f02d76904a" +checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" dependencies = [ "arrow", "arrow-buffer", @@ -1893,9 +1893,9 @@ dependencies = [ 
[[package]] name = "datafusion-functions-aggregate" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05d47426645aef1e73b1a034c75ab2401bc504175feb191accbe211ec24a342" +checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" dependencies = [ "ahash 0.8.12", "arrow", @@ -1914,9 +1914,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05c99f648b2b1743de0c1c19eef07e8cc5a085237f172b2e20bf6934e0a804e4" +checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" dependencies = [ "ahash 0.8.12", "arrow", @@ -1927,9 +1927,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4227782023f4fb68d3d5c5eb190665212f43c9a0b437553e4b938b379aff6cf6" +checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" dependencies = [ "arrow", "arrow-ord", @@ -1949,9 +1949,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d902b1769f69058236e89f04f3bff2cf62f24311adb7bf3c6c3e945c9451076" +checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" dependencies = [ "arrow", "async-trait", @@ -1965,9 +1965,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8ee43974c92eb9920fe8e97e0fab48675e93b062abcb48bef4c1d4305b6ee4" +checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" dependencies = [ "arrow", "datafusion-common", @@ -1983,9 +1983,9 @@ dependencies = [ [[package]] name = 
"datafusion-functions-window-common" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1e149d36cdd44fb425dc815c5fac55025aa9a592dd65cb3c421881096292c02" +checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1993,9 +1993,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07c9faa0cdefb6e6e756482b846397b5c2d84d369e30b009472b9ab9b1430fbd" +checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" dependencies = [ "datafusion-expr", "quote", @@ -2004,9 +2004,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16a4f7059302ad1de6e97ab0eebb5c34405917b1f80806a30a66e38ad118251" +checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" dependencies = [ "arrow", "chrono", @@ -2024,9 +2024,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10bb87a605d8ce9672d5347c0293c12211b0c03923fc12fbdc665fe76e6f9e01" +checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" dependencies = [ "ahash 0.8.12", "arrow", @@ -2047,9 +2047,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da3a7429a555dd5ff0bec4d24bd5532ec43876764088da635cad55b2f178dc2" +checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" dependencies = [ "arrow", "datafusion-common", @@ -2062,9 +2062,9 @@ dependencies = [ [[package]] name = 
"datafusion-physical-expr-common" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "845eb44ef1e04d2a15c6d955cb146b40a41814a7be4377f0a541857d3e257d6f" +checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" dependencies = [ "ahash 0.8.12", "arrow", @@ -2076,9 +2076,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b9b648ee2785722c79eae366528e52e93ece6808aef9297cf8e5521de381da" +checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" dependencies = [ "arrow", "datafusion-common", @@ -2096,9 +2096,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6688d17b78104e169d7069749832c20ff50f112be853d2c058afe46c889064" +checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" dependencies = [ "ahash 0.8.12", "arrow", @@ -2127,9 +2127,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a893a46c56f5f190085e13949eb8ec163672c7ec2ac33bdb82c84572e71ca73" +checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" dependencies = [ "arrow", "arrow-schema", @@ -2145,9 +2145,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8b62684c7a1db6121a8c83100209cffa1e664a8d9ced87e1a32f8cdc2fff3c2" +checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" dependencies = [ "arrow", "async-trait", @@ -2169,9 +2169,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "50.0.0" +version 
= "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09cff94b8242843e1da5d069e9d2cfc53807f1f00b1c0da78c297f47c21456e" +checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" dependencies = [ "arrow", "bigdecimal", @@ -3609,10 +3609,10 @@ dependencies = [ "itertools 0.14.0", "optd-catalog", "optd-core", - "parking_lot", + "serde_json", + "tempfile", "tokio", "tracing", - "url", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 88eb5b1..bc84079 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,6 @@ resolver = "2" members = ["cli", "connectors/datafusion", "optd/core", "optd/catalog"] -# By default, only compiles the `optd-core` crate. default-members = ["optd/core"] [workspace.dependencies] @@ -11,7 +10,9 @@ tokio = { version = "1.47", features = ["macros", "rt", "sync"] } tracing = "0.1" # DataFusion dependencies -datafusion = { version = "50.0", default-features = false } +datafusion = { version = "50.3", default-features = false, features = [ + "parquet", +] } [workspace.package] version = "0.1.0" diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 8d3977c..b3f86bb 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -7,6 +7,7 @@ use datafusion::{ sql::TableReference, }; use datafusion_cli::cli_context::CliSessionContext; +use optd_datafusion::{OptdExtensionConfig, SessionStateBuilderOptdExt}; use std::sync::Arc; pub struct OptdCliSessionContext { diff --git a/connectors/datafusion/Cargo.toml b/connectors/datafusion/Cargo.toml index e4704a9..93c7c62 100644 --- a/connectors/datafusion/Cargo.toml +++ b/connectors/datafusion/Cargo.toml @@ -8,4 +8,11 @@ repository.workspace = true datafusion = { workspace = true } tracing = { workspace = true, features = ["log"] } optd-core = { path = "../../optd/core", version = "0.1" } +optd-catalog = { path = "../../optd/catalog", version = "0.1" } itertools = "0.14.0" +async-trait = "0.1" + +[dev-dependencies] +tempfile = "3.13" +serde_json = "1" +tokio = { 
workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/connectors/datafusion/src/catalog.rs b/connectors/datafusion/src/catalog.rs index ff57663..d9eb157 100644 --- a/connectors/datafusion/src/catalog.rs +++ b/connectors/datafusion/src/catalog.rs @@ -1,31 +1,31 @@ +use async_trait::async_trait; use datafusion::{ catalog::{CatalogProvider, CatalogProviderList, SchemaProvider, TableProvider}, + common::DataFusionError, error::Result, - execution::SessionState, - common::DataFusionError }; -use parking_lot::RwLock; +use optd_catalog::CatalogServiceHandle; use std::any::Any; -use std::sync::{Arc, Weak}; -use async_trait::async_trait; +use std::sync::Arc; + +use crate::table::OptdTableProvider; #[derive(Debug)] pub struct OptdCatalogProviderList { inner: Arc, - state: Weak>, + catalog_handle: Option, } impl OptdCatalogProviderList { - pub fn new(inner: Arc, state: Weak>) -> Self { - Self { inner, state } + pub fn new( + inner: Arc, + catalog_handle: Option, + ) -> Self { + Self { + inner, + catalog_handle, + } } - - // pub fn new_from_location(path: &str) -> Result { - // let url = url::Url::parse(path)?; - // let state = Arc::downgrade(&SessionState::new()); - // let inner = Arc::new(); - // Ok(Self { inner, state }) - // } } impl CatalogProviderList for OptdCatalogProviderList { @@ -46,22 +46,29 @@ impl CatalogProviderList for OptdCatalogProviderList { } fn catalog(&self, name: &str) -> Option> { - let state = self.state.clone(); - self.inner - .catalog(name) - .map(|catalog| Arc::new(OptdCatalogProvider::new(catalog, state)) as _) + let catalog_handle = self.catalog_handle.clone(); + self.inner.catalog(name).map(|catalog| { + Arc::new(OptdCatalogProvider::new(catalog, catalog_handle)) + as Arc + }) } } #[derive(Debug)] struct OptdCatalogProvider { inner: Arc, - state: Weak>, + catalog_handle: Option, } impl OptdCatalogProvider { - pub fn new(inner: Arc, state: Weak>) -> Self { - Self { inner, state } + pub fn new( + inner: Arc, + catalog_handle: 
Option, + ) -> Self { + Self { + inner, + catalog_handle, + } } } @@ -75,10 +82,10 @@ impl CatalogProvider for OptdCatalogProvider { } fn schema(&self, name: &str) -> Option> { - let state = self.state.clone(); - self.inner - .schema(name) - .map(|schema| Arc::new(OptdSchemaProvider::new(schema, state)) as _) + let catalog_handle = self.catalog_handle.clone(); + self.inner.schema(name).map(|schema| { + Arc::new(OptdSchemaProvider::new(schema, catalog_handle)) as Arc + }) } fn register_schema( @@ -93,12 +100,18 @@ impl CatalogProvider for OptdCatalogProvider { #[derive(Debug)] pub struct OptdSchemaProvider { inner: Arc, - state: Weak>, + catalog_handle: Option, } impl OptdSchemaProvider { - pub fn new(inner: Arc, state: Weak>) -> Self { - Self { inner, state } + pub fn new( + inner: Arc, + catalog_handle: Option, + ) -> Self { + Self { + inner, + catalog_handle, + } } } @@ -108,26 +121,35 @@ impl SchemaProvider for OptdSchemaProvider { self } - async fn table(&self, name: &str) -> Result>, DataFusionError> { - return self.inner.table(name).await; - } - fn table_names(&self) -> Vec { self.inner.table_names() } + async fn table(&self, name: &str) -> Result>, DataFusionError> { + let table_opt = self.inner.table(name).await?; + + if let Some(table) = table_opt { + let optd_table = Arc::new(OptdTableProvider::new( + table, + name.to_string(), + self.catalog_handle.clone(), + )); + + Ok(Some(optd_table as Arc)) + } else { + Ok(None) + } + } + fn register_table( &self, name: String, - table: Arc, - ) -> Result>> { + table: Arc, + ) -> Result>> { self.inner.register_table(name, table) } - fn deregister_table( - &self, - name: &str, - ) -> Result>> { + fn deregister_table(&self, name: &str) -> Result>> { self.inner.deregister_table(name) } @@ -135,14 +157,3 @@ impl SchemaProvider for OptdSchemaProvider { self.inner.table_exist(name) } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_getters() { - // let catalog = OptdCatalogProviderList::new(); - // 
assert_eq!(catalog.catalog_names(), Vec::::new()); - } -} \ No newline at end of file diff --git a/connectors/datafusion/src/lib.rs b/connectors/datafusion/src/lib.rs index 7f316c0..e47a400 100644 --- a/connectors/datafusion/src/lib.rs +++ b/connectors/datafusion/src/lib.rs @@ -5,8 +5,10 @@ mod table; use std::sync::Arc; +pub use catalog::{OptdCatalogProviderList, OptdSchemaProvider}; pub use extension::{OptdExtension, OptdExtensionConfig}; pub use planner::OptdQueryPlanner; +pub use table::{OptdTable, OptdTableProvider}; pub trait SessionStateBuilderOptdExt: Sized { fn with_optd_planner(self) -> Self; diff --git a/connectors/datafusion/src/table.rs b/connectors/datafusion/src/table.rs index d9c89b5..f2943cd 100644 --- a/connectors/datafusion/src/table.rs +++ b/connectors/datafusion/src/table.rs @@ -4,10 +4,7 @@ use datafusion::{ arrow::datatypes::SchemaRef, catalog::{Session, TableProvider}, common::{Constraints, Statistics}, - datasource::{ - TableType, - listing::{ListingTable, ListingTableUrl}, - }, + datasource::{TableType, listing::ListingTable}, error::Result, logical_expr::{LogicalPlan, TableProviderFilterPushDown, dml::InsertOp}, physical_plan::ExecutionPlan, @@ -15,29 +12,9 @@ use datafusion::{ sql::TableReference, }; -use glob::Pattern; -use url::Url; - -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct OptdTableUrl { - inner: Arc, -} - -impl OptdTableUrl { - pub fn try_new(url: Url, glob: Option) -> Result { - let inner = ListingTableUrl::try_new(url, glob)?; - Ok(OptdTableUrl { - inner: Arc::new(inner), - }) - } - - pub fn new_with_inner(inner: Arc) -> Self { - OptdTableUrl { inner } - } -} - -// #[derive()] +use optd_catalog::CatalogServiceHandle; +#[allow(dead_code)] pub struct OptdTable { inner: Box, name: String, @@ -50,7 +27,7 @@ impl OptdTable { name: String, table_reference: TableReference, ) -> Result { - Ok(OptdTable { + Ok(Self { inner: Box::new(inner), name, table_reference, @@ -62,7 +39,7 @@ impl OptdTable { name: String, 
table_reference: TableReference, ) -> Self { - OptdTable { + Self { inner, name, table_reference, @@ -78,10 +55,34 @@ impl OptdTable { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct OptdTableProvider { inner: Arc, - table_url: OptdTableUrl, + catalog_handle: Option, + table_name: String, +} + +impl OptdTableProvider { + pub fn new( + inner: Arc, + table_name: String, + catalog_handle: Option, + ) -> Self { + Self { + inner, + catalog_handle, + table_name, + } + } + + pub fn table_name(&self) -> &str { + &self.table_name + } + + /// Get the catalog handle if available + pub fn catalog_handle(&self) -> Option<&CatalogServiceHandle> { + self.catalog_handle.as_ref() + } } #[async_trait::async_trait] @@ -116,7 +117,7 @@ impl TableProvider for OptdTableProvider { self.inner.get_table_definition() } - fn get_logical_plan(&self) -> Option> { + fn get_logical_plan(&'_ self) -> Option> { self.inner.get_logical_plan() } @@ -135,7 +136,23 @@ impl TableProvider for OptdTableProvider { } fn statistics(&self) -> Option { - self.inner.statistics() + let stats = self.inner.statistics(); + + if let Some(ref s) = stats { + tracing::debug!( + "Retrieved statistics from inner provider for table {} (num_rows={:?}, total_byte_size={:?})", + self.table_name, + s.num_rows, + s.total_byte_size + ); + } else { + tracing::debug!( + "No statistics available for table {} from inner provider", + self.table_name + ); + } + + stats } async fn insert_into( diff --git a/connectors/datafusion/tests/integration_test.rs b/connectors/datafusion/tests/integration_test.rs new file mode 100644 index 0000000..34bbe4b --- /dev/null +++ b/connectors/datafusion/tests/integration_test.rs @@ -0,0 +1,975 @@ +use datafusion::{ + arrow::{ + array::{Float64Array, Int32Array, Int64Array, RecordBatch, StringArray}, + datatypes::{DataType, Field, Schema}, + }, + catalog::{CatalogProviderList, MemorySchemaProvider, TableProvider}, + datasource::MemTable, + execution::context::SessionContext, + prelude::*, +}; 
+use optd_catalog::{CatalogService, DuckLakeCatalog}; +use optd_datafusion::{OptdCatalogProviderList, OptdTableProvider}; +use serde_json; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; +use tempfile::TempDir; + +static TEST_COUNTER: AtomicU64 = AtomicU64::new(0); + +/// Creates a test catalog with isolated metadata directory +/// TempDir is returned to keep the directory alive +fn create_test_catalog() -> (TempDir, DuckLakeCatalog) { + let temp_dir = TempDir::new().unwrap(); + let counter = TEST_COUNTER.fetch_add(1, Ordering::SeqCst); + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let unique_dir = temp_dir + .path() + .join(format!("df_test_{}_{}", timestamp, counter)); + std::fs::create_dir_all(&unique_dir).unwrap(); + let metadata_path = unique_dir.join("metadata.ducklake"); + + let catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap())).unwrap(); + + (temp_dir, catalog) +} + +/// Creates test schema and batch +fn create_test_data( + fields: Vec<(&str, DataType)>, + columns: Vec>, +) -> (Arc, RecordBatch) { + let schema = Arc::new(Schema::new( + fields + .into_iter() + .map(|(name, dtype)| Field::new(name, dtype, false)) + .collect::>(), + )); + let batch = RecordBatch::try_new(schema.clone(), columns).unwrap(); + (schema, batch) +} + +/// Retrieves OptdTableProvider from catalog hierarchy +async fn get_optd_table( + catalog_list: Arc, + catalog_handle: Option, + table_name: &str, +) -> Arc { + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list, catalog_handle); + let catalog = optd_catalog_list.catalog("datafusion").unwrap(); + let schema = catalog.schema("public").unwrap(); + let table = schema + .table(table_name) + .await + .expect("Failed to retrieve table") + .expect("Table not found"); + table + .as_any() + .downcast_ref::() + .unwrap() + .clone() + .into() +} + +#[tokio::test] +async fn 
test_catalog_provider_list_wrapping() { + let ctx = SessionContext::new(); + let catalog_list = ctx.state().catalog_list().clone(); + + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list.clone(), None); + + let original_names = catalog_list.catalog_names(); + let wrapped_names = optd_catalog_list.catalog_names(); + assert_eq!(original_names, wrapped_names); + assert!(wrapped_names.contains(&"datafusion".to_string())); +} + +#[tokio::test] +async fn test_table_provider_wrapping() { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + ], + ) + .unwrap(); + + let mem_table = Arc::new(MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap()); + let optd_table = OptdTableProvider::new(mem_table.clone(), "test_table".to_string(), None); + + assert_eq!(optd_table.table_name(), "test_table"); + assert!(optd_table.catalog_handle().is_none()); + assert_eq!(optd_table.schema(), schema); + assert!(optd_table.statistics().is_none()); +} + +#[tokio::test] +async fn test_schema_retrieval() { + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![("id", DataType::Int32), ("value", DataType::Int32)], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(Int32Array::from(vec![10, 20, 30, 40, 50])), + ], + ); + ctx.register_batch("numbers", batch).unwrap(); + + let optd_table = get_optd_table(ctx.state().catalog_list().clone(), None, "numbers").await; + assert_eq!(optd_table.table_name(), "numbers"); + + let schema = optd_table.schema(); + assert_eq!(schema.field(0).name(), "id"); + assert_eq!(schema.field(0).data_type(), &DataType::Int32); + assert_eq!(schema.field(1).name(), "value"); + assert_eq!(schema.field(1).data_type(), &DataType::Int32); + + let expected_schema 
= Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("value", DataType::Int32, false), + ])); + + assert_eq!(schema.as_ref(), expected_schema.as_ref()); +} + +#[tokio::test] +async fn test_query_execution_with_wrapped_catalog() { + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![("id", DataType::Int32), ("value", DataType::Int32)], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(Int32Array::from(vec![10, 20, 30, 40, 50])), + ], + ); + ctx.register_batch("test_data", batch).unwrap(); + + let results = ctx + .sql("SELECT id, value FROM test_data WHERE value > 20") + .await + .unwrap() + .collect() + .await + .unwrap(); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 3); + assert_eq!( + results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .values(), + &[3, 4, 5] + ); +} + +#[tokio::test] +async fn test_table_provider_accessibility_from_plan() { + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![("id", DataType::Int32), ("name", DataType::Utf8)], + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + ], + ); + ctx.register_batch("users", batch).unwrap(); + + let df = ctx.sql("SELECT * FROM users").await.unwrap(); + assert!(format!("{:?}", df.logical_plan()).contains("users")); + + let results = df.collect().await.unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 2); + assert_eq!( + results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .values(), + &[1, 2] + ); + assert_eq!( + results[0] + .column(1) + .as_any() + .downcast_ref::() + .unwrap() + .iter() + .collect::>(), + vec![Some("Alice"), Some("Bob")] + ); +} + +#[tokio::test] +async fn test_table_metadata_access_through_catalog() { + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![ + ("customer_id", DataType::Int32), + ("order_amount", 
DataType::Int32), + ], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 1, 3, 2, 1])), + Arc::new(Int32Array::from(vec![100, 200, 150, 300, 250, 120])), + ], + ); + ctx.register_batch("orders", batch).unwrap(); + + let optd_table = get_optd_table(ctx.state().catalog_list().clone(), None, "orders").await; + assert_eq!(optd_table.table_name(), "orders"); + assert!(optd_table.catalog_handle().is_none()); + assert!(optd_table.statistics().is_none()); + + let results = ctx + .sql("SELECT customer_id, SUM(order_amount) FROM orders GROUP BY customer_id") + .await + .unwrap() + .collect() + .await + .unwrap(); + let total_rows: usize = results.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!(total_rows, 3, "Should have 3 rows for 3 unique customers"); + assert!(!results.is_empty(), "Should have at least one batch"); + assert_eq!( + results[0].num_columns(), + 2, + "Each batch should have 2 columns (customer_id and sum)" + ); + + // Collect all results into vectors for verification + let mut all_customer_ids = Vec::new(); + let mut all_sums = Vec::new(); + for batch in &results { + let customer_ids = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let sums = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + all_customer_ids.extend(customer_ids.values()); + all_sums.extend(sums.values()); + } + + // Sort by customer_id for consistent verification + let mut pairs: Vec<_> = all_customer_ids + .iter() + .zip(all_sums.iter()) + .map(|(c, s)| (*c, *s)) + .collect(); + pairs.sort_by_key(|p| p.0); + + assert_eq!( + pairs, + vec![(1, 370), (2, 450), (3, 300)], + "Expected customer_id 1->370, 2->450, 3->300" + ); +} + +#[tokio::test] +async fn test_csv_table_wrapping() { + let _tmp_dir = tempfile::TempDir::new().unwrap(); + let csv_path = _tmp_dir.path().join("test.csv"); + let mut file = std::fs::File::create(&csv_path).unwrap(); + std::io::Write::write_all(&mut file, b"id,value\n1,10\n2,20\n").unwrap(); + + let ctx = SessionContext::new(); + 
+ ctx.register_csv( + "test_csv", + csv_path.to_str().unwrap(), + CsvReadOptions::default(), + ) + .await + .unwrap(); + + let df = ctx.sql("SELECT * FROM test_csv").await.unwrap(); + let results = df.collect().await.unwrap(); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 2); + + // CSV columns are typically parsed as Int64, not Int32 + let id_col = results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let value_col = results[0] + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(id_col.values(), &[1, 2]); + assert_eq!(value_col.values(), &[10, 20]); +} + +#[tokio::test] +async fn test_full_optimizer_integration_pipeline() { + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![ + ("product_id", DataType::Int32), + ("category", DataType::Utf8), + ("price", DataType::Int32), + ], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(StringArray::from(vec!["A", "B", "A", "C", "B"])), + Arc::new(Int32Array::from(vec![100, 200, 150, 300, 250])), + ], + ); + ctx.register_batch("products", batch).unwrap(); + + let catalog_list = ctx.state().catalog_list().clone(); + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list, None); + let catalog = optd_catalog_list.catalog("datafusion").unwrap(); + assert!(catalog.schema_names().contains(&"public".to_string())); + + let df = ctx + .sql("SELECT category, AVG(price) as avg_price FROM products GROUP BY category") + .await + .unwrap(); + + assert!(format!("{:?}", df.logical_plan()).contains("products")); + + let results = df.collect().await.unwrap(); + let total_rows: usize = results.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!(total_rows, 3, "Should have 3 categories"); + assert_eq!(results[0].num_columns(), 2); + + // Collect and verify exact AVG results: A->125, B->225, C->300 + let mut category_avgs = Vec::new(); + for batch in &results { + let categories = batch + .column(0) + .as_any() + 
.downcast_ref::() + .unwrap(); + let avg_prices = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + for i in 0..batch.num_rows() { + category_avgs.push((categories.value(i).to_string(), avg_prices.value(i))); + } + } + category_avgs.sort_by(|a, b| a.0.cmp(&b.0)); + + assert_eq!(category_avgs.len(), 3); + assert_eq!(category_avgs[0].0, "A"); + assert!( + (category_avgs[0].1 - 125.0).abs() < 0.01, + "Category A avg should be 125" + ); + assert_eq!(category_avgs[1].0, "B"); + assert!( + (category_avgs[1].1 - 225.0).abs() < 0.01, + "Category B avg should be 225" + ); + assert_eq!(category_avgs[2].0, "C"); + assert!( + (category_avgs[2].1 - 300.0).abs() < 0.01, + "Category C avg should be 300" + ); +} + +// Tests with CatalogService integration + +#[tokio::test] +async fn test_catalog_service_handle_propagation() { + let (_temp_dir, catalog) = create_test_catalog(); + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let ctx = SessionContext::new(); + let (schema, batch) = create_test_data( + vec![("id", DataType::Int32), ("name", DataType::Utf8)], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + ], + ); + ctx.register_batch("users", batch).unwrap(); + + let optd_table = + get_optd_table(ctx.state().catalog_list().clone(), Some(handle), "users").await; + assert!(optd_table.catalog_handle().is_some()); + assert_eq!(optd_table.table_name(), "users"); + assert_eq!(optd_table.schema(), schema); +} + +#[tokio::test] +async fn test_catalog_service_snapshot_retrieval() { + let (_temp_dir, catalog) = create_test_catalog(); + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![("id", DataType::Int32)], + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ); + ctx.register_batch("test", batch).unwrap(); 
+ + let optd_table = get_optd_table(ctx.state().catalog_list().clone(), Some(handle), "test").await; + let catalog_handle = optd_table.catalog_handle().unwrap(); + + let snapshot = catalog_handle.current_snapshot().await.unwrap(); + assert_eq!(snapshot.0, 0, "Fresh catalog should start at snapshot 0"); + + let snapshot_info = catalog_handle.current_snapshot_info().await.unwrap(); + assert_eq!(snapshot_info.id.0, 0); + assert_eq!(snapshot_info.schema_version, 0); + assert!(snapshot_info.next_catalog_id >= 0); + assert!(snapshot_info.next_file_id >= 0); +} + +#[tokio::test] +async fn test_catalog_service_schema_retrieval() { + let (_temp_dir, catalog) = create_test_catalog(); + let conn = catalog.get_connection(); + conn.execute_batch( + "CREATE TABLE test_schema_table (id INTEGER, value VARCHAR, amount DECIMAL(10,2))", + ) + .unwrap(); + + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let schema = handle + .current_schema(None, "test_schema_table") + .await + .unwrap(); + + assert_eq!(schema.fields().len(), 3); + assert_eq!(schema.field(0).name(), "id"); + assert_eq!(schema.field(1).name(), "value"); + assert_eq!(schema.field(2).name(), "amount"); +} + +#[tokio::test] +async fn test_full_workflow_with_catalog_service() { + let (_temp_dir, catalog) = create_test_catalog(); + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![ + ("product_id", DataType::Int32), + ("category", DataType::Utf8), + ("price", DataType::Int32), + ], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(StringArray::from(vec!["A", "B", "A", "C", "B"])), + Arc::new(Int32Array::from(vec![100, 200, 150, 300, 250])), + ], + ); + ctx.register_batch("products", batch).unwrap(); + + let optd_table = get_optd_table( + ctx.state().catalog_list().clone(), + Some(handle.clone()), + 
"products", + ) + .await; + + assert!(optd_table.catalog_handle().is_some()); + + let snapshot = optd_table + .catalog_handle() + .unwrap() + .current_snapshot() + .await + .unwrap(); + assert_eq!(snapshot.0, 0, "Fresh catalog should start at snapshot 0"); + + let results = ctx + .sql("SELECT category, AVG(price) as avg_price FROM products GROUP BY category") + .await + .unwrap() + .collect() + .await + .unwrap(); + + let total_rows: usize = results.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!(total_rows, 3, "Should have 3 categories"); + + // Verify exact AVG results + let mut category_avgs = Vec::new(); + for batch in &results { + let categories = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let avg_prices = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + for i in 0..batch.num_rows() { + category_avgs.push((categories.value(i).to_string(), avg_prices.value(i))); + } + } + category_avgs.sort_by(|a, b| a.0.cmp(&b.0)); + + assert_eq!( + category_avgs, + vec![ + ("A".to_string(), 125.0), + ("B".to_string(), 225.0), + ("C".to_string(), 300.0) + ] + ); +} + +#[tokio::test] +async fn test_catalog_service_statistics_update_and_retrieval() { + let (_temp_dir, catalog) = create_test_catalog(); + let conn = catalog.get_connection(); + + // Create a table with known structure + conn.execute_batch( + "CREATE TABLE stats_table (id INTEGER, name VARCHAR, age INTEGER); + INSERT INTO stats_table VALUES (1, 'Alice', 30), (2, 'Bob', 25), (3, 'Charlie', 35);", + ) + .unwrap(); + + // Get table_id and column_id for statistics + let table_id: i64 = conn.query_row( + "SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'stats_table'", + [], + |row| row.get(0), + ).unwrap(); + + let age_column_id: i64 = conn + .query_row( + "SELECT column_id FROM 
__ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'age'", + [table_id], + |row| row.get(0), + ) + .unwrap(); + + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + // Update statistics through the catalog service + handle + .update_table_column_stats(age_column_id, table_id, "ndv", r#"{"distinct_count": 3}"#) + .await + .unwrap(); + + handle + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .await + .unwrap(); + + handle + .update_table_column_stats(age_column_id, table_id, "max_value", "35") + .await + .unwrap(); + + // Retrieve statistics + let snapshot = handle.current_snapshot().await.unwrap(); + let stats = handle + .table_statistics("stats_table", snapshot) + .await + .unwrap(); + + assert!(stats.is_some(), "Statistics should be available"); + let stats = stats.unwrap(); + + // Verify table-level statistics + assert_eq!(stats.row_count, 3, "Table should have 3 rows"); + + // Verify column statistics + let age_stats = stats + .column_statistics + .iter() + .find(|c| c.name == "age") + .expect("age column should have statistics"); + + assert_eq!( + age_stats.advanced_stats.len(), + 3, + "Should have 3 stat types" + ); + + let ndv_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "ndv") + .expect("Should have ndv statistic"); + assert_eq!(ndv_stat.data, serde_json::json!({"distinct_count": 3})); + + let min_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "min_value") + .expect("Should have min_value statistic"); + assert_eq!(min_stat.data, serde_json::json!(25)); + + let max_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "max_value") + .expect("Should have max_value statistic"); + assert_eq!(max_stat.data, serde_json::json!(35)); +} + +#[tokio::test] +async fn test_catalog_service_with_datafusion_integration() { + let (_temp_dir, catalog) = create_test_catalog(); + let 
(service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![("id", DataType::Int32), ("value", DataType::Int32)], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(Int32Array::from(vec![100, 200, 150, 300, 250])), + ], + ); + ctx.register_batch("test_table", batch).unwrap(); + + let optd_table = get_optd_table( + ctx.state().catalog_list().clone(), + Some(handle), + "test_table", + ) + .await; + + let snapshot = optd_table + .catalog_handle() + .unwrap() + .current_snapshot() + .await + .unwrap(); + assert_eq!(snapshot.0, 0); + + let results = ctx + .sql("SELECT id, value FROM test_table WHERE value > 150") + .await + .unwrap() + .collect() + .await + .unwrap(); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 3); + + // Verify exact filtered results: rows with value > 150 are (2,200), (4,300), (5,250) + let id_col = results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let value_col = results[0] + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(id_col.values(), &[2, 4, 5]); + assert_eq!(value_col.values(), &[200, 300, 250]); +} + +#[tokio::test] +async fn test_multiple_schemas_isolation() { + let ctx = SessionContext::new(); + + // Register tables in the default "public" schema + let (_, batch1) = create_test_data( + vec![("id", DataType::Int32), ("name", DataType::Utf8)], + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + ], + ); + ctx.register_batch("users", batch1).unwrap(); + + // Create a custom schema and register a table there + let (_, batch2) = create_test_data( + vec![("id", DataType::Int32), ("department", DataType::Utf8)], + vec![ + Arc::new(Int32Array::from(vec![10, 20])), + Arc::new(StringArray::from(vec!["Engineering", "Sales"])), + ], + ); + + // DataFusion's default catalog structure: 
catalog.schema.table + // We'll use the memory catalog provider to create multiple schemas + let mem_table = MemTable::try_new(batch2.schema(), vec![vec![batch2]]).unwrap(); + ctx.catalog("datafusion") + .unwrap() + .register_schema("custom_schema", Arc::new(MemorySchemaProvider::new())) + .unwrap(); + + ctx.catalog("datafusion") + .unwrap() + .schema("custom_schema") + .unwrap() + .register_table("departments".to_string(), Arc::new(mem_table)) + .unwrap(); + + // Wrap with OptdCatalogProviderList + let catalog_list = ctx.state().catalog_list().clone(); + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list, None); + + // Test 1: Verify both schemas exist + let catalog = optd_catalog_list.catalog("datafusion").unwrap(); + let schema_names = catalog.schema_names(); + assert!(schema_names.contains(&"public".to_string())); + assert!(schema_names.contains(&"custom_schema".to_string())); + + // Test 2: Verify tables are isolated in their respective schemas + let public_schema = catalog.schema("public").unwrap(); + let custom_schema = catalog.schema("custom_schema").unwrap(); + + let users_in_public = public_schema.table("users").await.unwrap(); + assert!( + users_in_public.is_some(), + "users should exist in public schema" + ); + let departments_in_public = public_schema.table("departments").await.unwrap(); + assert!( + departments_in_public.is_none(), + "departments should not exist in public schema" + ); + + let departments_in_custom = custom_schema.table("departments").await.unwrap(); + assert!( + departments_in_custom.is_some(), + "departments should exist in custom_schema" + ); + let users_in_custom = custom_schema.table("users").await.unwrap(); + assert!( + users_in_custom.is_none(), + "users should not exist in custom_schema" + ); + + // Test 3: Verify OptdTableProvider wraps tables from both schemas + let users_table = users_in_public.unwrap(); + let users_optd = users_table + .as_any() + .downcast_ref::() + .unwrap(); + 
assert_eq!(users_optd.table_name(), "users"); + + let departments_table = departments_in_custom.unwrap(); + let departments_optd = departments_table + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(departments_optd.table_name(), "departments"); + + // Test 4: Verify queries work with schema qualification + let results = ctx + .sql("SELECT * FROM public.users") + .await + .unwrap() + .collect() + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 2); + + // Verify exact user data + let id_col = results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let name_col = results[0] + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(id_col.values(), &[1, 2]); + assert_eq!( + name_col.iter().collect::>(), + vec![Some("Alice"), Some("Bob")] + ); + + let results = ctx + .sql("SELECT * FROM custom_schema.departments") + .await + .unwrap() + .collect() + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 2); + + // Verify exact department data + let id_col = results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let dept_col = results[0] + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(id_col.values(), &[10, 20]); + assert_eq!( + dept_col.iter().collect::>(), + vec![Some("Engineering"), Some("Sales")] + ); +} + +#[tokio::test] +async fn test_multiple_schemas_with_catalog_service() { + let (_temp_dir, catalog) = create_test_catalog(); + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let ctx = SessionContext::new(); + + // Register tables in public schema + let (_, batch1) = create_test_data( + vec![("id", DataType::Int32), ("value", DataType::Int32)], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(Int32Array::from(vec![100, 200, 300])), + ], + ); + ctx.register_batch("table1", batch1).unwrap(); + + // Create and register in custom schema + let (_, 
batch2) = create_test_data( + vec![("id", DataType::Int32), ("amount", DataType::Int32)], + vec![ + Arc::new(Int32Array::from(vec![10, 20])), + Arc::new(Int32Array::from(vec![500, 600])), + ], + ); + + let mem_table = MemTable::try_new(batch2.schema(), vec![vec![batch2]]).unwrap(); + ctx.catalog("datafusion") + .unwrap() + .register_schema("analytics", Arc::new(MemorySchemaProvider::new())) + .unwrap(); + + ctx.catalog("datafusion") + .unwrap() + .schema("analytics") + .unwrap() + .register_table("table2".to_string(), Arc::new(mem_table)) + .unwrap(); + + // Wrap with catalog service handle + let catalog_list = ctx.state().catalog_list().clone(); + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list, Some(handle.clone())); + + // Verify handle propagates to tables in both schemas + let catalog_provider = optd_catalog_list.catalog("datafusion").unwrap(); + + let table1 = catalog_provider + .schema("public") + .unwrap() + .table("table1") + .await + .unwrap() + .unwrap(); + let table1_optd = table1.as_any().downcast_ref::().unwrap(); + let handle1 = table1_optd + .catalog_handle() + .expect("table1 should have catalog handle"); + + let table2 = catalog_provider + .schema("analytics") + .unwrap() + .table("table2") + .await + .unwrap() + .unwrap(); + let table2_optd = table2.as_any().downcast_ref::().unwrap(); + let handle2 = table2_optd + .catalog_handle() + .expect("table2 should have catalog handle"); + + // Verify both can access the same catalog service + let snapshot1 = handle1.current_snapshot().await.unwrap(); + let snapshot2 = handle2.current_snapshot().await.unwrap(); + assert_eq!( + snapshot1.0, snapshot2.0, + "Both tables should share the same catalog snapshot" + ); + + // Verify cross-schema query works + let results = ctx + .sql("SELECT t1.id, t1.value, t2.amount FROM public.table1 t1 CROSS JOIN analytics.table2 t2") + .await + .unwrap() + .collect() + .await + .unwrap(); + let total_rows: usize = results.iter().map(|batch| 
batch.num_rows()).sum(); + assert_eq!( + total_rows, 6, + "3 rows from table1 * 2 rows from table2 = 6 rows" + ); + + // Verify exact cross join results + let mut all_rows = Vec::new(); + for batch in &results { + let t1_id = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let t1_value = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + let t2_amount = batch + .column(2) + .as_any() + .downcast_ref::() + .unwrap(); + for i in 0..batch.num_rows() { + all_rows.push((t1_id.value(i), t1_value.value(i), t2_amount.value(i))); + } + } + all_rows.sort(); + + // Expected: each row from table1 (1,100), (2,200), (3,300) paired with each row from table2 (10,500), (20,600) + assert_eq!( + all_rows, + vec![ + (1, 100, 500), + (1, 100, 600), + (2, 200, 500), + (2, 200, 600), + (3, 300, 500), + (3, 300, 600), + ] + ); +} diff --git a/optd/catalog/src/service.rs b/optd/catalog/src/service.rs index 83b326c..c6d4f1f 100644 --- a/optd/catalog/src/service.rs +++ b/optd/catalog/src/service.rs @@ -102,7 +102,7 @@ pub enum CatalogRequest { } /// Handle for catalog service interaction -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct CatalogServiceHandle { sender: mpsc::Sender, } From 85205c4a4c122024d62cca8ca4f8e2c2889c40b5 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Wed, 19 Nov 2025 00:40:12 -0500 Subject: [PATCH 32/40] cargo fmt --- connectors/datafusion/src/catalog.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/connectors/datafusion/src/catalog.rs b/connectors/datafusion/src/catalog.rs index d9eb157..c1091f9 100644 --- a/connectors/datafusion/src/catalog.rs +++ b/connectors/datafusion/src/catalog.rs @@ -48,8 +48,7 @@ impl CatalogProviderList for OptdCatalogProviderList { fn catalog(&self, name: &str) -> Option> { let catalog_handle = self.catalog_handle.clone(); self.inner.catalog(name).map(|catalog| { - Arc::new(OptdCatalogProvider::new(catalog, catalog_handle)) - as Arc + Arc::new(OptdCatalogProvider::new(catalog, 
catalog_handle)) as Arc }) } } From 4bebc8ffbfb59a91ea22f4f8e82064fd4abc2876 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Wed, 19 Nov 2025 14:28:59 -0500 Subject: [PATCH 33/40] bump version of datafusion & cli to 51.0 --- Cargo.lock | 704 +++++++++++++++++++++++++----------------------- Cargo.toml | 2 +- cli/Cargo.toml | 2 +- cli/src/main.rs | 5 +- 4 files changed, 371 insertions(+), 342 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8886726..e689db1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,15 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "addr2line" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" version = "2.0.1" @@ -61,12 +52,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -140,7 +125,7 @@ checksum = "3a033b4ced7c585199fb78ef50fca7fe2f444369ec48080c5fd072efa1a03cc7" dependencies = [ "bigdecimal", "bon", - "bzip2 0.6.0", + "bzip2 0.6.1", "crc32fast", "digest", "log", @@ -175,9 +160,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" dependencies = [ "arrow-arith", "arrow-array", @@ -196,23 +181,23 @@ dependencies = [ [[package]] name = 
"arrow-arith" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" dependencies = [ "ahash", "arrow-buffer", @@ -222,25 +207,28 @@ dependencies = [ "chrono-tz", "half", "hashbrown 0.16.0", - "num", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" dependencies = [ "arrow-array", "arrow-buffer", @@ -253,15 +241,15 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c" 
dependencies = [ "arrow-array", "arrow-cast", @@ -274,21 +262,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27" dependencies = [ "arrow-array", "arrow-buffer", @@ -302,9 +291,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8" dependencies = [ "arrow-array", "arrow-buffer", @@ -313,20 +302,22 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.11.4", + "indexmap 2.12.0", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" dependencies = [ "arrow-array", "arrow-buffer", @@ -337,9 +328,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" dependencies = [ "arrow-array", "arrow-buffer", @@ -350,33 +341,33 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" dependencies = [ - "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" dependencies = [ "ahash", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" dependencies = [ "arrow-array", "arrow-buffer", @@ -384,7 +375,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax 0.8.6", ] @@ -462,9 +453,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.6" +version = "1.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bc1b40fb26027769f16960d2f4a6bc20c4bb755d403e552c8c1a73af433c246" +checksum = "1856b1b48b65f71a4dd940b1c0931f9a7b646d4a924b9828ffefc1454714668a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -492,9 +483,9 @@ dependencies = [ [[package]] name = 
"aws-credential-types" -version = "1.2.6" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d025db5d9f52cbc413b167136afb3d8aeea708c0d8884783cf6253be5e22f6f2" +checksum = "86590e57ea40121d47d3f2e131bfd873dea15d78dc2f4604f4734537ad9e56c4" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -527,9 +518,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.10" +version = "1.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c034a1bc1d70e16e7f4e4caf7e9f7693e4c9c24cd91cf17c2a0b21abaebc7c8b" +checksum = "8fe0fd441565b0b318c76e7206c8d1d0b0166b3e986cf30e890b61feb6192045" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -551,9 +542,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.84.0" +version = "1.89.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357a841807f6b52cb26123878b3326921e2a25faca412fabdd32bd35b7edd5d3" +checksum = "a9c1b1af02288f729e95b72bd17988c009aa72e26dcb59b3200f86d7aea726c9" dependencies = [ "aws-credential-types", "aws-runtime", @@ -573,9 +564,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.86.0" +version = "1.91.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d1cc7fb324aa12eb4404210e6381195c5b5e9d52c2682384f295f38716dd3c7" +checksum = "4e8122301558dc7c6c68e878af918880b82ff41897a60c8c4e18e4dc4d93e9f1" dependencies = [ "aws-credential-types", "aws-runtime", @@ -595,9 +586,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.86.0" +version = "1.92.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7d835f123f307cafffca7b9027c14979f1d403b417d8541d67cf252e8a21e35" +checksum = "a0c7808adcff8333eaa76a849e6de926c6ac1a1268b9fd6afe32de9c29ef29d2" dependencies = [ "aws-credential-types", "aws-runtime", @@ -618,9 +609,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.4" 
+version = "1.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" +checksum = "c35452ec3f001e1f2f6db107b6373f1f48f05ec63ba2c5c9fa91f07dad32af11" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -640,9 +631,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.5" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c" +checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c" dependencies = [ "futures-util", "pin-project-lite", @@ -651,15 +642,16 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.3" +version = "0.62.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" +checksum = "445d5d720c99eed0b4aa674ed00d835d9b1427dd73e04adaf2f94c6b2d6f9fca" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", + "futures-util", "http 0.2.12", "http 1.3.1", "http-body 0.4.6", @@ -671,9 +663,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.1" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147e8eea63a40315d704b97bf9bc9b8c1402ae94f89d5ad6f7550d963309da1b" +checksum = "623254723e8dfd535f566ee7b2381645f8981da086b5c4aa26c0c41582bb1d2c" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -695,27 +687,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.5" +version = "0.61.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaa31b350998e703e9826b2104dd6f63be0508666e1aba88137af060e8944047" +checksum = "2db31f727935fc63c6eeae8b37b438847639ec330a9161ece694efba257e0c54" dependencies = [ "aws-smithy-types", ] [[package]] name 
= "aws-smithy-observability" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393" +checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.7" +version = "0.60.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +checksum = "d28a63441360c477465f80c7abac3b9c4d075ca638f982e605b7dc2a2c7156c9" dependencies = [ "aws-smithy-types", "urlencoding", @@ -723,9 +715,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.2" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185" +checksum = "0bbe9d018d646b96c7be063dd07987849862b0e6d07c778aad7d93d1be6c1ef0" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -747,9 +739,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.0" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07f5e0fc8a6b3f2303f331b94504bbf754d85488f402d6f1dd7a6080f99afe56" +checksum = "ec7204f9fd94749a7c53b26da1b961b4ac36bf070ef1e0b94bb09f79d4f6c193" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -764,9 +756,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.2" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8" +checksum = "25f535879a207fce0db74b679cfc3e91a3159c8144d717d55f5832aea9eef46e" dependencies = [ "base64-simd", "bytes", @@ -787,18 +779,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.10" +version = "0.60.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728" +checksum = "eab77cdd036b11056d2a30a7af7b775789fb024bf216acc13884c6c97752ae56" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.8" +version = "1.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" +checksum = "d79fb68e3d7fe5d4833ea34dc87d2e97d26d3086cb3da660bb6b1f76d98680b6" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -855,21 +847,6 @@ dependencies = [ "tower-service", ] -[[package]] -name = "backtrace" -version = "0.3.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", -] - [[package]] name = "base64" version = "0.21.7" @@ -1063,9 +1040,9 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" dependencies = [ "libbz2-rs-sys", ] @@ -1114,15 +1091,14 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ - "android-tzdata", "iana-time-zone", "num-traits", "serde", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -1148,9 +1124,9 @@ dependencies = [ [[package]] name = "clap" 
-version = "4.5.48" +version = "4.5.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae" +checksum = "aa8120877db0e5c011242f96806ce3c94e0737ab8108532a76a3300a01db2ab8" dependencies = [ "clap_builder", "clap_derive", @@ -1158,9 +1134,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.48" +version = "4.5.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9" +checksum = "02576b399397b659c26064fbc92a75fede9d18ffd5f80ca1cd74ddab167016e1" dependencies = [ "anstream", "anstyle", @@ -1170,9 +1146,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.47" +version = "4.5.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" dependencies = [ "heck", "proc-macro2", @@ -1423,22 +1399,22 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "481d0c1cad7606cee11233abcdff8eec46e43dd25abda007db6d5d26ae8483c4" +checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "bzip2 0.6.0", + "bzip2 0.6.1", "chrono", "datafusion-catalog", "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-avro", "datafusion-datasource-csv", "datafusion-datasource-json", @@ -1461,7 +1437,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "hex", "itertools 0.14.0", "log", "object_store", @@ -1469,6 +1444,7 @@ dependencies = [ "parquet", "rand 0.9.2", "regex", + "rstest", "sqlparser", 
"tempfile", "tokio", @@ -1480,9 +1456,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d70327e81ab3a1f5832d8b372d55fa607851d7cea6d1f8e65ff0c98fcc32d222" +checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ "arrow", "async-trait", @@ -1495,7 +1471,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -1506,9 +1481,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "268819e6bb20ba70a664abddc20deac604f30d3267f8c91847064542a8c0720c" +checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ "arrow", "async-trait", @@ -1518,10 +1493,11 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", - "datafusion-session", "futures", + "itertools 0.14.0", "log", "object_store", "tokio", @@ -1529,16 +1505,18 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56a7d57a2a5535516d4fa5b3ac494959531df562558e1a827dd60075364b53e" +checksum = "fab982df44f818a749cb5200504ccb919f4608cb9808daf8b3fb98aa7955fd1e" dependencies = [ "arrow", "async-trait", "aws-config", "aws-credential-types", + "chrono", "clap", "datafusion", + "datafusion-common", "dirs", "env_logger", "futures", @@ -1555,20 +1533,19 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"054873d5563f115f83ef4270b560ac2ce4de713905e825a40cac49d6ff348254" +checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash", "apache-avro", "arrow", "arrow-ipc", - "base64 0.22.1", "chrono", "half", "hashbrown 0.14.5", "hex", - "indexmap 2.11.4", + "indexmap 2.12.0", "libc", "log", "object_store", @@ -1582,9 +1559,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a1d1bc69aaaadb8008b65329ed890b33e845dc063225c190f77b20328fbe1d" +checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" dependencies = [ "futures", "log", @@ -1593,15 +1570,15 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d855160469020982880fd9bd0962e033d2f4728f56f85a83d8c90785638b6519" +checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", - "bzip2 0.6.0", + "bzip2 0.6.1", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -1618,9 +1595,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "parquet", "rand 0.9.2", - "tempfile", "tokio", "tokio-util", "url", @@ -1628,47 +1603,64 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource-arrow" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + 
"object_store", + "tokio", +] + [[package]] name = "datafusion-datasource-avro" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8cfac9188138eb326f60eab872cb92a89a301f0a3f9fd1f24004325fd2741d" +checksum = "388ed8be535f562cc655b9c3d22edbfb0f1a50a25c242647a98b6d92a75b55a1" dependencies = [ "apache-avro", "arrow", "async-trait", "bytes", - "chrono", - "datafusion-catalog", "datafusion-common", "datafusion-datasource", - "datafusion-execution", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "num-traits", "object_store", - "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ec3aa7575378d23aae96b955b5233bea6f9d461648174f6ccc8f3c160f2b7a7" +checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -1680,74 +1672,67 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00cfb8f33e2864eeb3188b6818acf5546d56a5a487d423cce9b684a554caabfa" +checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - 
"serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3bfb48fb4ff42ac1485a12ea56434eaab53f7da8f00b2443b1a3d35a0b6d10" +checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", "futures", - "hex", "itertools 0.14.0", "log", "object_store", "parking_lot", "parquet", - "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fbf41013cf55c2369b5229594898e8108c8a1beeb49d97feb5e0cce9933eb8f" +checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-execution" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fd0c1ffe3885687758f985ed548184bf63b17b2a7a5ae695de422ad6432118" +checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ "arrow", "async-trait", @@ -1766,9 +1751,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c4fe6411218a9dab656437b1e69b00a470a7a2d7db087867a366c145eb164a7" +checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ "arrow", "async-trait", @@ -1779,7 +1764,8 @@ dependencies = [ 
"datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.11.4", + "indexmap 2.12.0", + "itertools 0.14.0", "paste", "recursive", "serde_json", @@ -1788,22 +1774,22 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a45bee7d2606bfb41ceb1d904ba7cecf69bd5a6f8f3e6c57c3f5a83d84bdd97" +checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.11.4", + "indexmap 2.12.0", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-functions" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c7e1c532ff9d14f291160bca23e55ffd4899800301dd2389786c2f02d76904a" +checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ "arrow", "arrow-buffer", @@ -1821,6 +1807,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "num-traits", "rand 0.9.2", "regex", "sha2", @@ -1830,9 +1817,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05d47426645aef1e73b1a034c75ab2401bc504175feb191accbe211ec24a342" +checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash", "arrow", @@ -1851,9 +1838,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05c99f648b2b1743de0c1c19eef07e8cc5a085237f172b2e20bf6934e0a804e4" +checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash", "arrow", @@ -1864,9 +1851,9 @@ dependencies = [ [[package]] name = 
"datafusion-functions-nested" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4227782023f4fb68d3d5c5eb190665212f43c9a0b437553e4b938b379aff6cf6" +checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ "arrow", "arrow-ord", @@ -1874,6 +1861,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -1886,9 +1874,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d902b1769f69058236e89f04f3bff2cf62f24311adb7bf3c6c3e945c9451076" +checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ "arrow", "async-trait", @@ -1902,9 +1890,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8ee43974c92eb9920fe8e97e0fab48675e93b062abcb48bef4c1d4305b6ee4" +checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ "arrow", "datafusion-common", @@ -1920,9 +1908,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1e149d36cdd44fb425dc815c5fac55025aa9a592dd65cb3c421881096292c02" +checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1930,20 +1918,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"07c9faa0cdefb6e6e756482b846397b5c2d84d369e30b009472b9ab9b1430fbd" +checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", "syn", ] [[package]] name = "datafusion-optimizer" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16a4f7059302ad1de6e97ab0eebb5c34405917b1f80806a30a66e38ad118251" +checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ "arrow", "chrono", @@ -1951,7 +1939,7 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.11.4", + "indexmap 2.12.0", "itertools 0.14.0", "log", "recursive", @@ -1961,9 +1949,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10bb87a605d8ce9672d5347c0293c12211b0c03923fc12fbdc665fe76e6f9e01" +checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash", "arrow", @@ -1974,9 +1962,8 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap 2.11.4", + "indexmap 2.12.0", "itertools 0.14.0", - "log", "parking_lot", "paste", "petgraph", @@ -1984,9 +1971,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da3a7429a555dd5ff0bec4d24bd5532ec43876764088da635cad55b2f178dc2" +checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ "arrow", "datafusion-common", @@ -1999,9 +1986,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"845eb44ef1e04d2a15c6d955cb146b40a41814a7be4377f0a541857d3e257d6f" +checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash", "arrow", @@ -2013,9 +2000,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b9b648ee2785722c79eae366528e52e93ece6808aef9297cf8e5521de381da" +checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ "arrow", "datafusion-common", @@ -2027,15 +2014,14 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6688d17b78104e169d7069749832c20ff50f112be853d2c058afe46c889064" +checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash", "arrow", @@ -2054,7 +2040,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.11.4", + "indexmap 2.12.0", "itertools 0.14.0", "log", "parking_lot", @@ -2064,12 +2050,11 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a893a46c56f5f190085e13949eb8ec163672c7ec2ac33bdb82c84572e71ca73" +checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -2082,39 +2067,30 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8b62684c7a1db6121a8c83100209cffa1e664a8d9ced87e1a32f8cdc2fff3c2" +checksum = 
"8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] name = "datafusion-sql" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09cff94b8242843e1da5d069e9d2cfc53807f1f00b1c0da78c297f47c21456e" +checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ "arrow", "bigdecimal", + "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.11.4", + "indexmap 2.12.0", "log", "recursive", "regex", @@ -2159,7 +2135,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2271,9 +2247,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.2" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-rs-sys", @@ -2384,6 +2360,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2439,12 +2421,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "gimli" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" - [[package]] name = "glob" version = "0.3.2" @@ -2463,7 +2439,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.3.1", - "indexmap 2.11.4", + "indexmap 2.12.0", "slab", "tokio", "tokio-util", @@ -2472,13 +2448,14 @@ dependencies = [ [[package]] name = "half" -version = "2.6.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", "num-traits", + "zerocopy", ] [[package]] @@ -2503,8 +2480,6 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "allocator-api2", - "equivalent", "foldhash", ] @@ -2855,9 +2830,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.11.4" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", "hashbrown 0.16.0", @@ -2869,17 +2844,6 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" -[[package]] -name = "io-uring" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" -dependencies = [ - "bitflags", - "cfg-if", - "libc", -] - [[package]] name = "ipnet" version = "2.11.0" @@ -3054,9 +3018,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.176" +version = "0.2.177" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "libloading" @@ -3222,6 +3186,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -3276,20 +3241,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.6" @@ -3325,28 +3276,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -3357,20 +3286,11 @@ dependencies = [ "libm", ] -[[package]] -name = "object" -version = "0.36.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" -dependencies = [ - "memchr", -] - [[package]] name = "object_store" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" +checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" dependencies = [ "async-trait", "base64 0.22.1", @@ -3524,9 +3444,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a" dependencies = [ "ahash", "arrow-array", @@ -3545,8 +3465,9 @@ dependencies = [ "half", "hashbrown 0.16.0", "lz4_flex", - "num", "num-bigint", + "num-integer", + "num-traits", "object_store", "paste", "ring", @@ -3573,13 +3494,13 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "petgraph" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.11.4", + "indexmap 2.12.0", "serde", ] @@ -3694,6 +3615,15 @@ dependencies = [ "syn", ] +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -3817,9 +3747,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.40" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -3947,13 +3877,13 @@ dependencies = [ [[package]] name = 
"regex" -version = "1.11.1" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.9", + "regex-automata 0.4.13", "regex-syntax 0.8.6", ] @@ -3968,9 +3898,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -3995,6 +3925,12 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + [[package]] name = "reqwest" version = "0.12.22" @@ -4052,10 +3988,33 @@ dependencies = [ ] [[package]] -name = "rustc-demangle" -version = "0.1.26" +name = "rstest" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", +] + +[[package]] +name = "rstest_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn", + 
"unicode-ident", +] [[package]] name = "rustc-hash" @@ -4257,10 +4216,11 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] @@ -4273,11 +4233,20 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -4343,6 +4312,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + [[package]] name = "simdutf8" version = "0.1.5" @@ -4416,9 +4391,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", @@ -4512,9 +4487,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.106" +version = "2.0.110" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" dependencies = [ "proc-macro2", "quote", @@ -4666,30 +4641,27 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.47.1" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ - "backtrace", "bytes", - "io-uring", "libc", "mio", "parking_lot", "pin-project-lite", "signal-hook-registry", - "slab", "socket2 0.6.0", "tokio-macros", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", @@ -4730,6 +4702,36 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml_datetime" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +dependencies = [ + "indexmap 2.12.0", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" 
+dependencies = [ + "winnow", +] + [[package]] name = "tonic" version = "0.12.3" @@ -5199,7 +5201,7 @@ checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement", "windows-interface", - "windows-link", + "windows-link 0.1.3", "windows-result", "windows-strings", ] @@ -5232,13 +5234,19 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-result" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -5247,7 +5255,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -5277,6 +5285,15 @@ dependencies = [ "windows-targets 0.53.3", ] +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link 0.2.1", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -5299,7 +5316,7 @@ version = "0.53.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" dependencies = [ - "windows-link", + "windows-link 0.1.3", "windows_aarch64_gnullvm 0.53.0", "windows_aarch64_msvc 0.53.0", "windows_i686_gnu 0.53.0", @@ -5406,6 +5423,15 @@ version = "0.53.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +[[package]] +name = "winnow" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen-rt" version = "0.39.0" diff --git a/Cargo.toml b/Cargo.toml index 38a35c1..f208488 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ tokio = { version = "1.47", features = ["macros", "rt", "sync"] } tracing = "0.1" # DataFusion dependencies -datafusion = { version = "50.0", default-features = false } +datafusion = { version = "51.0", default-features = false } [workspace.package] version = "0.1.0" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 4a094c4..7114cd2 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -7,7 +7,7 @@ repository.workspace = true [dependencies] clap = { version = "4.5.41", features = ["derive", "cargo"] } datafusion = { workspace = true } -datafusion-cli = "50.0" +datafusion-cli = "51.0" optd-datafusion = { path = "../connectors/datafusion", version = "0.1" } tokio = { workspace = true, features = [ "macros", diff --git a/cli/src/main.rs b/cli/src/main.rs index 312a27f..6379169 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -28,10 +28,12 @@ use datafusion::execution::memory_pool::{ FairSpillPool, GreedyMemoryPool, MemoryPool, TrackConsumersPool, }; use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::logical_expr::ExplainFormat; use datafusion_cli::catalog::DynamicObjectStoreCatalog; use datafusion_cli::functions::ParquetMetadataFunc; use datafusion_cli::{ DATAFUSION_CLI_VERSION, exec, + object_storage::instrumented::InstrumentedObjectStoreRegistry, pool_type::PoolType, print_format::PrintFormat, print_options::{MaxRows, PrintOptions}, @@ -226,6 +228,7 @@ async fn main_inner() -> Result<()> { quiet: args.quiet, 
maxrows: args.maxrows, color: args.color, + instrumented_registry: Arc::new(InstrumentedObjectStoreRegistry::new()), }; let commands = args.command; @@ -282,7 +285,7 @@ fn get_session_config(args: &Args) -> Result { // use easier to understand "tree" mode by default // if the user hasn't specified an explain format in the environment if env::var_os("DATAFUSION_EXPLAIN_FORMAT").is_none() { - config_options.explain.format = String::from("tree"); + config_options.explain.format = ExplainFormat::Tree; } // in the CLI, we want to show NULL values rather the empty strings From 4dba0d411d8764cfa62062c5380594e53dac28e6 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Wed, 19 Nov 2025 15:15:32 -0500 Subject: [PATCH 34/40] add datafusion features & update gitignore --- .gitignore | 9 ++++++++- Cargo.toml | 5 ++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 52bfa81..e263eb5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ - # rust /target @@ -11,3 +10,11 @@ data/ # datafusion .history + +*.db + +# configuration +.vscode/launch.json + +# macOS +.DS_Store \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index f208488..45a6fd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,10 @@ tokio = { version = "1.47", features = ["macros", "rt", "sync"] } tracing = "0.1" # DataFusion dependencies -datafusion = { version = "51.0", default-features = false } +datafusion = { version = "51.0", default-features = false, features = [ + "parquet", + "sql", +] } [workspace.package] version = "0.1.0" From 03ebbbb0aafd2c9a7e3cba24468b0ae6179a9266 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Thu, 20 Nov 2025 22:24:39 -0500 Subject: [PATCH 35/40] refactoring inconsistent design and remove ds_store --- .DS_Store | Bin 10244 -> 0 bytes connectors/datafusion/src/catalog.rs | 32 ++--- connectors/datafusion/src/lib.rs | 2 +- connectors/datafusion/src/table.rs | 20 +--- .../datafusion/tests/integration_test.rs | 110 
++++++++++-------- 5 files changed, 77 insertions(+), 87 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 0971cf0bd0150ee2ecfaa42761e5eb0a19902cf0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10244 zcmeHMYitx%6uxI#+8H{u(>go~g{~}Og%(PAwLFsT0}7?3mF>I5y1O&9L#H!!XLgH_ znkBv>YGUy57mP9Sk)TnCCJI4+D2W;$X^5gEiZL+bcG>Ae{}4ms++^;# z_ndRjIrp1$cIM6!LZBg`Y#<~+2;uRfR1&e9rBOM(W;D*_1dsx*C;IS)aLgPVpP}h9 zd<1+1d<1+1d<1+1ZWjdbnN1sC;84H!5%3Z45y&7we;)#PQ5te-%AxG113PvEK$?qe z)^IG*0n&g=LoQ7@6eZZeS&GzI3hogDoaM+5xOyR%rX1=lC*U4F!1WC72?dyUv>#yV z1VRq=dmjNGftd&dkd;f;kzt|{+jD+jbo%vd*_heX6xGcr^03rWUw0h*xZW#Q{!uvfB-{Vtfs>E5I`S@C$?Mw;vc9)yE9;1PvG;NA6>bEO6$T{z)8V_T#}i=jx{(nU!pG z_L{Rfz1J)0y$bUS#06q!S9g!t*Q>=$Bdi@&homA4H@p|89dR{@8|{gidScA5hNR+H zM2{u(h^^M^I#otqkrj)Zy53=0nyr~eU$3nmvD1b;qTYVXjN6UO*pMV8EcJkjwzQ<$ zgci|M3H8=KPR&@|YTQB^427Ic3ug)!m)&{Sn)RFNb~fy8**{ez6_?y0FBJO`UBlK6 zX==PLV#QQL(Tw4)2~Dv_dXk!@MRm2)h>WRxF;oguCB;&aTq-Z)r6Av%h-wccH2cA+ z5~;XImdgV?FP$4yFqk$|p%xY9Wu;s$4^VDnT*Y4DdB$EXSBRY!g1}1=fpOM~RbuBT z-hM%{98R@ZBX{YMsHzK+;w;-NZk2nI&h7+hpTpcHE|h!m8|WBr#S(ZHkSK@h1zx&z zA9dXtvF!G7)o?>#_AP? 
z+9{Sw+B;-iA!$ZZ!*oZyovfF>;&~zH08dCTROt+Z7nKwnF+c^`;bwaJGPazoCbc9) zI>{j{x8vj~a*CWK=g9@~8M#b;Ait1b$qn)+`3pG60TC8J5lB!9DEYPou@g=^>7F9W`irVFh5|f=%gV$R<>?xnUYlK5Ch8Ghz^at;ryT9zDjv&3n&(a{?!oCkDbJafRjs4tDRFEn zmep*;;)qB&Q_D)1F4N%lx*~+Cgef+ZHHi}1xg^}`{Qi<$ zA>Wgq$#rrA@}K~-x*RH?8n!_Qc0)5}^*(5WZs@_h?q^wj2!=t!%(fu`NjL&W;bC|L z9)-u@3C!;2-0VILFTzXkGMs@o;7xc7-h=aS0WQJ^@F9ExQ*afo!H-VXX0o`=o5gTF zJBza)xdY4JPe1-MS)TnOG@IP>Xw8?XtIXPDIBw{sF^oJ~QF-{djo|CRFuodOza|P# z5B=6WU7j5mA*uaM&TEOsMs{qdbFF?BoviqTdk4QjH-7UG@DcD4@DaEb2;?|M0`&X; zuDSpJzZI^08$JR)0=FpwklPe)YQW&qXQ%0La>xkabw6ITQN5Hy*%Iui3gxrU@i?KM o<11WVHKnwnJmk`pL)miVv*k$tqdx=u^S{Dw0P)ZN|JU>X-, catalog_handle: Option, } @@ -69,6 +69,10 @@ impl OptdCatalogProvider { catalog_handle, } } + + pub fn catalog_handle(&self) -> Option<&CatalogServiceHandle> { + self.catalog_handle.as_ref() + } } impl CatalogProvider for OptdCatalogProvider { @@ -81,10 +85,9 @@ impl CatalogProvider for OptdCatalogProvider { } fn schema(&self, name: &str) -> Option> { - let catalog_handle = self.catalog_handle.clone(); - self.inner.schema(name).map(|schema| { - Arc::new(OptdSchemaProvider::new(schema, catalog_handle)) as Arc - }) + self.inner + .schema(name) + .map(|schema| Arc::new(OptdSchemaProvider::new(schema)) as Arc) } fn register_schema( @@ -99,18 +102,11 @@ impl CatalogProvider for OptdCatalogProvider { #[derive(Debug)] pub struct OptdSchemaProvider { inner: Arc, - catalog_handle: Option, } impl OptdSchemaProvider { - pub fn new( - inner: Arc, - catalog_handle: Option, - ) -> Self { - Self { - inner, - catalog_handle, - } + pub fn new(inner: Arc) -> Self { + Self { inner } } } @@ -128,11 +124,7 @@ impl SchemaProvider for OptdSchemaProvider { let table_opt = self.inner.table(name).await?; if let Some(table) = table_opt { - let optd_table = Arc::new(OptdTableProvider::new( - table, - name.to_string(), - self.catalog_handle.clone(), - )); + let optd_table = Arc::new(OptdTableProvider::new(table, name.to_string())); Ok(Some(optd_table as Arc)) } else { 
diff --git a/connectors/datafusion/src/lib.rs b/connectors/datafusion/src/lib.rs index e47a400..b2123e5 100644 --- a/connectors/datafusion/src/lib.rs +++ b/connectors/datafusion/src/lib.rs @@ -5,7 +5,7 @@ mod table; use std::sync::Arc; -pub use catalog::{OptdCatalogProviderList, OptdSchemaProvider}; +pub use catalog::{OptdCatalogProvider, OptdCatalogProviderList, OptdSchemaProvider}; pub use extension::{OptdExtension, OptdExtensionConfig}; pub use planner::OptdQueryPlanner; pub use table::{OptdTable, OptdTableProvider}; diff --git a/connectors/datafusion/src/table.rs b/connectors/datafusion/src/table.rs index f2943cd..c4b65d8 100644 --- a/connectors/datafusion/src/table.rs +++ b/connectors/datafusion/src/table.rs @@ -12,8 +12,6 @@ use datafusion::{ sql::TableReference, }; -use optd_catalog::CatalogServiceHandle; - #[allow(dead_code)] pub struct OptdTable { inner: Box, @@ -58,31 +56,17 @@ impl OptdTable { #[derive(Debug, Clone)] pub struct OptdTableProvider { inner: Arc, - catalog_handle: Option, table_name: String, } impl OptdTableProvider { - pub fn new( - inner: Arc, - table_name: String, - catalog_handle: Option, - ) -> Self { - Self { - inner, - catalog_handle, - table_name, - } + pub fn new(inner: Arc, table_name: String) -> Self { + Self { inner, table_name } } pub fn table_name(&self) -> &str { &self.table_name } - - /// Get the catalog handle if available - pub fn catalog_handle(&self) -> Option<&CatalogServiceHandle> { - self.catalog_handle.as_ref() - } } #[async_trait::async_trait] diff --git a/connectors/datafusion/tests/integration_test.rs b/connectors/datafusion/tests/integration_test.rs index 34bbe4b..3030a3e 100644 --- a/connectors/datafusion/tests/integration_test.rs +++ b/connectors/datafusion/tests/integration_test.rs @@ -9,7 +9,7 @@ use datafusion::{ prelude::*, }; use optd_catalog::{CatalogService, DuckLakeCatalog}; -use optd_datafusion::{OptdCatalogProviderList, OptdTableProvider}; +use optd_datafusion::{OptdCatalogProvider, 
OptdCatalogProviderList, OptdTableProvider}; use serde_json; use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; @@ -53,8 +53,24 @@ fn create_test_data( (schema, batch) } -/// Retrieves OptdTableProvider from catalog hierarchy -async fn get_optd_table( +/// Wraps a catalog list as OptdCatalogProvider +async fn get_wrapped_catalog( + catalog_list: Arc, + catalog_handle: Option, +) -> Arc { + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list, catalog_handle); + let catalog = optd_catalog_list.catalog("datafusion").unwrap(); + Arc::new( + catalog + .as_any() + .downcast_ref::() + .unwrap() + .clone(), + ) +} + +/// Retrieves a table as OptdTableProvider +async fn get_wrapped_table( catalog_list: Arc, catalog_handle: Option, table_name: &str, @@ -67,12 +83,13 @@ async fn get_optd_table( .await .expect("Failed to retrieve table") .expect("Table not found"); - table - .as_any() - .downcast_ref::() - .unwrap() - .clone() - .into() + Arc::new( + table + .as_any() + .downcast_ref::() + .unwrap() + .clone(), + ) } #[tokio::test] @@ -105,10 +122,9 @@ async fn test_table_provider_wrapping() { .unwrap(); let mem_table = Arc::new(MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap()); - let optd_table = OptdTableProvider::new(mem_table.clone(), "test_table".to_string(), None); + let optd_table = OptdTableProvider::new(mem_table.clone(), "test_table".to_string()); assert_eq!(optd_table.table_name(), "test_table"); - assert!(optd_table.catalog_handle().is_none()); assert_eq!(optd_table.schema(), schema); assert!(optd_table.statistics().is_none()); } @@ -125,7 +141,7 @@ async fn test_schema_retrieval() { ); ctx.register_batch("numbers", batch).unwrap(); - let optd_table = get_optd_table(ctx.state().catalog_list().clone(), None, "numbers").await; + let optd_table = get_wrapped_table(ctx.state().catalog_list().clone(), None, "numbers").await; assert_eq!(optd_table.table_name(), "numbers"); let schema = optd_table.schema(); @@ -229,9 +245,11 @@ 
async fn test_table_metadata_access_through_catalog() { ); ctx.register_batch("orders", batch).unwrap(); - let optd_table = get_optd_table(ctx.state().catalog_list().clone(), None, "orders").await; + let optd_table = get_wrapped_table(ctx.state().catalog_list().clone(), None, "orders").await; + let catalog = get_wrapped_catalog(ctx.state().catalog_list().clone(), None).await; + assert_eq!(optd_table.table_name(), "orders"); - assert!(optd_table.catalog_handle().is_none()); + assert!(catalog.catalog_handle().is_none()); assert!(optd_table.statistics().is_none()); let results = ctx @@ -410,9 +428,15 @@ async fn test_catalog_service_handle_propagation() { ); ctx.register_batch("users", batch).unwrap(); - let optd_table = - get_optd_table(ctx.state().catalog_list().clone(), Some(handle), "users").await; - assert!(optd_table.catalog_handle().is_some()); + let optd_table = get_wrapped_table( + ctx.state().catalog_list().clone(), + Some(handle.clone()), + "users", + ) + .await; + let catalog = get_wrapped_catalog(ctx.state().catalog_list().clone(), Some(handle)).await; + + assert!(catalog.catalog_handle().is_some()); assert_eq!(optd_table.table_name(), "users"); assert_eq!(optd_table.schema(), schema); } @@ -430,8 +454,8 @@ async fn test_catalog_service_snapshot_retrieval() { ); ctx.register_batch("test", batch).unwrap(); - let optd_table = get_optd_table(ctx.state().catalog_list().clone(), Some(handle), "test").await; - let catalog_handle = optd_table.catalog_handle().unwrap(); + let catalog = get_wrapped_catalog(ctx.state().catalog_list().clone(), Some(handle)).await; + let catalog_handle = catalog.catalog_handle().unwrap(); let snapshot = catalog_handle.current_snapshot().await.unwrap(); assert_eq!(snapshot.0, 0, "Fresh catalog should start at snapshot 0"); @@ -487,16 +511,12 @@ async fn test_full_workflow_with_catalog_service() { ); ctx.register_batch("products", batch).unwrap(); - let optd_table = get_optd_table( - ctx.state().catalog_list().clone(), - 
Some(handle.clone()), - "products", - ) - .await; + let catalog = + get_wrapped_catalog(ctx.state().catalog_list().clone(), Some(handle.clone())).await; - assert!(optd_table.catalog_handle().is_some()); + assert!(catalog.catalog_handle().is_some()); - let snapshot = optd_table + let snapshot = catalog .catalog_handle() .unwrap() .current_snapshot() @@ -657,14 +677,9 @@ async fn test_catalog_service_with_datafusion_integration() { ); ctx.register_batch("test_table", batch).unwrap(); - let optd_table = get_optd_table( - ctx.state().catalog_list().clone(), - Some(handle), - "test_table", - ) - .await; + let catalog = get_wrapped_catalog(ctx.state().catalog_list().clone(), Some(handle)).await; - let snapshot = optd_table + let snapshot = catalog .catalog_handle() .unwrap() .current_snapshot() @@ -889,6 +904,10 @@ async fn test_multiple_schemas_with_catalog_service() { // Verify handle propagates to tables in both schemas let catalog_provider = optd_catalog_list.catalog("datafusion").unwrap(); + let optd_catalog = catalog_provider + .as_any() + .downcast_ref::() + .expect("Should be OptdCatalogProvider"); let table1 = catalog_provider .schema("public") @@ -897,10 +916,7 @@ async fn test_multiple_schemas_with_catalog_service() { .await .unwrap() .unwrap(); - let table1_optd = table1.as_any().downcast_ref::().unwrap(); - let handle1 = table1_optd - .catalog_handle() - .expect("table1 should have catalog handle"); + let _table1_optd = table1.as_any().downcast_ref::().unwrap(); let table2 = catalog_provider .schema("analytics") @@ -909,18 +925,16 @@ async fn test_multiple_schemas_with_catalog_service() { .await .unwrap() .unwrap(); - let table2_optd = table2.as_any().downcast_ref::().unwrap(); - let handle2 = table2_optd + let _table2_optd = table2.as_any().downcast_ref::().unwrap(); + + // Verify catalog has the handle (handle is at catalog level, not table level) + let handle = optd_catalog .catalog_handle() - .expect("table2 should have catalog handle"); + 
.expect("catalog should have catalog handle"); - // Verify both can access the same catalog service - let snapshot1 = handle1.current_snapshot().await.unwrap(); - let snapshot2 = handle2.current_snapshot().await.unwrap(); - assert_eq!( - snapshot1.0, snapshot2.0, - "Both tables should share the same catalog snapshot" - ); + // Verify catalog service is accessible + let snapshot = handle.current_snapshot().await.unwrap(); + assert_eq!(snapshot.0, 0, "Fresh catalog should start at snapshot 0"); // Verify cross-schema query works let results = ctx From 11e82406af507f16c21d4422fb567882ece474cf Mon Sep 17 00:00:00 2001 From: HFFuture Date: Sun, 23 Nov 2025 23:09:22 -0500 Subject: [PATCH 36/40] update cli to use catalog service --- Cargo.lock | 1 + cli/Cargo.toml | 3 +++ cli/src/main.rs | 32 +++++++++++++++++++++++++++++--- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6346d1..232ac6c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3691,6 +3691,7 @@ dependencies = [ "optd-catalog", "optd-datafusion", "regex", + "tempfile", "tokio", "tracing", "tracing-subscriber", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index c170f34..5d1a862 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -26,3 +26,6 @@ tracing = { workspace = true } futures = "0.3.31" optd-catalog = { path = "../optd/catalog", version = "0.1" } + +[dev-dependencies] +tempfile = "3" diff --git a/cli/src/main.rs b/cli/src/main.rs index 6379169..4ecfe31 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -44,7 +44,9 @@ use datafusion::common::config_err; use datafusion::config::ConfigOptions; use datafusion::execution::disk_manager::{DiskManagerBuilder, DiskManagerMode}; +use optd_catalog::{CatalogService, DuckLakeCatalog}; use optd_cli::OptdCliSessionContext; +use optd_datafusion::OptdCatalogProviderList; #[derive(Debug, Parser, PartialEq)] #[clap(author, version, about, long_about= None)] @@ -214,11 +216,35 @@ async fn main_inner() -> Result<()> { let cli_ctx = 
cli_ctx.enable_url_table(); let ctx = cli_ctx.inner(); + // Initialize catalog with optional DuckLake catalog service + let catalog_handle = if let Ok(metadata_path) = env::var("OPTD_CATALOG_METADATA_PATH") { + if !args.quiet { + println!("Using OptD catalog with metadata path: {}", metadata_path); + } + let ducklake_catalog = DuckLakeCatalog::try_new(None, Some(&metadata_path)) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + let (service, handle) = CatalogService::new(ducklake_catalog); + tokio::spawn(async move { service.run().await }); + Some(handle) + } else { + if !args.quiet { + println!("OptD catalog integration enabled (no persistent metadata)"); + } + None + }; + + // Wrap the catalog list with OptdCatalogProviderList + let original_catalog_list = ctx.state().catalog_list().clone(); + let optd_catalog_list = + OptdCatalogProviderList::new(original_catalog_list.clone(), catalog_handle); + // install dynamic catalog provider that can register required object stores - ctx.register_catalog_list(Arc::new(DynamicObjectStoreCatalog::new( - ctx.state().catalog_list().clone(), + // and wrap it with OptD catalog provider + let dynamic_catalog = Arc::new(DynamicObjectStoreCatalog::new( + Arc::new(optd_catalog_list), ctx.state_weak_ref(), - ))); + )); + ctx.register_catalog_list(dynamic_catalog); // register `parquet_metadata` table function to get metadata from parquet files ctx.register_udtf("parquet_metadata", Arc::new(ParquetMetadataFunc {})); From a487782a9e965778ca0c78db5df0277bc29aba5d Mon Sep 17 00:00:00 2001 From: HFFuture Date: Mon, 24 Nov 2025 21:05:48 -0500 Subject: [PATCH 37/40] add cli smoke and integration tests --- cli/smoke_test_cli.sh | 57 ++++++ cli/tests/catalog_service_integration.rs | 226 +++++++++++++++++++++++ 2 files changed, 283 insertions(+) create mode 100755 cli/smoke_test_cli.sh create mode 100644 cli/tests/catalog_service_integration.rs diff --git a/cli/smoke_test_cli.sh b/cli/smoke_test_cli.sh new file mode 100755 index 
0000000..e33a7b6 --- /dev/null +++ b/cli/smoke_test_cli.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# CLI smoke test - verifies catalog integration is active + +set -e # Exit on error + +GREEN='\033[0;32m' +RED='\033[0;31m' +RESET='\033[0m' + +echo "=== CLI Smoke Test ===" + +# Build +echo "Building..." +cargo build --package optd-cli --quiet +if [ ! -f ./target/debug/optd-cli ]; then + echo -e "${RED}✗ Build failed${RESET}" + exit 1 +fi + +CLI=./target/debug/optd-cli + +# Test 1: Basic functionality +echo "Test 1: Basic query execution" +output=$($CLI -c "SELECT 1 as test;" 2>&1) +if [ $? -eq 0 ] && echo "$output" | grep -q "OptD catalog"; then + echo -e "${GREEN}✓ PASS${RESET} - CLI runs, catalog integration active" +else + echo -e "${RED}✗ FAIL${RESET}" + exit 1 +fi + +# Test 2: Session persistence (multiple commands) +echo "Test 2: Session state persistence" +output=$($CLI -c "CREATE TABLE t (x INT);" -c "INSERT INTO t VALUES (1);" -c "SELECT * FROM t;" 2>&1) +if [ $? -eq 0 ] && echo "$output" | grep -q "1 row"; then + echo -e "${GREEN}✓ PASS${RESET} - Multiple commands work, session persists" +else + echo -e "${RED}✗ FAIL${RESET}" + exit 1 +fi + +# Test 3: Metadata path configuration +echo "Test 3: Metadata path environment variable" +TMPDIR_PATH=$(mktemp -d) +export OPTD_CATALOG_METADATA_PATH="$TMPDIR_PATH/test.ducklake" +output=$($CLI -c "SELECT 1;" 2>&1) +unset OPTD_CATALOG_METADATA_PATH +rm -rf "$TMPDIR_PATH" +if echo "$output" | grep -q "Using OptD catalog with metadata path"; then + echo -e "${GREEN}✓ PASS${RESET} - Metadata path recognized" +else + echo -e "${RED}✗ FAIL${RESET}" + exit 1 +fi + +echo "" +echo -e "${GREEN}✓ All smoke tests passed!${RESET}" diff --git a/cli/tests/catalog_service_integration.rs b/cli/tests/catalog_service_integration.rs new file mode 100644 index 0000000..f04c3a6 --- /dev/null +++ b/cli/tests/catalog_service_integration.rs @@ -0,0 +1,226 @@ +// Integration tests for OptD catalog service handle functions + +use 
datafusion::catalog::CatalogProviderList; +use datafusion::prelude::*; +use optd_catalog::{CatalogService, DuckLakeCatalog}; +use optd_datafusion::OptdCatalogProviderList; +use std::sync::Arc; +use tempfile::TempDir; + +#[tokio::test] +async fn test_catalog_service_handle() -> Result<(), Box> { + // Setup catalog with test data + let temp_dir = TempDir::new()?; + let metadata_path = temp_dir.path().join("metadata.ducklake"); + + { + let setup_catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let conn = setup_catalog.get_connection(); + conn.execute_batch("CREATE TABLE test_table (id INTEGER, name VARCHAR, age INTEGER)")?; + conn.execute_batch( + "INSERT INTO test_table VALUES (1, 'Alice', 30), (2, 'Bob', 25), (3, 'Carol', 35)", + )?; + } + + // Start catalog service again to check restart resilience + let catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + // Test catalog service handle functions + let snapshot = handle.current_snapshot().await?; + assert_eq!( + snapshot.0, 2, + "Snapshot should be 2 (CREATE TABLE and INSERT)" + ); + + let snapshot_info = handle.current_snapshot_info().await?; + assert!( + snapshot_info.schema_version >= 0, + "Schema version should be greater than or equal to 0" + ); + assert_eq!(snapshot_info.id.0, snapshot.0, "Snapshot IDs should match"); + + let schema = handle.current_schema(None, "test_table").await?; + assert_eq!(schema.fields().len(), 3, "Should have 3 fields"); + assert_eq!(schema.field(0).name(), "id"); + assert_eq!(schema.field(1).name(), "name"); + assert_eq!(schema.field(2).name(), "age"); + + // Test statistics + let query_catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let conn = query_catalog.get_connection(); + + let table_id: i64 = conn.query_row( + "SELECT table_id FROM 
__ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'test_table'", + [], + |row| row.get(0), + )?; + + let age_column_id: i64 = conn.query_row( + "SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'age'", + [table_id], + |row| row.get(0), + )?; + + // Test statistics update API + handle + .update_table_column_stats(age_column_id, table_id, "ndv", r#"{"distinct_count": 3}"#) + .await?; + + let updated_snapshot = handle.current_snapshot().await?; + assert_eq!( + updated_snapshot.0, 3, + "Should be snapshot 3 after stats update" + ); + + let stats = handle + .table_statistics("test_table", updated_snapshot) + .await? + .unwrap(); + assert_eq!(stats.row_count, 3, "Should have 3 rows"); + + let age_stats = stats + .column_statistics + .iter() + .find(|c| c.name == "age") + .expect("Should have statistics for 'age' column"); + + assert_eq!(age_stats.name, "age"); + assert_eq!(age_stats.column_type, "int32"); + + // Verify the ndv statistic was actually persisted + assert_eq!( + age_stats.advanced_stats.len(), + 1, + "Should have 1 advanced statistic" + ); + assert_eq!(age_stats.advanced_stats[0].stats_type, "ndv"); + assert_eq!( + age_stats.advanced_stats[0] + .data + .get("distinct_count") + .and_then(|v| v.as_i64()), + Some(3), + "Should have distinct_count of 3 in ndv statistic" + ); + + // Test multiple statistics on the same column (add histogram) + handle + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"buckets": [{"lower": 25, "upper": 30, "count": 2}, {"lower": 30, "upper": 35, "count": 1}]}"# + ) + .await?; + + let updated_snapshot2 = handle.current_snapshot().await?; + assert_eq!( + updated_snapshot2.0, 4, + "Should be snapshot 4 after histogram update" + ); + + let stats2 = handle + 
.table_statistics("test_table", updated_snapshot2) + .await? + .unwrap(); + + let age_stats2 = stats2 + .column_statistics + .iter() + .find(|c| c.name == "age") + .expect("Should have statistics for 'age' column"); + + // Should now have both ndv and histogram statistics + assert_eq!( + age_stats2.advanced_stats.len(), + 2, + "Should have 2 advanced statistics" + ); + + let ndv_stat = age_stats2 + .advanced_stats + .iter() + .find(|s| s.stats_type == "ndv") + .expect("Should have ndv"); + let histogram_stat = age_stats2 + .advanced_stats + .iter() + .find(|s| s.stats_type == "histogram") + .expect("Should have histogram"); + + assert_eq!( + ndv_stat.data.get("distinct_count").and_then(|v| v.as_i64()), + Some(3), + "ndv statistic should persist" + ); + + assert!( + histogram_stat + .data + .get("buckets") + .and_then(|v| v.as_array()) + .is_some(), + "histogram should have buckets array" + ); + + let buckets = histogram_stat + .data + .get("buckets") + .unwrap() + .as_array() + .unwrap(); + assert_eq!(buckets.len(), 2, "Should have 2 histogram buckets"); + + // Test DataFusion integration + let ctx = SessionContext::new(); + let df_schema = Arc::new(datafusion::arrow::datatypes::Schema::new(vec![ + datafusion::arrow::datatypes::Field::new( + "id", + datafusion::arrow::datatypes::DataType::Int32, + false, + ), + datafusion::arrow::datatypes::Field::new( + "value", + datafusion::arrow::datatypes::DataType::Int32, + false, + ), + ])); + + let batch = datafusion::arrow::array::RecordBatch::try_new( + df_schema.clone(), + vec![ + Arc::new(datafusion::arrow::array::Int32Array::from(vec![ + 1, 2, 3, 4, 5, + ])), + Arc::new(datafusion::arrow::array::Int32Array::from(vec![ + 10, 20, 30, 40, 50, + ])), + ], + )?; + ctx.register_batch("test_table", batch)?; + + let optd_catalog_list = + OptdCatalogProviderList::new(ctx.state().catalog_list().clone(), Some(handle.clone())); + + let catalog = optd_catalog_list.catalog("datafusion").unwrap(); + let optd_catalog = catalog + 
.as_any() + .downcast_ref::() + .expect("Should be OptdCatalogProvider"); + + assert!(optd_catalog.catalog_handle().is_some()); + + // Verify handle works from catalog + optd_catalog + .catalog_handle() + .unwrap() + .current_snapshot() + .await?; + + Ok(()) +} From 5727cedaabbb82708a237a817759e1d9847f3493 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Mon, 24 Nov 2025 21:07:02 -0500 Subject: [PATCH 38/40] remove lib extraneous comment --- cli/src/lib.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cli/src/lib.rs b/cli/src/lib.rs index b3f86bb..89a878d 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -42,15 +42,6 @@ impl OptdCliSessionContext { &self.inner } - #[ignore = "not yet fully implemented"] - // pub fn register_optd_catalog(&self, optd_catalog: Arc) -> Result<()> { - // let state = self.inner.state_ref().read().clone(); - // state.register_catalog( - // "ducklake", - // Arc::new(datafusion_ducklake::DuckLakeCatalogProvider::new()), - // ) - // } - pub fn return_empty_dataframe(&self) -> Result { let plan = LogicalPlanBuilder::empty(false).build()?; Ok(DataFrame::new(self.inner.state(), plan)) From 664e55af4a1e81d3cf3e9c06a1d65c49aaec5331 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Mon, 24 Nov 2025 21:46:30 -0500 Subject: [PATCH 39/40] Change path naming --- cli/smoke_test_cli.sh | 4 ++-- cli/src/main.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/smoke_test_cli.sh b/cli/smoke_test_cli.sh index e33a7b6..726d1c8 100755 --- a/cli/smoke_test_cli.sh +++ b/cli/smoke_test_cli.sh @@ -42,9 +42,9 @@ fi # Test 3: Metadata path configuration echo "Test 3: Metadata path environment variable" TMPDIR_PATH=$(mktemp -d) -export OPTD_CATALOG_METADATA_PATH="$TMPDIR_PATH/test.ducklake" +export OPTD_METADATA_CATALOG_PATH="$TMPDIR_PATH/test.ducklake" output=$($CLI -c "SELECT 1;" 2>&1) -unset OPTD_CATALOG_METADATA_PATH +unset OPTD_METADATA_CATALOG_PATH rm -rf "$TMPDIR_PATH" if echo "$output" | grep -q "Using OptD catalog with 
metadata path"; then echo -e "${GREEN}✓ PASS${RESET} - Metadata path recognized" diff --git a/cli/src/main.rs b/cli/src/main.rs index 4ecfe31..88547d0 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -217,7 +217,7 @@ async fn main_inner() -> Result<()> { let ctx = cli_ctx.inner(); // Initialize catalog with optional DuckLake catalog service - let catalog_handle = if let Ok(metadata_path) = env::var("OPTD_CATALOG_METADATA_PATH") { + let catalog_handle = if let Ok(metadata_path) = env::var("OPTD_METADATA_CATALOG_PATH") { if !args.quiet { println!("Using OptD catalog with metadata path: {}", metadata_path); } From 851be1fee5253e3cae84c9a754b9cc6af961f445 Mon Sep 17 00:00:00 2001 From: HFFuture Date: Mon, 24 Nov 2025 22:13:02 -0500 Subject: [PATCH 40/40] Add more comprehensive Datafusion integration tests --- cli/tests/catalog_service_integration.rs | 159 ++++++++++++++++++----- 1 file changed, 128 insertions(+), 31 deletions(-) diff --git a/cli/tests/catalog_service_integration.rs b/cli/tests/catalog_service_integration.rs index f04c3a6..d893b50 100644 --- a/cli/tests/catalog_service_integration.rs +++ b/cli/tests/catalog_service_integration.rs @@ -1,7 +1,11 @@ // Integration tests for OptD catalog service handle functions -use datafusion::catalog::CatalogProviderList; -use datafusion::prelude::*; +use datafusion::{ + arrow::array::{Int32Array, RecordBatch}, + arrow::datatypes::{DataType, Field, Schema}, + catalog::CatalogProviderList, + prelude::SessionContext, +}; use optd_catalog::{CatalogService, DuckLakeCatalog}; use optd_datafusion::OptdCatalogProviderList; use std::sync::Arc; @@ -176,33 +180,75 @@ async fn test_catalog_service_handle() -> Result<(), Box> .unwrap(); assert_eq!(buckets.len(), 2, "Should have 2 histogram buckets"); - // Test DataFusion integration + Ok(()) +} + +#[tokio::test] +async fn test_datafusion_catalog_integration() -> Result<(), Box> { + // Setup catalog with test data and statistics + let temp_dir = TempDir::new()?; + let 
metadata_path = temp_dir.path().join("metadata.ducklake"); + + { + let setup_catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let conn = setup_catalog.get_connection(); + conn.execute_batch("CREATE TABLE df_test (id INTEGER, value INTEGER)")?; + conn.execute_batch( + "INSERT INTO df_test VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50)", + )?; + } + + let catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + // Setup statistics for testing + let query_catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let conn = query_catalog.get_connection(); + + let table_id: i64 = conn.query_row( + "SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'df_test'", + [], + |row| row.get(0), + )?; + + let value_column_id: i64 = conn.query_row( + "SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'value'", + [table_id], + |row| row.get(0), + )?; + + // Add test statistics + handle + .update_table_column_stats(value_column_id, table_id, "ndv", r#"{"distinct_count": 5}"#) + .await?; + handle + .update_table_column_stats( + value_column_id, + table_id, + "histogram", + r#"{"buckets": [{"lower": 10, "upper": 30, "count": 3}, {"lower": 30, "upper": 50, "count": 2}]}"# + ) + .await?; + + // Test DataFusion catalog integration let ctx = SessionContext::new(); - let df_schema = Arc::new(datafusion::arrow::datatypes::Schema::new(vec![ - datafusion::arrow::datatypes::Field::new( - "id", - datafusion::arrow::datatypes::DataType::Int32, - false, - ), - datafusion::arrow::datatypes::Field::new( - "value", - datafusion::arrow::datatypes::DataType::Int32, - false, - ), - ])); - - let batch = datafusion::arrow::array::RecordBatch::try_new( - df_schema.clone(), - vec![ - Arc::new(datafusion::arrow::array::Int32Array::from(vec![ - 1, 2, 3, 4, 5, + ctx.register_batch( + "df_test", + RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("value", DataType::Int32, false), ])), - Arc::new(datafusion::arrow::array::Int32Array::from(vec![ - 10, 20, 30, 40, 50, - ])), - ], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(Int32Array::from(vec![10, 20, 30, 40, 50])), + ], + )?, )?; - ctx.register_batch("test_table", batch)?; let optd_catalog_list = OptdCatalogProviderList::new(ctx.state().catalog_list().clone(), Some(handle.clone())); @@ -213,14 +259,65 @@ async fn test_catalog_service_handle() -> Result<(), Box> .downcast_ref::() .expect("Should be OptdCatalogProvider"); - assert!(optd_catalog.catalog_handle().is_some()); + assert!( + optd_catalog.catalog_handle().is_some(), + "Catalog handle should propagate through DataFusion integration" + ); - // Verify handle works from catalog - optd_catalog + // Verify statistics retrieval through DataFusion catalog + let stats_via_catalog = 
optd_catalog .catalog_handle() .unwrap() - .current_snapshot() - .await?; + .table_statistics( + "df_test", + optd_catalog + .catalog_handle() + .unwrap() + .current_snapshot() + .await?, + ) + .await? + .unwrap(); + + assert_eq!(stats_via_catalog.row_count, 5); + + let value_stats = stats_via_catalog + .column_statistics + .iter() + .find(|c| c.name == "value") + .expect("Should find value column statistics"); + + assert_eq!( + value_stats.advanced_stats.len(), + 2, + "Should have both ndv and histogram stats" + ); + + // Verify ndv statistic + assert_eq!( + value_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "ndv") + .and_then(|s| s.data.get("distinct_count").and_then(|v| v.as_i64())), + Some(5), + "Should retrieve ndv statistics through DataFusion catalog" + ); + + // Verify histogram statistic + let histogram = value_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "histogram") + .expect("Should have histogram statistic"); + let buckets = histogram + .data + .get("buckets") + .and_then(|v| v.as_array()) + .expect("Should have buckets"); + assert_eq!(buckets.len(), 2); + assert_eq!(buckets[0].get("lower").and_then(|v| v.as_i64()), Some(10)); + assert_eq!(buckets[0].get("count").and_then(|v| v.as_i64()), Some(3)); Ok(()) }