diff --git a/Cargo.lock b/Cargo.lock index 94abe65..bf04a1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.15", + "once_cell", + "version_check", +] + [[package]] name = "ahash" version = "0.8.11" @@ -33,6 +44,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", + "const-random", "getrandom 0.2.15", "once_cell", "version_check", @@ -169,6 +181,170 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "arrow" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num", +] + +[[package]] +name = "arrow-array" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" +dependencies = [ + "ahash 0.8.11", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.15.2", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64 0.22.1", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-data" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ord" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] + +[[package]] +name = "arrow-row" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "arrow-select" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "54.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + [[package]] name = "async-native-tls" version = "0.4.0" @@ -302,6 +478,18 @@ dependencies = [ "serde", ] +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -361,6 +549,29 @@ dependencies = [ "serde_with", ] +[[package]] +name = "borsh" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5430e3be710b68d984d1391c854eb431a9d548640711faa54eecb1df93db91cc" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8b668d39970baad5356d7c83a86fee3a539e6f93bf6764c97368243e17a0487" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "brotli" version = "7.0.0" @@ -391,6 +602,28 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "bytemuck" version = "1.16.3" @@ -423,6 +656,12 @@ version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "castaway" version = "0.2.3" @@ -448,6 +687,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.39" @@ -512,7 +757,7 @@ version = "4.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.87", @@ -543,8 +788,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" dependencies = [ "crossterm", - "strum", - "strum_macros", + "strum 0.26.3", + "strum_macros 0.26.4", "unicode-width 0.1.13", ] @@ -584,6 +829,26 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.15", + "once_cell", + "tiny-keccak", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -609,6 +874,7 @@ dependencies = [ "chrono", "clap", "csv", + "duckdb", "env_logger", "futures", "futures-io", @@ -845,6 +1111,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" + [[package]] name = "crypto-common" version = "0.1.6" @@ -991,6 +1263,25 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "duckdb" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2093a18d0c07e411104a9d27ef0097872172552ad5774feba304c2b47f382c" +dependencies = [ + "arrow", + "cast", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libduckdb-sys", + "memchr", + "num-integer", + "rust_decimal", + "smallvec", + "strum 0.25.0", +] + [[package]] name = "dyn-clone" version = "1.0.17" @@ -1219,6 +1510,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.31" @@ -1398,11 +1695,25 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + [[package]] name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] [[package]] name = "hashbrown" @@ -1410,7 +1721,7 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "ahash", + "ahash 0.8.11", "allocator-api2", "rayon", "serde", @@ -1438,6 +1749,12 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -1777,12 +2094,92 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" +[[package]] +name = "lexical-core" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +[[package]] +name = "libduckdb-sys" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc4020eaf07df4927b5205cd200ca2a5ed0798b49652dec22e09384ba8efa163" +dependencies = [ + "autocfg", + "cc", + "flate2", + "pkg-config", + "serde", + "serde_json", + "tar", + "vcpkg", +] + [[package]] name = "libm" version = "0.2.8" @@ -2033,6 +2430,20 @@ dependencies = [ "winapi", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -2384,7 +2795,7 @@ version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d1fd3c64d50b7f5f328e1566cab9979d4bc1ba2ff22114b301ed2ee0e518dbca" dependencies = [ - "ahash", + "ahash 0.8.11", "atoi", "bytemuck", "chrono", @@ -2406,7 +2817,7 @@ dependencies = [ "simdutf8", "streaming-iterator", "strength_reduce", - "strum_macros", + "strum_macros 0.26.4", "version_check", "zstd", ] @@ -2449,7 +2860,7 @@ version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4794a9e38ef2faf7e47a6f736c7f156c6fbb66cd529f82593b2d48348e422c8d" dependencies = [ - "ahash", + "ahash 0.8.11", "bitflags 2.6.0", "bytemuck", "chrono", @@ -2472,7 +2883,7 @@ dependencies = [ "rand_distr", "rayon", "regex", - "strum_macros", + "strum_macros 0.26.4", "thiserror 2.0.11", "version_check", "xxhash-rust", @@ -2496,7 +2907,7 @@ version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad56c5ea4d6e0546fbc3fa35918a537b76587600a5118770ed331136249d50d8" dependencies = [ - "ahash", + "ahash 0.8.11", "bitflags 2.6.0", "hashbrown 0.15.2", "num-traits", @@ -2520,7 +2931,7 @@ version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95d774d5971d2092f0588e89d2f0be524dff35ea368272c0810ba54a860e4411" dependencies = [ - "ahash", + "ahash 0.8.11", "async-trait", "atoi_simd", "bytes", @@ -2557,7 +2968,7 @@ version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa457bfa96f45cf14c33507eaa3ebcec6a8d52e7f7fc60cd23f338631369d417" dependencies = [ - "ahash", + "ahash 0.8.11", "bitflags 2.6.0", "memchr", "once_cell", @@ -2601,7 +3012,7 @@ version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b267480495ffe382dab63318e3c6bf4073bb82971c8b80294d079293fece458b" dependencies = [ - "ahash", + "ahash 0.8.11", "argminmax", "base64 0.22.1", "bytemuck", @@ -2622,7 +3033,7 @@ dependencies = [ "rayon", "regex", "regex-syntax", - "strum_macros", + "strum_macros 0.26.4", "unicode-reverse", "version_check", ] @@ -2633,7 +3044,7 @@ version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20237f232b1a74b1fae6b5c9bea8c440f2e5d3b5506601b038f0a7a34b84b710" dependencies = [ - "ahash", + "ahash 0.8.11", "async-stream", "base64 0.22.1", "brotli", @@ -2697,7 +3108,7 @@ version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99a3832887671df1eb326df52cbfcc47789d3d58454c1084a154b48b240175e2" dependencies = [ - "ahash", + "ahash 0.8.11", "bitflags 2.6.0", "bytemuck", "bytes", @@ -2720,7 +3131,7 @@ dependencies = [ "rayon", "recursive", "regex", - "strum_macros", + "strum_macros 0.26.4", "version_check", ] @@ -2821,7 +3232,7 @@ dependencies = [ "polars-ops", "polars-utils", "regex", - "strum_macros", + "strum_macros 0.26.4", ] [[package]] @@ -2830,7 +3241,7 @@ version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0fc010eea42ad113b641aa53106e4d6e474650c73573d959a546eed0ce6d479" dependencies = [ - "ahash", + "ahash 0.8.11", "bytemuck", "bytes", "compact_str", @@ -2883,6 +3294,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6fa0831dd7cc608c38a5e323422a0077678fa5744aa2be4ad91c4ece8eec8d5" +[[package]] +name = "proc-macro-crate" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.86" @@ -2901,6 +3321,26 @@ dependencies = [ "cc", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "pulley-interpreter" version = "29.0.1" @@ -2922,6 +3362,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.7.3" @@ -3139,6 +3585,15 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + [[package]] name = "ring" version = "0.17.8" @@ -3154,6 +3609,35 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rkyv" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "rmp" version = "0.8.14" @@ -3196,6 +3680,22 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rust_decimal" +version = "1.36.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b082d80e3e3cc52b2ed634388d436fe1f4de6af5786cc2de9ba9737527bdf555" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand 0.8.5", + "rkyv", + "serde", + "serde_json", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -3375,6 +3875,12 @@ dependencies = [ "untrusted", ] +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "security-framework" version = "2.11.1" @@ -3741,7 +4247,7 @@ checksum = "1a099220ae541c5db479c6424bdf1b200987934033c2584f79a0e1693601e776" dependencies = [ "dotenvy", "either", - "heck", + "heck 0.5.0", "hex", "once_cell", "proc-macro2", @@ -3948,19 +4454,41 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros 0.25.3", +] + [[package]] name = "strum" version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.87", +] + [[package]] name = "strum_macros" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "rustversion", @@ -4008,6 +4536,23 @@ dependencies = [ "windows", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tar" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "target-features" version = "0.1.6" @@ -4168,6 +4713,15 @@ dependencies = [ "time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinyvec" version = "1.8.0" @@ -4852,7 +5406,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8358319c2dd1e4db79e3c1c5d3a5af84956615343f9f89f4e4996a36816e06e6" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "indexmap 2.7.0", "wit-parser", ] @@ -5204,6 +5758,15 @@ dependencies = [ "wasmparser 0.221.2", ] +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xattr" version = "1.3.1" diff --git a/Cargo.toml b/Cargo.toml index 8174889..51e12ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ tokio-util = { version = "0.7.13", features = ["compat"] } async-trait = "0.1.83" testcontainers = "0.23.1" wasmtime = "29.0.1" +duckdb = { version = "1.2.0", features = ["bundled"] } # More info on targets: https://doc.rust-lang.org/cargo/reference/cargo-targets.html#configuring-a-target [lib] diff --git a/src/graph/sql.rs b/src/graph/sql.rs index e0cecaf..4cc44e3 100644 --- a/src/graph/sql.rs +++ b/src/graph/sql.rs @@ -1,3 +1,4 @@ +use duckdb::{params, params_from_iter, Connection}; use futures::TryStreamExt; use futures_io::{AsyncRead, AsyncWrite}; use sqlx::{Any, Column, Executor, Pool, Row}; @@ -117,3 +118,41 @@ impl QueryExecutor for Pool { Ok(result.rows_affected()) } } + +impl QueryExecutor for Connection { + async fn get_rows( + &mut self, + query: impl AsRef, + params: &[String], + row_consumer: &mut impl FnMut(BTreeMap) -> anyhow::Result<()>, + ) -> anyhow::Result<()> { + let mut stmt = self.prepare(query.as_ref())?; + let columns = stmt.column_names(); + let mut row_iter = stmt.query_map(params_from_iter(params.iter()), |row| { + let mut row_map: BTreeMap = BTreeMap::new(); + for i in 0..columns.len() { + row_map.insert(columns[i].clone(), row.get(i)?); + } + Ok(row_map) + })?; + for row in row_iter { + let row = row?; + row_consumer(row)?; + } + Ok(()) + } + + async fn execute_query( + &mut self, + query: impl AsRef, + params: &[String], + ) -> anyhow::Result { + let rows_processed = self.execute(query.as_ref(), params_from_iter(params.iter()))?; + Ok(rows_processed as u64) + } + + async fn execute_unchecked(&mut self, query: impl AsRef) -> anyhow::Result { + let rows_processed = self.execute(query.as_ref(), params![])?; + Ok(rows_processed as u64) + } +} diff --git a/src/graph/upload_to_db.rs b/src/graph/upload_to_db.rs index b56cc7f..c416839 100644 --- a/src/graph/upload_to_db.rs +++ b/src/graph/upload_to_db.rs @@ -1,5 +1,6 @@ use anyhow::bail; use async_trait::async_trait; +use duckdb::Connection; use itertools::Itertools; use log::{log, Level}; use schemars::JsonSchema; @@ -24,8 +25,8 @@ pub async fn upload_file_bulk( if upload_node.column_mappings.is_none() { let insert_from_file_query = match upload_node.db_type { DBType::Postgres => Some(format!( - r#"COPY "{}" FROM '{}' DELIMITERS ',' CSV HEADER QUOTE '"';"#, - upload_node.table_name.replace("'", "''"), + r#"COPY "{}" FROM '{}' WITH DELIMITER ',' CSV HEADER QUOTE '"';"#, + upload_node.table_name.replace("\"", ""), upload_node.file_path.replace("'", "''") )), // This can't be parameterised as mysql doesn't allow LOAD DATA @@ -41,10 +42,14 @@ pub async fn upload_file_bulk( )), DBType::Mssql => Some(format!( "BULK INSERT [{}] FROM '{}' WITH ( FORMAT = 'CSV', FIRSTROW = 2 );", - upload_node.table_name.replace("'", "''"), + upload_node.table_name.replace("]", ""), + upload_node.file_path.replace("'", "''") + )), + DBType::Duckdb => Some(format!( + r#"COPY "{}" FROM '{}' WITH DELIMITER ',' CSV HEADER QUOTE '"';"#, + upload_node.table_name.replace("\"", ""), upload_node.file_path.replace("'", "''") )), - _ => None, }; if let Some(insert_from_file_query) = insert_from_file_query { let result = executor.execute_unchecked(&insert_from_file_query).await; @@ -60,6 +65,10 @@ pub async fn upload_file_bulk( } if rows_affected == None { + log!( + Level::Warn, + "Failed to copy csv to database, falling back to manual insert, which will be slow" + ); let mut file_reader = csv::Reader::from_path(upload_node.file_path.clone())?; let csv_columns = file_reader .headers()? @@ -139,17 +148,17 @@ pub enum DBType { Mysql, Postgres, Mssql, - Sqlite, + Duckdb, } impl DBType { pub fn quote_name(&self, quoted: impl AsRef) -> String { let quoted = quoted.as_ref(); match self { - DBType::Postgres => format!(r#""{}""#, quoted), - DBType::Mysql => format!("`{}`", quoted), - DBType::Mssql => format!("[{}]", quoted), - DBType::Sqlite => format!("[{}]", quoted), + DBType::Postgres => format!(r#""{}""#, quoted.replace("\"", "")), + DBType::Mysql => format!("`{}`", quoted.replace("`", "")), + DBType::Mssql => format!("[{}]", quoted.replace("]", "")), + DBType::Duckdb => format!(r#""{}""#, quoted.replace("\"", "")), } } } @@ -181,6 +190,9 @@ impl RunnableNode for UploadNodeRunner { tcp.set_nodelay(true)?; let mut client = tiberius::Client::connect(config, tcp.compat_write()).await?; upload_file_bulk(&mut client, &upload_node, BIND_LIMIT, &upload_node.db_type).await?; + } else if upload_node.db_type == DBType::Duckdb { + let mut conn = Connection::open(&upload_node.connection_string)?; + upload_file_bulk(&mut conn, &upload_node, BIND_LIMIT, &upload_node.db_type).await?; } else { install_default_drivers(); let mut pool = AnyPool::connect(&upload_node.connection_string).await?; @@ -194,8 +206,10 @@ impl RunnableNode for UploadNodeRunner { mod tests { use crate::graph::node::RunnableNode; use crate::graph::upload_to_db::{DBType, UploadNode, UploadNodeRunner}; + use duckdb::{params, Connection}; use sqlx::{AnyPool, Row}; use std::path::PathBuf; + use std::{fs, path}; use testcontainers::core::{IntoContainerPort, WaitFor}; use testcontainers::runners::AsyncRunner; use testcontainers::{GenericImage, ImageExt}; @@ -301,6 +315,47 @@ mod tests { Ok(()) } + #[tokio::test] + pub async fn check_bulk_upload_duckdb() -> anyhow::Result<()> { + let connection_string = "./testing/output/test.duckdb".to_owned(); + fs::remove_file(&connection_string); + let file = path::absolute("./testing/input/upload_to_db/test.csv")? + .to_string_lossy() + .into_owned(); + let table_name = "My Test Table".to_string(); + let upload_node = UploadNodeRunner { + upload_node: UploadNode { + file_path: file.to_owned(), + table_name: table_name.clone(), + column_mappings: None, + post_script: None, + db_type: DBType::Duckdb, + connection_string, + }, + }; + let mut conn = Connection::open(&upload_node.upload_node.connection_string)?; + conn.execute( + r#"CREATE TABLE "My Test Table" ( + column1 INT, + column2 VARCHAR(200), + );"#, + params![], + )?; + upload_node.run().await?; + // Duckdb quirk + let mut conn = Connection::open(&upload_node.upload_node.connection_string)?; + let mut stmt = conn.prepare(r#"SELECT column1, column2 FROM "My Test Table";"#)?; + let mut query = stmt.query(params![])?; + let row = query.next()?; + assert_eq!(true, row.is_some()); + let row = row.unwrap(); + let column1: i32 = row.get(0)?; + let column2: String = row.get(1)?; + assert_eq!(1, column1); + assert_eq!("Hello", column2); + Ok(()) + } + #[tokio::test] pub async fn check_bulk_upload_mysql() -> anyhow::Result<()> { let container = GenericImage::new(