Add support for duckdb data upload and querying
Some checks failed
test / test (push) Failing after 1h29m18s

This commit is contained in:
2025-02-21 16:06:21 +10:30
parent 02772a01a8
commit 88e57f1860
4 changed files with 688 additions and 30 deletions

605
Cargo.lock generated
View File

@@ -26,6 +26,17 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9"
dependencies = [
"getrandom 0.2.15",
"once_cell",
"version_check",
]
[[package]] [[package]]
name = "ahash" name = "ahash"
version = "0.8.11" version = "0.8.11"
@@ -33,6 +44,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"const-random",
"getrandom 0.2.15", "getrandom 0.2.15",
"once_cell", "once_cell",
"version_check", "version_check",
@@ -169,6 +181,170 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76"
[[package]]
name = "arrayvec"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arrow"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7"
dependencies = [
"arrow-arith",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-data",
"arrow-ord",
"arrow-row",
"arrow-schema",
"arrow-select",
"arrow-string",
]
[[package]]
name = "arrow-arith"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"chrono",
"num",
]
[[package]]
name = "arrow-array"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a"
dependencies = [
"ahash 0.8.11",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"chrono",
"half",
"hashbrown 0.15.2",
"num",
]
[[package]]
name = "arrow-buffer"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a"
dependencies = [
"bytes",
"half",
"num",
]
[[package]]
name = "arrow-cast"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"arrow-select",
"atoi",
"base64 0.22.1",
"chrono",
"comfy-table",
"half",
"lexical-core",
"num",
"ryu",
]
[[package]]
name = "arrow-data"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83"
dependencies = [
"arrow-buffer",
"arrow-schema",
"half",
"num",
]
[[package]]
name = "arrow-ord"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"arrow-select",
]
[[package]]
name = "arrow-row"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"half",
]
[[package]]
name = "arrow-schema"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735"
dependencies = [
"bitflags 2.6.0",
]
[[package]]
name = "arrow-select"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539"
dependencies = [
"ahash 0.8.11",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"num",
]
[[package]]
name = "arrow-string"
version = "54.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"arrow-select",
"memchr",
"num",
"regex",
"regex-syntax",
]
[[package]] [[package]]
name = "async-native-tls" name = "async-native-tls"
version = "0.4.0" version = "0.4.0"
@@ -302,6 +478,18 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "bitvec"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
dependencies = [
"funty",
"radium",
"tap",
"wyz",
]
[[package]] [[package]]
name = "block-buffer" name = "block-buffer"
version = "0.10.4" version = "0.10.4"
@@ -361,6 +549,29 @@ dependencies = [
"serde_with", "serde_with",
] ]
[[package]]
name = "borsh"
version = "1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5430e3be710b68d984d1391c854eb431a9d548640711faa54eecb1df93db91cc"
dependencies = [
"borsh-derive",
"cfg_aliases",
]
[[package]]
name = "borsh-derive"
version = "1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8b668d39970baad5356d7c83a86fee3a539e6f93bf6764c97368243e17a0487"
dependencies = [
"once_cell",
"proc-macro-crate",
"proc-macro2",
"quote",
"syn 2.0.87",
]
[[package]] [[package]]
name = "brotli" name = "brotli"
version = "7.0.0" version = "7.0.0"
@@ -391,6 +602,28 @@ dependencies = [
"allocator-api2", "allocator-api2",
] ]
[[package]]
name = "bytecheck"
version = "0.6.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2"
dependencies = [
"bytecheck_derive",
"ptr_meta",
"simdutf8",
]
[[package]]
name = "bytecheck_derive"
version = "0.6.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]] [[package]]
name = "bytemuck" name = "bytemuck"
version = "1.16.3" version = "1.16.3"
@@ -423,6 +656,12 @@ version = "1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3"
[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]] [[package]]
name = "castaway" name = "castaway"
version = "0.2.3" version = "0.2.3"
@@ -448,6 +687,12 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cfg_aliases"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]] [[package]]
name = "chrono" name = "chrono"
version = "0.4.39" version = "0.4.39"
@@ -512,7 +757,7 @@ version = "4.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e" checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e"
dependencies = [ dependencies = [
"heck", "heck 0.5.0",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.87", "syn 2.0.87",
@@ -543,8 +788,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7"
dependencies = [ dependencies = [
"crossterm", "crossterm",
"strum", "strum 0.26.3",
"strum_macros", "strum_macros 0.26.4",
"unicode-width 0.1.13", "unicode-width 0.1.13",
] ]
@@ -584,6 +829,26 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "const-random"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359"
dependencies = [
"const-random-macro",
]
[[package]]
name = "const-random-macro"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
dependencies = [
"getrandom 0.2.15",
"once_cell",
"tiny-keccak",
]
[[package]] [[package]]
name = "core-foundation" name = "core-foundation"
version = "0.9.4" version = "0.9.4"
@@ -609,6 +874,7 @@ dependencies = [
"chrono", "chrono",
"clap", "clap",
"csv", "csv",
"duckdb",
"env_logger", "env_logger",
"futures", "futures",
"futures-io", "futures-io",
@@ -845,6 +1111,12 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "crunchy"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929"
[[package]] [[package]]
name = "crypto-common" name = "crypto-common"
version = "0.1.6" version = "0.1.6"
@@ -991,6 +1263,25 @@ version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
[[package]]
name = "duckdb"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e2093a18d0c07e411104a9d27ef0097872172552ad5774feba304c2b47f382c"
dependencies = [
"arrow",
"cast",
"fallible-iterator",
"fallible-streaming-iterator",
"hashlink",
"libduckdb-sys",
"memchr",
"num-integer",
"rust_decimal",
"smallvec",
"strum 0.25.0",
]
[[package]] [[package]]
name = "dyn-clone" name = "dyn-clone"
version = "1.0.17" version = "1.0.17"
@@ -1219,6 +1510,12 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "funty"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
[[package]] [[package]]
name = "futures" name = "futures"
version = "0.3.31" version = "0.3.31"
@@ -1398,11 +1695,25 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "half"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
dependencies = [
"cfg-if",
"crunchy",
"num-traits",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.12.3" version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash 0.7.8",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
@@ -1410,7 +1721,7 @@ version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [ dependencies = [
"ahash", "ahash 0.8.11",
"allocator-api2", "allocator-api2",
"rayon", "rayon",
"serde", "serde",
@@ -1438,6 +1749,12 @@ dependencies = [
"hashbrown 0.14.5", "hashbrown 0.14.5",
] ]
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.5.0" version = "0.5.0"
@@ -1777,12 +2094,92 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"
[[package]]
name = "lexical-core"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958"
dependencies = [
"lexical-parse-float",
"lexical-parse-integer",
"lexical-util",
"lexical-write-float",
"lexical-write-integer",
]
[[package]]
name = "lexical-parse-float"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2"
dependencies = [
"lexical-parse-integer",
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-parse-integer"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-util"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3"
dependencies = [
"static_assertions",
]
[[package]]
name = "lexical-write-float"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd"
dependencies = [
"lexical-util",
"lexical-write-integer",
"static_assertions",
]
[[package]]
name = "lexical-write-integer"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.169" version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "libduckdb-sys"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc4020eaf07df4927b5205cd200ca2a5ed0798b49652dec22e09384ba8efa163"
dependencies = [
"autocfg",
"cc",
"flate2",
"pkg-config",
"serde",
"serde_json",
"tar",
"vcpkg",
]
[[package]] [[package]]
name = "libm" name = "libm"
version = "0.2.8" version = "0.2.8"
@@ -2033,6 +2430,20 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "num"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
]
[[package]] [[package]]
name = "num-bigint" name = "num-bigint"
version = "0.4.6" version = "0.4.6"
@@ -2384,7 +2795,7 @@ version = "0.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1fd3c64d50b7f5f328e1566cab9979d4bc1ba2ff22114b301ed2ee0e518dbca" checksum = "d1fd3c64d50b7f5f328e1566cab9979d4bc1ba2ff22114b301ed2ee0e518dbca"
dependencies = [ dependencies = [
"ahash", "ahash 0.8.11",
"atoi", "atoi",
"bytemuck", "bytemuck",
"chrono", "chrono",
@@ -2406,7 +2817,7 @@ dependencies = [
"simdutf8", "simdutf8",
"streaming-iterator", "streaming-iterator",
"strength_reduce", "strength_reduce",
"strum_macros", "strum_macros 0.26.4",
"version_check", "version_check",
"zstd", "zstd",
] ]
@@ -2449,7 +2860,7 @@ version = "0.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4794a9e38ef2faf7e47a6f736c7f156c6fbb66cd529f82593b2d48348e422c8d" checksum = "4794a9e38ef2faf7e47a6f736c7f156c6fbb66cd529f82593b2d48348e422c8d"
dependencies = [ dependencies = [
"ahash", "ahash 0.8.11",
"bitflags 2.6.0", "bitflags 2.6.0",
"bytemuck", "bytemuck",
"chrono", "chrono",
@@ -2472,7 +2883,7 @@ dependencies = [
"rand_distr", "rand_distr",
"rayon", "rayon",
"regex", "regex",
"strum_macros", "strum_macros 0.26.4",
"thiserror 2.0.11", "thiserror 2.0.11",
"version_check", "version_check",
"xxhash-rust", "xxhash-rust",
@@ -2496,7 +2907,7 @@ version = "0.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad56c5ea4d6e0546fbc3fa35918a537b76587600a5118770ed331136249d50d8" checksum = "ad56c5ea4d6e0546fbc3fa35918a537b76587600a5118770ed331136249d50d8"
dependencies = [ dependencies = [
"ahash", "ahash 0.8.11",
"bitflags 2.6.0", "bitflags 2.6.0",
"hashbrown 0.15.2", "hashbrown 0.15.2",
"num-traits", "num-traits",
@@ -2520,7 +2931,7 @@ version = "0.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95d774d5971d2092f0588e89d2f0be524dff35ea368272c0810ba54a860e4411" checksum = "95d774d5971d2092f0588e89d2f0be524dff35ea368272c0810ba54a860e4411"
dependencies = [ dependencies = [
"ahash", "ahash 0.8.11",
"async-trait", "async-trait",
"atoi_simd", "atoi_simd",
"bytes", "bytes",
@@ -2557,7 +2968,7 @@ version = "0.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa457bfa96f45cf14c33507eaa3ebcec6a8d52e7f7fc60cd23f338631369d417" checksum = "fa457bfa96f45cf14c33507eaa3ebcec6a8d52e7f7fc60cd23f338631369d417"
dependencies = [ dependencies = [
"ahash", "ahash 0.8.11",
"bitflags 2.6.0", "bitflags 2.6.0",
"memchr", "memchr",
"once_cell", "once_cell",
@@ -2601,7 +3012,7 @@ version = "0.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b267480495ffe382dab63318e3c6bf4073bb82971c8b80294d079293fece458b" checksum = "b267480495ffe382dab63318e3c6bf4073bb82971c8b80294d079293fece458b"
dependencies = [ dependencies = [
"ahash", "ahash 0.8.11",
"argminmax", "argminmax",
"base64 0.22.1", "base64 0.22.1",
"bytemuck", "bytemuck",
@@ -2622,7 +3033,7 @@ dependencies = [
"rayon", "rayon",
"regex", "regex",
"regex-syntax", "regex-syntax",
"strum_macros", "strum_macros 0.26.4",
"unicode-reverse", "unicode-reverse",
"version_check", "version_check",
] ]
@@ -2633,7 +3044,7 @@ version = "0.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20237f232b1a74b1fae6b5c9bea8c440f2e5d3b5506601b038f0a7a34b84b710" checksum = "20237f232b1a74b1fae6b5c9bea8c440f2e5d3b5506601b038f0a7a34b84b710"
dependencies = [ dependencies = [
"ahash", "ahash 0.8.11",
"async-stream", "async-stream",
"base64 0.22.1", "base64 0.22.1",
"brotli", "brotli",
@@ -2697,7 +3108,7 @@ version = "0.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99a3832887671df1eb326df52cbfcc47789d3d58454c1084a154b48b240175e2" checksum = "99a3832887671df1eb326df52cbfcc47789d3d58454c1084a154b48b240175e2"
dependencies = [ dependencies = [
"ahash", "ahash 0.8.11",
"bitflags 2.6.0", "bitflags 2.6.0",
"bytemuck", "bytemuck",
"bytes", "bytes",
@@ -2720,7 +3131,7 @@ dependencies = [
"rayon", "rayon",
"recursive", "recursive",
"regex", "regex",
"strum_macros", "strum_macros 0.26.4",
"version_check", "version_check",
] ]
@@ -2821,7 +3232,7 @@ dependencies = [
"polars-ops", "polars-ops",
"polars-utils", "polars-utils",
"regex", "regex",
"strum_macros", "strum_macros 0.26.4",
] ]
[[package]] [[package]]
@@ -2830,7 +3241,7 @@ version = "0.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0fc010eea42ad113b641aa53106e4d6e474650c73573d959a546eed0ce6d479" checksum = "e0fc010eea42ad113b641aa53106e4d6e474650c73573d959a546eed0ce6d479"
dependencies = [ dependencies = [
"ahash", "ahash 0.8.11",
"bytemuck", "bytemuck",
"bytes", "bytes",
"compact_str", "compact_str",
@@ -2883,6 +3294,15 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6fa0831dd7cc608c38a5e323422a0077678fa5744aa2be4ad91c4ece8eec8d5" checksum = "c6fa0831dd7cc608c38a5e323422a0077678fa5744aa2be4ad91c4ece8eec8d5"
[[package]]
name = "proc-macro-crate"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b"
dependencies = [
"toml_edit",
]
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.86" version = "1.0.86"
@@ -2901,6 +3321,26 @@ dependencies = [
"cc", "cc",
] ]
[[package]]
name = "ptr_meta"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1"
dependencies = [
"ptr_meta_derive",
]
[[package]]
name = "ptr_meta_derive"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]] [[package]]
name = "pulley-interpreter" name = "pulley-interpreter"
version = "29.0.1" version = "29.0.1"
@@ -2922,6 +3362,12 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "radium"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
[[package]] [[package]]
name = "rand" name = "rand"
version = "0.7.3" version = "0.7.3"
@@ -3139,6 +3585,15 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rend"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c"
dependencies = [
"bytecheck",
]
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.8" version = "0.17.8"
@@ -3154,6 +3609,35 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "rkyv"
version = "0.7.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b"
dependencies = [
"bitvec",
"bytecheck",
"bytes",
"hashbrown 0.12.3",
"ptr_meta",
"rend",
"rkyv_derive",
"seahash",
"tinyvec",
"uuid",
]
[[package]]
name = "rkyv_derive"
version = "0.7.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]] [[package]]
name = "rmp" name = "rmp"
version = "0.8.14" version = "0.8.14"
@@ -3196,6 +3680,22 @@ dependencies = [
"zeroize", "zeroize",
] ]
[[package]]
name = "rust_decimal"
version = "1.36.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b082d80e3e3cc52b2ed634388d436fe1f4de6af5786cc2de9ba9737527bdf555"
dependencies = [
"arrayvec",
"borsh",
"bytes",
"num-traits",
"rand 0.8.5",
"rkyv",
"serde",
"serde_json",
]
[[package]] [[package]]
name = "rustc-demangle" name = "rustc-demangle"
version = "0.1.24" version = "0.1.24"
@@ -3375,6 +3875,12 @@ dependencies = [
"untrusted", "untrusted",
] ]
[[package]]
name = "seahash"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "2.11.1" version = "2.11.1"
@@ -3741,7 +4247,7 @@ checksum = "1a099220ae541c5db479c6424bdf1b200987934033c2584f79a0e1693601e776"
dependencies = [ dependencies = [
"dotenvy", "dotenvy",
"either", "either",
"heck", "heck 0.5.0",
"hex", "hex",
"once_cell", "once_cell",
"proc-macro2", "proc-macro2",
@@ -3948,19 +4454,41 @@ dependencies = [
"syn 2.0.87", "syn 2.0.87",
] ]
[[package]]
name = "strum"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
dependencies = [
"strum_macros 0.25.3",
]
[[package]] [[package]]
name = "strum" name = "strum"
version = "0.26.3" version = "0.26.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
[[package]]
name = "strum_macros"
version = "0.25.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
dependencies = [
"heck 0.4.1",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.87",
]
[[package]] [[package]]
name = "strum_macros" name = "strum_macros"
version = "0.26.4" version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [ dependencies = [
"heck", "heck 0.5.0",
"proc-macro2", "proc-macro2",
"quote", "quote",
"rustversion", "rustversion",
@@ -4008,6 +4536,23 @@ dependencies = [
"windows", "windows",
] ]
[[package]]
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "tar"
version = "0.4.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6"
dependencies = [
"filetime",
"libc",
"xattr",
]
[[package]] [[package]]
name = "target-features" name = "target-features"
version = "0.1.6" version = "0.1.6"
@@ -4168,6 +4713,15 @@ dependencies = [
"time-core", "time-core",
] ]
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]] [[package]]
name = "tinyvec" name = "tinyvec"
version = "1.8.0" version = "1.8.0"
@@ -4852,7 +5406,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8358319c2dd1e4db79e3c1c5d3a5af84956615343f9f89f4e4996a36816e06e6" checksum = "8358319c2dd1e4db79e3c1c5d3a5af84956615343f9f89f4e4996a36816e06e6"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"heck", "heck 0.5.0",
"indexmap 2.7.0", "indexmap 2.7.0",
"wit-parser", "wit-parser",
] ]
@@ -5204,6 +5758,15 @@ dependencies = [
"wasmparser 0.221.2", "wasmparser 0.221.2",
] ]
[[package]]
name = "wyz"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
dependencies = [
"tap",
]
[[package]] [[package]]
name = "xattr" name = "xattr"
version = "1.3.1" version = "1.3.1"

View File

@@ -31,6 +31,7 @@ tokio-util = { version = "0.7.13", features = ["compat"] }
async-trait = "0.1.83" async-trait = "0.1.83"
testcontainers = "0.23.1" testcontainers = "0.23.1"
wasmtime = "29.0.1" wasmtime = "29.0.1"
duckdb = { version = "1.2.0", features = ["bundled"] }
# More info on targets: https://doc.rust-lang.org/cargo/reference/cargo-targets.html#configuring-a-target # More info on targets: https://doc.rust-lang.org/cargo/reference/cargo-targets.html#configuring-a-target
[lib] [lib]

View File

@@ -1,3 +1,4 @@
use duckdb::{params, params_from_iter, Connection};
use futures::TryStreamExt; use futures::TryStreamExt;
use futures_io::{AsyncRead, AsyncWrite}; use futures_io::{AsyncRead, AsyncWrite};
use sqlx::{Any, Column, Executor, Pool, Row}; use sqlx::{Any, Column, Executor, Pool, Row};
@@ -117,3 +118,41 @@ impl QueryExecutor for Pool<Any> {
Ok(result.rows_affected()) Ok(result.rows_affected())
} }
} }
impl QueryExecutor for Connection {
async fn get_rows(
&mut self,
query: impl AsRef<str>,
params: &[String],
row_consumer: &mut impl FnMut(BTreeMap<String, String>) -> anyhow::Result<()>,
) -> anyhow::Result<()> {
let mut stmt = self.prepare(query.as_ref())?;
let columns = stmt.column_names();
let mut row_iter = stmt.query_map(params_from_iter(params.iter()), |row| {
let mut row_map: BTreeMap<String, String> = BTreeMap::new();
for i in 0..columns.len() {
row_map.insert(columns[i].clone(), row.get(i)?);
}
Ok(row_map)
})?;
for row in row_iter {
let row = row?;
row_consumer(row)?;
}
Ok(())
}
async fn execute_query(
&mut self,
query: impl AsRef<str>,
params: &[String],
) -> anyhow::Result<u64> {
let rows_processed = self.execute(query.as_ref(), params_from_iter(params.iter()))?;
Ok(rows_processed as u64)
}
async fn execute_unchecked(&mut self, query: impl AsRef<str>) -> anyhow::Result<u64> {
let rows_processed = self.execute(query.as_ref(), params![])?;
Ok(rows_processed as u64)
}
}

View File

@@ -1,5 +1,6 @@
use anyhow::bail; use anyhow::bail;
use async_trait::async_trait; use async_trait::async_trait;
use duckdb::Connection;
use itertools::Itertools; use itertools::Itertools;
use log::{log, Level}; use log::{log, Level};
use schemars::JsonSchema; use schemars::JsonSchema;
@@ -24,8 +25,8 @@ pub async fn upload_file_bulk(
if upload_node.column_mappings.is_none() { if upload_node.column_mappings.is_none() {
let insert_from_file_query = match upload_node.db_type { let insert_from_file_query = match upload_node.db_type {
DBType::Postgres => Some(format!( DBType::Postgres => Some(format!(
r#"COPY "{}" FROM '{}' DELIMITERS ',' CSV HEADER QUOTE '"';"#, r#"COPY "{}" FROM '{}' WITH DELIMITER ',' CSV HEADER QUOTE '"';"#,
upload_node.table_name.replace("'", "''"), upload_node.table_name.replace("\"", ""),
upload_node.file_path.replace("'", "''") upload_node.file_path.replace("'", "''")
)), )),
// This can't be parameterised as mysql doesn't allow LOAD DATA // This can't be parameterised as mysql doesn't allow LOAD DATA
@@ -41,10 +42,14 @@ pub async fn upload_file_bulk(
)), )),
DBType::Mssql => Some(format!( DBType::Mssql => Some(format!(
"BULK INSERT [{}] FROM '{}' WITH ( FORMAT = 'CSV', FIRSTROW = 2 );", "BULK INSERT [{}] FROM '{}' WITH ( FORMAT = 'CSV', FIRSTROW = 2 );",
upload_node.table_name.replace("'", "''"), upload_node.table_name.replace("]", ""),
upload_node.file_path.replace("'", "''")
)),
DBType::Duckdb => Some(format!(
r#"COPY "{}" FROM '{}' WITH DELIMITER ',' CSV HEADER QUOTE '"';"#,
upload_node.table_name.replace("\"", ""),
upload_node.file_path.replace("'", "''") upload_node.file_path.replace("'", "''")
)), )),
_ => None,
}; };
if let Some(insert_from_file_query) = insert_from_file_query { if let Some(insert_from_file_query) = insert_from_file_query {
let result = executor.execute_unchecked(&insert_from_file_query).await; let result = executor.execute_unchecked(&insert_from_file_query).await;
@@ -60,6 +65,10 @@ pub async fn upload_file_bulk(
} }
if rows_affected == None { if rows_affected == None {
log!(
Level::Warn,
"Failed to copy csv to database, falling back to manual insert, which will be slow"
);
let mut file_reader = csv::Reader::from_path(upload_node.file_path.clone())?; let mut file_reader = csv::Reader::from_path(upload_node.file_path.clone())?;
let csv_columns = file_reader let csv_columns = file_reader
.headers()? .headers()?
@@ -139,17 +148,17 @@ pub enum DBType {
Mysql, Mysql,
Postgres, Postgres,
Mssql, Mssql,
Sqlite, Duckdb,
} }
impl DBType { impl DBType {
pub fn quote_name(&self, quoted: impl AsRef<str>) -> String { pub fn quote_name(&self, quoted: impl AsRef<str>) -> String {
let quoted = quoted.as_ref(); let quoted = quoted.as_ref();
match self { match self {
DBType::Postgres => format!(r#""{}""#, quoted), DBType::Postgres => format!(r#""{}""#, quoted.replace("\"", "")),
DBType::Mysql => format!("`{}`", quoted), DBType::Mysql => format!("`{}`", quoted.replace("`", "")),
DBType::Mssql => format!("[{}]", quoted), DBType::Mssql => format!("[{}]", quoted.replace("]", "")),
DBType::Sqlite => format!("[{}]", quoted), DBType::Duckdb => format!(r#""{}""#, quoted.replace("\"", "")),
} }
} }
} }
@@ -181,6 +190,9 @@ impl RunnableNode for UploadNodeRunner {
tcp.set_nodelay(true)?; tcp.set_nodelay(true)?;
let mut client = tiberius::Client::connect(config, tcp.compat_write()).await?; let mut client = tiberius::Client::connect(config, tcp.compat_write()).await?;
upload_file_bulk(&mut client, &upload_node, BIND_LIMIT, &upload_node.db_type).await?; upload_file_bulk(&mut client, &upload_node, BIND_LIMIT, &upload_node.db_type).await?;
} else if upload_node.db_type == DBType::Duckdb {
let mut conn = Connection::open(&upload_node.connection_string)?;
upload_file_bulk(&mut conn, &upload_node, BIND_LIMIT, &upload_node.db_type).await?;
} else { } else {
install_default_drivers(); install_default_drivers();
let mut pool = AnyPool::connect(&upload_node.connection_string).await?; let mut pool = AnyPool::connect(&upload_node.connection_string).await?;
@@ -194,8 +206,10 @@ impl RunnableNode for UploadNodeRunner {
mod tests { mod tests {
use crate::graph::node::RunnableNode; use crate::graph::node::RunnableNode;
use crate::graph::upload_to_db::{DBType, UploadNode, UploadNodeRunner}; use crate::graph::upload_to_db::{DBType, UploadNode, UploadNodeRunner};
use duckdb::{params, Connection};
use sqlx::{AnyPool, Row}; use sqlx::{AnyPool, Row};
use std::path::PathBuf; use std::path::PathBuf;
use std::{fs, path};
use testcontainers::core::{IntoContainerPort, WaitFor}; use testcontainers::core::{IntoContainerPort, WaitFor};
use testcontainers::runners::AsyncRunner; use testcontainers::runners::AsyncRunner;
use testcontainers::{GenericImage, ImageExt}; use testcontainers::{GenericImage, ImageExt};
@@ -301,6 +315,47 @@ mod tests {
Ok(()) Ok(())
} }
#[tokio::test]
pub async fn check_bulk_upload_duckdb() -> anyhow::Result<()> {
let connection_string = "./testing/output/test.duckdb".to_owned();
fs::remove_file(&connection_string);
let file = path::absolute("./testing/input/upload_to_db/test.csv")?
.to_string_lossy()
.into_owned();
let table_name = "My Test Table".to_string();
let upload_node = UploadNodeRunner {
upload_node: UploadNode {
file_path: file.to_owned(),
table_name: table_name.clone(),
column_mappings: None,
post_script: None,
db_type: DBType::Duckdb,
connection_string,
},
};
let mut conn = Connection::open(&upload_node.upload_node.connection_string)?;
conn.execute(
r#"CREATE TABLE "My Test Table" (
column1 INT,
column2 VARCHAR(200),
);"#,
params![],
)?;
upload_node.run().await?;
// Duckdb quirk
let mut conn = Connection::open(&upload_node.upload_node.connection_string)?;
let mut stmt = conn.prepare(r#"SELECT column1, column2 FROM "My Test Table";"#)?;
let mut query = stmt.query(params![])?;
let row = query.next()?;
assert_eq!(true, row.is_some());
let row = row.unwrap();
let column1: i32 = row.get(0)?;
let column2: String = row.get(1)?;
assert_eq!(1, column1);
assert_eq!("Hello", column2);
Ok(())
}
#[tokio::test] #[tokio::test]
pub async fn check_bulk_upload_mysql() -> anyhow::Result<()> { pub async fn check_bulk_upload_mysql() -> anyhow::Result<()> {
let container = GenericImage::new( let container = GenericImage::new(