Add sql node
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -2,7 +2,8 @@
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
*.xcuserdatad
|
*.xcuserdatad
|
||||||
.venv
|
.venv
|
||||||
*.csv
|
/*.csv
|
||||||
*.h
|
*.h
|
||||||
*.py
|
*.py
|
||||||
.idea
|
.idea
|
||||||
|
/testing/output
|
||||||
1813
Cargo.lock
generated
1813
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
21
Cargo.toml
21
Cargo.toml
@@ -7,25 +7,26 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
# https://nalgebra.org/docs/user_guide/getting_started
|
# https://nalgebra.org/docs/user_guide/getting_started
|
||||||
nalgebra = "0.31.0"
|
nalgebra = "0.33"
|
||||||
|
|
||||||
# https://docs.rs/csv/1.1.6/csv/
|
# https://docs.rs/csv/1.1.6/csv/
|
||||||
csv = "1.1"
|
csv = "1"
|
||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
|
|
||||||
# num = "0.4"
|
# num = "0.4"
|
||||||
clap = { version = "4.1.8", features = ["derive"] }
|
clap = { version = "4", features = ["derive"] }
|
||||||
anyhow = "1.0"
|
anyhow = "1"
|
||||||
|
|
||||||
itertools = "0.10.3"
|
itertools = "0.10"
|
||||||
chrono = {version = "0.4.31", features = ["default", "serde"]}
|
chrono = {version = "0.4", features = ["default", "serde"]}
|
||||||
|
|
||||||
rayon = "1.6.0"
|
rayon = "1.6.0"
|
||||||
tokio = { version = "1.26.0", features = ["full"] }
|
tokio = { version = "1.39", features = ["full"] }
|
||||||
sqlx = { version = "0.8", features = [ "runtime-tokio-rustls", "any" ] }
|
sqlx = { version = "0.8", features = [ "runtime-tokio-rustls", "any" ] }
|
||||||
rmp-serde = "1.1.1"
|
rmp-serde = "1.1"
|
||||||
tempfile = "3.7.0"
|
tempfile = "3.7"
|
||||||
polars = {version = "0.32.1", features = ["lazy", "performant", "streaming", "cse", "dtype-datetime"]}
|
polars = {version = "0.41", features = ["lazy", "performant", "streaming", "cse", "dtype-datetime"]}
|
||||||
|
polars-sql = "0.41"
|
||||||
|
|
||||||
# More info on targets: https://doc.rust-lang.org/cargo/reference/cargo-targets.html#configuring-a-target
|
# More info on targets: https://doc.rust-lang.org/cargo/reference/cargo-targets.html#configuring-a-target
|
||||||
[lib]
|
[lib]
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ use crate::{
|
|||||||
derive::DeriveNode,
|
derive::DeriveNode,
|
||||||
filter::{FilterNode, FilterNodeRunner},
|
filter::{FilterNode, FilterNodeRunner},
|
||||||
node::RunnableNode,
|
node::RunnableNode,
|
||||||
|
sql_rule::{SQLNode, SQLNodeRunner},
|
||||||
upload_to_db::{UploadNode, UploadNodeRunner},
|
upload_to_db::{UploadNode, UploadNodeRunner},
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -27,6 +28,7 @@ pub enum NodeConfiguration {
|
|||||||
CodeRuleNode(CodeRuleNode),
|
CodeRuleNode(CodeRuleNode),
|
||||||
FilterNode(FilterNode),
|
FilterNode(FilterNode),
|
||||||
UploadNode(UploadNode),
|
UploadNode(UploadNode),
|
||||||
|
SQLNode(SQLNode),
|
||||||
Dynamic,
|
Dynamic,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -128,6 +130,7 @@ fn get_runnable_node(node: Node) -> Box<dyn RunnableNode> {
|
|||||||
NodeConfiguration::CodeRuleNode(_) => todo!(),
|
NodeConfiguration::CodeRuleNode(_) => todo!(),
|
||||||
NodeConfiguration::FilterNode(filter_node) => Box::new(FilterNodeRunner { filter_node }),
|
NodeConfiguration::FilterNode(filter_node) => Box::new(FilterNodeRunner { filter_node }),
|
||||||
NodeConfiguration::UploadNode(upload_node) => Box::new(UploadNodeRunner { upload_node }),
|
NodeConfiguration::UploadNode(upload_node) => Box::new(UploadNodeRunner { upload_node }),
|
||||||
|
NodeConfiguration::SQLNode(sql_node) => Box::new(SQLNodeRunner { sql_node }),
|
||||||
NodeConfiguration::Dynamic => todo!(),
|
NodeConfiguration::Dynamic => todo!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ mod graph;
|
|||||||
mod io;
|
mod io;
|
||||||
pub mod link;
|
pub mod link;
|
||||||
pub mod node;
|
pub mod node;
|
||||||
|
pub mod sql_rule;
|
||||||
pub mod upload_to_db;
|
pub mod upload_to_db;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
|
|||||||
@@ -81,10 +81,12 @@ pub fn build_polars(
|
|||||||
})
|
})
|
||||||
.reduce(|prev, next| prev.and(next));
|
.reduce(|prev, next| prev.and(next));
|
||||||
|
|
||||||
let input_file = inputs.iter().find(|input| input.file_path == definition.source)
|
let input_file = inputs
|
||||||
|
.iter()
|
||||||
|
.find(|input| input.file_path == definition.source)
|
||||||
.ok_or(anyhow!("Failed to find valid file"))?;
|
.ok_or(anyhow!("Failed to find valid file"))?;
|
||||||
let mut reader = LazyCsvReader::new(&input_file.file_path)
|
let mut reader = LazyCsvReader::new(&input_file.file_path)
|
||||||
.has_header(true)
|
.with_has_header(true)
|
||||||
.finish()?;
|
.finish()?;
|
||||||
let mut required_files = HashSet::new();
|
let mut required_files = HashSet::new();
|
||||||
for component in &definition.components {
|
for component in &definition.components {
|
||||||
@@ -98,7 +100,8 @@ pub fn build_polars(
|
|||||||
for source_type in required_files {
|
for source_type in required_files {
|
||||||
// TODO: Better error messages
|
// TODO: Better error messages
|
||||||
if source_type != &definition.source {
|
if source_type != &definition.source {
|
||||||
let source_file = inputs.iter()
|
let source_file = inputs
|
||||||
|
.iter()
|
||||||
.find(|input| input.file_path == definition.source)
|
.find(|input| input.file_path == definition.source)
|
||||||
.ok_or(anyhow!("Input file was not specified for source type"))?;
|
.ok_or(anyhow!("Input file was not specified for source type"))?;
|
||||||
// TODO: Alias the joined columns so they don't potentially clash with the current column
|
// TODO: Alias the joined columns so they don't potentially clash with the current column
|
||||||
@@ -126,7 +129,6 @@ pub fn build_polars(
|
|||||||
match component {
|
match component {
|
||||||
Component::Constant(constant) => {
|
Component::Constant(constant) => {
|
||||||
built_expression = built_expression + lit(constant.clone())
|
built_expression = built_expression + lit(constant.clone())
|
||||||
|
|
||||||
}
|
}
|
||||||
// TODO: Do we need to worry about the source type? Might be clashing column names we need to think about earlier then address here?
|
// TODO: Do we need to worry about the source type? Might be clashing column names we need to think about earlier then address here?
|
||||||
// TODO: What I really want to do is not use source type, instead I want to be referring to a file, which we translate from the sourcetype
|
// TODO: What I really want to do is not use source type, instead I want to be referring to a file, which we translate from the sourcetype
|
||||||
|
|||||||
81
src/sql_rule.rs
Normal file
81
src/sql_rule.rs
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
use std::fs::File;
|
||||||
|
|
||||||
|
use polars::{
|
||||||
|
io::SerWriter,
|
||||||
|
prelude::{CsvWriter, LazyCsvReader, LazyFileListReader},
|
||||||
|
};
|
||||||
|
use polars_sql::SQLContext;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::node::RunnableNode;
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone)]
|
||||||
|
pub struct CSVFile {
|
||||||
|
name: String,
|
||||||
|
path: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run SQL over files using polars, export results to output file
|
||||||
|
*/
|
||||||
|
fn run_sql(files: &Vec<CSVFile>, output_path: &String, query: &String) -> anyhow::Result<()> {
|
||||||
|
let mut ctx = SQLContext::new();
|
||||||
|
for file in files {
|
||||||
|
let df = LazyCsvReader::new(&file.path).finish()?;
|
||||||
|
ctx.register(&file.name, df);
|
||||||
|
}
|
||||||
|
let result = ctx.execute(&query)?;
|
||||||
|
let mut file = File::create(output_path)?;
|
||||||
|
CsvWriter::new(&mut file).finish(&mut result.collect()?)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone)]
|
||||||
|
pub struct SQLNode {
|
||||||
|
pub files: Vec<CSVFile>,
|
||||||
|
pub output_file: String,
|
||||||
|
pub query: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct SQLNodeRunner {
|
||||||
|
pub sql_node: SQLNode,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RunnableNode for SQLNodeRunner {
|
||||||
|
fn run(&self) -> anyhow::Result<()> {
|
||||||
|
run_sql(
|
||||||
|
&self.sql_node.files,
|
||||||
|
&self.sql_node.output_file,
|
||||||
|
&self.sql_node.query,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[cfg(test)]
mod tests {
    use std::{fs, path::Path};

    use super::{run_sql, CSVFile};

    /// Filters the fixture CSV down to a single account code and checks the
    /// exact CSV text written to the output file.
    #[test]
    fn basic_query_works() -> anyhow::Result<()> {
        let output_path = "./testing/output/output.csv".to_owned();

        // The output directory is git-ignored, so it is missing on a fresh
        // checkout; create it here so the test does not depend on local state.
        if let Some(output_dir) = Path::new(&output_path).parent() {
            fs::create_dir_all(output_dir)?;
        }

        run_sql(
            &vec![CSVFile {
                name: "Account".to_owned(),
                path: "./testing/test.csv".to_owned(),
            }],
            &output_path,
            &"SELECT * FROM Account WHERE Code = 'A195950'".to_owned(),
        )?;

        let output = fs::read_to_string(&output_path)?;
        assert_eq!(
            output,
            "Code,Description,Type,CostOutput,PercentFixed\nA195950,A195950 Staff Related Other,E,GS,100.00\n"
        );
        Ok(())
    }
}
|
||||||
9
testing/test.csv
Normal file
9
testing/test.csv
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
Code,Description,Type,CostOutput,PercentFixed
|
||||||
|
A195950,A195950 Staff Related Other,E,GS,100.00
|
||||||
|
A105115,A105115 S&W Sal Pack Dieticians,R,SW AH,100.00
|
||||||
|
A105240,A105240 S&W Sal Pack Tech Offs,R,SW Other,100.00
|
||||||
|
A114250,A114250 S&W Man Jnl Agcy Med,E,SW Other,100.00
|
||||||
|
A183350,A183350 Dom Sups Oth,E,GS,100.00
|
||||||
|
A170210,"A170210 Biscuits, Cakes, Pies",E,GS,100.00
|
||||||
|
A194000,A194000 Stationery,E,GS,100.00
|
||||||
|
A174350,A174350 M&S Enteral Feed Sets,E,MS,100.00
|
||||||
|
Reference in New Issue
Block a user