Add sql node

This commit is contained in:
2024-07-31 20:00:33 +09:30
parent 5acee8c889
commit e9caf43de3
8 changed files with 1009 additions and 936 deletions

View File

@@ -15,6 +15,7 @@ use crate::{
derive::DeriveNode,
filter::{FilterNode, FilterNodeRunner},
node::RunnableNode,
sql_rule::{SQLNode, SQLNodeRunner},
upload_to_db::{UploadNode, UploadNodeRunner},
};
@@ -27,6 +28,7 @@ pub enum NodeConfiguration {
CodeRuleNode(CodeRuleNode),
FilterNode(FilterNode),
UploadNode(UploadNode),
SQLNode(SQLNode),
Dynamic,
}
@@ -128,6 +130,7 @@ fn get_runnable_node(node: Node) -> Box<dyn RunnableNode> {
NodeConfiguration::CodeRuleNode(_) => todo!(),
NodeConfiguration::FilterNode(filter_node) => Box::new(FilterNodeRunner { filter_node }),
NodeConfiguration::UploadNode(upload_node) => Box::new(UploadNodeRunner { upload_node }),
NodeConfiguration::SQLNode(sql_node) => Box::new(SQLNodeRunner { sql_node }),
NodeConfiguration::Dynamic => todo!(),
}
}

View File

@@ -18,6 +18,7 @@ mod graph;
mod io;
pub mod link;
pub mod node;
pub mod sql_rule;
pub mod upload_to_db;
#[no_mangle]

View File

@@ -81,10 +81,12 @@ pub fn build_polars(
})
.reduce(|prev, next| prev.and(next));
let input_file = inputs.iter().find(|input| input.file_path == definition.source)
let input_file = inputs
.iter()
.find(|input| input.file_path == definition.source)
.ok_or(anyhow!("Failed to find valid file"))?;
let mut reader = LazyCsvReader::new(&input_file.file_path)
.has_header(true)
.with_has_header(true)
.finish()?;
let mut required_files = HashSet::new();
for component in &definition.components {
@@ -98,8 +100,9 @@ pub fn build_polars(
for source_type in required_files {
// TODO: Better error messages
if source_type != &definition.source {
let source_file = inputs.iter()
.find(|input| input.file_path == definition.source)
let source_file = inputs
.iter()
.find(|input| input.file_path == definition.source)
.ok_or(anyhow!("Input file was not specified for source type"))?;
// TODO: Alias the joined columns so they don't potentially clash with the current column
let join_reader = LazyCsvReader::new(source_file.file_path.clone()).finish()?;
@@ -126,7 +129,6 @@ pub fn build_polars(
match component {
Component::Constant(constant) => {
built_expression = built_expression + lit(constant.clone())
}
// TODO: Do we need to worry about the source type? Might be clashing column names we need to think about earlier then address here?
// TODO: What I really want to do is not use source type, instead I want to be referring to a file, which we translate from the sourcetype

81
src/sql_rule.rs Normal file
View File

@@ -0,0 +1,81 @@
use std::fs::File;
use polars::{
io::SerWriter,
prelude::{CsvWriter, LazyCsvReader, LazyFileListReader},
};
use polars_sql::SQLContext;
use serde::{Deserialize, Serialize};
use crate::node::RunnableNode;
/// A CSV file that is exposed to SQL queries as a named table.
#[derive(Clone, Deserialize, Serialize)]
pub struct CSVFile {
    /// Name the file is registered under in the SQL context.
    name: String,
    /// Path of the CSV file that is read for this table.
    path: String,
}
/// Runs a SQL query over CSV files using polars and exports the result to a CSV file.
///
/// Every entry of `files` is opened with `LazyCsvReader` and registered in the SQL
/// context under its `name`, so `query` can refer to it as a table. The query result
/// is then written to `output_path`.
///
/// # Errors
///
/// Returns an error if a CSV file cannot be opened, if the query cannot be planned or
/// evaluated, or if the output file cannot be created or written.
fn run_sql(files: &[CSVFile], output_path: &str, query: &str) -> anyhow::Result<()> {
    let mut ctx = SQLContext::new();
    for file in files {
        let frame = LazyCsvReader::new(&file.path).finish()?;
        ctx.register(&file.name, frame);
    }
    // Materialise the result BEFORE touching the output file. The query is evaluated
    // lazily, so creating (and thereby truncating) the file first would wipe an existing
    // output whenever evaluation fails, and would empty an input file that is also used
    // as the output before it has been read.
    let mut result = ctx.execute(query)?.collect()?;
    let mut output = File::create(output_path)?;
    CsvWriter::new(&mut output).finish(&mut result)?;
    Ok(())
}
/// Configuration of a node that runs a SQL query over a set of CSV files.
#[derive(Clone, Deserialize, Serialize)]
pub struct SQLNode {
    /// CSV files made available to the query as tables.
    pub files: Vec<CSVFile>,
    /// Path of the file the query result is written to.
    pub output_file: String,
    /// SQL text to execute.
    pub query: String,
}
/// Runner that executes a [`SQLNode`] through the [`RunnableNode`] trait.
pub struct SQLNodeRunner {
    /// Node configuration supplying the files, output path and query to run.
    pub sql_node: SQLNode,
}
impl RunnableNode for SQLNodeRunner {
    /// Runs the node's query over its files and writes the result to its output file.
    fn run(&self) -> anyhow::Result<()> {
        let node = &self.sql_node;
        run_sql(&node.files, &node.output_file, &node.query)
    }
}
#[cfg(test)]
mod tests {
    use std::fs;

    use super::{run_sql, CSVFile};

    /// Runs a filtered `SELECT` over the test CSV and checks the exported file contents.
    #[test]
    fn basic_query_works() -> anyhow::Result<()> {
        let output_path = "./testing/output/output.csv".to_owned();
        let files = vec![CSVFile {
            name: "Account".to_owned(),
            path: "./testing/test.csv".to_owned(),
        }];
        let query = "SELECT * FROM Account WHERE Code = 'A195950'".to_owned();

        run_sql(&files, &output_path, &query)?;

        // Read the exported file back in one call and compare it with the expected CSV.
        let output = fs::read_to_string(output_path)?;
        assert_eq!(
            output,
            "Code,Description,Type,CostOutput,PercentFixed
A195950,A195950 Staff Related Other,E,GS,100.00
"
        );
        Ok(())
    }
}