Add basic pull from db support

This commit is contained in:
2024-12-26 16:19:38 +10:30
parent 375e1f9638
commit 139d6fb7fd
9 changed files with 802 additions and 130 deletions

View File

@@ -1,6 +1,10 @@
use std::fs::File;
use super::node::RunnableNode;
use crate::io::{DataSource, SourceType};
use async_trait::async_trait;
use polars::io::SerReader;
use polars::prelude::{IntoLazy, LazyFrame, ParquetReader, ScanArgsParquet};
use polars::{
io::SerWriter,
prelude::{CsvWriter, LazyCsvReader, LazyFileListReader},
@@ -9,21 +13,24 @@ use polars_sql::SQLContext;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use super::node::RunnableNode;
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct CSVFile {
name: String,
path: String,
/// A named input table for a SQL query, backed by a file-based data source.
pub struct SqlFile {
/// Table name the loaded frame is registered under in the SQL context,
/// i.e. the identifier queries use to refer to this input.
pub name: String,
/// Location and format of the underlying data; the source type (CSV or
/// Parquet) selects which polars reader is used to load it.
pub data_source: DataSource,
}
/**
* Run a SQL query over the given CSV or Parquet files using polars, and export the results to the output file
*/
fn run_sql(files: &Vec<CSVFile>, output_path: &String, query: &String) -> anyhow::Result<()> {
fn run_sql(files: &Vec<SqlFile>, output_path: &String, query: &String) -> anyhow::Result<()> {
let mut ctx = SQLContext::new();
for file in files {
let df = LazyCsvReader::new(&file.path).finish()?;
let df = match file.data_source.source_type {
SourceType::CSV => LazyCsvReader::new(&file.data_source.path).finish()?,
SourceType::PARQUET => {
LazyFrame::scan_parquet(&file.data_source.path, ScanArgsParquet::default())?
}
};
ctx.register(&file.name, df);
}
let result = ctx.execute(&query)?;
@@ -34,7 +41,7 @@ fn run_sql(files: &Vec<CSVFile>, output_path: &String, query: &String) -> anyhow
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
/// Serializable description of a SQL node: the input files, the query text,
/// and the output file.
pub struct SQLNode {
pub files: Vec<CSVFile>,
/// Named input data sources made available to the query.
pub files: Vec<SqlFile>,
/// NOTE(review): presumably the path the query result is written to —
/// confirm against the runner, which is not visible in this hunk.
pub output_file: String,
/// SQL text for this node; table names presumably refer to the `name` of
/// each entry in `files` — confirm against the runner.
pub query: String,
}
@@ -55,17 +62,21 @@ impl RunnableNode for SQLNodeRunner {
}
#[cfg(test)]
mod tests {
use super::{run_sql, SqlFile};
use crate::io::{DataSource, SourceType};
use std::path::PathBuf;
use std::{fs::File, io::Read};
use super::{run_sql, CSVFile};
#[test]
fn basic_query_works() -> anyhow::Result<()> {
let output_path = "./testing/output/output.csv".to_owned();
run_sql(
&vec![CSVFile {
&vec![SqlFile {
name: "Account".to_owned(),
path: "./testing/test.csv".to_owned(),
data_source: DataSource {
source_type: SourceType::CSV,
path: PathBuf::from("./testing/test.csv"),
}
}],
&output_path,
&"SELECT * FROM Account WHERE Code = 'A195950'".to_owned(),
@@ -76,7 +87,7 @@ mod tests {
assert_eq!(
output,
"Code,Description,Type,CostOutput,PercentFixed
A195950,A195950 Staff Related Other,E,GS,100.00
A195950,A195950 Staff Related Other,E,GS,100.0
"
);
Ok(())