Start adding row-level splitting, refactor cli and graph into subcrates
114  src/graph/filter.rs  Normal file
@@ -0,0 +1,114 @@
use std::collections::BTreeMap;

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

use crate::io::{RecordDeserializer, RecordSerializer};

use super::derive::{DataValidators, DeriveFilter};

use super::derive;
use super::node::RunnableNode;

/**
 * Write all records from the input file to the output file, skipping records
 * that don't satisfy the filter criteria.
 */
pub fn filter_file(
    rules: &DataValidators,
    input: &mut impl RecordDeserializer,
    output: &mut impl RecordSerializer,
) -> anyhow::Result<()> {
    // The first record is read separately so the header can be written even
    // when every record ends up being filtered out.
    if let Some(line) = input.deserialize()? {
        let line: BTreeMap<String, String> = line;
        output.write_header(&line)?;

        if derive::is_line_valid(&line, rules) {
            output.write_record(&line)?;
        }

        while let Some(line) = input.deserialize()? {
            let line: BTreeMap<String, String> = line;
            if derive::is_line_valid(&line, rules) {
                output.write_record(&line)?;
            }
        }
        output.flush()?;
    }
    Ok(())
}

#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct FilterNode {
    pub filters: Vec<DeriveFilter>,
    pub input_file_path: String,
    pub output_file_path: String,
}

pub struct FilterNodeRunner {
    pub filter_node: FilterNode,
}

impl RunnableNode for FilterNodeRunner {
    fn run(&self) -> anyhow::Result<()> {
        let mut reader = csv::Reader::from_path(&self.filter_node.input_file_path)?;
        let mut writer = csv::Writer::from_path(&self.filter_node.output_file_path)?;
        let rules = derive::to_filter_rules(&self.filter_node.filters)?;
        filter_file(&rules, &mut reader, &mut writer)
    }
}

#[cfg(test)]
mod tests {
    use super::derive::{Comparator, FilterRule};
    use super::filter_file;

    #[test]
    fn no_filters_passes_through() -> anyhow::Result<()> {
        let records = "Column1,Column2
Value1,Value2
Value3,Value4
";
        let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes());
        let mut writer = csv::Writer::from_writer(vec![]);
        filter_file(&vec![], &mut reader, &mut writer)?;
        let result = String::from_utf8(writer.into_inner()?)?;
        assert_eq!(
            records, result,
            "Should not modify input when no filters are defined"
        );
        Ok(())
    }

    #[test]
    fn filters_data() -> anyhow::Result<()> {
        let records = "Column1,Column2
Value1,Value2
Value3,Value4
";
        let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes());
        let mut writer = csv::Writer::from_writer(vec![]);
        filter_file(
            &vec![Box::new(FilterRule {
                column_name: "Column1".to_owned(),
                comparator: Comparator::NotEqual("Value3".to_owned()),
            })],
            &mut reader,
            &mut writer,
        )?;
        let result = String::from_utf8(writer.into_inner()?)?;
        assert_eq!(
            "Column1,Column2
Value1,Value2
",
            result,
            "Should filter out second record due to filter rules"
        );
        Ok(())
    }

    #[test]
    fn should_print_header_when_no_rules_pass() {}
}
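
The `derive` module this file leans on (`DataValidators`, `DeriveFilter`, `FilterRule`, `Comparator`, `is_line_valid`, `to_filter_rules`) is not part of this diff. For orientation only, here is a minimal sketch of shapes that would line up with how they are used above; the names come from the diff, but every definition below is an assumption, not the actual contents of `src/graph/derive.rs`:

```rust
use std::collections::BTreeMap;

/// Assumed shape: a comparison applied to one column's value.
/// Only NotEqual appears in this diff; Equal is purely illustrative.
pub enum Comparator {
    Equal(String),
    NotEqual(String),
}

/// Assumed shape: a single filter bound to a named column.
pub struct FilterRule {
    pub column_name: String,
    pub comparator: Comparator,
}

/// Assumed alias, matching the `&vec![Box::new(FilterRule { .. })]` calls in the tests.
pub type DataValidators = Vec<Box<FilterRule>>;

/// Assumed semantics: a record is kept only if every rule accepts it.
pub fn is_line_valid(line: &BTreeMap<String, String>, rules: &DataValidators) -> bool {
    rules.iter().all(|rule| {
        let value = line.get(&rule.column_name).map(String::as_str).unwrap_or("");
        match &rule.comparator {
            Comparator::Equal(expected) => value == expected.as_str(),
            Comparator::NotEqual(expected) => value != expected.as_str(),
        }
    })
}
```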
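
The last test, `should_print_header_when_no_rules_pass`, is still an empty stub. Under the same assumptions as the sketch above, and assuming `write_header()` emits the column names even when every record is rejected, one way it could be filled in (a sketch, not the author's implementation):

```rust
#[test]
fn should_print_header_when_no_rules_pass() -> anyhow::Result<()> {
    let records = "Column1,Column2
Value1,Value2
";
    let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes());
    let mut writer = csv::Writer::from_writer(vec![]);
    filter_file(
        // A rule that rejects the only data record, so nothing but the header survives.
        &vec![Box::new(FilterRule {
            column_name: "Column1".to_owned(),
            comparator: Comparator::NotEqual("Value1".to_owned()),
        })],
        &mut reader,
        &mut writer,
    )?;
    let result = String::from_utf8(writer.into_inner()?)?;
    assert_eq!(
        "Column1,Column2
",
        result,
        "Header should still be written when every record is filtered out"
    );
    Ok(())
}
```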