use std::collections::BTreeMap; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use crate::io::{RecordDeserializer, RecordSerializer}; use super::derive::{DataValidators, DeriveFilter}; use super::derive; use super::node::RunnableNode; /** * Write all lines from the input file to the output file, skipping records * that don't satisfy the filter criteria */ pub fn filter_file( rules: &DataValidators, input: &mut impl RecordDeserializer, output: &mut impl RecordSerializer, ) -> anyhow::Result<()> { if let Some(line) = input.deserialize()? { let line: BTreeMap = line; output.write_header(&line)?; if derive::is_line_valid(&line, &rules) { output.write_record(&line)?; } while let Some(line) = input.deserialize()? { let line: BTreeMap = line; if derive::is_line_valid(&line, rules) { output.write_record(&line)?; } } output.flush()?; } Ok(()) } #[derive(Serialize, Deserialize, Clone, JsonSchema)] pub struct FilterNode { pub filters: Vec, pub input_file_path: String, pub output_file_path: String, } pub struct FilterNodeRunner { pub filter_node: FilterNode, } impl RunnableNode for FilterNodeRunner { fn run(&self) -> anyhow::Result<()> { let mut reader = csv::Reader::from_path(&self.filter_node.input_file_path)?; let mut writer = csv::Writer::from_path(&self.filter_node.output_file_path)?; let rules = derive::to_filter_rules(&self.filter_node.filters)?; filter_file(&rules, &mut reader, &mut writer) } } #[cfg(test)] mod tests { use super::derive::{Comparator, FilterRule}; use super::filter_file; #[test] fn no_filters_passes_through() -> anyhow::Result<()> { let records = "Column1,Column2 Value1,Value2 Value3,Value4 "; let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes()); let mut writer = csv::Writer::from_writer(vec![]); filter_file(&vec![], &mut reader, &mut writer)?; let result = String::from_utf8(writer.into_inner()?)?; assert_eq!( records, result, "Should not modify input when no filters are defined" ); Ok(()) } #[test] fn filters_data() -> anyhow::Result<()> { let records = "Column1,Column2 Value1,Value2 Value3,Value4 "; let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes()); let mut writer = csv::Writer::from_writer(vec![]); filter_file( &vec![Box::new(FilterRule { column_name: "Column1".to_owned(), comparator: Comparator::NotEqual("Value3".to_owned()), })], &mut reader, &mut writer, )?; let result = String::from_utf8(writer.into_inner()?)?; assert_eq!( "Column1,Column2 Value1,Value2 ", result, "Should filter out second record due to filter rules" ); Ok(()) } #[test] fn should_print_header_when_no_rules_pass() {} }