115 lines
3.1 KiB
Rust
115 lines
3.1 KiB
Rust
use std::collections::BTreeMap;
|
|
|
|
use schemars::JsonSchema;
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use crate::io::{RecordDeserializer, RecordSerializer};
|
|
|
|
use super::derive::{DataValidators, DeriveFilter};
|
|
|
|
use super::derive;
|
|
use super::node::RunnableNode;
|
|
|
|
/**
|
|
* Write all lines from the input file to the output file, skipping records
|
|
* that don't satisfy the filter criteria
|
|
*/
|
|
pub fn filter_file(
|
|
rules: &DataValidators,
|
|
input: &mut impl RecordDeserializer,
|
|
output: &mut impl RecordSerializer,
|
|
) -> anyhow::Result<()> {
|
|
if let Some(line) = input.deserialize()? {
|
|
let line: BTreeMap<String, String> = line;
|
|
output.write_header(&line)?;
|
|
|
|
if derive::is_line_valid(&line, &rules) {
|
|
output.write_record(&line)?;
|
|
}
|
|
|
|
while let Some(line) = input.deserialize()? {
|
|
let line: BTreeMap<String, String> = line;
|
|
if derive::is_line_valid(&line, rules) {
|
|
output.write_record(&line)?;
|
|
}
|
|
}
|
|
output.flush()?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
|
|
pub struct FilterNode {
|
|
pub filters: Vec<DeriveFilter>,
|
|
pub input_file_path: String,
|
|
pub output_file_path: String,
|
|
}
|
|
|
|
pub struct FilterNodeRunner {
|
|
pub filter_node: FilterNode,
|
|
}
|
|
|
|
impl RunnableNode for FilterNodeRunner {
|
|
fn run(&self) -> anyhow::Result<()> {
|
|
let mut reader = csv::Reader::from_path(&self.filter_node.input_file_path)?;
|
|
let mut writer = csv::Writer::from_path(&self.filter_node.output_file_path)?;
|
|
let rules = derive::to_filter_rules(&self.filter_node.filters)?;
|
|
filter_file(&rules, &mut reader, &mut writer)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::derive::{Comparator, FilterRule};
    use super::filter_file;

    /// With no rules, every record is valid and the output equals the input.
    #[test]
    fn no_filters_passes_through() -> anyhow::Result<()> {
        let records = "Column1,Column2\nValue1,Value2\nValue3,Value4\n";
        let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes());
        let mut writer = csv::Writer::from_writer(vec![]);

        filter_file(&vec![], &mut reader, &mut writer)?;

        let result = String::from_utf8(writer.into_inner()?)?;
        assert_eq!(
            records, result,
            "Should not modify input when no filters are defined"
        );
        Ok(())
    }

    /// A `NotEqual` rule drops the record whose column matches the given value.
    #[test]
    fn filters_data() -> anyhow::Result<()> {
        let records = "Column1,Column2\nValue1,Value2\nValue3,Value4\n";
        let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes());
        let mut writer = csv::Writer::from_writer(vec![]);

        filter_file(
            &vec![Box::new(FilterRule {
                column_name: "Column1".to_owned(),
                comparator: Comparator::NotEqual("Value3".to_owned()),
            })],
            &mut reader,
            &mut writer,
        )?;

        let result = String::from_utf8(writer.into_inner()?)?;
        assert_eq!(
            "Column1,Column2\nValue1,Value2\n",
            result,
            "Should filter out second record due to filter rules"
        );
        Ok(())
    }

    /// When every record is rejected, the header must still be written.
    #[test]
    fn should_print_header_when_no_rules_pass() -> anyhow::Result<()> {
        // Both records share the Column1 value that the rule rejects.
        let records = "Column1,Column2\nValue1,Value2\nValue1,Value4\n";
        let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes());
        let mut writer = csv::Writer::from_writer(vec![]);

        filter_file(
            &vec![Box::new(FilterRule {
                column_name: "Column1".to_owned(),
                comparator: Comparator::NotEqual("Value1".to_owned()),
            })],
            &mut reader,
            &mut writer,
        )?;

        let result = String::from_utf8(writer.into_inner()?)?;
        assert_eq!(
            "Column1,Column2\n",
            result,
            "Should still write the header when every record is filtered out"
        );
        Ok(())
    }
}
|