Files
ingey/src/graph/filter.rs

115 lines
3.1 KiB
Rust

use std::collections::BTreeMap;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use crate::io::{RecordDeserializer, RecordSerializer};
use super::derive::{DataValidators, DeriveFilter};
use super::derive;
use super::node::RunnableNode;
/// Copies records from `input` to `output`, skipping every record that does
/// not satisfy the filter criteria in `rules`.
///
/// The header is derived from the first record read and is written *before*
/// that record is validated, so it is emitted even when every record ends up
/// being filtered out. When `input` yields no records at all, no header is
/// written.
///
/// `output` is flushed before returning, including when `input` was empty.
///
/// # Errors
///
/// Propagates the first error raised while deserializing a record, writing
/// the header or a record, or flushing `output`.
pub fn filter_file(
    rules: &DataValidators,
    input: &mut impl RecordDeserializer,
    output: &mut impl RecordSerializer,
) -> anyhow::Result<()> {
    let mut header_written = false;
    while let Some(record) = input.deserialize()? {
        // The annotation pins the concrete type `deserialize` must produce.
        let record: BTreeMap<String, String> = record;
        if !header_written {
            // The header comes from the first record, regardless of whether
            // that record itself passes the filters.
            output.write_header(&record)?;
            header_written = true;
        }
        if derive::is_line_valid(&record, rules) {
            output.write_record(&record)?;
        }
    }
    output.flush()?;
    Ok(())
}
// Configuration for a filter step: the filters to apply plus the files to
// read from and write to.
//
// NOTE: plain `//` comments are used here on purpose. `///` doc comments on a
// type deriving `JsonSchema` are picked up by schemars as schema descriptions,
// which would change the generated schema.
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct FilterNode {
    // Filter definitions; `FilterNodeRunner::run` converts them with
    // `derive::to_filter_rules`.
    pub filters: Vec<DeriveFilter>,
    // Path that `FilterNodeRunner::run` opens with `csv::Reader::from_path`.
    pub input_file_path: String,
    // Path that `FilterNodeRunner::run` opens with `csv::Writer::from_path`.
    pub output_file_path: String,
}
/// Runner that executes a [`FilterNode`] through the [`RunnableNode`] trait.
pub struct FilterNodeRunner {
    /// The node configuration (filters plus input/output paths) to run.
    pub filter_node: FilterNode,
}
impl RunnableNode for FilterNodeRunner {
    /// Filters the CSV file at `input_file_path` into `output_file_path`
    /// using the node's configured filters.
    ///
    /// The filter rules are built before either file is opened, so an invalid
    /// filter definition fails without creating or truncating the output file.
    ///
    /// # Errors
    ///
    /// Returns an error when the filters cannot be converted into rules, when
    /// either file cannot be opened, or when [`filter_file`] fails.
    fn run(&self) -> anyhow::Result<()> {
        let node = &self.filter_node;
        // Validate the configuration first: `csv::Writer::from_path` truncates
        // an existing output file as soon as it is called.
        let rules = derive::to_filter_rules(&node.filters)?;
        let mut reader = csv::Reader::from_path(&node.input_file_path)?;
        let mut writer = csv::Writer::from_path(&node.output_file_path)?;
        filter_file(&rules, &mut reader, &mut writer)
    }
}
#[cfg(test)]
mod tests {
    use super::derive::{Comparator, FilterRule};
    use super::filter_file;

    /// With no rules configured, the output must equal the input.
    #[test]
    fn no_filters_passes_through() -> anyhow::Result<()> {
        let records = "Column1,Column2\nValue1,Value2\nValue3,Value4\n";
        let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes());
        let mut writer = csv::Writer::from_writer(vec![]);
        filter_file(&vec![], &mut reader, &mut writer)?;
        let result = String::from_utf8(writer.into_inner()?)?;
        assert_eq!(
            records, result,
            "Should not modify input when no filters are defined"
        );
        Ok(())
    }

    /// A record rejected by a rule must be missing from the output.
    #[test]
    fn filters_data() -> anyhow::Result<()> {
        let records = "Column1,Column2\nValue1,Value2\nValue3,Value4\n";
        let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes());
        let mut writer = csv::Writer::from_writer(vec![]);
        filter_file(
            &vec![Box::new(FilterRule {
                column_name: "Column1".to_owned(),
                comparator: Comparator::NotEqual("Value3".to_owned()),
            })],
            &mut reader,
            &mut writer,
        )?;
        let result = String::from_utf8(writer.into_inner()?)?;
        assert_eq!(
            "Column1,Column2\nValue1,Value2\n",
            result,
            "Should filter out second record due to filter rules"
        );
        Ok(())
    }

    /// The header must still be written when every record is rejected,
    /// including the first one (which the header is derived from).
    #[test]
    fn should_print_header_when_no_rules_pass() -> anyhow::Result<()> {
        // Both records share the same `Column1` value so a single `NotEqual`
        // rule rejects all of them.
        let records = "Column1,Column2\nValue1,Value2\nValue1,Value4\n";
        let mut reader: csv::Reader<&[u8]> = csv::Reader::from_reader(records.as_bytes());
        let mut writer = csv::Writer::from_writer(vec![]);
        filter_file(
            &vec![Box::new(FilterRule {
                column_name: "Column1".to_owned(),
                comparator: Comparator::NotEqual("Value1".to_owned()),
            })],
            &mut reader,
            &mut writer,
        )?;
        let result = String::from_utf8(writer.into_inner()?)?;
        assert_eq!(
            "Column1,Column2\n", result,
            "Should still write the header when every record is filtered out"
        );
        Ok(())
    }
}