// REVIEW NOTES -- documentation only; the patch text below is unchanged.
//
// This is a git diff of src/derive.rs and src/filter.rs, collapsed onto a few long lines.
// NOTE(review): generic parameter lists appear to have been lost in extraction
// (`Comparator` uses `T` in its variants but declares no `<T>`; `Vec`, `Box`, `BTreeMap`
// and `parse::()` carry no type arguments). Restore them from the real commit before
// applying or compiling -- TODO confirm against the repository.
//
// src/derive.rs
//   * Gains `Comparator`, `FieldName`, `DataValidator`, `FilterRule`, `to_filter_rules`,
//     `to_filter_rule` and `get_filter_rule`; the src/filter.rs hunks delete the same items.
//     `FilterRule`'s fields become `pub` in the move, and a `DataValidators` alias is added.
//   * `Comparator::is_valid` compares the supplied value with the stored operand using
//     `==`, `!=`, `>`, `<`, or `contains` / `!contains` for the `In` / `NotIn` lists.
//     `get_filter_rule` only ever builds Equal, GreaterThan, LessThan and NotEqual.
//   * `FilterRule::is_valid` parses the cell text first; a parse failure yields `false`.
//   * `to_filter_rule` switches on `filter.value_type` (String, Integer, Float, Boolean).
//     Non-string match values are parsed and a parse error is propagated with `?`.
//   * The comment inside `to_filter_rules` says that inlining `to_filter_rules` fails to
//     compile; presumably `to_filter_rule` is meant -- verify with the author.
//   * `DeriveRule.operations` changes element type, `DeriveColumnOperation` is added, and
//     `DeriveNode` gains `input_file_path` / `output_file_path`.
//   * `derive` reads the first record, writes the header from it, then hands that record
//     and every following one to `derive_line`.
//   * `derive_line` is a stub: the loops over `rule.filters` and `rule.operations` have
//     empty bodies, so each record is passed to `output.serialize` unchanged. The
//     commented-out `for line in line {` / `}` scaffolding is still present.
//   * `DeriveNodeRunner::run` opens a csv reader and writer from the node's paths, converts
//     every rule with `to_runnable_rule` (the first error aborts), then calls `derive`.
//     NOTE(review): `derive_node` is private and no constructor is visible in this patch,
//     whereas `FilterNodeRunner.filter_node` is `pub` -- confirm this is intended.
//
// src/filter.rs
//   * `is_line_valid` is extracted from `filter_file`. A rule passes when the record has no
//     entry for the rule's field, when the value is blank after `trim`, or when
//     `rule.is_valid(value)` is true; all rules must pass.
//   * Behaviour change: `filter_file` used to write the first record unconditionally and
//     now checks it with `is_line_valid` like every later record.
//   * NOTE(review): `if (is_line_valid(&line, &rules))` has redundant parentheses and
//     borrows `rules` a second time; the loop below passes `rules` directly.
//   * `FilterNodeRunner::run` now calls the free function `to_filter_rules`, and the test
//     module imports `Comparator` and `FilterRule` from `crate::derive`.
diff --git a/src/derive.rs b/src/derive.rs index 4679df0..44794ba 100644 --- a/src/derive.rs +++ b/src/derive.rs @@ -1,5 +1,12 @@ +use std::{collections::BTreeMap, str::FromStr}; + use serde::{Deserialize, Serialize}; +use crate::{ + io::{RecordDeserializer, RecordSerializer}, + node::RunnableNode, +}; + #[derive(Serialize, Deserialize, Clone)] pub enum DeriveColumnType { Column(String), @@ -63,13 +70,178 @@ pub struct DeriveFilter { pub value_type: ValueType, } +pub enum Comparator { + Equal(T), + NotEqual(T), + GreaterThan(T), + LessThan(T), + In(Vec), + NotIn(Vec), +} + +impl Comparator { + pub fn is_valid(&self, value: T) -> bool { + match self { + Comparator::Equal(v) => value == *v, + Comparator::NotEqual(v) => value != *v, + Comparator::GreaterThan(v) => value > *v, + Comparator::LessThan(v) => value < *v, + Comparator::In(v) => v.contains(&value), + Comparator::NotIn(v) => !v.contains(&value), + } + } +} + +pub trait FieldName { + // Name of the field this validator should work on + fn get_field_name(&self) -> String; +} + +pub type DataValidators = Vec>; + +pub trait DataValidator: FieldName { + // Whether the given value is valid for the validator + fn is_valid(&self, s: &str) -> bool; +} + +pub struct FilterRule { + pub column_name: String, + pub comparator: Comparator, +} + +impl FieldName for FilterRule { + fn get_field_name(&self) -> String { + self.column_name.clone() + } +} + +impl DataValidator for FilterRule { + fn is_valid(&self, s: &str) -> bool { + s.parse().map_or(false, |f| self.comparator.is_valid(f)) + } +} + +pub fn to_filter_rules(filters: &Vec) -> anyhow::Result>> { + filters + .iter() + // For some reason inlining to_filter_rules causes a compiler error, so leaving + // in a separate function (it is cleaner at least) + .map(|filter| to_filter_rule(filter)) + .collect() +} + +fn to_filter_rule(filter: &DeriveFilter) -> anyhow::Result> { + let value = filter.match_value.clone(); + match filter.value_type { 
crate::derive::ValueType::String => Ok(Box::new(get_filter_rule(filter, value))), + crate::derive::ValueType::Integer => { + Ok(Box::new(get_filter_rule(filter, value.parse::()?))) + } + crate::derive::ValueType::Float => { + Ok(Box::new(get_filter_rule(filter, value.parse::()?))) + } + crate::derive::ValueType::Boolean => { + Ok(Box::new(get_filter_rule(filter, value.parse::()?))) + } + } +} + +fn get_filter_rule(filter: &DeriveFilter, value: T) -> FilterRule { + FilterRule { + column_name: filter.column_name.clone(), + comparator: match filter.comparator { + MatchComparisonType::Equal => Comparator::Equal(value), + MatchComparisonType::GreaterThan => Comparator::GreaterThan(value), + MatchComparisonType::LessThan => Comparator::LessThan(value), + MatchComparisonType::NotEqual => Comparator::NotEqual(value), + }, + } +} + +#[derive(Serialize, Deserialize, Clone)] +pub struct DeriveColumnOperation { + pub column_name: String, + pub operation: DeriveOperation, +} + #[derive(Serialize, Deserialize, Clone)] pub struct DeriveRule { - pub operations: Vec, + pub operations: Vec, pub filters: Vec, } #[derive(Serialize, Deserialize, Clone)] pub struct DeriveNode { pub rules: Vec, + pub input_file_path: String, + pub output_file_path: String, +} + +pub struct RunnableDeriveRule { + pub operations: Vec, + pub filters: Vec>, +} + +impl DeriveRule { + fn to_runnable_rule(&self) -> anyhow::Result { + let filters = to_filter_rules(&self.filters)?; + Ok(RunnableDeriveRule { + operations: self.operations.clone(), + filters, + }) + } +} + +fn derive( + rules: &Vec, + input: &mut impl RecordDeserializer, + output: &mut impl RecordSerializer, +) -> anyhow::Result<()> { + if let Some(line) = input.deserialize()? { + let line: BTreeMap = line; + output.write_header(&line)?; + derive_line(line, rules, output)?; + + while let Some(line) = input.deserialize()? 
{ + let line: BTreeMap = line; + derive_line(line, rules, output)?; + } + } + Ok(()) +} + +fn derive_line( + line: BTreeMap, + rules: &Vec, + output: &mut impl RecordSerializer, +) -> anyhow::Result<()> { + for rule in rules { + // First check the filter works. If there are no filters, the rule applies to all rows + for filter in &rule.filters {} + // TODO: Split operations should be processed separately, after all the other operations have been applied + // Apply all operations individually, adding as a column to the record map + for operation in &rule.operations {} + } + // for line in line { + output.serialize(line) + // } +} + +pub struct DeriveNodeRunner { + derive_node: DeriveNode, +} + +impl RunnableNode for DeriveNodeRunner { + fn run(&self) -> anyhow::Result<()> { + let mut reader = csv::Reader::from_path(&self.derive_node.input_file_path)?; + let mut writer = csv::Writer::from_path(&self.derive_node.output_file_path)?; + let rules: anyhow::Result> = self + .derive_node + .rules + .iter() + .map(|rule| rule.to_runnable_rule()) + .collect(); + let rules = rules?; + derive(&rules, &mut reader, &mut writer) + } } diff --git a/src/filter.rs b/src/filter.rs index 13f0048..6a0427a 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -1,60 +1,23 @@ -use std::{collections::BTreeMap, str::FromStr}; +use std::collections::BTreeMap; use serde::{Deserialize, Serialize}; use crate::{ - derive::{DeriveFilter, MatchComparisonType}, + derive::{to_filter_rules, DataValidator, DataValidators, DeriveFilter}, io::{RecordDeserializer, RecordSerializer}, node::RunnableNode, }; -pub enum Comparator { - Equal(T), - NotEqual(T), - GreaterThan(T), - LessThan(T), - In(Vec), - NotIn(Vec), -} - -impl Comparator { - pub fn is_valid(&self, value: T) -> bool { - match self { - Comparator::Equal(v) => value == *v, - Comparator::NotEqual(v) => value != *v, - Comparator::GreaterThan(v) => value > *v, - Comparator::LessThan(v) => value < *v, - Comparator::In(v) => v.contains(&value), - 
Comparator::NotIn(v) => !v.contains(&value), - } - } -} - -pub trait FieldName { - // Name of the field this validator should work on - fn get_field_name(&self) -> String; -} - -pub trait DataValidator: FieldName { - // Whether the given value is valid for the validator - fn is_valid(&self, s: &str) -> bool; -} - -pub struct FilterRule { - column_name: String, - comparator: Comparator, -} - -impl FieldName for FilterRule { - fn get_field_name(&self) -> String { - self.column_name.clone() - } -} - -impl DataValidator for FilterRule { - fn is_valid(&self, s: &str) -> bool { - s.parse().map_or(false, |f| self.comparator.is_valid(f)) - } +fn is_line_valid(line: &BTreeMap, rules: &DataValidators) -> bool { + rules.iter().all(|rule| { + line.get(&rule.get_field_name()).map_or(true, |value| { + if value.trim().is_empty() { + true + } else { + rule.is_valid(value) + } + }) + }) } /** @@ -62,26 +25,21 @@ impl DataValidator for FilterRule { * that don't satisfy the filter criteria */ pub fn filter_file( - rules: &Vec>, + rules: &DataValidators, input: &mut impl RecordDeserializer, output: &mut impl RecordSerializer, ) -> anyhow::Result<()> { if let Some(line) = input.deserialize()? { let line: BTreeMap = line; output.write_header(&line)?; - output.write_record(&line)?; + + if (is_line_valid(&line, &rules)) { + output.write_record(&line)?; + } while let Some(line) = input.deserialize()? 
{ let line: BTreeMap = line; - if rules.iter().all(|rule| { - line.get(&rule.get_field_name()).map_or(true, |value| { - if value.trim().is_empty() { - true - } else { - rule.is_valid(value) - } - }) - }) { + if is_line_valid(&line, rules) { output.write_record(&line)?; } } @@ -97,45 +55,6 @@ pub struct FilterNode { pub output_file_path: String, } -impl FilterNode { - fn to_filter_rules(&self) -> anyhow::Result>> { - self.filters - .iter() - // For some reason inlining to_filter_rules causes a compiler error, so leaving - // in a separate function (it is cleaner at least) - .map(|filter| to_filter_rule(filter)) - .collect() - } -} - -fn to_filter_rule(filter: &DeriveFilter) -> anyhow::Result> { - let value = filter.match_value.clone(); - match filter.value_type { - crate::derive::ValueType::String => Ok(Box::new(get_filter_rule(filter, value))), - crate::derive::ValueType::Integer => { - Ok(Box::new(get_filter_rule(filter, value.parse::()?))) - } - crate::derive::ValueType::Float => { - Ok(Box::new(get_filter_rule(filter, value.parse::()?))) - } - crate::derive::ValueType::Boolean => { - Ok(Box::new(get_filter_rule(filter, value.parse::()?))) - } - } -} - -fn get_filter_rule(filter: &DeriveFilter, value: T) -> FilterRule { - FilterRule { - column_name: filter.column_name.clone(), - comparator: match filter.comparator { - MatchComparisonType::Equal => Comparator::Equal(value), - MatchComparisonType::GreaterThan => Comparator::GreaterThan(value), - MatchComparisonType::LessThan => Comparator::LessThan(value), - MatchComparisonType::NotEqual => Comparator::NotEqual(value), - }, - } -} - pub struct FilterNodeRunner { pub filter_node: FilterNode, } @@ -144,14 +63,15 @@ impl RunnableNode for FilterNodeRunner { fn run(&self) -> anyhow::Result<()> { let mut reader = csv::Reader::from_path(&self.filter_node.input_file_path)?; let mut writer = csv::Writer::from_path(&self.filter_node.output_file_path)?; - let rules = self.filter_node.to_filter_rules()?; + let rules = 
to_filter_rules(&self.filter_node.filters)?; filter_file(&rules, &mut reader, &mut writer) } } #[cfg(test)] mod tests { - use crate::filter::FilterRule; + + use crate::derive::{Comparator, FilterRule}; use super::filter_file; @@ -183,7 +103,7 @@ Value3,Value4 filter_file( &vec![Box::new(FilterRule { column_name: "Column1".to_owned(), - comparator: crate::filter::Comparator::NotEqual("Value3".to_owned()), + comparator: Comparator::NotEqual("Value3".to_owned()), })], &mut reader, &mut writer,