Refactor codebase into submodules
This commit is contained in:
283
src/move_money.rs
Normal file
283
src/move_money.rs
Normal file
@@ -0,0 +1,283 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use itertools::Itertools;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
// TODO: Fix up these names, check if all is actually integrated into the strings
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct CsvMovementRule {
|
||||
#[serde(rename = "FromCC")]
|
||||
// Need strings to further split later
|
||||
from_departments: String,
|
||||
to_departments: String,
|
||||
all_from_departments: bool,
|
||||
all_to_departments: bool,
|
||||
from_accounts: String,
|
||||
to_accounts: String,
|
||||
all_from_accounts: bool,
|
||||
all_to_accounts: bool,
|
||||
amount: f64,
|
||||
is_percent: Option<bool>,
|
||||
is_separator: Option<bool>,
|
||||
}
|
||||
|
||||
/// A fully resolved movement rule: ranges from the rules file have already been
/// expanded into explicit lists of departments and accounts.
///
/// A rule either moves money from the matching source units to the matching
/// destination units, or (when `is_separator` is set) marks the end of a pass.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct MovementRule {
    // The lists are left empty when the matching `all_*` flag is set; the flag,
    // not the empty list, is what makes a rule match every department/account.
    /// Departments money is taken from (ignored when `all_from_departments`).
    pub from_departments: Vec<String>,
    /// Departments money is moved to (ignored when `all_to_departments`).
    pub to_departments: Vec<String>,
    /// Every department is a source.
    pub all_from_departments: bool,
    /// Every department is a destination.
    pub all_to_departments: bool,
    /// Accounts money is taken from (ignored when `all_from_accounts`).
    pub from_accounts: Vec<String>,
    /// Accounts money is moved to (ignored when `all_to_accounts`).
    pub to_accounts: Vec<String>,
    /// Every account is a source.
    pub all_from_accounts: bool,
    /// Every account is a destination.
    pub all_to_accounts: bool,
    /// Amount taken from each source unit: a fixed value, or a multiplier of
    /// the unit's current total when `is_percent` is set.
    pub amount: f64,
    /// Whether `amount` is relative to the source unit's total.
    pub is_percent: bool,
    /// Marks a pass break instead of an actual movement.
    pub is_separator: bool,
}
|
||||
|
||||
impl MovementRule {
    /// Creates a rule that only marks a pass break: `is_separator` is set and
    /// every other field keeps its default value.
    pub fn pass_break() -> MovementRule {
        Self {
            is_separator: true,
            ..Self::default()
        }
    }

    /// Returns `true` when the rule passes the basic sanity checks.
    ///
    /// A rule is rejected when both department lists are empty, or when it is
    /// a percentage rule whose amount lies outside `0.0..=100.0`.
    pub fn validate(&self) -> bool {
        // Would be nice to report a decent message/error for each failure.
        let no_departments = self.from_departments.is_empty() && self.to_departments.is_empty();
        let percent_out_of_range =
            self.is_percent && (self.amount < 0.0 || self.amount > 100.0);
        !no_departments && !percent_out_of_range
    }
}
|
||||
|
||||
/// A (department, account) pair: the key under which a total is stored.
#[derive(Debug, Hash, Clone, Default, PartialEq, Eq)]
pub struct Unit {
    /// Department the total belongs to.
    pub department: String,
    /// Account the total belongs to.
    pub account: String,
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct CsvCost {
|
||||
account: String,
|
||||
department: String,
|
||||
value: f64,
|
||||
}
|
||||
|
||||
pub fn move_money<R, L, O>(
|
||||
rules_reader: csv::Reader<R>,
|
||||
lines_reader: csv::Reader<L>,
|
||||
output: csv::Writer<O>,
|
||||
use_numeric_accounts: bool,
|
||||
) -> anyhow::Result<()>
|
||||
where
|
||||
R: std::io::Read,
|
||||
L: std::io::Read,
|
||||
O: std::io::Write,
|
||||
{
|
||||
let mut lines_reader = lines_reader;
|
||||
let headers = lines_reader.headers()?;
|
||||
let mut account_index = 0;
|
||||
let mut department_index = 0;
|
||||
for (index, field) in headers.iter().enumerate() {
|
||||
if field.eq_ignore_ascii_case("account") {
|
||||
account_index = index;
|
||||
} else if field.eq_ignore_ascii_case("department") {
|
||||
department_index = index;
|
||||
}
|
||||
}
|
||||
|
||||
let lines: HashMap<Unit, f64> = lines_reader
|
||||
.records()
|
||||
.map(|record| {
|
||||
let record = record.unwrap();
|
||||
let account = record.get(account_index).unwrap();
|
||||
let department = record.get(department_index).unwrap();
|
||||
let sum = record
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| *i != account_index && *i != department_index)
|
||||
.map(|(_, f)| f.parse::<f64>().unwrap())
|
||||
.sum();
|
||||
(
|
||||
Unit {
|
||||
account: account.into(),
|
||||
department: department.into(),
|
||||
},
|
||||
sum,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let all_accounts_sorted = if use_numeric_accounts {
|
||||
lines
|
||||
.keys()
|
||||
.map(|key| key.account.clone().parse::<i32>().unwrap())
|
||||
.sorted()
|
||||
.map(|account| account.to_string())
|
||||
.collect()
|
||||
} else {
|
||||
lines
|
||||
.keys()
|
||||
.map(|key| key.account.clone())
|
||||
.sorted()
|
||||
.collect()
|
||||
};
|
||||
let all_departments_sorted = lines
|
||||
.keys()
|
||||
.map(|key| key.department.clone())
|
||||
.sorted()
|
||||
.collect();
|
||||
let mut rules_reader = rules_reader;
|
||||
let mut rules: Vec<MovementRule> = vec![];
|
||||
for result in rules_reader.deserialize() {
|
||||
let movement_rule: CsvMovementRule = result?;
|
||||
let from_accounts = extract_range(
|
||||
movement_rule.from_accounts,
|
||||
movement_rule.all_from_accounts,
|
||||
&all_accounts_sorted,
|
||||
);
|
||||
let to_accounts = extract_range(
|
||||
movement_rule.to_accounts,
|
||||
movement_rule.all_to_accounts,
|
||||
&all_accounts_sorted,
|
||||
);
|
||||
let from_departments = extract_range(
|
||||
movement_rule.from_departments,
|
||||
movement_rule.all_from_departments,
|
||||
&all_departments_sorted,
|
||||
);
|
||||
let to_departments = extract_range(
|
||||
movement_rule.to_departments,
|
||||
movement_rule.all_to_departments,
|
||||
&all_departments_sorted,
|
||||
);
|
||||
rules.push(MovementRule {
|
||||
from_departments,
|
||||
to_departments,
|
||||
all_from_departments: movement_rule.all_from_departments,
|
||||
all_to_departments: movement_rule.all_to_departments,
|
||||
from_accounts,
|
||||
to_accounts,
|
||||
all_from_accounts: movement_rule.all_from_accounts,
|
||||
all_to_accounts: movement_rule.all_to_accounts,
|
||||
amount: movement_rule.amount,
|
||||
is_percent: movement_rule.is_percent.unwrap_or(false),
|
||||
is_separator: movement_rule.is_separator.unwrap_or(false),
|
||||
})
|
||||
}
|
||||
|
||||
// Then run move_money
|
||||
let moved = move_money_2(lines, &rules);
|
||||
let mut output = output;
|
||||
|
||||
// Ouput the list moved moneys
|
||||
for money in moved {
|
||||
output.serialize(CsvCost {
|
||||
account: money.0.account,
|
||||
department: money.0.department,
|
||||
value: money.1,
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Expands a rule's department/account selection into an explicit list.
///
/// * `range` - a single name, or `start-end` naming two entries of `options`.
///   Only the first two `-`-separated parts are considered.
/// * `all` - when `true` the selection is "everything" and an empty list is
///   returned (the caller keeps the flag alongside the list).
/// * `options` - the known names, in the order that defines what "between
///   start and end" means.
///
/// Returns:
/// * `[range]` unchanged when it contains no `-`;
/// * every option from start to end inclusive when both endpoints are found
///   (the endpoints may be given in either order);
/// * just the endpoint that was found when only one of them exists;
/// * an empty list when neither endpoint exists.
// `options` stays `&Vec<String>` on purpose: callers rely on this parameter
// type to infer the type of the collections they pass in.
fn extract_range(range: String, all: bool, options: &Vec<String>) -> Vec<String> {
    if all {
        return Vec::new();
    }

    let position_of = |name: &str| options.iter().position(|option| option.as_str() == name);

    // Resolve both endpoints to indices inside a scope of their own, so the
    // borrow of `range` has ended before `range` may be moved into the result.
    let endpoints = {
        let mut parts = range.split('-');
        match (parts.next(), parts.next()) {
            (Some(first), Some(second)) => Some((position_of(first), position_of(second))),
            _ => None,
        }
    };

    match endpoints {
        // No separator at all: a single, literal name.
        None => vec![range],
        Some((Some(start), Some(end))) => {
            // A reversed range used to panic on the slice below; normalise it.
            let (low, high) = if start <= end { (start, end) } else { (end, start) };
            options[low..=high].to_vec()
        }
        Some((Some(only), None)) | Some((None, Some(only))) => vec![options[only].clone()],
        Some((None, None)) => Vec::new(),
    }
}
|
||||
|
||||
/// Approach 1 (not implemented yet).
///
/// Use math (linear algebra) to move money between departments. In memory and
/// compute terms this is equivalent to the worst case of approach 2, but it
/// can take advantage of auto-parallelisation/SIMD to run fast, particularly
/// on larger datasets.
///
/// The idea is to smush all the rules together, then do one matrix multiply
/// and one matrix addition on the initial set. Individual passes can't be
/// recorded, but the smushed rules can be kept if only the data changes later.
/// Another advantage of this approach is that it can easily be extended to run
/// on the GPU.
pub fn move_money_1() {}
|
||||
|
||||
// Approach 2:
|
||||
// Traditinoal/naive, total for each department is stored in an initial map (department -> total amount).
|
||||
// Another map is built up for each rule, and each rule is processed based on the amount in the current total
|
||||
// map.
|
||||
// Upon a pass break (separator), the temp map will assign the values into the total map.
|
||||
// Once done, do a final assignment back to the total, and return that.
|
||||
// Advantage of this is the required code is tiny, and no third-party math library is required.
|
||||
// Note that the movement happens on a line-by-line level. So we can stream the data from disk, and potentially apply this
|
||||
// to every. It's also much more memory efficient than approach 1.
|
||||
// TODO: Time both approaches to seee which is faster depending on the size of the input data/number of rules
|
||||
pub fn move_money_2(
|
||||
initial_totals: HashMap<Unit, f64>,
|
||||
rules: &Vec<MovementRule>,
|
||||
) -> HashMap<Unit, f64> {
|
||||
// Note: It's potentially a bit more intensive to use cloned totals (rather than just update temp_total per rule),
|
||||
// but it's much simpler code and, and since we're only working line-by-line, it isn't really that much memory in practice
|
||||
let mut running_total = HashMap::from(initial_totals);
|
||||
let mut temp_total = running_total.clone();
|
||||
for rule in rules {
|
||||
if rule.is_separator {
|
||||
running_total = temp_total.clone();
|
||||
} else {
|
||||
let mut sum_from = 0.;
|
||||
for unit in &running_total {
|
||||
if (rule.all_from_departments || rule.from_departments.contains(&unit.0.department))
|
||||
&& (rule.all_from_accounts || rule.from_accounts.contains(&unit.0.account))
|
||||
{
|
||||
let previous_temp = unit.1;
|
||||
let added_amount = if rule.is_percent {
|
||||
previous_temp * rule.amount
|
||||
} else {
|
||||
rule.amount
|
||||
};
|
||||
sum_from += added_amount;
|
||||
*temp_total.get_mut(&unit.0).unwrap() -= added_amount;
|
||||
}
|
||||
}
|
||||
|
||||
let num_to_units = running_total
|
||||
.keys()
|
||||
.filter(|key| {
|
||||
(rule.all_to_accounts || rule.to_departments.contains(&key.department))
|
||||
&& (rule.all_to_accounts || rule.to_accounts.contains(&key.account))
|
||||
})
|
||||
.count();
|
||||
let value_per_unit = sum_from / num_to_units as f64;
|
||||
for unit in running_total.keys() {
|
||||
*temp_total.get_mut(&unit).unwrap() += value_per_unit;
|
||||
}
|
||||
}
|
||||
}
|
||||
temp_total
|
||||
}
|
||||
Reference in New Issue
Block a user