From 1e36bc68e45a6f4c9ed45038038b7ce670e37b5f Mon Sep 17 00:00:00 2001 From: piv <> Date: Wed, 8 Mar 2023 12:56:39 +1030 Subject: [PATCH] Fix move money to correctly move between accounts/ccs with ranges and and consider cost output correctly, speed up process by using hashset for rule accounts/departments --- src/lib.rs | 14 ++- src/main.rs | 13 ++- src/move_money.rs | 211 +++++++++++++++++++++++-------------- src/overhead_allocation.rs | 2 - 4 files changed, 152 insertions(+), 88 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2292aa2..488ada4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,6 +22,7 @@ pub extern "C" fn move_money_from_text( rules: *const c_char, lines: *const c_char, accounts: *const c_char, + cost_centres: *const c_char, use_numeric_accounts: bool, ) -> *mut c_char { let mut output_writer = csv::Writer::from_writer(vec![]); @@ -34,13 +35,18 @@ pub extern "C" fn move_money_from_text( CStr::from_ptr(lines) }; let safe_accounts = unsafe { - assert!(!lines.is_null()); + assert!(!accounts.is_null()); CStr::from_ptr(accounts) }; + let safe_cost_centres = unsafe { + assert!(!cost_centres.is_null()); + CStr::from_ptr(cost_centres) + }; move_money( - csv::Reader::from_reader(safe_rules.to_bytes()), - csv::Reader::from_reader(safe_lines.to_bytes()), - csv::Reader::from_reader(safe_accounts.to_bytes()), + &mut csv::Reader::from_reader(safe_rules.to_bytes()), + &mut csv::Reader::from_reader(safe_lines.to_bytes()), + &mut csv::Reader::from_reader(safe_accounts.to_bytes()), + &mut csv::Reader::from_reader(safe_cost_centres.to_bytes()), &mut output_writer, use_numeric_accounts, true, diff --git a/src/main.rs b/src/main.rs index 4337768..5a8c90e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,6 +27,9 @@ enum Commands { #[arg(short = 'a', long, value_name = "FILE")] accounts: PathBuf, + #[arg(short = 'c', long, value_name = "FILE")] + cost_centres: PathBuf, + #[arg(short, long, value_name = "FILE")] output: Option, @@ -80,6 +83,7 @@ fn main() -> anyhow::Result<()> { rules, lines, accounts, + cost_centres, output, use_numeric_accounts, flush_pass, @@ -87,6 +91,7 @@ fn main() -> anyhow::Result<()> { rules, lines, accounts, + cost_centres, output, use_numeric_accounts, flush_pass, @@ -118,14 +123,16 @@ fn move_money( rules: PathBuf, lines: PathBuf, accounts: PathBuf, + cost_centres: PathBuf, output: Option, use_numeric_accounts: bool, flush_pass: bool, ) -> anyhow::Result<()> { coster_rs::move_money( - csv::Reader::from_path(rules)?, - csv::Reader::from_path(lines)?, - csv::Reader::from_path(accounts)?, + &mut csv::Reader::from_path(rules)?, + &mut csv::Reader::from_path(lines)?, + &mut csv::Reader::from_path(accounts)?, + &mut csv::Reader::from_path(cost_centres)?, &mut csv::Writer::from_path(output.unwrap_or(PathBuf::from("output.csv")))?, use_numeric_accounts, flush_pass, diff --git a/src/move_money.rs b/src/move_money.rs index a2e1bb4..e81ac82 100644 --- a/src/move_money.rs +++ b/src/move_money.rs @@ -1,4 +1,7 @@ -use std::{collections::HashMap, thread::current}; +use std::{ + collections::{HashMap, HashSet}, + thread::current, +}; use itertools::Itertools; use serde::{Deserialize, Serialize, Serializer}; @@ -42,15 +45,23 @@ struct CsvMovementRule { cost_output: Option, } +#[derive(Deserialize)] +pub struct PartialCostCentre { + #[serde(rename = "Code")] + pub code: String, + #[serde(rename = "Area")] + pub area: Option, +} + #[derive(Default)] pub struct MovementRule { // If the vectors are empty, then it means 'all' - pub from_departments: Vec, - pub to_departments: Vec, + pub from_departments: HashSet, + pub to_departments: HashSet, pub all_from_departments: bool, pub all_to_departments: bool, - pub from_accounts: Vec, - pub to_accounts: Vec, + pub from_accounts: HashSet, + pub to_accounts: HashSet, pub all_from_accounts: bool, pub all_to_accounts: bool, pub amount: f64, @@ -102,10 +113,11 @@ where s.serialize_f64((x * 100000.).round() / 100000.) } -pub fn move_money( - rules_reader: csv::Reader, - lines_reader: csv::Reader, - accounts_reader: csv::Reader, +pub fn move_money( + rules_reader: &mut csv::Reader, + lines_reader: &mut csv::Reader, + accounts_reader: &mut csv::Reader, + cost_centres_reader: &mut csv::Reader, output: &mut csv::Writer, use_numeric_accounts: bool, flush_pass: bool, @@ -114,9 +126,9 @@ where R: std::io::Read, L: std::io::Read, A: std::io::Read, + C: std::io::Read, O: std::io::Write, { - let mut lines_reader = lines_reader; let headers = lines_reader.headers()?; let mut account_index = 0; let mut department_index = 0; @@ -179,53 +191,89 @@ where .sorted() .collect() }; - let all_departments_sorted = lines + let line_departments_sorted = lines .keys() .map(|key| key.department.clone()) .unique() .sorted() .collect(); + let all_departments_sorted = cost_centres_reader + .deserialize::() + .map(|cc| cc.unwrap().code) + .unique() + .sorted() + .collect(); let mut rules_reader = rules_reader; let mut rules: Vec = vec![]; for movement_rule in rules_reader.deserialize() { // TODO: Consider reclass rule group, how does that even work? let movement_rule: CsvMovementRule = movement_rule?; - let from_accounts = extract_range( - movement_rule.source_from_account, - movement_rule.source_to_account, - false, - &all_accounts_sorted, - ); - let to_accounts = if movement_rule.cost_output.is_some() { - account_mappings - .iter() - .filter(|(_, account)| { - account.cost_output.is_some() - && account.cost_output.clone().unwrap() - == movement_rule.cost_output.clone().unwrap() - }) - .map(|(code, _)| code.clone()) - .collect() + let is_separator = movement_rule.apply == "-DIVIDER-"; + let from_accounts = if is_separator { + HashSet::new() + } else { + if movement_rule.cost_output.is_some() { + account_mappings + .iter() + .filter(|(_, account)| { + account.cost_output.is_some() + && account.cost_output.clone().unwrap() + == movement_rule.cost_output.clone().unwrap() + }) + .map(|(code, _)| code.clone()) + .collect() + } else { + extract_range( + movement_rule.source_from_account, + movement_rule.source_to_account, + false, + &all_accounts_sorted, + ) + } + }; + let to_accounts = if is_separator { + HashSet::new() + } else { + if movement_rule.cost_output.is_some() { + account_mappings + .iter() + .filter(|(_, account)| { + account.cost_output.is_some() + && account.cost_output.clone().unwrap() + == movement_rule.cost_output.clone().unwrap() + }) + .map(|(code, _)| code.clone()) + .collect() + } else { + extract_range( + movement_rule.dest_from_account, + movement_rule.dest_to_account, + false, + &all_accounts_sorted, + ) + } + }; + let from_departments = if is_separator { + HashSet::new() } else { extract_range( - movement_rule.dest_from_account, - movement_rule.dest_to_account, + movement_rule.source_from_department, + movement_rule.source_to_department, false, - &all_accounts_sorted, + // Don't use all departments, as we only need ones that actually have costs + &line_departments_sorted, + ) + }; + let to_departments = if is_separator { + HashSet::new() + } else { + extract_range( + movement_rule.dest_from_department, + movement_rule.dest_to_department, + false, + &all_departments_sorted, ) }; - let from_departments = extract_range( - movement_rule.source_from_department, - movement_rule.source_to_department, - false, - &all_departments_sorted, - ); - let to_departments = extract_range( - movement_rule.dest_from_department, - movement_rule.dest_to_department, - false, - &all_departments_sorted, - ); rules.push(MovementRule { from_departments, to_departments, @@ -242,7 +290,7 @@ where 1. }), is_percent: movement_rule.is_percent == "%", - is_separator: movement_rule.apply == "-DIVIDER-", + is_separator, }) } @@ -264,9 +312,9 @@ where Ok(()) } -fn extract_range(from: String, to: String, all: bool, options: &Vec) -> Vec { - if all { - return vec![]; +fn extract_range(from: String, to: String, all: bool, options: &Vec) -> HashSet { + if all || (from.is_empty() && to.is_empty()) { + return options.clone().into_iter().collect(); } let start_index = options .iter() @@ -280,14 +328,14 @@ fn extract_range(from: String, to: String, all: bool, options: &Vec) -> .map(|end| end.0); if let Some(start) = start_index { if let Some(end) = end_index { - return Vec::from(&options[start..end + 1]); + return Vec::from(&options[start..end + 1]).into_iter().collect(); } else { - return vec![options[start].clone()]; + return vec![options[start].clone()].into_iter().collect(); } } else if let Some(end) = end_index { - return vec![options[end].clone()]; + return vec![options[end].clone()].into_iter().collect(); } - return vec![]; + return HashSet::new(); } // Approach 1: @@ -301,6 +349,7 @@ pub fn move_money_1() {} pub struct MoveMoneyResult { pass: i32, + // TODO: We want the from account and the to account totals: HashMap, } @@ -322,6 +371,8 @@ pub fn move_money_2( ) -> Vec { // Note: It's potentially a bit more intensive to use cloned totals (rather than just update temp_total per rule), // but it's much simpler code and, and since we're only working line-by-line, it isn't really that much memory in practice + // TODO: This is initial totals is problematic, as we may move into a cc that wasn't in the list of lines. In which case we'd need + // to add a new line let mut running_total = HashMap::from(initial_totals); let mut temp_total = running_total.clone(); let mut moveMoneyResult: Vec = vec![]; @@ -335,48 +386,49 @@ pub fn move_money_2( for rule in rules { if rule.is_separator { if flush_pass { + current_pass += 1; // Flush the totals at the end of this pass (more specifically the change) moveMoneyResult.push(MoveMoneyResult { pass: current_pass, - totals: running_total + totals: temp_total .iter() - .map(|(unit, value)| (unit.clone(), temp_total.get(unit).unwrap() - value)) + .map(|(unit, value)| { + (unit.clone(), value - running_total.get(unit).unwrap_or(&0.)) + }) .filter(|(_, value)| *value != 0.) .collect(), }); - current_pass += 1; } running_total = temp_total.clone(); } else { - let mut sum_from = 0.; - for unit in &running_total { - if (rule.all_from_departments || rule.from_departments.contains(&unit.0.department)) - && (rule.all_from_accounts || rule.from_accounts.contains(&unit.0.account)) + for (unit, amount) in &running_total { + if (rule.all_from_departments || rule.from_departments.contains(&unit.department)) + && (rule.all_from_accounts || rule.from_accounts.contains(&unit.account)) { - let previous_temp = unit.1; + let previous_temp = amount; let added_amount = if rule.is_percent { previous_temp * rule.amount } else { rule.amount }; - sum_from += added_amount; - *temp_total.get_mut(&unit.0).unwrap() -= added_amount; - } - } - - let num_to_units = running_total - .keys() - .filter(|key| { - (rule.all_to_departments || rule.to_departments.contains(&key.department)) - && (rule.all_to_accounts || rule.to_accounts.contains(&key.account)) - }) - .count(); - let value_per_unit = sum_from / num_to_units as f64; - for unit in running_total.keys() { - if (rule.all_to_departments || rule.to_departments.contains(&unit.department)) - && (rule.all_to_accounts || rule.to_accounts.contains(&unit.account)) - { - *temp_total.get_mut(&unit).unwrap() += value_per_unit; + // TODO: Track the from department/account, maintained in a separate list if flush_pass is set + *temp_total.get_mut(&unit).unwrap() -= added_amount; + let department = if rule.to_departments.len() == 1 { + rule.to_departments.iter().next().unwrap().clone() + } else { + unit.department.clone() + }; + let account = if rule.to_accounts.len() == 1 { + rule.to_accounts.iter().next().unwrap().clone() + } else { + unit.account.clone() + }; + *temp_total + .entry(Unit { + department, + account, + }) + .or_insert(0.) += added_amount; } } } @@ -394,9 +446,10 @@ mod tests { #[test] fn move_money() { super::move_money( - csv::Reader::from_path("reclassrule.csv").unwrap(), - csv::Reader::from_path("line.csv").unwrap(), - csv::Reader::from_path("account.csv").unwrap(), + &mut csv::Reader::from_path("reclassrule.csv").unwrap(), + &mut csv::Reader::from_path("line.csv").unwrap(), + &mut csv::Reader::from_path("account.csv").unwrap(), + &mut csv::Reader::from_path("costcentres.csv").unwrap(), &mut csv::Writer::from_path("output.csv").unwrap(), false, true, diff --git a/src/overhead_allocation.rs b/src/overhead_allocation.rs index 0b3569e..7ecabf5 100644 --- a/src/overhead_allocation.rs +++ b/src/overhead_allocation.rs @@ -39,8 +39,6 @@ pub struct AllocationStatisticAccountRange { end: usize, } -type CsvCostCentre = HashMap; - type CsvArea = HashMap; // Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department,