Fix move money to correctly move between accounts/ccs with ranges and and consider cost output correctly, speed up process by using hashset for rule accounts/departments

This commit is contained in:
piv
2023-03-08 12:56:39 +10:30
parent e1bb6e65d3
commit 1e36bc68e4
4 changed files with 152 additions and 88 deletions

View File

@@ -22,6 +22,7 @@ pub extern "C" fn move_money_from_text(
rules: *const c_char, rules: *const c_char,
lines: *const c_char, lines: *const c_char,
accounts: *const c_char, accounts: *const c_char,
cost_centres: *const c_char,
use_numeric_accounts: bool, use_numeric_accounts: bool,
) -> *mut c_char { ) -> *mut c_char {
let mut output_writer = csv::Writer::from_writer(vec![]); let mut output_writer = csv::Writer::from_writer(vec![]);
@@ -34,13 +35,18 @@ pub extern "C" fn move_money_from_text(
CStr::from_ptr(lines) CStr::from_ptr(lines)
}; };
let safe_accounts = unsafe { let safe_accounts = unsafe {
assert!(!lines.is_null()); assert!(!accounts.is_null());
CStr::from_ptr(accounts) CStr::from_ptr(accounts)
}; };
let safe_cost_centres = unsafe {
assert!(!cost_centres.is_null());
CStr::from_ptr(cost_centres)
};
move_money( move_money(
csv::Reader::from_reader(safe_rules.to_bytes()), &mut csv::Reader::from_reader(safe_rules.to_bytes()),
csv::Reader::from_reader(safe_lines.to_bytes()), &mut csv::Reader::from_reader(safe_lines.to_bytes()),
csv::Reader::from_reader(safe_accounts.to_bytes()), &mut csv::Reader::from_reader(safe_accounts.to_bytes()),
&mut csv::Reader::from_reader(safe_cost_centres.to_bytes()),
&mut output_writer, &mut output_writer,
use_numeric_accounts, use_numeric_accounts,
true, true,

View File

@@ -27,6 +27,9 @@ enum Commands {
#[arg(short = 'a', long, value_name = "FILE")] #[arg(short = 'a', long, value_name = "FILE")]
accounts: PathBuf, accounts: PathBuf,
#[arg(short = 'c', long, value_name = "FILE")]
cost_centres: PathBuf,
#[arg(short, long, value_name = "FILE")] #[arg(short, long, value_name = "FILE")]
output: Option<PathBuf>, output: Option<PathBuf>,
@@ -80,6 +83,7 @@ fn main() -> anyhow::Result<()> {
rules, rules,
lines, lines,
accounts, accounts,
cost_centres,
output, output,
use_numeric_accounts, use_numeric_accounts,
flush_pass, flush_pass,
@@ -87,6 +91,7 @@ fn main() -> anyhow::Result<()> {
rules, rules,
lines, lines,
accounts, accounts,
cost_centres,
output, output,
use_numeric_accounts, use_numeric_accounts,
flush_pass, flush_pass,
@@ -118,14 +123,16 @@ fn move_money(
rules: PathBuf, rules: PathBuf,
lines: PathBuf, lines: PathBuf,
accounts: PathBuf, accounts: PathBuf,
cost_centres: PathBuf,
output: Option<PathBuf>, output: Option<PathBuf>,
use_numeric_accounts: bool, use_numeric_accounts: bool,
flush_pass: bool, flush_pass: bool,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
coster_rs::move_money( coster_rs::move_money(
csv::Reader::from_path(rules)?, &mut csv::Reader::from_path(rules)?,
csv::Reader::from_path(lines)?, &mut csv::Reader::from_path(lines)?,
csv::Reader::from_path(accounts)?, &mut csv::Reader::from_path(accounts)?,
&mut csv::Reader::from_path(cost_centres)?,
&mut csv::Writer::from_path(output.unwrap_or(PathBuf::from("output.csv")))?, &mut csv::Writer::from_path(output.unwrap_or(PathBuf::from("output.csv")))?,
use_numeric_accounts, use_numeric_accounts,
flush_pass, flush_pass,

View File

@@ -1,4 +1,7 @@
use std::{collections::HashMap, thread::current}; use std::{
collections::{HashMap, HashSet},
thread::current,
};
use itertools::Itertools; use itertools::Itertools;
use serde::{Deserialize, Serialize, Serializer}; use serde::{Deserialize, Serialize, Serializer};
@@ -42,15 +45,23 @@ struct CsvMovementRule {
cost_output: Option<String>, cost_output: Option<String>,
} }
#[derive(Deserialize)]
pub struct PartialCostCentre {
#[serde(rename = "Code")]
pub code: String,
#[serde(rename = "Area")]
pub area: Option<String>,
}
#[derive(Default)] #[derive(Default)]
pub struct MovementRule { pub struct MovementRule {
// If the vectors are empty, then it means 'all' // If the vectors are empty, then it means 'all'
pub from_departments: Vec<String>, pub from_departments: HashSet<String>,
pub to_departments: Vec<String>, pub to_departments: HashSet<String>,
pub all_from_departments: bool, pub all_from_departments: bool,
pub all_to_departments: bool, pub all_to_departments: bool,
pub from_accounts: Vec<String>, pub from_accounts: HashSet<String>,
pub to_accounts: Vec<String>, pub to_accounts: HashSet<String>,
pub all_from_accounts: bool, pub all_from_accounts: bool,
pub all_to_accounts: bool, pub all_to_accounts: bool,
pub amount: f64, pub amount: f64,
@@ -102,10 +113,11 @@ where
s.serialize_f64((x * 100000.).round() / 100000.) s.serialize_f64((x * 100000.).round() / 100000.)
} }
pub fn move_money<R, L, A, O>( pub fn move_money<R, L, A, C, O>(
rules_reader: csv::Reader<R>, rules_reader: &mut csv::Reader<R>,
lines_reader: csv::Reader<L>, lines_reader: &mut csv::Reader<L>,
accounts_reader: csv::Reader<A>, accounts_reader: &mut csv::Reader<A>,
cost_centres_reader: &mut csv::Reader<C>,
output: &mut csv::Writer<O>, output: &mut csv::Writer<O>,
use_numeric_accounts: bool, use_numeric_accounts: bool,
flush_pass: bool, flush_pass: bool,
@@ -114,9 +126,9 @@ where
R: std::io::Read, R: std::io::Read,
L: std::io::Read, L: std::io::Read,
A: std::io::Read, A: std::io::Read,
C: std::io::Read,
O: std::io::Write, O: std::io::Write,
{ {
let mut lines_reader = lines_reader;
let headers = lines_reader.headers()?; let headers = lines_reader.headers()?;
let mut account_index = 0; let mut account_index = 0;
let mut department_index = 0; let mut department_index = 0;
@@ -179,24 +191,50 @@ where
.sorted() .sorted()
.collect() .collect()
}; };
let all_departments_sorted = lines let line_departments_sorted = lines
.keys() .keys()
.map(|key| key.department.clone()) .map(|key| key.department.clone())
.unique() .unique()
.sorted() .sorted()
.collect(); .collect();
let all_departments_sorted = cost_centres_reader
.deserialize::<PartialCostCentre>()
.map(|cc| cc.unwrap().code)
.unique()
.sorted()
.collect();
let mut rules_reader = rules_reader; let mut rules_reader = rules_reader;
let mut rules: Vec<MovementRule> = vec![]; let mut rules: Vec<MovementRule> = vec![];
for movement_rule in rules_reader.deserialize() { for movement_rule in rules_reader.deserialize() {
// TODO: Consider reclass rule group, how does that even work? // TODO: Consider reclass rule group, how does that even work?
let movement_rule: CsvMovementRule = movement_rule?; let movement_rule: CsvMovementRule = movement_rule?;
let from_accounts = extract_range( let is_separator = movement_rule.apply == "-DIVIDER-";
let from_accounts = if is_separator {
HashSet::new()
} else {
if movement_rule.cost_output.is_some() {
account_mappings
.iter()
.filter(|(_, account)| {
account.cost_output.is_some()
&& account.cost_output.clone().unwrap()
== movement_rule.cost_output.clone().unwrap()
})
.map(|(code, _)| code.clone())
.collect()
} else {
extract_range(
movement_rule.source_from_account, movement_rule.source_from_account,
movement_rule.source_to_account, movement_rule.source_to_account,
false, false,
&all_accounts_sorted, &all_accounts_sorted,
); )
let to_accounts = if movement_rule.cost_output.is_some() { }
};
let to_accounts = if is_separator {
HashSet::new()
} else {
if movement_rule.cost_output.is_some() {
account_mappings account_mappings
.iter() .iter()
.filter(|(_, account)| { .filter(|(_, account)| {
@@ -213,19 +251,29 @@ where
false, false,
&all_accounts_sorted, &all_accounts_sorted,
) )
}
}; };
let from_departments = extract_range( let from_departments = if is_separator {
HashSet::new()
} else {
extract_range(
movement_rule.source_from_department, movement_rule.source_from_department,
movement_rule.source_to_department, movement_rule.source_to_department,
false, false,
&all_departments_sorted, // Don't use all departments, as we only need ones that actually have costs
); &line_departments_sorted,
let to_departments = extract_range( )
};
let to_departments = if is_separator {
HashSet::new()
} else {
extract_range(
movement_rule.dest_from_department, movement_rule.dest_from_department,
movement_rule.dest_to_department, movement_rule.dest_to_department,
false, false,
&all_departments_sorted, &all_departments_sorted,
); )
};
rules.push(MovementRule { rules.push(MovementRule {
from_departments, from_departments,
to_departments, to_departments,
@@ -242,7 +290,7 @@ where
1. 1.
}), }),
is_percent: movement_rule.is_percent == "%", is_percent: movement_rule.is_percent == "%",
is_separator: movement_rule.apply == "-DIVIDER-", is_separator,
}) })
} }
@@ -264,9 +312,9 @@ where
Ok(()) Ok(())
} }
fn extract_range(from: String, to: String, all: bool, options: &Vec<String>) -> Vec<String> { fn extract_range(from: String, to: String, all: bool, options: &Vec<String>) -> HashSet<String> {
if all { if all || (from.is_empty() && to.is_empty()) {
return vec![]; return options.clone().into_iter().collect();
} }
let start_index = options let start_index = options
.iter() .iter()
@@ -280,14 +328,14 @@ fn extract_range(from: String, to: String, all: bool, options: &Vec<String>) ->
.map(|end| end.0); .map(|end| end.0);
if let Some(start) = start_index { if let Some(start) = start_index {
if let Some(end) = end_index { if let Some(end) = end_index {
return Vec::from(&options[start..end + 1]); return Vec::from(&options[start..end + 1]).into_iter().collect();
} else { } else {
return vec![options[start].clone()]; return vec![options[start].clone()].into_iter().collect();
} }
} else if let Some(end) = end_index { } else if let Some(end) = end_index {
return vec![options[end].clone()]; return vec![options[end].clone()].into_iter().collect();
} }
return vec![]; return HashSet::new();
} }
// Approach 1: // Approach 1:
@@ -301,6 +349,7 @@ pub fn move_money_1() {}
pub struct MoveMoneyResult { pub struct MoveMoneyResult {
pass: i32, pass: i32,
// TODO: We want the from account and the to account
totals: HashMap<Unit, f64>, totals: HashMap<Unit, f64>,
} }
@@ -322,6 +371,8 @@ pub fn move_money_2(
) -> Vec<MoveMoneyResult> { ) -> Vec<MoveMoneyResult> {
// Note: It's potentially a bit more intensive to use cloned totals (rather than just update temp_total per rule), // Note: It's potentially a bit more intensive to use cloned totals (rather than just update temp_total per rule),
// but it's much simpler code and, and since we're only working line-by-line, it isn't really that much memory in practice // but it's much simpler code and, and since we're only working line-by-line, it isn't really that much memory in practice
// TODO: This is initial totals is problematic, as we may move into a cc that wasn't in the list of lines. In which case we'd need
// to add a new line
let mut running_total = HashMap::from(initial_totals); let mut running_total = HashMap::from(initial_totals);
let mut temp_total = running_total.clone(); let mut temp_total = running_total.clone();
let mut moveMoneyResult: Vec<MoveMoneyResult> = vec![]; let mut moveMoneyResult: Vec<MoveMoneyResult> = vec![];
@@ -335,48 +386,49 @@ pub fn move_money_2(
for rule in rules { for rule in rules {
if rule.is_separator { if rule.is_separator {
if flush_pass { if flush_pass {
current_pass += 1;
// Flush the totals at the end of this pass (more specifically the change) // Flush the totals at the end of this pass (more specifically the change)
moveMoneyResult.push(MoveMoneyResult { moveMoneyResult.push(MoveMoneyResult {
pass: current_pass, pass: current_pass,
totals: running_total totals: temp_total
.iter() .iter()
.map(|(unit, value)| (unit.clone(), temp_total.get(unit).unwrap() - value)) .map(|(unit, value)| {
(unit.clone(), value - running_total.get(unit).unwrap_or(&0.))
})
.filter(|(_, value)| *value != 0.) .filter(|(_, value)| *value != 0.)
.collect(), .collect(),
}); });
current_pass += 1;
} }
running_total = temp_total.clone(); running_total = temp_total.clone();
} else { } else {
let mut sum_from = 0.; for (unit, amount) in &running_total {
for unit in &running_total { if (rule.all_from_departments || rule.from_departments.contains(&unit.department))
if (rule.all_from_departments || rule.from_departments.contains(&unit.0.department)) && (rule.all_from_accounts || rule.from_accounts.contains(&unit.account))
&& (rule.all_from_accounts || rule.from_accounts.contains(&unit.0.account))
{ {
let previous_temp = unit.1; let previous_temp = amount;
let added_amount = if rule.is_percent { let added_amount = if rule.is_percent {
previous_temp * rule.amount previous_temp * rule.amount
} else { } else {
rule.amount rule.amount
}; };
sum_from += added_amount; // TODO: Track the from department/account, maintained in a separate list if flush_pass is set
*temp_total.get_mut(&unit.0).unwrap() -= added_amount; *temp_total.get_mut(&unit).unwrap() -= added_amount;
} let department = if rule.to_departments.len() == 1 {
} rule.to_departments.iter().next().unwrap().clone()
} else {
let num_to_units = running_total unit.department.clone()
.keys() };
.filter(|key| { let account = if rule.to_accounts.len() == 1 {
(rule.all_to_departments || rule.to_departments.contains(&key.department)) rule.to_accounts.iter().next().unwrap().clone()
&& (rule.all_to_accounts || rule.to_accounts.contains(&key.account)) } else {
unit.account.clone()
};
*temp_total
.entry(Unit {
department,
account,
}) })
.count(); .or_insert(0.) += added_amount;
let value_per_unit = sum_from / num_to_units as f64;
for unit in running_total.keys() {
if (rule.all_to_departments || rule.to_departments.contains(&unit.department))
&& (rule.all_to_accounts || rule.to_accounts.contains(&unit.account))
{
*temp_total.get_mut(&unit).unwrap() += value_per_unit;
} }
} }
} }
@@ -394,9 +446,10 @@ mod tests {
#[test] #[test]
fn move_money() { fn move_money() {
super::move_money( super::move_money(
csv::Reader::from_path("reclassrule.csv").unwrap(), &mut csv::Reader::from_path("reclassrule.csv").unwrap(),
csv::Reader::from_path("line.csv").unwrap(), &mut csv::Reader::from_path("line.csv").unwrap(),
csv::Reader::from_path("account.csv").unwrap(), &mut csv::Reader::from_path("account.csv").unwrap(),
&mut csv::Reader::from_path("costcentres.csv").unwrap(),
&mut csv::Writer::from_path("output.csv").unwrap(), &mut csv::Writer::from_path("output.csv").unwrap(),
false, false,
true, true,

View File

@@ -39,8 +39,6 @@ pub struct AllocationStatisticAccountRange {
end: usize, end: usize,
} }
type CsvCostCentre = HashMap<String, String>;
type CsvArea = HashMap<String, String>; type CsvArea = HashMap<String, String>;
// Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department, // Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department,