From 8a6a94a11ea19105da3f59a512e7ecd814bbf6cf Mon Sep 17 00:00:00 2001 From: Piv <18462828+Piv200@users.noreply.github.com> Date: Sat, 11 Mar 2023 23:46:37 +1030 Subject: [PATCH] Stop calculating amounts of overheads that don't have initial costs --- src/main.rs | 2 +- src/overhead_allocation.rs | 64 ++++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/src/main.rs b/src/main.rs index 5c0970b..5c0879e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -66,7 +66,7 @@ enum Commands { #[arg(short = 'f', long)] show_from: bool, - #[arg(short, long, default_value = "0.1")] + #[arg(short, long, default_value = "0.000001")] zero_threshold: f64, #[arg(short, long, value_name = "FILE")] diff --git a/src/overhead_allocation.rs b/src/overhead_allocation.rs index b870b7e..a605780 100644 --- a/src/overhead_allocation.rs +++ b/src/overhead_allocation.rs @@ -6,10 +6,7 @@ use std::{ use csv::Writer; use itertools::Itertools; use nalgebra::{DMatrix, Dynamic, LU}; -use rayon::prelude::{ - IntoParallelIterator, IntoParallelRefIterator, ParallelDrainFull, ParallelDrainRange, - ParallelIterator, -}; +use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use serde::{Deserialize, Serialize}; use crate::{CsvAccount, CsvCost}; @@ -75,7 +72,6 @@ struct MovedAmount { from_cost_centre: String, } -// TODO: Also need a way to dictate the order of the departments? pub trait ReciprocalAllocationSolver { fn solve(&self, costs: &DMatrix) -> DMatrix; } @@ -163,7 +159,6 @@ where // value is (cc, allocation_statistic, total) let mut totals: HashMap<(String, String), f64> = HashMap::new(); for line in lines.iter() { - // TODO: Another optimisation potential here, puttinig the accounts into a map, although less important since there's usually <1k accounts let line_index = all_accounts_sorted .iter() .position(|account| account == &line.account); @@ -324,6 +319,25 @@ where overhead_other_total.append(&mut totals); } } + // TODO: This seems to do nothing even in a complex setting where I'd expect it to do something, can probably be removed + let error_amounts: Vec = overhead_other_total + .iter() + .filter(|(_, _, value)| *value < zero_threshold && *value > -1. * zero_threshold) + .enumerate() + .map(|(index, _)| index) + .collect(); + for index in error_amounts.iter() { + let (overhead_cc, _, value) = overhead_other_total.remove(*index); + let non_error_match = overhead_other_total + .iter_mut() + .filter(|next| { + next.0 == *overhead_cc && (next.2 > zero_threshold || next.2 < -1. * zero_threshold) + }) + .next(); + if let Some((_, _, match_value)) = non_error_match { + *match_value = *match_value + value; + } + } // overhead department -> total (summed limit to costs) let mut overhead_cc_totals: HashMap = HashMap::new(); @@ -346,10 +360,6 @@ where } } - //TODO: Redistribute floating point errors so everything still sums to 1. Also reduces the amount of data produced - // Basically needs to take each mapping, and somehow sum the percentages less than the threshold, and evenly(?) redistribute - // them to the others within the same department... Wonder if this could have been done in the previous step somehow? - // Finally, for each cc match total produced previously, sum the overhead cc where overhead cc appears in other cc, then // divide the other cc by this summed amount (thus getting the relative cost) @@ -617,9 +627,22 @@ fn do_solve_reciprocal( // TODO: A performance improvement will be to create another hashmap for index -> department, then just // iterate over the actual indexes instead (will have preloading) for (overhead_department, index) in overhead_department_mappings.iter() { + // TODO: Check this filter is actually working correctly by summing the costs and comparing to the non show_from setting + // (the sums should match up) + // Thinking intuitively, if the cost truely didn't exist, then it never would have been included in the totals + // in the first place, + let initial_amount = total_costs + .summed_department_costs + .iter() + .filter(|cost| cost.department == *overhead_department) + .next(); + if initial_amount.is_none() { + continue; + } + + let calculated_amount = calculated_overheads[*index]; // Calculate each movement individually - let calculated = - operating_overhead_mappings.column(*index) * calculated_overheads[*index]; + let calculated = operating_overhead_mappings.column(*index) * calculated_amount; for (department, index) in &operating_department_mappings { let value = *calculated.get(*index).unwrap(); if value > zero_threshold || value < -1. * zero_threshold { @@ -650,7 +673,8 @@ fn do_solve_reciprocal( }) .collect(); // Redistribute floating point errors (only for ccs we actually allocated from/to) - // TODO: Consider removing this once we're doing this above. + // Considered removing this since redistribution should be done in cost driver calculations, however since that usually + // does nothing, we may as well keep this just in case. let initial_cost: f64 = total_costs .summed_department_costs .iter() @@ -672,6 +696,7 @@ fn do_solve_reciprocal( }) .collect(), }); + break; } Ok(final_account_costs) } @@ -763,9 +788,14 @@ mod tests { ], }]; - let result = - reciprocal_allocation_impl::(allocation_rules, initial_totals, None, 0.001) - .unwrap(); + let mut movement_writer = csv::Writer::from_path("test_output.csv").unwrap(); + let result = reciprocal_allocation_impl::( + allocation_rules, + initial_totals, + Some(&mut movement_writer), + 0.00001, + ) + .unwrap(); assert_eq!(expected_final_allocations, result); } @@ -802,7 +832,7 @@ mod tests { true, "E".to_owned(), true, - 0.1, + 0.00001, ); assert!(result.is_ok()) }