use std::{ collections::{HashMap, HashSet}, io::{Read, Write}, }; use csv::Writer; use itertools::Itertools; use nalgebra::{zero, DMatrix, Dynamic, LU}; use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use serde::{Deserialize, Serialize}; use crate::{CsvAccount, CsvCost}; #[derive(Debug, PartialEq, Eq)] pub enum DepartmentType { Operating, Overhead, } impl DepartmentType { pub fn from(s: &str) -> DepartmentType { if s == "P" { DepartmentType::Operating } else { DepartmentType::Overhead } } } #[derive(Deserialize)] pub struct CsvAllocationStatistic { #[serde(rename = "Name")] name: String, #[serde(rename = "AccountType")] account_type: String, #[serde(rename = "AccountRanges")] account_ranges: String, } pub struct AllocationStatisticAccountRange { start: usize, end: usize, } // Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department, // you either need -1 or rest negative for a row to subtract the initial amounts so we end up effectively 0 (simultaneous equations end // up with negative there so yes this is expected) pub struct OverheadAllocationRule { from_overhead_department: String, to_department: String, percent: f64, to_department_type: DepartmentType, } #[derive(Debug, PartialEq)] pub struct TotalDepartmentCost { department: String, value: f64, } #[derive(Debug, PartialEq)] pub struct AccountCost { account: String, summed_department_costs: Vec, } #[derive(Debug, Serialize, Deserialize)] struct MovedAmount { account: String, cost_centre: String, value: f64, from_cost_centre: String, } pub trait ReciprocalAllocationSolver { fn solve(&self, costs: &DMatrix) -> DMatrix; } impl ReciprocalAllocationSolver for LU { fn solve(&self, costs: &DMatrix) -> DMatrix { self.solve(costs).unwrap() } } impl ReciprocalAllocationSolver for DMatrix { fn solve(&self, costs: &DMatrix) -> DMatrix { self * costs } } pub fn reciprocal_allocation( lines: &mut csv::Reader, accounts: &mut csv::Reader, allocation_statistics: &mut csv::Reader, areas: &mut csv::Reader, cost_centres: &mut csv::Reader, // TODO: Receiver method rather than this writer that can accept // the raw float results, so we can write in an alternate format // that more accurately represents the values on disk output: &mut csv::Writer, use_numeric_accounts: bool, exclude_negative_allocation_statistics: bool, any_limit_criteria: bool, account_type: String, show_from: bool, zero_threshold: f64, ) -> anyhow::Result<()> where Lines: Read, Account: Read, AllocationStatistic: Read, Area: Read, CostCentre: Read, Output: std::io::Write, { let lines = lines .deserialize() .collect::, csv::Error>>()?; let all_accounts_sorted: Vec = if use_numeric_accounts { accounts .deserialize::() .filter(|account| { account.is_ok() && account.as_ref().unwrap().account_type == account_type }) .map(|line| line.unwrap().code.clone().parse::().unwrap()) .unique() .sorted() .map(|account| account.to_string()) .collect() } else { accounts .deserialize::() .filter(|account| { account.is_ok() && account.as_ref().unwrap().account_type == account_type }) .map(|line| line.unwrap().code.clone()) .unique() .sorted() .collect() }; let allocation_statistics = allocation_statistics .deserialize::() .filter(|allocation_statistic| { allocation_statistic.as_ref().unwrap().account_type == account_type }) .collect::, csv::Error>>()?; let split_allocation_ranges: Vec<(String, Vec)> = allocation_statistics .iter() .map(|allocation_statistic| { ( allocation_statistic.name.clone(), split_allocation_statistic_range(allocation_statistic, &all_accounts_sorted), ) }) .collect(); // For each allocation statistic, sum the cost centres across accounts in the allocaiton statistic range // value is (cc, allocation_statistic, total) let mut totals: HashMap<(String, String), f64> = HashMap::new(); for line in lines.iter() { let line_index = all_accounts_sorted .iter() .position(|account| account == &line.account); // Skip account as it doesn't exist (likely due to wrong account type) if line_index.is_none() { continue; } let line_index = line_index.unwrap(); // Find the allocation statistics this line is in for (allocation_statistic, range) in split_allocation_ranges.iter() { if range .iter() .find(|range| line_index >= range.start && line_index <= range.end) .is_some() { *totals .entry((line.department.clone(), allocation_statistic.clone())) .or_insert(0.) += line.value; } } } // If ignore negative is used, then set values < 0 to 0 if exclude_negative_allocation_statistics { for ((_, _), total) in totals.iter_mut() { if *total < 0. { *total = 0.; } } } // Group ccs by area let mut area_ccs: HashMap> = HashMap::new(); let headers = cost_centres.headers()?; // Group ccs by rollup, and group rollups into their slot let mut rollups: HashMap>> = headers .iter() .filter(|name| name.to_lowercase().starts_with("rollupslot:")) .map(|rollupslot| (rollupslot.to_owned(), HashMap::new())) .collect(); for cost_centre in cost_centres.deserialize() { let cost_centre: HashMap = cost_centre?; let name = cost_centre.get("Code").unwrap(); let area = cost_centre.get("Area").unwrap(); if area.is_empty() { continue; } area_ccs .entry(area.clone()) .or_insert(Vec::new()) .push(name.clone()); for rollupslot in rollups.iter_mut() { let rollup_name = cost_centre.get(rollupslot.0).unwrap(); rollupslot .1 .entry(rollup_name.clone()) .or_insert(Vec::new()) .push(name.clone()); } } let headers = areas.headers()?; let limit_tos: Vec = headers .iter() .filter(|header| header.to_lowercase().starts_with("limitto:")) .map(|header| header["limitto:".len()..].to_owned()) .collect(); let mut overhead_other_total: Vec<(String, String, f64)> = Vec::new(); // Save overhead ccs, so we later know whether a to cc is overhead or operating let mut overhead_ccs: HashSet = HashSet::new(); // For each overhead area, get the cost centres in the area (overhead cost centres), and get all cost centres // that fit the limit to criteria for the area (skip any cases of overhead cc = other cc). // Then get the totals for the other ccs, by looking in the flat_department_costs, where the // allocation statistic matches the allocation statistic for this area for area in areas.deserialize() { let area: HashMap = area?; // Check for limitTos, should probably somehow build out the list of allocation rules from this point. let area_name = area.get("Name").unwrap(); let allocation_statistic = area.get("AllocationStatistic").unwrap(); let department_type: DepartmentType = DepartmentType::from(area.get("Type").unwrap()); if department_type == DepartmentType::Overhead { let current_area_ccs = area_ccs.get(area_name); if current_area_ccs.is_none() { continue; } let current_area_ccs = current_area_ccs.unwrap().clone(); for cc in current_area_ccs { overhead_ccs.insert(cc); } let overhead_ccs = area_ccs.get(area_name).unwrap(); // TODO: This depends on the area limit criteria. For now just doing any limit criteria let mut limited_ccs: Vec = Vec::new(); for limit_to in limit_tos.iter() { // TODO: It is technically possible to have more than one limit to (I think?) for a slot, so consider eventually splitting this and doing a foreach let limit_value = area.get(&("LimitTo:".to_owned() + limit_to)).unwrap(); if limit_value.is_empty() { continue; } if limit_to.eq_ignore_ascii_case("costcentre") { limited_ccs.push(limit_value.clone()); } else { let mut found_ccs = rollups .get(&("RollupSlot:".to_owned() + limit_to)) .map(|rollups| rollups.get(limit_value)) .flatten() .unwrap() .clone(); limited_ccs.append(&mut found_ccs); } } if limited_ccs.is_empty() { let mut other_ccs: Vec = area_ccs .values() .flat_map(|ccs| ccs.iter().map(|cc| cc.clone())) .collect(); // No limit criteria, use all ccs limited_ccs.append(&mut other_ccs); } let mut totals: Vec<(String, String, f64)> = overhead_ccs .par_iter() .flat_map(|overhead_cc| { let limited = limited_ccs .iter() .filter(|other_cc| { totals.contains_key(&( // TODO: This looks terrible other_cc.clone().clone(), allocation_statistic.clone(), )) }) .map(|other_cc| { ( overhead_cc.clone(), other_cc.clone(), totals .get(&(other_cc.clone(), allocation_statistic.clone())) .map(|f| *f) .unwrap(), ) }) .filter(|(_, _, value)| *value != 0.) .filter(|(from_cc, to_cc, _)| from_cc != to_cc) .collect_vec(); // TODO: Put me back if rayon proves problematic // Insert is safe, since an overhead cc can only be a part of one area // overhead_cc_totals.insert( // overhead_cc.clone(), // limited.iter().map(|(_, _, value)| value).sum(), // ); limited }) .collect(); overhead_other_total.append(&mut totals); } } // TODO: This seems to do nothing even in a complex setting where I'd expect it to do something, can probably be removed let error_amounts: Vec = overhead_other_total .iter() .filter(|(_, _, value)| *value < zero_threshold && *value > -1. * zero_threshold) .enumerate() .map(|(index, _)| index) .collect(); for index in error_amounts.iter() { let (overhead_cc, _, value) = overhead_other_total.remove(*index); let non_error_match = overhead_other_total .iter_mut() .filter(|next| { next.0 == *overhead_cc && (next.2 > zero_threshold || next.2 < -1. * zero_threshold) }) .next(); if let Some((_, _, match_value)) = non_error_match { *match_value = *match_value + value; } } // overhead department -> total (summed limit to costs) let mut overhead_cc_totals: HashMap = HashMap::new(); // Using rayon and doing another pass later proves to be for (overhead_cc, _, value) in overhead_other_total.iter() { *overhead_cc_totals.entry(overhead_cc.clone()).or_insert(0.) += value; } // Export initial totals for operating departments if show_from { for line in lines.iter() { if !overhead_ccs.contains(&line.department) { output.serialize(MovedAmount { account: line.account.clone(), cost_centre: line.department.clone(), value: line.value, from_cost_centre: line.department.clone(), })?; } } } // Finally, for each cc match total produced previously, sum the overhead cc where overhead cc appears in other cc, then // divide the other cc by this summed amount (thus getting the relative cost) // At this point we convert to our format that's actually used, need to somehow recover the to_cc_type... could build that out from the areas let allocation_rules: Vec = overhead_other_total .iter() .map( |(from_overhead_department, to_department, percent)| OverheadAllocationRule { from_overhead_department: from_overhead_department.clone(), to_department: to_department.clone(), percent: percent / overhead_cc_totals.get(from_overhead_department).unwrap(), to_department_type: if overhead_ccs.contains(to_department) { DepartmentType::Overhead } else { DepartmentType::Operating }, }, ) .collect(); let mut initial_account_costs: HashMap> = HashMap::new(); for line in lines { // Only include accounts we've already filtered on (i.e. by account type) if all_accounts_sorted .iter() .find(|account| **account == line.account) .is_some() { initial_account_costs .entry(line.account) .or_insert(Vec::new()) .push(TotalDepartmentCost { department: line.department, value: line.value, }); } } // TODO: (Consider) We could actually cheat here and not use this matrix implementation at all (and thus be more // memory efficient, but maybe slower) // Since we know each operating department in an account will get the proportion of the total overhead amount relative // according to its operating amount from the total amount of the overhead departments, we can just directly calculate // these totals and do some simple multiplications (it does get trickier with multiple accounts, as the cost drivers // are consistent across all accounts, but depend on the allocation statistic to determine which lines to pick from). let results = reciprocal_allocation_impl( allocation_rules, initial_account_costs .into_iter() .map(|(account, total_cost)| AccountCost { account: account, summed_department_costs: total_cost, }) .collect(), if show_from { Some(output) } else { None }, zero_threshold, )?; if !show_from { for cost in results { for department in cost.summed_department_costs { // Any consumers should assume missing cc/account value was 0 (we already ignore overhead, as they all 0 out) if department.value > 0.00001 || department.value < -0.00001 { output.serialize(CsvCost { account: cost.account.clone(), department: department.department, value: department.value, })?; } } } } Ok(()) } fn split_allocation_statistic_range( allocation_statistic: &CsvAllocationStatistic, accounts_sorted: &Vec, ) -> Vec { // TODO: This split needs to be more comprehensive so that we don't split between quotes, so use a regex let split = allocation_statistic.account_ranges.split(";"); split .map(|split| { let range_split = split.split('-').collect::>(); let start = remove_quote_and_padding(range_split[0]); let start_index = accounts_sorted .iter() .position(|account| *account == start) .unwrap(); if range_split.len() == 1 { AllocationStatisticAccountRange { start: start_index, end: start_index, } } else { let end = remove_quote_and_padding(range_split[1]); let end_index = accounts_sorted .iter() .position(|account| *account == end) .unwrap(); AllocationStatisticAccountRange { start: start_index, end: end_index, } } }) .collect() } // Removes quotes and padding from accounts int he allocation statistic account range. // e.g. "'100' " becomes "100" fn remove_quote_and_padding(s: &str) -> String { if s.contains('\'') { s.trim()[1..s.trim().len() - 1].to_owned() } else { s.trim().to_owned() } } // Perform the reciprocal allocation (matrix) method to allocate servicing departments (indirect) costs // to functional departments. Basically just a matrix solve, uses regression (moore-penrose pseudoinverse) when // matrix is singular fn reciprocal_allocation_impl( allocations: Vec, account_costs: Vec, movement_writer: Option<&mut csv::Writer>, zero_threshold: f64, ) -> anyhow::Result> { let overhead_department_mappings = get_rules_indexes(&allocations, DepartmentType::Overhead); let mut slice_allocations = vec![0.; overhead_department_mappings.len() * overhead_department_mappings.len()]; for allocation in allocations .iter() .filter(|allocation| allocation.to_department_type == DepartmentType::Overhead) { let from_index = overhead_department_mappings .get(&allocation.from_overhead_department) .unwrap(); let to_index = overhead_department_mappings .get(&allocation.to_department) .unwrap(); slice_allocations[from_index * overhead_department_mappings.len() + to_index] = allocation.percent * -1.; } let mut mat: DMatrix = DMatrix::from_vec( overhead_department_mappings.len(), overhead_department_mappings.len(), slice_allocations, ); mat.fill_diagonal(1.); if mat.determinant() == 0. { let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001); do_solve_reciprocal( pseudo_inverse.unwrap(), account_costs, overhead_department_mappings, allocations, movement_writer, zero_threshold, ) } else { do_solve_reciprocal( mat.lu(), account_costs, overhead_department_mappings, allocations, movement_writer, zero_threshold, ) } } fn get_rules_indexes( allocations: &Vec, department_type: DepartmentType, ) -> HashMap { allocations .iter() .filter(|allocation| allocation.to_department_type == department_type) .flat_map(|department| { if department.to_department_type == DepartmentType::Operating { vec![department.to_department.clone()] } else { vec![ department.from_overhead_department.clone(), department.to_department.clone(), ] } }) .unique() .enumerate() .map(|(index, department)| (department, index)) .collect() } fn do_solve_reciprocal( solver: T, account_costs: Vec, overhead_department_mappings: HashMap, allocations: Vec, temp_writer: Option<&mut Writer>, zero_threshold: f64, ) -> anyhow::Result> { let operating_department_mappings = get_rules_indexes(&allocations, DepartmentType::Operating); let mut operating_overhead_mappings = vec![0.; overhead_department_mappings.len() * operating_department_mappings.len()]; for rule in allocations { if rule.to_department_type == DepartmentType::Operating { let from_index = *overhead_department_mappings .get(&rule.from_overhead_department) .unwrap(); let to_index = *operating_department_mappings .get(&rule.to_department) .unwrap(); operating_overhead_mappings [from_index * operating_department_mappings.len() + to_index] = rule.percent; } } let operating_overhead_mappings_mat: DMatrix = DMatrix::from_vec( operating_department_mappings.len(), overhead_department_mappings.len(), operating_overhead_mappings, ); let mut temp_writer = temp_writer; if let Some(temp_writer) = temp_writer.as_mut() { solve_reciprocal_with_from( solver, account_costs, overhead_department_mappings, operating_department_mappings, operating_overhead_mappings_mat, temp_writer, zero_threshold, )?; Ok(vec![]) } else { Ok(solve_reciprocal_no_from( solver, account_costs, overhead_department_mappings, operating_department_mappings, operating_overhead_mappings_mat, )) } } fn solve_reciprocal_no_from( solver: impl ReciprocalAllocationSolver + Sync + Send, account_costs: Vec, overhead_department_mappings: HashMap, operating_department_mappings: HashMap, operating_overhead_mappings_mat: DMatrix, ) -> Vec { account_costs .par_iter() // .filter(|cost| cost.account == "A480200") .map(|total_costs| { // To get the from/to ccs like ppm does, we ignore the initial totals. Then for each overhead cc, // we zero out all the calculated overheads except for this cc and do // operating_overhead_mappings * calculated_overheads (basically the first part of the normal calculation) // TODO: There has to be a cleaner way to do this, perhaps by presorting things? let mut overhead_slice_costs = vec![0.; overhead_department_mappings.len()]; for cost in total_costs.summed_department_costs.iter() { if overhead_department_mappings.contains_key(&cost.department) { overhead_slice_costs [*overhead_department_mappings.get(&cost.department).unwrap()] = cost.value } } let overhead_costs_vec: DMatrix = DMatrix::from_row_slice( overhead_department_mappings.len(), 1, &overhead_slice_costs, ); let calculated_overheads = solver.solve(&overhead_costs_vec); let mut operating_slice_costs = vec![0.; operating_department_mappings.len()]; for cost in &total_costs.summed_department_costs { if operating_department_mappings.contains_key(&cost.department) { let elem = &mut operating_slice_costs [*operating_department_mappings.get(&cost.department).unwrap()]; *elem = cost.value; } } let operating_costs_vec: DMatrix = DMatrix::from_row_slice( operating_department_mappings.len(), 1, &operating_slice_costs, ); // // Borrow so we don't move between loops let operating_overhead_mappings = &operating_overhead_mappings_mat; let calculated_overheads = &calculated_overheads; // Calculation: operating_overhead_usage . calculated_overheads + initial_totals // Where operating_overhead_usage is the direct mapping from overhead -> operating department, calculated overheads is the // solved overheads usages after taking into account usage between departments, and initial_totals is the initial values // for the operating departments. let calculated = operating_overhead_mappings * calculated_overheads + operating_costs_vec; let converted_result: Vec = operating_department_mappings .iter() .map(|(department, index)| TotalDepartmentCost { department: department.clone(), value: *calculated.get(*index).unwrap(), }) .collect(); // Redistribute floating point errors (only for ccs we actually allocated from/to) // Considered removing this since redistribution should be done in cost driver calculations, however since that usually // does nothing, we may as well keep this just in case. // TODO: Not sure we actually need this, would probably be better to have a better storage format than // csv/string conversions // let initial_cost: f64 = total_costs // .summed_department_costs // .iter() // .filter(|cost| { // operating_department_mappings.contains_key(&cost.department) // || overhead_department_mappings.contains_key(&cost.department) // }) // .map(|cost| cost.value) // .sum(); // let new_cost: f64 = converted_result.iter().map(|cost| cost.value).sum(); // let diff = initial_cost - new_cost; AccountCost { account: total_costs.account.clone(), summed_department_costs: converted_result .into_iter() .map(|cost| TotalDepartmentCost { department: cost.department, value: cost.value, // + if new_cost == 0_f64 || diff == 0_f64 { // 0_f64 // } else { // cost.value / new_cost * diff // }, }) .collect(), } }) .collect() } fn solve_reciprocal_with_from( solver: T, total_costs: Vec, overhead_department_mappings: HashMap, operating_department_mappings: HashMap, operating_overhead_mappings: DMatrix, temp_writer: &mut Writer, zero_threshold: f64, ) -> anyhow::Result<()> { for total_costs in total_costs { let moved_amounts: Vec = total_costs .summed_department_costs .par_iter() .filter(|overhead_department_cost| { overhead_department_mappings.contains_key(&overhead_department_cost.department) && overhead_department_cost.value != 0_f64 }) .flat_map(|overhead_department_cost| { let mut overhead_slice_costs = vec![0.; overhead_department_mappings.len()]; overhead_slice_costs[*overhead_department_mappings .get(&overhead_department_cost.department) .unwrap()] = overhead_department_cost.value; let overhead_costs_vec: DMatrix = DMatrix::from_row_slice( overhead_department_mappings.len(), 1, &overhead_slice_costs, ); let calculated_overheads = solver.solve(&overhead_costs_vec); let calculated = &operating_overhead_mappings * calculated_overheads; operating_department_mappings .iter() .map(|(department, index)| (department, *calculated.get(*index).unwrap())) .map(|(department, value)| MovedAmount { account: total_costs.account.clone(), cost_centre: department.clone(), value: value, from_cost_centre: department.clone(), }) .collect::>() }) .collect(); for moved_amount in moved_amounts { temp_writer.serialize(moved_amount)?; } } temp_writer.flush().unwrap(); Ok(()) } #[cfg(test)] mod tests { use std::fs::File; use crate::reciprocal_allocation; use crate::AccountCost; use crate::DepartmentType; use crate::OverheadAllocationRule; use crate::TotalDepartmentCost; use super::reciprocal_allocation_impl; use super::MovedAmount; #[test] fn test_basic() { let allocation_rules = vec![ OverheadAllocationRule { from_overhead_department: "Y".to_owned(), to_department: "Z".to_owned(), percent: 0.2, to_department_type: DepartmentType::Overhead, }, OverheadAllocationRule { from_overhead_department: "Z".to_owned(), to_department: "Y".to_owned(), percent: 0.3, to_department_type: DepartmentType::Overhead, }, OverheadAllocationRule { from_overhead_department: "Y".to_owned(), to_department: "A".to_owned(), percent: 0.4, to_department_type: DepartmentType::Operating, }, OverheadAllocationRule { from_overhead_department: "Y".to_owned(), to_department: "B".to_owned(), percent: 0.4, to_department_type: DepartmentType::Operating, }, OverheadAllocationRule { from_overhead_department: "Z".to_owned(), to_department: "A".to_owned(), percent: 0.2, to_department_type: DepartmentType::Operating, }, OverheadAllocationRule { from_overhead_department: "Z".to_owned(), to_department: "B".to_owned(), percent: 0.5, to_department_type: DepartmentType::Operating, }, ]; let initial_totals = vec![AccountCost { account: "Default".to_owned(), summed_department_costs: vec![ TotalDepartmentCost { department: "Y".to_owned(), value: 7260., }, TotalDepartmentCost { department: "Z".to_owned(), value: 4000., }, TotalDepartmentCost { department: "A".to_owned(), value: 12000., }, TotalDepartmentCost { department: "B".to_owned(), value: 16000., }, ], }]; let expected_final_allocations = vec![AccountCost { account: "Default".to_owned(), summed_department_costs: vec![ TotalDepartmentCost { department: "A".to_owned(), value: 16760., }, TotalDepartmentCost { department: "B".to_owned(), value: 22500., }, ], }]; let mut movement_writer = csv::Writer::from_path("test_output.csv").unwrap(); let result = reciprocal_allocation_impl::( allocation_rules, initial_totals, Some(&mut movement_writer), 0.00001, ) .unwrap(); assert_eq!(expected_final_allocations, result); } #[test] fn test_basic_real() { let result = reciprocal_allocation( &mut csv::Reader::from_path("test_line.csv").unwrap(), &mut csv::Reader::from_path("test_account.csv").unwrap(), &mut csv::Reader::from_path("test_alloc_stat.csv").unwrap(), &mut csv::Reader::from_path("test_area.csv").unwrap(), &mut csv::Reader::from_path("test_costcentre.csv").unwrap(), &mut csv::Writer::from_path("test_output_alloc_stat.csv").unwrap(), true, false, true, "E".to_owned(), false, 0.1, ); assert!(result.is_ok()); } #[test] fn test_real() { let result = reciprocal_allocation( &mut csv::Reader::from_path("output.csv").unwrap(), &mut csv::Reader::from_path("account.csv").unwrap(), &mut csv::Reader::from_path("allocstat.csv").unwrap(), &mut csv::Reader::from_path("area.csv").unwrap(), &mut csv::Reader::from_path("costcentre.csv").unwrap(), &mut csv::Writer::from_path("output_alloc_stat.csv").unwrap(), false, false, true, "E".to_owned(), true, 0.001, ); assert!(result.is_ok()) } }