Files
ingey/src/overhead_allocation.rs
2023-03-09 18:58:54 +10:30

713 lines
27 KiB
Rust

use std::{
collections::{HashMap, HashSet},
io::Read,
};
use itertools::Itertools;
use nalgebra::{DMatrix, Dynamic, LU};
use serde::Deserialize;
use crate::{CsvAccount, CsvCost};
/// Role a department plays in the overhead allocation.
#[derive(Debug, PartialEq, Eq)]
pub enum DepartmentType {
    /// A department that receives allocated overhead costs.
    Operating,
    /// A department whose costs are allocated out to other departments.
    Overhead,
}

impl DepartmentType {
    /// Maps a department type code onto a [`DepartmentType`].
    ///
    /// Only the exact string `"P"` yields [`DepartmentType::Operating`]; every
    /// other value (including the empty string and lowercase `"p"`) is treated
    /// as [`DepartmentType::Overhead`].
    pub fn from(s: &str) -> DepartmentType {
        match s {
            "P" => DepartmentType::Operating,
            _ => DepartmentType::Overhead,
        }
    }
}
/// One row of the allocation statistics CSV, deserialised by column header.
#[derive(Deserialize)]
pub struct CsvAllocationStatistic {
/// Name of the statistic, read from the `Name` column.
#[serde(rename = "Name")]
name: String,
/// Account type the statistic applies to, read from the `AccountType` column.
/// Rows are kept only when this equals the account type being processed.
#[serde(rename = "AccountType")]
account_type: String,
/// Account ranges, read from the `AccountRanges` column. The text is split on
/// `;` into entries, and each entry is either a single account or `start-end`;
/// accounts may be wrapped in single quotes and padded with whitespace.
#[serde(rename = "AccountRanges")]
account_ranges: String,
}
/// A range of accounts expressed as positions in the sorted account list.
/// Both bounds are inclusive when a line's account position is tested against the range.
pub struct AllocationStatisticAccountRange {
// Position of the first account of the range in the sorted account list.
start: usize,
// Position of the last account of the range; equals `start` for a single-account entry.
end: usize,
}
// A single allocation rule: the share of one overhead department's cost that goes to another department.
//
// Note: only rules whose target is itself an overhead department take part in the matrix that is
// solved (by LU decomposition or pseudo-inverse). Those rules are entered negated (`percent * -1`)
// and the diagonal is set to 1, which is the usual simultaneous-equations form, so the negative
// entries are expected. Rules targeting operating departments are applied afterwards to the solved
// overhead totals.
pub struct OverheadAllocationRule {
// Overhead department whose cost is being allocated out.
from_overhead_department: String,
// Department receiving the share (may be operating or overhead, see `to_department_type`).
to_department: String,
// Share of the source department's cost as a fraction (a statistic value divided by the
// source department's statistic total), not a 0-100 percentage.
percent: f64,
// Whether `to_department` is an operating or an overhead department.
to_department_type: DepartmentType,
}
/// A cost value attributed to one department.
#[derive(Debug, PartialEq)]
pub struct TotalDepartmentCost {
// Department (cost centre) code.
department: String,
// Cost value for the department.
value: f64,
}
/// The department costs belonging to a single account.
#[derive(Debug, PartialEq)]
pub struct AccountCost {
// Account code.
account: String,
// Cost per department for this account.
// NOTE(review): the name suggests one pre-summed entry per department, but
// `reciprocal_allocation` pushes one entry per input line - confirm the input is already summed.
summed_department_costs: Vec<TotalDepartmentCost>,
}
// TODO: Also need a way to dictate the order of the departments?
/// Abstraction over the ways the overhead system can be solved, so the same
/// allocation code can run with either an LU decomposition or a precomputed
/// (pseudo-)inverse matrix.
pub trait ReciprocalAllocationSolver {
/// Given the column of initial overhead department costs, returns the column
/// of solved overhead totals.
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64>;
}
/// Solves the system directly from an LU decomposition.
impl ReciprocalAllocationSolver for LU<f64, Dynamic, Dynamic> {
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
// This call yields an optional result (it is unwrapped), so it is the decomposition's own
// `solve` rather than a recursive call to this trait method. Panics if no solution is
// returned; the caller only takes this path when the determinant is non-zero.
self.solve(costs).unwrap()
}
}
/// Solves the system by multiplying with an already inverted matrix; used with
/// the pseudo-inverse when the allocation matrix is singular.
impl ReciprocalAllocationSolver for DMatrix<f64> {
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
// `self` is treated as the (pseudo-)inverse, so the solution is a plain product.
self * costs
}
}
pub fn reciprocal_allocation<Lines, Account, AllocationStatistic, Area, CostCentre, Output>(
lines: csv::Reader<Lines>,
accounts: csv::Reader<Account>,
allocation_statistics: csv::Reader<AllocationStatistic>,
areas: csv::Reader<Area>,
cost_centres: csv::Reader<CostCentre>,
output: &mut csv::Writer<Output>,
use_numeric_accounts: bool,
exclude_negative_allocation_statistics: bool,
any_limit_criteria: bool,
account_type: String,
) -> anyhow::Result<()>
where
Lines: Read,
Account: Read,
AllocationStatistic: Read,
Area: Read,
CostCentre: Read,
Output: std::io::Write,
{
let mut lines_reader = lines;
let lines = lines_reader
.deserialize()
.collect::<Result<Vec<CsvCost>, csv::Error>>()?;
let mut accounts = accounts;
let all_accounts_sorted: Vec<String> = if use_numeric_accounts {
accounts
.deserialize::<CsvAccount>()
.filter(|account| {
account.is_ok() && account.as_ref().unwrap().account_type == account_type
})
.map(|line| line.unwrap().code.clone().parse::<i32>().unwrap())
.unique()
.sorted()
.map(|account| account.to_string())
.collect()
} else {
accounts
.deserialize::<CsvAccount>()
.filter(|account| {
account.is_ok() && account.as_ref().unwrap().account_type == account_type
})
.map(|line| line.unwrap().code.clone())
.unique()
.sorted()
.collect()
};
let mut allocation_statistics_reader = allocation_statistics;
let allocation_statistics = allocation_statistics_reader
.deserialize::<CsvAllocationStatistic>()
.filter(|allocation_statistic| {
allocation_statistic.as_ref().unwrap().account_type == account_type
})
.collect::<Result<Vec<CsvAllocationStatistic>, csv::Error>>()?;
let split_allocation_ranges: Vec<(String, Vec<AllocationStatisticAccountRange>)> =
allocation_statistics
.iter()
.map(|allocation_statistic| {
(
allocation_statistic.name.clone(),
split_allocation_statistic_range(allocation_statistic, &all_accounts_sorted),
)
})
.collect();
// For each allocation statistic, sum the cost centres across accounts in the allocaiton statistic range
// value is (cc, allocation_statistic, total)
let mut totals: HashMap<(String, String), f64> = HashMap::new();
for line in lines.iter() {
// TODO: Another optimisation potential here, puttinig the accounts into a map, although less important since there's usually <1k accounts
let line_index = all_accounts_sorted
.iter()
.position(|account| account == &line.account);
// Skip account as it doesn't exist (likely due to wrong account type)
if line_index.is_none() {
continue;
}
let line_index = line_index.unwrap();
// Find the allocation statistics this line is in
for (allocation_statistic, range) in split_allocation_ranges.iter() {
if range
.iter()
.find(|range| line_index >= range.start && line_index <= range.end)
.is_some()
{
*totals
.entry((line.department.clone(), allocation_statistic.clone()))
.or_insert(0.) += line.value;
}
}
}
// If ignore negative is used, then set values < 0 to 0
if exclude_negative_allocation_statistics {
for ((_, _), total) in totals.iter_mut() {
if *total < 0. {
*total = 0.;
}
}
}
// Group ccs by area
let mut area_ccs: HashMap<String, Vec<String>> = HashMap::new();
let mut cost_centres = cost_centres;
let headers = cost_centres.headers()?;
// Group ccs by rollup, and group rollups into their slot
let mut rollups: HashMap<String, HashMap<String, Vec<String>>> = headers
.iter()
.filter(|name| name.to_lowercase().starts_with("rollupslot:"))
.map(|rollupslot| (rollupslot.to_owned(), HashMap::new()))
.collect();
for cost_centre in cost_centres.deserialize() {
let cost_centre: HashMap<String, String> = cost_centre?;
let name = cost_centre.get("Code").unwrap();
let area = cost_centre.get("Area").unwrap();
if area.is_empty() {
continue;
}
area_ccs
.entry(area.clone())
.or_insert(Vec::new())
.push(name.clone());
for rollupslot in rollups.iter_mut() {
let rollup_name = cost_centre.get(rollupslot.0).unwrap();
rollupslot
.1
.entry(rollup_name.clone())
.or_insert(Vec::new())
.push(name.clone());
}
}
let mut areas = areas;
let headers = areas.headers()?;
let limit_tos: Vec<String> = headers
.iter()
.filter(|header| header.to_lowercase().starts_with("limitto:"))
.map(|header| header["limitto:".len()..].to_owned())
.collect();
let mut overhead_other_total: Vec<(String, String, f64)> = Vec::new();
// Save overhead ccs, so we later know whether a to cc is overhead or operating
let mut overhead_ccs: HashSet<String> = HashSet::new();
// overhead department -> total (summed limit to costs)
let mut overhead_cc_totals: HashMap<String, f64> = HashMap::new();
// For each overhead area, get the cost centres in the area (overhead cost centres), and get all cost centres
// that fit the limit to criteria for the area (skip any cases of overhead cc = other cc).
// Then get the totals for the other ccs, by looking in the flat_department_costs, where the
// allocation statistic matches the allocation statistic for this area
for area in areas.deserialize() {
let area: HashMap<String, String> = area?;
// Check for limitTos, should probably somehow build out the list of allocation rules from this point.
let area_name = area.get("Name").unwrap();
let allocation_statistic = area.get("AllocationStatistic").unwrap();
let department_type: DepartmentType = DepartmentType::from(area.get("Type").unwrap());
if department_type == DepartmentType::Overhead {
let current_area_ccs = area_ccs.get(area_name);
if current_area_ccs.is_none() {
continue;
}
let current_area_ccs = current_area_ccs.unwrap().clone();
for cc in current_area_ccs {
overhead_ccs.insert(cc);
}
let overhead_ccs = area_ccs.get(area_name).unwrap();
// TODO: This depends on the area limit criteria. For now just doing any limit criteria
let mut limited_ccs: Vec<String> = Vec::new();
for limit_to in limit_tos.iter() {
// TODO: It is technically possible to have more than one limit to (I think?) for a slot, so consider eventually splitting this and doing a foreach
let limit_value = area.get(&("LimitTo:".to_owned() + limit_to)).unwrap();
if limit_value.is_empty() {
continue;
}
if limit_to.eq_ignore_ascii_case("costcentre") {
limited_ccs.push(limit_value.clone());
} else {
let mut found_ccs = rollups
.get(&("RollupSlot:".to_owned() + limit_to))
.map(|rollups| rollups.get(limit_value))
.flatten()
.unwrap()
.clone();
limited_ccs.append(&mut found_ccs);
}
}
if limited_ccs.is_empty() {
let mut other_ccs: Vec<String> = area_ccs
.values()
.flat_map(|ccs| ccs.iter().map(|cc| cc.clone()))
.collect();
// No limit criteria, use all ccs
limited_ccs.append(&mut other_ccs);
}
let mut totals: Vec<(String, String, f64)> = overhead_ccs
.iter()
.flat_map(|overhead_cc| {
let limited = limited_ccs
.iter()
.filter(|other_cc| {
totals.contains_key(&(
// TODO: This looks terrible
other_cc.clone().clone(),
allocation_statistic.clone(),
))
})
.map(|other_cc| {
(
overhead_cc.clone(),
other_cc.clone(),
totals
.get(&(other_cc.clone(), allocation_statistic.clone()))
.map(|f| *f)
.unwrap(),
)
})
.filter(|(_, _, value)| *value != 0.)
.filter(|(from_cc, to_cc, _)| from_cc != to_cc)
.collect_vec();
// Insert is safe, since an overhead cc can only be a part of one area
overhead_cc_totals.insert(
overhead_cc.clone(),
limited.iter().map(|(_, _, value)| value).sum(),
);
limited
})
.collect();
overhead_other_total.append(&mut totals);
}
}
// Finally, for each cc match total produced previously, sum the overhead cc where overhead cc appears in other cc, then
// divide the other cc by this summed amount (thus getting the relative cost)
// At this point we convert to our format that's actually used, need to somehow recover the to_cc_type... could build that out from the areas
let allocation_rules: Vec<OverheadAllocationRule> = overhead_other_total
.iter()
.map(
|(from_overhead_department, to_department, percent)| OverheadAllocationRule {
from_overhead_department: from_overhead_department.clone(),
to_department: to_department.clone(),
percent: percent / overhead_cc_totals.get(from_overhead_department).unwrap(),
to_department_type: if overhead_ccs.contains(to_department) {
DepartmentType::Overhead
} else {
DepartmentType::Operating
},
},
)
.collect();
let mut initial_account_costs: HashMap<String, Vec<TotalDepartmentCost>> = HashMap::new();
for line in lines {
// Only include accounts we've already filtered on (i.e. by account type)
if all_accounts_sorted
.iter()
.find(|account| **account == line.account)
.is_some()
{
initial_account_costs
.entry(line.account)
.or_insert(Vec::new())
.push(TotalDepartmentCost {
department: line.department,
value: line.value,
});
}
}
// TODO: (Consider) We could actually cheat here and not use this matrix implementation at all (and thus be more
// memory efficient, but maybe slower)
// Since we know each operating department in an account will get the proportion of the total overhead amount relative
// according to its operating amount from the total amount of the overhead departments, we can just directly calculate
// these totals and do some simple multiplications (it does get trickier with multiple accounts, as the cost drivers
// are consistent across all accounts, but depend on the allocation statistic to determine which lines to pick from).
let results = reciprocal_allocation_impl(
allocation_rules,
initial_account_costs
.into_iter()
.map(|(account, total_cost)| AccountCost {
account: account,
summed_department_costs: total_cost,
})
.collect(),
)?;
for cost in results {
for department in cost.summed_department_costs {
// Any consumers should assume missing cc/account value was 0 (we already ignore overhead, as they all 0 out)
if department.value > 0.00001 || department.value < -0.00001 {
output.serialize(CsvCost {
account: cost.account.clone(),
department: department.department,
value: department.value,
pass: None,
})?;
}
}
}
Ok(())
}
/// Converts the textual account ranges of an allocation statistic into index
/// ranges over `accounts_sorted`.
///
/// The range text is split on `;`. Each entry is either a single account or
/// `start-end`; anything after a second `-` is ignored. Quotes and padding are
/// removed from every account before it is looked up.
///
/// # Panics
/// Panics if an account named in a range is not present in `accounts_sorted`.
fn split_allocation_statistic_range(
    allocation_statistic: &CsvAllocationStatistic,
    accounts_sorted: &Vec<String>,
) -> Vec<AllocationStatisticAccountRange> {
    // Position of a (still quoted/padded) account in the sorted account list.
    let index_of = |raw_account: &str| -> usize {
        let code = remove_quote_and_padding(raw_account);
        accounts_sorted
            .iter()
            .position(|account| *account == code)
            .unwrap()
    };

    // TODO: This split needs to be more comprehensive so that we don't split between quotes, so use a regex
    let mut ranges = Vec::new();
    for entry in allocation_statistic.account_ranges.split(";") {
        let mut bounds = entry.split('-');
        // `split` always yields at least one piece, so the start is always present.
        let start = index_of(bounds.next().unwrap());
        let end = match bounds.next() {
            Some(raw_end) => index_of(raw_end),
            None => start,
        };
        ranges.push(AllocationStatisticAccountRange { start, end });
    }
    ranges
}
// Removes quotes and padding from accounts in the allocation statistic account range.
// e.g. "'100' " becomes "100"
//
// Surrounding whitespace is trimmed first, then at most one leading and one trailing single
// quote are removed. Only actual quote characters are stripped, so an unbalanced quote
// ("'100") no longer loses a digit and a lone "'" no longer panics.
fn remove_quote_and_padding(s: &str) -> String {
    let trimmed = s.trim();
    let without_leading = trimmed.strip_prefix('\'').unwrap_or(trimmed);
    let without_quotes = without_leading
        .strip_suffix('\'')
        .unwrap_or(without_leading);
    without_quotes.to_owned()
}
// Performs the reciprocal allocation (matrix) method, allocating the costs of servicing
// (indirect/overhead) departments to functional (operating) departments.
//
// The overhead-to-overhead rules form a square system with 1 on the diagonal and the negated
// allocation fractions elsewhere. It is solved with an LU decomposition, or with the
// Moore-Penrose pseudo-inverse (i.e. a regression) when the determinant is exactly zero.
//
// TODO: Throw an appropriate error
fn reciprocal_allocation_impl(
    allocations: Vec<OverheadAllocationRule>,
    account_costs: Vec<AccountCost>,
) -> anyhow::Result<Vec<AccountCost>> {
    let overhead_department_mappings = get_rules_indexes(&allocations, DepartmentType::Overhead);
    let dimension = overhead_department_mappings.len();

    // Flat buffer for the square matrix; `DMatrix::from_vec` reads it in column-major order,
    // so position `from * dimension + to` ends up in row `to`, column `from`.
    let mut coefficients = vec![0.; dimension * dimension];
    for rule in allocations.iter() {
        if rule.to_department_type != DepartmentType::Overhead {
            continue;
        }
        let from_index = *overhead_department_mappings
            .get(&rule.from_overhead_department)
            .unwrap();
        let to_index = *overhead_department_mappings
            .get(&rule.to_department)
            .unwrap();
        coefficients[from_index * dimension + to_index] = rule.percent * -1.;
    }

    let mut mat: DMatrix<f64> = DMatrix::from_vec(dimension, dimension, coefficients);
    mat.fill_diagonal(1.);

    // Regular case: the system has a unique solution.
    if mat.determinant() != 0. {
        return do_solve_reciprocal(
            mat.lu(),
            account_costs,
            overhead_department_mappings,
            allocations,
        );
    }

    // Singular system: fall back to the pseudo-inverse.
    let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001);
    do_solve_reciprocal(
        pseudo_inverse.unwrap(),
        account_costs,
        overhead_department_mappings,
        allocations,
    )
}
/// Assigns a dense index (0, 1, 2, ...) to every department of the requested
/// type that appears in `allocations`, in order of first appearance.
///
/// * `DepartmentType::Operating` - the targets of rules that allocate to an
///   operating department.
/// * `DepartmentType::Overhead` - the source of *every* rule, plus the targets
///   of rules that allocate to an overhead department.
///
/// Every rule source must be indexed: an overhead department that allocates
/// only to operating departments otherwise has no position in the solved
/// system, and the lookup of its index when applying its rules fails.
fn get_rules_indexes(
    allocations: &[OverheadAllocationRule],
    department_type: DepartmentType,
) -> HashMap<String, usize> {
    // Gives `department` the next free index unless it already has one.
    fn register(indexes: &mut HashMap<String, usize>, department: &str) {
        if !indexes.contains_key(department) {
            let next_index = indexes.len();
            indexes.insert(department.to_owned(), next_index);
        }
    }

    let collecting_overhead = department_type == DepartmentType::Overhead;
    let mut indexes: HashMap<String, usize> = HashMap::new();
    for rule in allocations {
        // The source of a rule is always an overhead department.
        if collecting_overhead {
            register(&mut indexes, &rule.from_overhead_department);
        }
        if rule.to_department_type == department_type {
            register(&mut indexes, &rule.to_department);
        }
    }
    indexes
}
/// Applies a solved overhead system to every account.
///
/// For each account the initial overhead costs are passed to `solver`, which
/// returns the total cost of every overhead department once the allocations
/// between overhead departments are taken into account. Those totals are then
/// distributed with the overhead -> operating rules and added to the initial
/// operating costs:
///
/// `operating_overhead_usage . calculated_overheads + initial_totals`
///
/// The result holds one entry per operating department that is the target of a
/// rule, ordered by the department's index (order of first appearance in
/// `allocations`), so the output is deterministic. Costs of departments that
/// occur more than once within an account are added together.
///
/// # Errors
/// Returns an error if a rule allocates from an overhead department that has no
/// index in `overhead_department_mappings`.
fn do_solve_reciprocal<T: ReciprocalAllocationSolver>(
    solver: T,
    account_costs: Vec<AccountCost>,
    overhead_department_mappings: HashMap<String, usize>,
    allocations: Vec<OverheadAllocationRule>,
) -> anyhow::Result<Vec<AccountCost>> {
    let operating_department_mappings = get_rules_indexes(&allocations, DepartmentType::Operating);
    let overhead_count = overhead_department_mappings.len();
    let operating_count = operating_department_mappings.len();

    // Flat, column-major buffer for the (operating x overhead) usage matrix:
    // position `from * operating_count + to` is row `to`, column `from`.
    let mut operating_overhead_mappings = vec![0.; overhead_count * operating_count];
    for rule in allocations
        .iter()
        .filter(|rule| rule.to_department_type == DepartmentType::Operating)
    {
        let from_index = *overhead_department_mappings
            .get(&rule.from_overhead_department)
            .ok_or_else(|| {
                anyhow::anyhow!(
                    "overhead department '{}' has no position in the solved overhead system",
                    rule.from_overhead_department
                )
            })?;
        let to_index = *operating_department_mappings
            .get(&rule.to_department)
            .expect("every operating rule target is indexed by get_rules_indexes");
        operating_overhead_mappings[from_index * operating_count + to_index] = rule.percent;
    }
    let operating_overhead_mappings_mat: DMatrix<f64> = DMatrix::from_vec(
        operating_count,
        overhead_count,
        operating_overhead_mappings,
    );

    // Hash map iteration order is arbitrary; fix the output order once, by index.
    let mut operating_departments: Vec<(&String, usize)> = operating_department_mappings
        .iter()
        .map(|(department, index)| (department, *index))
        .collect();
    operating_departments.sort_unstable_by_key(|(_, index)| *index);

    let mut final_account_costs: Vec<AccountCost> = Vec::with_capacity(account_costs.len());
    for total_costs in account_costs {
        // Spread the account's costs over the overhead and operating vectors.
        let mut overhead_slice_costs = vec![0.; overhead_count];
        let mut operating_slice_costs = vec![0.; operating_count];
        for cost in total_costs.summed_department_costs.iter() {
            if let Some(index) = overhead_department_mappings.get(&cost.department) {
                overhead_slice_costs[*index] += cost.value;
            }
            if let Some(index) = operating_department_mappings.get(&cost.department) {
                operating_slice_costs[*index] += cost.value;
            }
        }

        let overhead_costs_vec: DMatrix<f64> =
            DMatrix::from_row_slice(overhead_count, 1, &overhead_slice_costs);
        let calculated_overheads = solver.solve(&overhead_costs_vec);
        let operating_costs_vec: DMatrix<f64> =
            DMatrix::from_row_slice(operating_count, 1, &operating_slice_costs);

        // Calculation: operating_overhead_usage . calculated_overheads + initial_totals
        // Where operating_overhead_usage is the direct mapping from overhead -> operating
        // department, calculated_overheads is the solved overhead usage after taking into account
        // usage between departments, and initial_totals is the initial values for the operating
        // departments. The matrices are borrowed so they are not moved between iterations.
        let calculated =
            &operating_overhead_mappings_mat * &calculated_overheads + operating_costs_vec;

        let converted_result: Vec<TotalDepartmentCost> = operating_departments
            .iter()
            .map(|(department, index)| TotalDepartmentCost {
                department: (*department).clone(),
                value: *calculated.get(*index).unwrap(),
            })
            .collect();
        final_account_costs.push(AccountCost {
            account: total_costs.account,
            summed_department_costs: converted_result,
        });
    }
    Ok(final_account_costs)
}
#[cfg(test)]
mod tests {
    use crate::reciprocal_allocation;
    use crate::AccountCost;
    use crate::DepartmentType;
    use crate::OverheadAllocationRule;
    use crate::TotalDepartmentCost;

    use super::reciprocal_allocation_impl;

    /// Two overhead departments (Y, Z) that use each other and two operating
    /// departments (A, B). Solving Y = 7260 + 0.3 Z and Z = 4000 + 0.2 Y gives
    /// Y = 9000 and Z = 5800, hence A = 12000 + 0.4 * 9000 + 0.2 * 5800 = 16760
    /// and B = 16000 + 0.4 * 9000 + 0.5 * 5800 = 22500.
    #[test]
    fn test_basic() {
        let allocation_rules = vec![
            OverheadAllocationRule {
                from_overhead_department: "Y".to_owned(),
                to_department: "Z".to_owned(),
                percent: 0.2,
                to_department_type: DepartmentType::Overhead,
            },
            OverheadAllocationRule {
                from_overhead_department: "Z".to_owned(),
                to_department: "Y".to_owned(),
                percent: 0.3,
                to_department_type: DepartmentType::Overhead,
            },
            OverheadAllocationRule {
                from_overhead_department: "Y".to_owned(),
                to_department: "A".to_owned(),
                percent: 0.4,
                to_department_type: DepartmentType::Operating,
            },
            OverheadAllocationRule {
                from_overhead_department: "Y".to_owned(),
                to_department: "B".to_owned(),
                percent: 0.4,
                to_department_type: DepartmentType::Operating,
            },
            OverheadAllocationRule {
                from_overhead_department: "Z".to_owned(),
                to_department: "A".to_owned(),
                percent: 0.2,
                to_department_type: DepartmentType::Operating,
            },
            OverheadAllocationRule {
                from_overhead_department: "Z".to_owned(),
                to_department: "B".to_owned(),
                percent: 0.5,
                to_department_type: DepartmentType::Operating,
            },
        ];
        let initial_totals = vec![AccountCost {
            account: "Default".to_owned(),
            summed_department_costs: vec![
                TotalDepartmentCost {
                    department: "Y".to_owned(),
                    value: 7260.,
                },
                TotalDepartmentCost {
                    department: "Z".to_owned(),
                    value: 4000.,
                },
                TotalDepartmentCost {
                    department: "A".to_owned(),
                    value: 12000.,
                },
                TotalDepartmentCost {
                    department: "B".to_owned(),
                    value: 16000.,
                },
            ],
        }];
        // Listed in department order, matching the sort applied to the result below.
        let expected_final_allocations = vec![AccountCost {
            account: "Default".to_owned(),
            summed_department_costs: vec![
                TotalDepartmentCost {
                    department: "A".to_owned(),
                    value: 16760.,
                },
                TotalDepartmentCost {
                    department: "B".to_owned(),
                    value: 22500.,
                },
            ],
        }];
        let mut result = reciprocal_allocation_impl(allocation_rules, initial_totals).unwrap();

        // Compare order-insensitively and with a tolerance: the order of the
        // departments in the result is not part of the contract, and the values
        // come out of a floating point solve.
        assert_eq!(expected_final_allocations.len(), result.len());
        for (actual, expected) in result.iter_mut().zip(expected_final_allocations.iter()) {
            assert_eq!(expected.account, actual.account);
            actual
                .summed_department_costs
                .sort_by(|left, right| left.department.cmp(&right.department));
            assert_eq!(
                expected.summed_department_costs.len(),
                actual.summed_department_costs.len()
            );
            for (actual_cost, expected_cost) in actual
                .summed_department_costs
                .iter()
                .zip(expected.summed_department_costs.iter())
            {
                assert_eq!(expected_cost.department, actual_cost.department);
                assert!(
                    (expected_cost.value - actual_cost.value).abs() < 1e-6,
                    "department {}: expected {}, got {}",
                    expected_cost.department,
                    expected_cost.value,
                    actual_cost.value
                );
            }
        }
    }

    // NOTE(review): reads `test_*.csv` fixtures relative to the working directory and panics
    // when they are absent - confirm the fixtures are checked in.
    #[test]
    fn test_basic_real() {
        let result = reciprocal_allocation(
            csv::Reader::from_path("test_line.csv").unwrap(),
            csv::Reader::from_path("test_account.csv").unwrap(),
            csv::Reader::from_path("test_alloc_stat.csv").unwrap(),
            csv::Reader::from_path("test_area.csv").unwrap(),
            csv::Reader::from_path("test_costcentre.csv").unwrap(),
            &mut csv::Writer::from_path("test_output_alloc_stat.csv").unwrap(),
            true,
            false,
            true,
            "E".to_owned(),
        );
        assert!(result.is_ok());
    }

    // NOTE(review): depends on local, un-prefixed data files (`output.csv`, `account.csv`, ...)
    // in the working directory; presumably a manual run against real data - confirm.
    #[test]
    fn test_real() {
        let result = reciprocal_allocation(
            csv::Reader::from_path("output.csv").unwrap(),
            csv::Reader::from_path("account.csv").unwrap(),
            csv::Reader::from_path("allocstat.csv").unwrap(),
            csv::Reader::from_path("area.csv").unwrap(),
            csv::Reader::from_path("costcentre.csv").unwrap(),
            &mut csv::Writer::from_path("output_alloc_stat.csv").unwrap(),
            true,
            false,
            true,
            "E".to_owned(),
        );
        assert!(result.is_ok())
    }
}