270 lines
11 KiB
Rust
270 lines
11 KiB
Rust
extern crate nalgebra as na;
|
|
|
|
use itertools::Itertools;
|
|
use na::{DMatrix, Dynamic, LU};
|
|
use std::{collections::HashMap, error::Error, ops::Mul};
|
|
|
|
// TODO: Look into serde for serialisation, can also use it to serialise/deserialise
|
|
// records from a csv file using the csv crate
|
|
/// A single instruction for moving money between units (departments), or a
/// pass break separating two groups of such instructions.
///
/// `Debug`, `Clone` and `PartialEq` are derived so rules can be logged,
/// duplicated and compared (for example in tests).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct MovementRule {
    // If the vectors are empty, then it means 'all'
    /// Units the money is taken from.
    pub from_units: Vec<String>,
    /// Units the money is handed to.
    pub to_units: Vec<String>,
    /// Either a fixed amount or, when `is_percent` is set, a percentage of each
    /// source unit's total.
    pub amount: f64,
    /// Whether `amount` is a percentage rather than a fixed value.
    pub is_percent: bool,
    /// Marks this rule as a pass break instead of a movement.
    pub is_separator: bool,
}
|
|
|
|
impl MovementRule {
|
|
pub fn pass_break() -> MovementRule {
|
|
MovementRule {
|
|
from_units: vec![],
|
|
to_units: vec![],
|
|
amount: 0.0,
|
|
is_percent: false,
|
|
is_separator: true,
|
|
}
|
|
}
|
|
|
|
pub fn validate(&self) -> bool {
|
|
if self.from_units.is_empty() && self.to_units.is_empty() {
|
|
// Would be nice to have a decent message/error here as well
|
|
return false;
|
|
}
|
|
if self.is_percent && (self.amount < 0.0 || self.amount > 100.0) {
|
|
return false;
|
|
}
|
|
true
|
|
}
|
|
}
|
|
|
|
// Rules get parsed from file, converted into matrix format (for the in-memory movement),
|
|
// then combined (smushed) into a single matrix + vector/rule for each . The list of units can then have the rules applied
|
|
//
|
|
// For now just ignore the all from/to stuff, it's kind of a shit thing to do, and can
|
|
// be worked around by actually inputting every type into the rules
|
|
|
|
pub fn smush_rules(rules: Vec<MovementRule>) -> Vec<MovementRule> {
|
|
let ruleMapping: HashMap<String, usize> = HashMap::new();
|
|
// First build out the list/map of all departments (store index of each element in the array)
|
|
// TODO: We could make this more advanced by only smushing per divider, so that only the departments
|
|
// needed between each pass is actually required
|
|
for rule in rules {
|
|
for department in rule.from_units {
|
|
// ruleMapping.entry(department).or_insert(ruleMapping.len());
|
|
}
|
|
}
|
|
vec![]
|
|
}
|
|
|
|
// Approach 1:
|
|
// Use math (linear algebra) to move between departments. Memory/computationally it's equivalent
|
|
// to the worst case of approach two, however it can take advantage of auto parallelisation/simd
|
|
// to perform fast, particularly on larger datasets.
|
|
// This basically just involves smushing all the rules, then doing a matrix multiply and matrix addition
|
|
// on the initial set. Can't record passes, but can record the smushed rules if only the data changes later
|
|
// Advantage of this approach is it can be easily extended to run on the gpu.
|
|
/// Placeholder for approach 1: takes no arguments and has an empty body, so calling it does nothing.
pub fn move_money_1() {}
|
|
|
|
// Approach 2:
|
|
// Traditional/naive, total for each department is stored in an initial map (department -> total amount).
|
|
// Another map is built up for each rule, and each rule is processed based on the amount in the current total
|
|
// map.
|
|
// Upon a pass break (separator), the temp map will assign the values into the total map.
|
|
// Once done, do a final assignment back to the total, and return that.
|
|
// Advantage of this is the required code is tiny, and no third-party math library is required.
|
|
// Note that the movement happens on a line-by-line level. So we can stream the data from disk, and potentially apply this
|
|
// to every line. It's also much more memory efficient than approach 1.
|
|
// TODO: Time both approaches to see which is faster depending on the size of the input data/number of rules
|
|
// TODO: Right now this only supports movements between departments, we also need to support movements between accounts.
|
|
// This would require an expansion so that we also have from/to accounts, and the hashmap will use some struct
|
|
// that combines an account/department, which is also how the totals will be loaded. (so when loading from disk,
|
|
// we load the whole GL into memory sum the account/department totals, and move these into a map line by line)
|
|
pub fn move_money_2(
|
|
initial_totals: HashMap<String, f64>,
|
|
rules: Vec<MovementRule>,
|
|
) -> HashMap<String, f64> {
|
|
// TODO: Should probably validate that all the rules have departments that actually exist in initial_totals.
|
|
// Note: It's potentially a bit more intensive to use cloned totals, but it's much simpler code and, and since we're only working line-by-line
|
|
// it isn't really that much memory. in practice
|
|
let mut running_total = HashMap::from(initial_totals);
|
|
let mut temp_total: HashMap<String, f64> = running_total.clone();
|
|
for rule in rules {
|
|
if rule.is_separator {
|
|
running_total = temp_total.clone();
|
|
} else {
|
|
let mut sum_from = 0.;
|
|
for department in rule.from_units {
|
|
let previous_temp = running_total.get(&department).expect(
|
|
"Failed to find department in temp totals, this should not be possible",
|
|
);
|
|
let added_amount = if rule.is_percent {
|
|
*previous_temp * rule.amount
|
|
} else {
|
|
rule.amount
|
|
};
|
|
sum_from += added_amount;
|
|
*temp_total.get_mut(&department).unwrap() -= added_amount;
|
|
}
|
|
|
|
let value_per_unit = sum_from / rule.to_units.len() as f64;
|
|
for department in rule.to_units {
|
|
*temp_total.get_mut(&department).unwrap() += value_per_unit;
|
|
}
|
|
}
|
|
}
|
|
temp_total
|
|
}
|
|
|
|
/// Kind of department on the receiving end of an overhead allocation.
///
/// The enum has no payload, so it is `Copy` and can be handed around by value
/// freely; `Hash` lets it be used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DepartmentType {
    /// A department that ultimately absorbs overhead costs.
    Operating,
    /// A department whose costs are allocated onwards to other departments.
    Overhead,
}
|
|
|
|
// TODO: Could also look at BigDecimal rather than f64 for higher precision (even i64 might be fine if we don't need to divide...)
|
|
// Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department,
|
|
// you either need -1 or rest negative for a row to subtract the initial amounts so we end up effectively 0 (simultaneous equations end
|
|
// up with negative there so yes this is expected)
|
|
// Also, we could potentially use this same struct for non-overhead departments when mapping from overhead to
|
|
pub struct OverheadAllocationRule {
|
|
from_overhead_department: String,
|
|
to_department: String,
|
|
percent: f64,
|
|
to_department_type: DepartmentType,
|
|
}
|
|
|
|
/// Total cost recorded against one department.
///
/// The fields are public so that code outside this module can build the
/// values it passes to `reciprocal_allocation`.
#[derive(Debug, Clone, PartialEq)]
pub struct TotalDepartmentCost {
    /// Name of the department.
    pub department: String,
    /// Cost summed for that department.
    pub value: f64,
}
|
|
|
|
pub struct AccountCost {
|
|
account: String,
|
|
summed_department_costs: Vec<TotalDepartmentCost>,
|
|
}
|
|
|
|
// TODO: Also need a way to dictate the order of the departments?
|
|
/// Something that can turn a matrix of costs into a solved matrix.
///
/// This file implements it for an LU decomposition and for a plain matrix, so
/// that `do_solve_reciprocal` can be written once over either.
pub trait ReciprocalAllocationSolver {
|
|
/// Solves for `costs` and returns the result as a new matrix.
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64>;
|
|
}
|
|
|
|
/// Solver backed by an LU decomposition.
impl ReciprocalAllocationSolver for LU<f64, Dynamic, Dynamic> {
|
|
/// Delegates to the decomposition's own `solve` and unwraps its result.
///
/// The inner call is not a recursive call to this trait method: its result is
/// unwrapped, whereas this method returns a plain matrix.
/// NOTE(review): presumably the inner `solve` yields nothing when the system
/// cannot be solved, in which case this panics - confirm against nalgebra.
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
|
|
self.solve(costs).unwrap()
|
|
}
|
|
}
|
|
|
|
impl ReciprocalAllocationSolver for DMatrix<f64> {
|
|
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
|
|
self.mul(costs)
|
|
}
|
|
}
|
|
|
|
fn get_rules_indexes(
|
|
allocations: &Vec<OverheadAllocationRule>,
|
|
department_type: DepartmentType,
|
|
) -> HashMap<String, usize> {
|
|
allocations
|
|
.iter()
|
|
.filter(|allocation| allocation.to_department_type == department_type)
|
|
.flat_map(|department| {
|
|
[
|
|
department.from_overhead_department.clone(),
|
|
department.to_department.clone(),
|
|
]
|
|
})
|
|
.unique()
|
|
.enumerate()
|
|
.map(|(index, department)| (department, index))
|
|
.collect()
|
|
}
|
|
|
|
// Perform the reciprocal allocation (matrix) method to allocate servicing departments (indirect) costs
|
|
// to functional departments. Basically just a matrix solve, uses regression (moore-penrose pseudoinverse) when
|
|
// matrix is singular
|
|
pub fn reciprocal_allocation(
|
|
allocations: Vec<OverheadAllocationRule>,
|
|
account_costs: Vec<AccountCost>,
|
|
// TODO: Throw an appropriate error
|
|
) -> Result<Vec<AccountCost>, Box<dyn Error>> {
|
|
let overhead_department_mappings: HashMap<String, usize> =
|
|
get_rules_indexes(&allocations, DepartmentType::Overhead);
|
|
let operating_department_mappings: HashMap<String, usize> =
|
|
get_rules_indexes(&allocations, DepartmentType::Operating);
|
|
|
|
let mut slice_allocations =
|
|
vec![0.; overhead_department_mappings.len() * overhead_department_mappings.len()];
|
|
|
|
for allocation in allocations
|
|
.iter()
|
|
.filter(|allocation| allocation.to_department_type == DepartmentType::Overhead)
|
|
{
|
|
// TODO: Check if we need to flp this around
|
|
let from_index = overhead_department_mappings
|
|
.get(&allocation.from_overhead_department)
|
|
.unwrap();
|
|
let to_index = operating_department_mappings
|
|
.get(&allocation.to_department)
|
|
.unwrap();
|
|
let elem = &mut slice_allocations
|
|
[(*from_index) + (overhead_department_mappings.len() * (*to_index))];
|
|
*elem = allocation.percent;
|
|
}
|
|
|
|
// TODO: Also need ones along the diagonal, and negatives in some places...
|
|
|
|
let mat: DMatrix<f64> = DMatrix::from_row_slice(
|
|
overhead_department_mappings.len(),
|
|
overhead_department_mappings.len(),
|
|
&slice_allocations,
|
|
);
|
|
|
|
if mat.determinant() == 0. {
|
|
let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001);
|
|
do_solve_reciprocal(
|
|
pseudo_inverse.unwrap(),
|
|
account_costs,
|
|
overhead_department_mappings,
|
|
allocations,
|
|
)
|
|
} else {
|
|
do_solve_reciprocal(
|
|
mat.lu(),
|
|
account_costs,
|
|
overhead_department_mappings,
|
|
allocations,
|
|
)
|
|
}
|
|
}
|
|
|
|
fn do_solve_reciprocal<T: ReciprocalAllocationSolver>(
|
|
solver: T,
|
|
account_costs: Vec<AccountCost>,
|
|
department_mappings: HashMap<String, usize>,
|
|
allocations: Vec<OverheadAllocationRule>,
|
|
) -> Result<Vec<AccountCost>, Box<dyn Error>> {
|
|
// TODO: Could batch the accounts, although probably won't see to big a speed increase, compiler should help us out
|
|
for total_costs in account_costs {
|
|
let mut slice_costs = vec![0.; department_mappings.len()];
|
|
|
|
for cost in total_costs.summed_department_costs {
|
|
let elem = &mut slice_costs[*department_mappings.get(&cost.department).unwrap()];
|
|
*elem = cost.value;
|
|
}
|
|
|
|
let costs_vec: DMatrix<f64> =
|
|
DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);
|
|
|
|
let calculated_overheads = solver.solve(&costs_vec);
|
|
|
|
// Calculation: operating_overhead_usage . calculated_overheads + initial_totals
|
|
// Where operating_overhead_usage is the direct mapping from overhead -> operating department, calculated overheads is the
|
|
// solved overheads usages after taking into account usage between departments, and initial_totals is the initial values
|
|
// for the operating departments.
|
|
}
|
|
// TODO: return something appropriate
|
|
Ok(vec![])
|
|
}
|