Add fixes to reciprocal allocation, example cli, add move money

This commit is contained in:
Piv
2022-06-18 10:30:18 +09:30
parent 6db4a50125
commit efdf4af2de
4 changed files with 475 additions and 50 deletions

View File

@@ -1,15 +1,127 @@
extern crate nalgebra as na;
use na::DMatrix;
use std::{collections::HashMap, ops::Mul};
use na::DMatrix;
// TODO: Look into serde for serialisation, can also use it to serialise/deserialise
// records from a csv file using the csv crate
/// A single instruction for moving money between units (departments), or a
/// marker that separates two passes of such instructions.
///
/// The common traits are derived so rules can be logged, copied and compared;
/// `Default` yields the same empty rule as `MovementRule::new`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct MovementRule {
    // If the vectors are empty, then it means 'all'
    /// Units the money is taken from.
    pub from_units: Vec<String>,
    /// Units the money is given to.
    pub to_units: Vec<String>,
    /// How much to move; read as a percentage when `is_percent` is set.
    pub amount: f64,
    /// Whether `amount` is a percentage rather than a fixed amount.
    pub is_percent: bool,
    /// Whether this entry is a pass break rather than an actual movement.
    pub is_separator: bool,
}
// TODO: Could probably put this up a level by indicating how much of another department
// each department used, then calculate the amounts from that.
impl MovementRule {
/// Creates a blank rule: no source units, no destination units, a zero
/// amount, and neither the percentage nor the separator flag set.
pub fn new() -> MovementRule {
    let from_units = Vec::new();
    let to_units = Vec::new();
    MovementRule {
        is_separator: false,
        is_percent: false,
        amount: 0.0,
        from_units,
        to_units,
    }
}
// Note: No need to include the operating departments, only service departments are needed,
// then once we calculate all of the
pub struct OverheadDepartmentAllocation {
/// Creates a separator entry: a rule with no units and a zero amount whose
/// only set flag is `is_separator`.
pub fn pass_break() -> MovementRule {
    let no_units = || Vec::new();
    MovementRule {
        is_separator: true,
        is_percent: false,
        amount: 0.0,
        to_units: no_units(),
        from_units: no_units(),
    }
}
/// Checks whether this rule is well formed.
///
/// Returns `false` when both `from_units` and `to_units` are empty, or when
/// the rule is a percentage whose `amount` is not within `0.0..=100.0`
/// (a NaN amount counts as out of range). Returns `true` otherwise.
///
/// NOTE(review): a separator built by `pass_break` has both unit lists empty
/// and therefore fails this check - confirm whether separators are meant to
/// be validated at all.
pub fn validate(&self) -> bool {
    if self.from_units.is_empty() && self.to_units.is_empty() {
        // Would be nice to have a decent message/error here as well
        return false;
    }
    // `contains` is false for NaN, whereas `amount < 0.0 || amount > 100.0`
    // is also false for NaN and would let such a percentage through.
    if self.is_percent && !(0.0..=100.0).contains(&self.amount) {
        return false;
    }
    true
}
}
// Rules get parsed from file, converted into matrix format (for the in-memory movement),
// then combined (smushed) into a single matrix + vector/rule for each pass. The list of units can then have the rules applied
//
// For now just ignore the all from/to stuff, it's kind of a shit thing to do, and can
// be worked around by actually inputting every type into the rules
/// Combines ("smushes") a list of movement rules into a reduced rule set.
///
/// Work in progress: only the first step exists so far, which assigns each
/// source department an index in order of first appearance. The combined
/// rules are not produced yet, so the function always returns an empty vector.
pub fn smush_rules(rules: Vec<MovementRule>) -> Vec<MovementRule> {
    // First build out the list/map of all departments (store index of each element in the array)
    // TODO: We could make this more advanced by only smushing per divider, so that only the departments
    // needed between each pass is actually required
    let mut rule_mapping: HashMap<String, usize> = HashMap::new();
    for rule in rules {
        for department in rule.from_units {
            // Read the length before taking the entry: `entry` borrows the map
            // mutably, so `len()` cannot be called inside the `or_insert` argument.
            let next_index = rule_mapping.len();
            rule_mapping.entry(department).or_insert(next_index);
        }
    }
    // TODO: `rule_mapping` is not consumed yet; use it to build the combined rules.
    vec![]
}
// Approach 1:
// Use math (linear algebra) to move between departments. Memory/computationally it's equivalent
// to the worst case of approach two, however can take advantage of auto parallelisation/simd
// to perform fast, particularly on larger datasets.
// This basically just involves smushing all the rules, then doing a matrix multiply and matrix addition
// on the initial set. Can't record passes, but can record the smushed rules if only the data changes later
// Advantage of this approach is it can be easily extended to run on the gpu.
/// Placeholder for the matrix-based approach described above; not implemented yet
/// (it takes no arguments, returns nothing and has an empty body).
pub fn move_money_1() {}
// Approach 2:
// Traditional/naive, total for each department is stored in an initial map (department -> total amount).
// Another map is built up for each rule, and each rule is processed based on the amount in the current total
// map.
// Upon a pass break (divider), the amounts in the temp map are applied to the total map.
// Once done, do a final application back to the total map, and return that. Probably want to make a copy or
// borrow the total map so it isn't mutated elsewhere.
// Advantage of this is the required code is tiny, and no third-party math library is required (my matrix math
// implementation probably won't be as good as one that's battle-tested)
// TODO: Time both approaches to see which is faster depending on the size of the input data/number of rules
/// Applies `rules` to `initial_totals` pass by pass and returns the resulting totals.
///
/// Within a pass every rule reads the totals as they were when the pass began;
/// the amounts received by destination departments are collected separately and
/// added to the totals when a separator rule is reached, and once more after
/// the last rule so a trailing separator is not required.
///
/// * Percentage rules give each department in `to_units` the sum of
///   `total * amount` over the departments named in `from_units`.
/// * A destination that is not yet present in the totals starts from `0.0`.
/// * Source departments are not reduced yet and fixed-amount rules are ignored
///   (see the TODOs in the body).
pub fn move_money_2(
    initial_totals: HashMap<String, f64>,
    rules: Vec<MovementRule>,
) -> HashMap<String, f64> {
    // TODO: Replace maps with generic objects, so we can sub in db access/load only some initially
    let mut running_total = initial_totals;
    // Amounts received per department during the current pass.
    let mut pending: HashMap<String, f64> = HashMap::new();
    for rule in rules {
        if rule.is_separator {
            apply_pending_movements(&mut running_total, &mut pending);
        } else if rule.is_percent {
            let moved: f64 = running_total
                .iter()
                .filter(|(department, _)| rule.from_units.contains(*department))
                .map(|(_, total)| total * rule.amount)
                .sum();
            for department in rule.to_units {
                *pending.entry(department).or_insert(0.0) += moved;
            }
            // TODO: Subtract values from the from departments
        } else {
            // TODO: Simple addition to to departments/subtraction from from departments
        }
    }
    // Rules after the last separator must still take effect.
    apply_pending_movements(&mut running_total, &mut pending);
    running_total
}

/// Adds every pending amount onto its department's total and empties `pending`.
fn apply_pending_movements(
    totals: &mut HashMap<String, f64>,
    pending: &mut HashMap<String, f64>,
) {
    for (department, amount) in pending.drain() {
        // Add rather than overwrite so the department keeps what it already had,
        // and start unknown departments at zero instead of panicking.
        *totals.entry(department).or_insert(0.0) += amount;
    }
}
// TODO: Could also look at BigDecimal rather than f64 for higher precision (even i64 might be fine if we don't need to divide...)
// Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department,
// you either need -1 or rest negative for a row to subtract the initial amounts so we end up effectively 0 (simultaneous equations end
// up with negative there so yes this is expected)
// Also, we could potentially use this same struct for non-overhead departments when mapping from overhead to
pub struct OverheadAllocationRule {
from_department: String,
to_department: String,
percent: f64,
@@ -20,31 +132,43 @@ pub struct TotalDepartmentCost {
value: f64,
}
// Gets the matrix that can be used to reciprocally allocate line items in an account
// TODO: What is actually supposed to be in the solve values? Not needed here but whatever calls this function will need to know this
// Also need to handle errors (return appropriate result type)
// TODO: Also need to return some order so we know what order ccs in the accounts should be in.. could just do this by returning a struct with
// the matrix and a method to get the value for a particular key using the hashmap we created.
fn get_reciprocal_allocation_matrix(allocations: Vec<OverheadDepartmentAllocation>, total_costs: Vec<TotalDepartmentCost>) -> DMatrix<f64> {
// Convert vector to matrix form - matrix of from/to percent (usage) and vector of original costs
// Matrix of all unique departments
// Perform the reciprocal allocation (matrix) method to allocate servicing departments (indirect) costs
// to functional departments. Basically just a matrix solve, uses regression (moore-penrose pseudoinverse) when
// matrix is singular
// TODO: Could also reduce memory by just calculating overhead costs in a first step (service departments), then
// calculating operating department costs in a second step using the output from the service departments (multiply
// by service department output rather than original). The second step can be a vector multiply or a loop, basically
// same as move money step, might even be able to just repeat it
// Note: PPM currently does the invert for the cost centres only (so can be up to 6000 ccs), as the cost centres are the actual departments,
// and a previous step calculates the percentages for overhead areas using their allocation statistics. Then for each account,
// it will use the overhead allocation matrix to calculate the moved/overhead allocations from the line items calculated from the previous
// cost definitions/reclass rules steps. Really we'd want to batch this out so we multiply a couple hundred or so accounts at a time (maybe
// with a batch size property)
pub fn get_reciprocal_allocation_matrix(
allocations: Vec<OverheadAllocationRule>,
total_costs: Vec<TotalDepartmentCost>,
) -> DMatrix<f64> {
let mut department_mappings: HashMap<String, usize> = HashMap::new();
for allocation in allocations.iter() {
let map_size = department_mappings.len();
department_mappings.entry(allocation.from_department.clone()).or_insert(map_size);
department_mappings
.entry(allocation.from_department.clone())
.or_insert(map_size);
let map_size = department_mappings.len();
department_mappings.entry(allocation.to_department.clone()).or_insert(map_size);
department_mappings
.entry(allocation.to_department.clone())
.or_insert(map_size);
}
let mut slice_allocations = vec![0.; department_mappings.len() * department_mappings.len()];
let mut slice_allocations = vec![0.; department_mappings.len()];
// TODO: This needs to be passed in another time.
// TODO: This needs to be passed in another time
let mut slice_costs = vec![0.; department_mappings.len()];
for allocation in allocations {
// TODO: Is there a more idiomatic way to do this?
let elem = &mut slice_allocations[*department_mappings.get(&allocation.from_department).unwrap()];
let elem = &mut slice_allocations[*department_mappings
.get(&allocation.from_department)
.unwrap()];
*elem = allocation.percent;
}
@@ -53,39 +177,22 @@ fn get_reciprocal_allocation_matrix(allocations: Vec<OverheadDepartmentAllocatio
*elem = cost.value;
}
let mat: DMatrix<f64> = DMatrix::from_row_slice(
department_mappings.len(),
department_mappings.len(),
&slice_allocations,
);
// TODO: Would be nice to make this batched... matrix doesn't support that though.
let mat: DMatrix<f64> = DMatrix::from_row_slice(department_mappings.len(), department_mappings.len(), &slice_allocations);
let costs_vec: DMatrix<f64> = DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);
let costs_vec: DMatrix<f64> =
DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);
// Perform reciprocal allocation (LU solve or pseudoinverse regression if the matrix is singular - pseudo inverse is done using nalgebra svd)
// TODO: Is it wasteful to perform the determinant rather than just immediately attempting lu? The implementation of determinant appears to call lu anyway?
// TODO: Only calculate lu/pseudoinverse once. We then do the solve for the overhead department totals for each account, and use this to
// calculate the final totals.
if mat.determinant() == 0. {
// Pseudo inverse to find minimum allocation
// TODO: Error handling
let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001);
pseudo_inverse.unwrap().mul(&costs_vec)
} else {
// Standard solve using lu with partial pivoting.
let lup = mat.lu();
// TODO: Error handling
lup.solve(&costs_vec).unwrap()
}
}
// This is kind of a pointless function, it's just a matrix multiply... better to have a method that takes a function that can retrieve the accounts,
// then an application would just need to pass in the batch retriever function and the initial overhead things.
// Only issue that could come up with this is I get a case where I can't pass a function in from another language. Better the application itself just
// uses the struct returned from the function above to ... (TODO: this note was left unfinished)
/// Placeholder: accepts an allocation matrix but has an empty body and returns nothing.
fn allocate_overheads(allocation_matrix: DMatrix<f64>, ) {
}
// IDEA:
// Consider a state-machine approach. Struct of allocations + total costs, then have a method to transform to
// reciprocal matrix + hashmap of indexes, then another method that takes cc costs per account to transform into final outputs.
// I think the state machine can be a higher-level api, and can make use of the above functions to transition between states.
// This way you won't need to remember each step of the process, and it would be simpler to swap out implementations
// as each struct in the state can swap out which functions it can use in the transition.