Add fixes to reciprocal allocation, example cli, add move money

2022-06-18 10:30:18 +09:30
parent 6db4a50125
commit efdf4af2de
4 changed files with 475 additions and 50 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,15 +1,127 @@
 extern crate nalgebra as na;

+use na::DMatrix;
 use std::{collections::HashMap, ops::Mul};

-use na::DMatrix;
+// TODO: Look into serde for serialisation, can also use it to serialise/deserialise
+// records from a csv file using the csv crate
+pub struct MovementRule {
+    // If the vectors are empty, then it means 'all'
+    pub from_units: Vec<String>,
+    pub to_units: Vec<String>,
+    pub amount: f64,
+    pub is_percent: bool,
+    pub is_separator: bool,
+}

-// TODO: Could probably put this up a level by indicating how much of another department
-// each department used, then calculate the amounts from that.
+impl MovementRule {
+    pub fn new() -> MovementRule {
+        MovementRule {
+            from_units: vec![],
+            to_units: vec![],
+            amount: 0.0,
+            is_percent: false,
+            is_separator: false,
+        }
+    }

-// Note: No need to include the operating departments, only service departments are needed,
-//  then once we calculate all of the 
-pub struct OverheadDepartmentAllocation {
+    pub fn pass_break() -> MovementRule {
+        MovementRule {
+            from_units: vec![],
+            to_units: vec![],
+            amount: 0.0,
+            is_percent: false,
+            is_separator: true,
+        }
+    }
+
+    pub fn validate(&self) -> bool {
+        if self.from_units.is_empty() && self.to_units.is_empty() {
+            // Would be nice to have a decent message/error here as well
+            return false;
+        }
+        if self.is_percent && (self.amount < 0.0 || self.amount > 100.0) {
+            return false;
+        }
+        true
+    }
+}
+
+// Rules get parsed from file, converted into matrix format (for the in-memory movement),
+// then combined (smushed) into a single matrix + vector/rule for each . The list of units can then have the rules applied
+//
+// For now just ignore the 'all' from/to handling (empty vectors): it's awkward to support, and can
+// be worked around by actually inputting every type into the rules
+
+pub fn smush_rules(rules: Vec<MovementRule>) -> Vec<MovementRule> {
+    // Index every department named by a rule (from or to) in first-seen order. Smushing itself is
+    // not implemented yet, so the mapping is discarded and an empty rule list is returned.
+    // TODO: Smush per divider instead, so only the departments needed between each pass are mapped
+    let mut rule_mapping: HashMap<String, usize> = HashMap::new();
+    for rule in rules {
+        for department in rule.from_units.into_iter().chain(rule.to_units) {
+            let next_index = rule_mapping.len(); // read before `entry` takes the mutable borrow
+            rule_mapping.entry(department).or_insert(next_index);
+        }
+    }
+    vec![]
+}
+
+// Approach 1:
+// Use math (linear algebra) to move between departments. In memory/compute terms it's equivalent
+// to the worst case of approach two, however it can take advantage of auto parallelisation/SIMD
+// to perform fast, particularly on larger datasets.
+// This basically just involves smushing all the rules, then doing a matrix multiply and matrix addition
+// on the initial set. Can't record passes, but can record the smushed rules if only the data changes later
+// Advantage of this approach is it can be easily extended to run on the gpu.
+pub fn move_money_1() {}
+
+// Approach 2:
+// Traditional/naive: the total for each department is stored in an initial map (department -> total amount).
+// Each percent rule sums `total * amount` over its `from_units` (read from the running totals) and adds
+// that sum to every one of its `to_units` in a temporary map.
+// Upon a pass break (divider), and once more after the last rule, the temp map's values are assigned into
+// the running totals, which are returned. The initial totals are taken by value, so no caller map is mutated.
+// NOTE(review): `amount` is a raw multiplier here while `validate` accepts 0..=100 for percent rules - confirm scale.
+// Advantage of this is the required code is tiny, and no third-party math library is required (my matrix math
+// implementation probably won't be as good as one that's battle-tested)
+// TODO: Time both approaches to see which is faster depending on the size of the input data/number of rules
+pub fn move_money_2(
+    initial_totals: HashMap<String, f64>,
+    rules: Vec<MovementRule>,
+) -> HashMap<String, f64> {
+    // TODO: Replace maps with generic objects, so we can sub in db access/load only some initially
+    let mut running_total = initial_totals;
+    let mut temp_total: HashMap<String, f64> = HashMap::new();
+    for rule in rules {
+        if rule.is_separator {
+            // `extend` overwrites existing departments and also accepts ones that are new to the
+            // running totals (an `insert(..).unwrap()` panics on those, as `insert` returns `None`).
+            running_total.extend(temp_total.drain());
+        } else if rule.is_percent {
+            let new_value: f64 = running_total
+                .iter()
+                .filter(|department| rule.from_units.contains(department.0))
+                .map(|department| department.1 * rule.amount)
+                .sum();
+            for department in rule.to_units {
+                *temp_total.entry(department).or_insert(0.0) += new_value;
+            }
+            // TODO: Subtract values from the from departments
+        } else {
+            // TODO: Simple addition to to departments/subtraction from from departments
+        }
+    }
+    // Final assignment: movements after the last pass break would otherwise be silently dropped.
+    running_total.extend(temp_total);
+    running_total
+}
+
+// TODO: Could also look at BigDecimal rather than f64 for higher precision (even i64 might be fine if we don't need to divide...)
+// Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department,
+// you either need -1 or rest negative for a row to subtract the initial amounts so we end up effectively 0 (simultaneous equations end
+// up with negative there so yes this is expected)
+// Also, we could potentially use this same struct for non-overhead departments when mapping from overhead to
+pub struct OverheadAllocationRule {
    from_department: String,
    to_department: String,
    percent: f64,
@@ -20,31 +132,43 @@ pub struct TotalDepartmentCost {
    value: f64,
 }

-// Gets the matrix that can be used to reciprocally allocate line items in an account
-// TODO: What is actually supposed to be in the solve values? Not needed here but whatever calls this function will need to know this
-//      Also need to handle errors (return appropriate result type)
-// TODO: Also need to return some order so we know what order ccs in the accounts should be in.. could just do this by returning a struct with 
-//  the matrix and a method to get the value for a particular key using the hashmap we created.
-fn get_reciprocal_allocation_matrix(allocations: Vec<OverheadDepartmentAllocation>, total_costs: Vec<TotalDepartmentCost>) -> DMatrix<f64> {
-    // Convert vector to matrix form - matrix of from/to percent (usage) and vector of original costs
-
-    // Matrix of all unique departments
+// Perform the reciprocal allocation (matrix) method to allocate servicing departments (indirect) costs
+// to functional departments. Basically just a matrix solve, uses regression (moore-penrose pseudoinverse) when
+// matrix is singular
+// TODO: Could also reduce memory by just calculating overhead costs in a first step (service departments), then
+// calculating operating department costs in a second step using the output from the service departments (multiply
+// by service department output rather than original). The second step can be a vector multiply or a loop, basically
+// same as move money step, might even be able to just repeat it
+// Note: PPM currently does the invert for the cost centres only (so can be up to 6000 ccs), as the cost centres are the actual departments,
+// and a previous step calculates the percentages for overhead areas using their allocation statistics. Then for each account,
+// it will use the overhead allocation matrix to calculate the moved/overhead allocations from the line items calculated from the previous
+// cost definitions/reclass rules steps. Really we'd want to batch this out so we multiply a couple hundred or so accounts at a time (maybe
+// with a batch size property)
+pub fn get_reciprocal_allocation_matrix(
+    allocations: Vec<OverheadAllocationRule>,
+    total_costs: Vec<TotalDepartmentCost>,
+) -> DMatrix<f64> {
    let mut department_mappings: HashMap<String, usize> = HashMap::new();
    for allocation in allocations.iter() {
        let map_size = department_mappings.len();
-        department_mappings.entry(allocation.from_department.clone()).or_insert(map_size);
+        department_mappings
+            .entry(allocation.from_department.clone())
+            .or_insert(map_size);
        let map_size = department_mappings.len();
-        department_mappings.entry(allocation.to_department.clone()).or_insert(map_size);
+        department_mappings
+            .entry(allocation.to_department.clone())
+            .or_insert(map_size);
    }

-    let mut slice_allocations = vec![0.; department_mappings.len() * department_mappings.len()];
+    let mut slice_allocations = vec![0.; department_mappings.len() * department_mappings.len()];

-    // TODO: This needs to be passed in another time.
+    // TODO: This needs to be passed in another time
    let mut slice_costs = vec![0.; department_mappings.len()];
-    
    for allocation in allocations {
        // TODO: Is there a more idiomatic way to do this?
-        let elem = &mut slice_allocations[*department_mappings.get(&allocation.from_department).unwrap()];
+        let elem = &mut slice_allocations[*department_mappings
+            .get(&allocation.from_department)
+            .unwrap()];
        *elem = allocation.percent;
    }

@@ -53,39 +177,22 @@ fn get_reciprocal_allocation_matrix(allocations: Vec<OverheadDepartmentAllocatio
        *elem = cost.value;
    }

+    let mat: DMatrix<f64> = DMatrix::from_row_slice(
+        department_mappings.len(),
+        department_mappings.len(),
+        &slice_allocations,
+    );

-    // TODO: Would be nice to make this batched... matrix doesn't support that though.
-    let mat: DMatrix<f64> = DMatrix::from_row_slice(department_mappings.len(), department_mappings.len(), &slice_allocations);
-    let costs_vec: DMatrix<f64> = DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);
+    let costs_vec: DMatrix<f64> =
+        DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);

-
-    // Perform reciprocal allocation (LU solve or pseudoinverse regression if the matrix is singular - pseudo inverse is done using nalgebra svd)
-    // TODO: Is it wasteful to perform the determinant rather than just immediately attempting lu? The implementation of determinant appears calls lu anyway?
+    // TODO: Only calculate lu/pseudoinverse once. We then do the solve for the overhead department totals for each account, and use this to
+    // calculate the final totals.
    if mat.determinant() == 0. {
-        // Pseudo inverse to find mininmum allocation
-        // TODO: Error handling
        let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001);
        pseudo_inverse.unwrap().mul(&costs_vec)
    } else {
-        // Standard solve using lu with partial pivoting.
        let lup = mat.lu();
-        // TODO: Error handling
        lup.solve(&costs_vec).unwrap()
    }
 }
-
-// This is kind of a pointless function, it's just a matrix multiply... better to have a method that takes a function that can retrieve the accounts,
-//  then an application would just need to pass in the batch retriever function and the initial overhead things.
-// Only issue that could come up with this is I get a case where I can't pass a function in from another language. Better the application itself just
-// uses the struct returned from the function above to 
-fn allocate_overheads(allocation_matrix: DMatrix<f64>, ) {
-
-}
-
-
-// IDEA:
-// Consider a state-machine approach. Struct of allocations + total costs, then have a method to transform to 
-// reciprocal matrix + hashmap of indexes, then another method that takes cc costs per account to transform into final outputs.
-// I think the state machine can be a higher-level api, and can make use of the above functions to transition between states.
-//  This way you won't need to remember each step of the process, and it would be simpler to swap out implementations
-//  as each struct in the state can swap out which functions it can use in the transition.