extern crate nalgebra as na; use itertools::Itertools; use na::{DMatrix, Dynamic, LU}; use std::{collections::HashMap, error::Error, ops::Mul}; // TODO: Look into serde for serialisation, can also use it to serialise/deserialise // records from a csv file using the csv crate #[derive(Default)] pub struct MovementRule { // If the vectors are empty, then it means 'all' pub from_units: Vec, pub to_units: Vec, pub amount: f64, pub is_percent: bool, pub is_separator: bool, } impl MovementRule { pub fn pass_break() -> MovementRule { MovementRule { from_units: vec![], to_units: vec![], amount: 0.0, is_percent: false, is_separator: true, } } pub fn validate(&self) -> bool { if self.from_units.is_empty() && self.to_units.is_empty() { // Would be nice to have a decent message/error here as well return false; } if self.is_percent && (self.amount < 0.0 || self.amount > 100.0) { return false; } true } } // Rules get parsed from file, converted into matrix format (for the in-memory movement), // then combined (smushed) into a single matrix + vector/rule for each . The list of units can then have the rules applied // // For now just ignore the all from/to stuff, it's kind of a shit thing to do, and can // be worked around by actually inputting every type into the rules pub fn smush_rules(rules: Vec) -> Vec { let ruleMapping: HashMap = HashMap::new(); // First build out the list/map of all departments (store index of each element in the array) // TODO: We could make this more advanced by only smushing per divider, so that only the departments // needed between each pass is actually required for rule in rules { for department in rule.from_units { // ruleMapping.entry(department).or_insert(ruleMapping.len()); } } vec![] } // Approach 1: // Use math (linear algebra) to move between departments. Memory/computationally it's equivalent // to the worst case of approach one, however can take advantage of auto parallelisation/simd // to perform fast, particularly on larger datasets. // This basically just involves smushing all the rules, then doing a matrix multiple and matrix addition // on the initial set. Can't record passes, but can record the smushed rules if only the data changes later // Advantage of this approach is it can be easily extended to run on the gpu. pub fn move_money_1() {} // Approach 2: // Traditinoal/naive, total for each department is stored in an initial map (department -> total amount). // Another map is built up for each rule, and each rule is processed based on the amount in the current total // map. // Upon a pass break (separator), the temp map will assign the values into the total map. // Once done, do a final assignment back to the total, and return that. // Advantage of this is the required code is tiny, and no third-party math library is required. // Note that the movement happens on a line-by-line level. So we can stream the data from disk, and potentially apply this // to every. It's also much more memory efficient than approach 1. // TODO: Time both approaches to seee which is faster depending on the size of the input data/number of rules // TODO: Right now this only supports movements between departments, we also need to support movements between accounts. // This would require an expansion so that we also have from/to accounts, and the hashmap will use some struct // that combines an account/department, which is also how the totals will be loaded. (so when loading from disk, // we load the whole GL into memory sum the account/department totals, and move these into a map line by line) pub fn move_money_2( initial_totals: HashMap, rules: Vec, ) -> HashMap { // TODO: Should probably validate that all the rules have departments that actually exist in initial_totals. // Note: It's potentially a bit more intensive to use cloned totals, but it's much simpler code and, and since we're only working line-by-line // it isn't really that much memory. in practice let mut running_total = HashMap::from(initial_totals); let mut temp_total: HashMap = running_total.clone(); for rule in rules { if rule.is_separator { running_total = temp_total.clone(); } else { let mut sum_from = 0.; for department in rule.from_units { let previous_temp = running_total.get(&department).expect( "Failed to find department in temp totals, this should not be possible", ); let added_amount = if rule.is_percent { *previous_temp * rule.amount } else { rule.amount }; sum_from += added_amount; *temp_total.get_mut(&department).unwrap() -= added_amount; } let value_per_unit = sum_from / rule.to_units.len() as f64; for department in rule.to_units { *temp_total.get_mut(&department).unwrap() += value_per_unit; } } } temp_total } #[derive(Debug, PartialEq, Eq)] pub enum DepartmentType { Operating, Overhead, } // TODO: Could also look at BigDecimal rather than f64 for higher precision (even i64 might be fine if we don't need to divide...) // Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department, // you either need -1 or rest negative for a row to subtract the initial amounts so we end up effectively 0 (simultaneous equations end // up with negative there so yes this is expected) // Also, we could potentially use this same struct for non-overhead departments when mapping from overhead to pub struct OverheadAllocationRule { from_overhead_department: String, to_department: String, percent: f64, to_department_type: DepartmentType, } pub struct TotalDepartmentCost { department: String, value: f64, } pub struct AccountCost { account: String, summed_department_costs: Vec, } // TODO: Also need a way to dictate the order of the departments? pub trait ReciprocalAllocationSolver { fn solve(&self, costs: &DMatrix) -> DMatrix; } impl ReciprocalAllocationSolver for LU { fn solve(&self, costs: &DMatrix) -> DMatrix { self.solve(costs).unwrap() } } impl ReciprocalAllocationSolver for DMatrix { fn solve(&self, costs: &DMatrix) -> DMatrix { self.mul(costs) } } fn get_rules_indexes( allocations: &Vec, department_type: DepartmentType, ) -> HashMap { allocations .iter() .filter(|allocation| allocation.to_department_type == department_type) .flat_map(|department| { [ department.from_overhead_department.clone(), department.to_department.clone(), ] }) .unique() .enumerate() .map(|(index, department)| (department, index)) .collect() } // Perform the reciprocal allocation (matrix) method to allocate servicing departments (indirect) costs // to functional departments. Basically just a matrix solve, uses regression (moore-penrose pseudoinverse) when // matrix is singular pub fn reciprocal_allocation( allocations: Vec, account_costs: Vec, // TODO: Throw an appropriate error ) -> Result, Box> { let overhead_department_mappings: HashMap = get_rules_indexes(&allocations, DepartmentType::Overhead); let operating_department_mappings: HashMap = get_rules_indexes(&allocations, DepartmentType::Operating); let mut slice_allocations = vec![0.; overhead_department_mappings.len() * overhead_department_mappings.len()]; for allocation in allocations .iter() .filter(|allocation| allocation.to_department_type == DepartmentType::Overhead) { // TODO: Check if we need to flp this around let from_index = overhead_department_mappings .get(&allocation.from_overhead_department) .unwrap(); let to_index = operating_department_mappings .get(&allocation.to_department) .unwrap(); let elem = &mut slice_allocations [(*from_index) + (overhead_department_mappings.len() * (*to_index))]; *elem = allocation.percent; } // TODO: Also need ones along the diagonal, and negatives in some places... let mat: DMatrix = DMatrix::from_row_slice( overhead_department_mappings.len(), overhead_department_mappings.len(), &slice_allocations, ); if mat.determinant() == 0. { let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001); do_solve_reciprocal( pseudo_inverse.unwrap(), account_costs, overhead_department_mappings, allocations, ) } else { do_solve_reciprocal( mat.lu(), account_costs, overhead_department_mappings, allocations, ) } } fn do_solve_reciprocal( solver: T, account_costs: Vec, department_mappings: HashMap, allocations: Vec, ) -> Result, Box> { // TODO: Could batch the accounts, although probably won't see to big a speed increase, compiler should help us out for total_costs in account_costs { let mut slice_costs = vec![0.; department_mappings.len()]; for cost in total_costs.summed_department_costs { let elem = &mut slice_costs[*department_mappings.get(&cost.department).unwrap()]; *elem = cost.value; } let costs_vec: DMatrix = DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs); let calculated_overheads = solver.solve(&costs_vec); // Calculation: operating_overhead_usage . calculated_overheads + initial_totals // Where operating_overhead_usage is the direct mapping from overhead -> operating department, calculated overheads is the // solved overheads usages after taking into account usage between departments, and initial_totals is the initial values // for the operating departments. } // TODO: return something appropriate Ok(vec![]) }