Add fixes to reciprocal allocation, example cli, add move money
This commit is contained in:
201
src/lib.rs
201
src/lib.rs
@@ -1,15 +1,127 @@
|
||||
extern crate nalgebra as na;
|
||||
|
||||
use na::DMatrix;
|
||||
use std::{collections::HashMap, ops::Mul};
|
||||
|
||||
use na::DMatrix;
|
||||
// TODO: Look into serde for serialisation, can also use it to serialise/deserialise
|
||||
// records from a csv file using the csv crate
|
||||
/// One instruction in a money-movement rule list: either a movement of an
/// amount between units, or a separator marking the end of a pass.
pub struct MovementRule {
|
||||
// If the vectors are empty, then it means 'all'
|
||||
/// Units whose totals the rule reads from.
pub from_units: Vec<String>,
|
||||
/// Units that receive the moved value.
pub to_units: Vec<String>,
|
||||
/// Amount to move; treated as a percentage when `is_percent` is set.
pub amount: f64,
|
||||
/// Whether `amount` is a percentage rather than a fixed value.
pub is_percent: bool,
|
||||
/// Whether this rule is a pass break rather than an actual movement.
pub is_separator: bool,
|
||||
}
|
||||
|
||||
// TODO: Could probably put this up a level by indicating how much of another department
|
||||
// each department used, then calculate the amounts from that.
|
||||
impl MovementRule {
|
||||
pub fn new() -> MovementRule {
|
||||
MovementRule {
|
||||
from_units: vec![],
|
||||
to_units: vec![],
|
||||
amount: 0.0,
|
||||
is_percent: false,
|
||||
is_separator: false,
|
||||
}
|
||||
}
|
||||
|
||||
// Note: No need to include the operating departments, only service departments are needed,
|
||||
// then once we calculate all of the
|
||||
pub struct OverheadDepartmentAllocation {
|
||||
pub fn pass_break() -> MovementRule {
|
||||
MovementRule {
|
||||
from_units: vec![],
|
||||
to_units: vec![],
|
||||
amount: 0.0,
|
||||
is_percent: false,
|
||||
is_separator: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn validate(&self) -> bool {
|
||||
if self.from_units.is_empty() && self.to_units.is_empty() {
|
||||
// Would be nice to have a decent message/error here as well
|
||||
return false;
|
||||
}
|
||||
if self.is_percent && (self.amount < 0.0 || self.amount > 100.0) {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
// Rules get parsed from file, converted into matrix format (for the in-memory movement),
|
||||
// then combined (smushed) into a single matrix + vector/rule for each . The list of units can then have the rules applied
|
||||
//
|
||||
// For now just ignore the all from/to stuff, it's kind of a shit thing to do, and can
|
||||
// be worked around by actually inputting every type into the rules
|
||||
|
||||
pub fn smush_rules(rules: Vec<MovementRule>) -> Vec<MovementRule> {
|
||||
let ruleMapping: HashMap<String, usize> = HashMap::new();
|
||||
// First build out the list/map of all departments (store index of each element in the array)
|
||||
// TODO: We could make this more advanced by only smushing per divider, so that only the departments
|
||||
// needed between each pass is actually required
|
||||
for rule in rules {
|
||||
for department in rule.from_units {
|
||||
// ruleMapping.entry(department).or_insert(ruleMapping.len());
|
||||
}
|
||||
}
|
||||
vec![]
|
||||
}
|
||||
|
||||
// Approach 1:
|
||||
// Use math (linear algebra) to move between departments. Memory/computationally it's equivalent
|
||||
// to the worst case of approach two, however can take advantage of auto parallelisation/simd
|
||||
// to perform fast, particularly on larger datasets.
|
||||
// This basically just involves smushing all the rules, then doing a matrix multiply and matrix addition
|
||||
// on the initial set. Can't record passes, but can record the smushed rules if only the data changes later
|
||||
// Advantage of this approach is it can be easily extended to run on the gpu.
|
||||
// Not implemented yet: an empty placeholder that takes no input and returns nothing.
pub fn move_money_1() {}
|
||||
|
||||
// Approach 2:
|
||||
// Traditional/naive, total for each department is stored in an initial map (department -> total amount).
|
||||
// Another map is built up for each rule, and each rule is processed based on the amount in the current total
|
||||
// map.
|
||||
// Upon a pass break (divider), the temp map will assign the values into the total map.
|
||||
// Once done, do a final assignment back to the total map, and return that. Probably want to make a copy or
|
||||
// borrow the total map so it isn't mutated elsewhere.
|
||||
// Advantage of this is the required code is tiny, and no third-party math library is required (my matrix math
|
||||
// implementation probably won't be as good as one that's battle-tested)
|
||||
// TODO: Time both approaches to see which is faster depending on the size of the input data/number of rules
|
||||
pub fn move_money_2(
|
||||
initial_totals: HashMap<String, f64>,
|
||||
rules: Vec<MovementRule>,
|
||||
) -> HashMap<String, f64> {
|
||||
// TODO: Replace maps with generic objects, so we can sub in db access/load only some initially
|
||||
let mut running_total = HashMap::from(initial_totals);
|
||||
let mut temp_total: HashMap<String, f64> = HashMap::new();
|
||||
for rule in rules {
|
||||
if rule.is_separator {
|
||||
temp_total.into_iter().for_each(|temp| {
|
||||
running_total.insert(temp.0, temp.1).unwrap();
|
||||
});
|
||||
temp_total = HashMap::new();
|
||||
} else if rule.is_percent {
|
||||
let new_value: f64 = running_total
|
||||
.iter()
|
||||
.filter(|department| rule.from_units.contains(department.0))
|
||||
.map(|department| department.1 * rule.amount)
|
||||
.sum();
|
||||
for department in rule.to_units {
|
||||
let previous_temp = temp_total.entry(department).or_insert(0.0);
|
||||
*previous_temp += new_value;
|
||||
}
|
||||
// TODO: Subtract values from the from departments
|
||||
} else {
|
||||
// TODO: Simple addition to to departments/subtraction from from departments
|
||||
}
|
||||
}
|
||||
running_total
|
||||
}
|
||||
|
||||
// TODO: Could also look at BigDecimal rather than f64 for higher precision (even i64 might be fine if we don't need to divide...)
|
||||
// Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department,
|
||||
// you either need -1 or rest negative for a row to subtract the initial amounts so we end up effectively 0 (simultaneous equations end
|
||||
// up with negative there so yes this is expected)
|
||||
// Also, we could potentially use this same struct for non-overhead departments when mapping from overhead to
|
||||
pub struct OverheadAllocationRule {
|
||||
from_department: String,
|
||||
to_department: String,
|
||||
percent: f64,
|
||||
@@ -20,31 +132,43 @@ pub struct TotalDepartmentCost {
|
||||
value: f64,
|
||||
}
|
||||
|
||||
// Gets the matrix that can be used to reciprocally allocate line items in an account
|
||||
// TODO: What is actually supposed to be in the solve values? Not needed here but whatever calls this function will need to know this
|
||||
// Also need to handle errors (return appropriate result type)
|
||||
// TODO: Also need to return some order so we know what order ccs in the accounts should be in.. could just do this by returning a struct with
|
||||
// the matrix and a method to get the value for a particular key using the hashmap we created.
|
||||
fn get_reciprocal_allocation_matrix(allocations: Vec<OverheadDepartmentAllocation>, total_costs: Vec<TotalDepartmentCost>) -> DMatrix<f64> {
|
||||
// Convert vector to matrix form - matrix of from/to percent (usage) and vector of original costs
|
||||
|
||||
// Matrix of all unique departments
|
||||
// Perform the reciprocal allocation (matrix) method to allocate servicing departments (indirect) costs
|
||||
// to functional departments. Basically just a matrix solve, uses regression (moore-penrose pseudoinverse) when
|
||||
// matrix is singular
|
||||
// TODO: Could also reduce memory by just calculating overhead costs in a first step (service departments), then
|
||||
// calculating operating department costs in a second step using the output from the service departments (multiply
|
||||
// by service department output rather than original). The second step can be a vector multiply or a loop, basically
|
||||
// same as move money step, might even be able to just repeat it
|
||||
// Note: PPM currently does the invert for the cost centres only (so can be up to 6000 ccs), as the cost centres are the actual departments,
|
||||
// and a previous step calculates the percentages for overhead areas using their allocation statistics. Then for each account,
|
||||
// it will use the overhead allocation matrix to calculate the moved/overhead allocations from the line items calculated from the previous
|
||||
// cost definitions/reclass rules steps. Really we'd want to batch this out so we multiply a couple hundred or so accounts at a time (maybe
|
||||
// with a batch size property)
|
||||
pub fn get_reciprocal_allocation_matrix(
|
||||
allocations: Vec<OverheadAllocationRule>,
|
||||
total_costs: Vec<TotalDepartmentCost>,
|
||||
) -> DMatrix<f64> {
|
||||
let mut department_mappings: HashMap<String, usize> = HashMap::new();
|
||||
for allocation in allocations.iter() {
|
||||
let map_size = department_mappings.len();
|
||||
department_mappings.entry(allocation.from_department.clone()).or_insert(map_size);
|
||||
department_mappings
|
||||
.entry(allocation.from_department.clone())
|
||||
.or_insert(map_size);
|
||||
let map_size = department_mappings.len();
|
||||
department_mappings.entry(allocation.to_department.clone()).or_insert(map_size);
|
||||
department_mappings
|
||||
.entry(allocation.to_department.clone())
|
||||
.or_insert(map_size);
|
||||
}
|
||||
|
||||
let mut slice_allocations = vec![0.; department_mappings.len() * department_mappings.len()];
|
||||
let mut slice_allocations = vec![0.; department_mappings.len()];
|
||||
|
||||
// TODO: This needs to be passed in another time.
|
||||
// TODO: This needs to be passed in another time
|
||||
let mut slice_costs = vec![0.; department_mappings.len()];
|
||||
|
||||
for allocation in allocations {
|
||||
// TODO: Is there a more idiomatic way to do this?
|
||||
let elem = &mut slice_allocations[*department_mappings.get(&allocation.from_department).unwrap()];
|
||||
let elem = &mut slice_allocations[*department_mappings
|
||||
.get(&allocation.from_department)
|
||||
.unwrap()];
|
||||
*elem = allocation.percent;
|
||||
}
|
||||
|
||||
@@ -53,39 +177,22 @@ fn get_reciprocal_allocation_matrix(allocations: Vec<OverheadDepartmentAllocatio
|
||||
*elem = cost.value;
|
||||
}
|
||||
|
||||
let mat: DMatrix<f64> = DMatrix::from_row_slice(
|
||||
department_mappings.len(),
|
||||
department_mappings.len(),
|
||||
&slice_allocations,
|
||||
);
|
||||
|
||||
// TODO: Would be nice to make this batched... matrix doesn't support that though.
|
||||
let mat: DMatrix<f64> = DMatrix::from_row_slice(department_mappings.len(), department_mappings.len(), &slice_allocations);
|
||||
let costs_vec: DMatrix<f64> = DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);
|
||||
let costs_vec: DMatrix<f64> =
|
||||
DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);
|
||||
|
||||
|
||||
// Perform reciprocal allocation (LU solve or pseudoinverse regression if the matrix is singular - pseudo inverse is done using nalgebra svd)
|
||||
// TODO: Is it wasteful to perform the determinant rather than just immediately attempting lu? The implementation of determinant appears to call lu anyway?
|
||||
// TODO: Only calculate lu/pseudoinverse once. We then do the solve for the overhead department totals for each account, and use this to
|
||||
// calculate the final totals.
|
||||
if mat.determinant() == 0. {
|
||||
// Pseudo inverse to find minimum allocation
|
||||
// TODO: Error handling
|
||||
let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001);
|
||||
pseudo_inverse.unwrap().mul(&costs_vec)
|
||||
} else {
|
||||
// Standard solve using lu with partial pivoting.
|
||||
let lup = mat.lu();
|
||||
// TODO: Error handling
|
||||
lup.solve(&costs_vec).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
// This is kind of a pointless function, it's just a matrix multiply... better to have a method that takes a function that can retrieve the accounts,
|
||||
// then an application would just need to pass in the batch retriever function and the initial overhead things.
|
||||
// Only issue that could come up with this is I get a case where I can't pass a function in from another language. Better the application itself just
|
||||
// uses the struct returned from the function above to
|
||||
fn allocate_overheads(allocation_matrix: DMatrix<f64>, ) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
// IDEA:
|
||||
// Consider a state-machine approach. Struct of allocations + total costs, then have a method to transform to
|
||||
// reciprocal matrix + hashmap of indexes, then another method that takes cc costs per account to transform into final outputs.
|
||||
// I think the state machine can be a higher-level api, and can make use of the above functions to transition between states.
|
||||
// This way you won't need to remember each step of the process, and it would be simpler to swap out implementations
|
||||
// as each struct in the state can swap out which functions it can use in the transition.
|
||||
|
||||
55
src/main.rs
55
src/main.rs
@@ -1,3 +1,54 @@
|
||||
fn main() {
|
||||
println!("Hello, world!");
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
|
||||
// Top-level command-line definition for the `coster-rs` binary.
// (Plain `//` comments are used on purpose: `///` doc comments on clap derive
// items are picked up as help text.)
#[derive(Parser)]
|
||||
#[clap(name = "coster-rs")]
|
||||
#[clap(author = "Pivato M. <mpivato4@gmail.com>")]
|
||||
#[clap(version = "0.0.1")]
|
||||
#[clap(about = "Simple, fast, efficient costing tool", long_about = None)]
|
||||
struct Cli {
|
||||
// The subcommand selected on the command line.
#[clap(subcommand)]
|
||||
command: Commands,
|
||||
}
|
||||
|
||||
// Subcommands offered by the CLI, each with its own file arguments.
// (Plain `//` comments are used on purpose: `///` doc comments on clap derive
// items are picked up as help text.)
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
// Move money between units according to a rules file.
move_money {
|
||||
// Path to the rules file (`-r` / `--rules`).
#[clap(short = 'r', long, parse(from_os_str), value_name = "FILE")]
|
||||
rules: PathBuf,
|
||||
|
||||
// Optional path for the output file.
#[clap(short, long, parse(from_os_str), value_name = "FILE")]
|
||||
output: Option<PathBuf>,
|
||||
},
|
||||
// Allocate overheads using a rules file and a lines file.
allocate_overheads {
|
||||
// Path to the rules file.
#[clap(short, long, parse(from_os_str), value_name = "FILE")]
|
||||
rules: PathBuf,
|
||||
|
||||
// Path to the lines file.
#[clap(short, long, parse(from_os_str), value_name = "FILE")]
|
||||
lines: PathBuf,
|
||||
|
||||
// Optional path for the output file.
#[clap(short, long, parse(from_os_str), value_name = "FILE")]
|
||||
output: Option<PathBuf>,
|
||||
},
|
||||
}
|
||||
|
||||
// TODO: Return error (implement the required trait to allow an error to be returned)
|
||||
// Entry point: parses the command line and dispatches to the handler for the
// chosen subcommand.
fn main() {
|
||||
let cli = Cli::parse();
|
||||
|
||||
match cli.command {
|
||||
// NOTE: the parsed paths are bound in both arms but not yet passed on to
// the handlers, which currently take no arguments.
Commands::move_money { rules, output } => move_money(),
|
||||
Commands::allocate_overheads {
|
||||
rules,
|
||||
lines,
|
||||
output,
|
||||
} => allocate_overheads(),
|
||||
}
|
||||
}
|
||||
|
||||
// Handler for the `move_money` subcommand; currently a stub with no implementation.
fn move_money() {
|
||||
// read rules, for each rule, inspect the files and
|
||||
}
|
||||
|
||||
// Handler for the `allocate_overheads` subcommand; currently an empty stub.
fn allocate_overheads() {}
|
||||
|
||||
Reference in New Issue
Block a user