Add fixes to reciprocal allocation, example cli, add move money

This commit is contained in:
Piv
2022-06-18 10:30:18 +09:30
parent 6db4a50125
commit efdf4af2de
4 changed files with 475 additions and 50 deletions

259
Cargo.lock generated
View File

@@ -11,25 +11,166 @@ dependencies = [
"num-traits",
]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bstr"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
dependencies = [
"lazy_static",
"memchr",
"regex-automata",
"serde",
]
[[package]]
name = "bytemuck"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc"
[[package]]
name = "clap"
version = "3.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d53da17d37dba964b9b3ecb5c5a1f193a2762c700e6829201e645b9381c99dc7"
dependencies = [
"atty",
"bitflags",
"clap_derive",
"clap_lex",
"indexmap",
"once_cell",
"strsim",
"termcolor",
"textwrap",
]
[[package]]
name = "clap_derive"
version = "3.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c11d40217d16aee8508cc8e5fde8b4ff24639758608e5374e731b53f85749fb9"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5538cd660450ebeb4234cfecf8f2284b844ffc4c50531e66d584ad5b91293613"
dependencies = [
"os_str_bytes",
]
[[package]]
name = "coster-rs"
version = "0.1.0"
dependencies = [
"clap",
"csv",
"nalgebra",
]
[[package]]
name = "csv"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
dependencies = [
"bstr",
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "hashbrown"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3"
[[package]]
name = "heck"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "indexmap"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c6392766afd7964e2531940894cffe4bd8d7d17dbc3c1c4857040fd4b33bdb3"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "itoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "matrixmultiply"
version = "0.3.2"
@@ -39,6 +180,12 @@ dependencies = [
"rawpointer",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "nalgebra"
version = "0.31.0"
@@ -105,12 +252,48 @@ dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225"
[[package]]
name = "os_str_bytes"
version = "6.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa"
[[package]]
name = "paste"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc"
[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote",
"version_check",
]
[[package]]
name = "proc-macro2"
version = "1.0.39"
@@ -135,6 +318,18 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
[[package]]
name = "ryu"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
[[package]]
name = "safe_arch"
version = "0.6.0"
@@ -144,6 +339,12 @@ dependencies = [
"bytemuck",
]
[[package]]
name = "serde"
version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1"
[[package]]
name = "simba"
version = "0.7.1"
@@ -157,6 +358,12 @@ dependencies = [
"wide",
]
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "1.0.95"
@@ -168,6 +375,21 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "termcolor"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb"
[[package]]
name = "typenum"
version = "1.15.0"
@@ -180,6 +402,12 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wide"
version = "0.7.4"
@@ -189,3 +417,34 @@ dependencies = [
"bytemuck",
"safe_arch",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@@ -6,4 +6,12 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# https://nalgebra.org/docs/user_guide/getting_started
nalgebra = "0.31.0"
# https://docs.rs/csv/1.1.6/csv/
csv = "1.1"
# simba = { version = "0.7.1", features = ["partial_fixed_point_support"] }
# num = "0.4"
clap = { version = "3.1.18", features = ["derive"] }

View File

@@ -1,15 +1,127 @@
extern crate nalgebra as na;
use na::DMatrix;
use std::{collections::HashMap, ops::Mul};
use na::DMatrix;
// TODO: Look into serde for serialisation, can also use it to serialise/deserialise
// records from a csv file using the csv crate
/// One instruction describing how money moves between units (departments),
/// or a marker separating two passes of movement.
#[derive(Debug, Clone, PartialEq)]
pub struct MovementRule {
    /// Units money is taken from. If the vector is empty, then it means 'all'.
    pub from_units: Vec<String>,
    /// Units money is given to. If the vector is empty, then it means 'all'.
    pub to_units: Vec<String>,
    /// Quantity to move; treated as a percentage when `is_percent` is set.
    pub amount: f64,
    /// Whether `amount` is a percentage rather than a plain amount.
    pub is_percent: bool,
    /// Whether this rule is a pass break (divider) rather than a real movement.
    pub is_separator: bool,
}
// TODO: Could probably put this up a level by indicating how much of another department
// each department used, then calculate the amounts from that.
impl MovementRule {
/// Builds a blank, non-separator rule: no units on either side, a zero
/// amount, and the percent flag cleared.
pub fn new() -> MovementRule {
    let from_units: Vec<String> = Vec::new();
    let to_units: Vec<String> = Vec::new();
    MovementRule {
        from_units,
        to_units,
        amount: 0.0,
        is_percent: false,
        is_separator: false,
    }
}
// Note: No need to include the operating departments, only service departments are needed,
// then once we calculate all of the
pub struct OverheadDepartmentAllocation {
/// Builds the marker rule that denotes a pass break: every field is blank
/// (no units, zero amount, not a percentage) except `is_separator`, which
/// is set.
pub fn pass_break() -> MovementRule {
    let from_units: Vec<String> = Vec::new();
    let to_units: Vec<String> = Vec::new();
    MovementRule {
        is_separator: true,
        is_percent: false,
        amount: 0.0,
        from_units,
        to_units,
    }
}
/// Checks that the rule is internally consistent.
///
/// Returns `false` when both `from_units` and `to_units` are empty, or when
/// the rule is a percentage and `amount` is not a number within
/// `0.0..=100.0`. Returns `true` otherwise.
///
/// NOTE(review): a rule built by `pass_break()` has both unit lists empty and
/// therefore fails this check — confirm whether separators should be exempt.
pub fn validate(&self) -> bool {
    // Would be nice to have a decent message/error here as well
    if self.from_units.is_empty() && self.to_units.is_empty() {
        return false;
    }
    // `contains` is false for NaN, so a NaN percentage is rejected instead of
    // slipping past a pair of `<` / `>` comparisons.
    !self.is_percent || (0.0..=100.0).contains(&self.amount)
}
}
/// Rules get parsed from file, converted into matrix format (for the in-memory movement),
/// then combined (smushed) into a single matrix + vector/rule (per pass, presumably — the
/// original note was cut off here). The list of units can then have the rules applied.
///
/// For now the 'all' from/to case is ignored; it is awkward to support, and can be worked
/// around by listing every unit explicitly in the rules.
///
/// Currently only the department -> index mapping is built; the combining step is not
/// implemented yet, so an empty vector is always returned.
pub fn smush_rules(rules: Vec<MovementRule>) -> Vec<MovementRule> {
    let mut rule_mapping: HashMap<String, usize> = HashMap::new();
    // First build out the list/map of all departments (store index of each element in the array)
    // TODO: We could make this more advanced by only smushing per divider, so that only the departments
    // needed between each pass is actually required
    for rule in rules {
        for department in rule.from_units.into_iter().chain(rule.to_units) {
            // Read the length before taking the entry: the entry holds a mutable
            // borrow of the map, so `len()` cannot be called inside `or_insert`.
            let next_index = rule_mapping.len();
            rule_mapping.entry(department).or_insert(next_index);
        }
    }
    // TODO: Use `rule_mapping` to combine the rules once the matrix form exists.
    vec![]
}
/// Approach 1:
/// Use math (linear algebra) to move between departments. Memory/computationally it's equivalent
/// to the worst case of the map-based approach (NOTE(review): the original note said "approach one"
/// here — presumably approach two was meant), however it can take advantage of auto
/// parallelisation/simd to perform fast, particularly on larger datasets.
/// This basically just involves smushing all the rules, then doing a matrix multiply and matrix addition
/// on the initial set. Can't record passes, but can record the smushed rules if only the data changes later.
/// Advantage of this approach is it can be easily extended to run on the gpu.
///
/// Currently an empty stub: it takes no arguments and does nothing.
pub fn move_money_1() {}
/// Approach 2:
/// Traditional/naive. The total for each department is stored in an initial map
/// (department -> total amount). Another (temporary) map is built up as rules are processed,
/// and each rule is evaluated against the amounts in the current total map.
/// Upon a pass break (divider), the temp map assigns its values into the total map.
/// Once all rules are done, a final assignment from the temp map into the total map is made
/// and the total map is returned.
///
/// Advantage of this is the required code is tiny, and no third-party math library is required
/// (my matrix math implementation probably won't be as good as one that's battle-tested).
///
/// `initial_totals` is taken by value, so the caller's map is never mutated. Departments that
/// receive money but have no entry in `initial_totals` are added to the result.
///
/// NOTE(review): for percentage rules the source totals are multiplied by `rule.amount`
/// directly, while `MovementRule::validate` accepts percentages in `0..=100` — confirm whether
/// a division by 100 is intended. Assigned values also replace (rather than add to) the
/// destination's running total.
// TODO: Time both approaches to see which is faster depending on the size of the input data/number of rules
pub fn move_money_2(
    initial_totals: HashMap<String, f64>,
    rules: Vec<MovementRule>,
) -> HashMap<String, f64> {
    // TODO: Replace maps with generic objects, so we can sub in db access/load only some initially
    let mut running_total = initial_totals;
    let mut temp_total: HashMap<String, f64> = HashMap::new();
    for rule in rules {
        if rule.is_separator {
            // Assign the pending values into the running totals. `extend` overwrites
            // existing keys and adds new ones, so a destination without a prior entry
            // no longer causes a panic. `drain` leaves `temp_total` empty for the next pass.
            running_total.extend(temp_total.drain());
        } else if rule.is_percent {
            let new_value: f64 = running_total
                .iter()
                .filter(|(department, _)| rule.from_units.contains(*department))
                .map(|(_, total)| total * rule.amount)
                .sum();
            for department in rule.to_units {
                *temp_total.entry(department).or_insert(0.0) += new_value;
            }
            // TODO: Subtract values from the from departments
        } else {
            // TODO: Simple addition to to departments/subtraction from from departments
        }
    }
    // Final assignment: moves recorded after the last pass break would otherwise be lost.
    running_total.extend(temp_total);
    running_total
}
// TODO: Could also look at BigDecimal rather than f64 for higher precision (even i64 might be fine if we don't need to divide...)
// Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department,
// you either need -1 or rest negative for a row to subtract the initial amounts so we end up effectively 0 (simultaneous equations end
// up with negative there so yes this is expected)
// Also, we could potentially use this same struct for non-overhead departments when mapping from overhead to
pub struct OverheadAllocationRule {
from_department: String,
to_department: String,
percent: f64,
@@ -20,31 +132,43 @@ pub struct TotalDepartmentCost {
value: f64,
}
// Gets the matrix that can be used to reciprocally allocate line items in an account
// TODO: What is actually supposed to be in the solve values? Not needed here but whatever calls this function will need to know this
// Also need to handle errors (return appropriate result type)
// TODO: Also need to return some order so we know what order ccs in the accounts should be in.. could just do this by returning a struct with
// the matrix and a method to get the value for a particular key using the hashmap we created.
fn get_reciprocal_allocation_matrix(allocations: Vec<OverheadDepartmentAllocation>, total_costs: Vec<TotalDepartmentCost>) -> DMatrix<f64> {
// Convert vector to matrix form - matrix of from/to percent (usage) and vector of original costs
// Matrix of all unique departments
// Perform the reciprocal allocation (matrix) method to allocate servicing departments (indirect) costs
// to functional departments. Basically just a matrix solve, uses regression (moore-penrose pseudoinverse) when
// matrix is singular
// TODO: Could also reduce memory by just calculating overhead costs in a first step (service departments), then
// calculating operating department costs in a second step using the output from the service departments (multiply
// by service department output rather than original). The second step can be a vector multiply or a loop, basically
// same as move money step, might even be able to just repeat it
// Note: PPM currently does the invert for the cost centres only (so can be up to 6000 ccs), as the cost centres are the actual departments,
// and a previous step calculates the percentages for overhead areas using their allocation statistics. Then for each account,
// it will use the overhead allocation matrix to calculate the moved/overhead allocations from the line items calculated from the previous
// cost definitions/reclass rules steps. Really we'd want to batch this out so we multiply a couple hundred or so accounts at a time (maybe
// with a batch size property)
pub fn get_reciprocal_allocation_matrix(
allocations: Vec<OverheadAllocationRule>,
total_costs: Vec<TotalDepartmentCost>,
) -> DMatrix<f64> {
let mut department_mappings: HashMap<String, usize> = HashMap::new();
for allocation in allocations.iter() {
let map_size = department_mappings.len();
department_mappings.entry(allocation.from_department.clone()).or_insert(map_size);
department_mappings
.entry(allocation.from_department.clone())
.or_insert(map_size);
let map_size = department_mappings.len();
department_mappings.entry(allocation.to_department.clone()).or_insert(map_size);
department_mappings
.entry(allocation.to_department.clone())
.or_insert(map_size);
}
let mut slice_allocations = vec![0.; department_mappings.len() * department_mappings.len()];
let mut slice_allocations = vec![0.; department_mappings.len()];
// TODO: This needs to be passed in another time.
// TODO: This needs to be passed in another time
let mut slice_costs = vec![0.; department_mappings.len()];
for allocation in allocations {
// TODO: Is there a more idiomatic way to do this?
let elem = &mut slice_allocations[*department_mappings.get(&allocation.from_department).unwrap()];
let elem = &mut slice_allocations[*department_mappings
.get(&allocation.from_department)
.unwrap()];
*elem = allocation.percent;
}
@@ -53,39 +177,22 @@ fn get_reciprocal_allocation_matrix(allocations: Vec<OverheadDepartmentAllocatio
*elem = cost.value;
}
let mat: DMatrix<f64> = DMatrix::from_row_slice(
department_mappings.len(),
department_mappings.len(),
&slice_allocations,
);
// TODO: Would be nice to make this batched... matrix doesn't support that though.
let mat: DMatrix<f64> = DMatrix::from_row_slice(department_mappings.len(), department_mappings.len(), &slice_allocations);
let costs_vec: DMatrix<f64> = DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);
let costs_vec: DMatrix<f64> =
DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);
// Perform reciprocal allocation (LU solve or pseudoinverse regression if the matrix is singular - pseudo inverse is done using nalgebra svd)
// TODO: Is it wasteful to compute the determinant rather than just immediately attempting lu? The implementation of determinant appears to call lu anyway?
// TODO: Only calculate lu/pseudoinverse once. We then do the solve for the overhead department totals for each account, and use this to
// calculate the final totals.
if mat.determinant() == 0. {
// Pseudo inverse to find minimum allocation
// TODO: Error handling
let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001);
pseudo_inverse.unwrap().mul(&costs_vec)
} else {
// Standard solve using lu with partial pivoting.
let lup = mat.lu();
// TODO: Error handling
lup.solve(&costs_vec).unwrap()
}
}
// This is kind of a pointless function, it's just a matrix multiply... better to have a method that takes a function that can retrieve the accounts,
// then an application would just need to pass in the batch retriever function and the initial overhead things.
// Only issue that could come up with this is I get a case where I can't pass a function in from another language. Better the application itself just
// uses the struct returned from the function above to
fn allocate_overheads(allocation_matrix: DMatrix<f64>, ) {
}
// IDEA:
// Consider a state-machine approach. Struct of allocations + total costs, then have a method to transform to
// reciprocal matrix + hashmap of indexes, then another method that takes cc costs per account to transform into final outputs.
// I think the state machine can be a higher-level api, and can make use of the above functions to transition between states.
// This way you won't need to remember each step of the process, and it would be simpler to swap out implementations
// as each struct in the state can swap out which functions it can use in the transition.

View File

@@ -1,3 +1,54 @@
fn main() {
println!("Hello, world!");
use std::path::PathBuf;
use clap::{Parser, Subcommand};
// Top-level command-line definition, parsed via clap's derive macro.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Parser)]
#[clap(name = "coster-rs")]
#[clap(author = "Pivato M. <mpivato4@gmail.com>")]
#[clap(version = "0.0.1")]
#[clap(about = "Simple, fast, efficient costing tool", long_about = None)]
struct Cli {
// The subcommand chosen on the command line.
#[clap(subcommand)]
command: Commands,
}
#[derive(Subcommand)]
enum Commands {
move_money {
#[clap(short = 'r', long, parse(from_os_str), value_name = "FILE")]
rules: PathBuf,
#[clap(short, long, parse(from_os_str), value_name = "FILE")]
output: Option<PathBuf>,
},
allocate_overheads {
#[clap(short, long, parse(from_os_str), value_name = "FILE")]
rules: PathBuf,
#[clap(short, long, parse(from_os_str), value_name = "FILE")]
lines: PathBuf,
#[clap(short, long, parse(from_os_str), value_name = "FILE")]
output: Option<PathBuf>,
},
}
// TODO: Return error (implement the required trait to allow an error to be returned)
fn main() {
let cli = Cli::parse();
match cli.command {
Commands::move_money { rules, output } => move_money(),
Commands::allocate_overheads {
rules,
lines,
output,
} => allocate_overheads(),
}
}
// Handler that `main` dispatches to for the move-money command.
// Currently an empty stub: it takes no arguments and does nothing.
fn move_money() {
// TODO: read rules, for each rule, inspect the files and ... (original note is unfinished)
}
// Handler that `main` dispatches to for the allocate-overheads command.
// Currently an empty stub: it takes no arguments and does nothing.
fn allocate_overheads() {}