diff --git a/Cargo.lock b/Cargo.lock index d6e1c04..231e671 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,25 +11,166 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bytemuck" version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc" +[[package]] +name = "clap" +version = "3.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d53da17d37dba964b9b3ecb5c5a1f193a2762c700e6829201e645b9381c99dc7" +dependencies = [ + "atty", + "bitflags", + "clap_derive", + "clap_lex", + "indexmap", + "once_cell", + "strsim", + "termcolor", + "textwrap", +] + +[[package]] +name = "clap_derive" +version = "3.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c11d40217d16aee8508cc8e5fde8b4ff24639758608e5374e731b53f85749fb9" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5538cd660450ebeb4234cfecf8f2284b844ffc4c50531e66d584ad5b91293613" +dependencies = [ + "os_str_bytes", +] + [[package]] name = "coster-rs" version = "0.1.0" dependencies = [ + "clap", + "csv", "nalgebra", ] +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "hashbrown" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" + +[[package]] +name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "indexmap" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c6392766afd7964e2531940894cffe4bd8d7d17dbc3c1c4857040fd4b33bdb3" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + [[package]] name = "matrixmultiply" version = "0.3.2" @@ -39,6 +180,12 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + [[package]] name = "nalgebra" version = "0.31.0" @@ -105,12 +252,48 @@ dependencies = [ "autocfg", ] +[[package]] +name = "once_cell" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" + +[[package]] +name = "os_str_bytes" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa" + [[package]] name = "paste" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro2" version = "1.0.39" @@ -135,6 +318,18 @@ version = "0.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "ryu" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" + [[package]] name = "safe_arch" version = "0.6.0" @@ -144,6 +339,12 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "serde" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" + [[package]] name = "simba" version = "0.7.1" @@ -157,6 +358,12 @@ dependencies = [ "wide", ] +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + [[package]] name = "syn" version = "1.0.95" @@ -168,6 +375,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "termcolor" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" + [[package]] name = "typenum" version = "1.15.0" @@ -180,6 +402,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" +[[package]] +name = "version_check" +version = "0.9.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "wide" version = "0.7.4" @@ -189,3 +417,34 @@ dependencies = [ "bytemuck", "safe_arch", ] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index 32b083d..7106bfb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,4 +6,12 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -nalgebra = "0.31.0" \ No newline at end of file +# https://nalgebra.org/docs/user_guide/getting_started +nalgebra = "0.31.0" + +# https://docs.rs/csv/1.1.6/csv/ +csv = "1.1" +# simba = { version = "0.7.1", features = ["partial_fixed_point_support"] } + +# num = "0.4" +clap = { version = "3.1.18", features = ["derive"] } \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index f02f1bd..15177e3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,127 @@ extern crate nalgebra as na; +use na::DMatrix; use std::{collections::HashMap, 
ops::Mul}; -use na::DMatrix; +// TODO: Look into serde for serialisation, can also use it to serialise/deserialise +// records from a csv file using the csv crate. Each MovementRule presumably describes moving `amount` (validated as 0..=100 when is_percent is set) from from_units to to_units; pass_break() builds the rule with is_separator set. +pub struct MovementRule { + // If the vectors are empty, then it means 'all' (NOTE(review): validate() below returns false when both vectors are empty - confirm which behaviour is intended) + pub from_units: Vec, + pub to_units: Vec, + pub amount: f64, + pub is_percent: bool, + pub is_separator: bool, +} -// TODO: Could probably put this up a level by indicating how much of another department -// each department used, then calculate the amounts from that. +impl MovementRule { + pub fn new() -> MovementRule { + MovementRule { + from_units: vec![], + to_units: vec![], + amount: 0.0, + is_percent: false, + is_separator: false, + } + } -// Note: No need to include the operating departments, only service departments are needed, -// then once we calculate all of the -pub struct OverheadDepartmentAllocation { + pub fn pass_break() -> MovementRule { + MovementRule { + from_units: vec![], + to_units: vec![], + amount: 0.0, + is_percent: false, + is_separator: true, + } + } + + pub fn validate(&self) -> bool { + if self.from_units.is_empty() && self.to_units.is_empty() { + // Would be nice to return a descriptive message/error here (and for the percent range check below) rather than a bare false + return false; + } + if self.is_percent && (self.amount < 0.0 || self.amount > 100.0) { + return false; + } + true + } +} + +// Rules get parsed from file, converted into matrix format (for the in-memory movement), +// then combined (smushed) into a single matrix + vector/rule for each .
The list of units can then have the rules applied +// +// For now just ignore the all from/to stuff, it's kind of a shit thing to do, and can +// be worked around by actually inputting every type into the rules + +pub fn smush_rules(rules: Vec) -> Vec { + let ruleMapping: HashMap = HashMap::new(); + // First build out the list/map of all departments (store index of each element in the array); NOTE: the insertion below is still commented out, so this function currently always returns an empty Vec + // TODO: We could make this more advanced by only smushing per divider, so that only the departments + // needed between each pass are actually required + for rule in rules { + for department in rule.from_units { + // ruleMapping.entry(department).or_insert(ruleMapping.len()); + } + } + vec![] +} + +// Approach 1: +// Use math (linear algebra) to move between departments. Memory/computationally it's equivalent +// to the worst case of approach two, however can take advantage of auto parallelisation/simd +// to perform fast, particularly on larger datasets. +// This basically just involves smushing all the rules, then doing a matrix multiply and matrix addition +// on the initial set. Can't record passes, but can record the smushed rules if only the data changes later +// Advantage of this approach is it can be easily extended to run on the gpu. +pub fn move_money_1() {} + +// Approach 2: +// Traditional/naive, total for each department is stored in an initial map (department -> total amount). +// Another map is built up for each rule, and each rule is processed based on the amount in the current total +// map. +// Upon a pass break (divider), the temp map will assign the values into the total map. +// Once done, do a final assignment back to the total map, and return that. Probably want to make a copy or +// borrow the total map so it isn't mutated elsewhere.
+// Advantage of this is the required code is tiny, and no third-party math library is required (my matrix math +// implementation probably won't be as good as one that's battle-tested) +// TODO: Time both approaches to see which is faster depending on the size of the input data/number of rules +pub fn move_money_2( + initial_totals: HashMap, + rules: Vec, +) -> HashMap { + // TODO: Replace maps with generic objects, so we can sub in db access/load only some initially. NOTE(review): on a pass break the insert(..).unwrap() below panics for any department not already present in running_total (HashMap::insert returns None for a new key), and values still held in temp_total after the last separator are never merged before running_total is returned + let mut running_total = HashMap::from(initial_totals); + let mut temp_total: HashMap = HashMap::new(); + for rule in rules { + if rule.is_separator { + temp_total.into_iter().for_each(|temp| { + running_total.insert(temp.0, temp.1).unwrap(); + }); + temp_total = HashMap::new(); + } else if rule.is_percent { + let new_value: f64 = running_total + .iter() + .filter(|department| rule.from_units.contains(department.0)) + .map(|department| department.1 * rule.amount) + .sum(); + for department in rule.to_units { + let previous_temp = temp_total.entry(department).or_insert(0.0); + *previous_temp += new_value; + } + // TODO: Subtract values from the from departments. NOTE(review): rule.amount is applied above as a raw multiplier, while MovementRule::validate() accepts 0..=100 for percent rules - confirm whether a division by 100 is intended + } else { + // TODO: Simple addition to to departments/subtraction from from departments + } + } + running_total +} + +// TODO: Could also look at BigDecimal rather than f64 for higher precision (even i64 might be fine if we don't need to divide...)
+// Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department, +// you either need -1 or rest negative for a row to subtract the initial amounts so we end up effectively 0 (simultaneous equations end +// up with negative there so yes this is expected) +// Also, we could potentially use this same struct for non-overhead departments when mapping from overhead to +pub struct OverheadAllocationRule { from_department: String, to_department: String, percent: f64, @@ -20,31 +132,43 @@ pub struct TotalDepartmentCost { value: f64, } -// Gets the matrix that can be used to reciprocally allocate line items in an account -// TODO: What is actually supposed to be in the solve values? Not needed here but whatever calls this function will need to know this -// Also need to handle errors (return appropriate result type) -// TODO: Also need to return some order so we know what order ccs in the accounts should be in.. could just do this by returning a struct with -// the matrix and a method to get the value for a particular key using the hashmap we created. -fn get_reciprocal_allocation_matrix(allocations: Vec, total_costs: Vec) -> DMatrix { - // Convert vector to matrix form - matrix of from/to percent (usage) and vector of original costs - - // Matrix of all unique departments +// Perform the reciprocal allocation (matrix) method to allocate servicing departments (indirect) costs +// to functional departments. Basically just a matrix solve, uses regression (moore-penrose pseudoinverse) when +// matrix is singular +// TODO: Could also reduce memory by just calculating overhead costs in a first step (service departments), then +// calculating operating department costs in a second step using the output from the service departments (multiply +// by service department output rather than original). 
The second step can be a vector multiply or a loop, basically +// same as move money step, might even be able to just repeat it +// Note: PPM currently does the invert for the cost centres only (so can be up to 6000 ccs), as the cost centres are the actual departments, +// and a previous step calculates the percentages for overhead areas using their allocation statistics. Then for each account, +// it will use the overhead allocation matrix to calculate the moved/overhead allocations from the line items calculated from the previous +// cost definitions/reclass rules steps. Really we'd want to batch this out so we multiply a couple hundred or so accounts at a time (maybe +// with a batch size property). NOTE(review): slice_allocations below is now allocated with department_mappings.len() elements (previously len * len) but is still passed to DMatrix::from_row_slice with len x len dimensions - confirm the buffer size +pub fn get_reciprocal_allocation_matrix( + allocations: Vec, + total_costs: Vec, +) -> DMatrix { let mut department_mappings: HashMap = HashMap::new(); for allocation in allocations.iter() { let map_size = department_mappings.len(); - department_mappings.entry(allocation.from_department.clone()).or_insert(map_size); + department_mappings + .entry(allocation.from_department.clone()) + .or_insert(map_size); let map_size = department_mappings.len(); - department_mappings.entry(allocation.to_department.clone()).or_insert(map_size); + department_mappings + .entry(allocation.to_department.clone()) + .or_insert(map_size); } - let mut slice_allocations = vec![0.; department_mappings.len() * department_mappings.len()]; + let mut slice_allocations = vec![0.; department_mappings.len()]; - // TODO: This needs to be passed in another time. + // TODO: This needs to be passed in another time let mut slice_costs = vec![0.; department_mappings.len()]; - for allocation in allocations { // TODO: Is there a more idiomatic way to do this?
- let elem = &mut slice_allocations[*department_mappings.get(&allocation.from_department).unwrap()]; + let elem = &mut slice_allocations[*department_mappings + .get(&allocation.from_department) + .unwrap()]; *elem = allocation.percent; } @@ -53,39 +177,22 @@ fn get_reciprocal_allocation_matrix(allocations: Vec = DMatrix::from_row_slice( + department_mappings.len(), + department_mappings.len(), + &slice_allocations, + ); - // TODO: Would be nice to make this batched... matrix doesn't support that though. - let mat: DMatrix = DMatrix::from_row_slice(department_mappings.len(), department_mappings.len(), &slice_allocations); - let costs_vec: DMatrix = DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs); + let costs_vec: DMatrix = + DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs); - - // Perform reciprocal allocation (LU solve or pseudoinverse regression if the matrix is singular - pseudo inverse is done using nalgebra svd) - // TODO: Is it wasteful to perform the determinant rather than just immediately attempting lu? The implementation of determinant appears calls lu anyway? + // TODO: Only calculate lu/pseudoinverse once. We then do the solve for the overhead department totals for each account, and use this to + // calculate the final totals. NOTE(review): the exact `== 0.` comparison below only takes the pseudo-inverse path for an exactly zero determinant, and both unwrap() calls panic on failure (error handling is still outstanding) if mat.determinant() == 0. { - // Pseudo inverse to find mininmum allocation - // TODO: Error handling let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001); pseudo_inverse.unwrap().mul(&costs_vec) } else { - // Standard solve using lu with partial pivoting. let lup = mat.lu(); - // TODO: Error handling lup.solve(&costs_vec).unwrap() } } - -// This is kind of a pointless function, it's just a matrix multiply... better to have a method that takes a function that can retrieve the accounts, -// then an application would just need to pass in the batch retriever function and the initial overhead things.
-// Only issue that could come up with this is I get a case where I can't pass a function in from another language. Better the application itself just -// uses the struct returned from the function above to -fn allocate_overheads(allocation_matrix: DMatrix, ) { - -} - - -// IDEA: -// Consider a state-machine approach. Struct of allocations + total costs, then have a method to transform to -// reciprocal matrix + hashmap of indexes, then another method that takes cc costs per account to transform into final outputs. -// I think the state machine can be a higher-level api, and can make use of the above functions to transition between states. -// This way you won't need to remember each step of the process, and it would be simpler to swap out implementations -// as each struct in the state can swap out which functions it can use in the transition. diff --git a/src/main.rs b/src/main.rs index e7a11a9..5fc9da8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,54 @@ -fn main() { - println!("Hello, world!"); +use std::path::PathBuf; + +use clap::{Parser, Subcommand}; + +#[derive(Parser)] +#[clap(name = "coster-rs")] +#[clap(author = "Pivato M.
")] +#[clap(version = "0.0.1")] +#[clap(about = "Simple, fast, efficient costing tool", long_about = None)] +struct Cli { + #[clap(subcommand)] + command: Commands, } + +#[derive(Subcommand)] +enum Commands { + move_money { + #[clap(short = 'r', long, parse(from_os_str), value_name = "FILE")] + rules: PathBuf, + + #[clap(short, long, parse(from_os_str), value_name = "FILE")] + output: Option, + }, + allocate_overheads { + #[clap(short, long, parse(from_os_str), value_name = "FILE")] + rules: PathBuf, + + #[clap(short, long, parse(from_os_str), value_name = "FILE")] + lines: PathBuf, + + #[clap(short, long, parse(from_os_str), value_name = "FILE")] + output: Option, + }, +} + +// TODO: Return error (implement the required trait to allow an error to be returned). NOTE(review): the parsed rules/lines/output arguments are currently unused - both match arms call stubs that take no arguments; the snake_case variant names above also go against Rust's UpperCamelCase convention for enum variants +fn main() { + let cli = Cli::parse(); + + match cli.command { + Commands::move_money { rules, output } => move_money(), + Commands::allocate_overheads { + rules, + lines, + output, + } => allocate_overheads(), + } +} + +fn move_money() { + // TODO (unfinished note): read rules, for each rule, inspect the files and ... - nothing is implemented here yet +} + +fn allocate_overheads() {}