Refactor codebase into submodules
This commit is contained in:
456
src/lib.rs
456
src/lib.rs
@@ -1,452 +1,8 @@
|
|||||||
extern crate nalgebra as na;
|
mod move_money;
|
||||||
|
pub use self::move_money::*;
|
||||||
|
|
||||||
use itertools::Itertools;
|
mod smush_rules;
|
||||||
use na::{DMatrix, Dynamic, LU};
|
pub use self::smush_rules::*;
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use std::{collections::HashMap, error::Error, ops::Mul};
|
|
||||||
|
|
||||||
#[derive(Hash, Clone, Default, PartialEq, Eq)]
|
mod overhead_allocation;
|
||||||
pub struct Unit {
|
pub use self::overhead_allocation::*;
|
||||||
pub department: String,
|
|
||||||
pub account: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
|
||||||
pub struct CsvCost {
|
|
||||||
account: String,
|
|
||||||
department: String,
|
|
||||||
value: f64,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
|
||||||
struct CsvMovementRule {
|
|
||||||
#[serde(rename = "FromCC")]
|
|
||||||
// Need strings to further split later
|
|
||||||
from_departments: String,
|
|
||||||
to_departments: String,
|
|
||||||
all_from_departments: bool,
|
|
||||||
all_to_departments: bool,
|
|
||||||
from_accounts: String,
|
|
||||||
to_accounts: String,
|
|
||||||
all_from_accounts: bool,
|
|
||||||
all_to_accounts: bool,
|
|
||||||
amount: f64,
|
|
||||||
is_percent: Option<bool>,
|
|
||||||
is_separator: Option<bool>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct MovementRule {
|
|
||||||
// If the vectors are empty, then it means 'all'
|
|
||||||
pub from_departments: Vec<String>,
|
|
||||||
pub to_departments: Vec<String>,
|
|
||||||
pub all_from_departments: bool,
|
|
||||||
pub all_to_departments: bool,
|
|
||||||
pub from_accounts: Vec<String>,
|
|
||||||
pub to_accounts: Vec<String>,
|
|
||||||
pub all_from_accounts: bool,
|
|
||||||
pub all_to_accounts: bool,
|
|
||||||
pub amount: f64,
|
|
||||||
pub is_percent: bool,
|
|
||||||
pub is_separator: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MovementRule {
|
|
||||||
pub fn pass_break() -> MovementRule {
|
|
||||||
MovementRule {
|
|
||||||
is_separator: true,
|
|
||||||
..MovementRule::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn validate(&self) -> bool {
|
|
||||||
if self.from_departments.is_empty() && self.to_departments.is_empty() {
|
|
||||||
// Would be nice to have a decent message/error here as well
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if self.is_percent && (self.amount < 0.0 || self.amount > 100.0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Rules get parsed from file, converted into matrix format (for the in-memory movement),
|
|
||||||
// then combined (smushed) into a single matrix + vector/rule for each . The list of units can then have the rules applied
|
|
||||||
//
|
|
||||||
// For now just ignore the all from/to stuff, it's kind of a shit thing to do, and can
|
|
||||||
// be worked around by actually inputting every type into the rules
|
|
||||||
|
|
||||||
pub fn smush_rules(rules: Vec<MovementRule>) -> Vec<MovementRule> {
|
|
||||||
let ruleMapping: HashMap<String, usize> = HashMap::new();
|
|
||||||
// First build out the list/map of all departments (store index of each element in the array)
|
|
||||||
// TODO: We could make this more advanced by only smushing per divider, so that only the departments
|
|
||||||
// needed between each pass is actually required
|
|
||||||
for rule in rules {
|
|
||||||
for department in rule.from_departments {
|
|
||||||
// ruleMapping.entry(department).or_insert(ruleMapping.len());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
vec![]
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn move_money<R, L, O>(
|
|
||||||
rules_reader: csv::Reader<R>,
|
|
||||||
lines_reader: csv::Reader<L>,
|
|
||||||
output: csv::Writer<O>,
|
|
||||||
use_numeric_accounts: bool,
|
|
||||||
) -> anyhow::Result<()>
|
|
||||||
where
|
|
||||||
R: std::io::Read,
|
|
||||||
L: std::io::Read,
|
|
||||||
O: std::io::Write,
|
|
||||||
{
|
|
||||||
let mut lines_reader = lines_reader;
|
|
||||||
let headers = lines_reader.headers()?;
|
|
||||||
let mut account_index = 0;
|
|
||||||
let mut department_index = 0;
|
|
||||||
for (index, field) in headers.iter().enumerate() {
|
|
||||||
if field.eq_ignore_ascii_case("account") {
|
|
||||||
account_index = index;
|
|
||||||
} else if field.eq_ignore_ascii_case("department") {
|
|
||||||
department_index = index;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let lines: HashMap<Unit, f64> = lines_reader
|
|
||||||
.records()
|
|
||||||
.map(|record| {
|
|
||||||
let record = record.unwrap();
|
|
||||||
let account = record.get(account_index).unwrap();
|
|
||||||
let department = record.get(department_index).unwrap();
|
|
||||||
let sum = record
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.filter(|(i, _)| *i != account_index && *i != department_index)
|
|
||||||
.map(|(_, f)| f.parse::<f64>().unwrap())
|
|
||||||
.sum();
|
|
||||||
(
|
|
||||||
Unit {
|
|
||||||
account: account.into(),
|
|
||||||
department: department.into(),
|
|
||||||
},
|
|
||||||
sum,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let all_accounts_sorted = if use_numeric_accounts {
|
|
||||||
lines
|
|
||||||
.keys()
|
|
||||||
.map(|key| key.account.clone().parse::<i32>().unwrap())
|
|
||||||
.sorted()
|
|
||||||
.map(|account| account.to_string())
|
|
||||||
.collect()
|
|
||||||
} else {
|
|
||||||
lines
|
|
||||||
.keys()
|
|
||||||
.map(|key| key.account.clone())
|
|
||||||
.sorted()
|
|
||||||
.collect()
|
|
||||||
};
|
|
||||||
let all_departments_sorted = lines
|
|
||||||
.keys()
|
|
||||||
.map(|key| key.department.clone())
|
|
||||||
.sorted()
|
|
||||||
.collect();
|
|
||||||
let mut rules_reader = rules_reader;
|
|
||||||
let mut rules: Vec<MovementRule> = vec![];
|
|
||||||
for result in rules_reader.deserialize() {
|
|
||||||
let movement_rule: CsvMovementRule = result?;
|
|
||||||
let from_accounts = extract_range(
|
|
||||||
movement_rule.from_accounts,
|
|
||||||
movement_rule.all_from_accounts,
|
|
||||||
&all_accounts_sorted,
|
|
||||||
);
|
|
||||||
let to_accounts = extract_range(
|
|
||||||
movement_rule.to_accounts,
|
|
||||||
movement_rule.all_to_accounts,
|
|
||||||
&all_accounts_sorted,
|
|
||||||
);
|
|
||||||
let from_departments = extract_range(
|
|
||||||
movement_rule.from_departments,
|
|
||||||
movement_rule.all_from_departments,
|
|
||||||
&all_departments_sorted,
|
|
||||||
);
|
|
||||||
let to_departments = extract_range(
|
|
||||||
movement_rule.to_departments,
|
|
||||||
movement_rule.all_to_departments,
|
|
||||||
&all_departments_sorted,
|
|
||||||
);
|
|
||||||
rules.push(MovementRule {
|
|
||||||
from_departments,
|
|
||||||
to_departments,
|
|
||||||
all_from_departments: movement_rule.all_from_departments,
|
|
||||||
all_to_departments: movement_rule.all_to_departments,
|
|
||||||
from_accounts,
|
|
||||||
to_accounts,
|
|
||||||
all_from_accounts: movement_rule.all_from_accounts,
|
|
||||||
all_to_accounts: movement_rule.all_to_accounts,
|
|
||||||
amount: movement_rule.amount,
|
|
||||||
is_percent: movement_rule.is_percent.unwrap_or(false),
|
|
||||||
is_separator: movement_rule.is_separator.unwrap_or(false),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Then run move_money
|
|
||||||
let moved = move_money_2(lines, &rules);
|
|
||||||
let mut output = output;
|
|
||||||
|
|
||||||
// Ouput the list moved moneys
|
|
||||||
for money in moved {
|
|
||||||
output.serialize(CsvCost {
|
|
||||||
account: money.0.account,
|
|
||||||
department: money.0.department,
|
|
||||||
value: money.1,
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn extract_range(range: String, all: bool, options: &Vec<String>) -> Vec<String> {
|
|
||||||
if all {
|
|
||||||
return vec![];
|
|
||||||
}
|
|
||||||
let split_range: Vec<&str> = range.split("-").collect();
|
|
||||||
if split_range.len() == 1 {
|
|
||||||
return vec![range];
|
|
||||||
}
|
|
||||||
let start_index = options
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.find(|option| option.1 == split_range[0])
|
|
||||||
.map(|start| start.0);
|
|
||||||
let end_index = options
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.find(|option| option.1 == split_range[1])
|
|
||||||
.map(|end| end.0);
|
|
||||||
if let Some(start) = start_index {
|
|
||||||
if let Some(end) = end_index {
|
|
||||||
return Vec::from(&options[start..end + 1]);
|
|
||||||
} else {
|
|
||||||
return vec![options[start].clone()];
|
|
||||||
}
|
|
||||||
} else if let Some(end) = end_index {
|
|
||||||
return vec![options[end].clone()];
|
|
||||||
}
|
|
||||||
return vec![];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Approach 1:
|
|
||||||
// Use math (linear algebra) to move between departments. Memory/computationally it's equivalent
|
|
||||||
// to the worst case of approach two, however can take advantage of auto parallelisation/simd
|
|
||||||
// to perform fast, particularly on larger datasets.
|
|
||||||
// This basically just involves smushing all the rules, then doing a matrix multiplication and matrix addition
|
|
||||||
// on the initial set. Can't record passes, but can record the smushed rules if only the data changes later
|
|
||||||
// Advantage of this approach is it can be easily extended to run on the gpu.
|
|
||||||
pub fn move_money_1() {}
|
|
||||||
|
|
||||||
// Approach 2:
|
|
||||||
// Traditional/naive, total for each department is stored in an initial map (department -> total amount).
|
|
||||||
// Another map is built up for each rule, and each rule is processed based on the amount in the current total
|
|
||||||
// map.
|
|
||||||
// Upon a pass break (separator), the temp map will assign the values into the total map.
|
|
||||||
// Once done, do a final assignment back to the total, and return that.
|
|
||||||
// Advantage of this is the required code is tiny, and no third-party math library is required.
|
|
||||||
// Note that the movement happens on a line-by-line level. So we can stream the data from disk, and potentially apply this
|
|
||||||
// to every. It's also much more memory efficient than approach 1.
|
|
||||||
// TODO: Time both approaches to see which is faster depending on the size of the input data/number of rules
|
|
||||||
pub fn move_money_2(
|
|
||||||
initial_totals: HashMap<Unit, f64>,
|
|
||||||
rules: &Vec<MovementRule>,
|
|
||||||
) -> HashMap<Unit, f64> {
|
|
||||||
// Note: It's potentially a bit more intensive to use cloned totals (rather than just update temp_total per rule),
|
|
||||||
// but it's much simpler code and, and since we're only working line-by-line, it isn't really that much memory in practice
|
|
||||||
let mut running_total = HashMap::from(initial_totals);
|
|
||||||
let mut temp_total = running_total.clone();
|
|
||||||
for rule in rules {
|
|
||||||
if rule.is_separator {
|
|
||||||
running_total = temp_total.clone();
|
|
||||||
} else {
|
|
||||||
let mut sum_from = 0.;
|
|
||||||
for unit in &running_total {
|
|
||||||
if (rule.all_from_departments || rule.from_departments.contains(&unit.0.department))
|
|
||||||
&& (rule.all_from_accounts || rule.from_accounts.contains(&unit.0.account))
|
|
||||||
{
|
|
||||||
let previous_temp = unit.1;
|
|
||||||
let added_amount = if rule.is_percent {
|
|
||||||
previous_temp * rule.amount
|
|
||||||
} else {
|
|
||||||
rule.amount
|
|
||||||
};
|
|
||||||
sum_from += added_amount;
|
|
||||||
*temp_total.get_mut(&unit.0).unwrap() -= added_amount;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let num_to_units = running_total
|
|
||||||
.keys()
|
|
||||||
.filter(|key| {
|
|
||||||
(rule.all_to_accounts || rule.to_departments.contains(&key.department))
|
|
||||||
&& (rule.all_to_accounts || rule.to_accounts.contains(&key.account))
|
|
||||||
})
|
|
||||||
.count();
|
|
||||||
let value_per_unit = sum_from / num_to_units as f64;
|
|
||||||
for unit in running_total.keys() {
|
|
||||||
*temp_total.get_mut(&unit).unwrap() += value_per_unit;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
temp_total
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
|
||||||
pub enum DepartmentType {
|
|
||||||
Operating,
|
|
||||||
Overhead,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department,
|
|
||||||
// you either need -1 or rest negative for a row to subtract the initial amounts so we end up effectively 0 (simultaneous equations end
|
|
||||||
// up with negative there so yes this is expected)
|
|
||||||
pub struct OverheadAllocationRule {
|
|
||||||
from_overhead_department: String,
|
|
||||||
to_department: String,
|
|
||||||
percent: f64,
|
|
||||||
to_department_type: DepartmentType,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct TotalDepartmentCost {
|
|
||||||
department: String,
|
|
||||||
value: f64,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct AccountCost {
|
|
||||||
account: String,
|
|
||||||
summed_department_costs: Vec<TotalDepartmentCost>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Also need a way to dictate the order of the departments?
|
|
||||||
pub trait ReciprocalAllocationSolver {
|
|
||||||
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64>;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ReciprocalAllocationSolver for LU<f64, Dynamic, Dynamic> {
|
|
||||||
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
|
|
||||||
self.solve(costs).unwrap()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ReciprocalAllocationSolver for DMatrix<f64> {
|
|
||||||
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
|
|
||||||
self.mul(costs)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_rules_indexes(
|
|
||||||
allocations: &Vec<OverheadAllocationRule>,
|
|
||||||
department_type: DepartmentType,
|
|
||||||
) -> HashMap<String, usize> {
|
|
||||||
allocations
|
|
||||||
.iter()
|
|
||||||
.filter(|allocation| allocation.to_department_type == department_type)
|
|
||||||
.flat_map(|department| {
|
|
||||||
[
|
|
||||||
department.from_overhead_department.clone(),
|
|
||||||
department.to_department.clone(),
|
|
||||||
]
|
|
||||||
})
|
|
||||||
.unique()
|
|
||||||
.enumerate()
|
|
||||||
.map(|(index, department)| (department, index))
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Perform the reciprocal allocation (matrix) method to allocate servicing departments (indirect) costs
|
|
||||||
// to functional departments. Basically just a matrix solve, uses regression (moore-penrose pseudoinverse) when
|
|
||||||
// matrix is singular
|
|
||||||
pub fn reciprocal_allocation(
|
|
||||||
allocations: Vec<OverheadAllocationRule>,
|
|
||||||
account_costs: Vec<AccountCost>,
|
|
||||||
// TODO: Throw an appropriate error
|
|
||||||
) -> Result<Vec<AccountCost>, Box<dyn Error>> {
|
|
||||||
let overhead_department_mappings: HashMap<String, usize> =
|
|
||||||
get_rules_indexes(&allocations, DepartmentType::Overhead);
|
|
||||||
let operating_department_mappings: HashMap<String, usize> =
|
|
||||||
get_rules_indexes(&allocations, DepartmentType::Operating);
|
|
||||||
|
|
||||||
let mut slice_allocations =
|
|
||||||
vec![0.; overhead_department_mappings.len() * overhead_department_mappings.len()];
|
|
||||||
|
|
||||||
for allocation in allocations
|
|
||||||
.iter()
|
|
||||||
.filter(|allocation| allocation.to_department_type == DepartmentType::Overhead)
|
|
||||||
{
|
|
||||||
// TODO: Check if we need to flp this around
|
|
||||||
let from_index = overhead_department_mappings
|
|
||||||
.get(&allocation.from_overhead_department)
|
|
||||||
.unwrap();
|
|
||||||
let to_index = operating_department_mappings
|
|
||||||
.get(&allocation.to_department)
|
|
||||||
.unwrap();
|
|
||||||
let elem = &mut slice_allocations
|
|
||||||
[(*from_index) + (overhead_department_mappings.len() * (*to_index))];
|
|
||||||
*elem = allocation.percent;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Also need ones along the diagonal, and negatives in some places...
|
|
||||||
|
|
||||||
let mat: DMatrix<f64> = DMatrix::from_row_slice(
|
|
||||||
overhead_department_mappings.len(),
|
|
||||||
overhead_department_mappings.len(),
|
|
||||||
&slice_allocations,
|
|
||||||
);
|
|
||||||
|
|
||||||
if mat.determinant() == 0. {
|
|
||||||
let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001);
|
|
||||||
do_solve_reciprocal(
|
|
||||||
pseudo_inverse.unwrap(),
|
|
||||||
account_costs,
|
|
||||||
overhead_department_mappings,
|
|
||||||
allocations,
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
do_solve_reciprocal(
|
|
||||||
mat.lu(),
|
|
||||||
account_costs,
|
|
||||||
overhead_department_mappings,
|
|
||||||
allocations,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn do_solve_reciprocal<T: ReciprocalAllocationSolver>(
|
|
||||||
solver: T,
|
|
||||||
account_costs: Vec<AccountCost>,
|
|
||||||
department_mappings: HashMap<String, usize>,
|
|
||||||
allocations: Vec<OverheadAllocationRule>,
|
|
||||||
) -> Result<Vec<AccountCost>, Box<dyn Error>> {
|
|
||||||
// TODO: Could batch the accounts, although probably won't see to big a speed increase, compiler should help us out
|
|
||||||
for total_costs in account_costs {
|
|
||||||
let mut slice_costs = vec![0.; department_mappings.len()];
|
|
||||||
|
|
||||||
for cost in total_costs.summed_department_costs {
|
|
||||||
let elem = &mut slice_costs[*department_mappings.get(&cost.department).unwrap()];
|
|
||||||
*elem = cost.value;
|
|
||||||
}
|
|
||||||
|
|
||||||
let costs_vec: DMatrix<f64> =
|
|
||||||
DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);
|
|
||||||
|
|
||||||
let calculated_overheads = solver.solve(&costs_vec);
|
|
||||||
|
|
||||||
// Calculation: operating_overhead_usage . calculated_overheads + initial_totals
|
|
||||||
// Where operating_overhead_usage is the direct mapping from overhead -> operating department, calculated overheads is the
|
|
||||||
// solved overheads usages after taking into account usage between departments, and initial_totals is the initial values
|
|
||||||
// for the operating departments.
|
|
||||||
}
|
|
||||||
// TODO: return something appropriate
|
|
||||||
Ok(vec![])
|
|
||||||
}
|
|
||||||
|
|||||||
283
src/move_money.rs
Normal file
283
src/move_money.rs
Normal file
@@ -0,0 +1,283 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use itertools::Itertools;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
// TODO: Fix up these names, check if all is actually integrated into the strings
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct CsvMovementRule {
|
||||||
|
#[serde(rename = "FromCC")]
|
||||||
|
// Need strings to further split later
|
||||||
|
from_departments: String,
|
||||||
|
to_departments: String,
|
||||||
|
all_from_departments: bool,
|
||||||
|
all_to_departments: bool,
|
||||||
|
from_accounts: String,
|
||||||
|
to_accounts: String,
|
||||||
|
all_from_accounts: bool,
|
||||||
|
all_to_accounts: bool,
|
||||||
|
amount: f64,
|
||||||
|
is_percent: Option<bool>,
|
||||||
|
is_separator: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A movement rule in its parsed, in-memory form.
///
/// The original note on this type says that empty vectors mean "all".
/// NOTE(review): `move_money_2` consults the explicit `all_*` flags rather than vector
/// emptiness, so confirm which convention is intended.
#[derive(Debug, Clone, Default)]
pub struct MovementRule {
    /// Departments money is taken from.
    pub from_departments: Vec<String>,
    /// Departments money is given to.
    pub to_departments: Vec<String>,
    /// Take from every department.
    pub all_from_departments: bool,
    /// Give to every department.
    pub all_to_departments: bool,
    /// Accounts money is taken from.
    pub from_accounts: Vec<String>,
    /// Accounts money is given to.
    pub to_accounts: Vec<String>,
    /// Take from every account.
    pub all_from_accounts: bool,
    /// Give to every account.
    pub all_to_accounts: bool,
    /// Amount to move; a percentage when `is_percent` is set, otherwise a fixed value.
    pub amount: f64,
    /// Whether `amount` is a percentage.
    pub is_percent: bool,
    /// Whether this entry is a pass break rather than an actual movement.
    pub is_separator: bool,
}

impl MovementRule {
    /// Creates a pass-break (separator) rule; every other field keeps its default.
    pub fn pass_break() -> MovementRule {
        MovementRule {
            is_separator: true,
            ..MovementRule::default()
        }
    }

    /// Returns `true` when the rule is usable.
    ///
    /// A rule is rejected when both department lists are empty, or when it is a
    /// percentage rule whose amount lies outside `0..=100`. A NaN amount is rejected too.
    pub fn validate(&self) -> bool {
        if self.from_departments.is_empty() && self.to_departments.is_empty() {
            // Would be nice to have a decent message/error here as well
            return false;
        }
        // `contains` is false for NaN, unlike a pair of `<` / `>` comparisons.
        !self.is_percent || (0.0..=100.0).contains(&self.amount)
    }
}
|
||||||
|
|
||||||
|
/// A (department, account) pair; used as the key of the totals maps.
#[derive(Debug, Hash, Clone, Default, PartialEq, Eq)]
pub struct Unit {
    /// Department the amount belongs to.
    pub department: String,
    /// Account the amount belongs to.
    pub account: String,
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
pub struct CsvCost {
|
||||||
|
account: String,
|
||||||
|
department: String,
|
||||||
|
value: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn move_money<R, L, O>(
|
||||||
|
rules_reader: csv::Reader<R>,
|
||||||
|
lines_reader: csv::Reader<L>,
|
||||||
|
output: csv::Writer<O>,
|
||||||
|
use_numeric_accounts: bool,
|
||||||
|
) -> anyhow::Result<()>
|
||||||
|
where
|
||||||
|
R: std::io::Read,
|
||||||
|
L: std::io::Read,
|
||||||
|
O: std::io::Write,
|
||||||
|
{
|
||||||
|
let mut lines_reader = lines_reader;
|
||||||
|
let headers = lines_reader.headers()?;
|
||||||
|
let mut account_index = 0;
|
||||||
|
let mut department_index = 0;
|
||||||
|
for (index, field) in headers.iter().enumerate() {
|
||||||
|
if field.eq_ignore_ascii_case("account") {
|
||||||
|
account_index = index;
|
||||||
|
} else if field.eq_ignore_ascii_case("department") {
|
||||||
|
department_index = index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let lines: HashMap<Unit, f64> = lines_reader
|
||||||
|
.records()
|
||||||
|
.map(|record| {
|
||||||
|
let record = record.unwrap();
|
||||||
|
let account = record.get(account_index).unwrap();
|
||||||
|
let department = record.get(department_index).unwrap();
|
||||||
|
let sum = record
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.filter(|(i, _)| *i != account_index && *i != department_index)
|
||||||
|
.map(|(_, f)| f.parse::<f64>().unwrap())
|
||||||
|
.sum();
|
||||||
|
(
|
||||||
|
Unit {
|
||||||
|
account: account.into(),
|
||||||
|
department: department.into(),
|
||||||
|
},
|
||||||
|
sum,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let all_accounts_sorted = if use_numeric_accounts {
|
||||||
|
lines
|
||||||
|
.keys()
|
||||||
|
.map(|key| key.account.clone().parse::<i32>().unwrap())
|
||||||
|
.sorted()
|
||||||
|
.map(|account| account.to_string())
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
lines
|
||||||
|
.keys()
|
||||||
|
.map(|key| key.account.clone())
|
||||||
|
.sorted()
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
let all_departments_sorted = lines
|
||||||
|
.keys()
|
||||||
|
.map(|key| key.department.clone())
|
||||||
|
.sorted()
|
||||||
|
.collect();
|
||||||
|
let mut rules_reader = rules_reader;
|
||||||
|
let mut rules: Vec<MovementRule> = vec![];
|
||||||
|
for result in rules_reader.deserialize() {
|
||||||
|
let movement_rule: CsvMovementRule = result?;
|
||||||
|
let from_accounts = extract_range(
|
||||||
|
movement_rule.from_accounts,
|
||||||
|
movement_rule.all_from_accounts,
|
||||||
|
&all_accounts_sorted,
|
||||||
|
);
|
||||||
|
let to_accounts = extract_range(
|
||||||
|
movement_rule.to_accounts,
|
||||||
|
movement_rule.all_to_accounts,
|
||||||
|
&all_accounts_sorted,
|
||||||
|
);
|
||||||
|
let from_departments = extract_range(
|
||||||
|
movement_rule.from_departments,
|
||||||
|
movement_rule.all_from_departments,
|
||||||
|
&all_departments_sorted,
|
||||||
|
);
|
||||||
|
let to_departments = extract_range(
|
||||||
|
movement_rule.to_departments,
|
||||||
|
movement_rule.all_to_departments,
|
||||||
|
&all_departments_sorted,
|
||||||
|
);
|
||||||
|
rules.push(MovementRule {
|
||||||
|
from_departments,
|
||||||
|
to_departments,
|
||||||
|
all_from_departments: movement_rule.all_from_departments,
|
||||||
|
all_to_departments: movement_rule.all_to_departments,
|
||||||
|
from_accounts,
|
||||||
|
to_accounts,
|
||||||
|
all_from_accounts: movement_rule.all_from_accounts,
|
||||||
|
all_to_accounts: movement_rule.all_to_accounts,
|
||||||
|
amount: movement_rule.amount,
|
||||||
|
is_percent: movement_rule.is_percent.unwrap_or(false),
|
||||||
|
is_separator: movement_rule.is_separator.unwrap_or(false),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Then run move_money
|
||||||
|
let moved = move_money_2(lines, &rules);
|
||||||
|
let mut output = output;
|
||||||
|
|
||||||
|
// Ouput the list moved moneys
|
||||||
|
for money in moved {
|
||||||
|
output.serialize(CsvCost {
|
||||||
|
account: money.0.account,
|
||||||
|
department: money.0.department,
|
||||||
|
value: money.1,
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expands a `"start-end"` range expression into the matching entries of `options`.
///
/// * `range` - a single value (`"A"`) or two values separated by `-` (`"A-C"`). Only the
///   first two `-`-separated pieces are considered.
/// * `all` - when `true` nothing is expanded and an empty list is returned.
/// * `options` - the ordered list of known values the range is resolved against.
///
/// Returns:
/// * `[range]` unchanged when `range` contains no `-`;
/// * the inclusive slice of `options` between both bounds when both are found. The
///   bounds may be given in either order, so a reversed range no longer panics;
/// * the single bound that was found when only one of them exists in `options`;
/// * an empty list when neither bound exists.
fn extract_range(range: String, all: bool, options: &Vec<String>) -> Vec<String> {
    if all {
        return Vec::new();
    }
    if !range.contains('-') {
        return vec![range];
    }

    let mut bounds = range.split('-');
    let first = bounds.next().unwrap_or("");
    let second = bounds.next().unwrap_or("");

    let start_index = options.iter().position(|option| option.as_str() == first);
    let end_index = options.iter().position(|option| option.as_str() == second);

    match (start_index, end_index) {
        (Some(start), Some(end)) => {
            // Normalise so that "C-A" selects the same span as "A-C".
            let low = start.min(end);
            let high = start.max(end);
            options[low..=high].to_vec()
        }
        (Some(index), None) | (None, Some(index)) => vec![options[index].clone()],
        (None, None) => Vec::new(),
    }
}
|
||||||
|
|
||||||
|
// Approach 1:
|
||||||
|
// Use math (linear algebra) to move between departments. Memory/computationally it's equivalent
|
||||||
|
// to the worst case of approach two, however can take advantage of auto parallelisation/simd
|
||||||
|
// to perform fast, particularly on larger datasets.
|
||||||
|
// This basically just involves smushing all the rules, then doing a matrix multiplication and matrix addition
|
||||||
|
// on the initial set. Can't record passes, but can record the smushed rules if only the data changes later
|
||||||
|
// Advantage of this approach is it can be easily extended to run on the gpu.
|
||||||
|
// NOTE(review): empty placeholder; the matrix-based approach described above is not implemented yet.
pub fn move_money_1() {}
|
||||||
|
|
||||||
|
// Approach 2:
|
||||||
|
// Traditinoal/naive, total for each department is stored in an initial map (department -> total amount).
|
||||||
|
// Another map is built up for each rule, and each rule is processed based on the amount in the current total
|
||||||
|
// map.
|
||||||
|
// Upon a pass break (separator), the temp map will assign the values into the total map.
|
||||||
|
// Once done, do a final assignment back to the total, and return that.
|
||||||
|
// Advantage of this is the required code is tiny, and no third-party math library is required.
|
||||||
|
// Note that the movement happens on a line-by-line level. So we can stream the data from disk, and potentially apply this
|
||||||
|
// to every. It's also much more memory efficient than approach 1.
|
||||||
|
// TODO: Time both approaches to seee which is faster depending on the size of the input data/number of rules
|
||||||
|
pub fn move_money_2(
|
||||||
|
initial_totals: HashMap<Unit, f64>,
|
||||||
|
rules: &Vec<MovementRule>,
|
||||||
|
) -> HashMap<Unit, f64> {
|
||||||
|
// Note: It's potentially a bit more intensive to use cloned totals (rather than just update temp_total per rule),
|
||||||
|
// but it's much simpler code and, and since we're only working line-by-line, it isn't really that much memory in practice
|
||||||
|
let mut running_total = HashMap::from(initial_totals);
|
||||||
|
let mut temp_total = running_total.clone();
|
||||||
|
for rule in rules {
|
||||||
|
if rule.is_separator {
|
||||||
|
running_total = temp_total.clone();
|
||||||
|
} else {
|
||||||
|
let mut sum_from = 0.;
|
||||||
|
for unit in &running_total {
|
||||||
|
if (rule.all_from_departments || rule.from_departments.contains(&unit.0.department))
|
||||||
|
&& (rule.all_from_accounts || rule.from_accounts.contains(&unit.0.account))
|
||||||
|
{
|
||||||
|
let previous_temp = unit.1;
|
||||||
|
let added_amount = if rule.is_percent {
|
||||||
|
previous_temp * rule.amount
|
||||||
|
} else {
|
||||||
|
rule.amount
|
||||||
|
};
|
||||||
|
sum_from += added_amount;
|
||||||
|
*temp_total.get_mut(&unit.0).unwrap() -= added_amount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let num_to_units = running_total
|
||||||
|
.keys()
|
||||||
|
.filter(|key| {
|
||||||
|
(rule.all_to_accounts || rule.to_departments.contains(&key.department))
|
||||||
|
&& (rule.all_to_accounts || rule.to_accounts.contains(&key.account))
|
||||||
|
})
|
||||||
|
.count();
|
||||||
|
let value_per_unit = sum_from / num_to_units as f64;
|
||||||
|
for unit in running_total.keys() {
|
||||||
|
*temp_total.get_mut(&unit).unwrap() += value_per_unit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
temp_total
|
||||||
|
}
|
||||||
153
src/overhead_allocation.rs
Normal file
153
src/overhead_allocation.rs
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use itertools::Itertools;
|
||||||
|
use nalgebra::{DMatrix, Dynamic, LU};
|
||||||
|
|
||||||
|
/// Kind of department an allocation rule points at.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DepartmentType {
    /// An operating department.
    Operating,
    /// An overhead department.
    Overhead,
}
|
||||||
|
|
||||||
|
// Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department,
|
||||||
|
// you either need -1 or rest negative for a row to subtract the initial amounts so we end up effectively 0 (simultaneous equations end
|
||||||
|
// up with negative there so yes this is expected)
|
||||||
|
pub struct OverheadAllocationRule {
|
||||||
|
from_overhead_department: String,
|
||||||
|
to_department: String,
|
||||||
|
percent: f64,
|
||||||
|
to_department_type: DepartmentType,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Total cost recorded against one department.
#[derive(Debug, Clone, PartialEq)]
pub struct TotalDepartmentCost {
    /// Department name.
    department: String,
    /// Total value for that department.
    value: f64,
}

/// Per-department totals for a single account.
#[derive(Debug, Clone, PartialEq)]
pub struct AccountCost {
    /// Account identifier.
    account: String,
    /// One entry per department that has a cost on this account.
    summed_department_costs: Vec<TotalDepartmentCost>,
}
|
||||||
|
|
||||||
|
// TODO: Also need a way to dictate the order of the departments?
|
||||||
|
pub trait ReciprocalAllocationSolver {
|
||||||
|
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ReciprocalAllocationSolver for LU<f64, Dynamic, Dynamic> {
|
||||||
|
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
|
||||||
|
self.solve(costs).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ReciprocalAllocationSolver for DMatrix<f64> {
|
||||||
|
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
|
||||||
|
self * costs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_rules_indexes(
|
||||||
|
allocations: &Vec<OverheadAllocationRule>,
|
||||||
|
department_type: DepartmentType,
|
||||||
|
) -> HashMap<String, usize> {
|
||||||
|
allocations
|
||||||
|
.iter()
|
||||||
|
.filter(|allocation| allocation.to_department_type == department_type)
|
||||||
|
.flat_map(|department| {
|
||||||
|
[
|
||||||
|
department.from_overhead_department.clone(),
|
||||||
|
department.to_department.clone(),
|
||||||
|
]
|
||||||
|
})
|
||||||
|
.unique()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(index, department)| (department, index))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform the reciprocal allocation (matrix) method to allocate servicing departments (indirect) costs
|
||||||
|
// to functional departments. Basically just a matrix solve, uses regression (moore-penrose pseudoinverse) when
|
||||||
|
// matrix is singular
|
||||||
|
pub fn reciprocal_allocation(
|
||||||
|
allocations: Vec<OverheadAllocationRule>,
|
||||||
|
account_costs: Vec<AccountCost>,
|
||||||
|
// TODO: Throw an appropriate error
|
||||||
|
) -> anyhow::Result<Vec<AccountCost>> {
|
||||||
|
let overhead_department_mappings: HashMap<String, usize> =
|
||||||
|
get_rules_indexes(&allocations, DepartmentType::Overhead);
|
||||||
|
let operating_department_mappings: HashMap<String, usize> =
|
||||||
|
get_rules_indexes(&allocations, DepartmentType::Operating);
|
||||||
|
|
||||||
|
let mut slice_allocations =
|
||||||
|
vec![0.; overhead_department_mappings.len() * overhead_department_mappings.len()];
|
||||||
|
|
||||||
|
for allocation in allocations
|
||||||
|
.iter()
|
||||||
|
.filter(|allocation| allocation.to_department_type == DepartmentType::Overhead)
|
||||||
|
{
|
||||||
|
// TODO: Check if we need to flp this around
|
||||||
|
let from_index = overhead_department_mappings
|
||||||
|
.get(&allocation.from_overhead_department)
|
||||||
|
.unwrap();
|
||||||
|
let to_index = operating_department_mappings
|
||||||
|
.get(&allocation.to_department)
|
||||||
|
.unwrap();
|
||||||
|
let elem = &mut slice_allocations
|
||||||
|
[(*from_index) + (overhead_department_mappings.len() * (*to_index))];
|
||||||
|
*elem = allocation.percent;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Also need ones along the diagonal, and negatives in some places...
|
||||||
|
|
||||||
|
let mat: DMatrix<f64> = DMatrix::from_row_slice(
|
||||||
|
overhead_department_mappings.len(),
|
||||||
|
overhead_department_mappings.len(),
|
||||||
|
&slice_allocations,
|
||||||
|
);
|
||||||
|
|
||||||
|
if mat.determinant() == 0. {
|
||||||
|
let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001);
|
||||||
|
do_solve_reciprocal(
|
||||||
|
pseudo_inverse.unwrap(),
|
||||||
|
account_costs,
|
||||||
|
overhead_department_mappings,
|
||||||
|
allocations,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
do_solve_reciprocal(
|
||||||
|
mat.lu(),
|
||||||
|
account_costs,
|
||||||
|
overhead_department_mappings,
|
||||||
|
allocations,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn do_solve_reciprocal<T: ReciprocalAllocationSolver>(
|
||||||
|
solver: T,
|
||||||
|
account_costs: Vec<AccountCost>,
|
||||||
|
department_mappings: HashMap<String, usize>,
|
||||||
|
allocations: Vec<OverheadAllocationRule>,
|
||||||
|
) -> anyhow::Result<Vec<AccountCost>> {
|
||||||
|
// TODO: Could batch the accounts, although probably won't see to big a speed increase, compiler should help us out
|
||||||
|
for total_costs in account_costs {
|
||||||
|
let mut slice_costs = vec![0.; department_mappings.len()];
|
||||||
|
|
||||||
|
for cost in total_costs.summed_department_costs {
|
||||||
|
let elem = &mut slice_costs[*department_mappings.get(&cost.department).unwrap()];
|
||||||
|
*elem = cost.value;
|
||||||
|
}
|
||||||
|
|
||||||
|
let costs_vec: DMatrix<f64> =
|
||||||
|
DMatrix::from_row_slice(department_mappings.len(), 1, &slice_costs);
|
||||||
|
|
||||||
|
let calculated_overheads = solver.solve(&costs_vec);
|
||||||
|
|
||||||
|
// Calculation: operating_overhead_usage . calculated_overheads + initial_totals
|
||||||
|
// Where operating_overhead_usage is the direct mapping from overhead -> operating department, calculated overheads is the
|
||||||
|
// solved overheads usages after taking into account usage between departments, and initial_totals is the initial values
|
||||||
|
// for the operating departments.
|
||||||
|
}
|
||||||
|
// TODO: return something appropriate
|
||||||
|
Ok(vec![])
|
||||||
|
}
|
||||||
22
src/smush_rules.rs
Normal file
22
src/smush_rules.rs
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
// Rules get parsed from file, converted into matrix format (for the in-memory movement),
|
||||||
|
// then combined (smushed) into a single matrix + vector/rule for each . The list of units can then have the rules applied
|
||||||
|
//
|
||||||
|
// For now just ignore the all from/to stuff, it's kind of a shit thing to do, and can
|
||||||
|
// be worked around by actually inputting every type into the rules
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use crate::MovementRule;
|
||||||
|
|
||||||
|
pub fn smush_rules(rules: Vec<MovementRule>) -> Vec<MovementRule> {
|
||||||
|
let ruleMapping: HashMap<String, usize> = HashMap::new();
|
||||||
|
// First build out the list/map of all departments (store index of each element in the array)
|
||||||
|
// TODO: We could make this more advanced by only smushing per divider, so that only the departments
|
||||||
|
// needed between each pass is actually required
|
||||||
|
for rule in rules {
|
||||||
|
for department in rule.from_departments {
|
||||||
|
// ruleMapping.entry(department).or_insert(ruleMapping.len());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vec![]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user