// ingey/src/lib.rs

extern crate nalgebra as na;

use itertools::Itertools;
use na::{DMatrix, Dynamic, LU};
use std::{collections::HashMap, error::Error, ops::Mul};

// TODO: Look into serde for serialisation, can also use it to serialise/deserialise
// records from a csv file using the csv crate
/// A single instruction describing how value is moved between units (departments).
///
/// A rule is either a movement (`is_separator == false`) or a marker that ends the current
/// pass (`is_separator == true`, see [`MovementRule::pass_break`]).
#[derive(Debug, Default, Clone, PartialEq)]
pub struct MovementRule {
    /// Units that value is taken from. If the vector is empty, it is intended to mean 'all'.
    pub from_units: Vec<String>,
    /// Units that value is moved to. If the vector is empty, it is intended to mean 'all'.
    pub to_units: Vec<String>,
    /// How much to move: an absolute value, or a share of each source unit's total when
    /// `is_percent` is set.
    pub amount: f64,
    /// Whether `amount` is relative to the source unit's total rather than an absolute value.
    pub is_percent: bool,
    /// Whether this rule is a pass break rather than a movement.
    pub is_separator: bool,
}

impl MovementRule {
    pub fn pass_break() -> MovementRule {
        MovementRule {
            from_units: vec![],
            to_units: vec![],
            amount: 0.0,
            is_percent: false,
            is_separator: true,
        }
    }

    pub fn validate(&self) -> bool {
        if self.from_units.is_empty() && self.to_units.is_empty() {
            // Would be nice to have a decent message/error here as well
            return false;
        }
        if self.is_percent && (self.amount < 0.0 || self.amount > 100.0) {
            return false;
        }
        true
    }
}

// Rules get parsed from file, converted into matrix format (for the in-memory movement),
// then combined (smushed) into a single matrix + vector/rule for each pass (TODO: confirm what
// "each" refers to; the original sentence was left unfinished). The list of units can then
// have the rules applied.
//
// For now, just ignore the "all" from/to handling: it's an awkward feature, and it can
// be worked around by explicitly listing every unit in the rules.

/// Combines ("smushes") a list of movement rules into a reduced list.
///
/// Only the first step exists so far: every department named by a rule is given a stable
/// index in first-seen order. The combination itself is not implemented yet, so the function
/// currently always returns an empty vector.
pub fn smush_rules(rules: Vec<MovementRule>) -> Vec<MovementRule> {
    // First build out the list/map of all departments (store index of each element in the array)
    // TODO: We could make this more advanced by only smushing per divider, so that only the departments
    // needed between each pass is actually required
    let mut department_indexes: HashMap<String, usize> = HashMap::new();
    for rule in rules {
        // Both sources and destinations are departments, so both are indexed.
        for department in rule.from_units.into_iter().chain(rule.to_units) {
            // Read the length before taking the entry: the entry keeps the map mutably borrowed.
            let next_index = department_indexes.len();
            department_indexes.entry(department).or_insert(next_index);
        }
    }
    // TODO: Build the combined rules from `department_indexes`.
    vec![]
}

/// Approach 1: use math (linear algebra) to move value between departments.
///
/// This is currently an empty placeholder: it takes no input and does nothing.
///
/// Design notes for the intended implementation:
/// - Memory-wise and computationally it is expected to be equivalent to the worst case of the
///   map-based approach (the original note said "approach one"; presumably approach 2 was meant),
///   but it can take advantage of auto parallelisation/SIMD to perform fast, particularly on
///   larger datasets.
/// - It basically just involves smushing all the rules, then doing a matrix multiplication and a
///   matrix addition on the initial set.
/// - It can't record passes, but the smushed rules can be recorded and reused if only the data
///   changes later.
/// - An advantage of this approach is that it can be easily extended to run on the GPU.
pub fn move_money_1() {}

/// Approach 2: the traditional/naive, map-based way of applying movement rules.
///
/// The total for each department is held in a map (department -> total amount). Rules are
/// processed in order: the amount a rule moves is derived from the totals as they stood at the
/// start of the current pass, while the movement itself is applied to a working copy. A pass
/// break (separator) rule publishes the working copy as the new start-of-pass totals. Once every
/// rule has been processed, the working copy is returned.
///
/// For each movement rule, every unit in `from_units` gives up either `amount` or, when
/// `is_percent` is set, its start-of-pass total multiplied by `amount`. The sum taken is then
/// split evenly across `to_units`.
///
/// The advantage of this approach is that the required code is tiny and no third-party math
/// library is required. The movement happens on a line-by-line level, so the data could be
/// streamed from disk, and it is much more memory efficient than approach 1.
///
/// # Panics
///
/// Panics if a rule names a department that is not a key of `initial_totals`.
pub fn move_money_2(
    initial_totals: HashMap<String, f64>,
    rules: Vec<MovementRule>,
) -> HashMap<String, f64> {
    // NOTE(review): a percent `amount` is multiplied in directly (so it acts as a fraction),
    // while `MovementRule::validate` accepts percent amounts up to 100 - confirm the scale.
    // NOTE(review): a rule with empty `to_units` still debits its `from_units` and credits
    // nothing, because the "empty means all" behaviour is not implemented here.
    // TODO: Should probably validate that all the rules have departments that actually exist in initial_totals.
    // TODO: Time both approaches to see which is faster depending on the size of the input data/number of rules
    // TODO: Right now this only supports movements between departments, we also need to support movements between accounts.
    //      This would require an expansion so that we also have from/to accounts, and the hashmap will use some struct
    //      that combines an account/department, which is also how the totals will be loaded. (so when loading from disk,
    //      we load the whole GL into memory, sum the account/department totals, and move these into a map line by line)
    // Note: It's potentially a bit more intensive to use cloned totals, but it's much simpler code, and since we're only
    // working line-by-line it isn't really that much memory in practice.
    let mut running_total = initial_totals;
    let mut temp_total: HashMap<String, f64> = running_total.clone();

    for rule in rules {
        if rule.is_separator {
            // End of pass: later rules must see the movements made so far.
            running_total.clone_from(&temp_total);
            continue;
        }

        let mut sum_from = 0.;
        for department in &rule.from_units {
            let pass_start_total = *running_total.get(department).unwrap_or_else(|| {
                panic!(
                    "Rule moves money from unknown department '{}'",
                    department
                )
            });
            let moved_amount = if rule.is_percent {
                pass_start_total * rule.amount
            } else {
                rule.amount
            };
            sum_from += moved_amount;
            *temp_total
                .get_mut(department)
                .expect("working totals always share their keys with the pass-start totals") -=
                moved_amount;
        }

        let value_per_unit = sum_from / rule.to_units.len() as f64;
        for department in &rule.to_units {
            let total = temp_total.get_mut(department).unwrap_or_else(|| {
                panic!("Rule moves money to unknown department '{}'", department)
            });
            *total += value_per_unit;
        }
    }
    temp_total
}

/// The kind of department an allocation rule points at.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DepartmentType {
    /// An operating department.
    Operating,
    /// An overhead department.
    Overhead,
}

// TODO: Could also look at BigDecimal rather than f64 for higher precision (even i64 might be fine if we don't need to divide...)
// Note: remember that only overhead departments are involved when calculating the LU decomposition or pseudoinverse. For each
// department's row, you need either -1 or the remaining entries negative, so that subtracting the initial amounts leaves
// effectively 0 (the simultaneous equations end up with negatives there, so yes, this is expected).
// Also, we could potentially use this same struct for non-overhead departments when mapping from overhead to
// operating departments (TODO: confirm; the original note was cut off after "overhead to").
/// One allocation from an overhead department to another department.
///
/// The fields are public so that code outside this module can build the rules that
/// `reciprocal_allocation` takes as input.
#[derive(Debug, PartialEq)]
pub struct OverheadAllocationRule {
    /// The overhead department whose cost is being allocated.
    pub from_overhead_department: String,
    /// The department receiving the allocation.
    pub to_department: String,
    /// Share of the source department allocated to `to_department`.
    /// NOTE(review): the scale (fraction vs. percentage points) is not established here - confirm.
    pub percent: f64,
    /// Whether `to_department` is an operating or an overhead department.
    pub to_department_type: DepartmentType,
}

/// The cost value recorded against a single department.
///
/// The fields are public so that code outside this module can build and read these values.
#[derive(Debug, Clone, PartialEq)]
pub struct TotalDepartmentCost {
    /// Name of the department.
    pub department: String,
    /// Cost value for the department.
    pub value: f64,
}

/// The per-department costs belonging to one account.
///
/// The fields are public so that code outside this module can build the input for, and read
/// the output of, `reciprocal_allocation`.
pub struct AccountCost {
    /// Name of the account.
    pub account: String,
    /// One cost entry per department for this account.
    pub summed_department_costs: Vec<TotalDepartmentCost>,
}

// TODO: Also need a way to dictate the order of the departments?
/// Something that can turn a matrix of department costs into a solved matrix.
///
/// Implemented in this file for an LU decomposition (which solves the linear system) and for a
/// plain matrix (which is multiplied with the costs).
pub trait ReciprocalAllocationSolver {
    /// Produces the solution for the given `costs` matrix.
    fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64>;
}

impl ReciprocalAllocationSolver for LU<f64, Dynamic, Dynamic> {
    fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
        self.solve(costs).unwrap()
    }
}

impl ReciprocalAllocationSolver for DMatrix<f64> {
    /// "Solves" by matrix multiplication: returns the product of this matrix and `costs`.
    fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
        let lhs = self;
        Mul::mul(lhs, costs)
    }
}

/// Assigns a zero-based index to every department that takes part in a rule of the given kind.
///
/// Rules are selected by the type of their destination (`to_department_type`), and both ends of
/// every selected rule are indexed, in first-seen order (source before destination). This means
/// the index built for `DepartmentType::Operating` also contains the overhead departments that
/// allocate to operating departments.
fn get_rules_indexes(
    allocations: &[OverheadAllocationRule],
    department_type: DepartmentType,
) -> HashMap<String, usize> {
    allocations
        .iter()
        .filter(|allocation| allocation.to_department_type == department_type)
        .flat_map(|allocation| {
            [
                &allocation.from_overhead_department,
                &allocation.to_department,
            ]
        })
        .unique()
        // Clone after de-duplicating so each department name is only copied once.
        .cloned()
        .enumerate()
        .map(|(index, department)| (department, index))
        .collect()
}

/// Performs the reciprocal allocation (matrix) method to allocate servicing departments'
/// (indirect) costs to functional departments.
///
/// This is basically just a matrix solve. A square matrix is built over the overhead departments
/// that appear in overhead-to-overhead rules, holding each rule's `percent` at the row of the
/// destination department and the column of the source department. When that matrix's
/// determinant is exactly zero, its Moore-Penrose pseudo-inverse is used as the solver; otherwise
/// its LU decomposition is used.
///
/// # Errors
///
/// Returns an error if the pseudo-inverse cannot be computed, or if the solving step fails.
pub fn reciprocal_allocation(
    allocations: Vec<OverheadAllocationRule>,
    account_costs: Vec<AccountCost>,
    // TODO: Throw an appropriate error
) -> Result<Vec<AccountCost>, Box<dyn Error>> {
    let overhead_department_mappings: HashMap<String, usize> =
        get_rules_indexes(&allocations, DepartmentType::Overhead);
    let overhead_count = overhead_department_mappings.len();
    // TODO: An index of the operating departments will be needed once the solved overheads are
    // mapped onto operating departments.

    let mut slice_allocations = vec![0.; overhead_count * overhead_count];

    for allocation in allocations
        .iter()
        .filter(|allocation| allocation.to_department_type == DepartmentType::Overhead)
    {
        // TODO: Check if we need to flip this around
        // Both ends of an overhead-to-overhead rule are overhead departments, and the matrix is
        // overhead x overhead, so both indexes must come from the overhead index. The index
        // was built from exactly these rules, so the lookups cannot miss.
        let from_index = overhead_department_mappings[&allocation.from_overhead_department];
        let to_index = overhead_department_mappings[&allocation.to_department];
        slice_allocations[from_index + overhead_count * to_index] = allocation.percent;
    }

    // TODO: Also need ones along the diagonal, and negatives in some places...

    let mat: DMatrix<f64> =
        DMatrix::from_row_slice(overhead_count, overhead_count, &slice_allocations);

    // NOTE(review): an exact comparison with zero only catches perfectly singular matrices;
    // nearly singular ones still take the LU path.
    if mat.determinant() == 0. {
        let pseudo_inverse = mat.svd(true, true).pseudo_inverse(0.000001)?;
        do_solve_reciprocal(
            pseudo_inverse,
            account_costs,
            overhead_department_mappings,
            allocations,
        )
    } else {
        do_solve_reciprocal(
            mat.lu(),
            account_costs,
            overhead_department_mappings,
            allocations,
        )
    }
}

/// Runs `solver` once per account over that account's costs for the indexed departments.
///
/// For every account, a column of costs is built (one row per entry of `department_mappings`,
/// zero where the account has no cost for that department) and handed to `solver`. Costs for
/// departments that are not in `department_mappings` are left out of the column instead of
/// causing a panic.
///
/// The solved values are not used yet: the function currently always returns an empty vector,
/// and `allocations` is not read.
fn do_solve_reciprocal<T: ReciprocalAllocationSolver>(
    solver: T,
    account_costs: Vec<AccountCost>,
    department_mappings: HashMap<String, usize>,
    allocations: Vec<OverheadAllocationRule>,
) -> Result<Vec<AccountCost>, Box<dyn Error>> {
    let department_count = department_mappings.len();
    // TODO: Could batch the accounts, although probably won't see too big a speed increase, compiler should help us out
    for total_costs in account_costs {
        let mut slice_costs = vec![0.; department_count];

        for cost in total_costs.summed_department_costs {
            // Only indexed departments have a row in the system being solved.
            if let Some(&index) = department_mappings.get(&cost.department) {
                slice_costs[index] = cost.value;
            }
        }

        let costs_vec: DMatrix<f64> = DMatrix::from_row_slice(department_count, 1, &slice_costs);

        let _calculated_overheads = solver.solve(&costs_vec);

        // Calculation: operating_overhead_usage . calculated_overheads + initial_totals
        // Where operating_overhead_usage is the direct mapping from overhead -> operating department, calculated overheads is the
        // solved overheads usages after taking into account usage between departments, and initial_totals is the initial values
        // for the operating departments.
    }
    // TODO: return something appropriate
    Ok(vec![])
}