From 8a6a94a11ea19105da3f59a512e7ecd814bbf6cf Mon Sep 17 00:00:00 2001
From: Piv <18462828+Piv200@users.noreply.github.com>
Date: Sat, 11 Mar 2023 23:46:37 +1030
Subject: [PATCH] Stop calculating amounts of overheads that don't have initial
 costs

---
 src/main.rs                |  2 +-
 src/overhead_allocation.rs | 64 ++++++++++++++++++++++++++++----------
 2 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/src/main.rs b/src/main.rs
index 5c0970b..5c0879e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -66,7 +66,7 @@ enum Commands {
         #[arg(short = 'f', long)]
         show_from: bool,
 
-        #[arg(short, long, default_value = "0.1")]
+        #[arg(short, long, default_value = "0.000001")]
         zero_threshold: f64,
 
         #[arg(short, long, value_name = "FILE")]
diff --git a/src/overhead_allocation.rs b/src/overhead_allocation.rs
index b870b7e..a605780 100644
--- a/src/overhead_allocation.rs
+++ b/src/overhead_allocation.rs
@@ -6,10 +6,7 @@ use std::{
 use csv::Writer;
 use itertools::Itertools;
 use nalgebra::{DMatrix, Dynamic, LU};
-use rayon::prelude::{
-    IntoParallelIterator, IntoParallelRefIterator, ParallelDrainFull, ParallelDrainRange,
-    ParallelIterator,
-};
+use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
 use serde::{Deserialize, Serialize};
 
 use crate::{CsvAccount, CsvCost};
@@ -75,7 +72,6 @@ struct MovedAmount {
     from_cost_centre: String,
 }
 
-// TODO: Also need a way to dictate the order of the departments?
 pub trait ReciprocalAllocationSolver {
     fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64>;
 }
@@ -163,7 +159,6 @@ where
     // value is (cc, allocation_statistic, total)
     let mut totals: HashMap<(String, String), f64> = HashMap::new();
     for line in lines.iter() {
-        // TODO: Another optimisation potential here, puttinig the accounts into a map, although less important since there's usually <1k accounts
         let line_index = all_accounts_sorted
             .iter()
             .position(|account| account == &line.account);
@@ -324,6 +319,25 @@ where
             overhead_other_total.append(&mut totals);
         }
     }
+    // TODO: This seems to do nothing even in a complex setting where I'd expect it to do something, can probably be removed
+    let error_amounts: Vec<usize> = overhead_other_total
+        .iter()
+        .filter(|(_, _, value)| *value < zero_threshold && *value > -1. * zero_threshold)
+        .enumerate()
+        .map(|(index, _)| index)
+        .collect();
+    for index in error_amounts.iter() {
+        let (overhead_cc, _, value) = overhead_other_total.remove(*index);
+        let non_error_match = overhead_other_total
+            .iter_mut()
+            .filter(|next| {
+                next.0 == *overhead_cc && (next.2 > zero_threshold || next.2 < -1. * zero_threshold)
+            })
+            .next();
+        if let Some((_, _, match_value)) = non_error_match {
+            *match_value = *match_value + value;
+        }
+    }
 
     // overhead department -> total (summed limit to costs)
     let mut overhead_cc_totals: HashMap<String, f64> = HashMap::new();
@@ -346,10 +360,6 @@ where
         }
     }
 
-    //TODO: Redistribute floating point errors so everything still sums to 1. Also reduces the amount of data produced
-    // Basically needs to take each mapping, and somehow sum the percentages less than the threshold, and evenly(?) redistribute
-    // them to the others within the same department... Wonder if this could have been done in the previous step somehow?
-
     // Finally, for each cc match total produced previously, sum the overhead cc where overhead cc appears in other cc, then
     // divide the other cc by this summed amount (thus getting the relative cost)
 
@@ -617,9 +627,22 @@ fn do_solve_reciprocal<T: ReciprocalAllocationSolver>(
             // TODO: A performance improvement will be to create another hashmap for index -> department, then just
             // iterate over the actual indexes instead (will have preloading)
             for (overhead_department, index) in overhead_department_mappings.iter() {
+                // TODO: Check this filter is actually working correctly by summing the costs and comparing to the non show_from setting
+                // (the sums should match up)
+                // Thinking intuitively, if the cost truely didn't exist, then it never would have been included in the totals
+                // in the first place,
+                let initial_amount = total_costs
+                    .summed_department_costs
+                    .iter()
+                    .filter(|cost| cost.department == *overhead_department)
+                    .next();
+                if initial_amount.is_none() {
+                    continue;
+                }
+
+                let calculated_amount = calculated_overheads[*index];
                 // Calculate each movement individually
-                let calculated =
-                    operating_overhead_mappings.column(*index) * calculated_overheads[*index];
+                let calculated = operating_overhead_mappings.column(*index) * calculated_amount;
                 for (department, index) in &operating_department_mappings {
                     let value = *calculated.get(*index).unwrap();
                     if value > zero_threshold || value < -1. * zero_threshold {
@@ -650,7 +673,8 @@ fn do_solve_reciprocal<T: ReciprocalAllocationSolver>(
             })
             .collect();
         // Redistribute floating point errors (only for ccs we actually allocated from/to)
-        // TODO: Consider removing this once we're doing this above.
+        // Considered removing this since redistribution should be done in cost driver calculations, however since that usually
+        // does nothing, we may as well keep this just in case.
         let initial_cost: f64 = total_costs
             .summed_department_costs
             .iter()
@@ -672,6 +696,7 @@ fn do_solve_reciprocal<T: ReciprocalAllocationSolver>(
                 })
                 .collect(),
         });
+        break;
     }
     Ok(final_account_costs)
 }
@@ -763,9 +788,14 @@ mod tests {
             ],
         }];
 
-        let result =
-            reciprocal_allocation_impl::<File>(allocation_rules, initial_totals, None, 0.001)
-                .unwrap();
+        let mut movement_writer = csv::Writer::from_path("test_output.csv").unwrap();
+        let result = reciprocal_allocation_impl::<File>(
+            allocation_rules,
+            initial_totals,
+            Some(&mut movement_writer),
+            0.00001,
+        )
+        .unwrap();
         assert_eq!(expected_final_allocations, result);
     }
 
@@ -802,7 +832,7 @@ mod tests {
             true,
             "E".to_owned(),
             true,
-            0.1,
+            0.00001,
         );
         assert!(result.is_ok())
     }