From 1e36bc68e45a6f4c9ed45038038b7ce670e37b5f Mon Sep 17 00:00:00 2001
From: piv <>
Date: Wed, 8 Mar 2023 12:56:39 +1030
Subject: [PATCH] Fix move money to correctly move between accounts/ccs with
 ranges and  and consider cost output correctly, speed up process by using
 hashset for rule accounts/departments

---
 src/lib.rs                 |  14 ++-
 src/main.rs                |  13 ++-
 src/move_money.rs          | 211 +++++++++++++++++++++++--------------
 src/overhead_allocation.rs |   2 -
 4 files changed, 152 insertions(+), 88 deletions(-)
diff --git a/src/lib.rs b/src/lib.rs
index 2292aa2..488ada4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -22,6 +22,7 @@ pub extern "C" fn move_money_from_text(
     rules: *const c_char,
     lines: *const c_char,
     accounts: *const c_char,
+    cost_centres: *const c_char,
     use_numeric_accounts: bool,
 ) -> *mut c_char {
     let mut output_writer = csv::Writer::from_writer(vec![]);
@@ -34,13 +35,18 @@ pub extern "C" fn move_money_from_text(
         CStr::from_ptr(lines)
     };
     let safe_accounts = unsafe {
-        assert!(!lines.is_null());
+        assert!(!accounts.is_null());
         CStr::from_ptr(accounts)
     };
+    let safe_cost_centres = unsafe {
+        assert!(!cost_centres.is_null());
+        CStr::from_ptr(cost_centres)
+    };
     move_money(
-        csv::Reader::from_reader(safe_rules.to_bytes()),
-        csv::Reader::from_reader(safe_lines.to_bytes()),
-        csv::Reader::from_reader(safe_accounts.to_bytes()),
+        &mut csv::Reader::from_reader(safe_rules.to_bytes()),
+        &mut csv::Reader::from_reader(safe_lines.to_bytes()),
+        &mut csv::Reader::from_reader(safe_accounts.to_bytes()),
+        &mut csv::Reader::from_reader(safe_cost_centres.to_bytes()),
         &mut output_writer,
         use_numeric_accounts,
         true,
diff --git a/src/main.rs b/src/main.rs
index 4337768..5a8c90e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -27,6 +27,9 @@ enum Commands {
         #[arg(short = 'a', long, value_name = "FILE")]
         accounts: PathBuf,
 
+        #[arg(short = 'c', long, value_name = "FILE")]
+        cost_centres: PathBuf,
+
         #[arg(short, long, value_name = "FILE")]
         output: Option<PathBuf>,
 
@@ -80,6 +83,7 @@ fn main() -> anyhow::Result<()> {
             rules,
             lines,
             accounts,
+            cost_centres,
             output,
             use_numeric_accounts,
             flush_pass,
@@ -87,6 +91,7 @@ fn main() -> anyhow::Result<()> {
             rules,
             lines,
             accounts,
+            cost_centres,
             output,
             use_numeric_accounts,
             flush_pass,
@@ -118,14 +123,16 @@ fn move_money(
     rules: PathBuf,
     lines: PathBuf,
     accounts: PathBuf,
+    cost_centres: PathBuf,
     output: Option<PathBuf>,
     use_numeric_accounts: bool,
     flush_pass: bool,
 ) -> anyhow::Result<()> {
     coster_rs::move_money(
-        csv::Reader::from_path(rules)?,
-        csv::Reader::from_path(lines)?,
-        csv::Reader::from_path(accounts)?,
+        &mut csv::Reader::from_path(rules)?,
+        &mut csv::Reader::from_path(lines)?,
+        &mut csv::Reader::from_path(accounts)?,
+        &mut csv::Reader::from_path(cost_centres)?,
         &mut csv::Writer::from_path(output.unwrap_or(PathBuf::from("output.csv")))?,
         use_numeric_accounts,
         flush_pass,
diff --git a/src/move_money.rs b/src/move_money.rs
index a2e1bb4..e81ac82 100644
--- a/src/move_money.rs
+++ b/src/move_money.rs
@@ -1,4 +1,7 @@
-use std::{collections::HashMap, thread::current};
+use std::{
+    collections::{HashMap, HashSet},
+    thread::current,
+};
 
 use itertools::Itertools;
 use serde::{Deserialize, Serialize, Serializer};
@@ -42,15 +45,23 @@ struct CsvMovementRule {
     cost_output: Option<String>,
 }
 
+#[derive(Deserialize)]
+pub struct PartialCostCentre {
+    #[serde(rename = "Code")]
+    pub code: String,
+    #[serde(rename = "Area")]
+    pub area: Option<String>,
+}
+
 #[derive(Default)]
 pub struct MovementRule {
     // If the vectors are empty, then it means 'all'
-    pub from_departments: Vec<String>,
-    pub to_departments: Vec<String>,
+    pub from_departments: HashSet<String>,
+    pub to_departments: HashSet<String>,
     pub all_from_departments: bool,
     pub all_to_departments: bool,
-    pub from_accounts: Vec<String>,
-    pub to_accounts: Vec<String>,
+    pub from_accounts: HashSet<String>,
+    pub to_accounts: HashSet<String>,
     pub all_from_accounts: bool,
     pub all_to_accounts: bool,
     pub amount: f64,
@@ -102,10 +113,11 @@ where
     s.serialize_f64((x * 100000.).round() / 100000.)
 }
 
-pub fn move_money<R, L, A, O>(
-    rules_reader: csv::Reader<R>,
-    lines_reader: csv::Reader<L>,
-    accounts_reader: csv::Reader<A>,
+pub fn move_money<R, L, A, C, O>(
+    rules_reader: &mut csv::Reader<R>,
+    lines_reader: &mut csv::Reader<L>,
+    accounts_reader: &mut csv::Reader<A>,
+    cost_centres_reader: &mut csv::Reader<C>,
     output: &mut csv::Writer<O>,
     use_numeric_accounts: bool,
     flush_pass: bool,
@@ -114,9 +126,9 @@ where
     R: std::io::Read,
     L: std::io::Read,
     A: std::io::Read,
+    C: std::io::Read,
     O: std::io::Write,
 {
-    let mut lines_reader = lines_reader;
     let headers = lines_reader.headers()?;
     let mut account_index = 0;
     let mut department_index = 0;
@@ -179,53 +191,89 @@ where
             .sorted()
             .collect()
     };
-    let all_departments_sorted = lines
+    let line_departments_sorted = lines
         .keys()
         .map(|key| key.department.clone())
         .unique()
         .sorted()
         .collect();
+    let all_departments_sorted = cost_centres_reader
+        .deserialize::<PartialCostCentre>()
+        .map(|cc| cc.unwrap().code)
+        .unique()
+        .sorted()
+        .collect();
     let mut rules_reader = rules_reader;
     let mut rules: Vec<MovementRule> = vec![];
     for movement_rule in rules_reader.deserialize() {
         // TODO: Consider reclass rule group, how does that even work?
         let movement_rule: CsvMovementRule = movement_rule?;
-        let from_accounts = extract_range(
-            movement_rule.source_from_account,
-            movement_rule.source_to_account,
-            false,
-            &all_accounts_sorted,
-        );
-        let to_accounts = if movement_rule.cost_output.is_some() {
-            account_mappings
-                .iter()
-                .filter(|(_, account)| {
-                    account.cost_output.is_some()
-                        && account.cost_output.clone().unwrap()
-                            == movement_rule.cost_output.clone().unwrap()
-                })
-                .map(|(code, _)| code.clone())
-                .collect()
+        let is_separator = movement_rule.apply == "-DIVIDER-";
+        let from_accounts = if is_separator {
+            HashSet::new()
+        } else {
+            if movement_rule.cost_output.is_some() {
+                account_mappings
+                    .iter()
+                    .filter(|(_, account)| {
+                        account.cost_output.is_some()
+                            && account.cost_output.clone().unwrap()
+                                == movement_rule.cost_output.clone().unwrap()
+                    })
+                    .map(|(code, _)| code.clone())
+                    .collect()
+            } else {
+                extract_range(
+                    movement_rule.source_from_account,
+                    movement_rule.source_to_account,
+                    false,
+                    &all_accounts_sorted,
+                )
+            }
+        };
+        let to_accounts = if is_separator {
+            HashSet::new()
+        } else {
+            if movement_rule.cost_output.is_some() {
+                account_mappings
+                    .iter()
+                    .filter(|(_, account)| {
+                        account.cost_output.is_some()
+                            && account.cost_output.clone().unwrap()
+                                == movement_rule.cost_output.clone().unwrap()
+                    })
+                    .map(|(code, _)| code.clone())
+                    .collect()
+            } else {
+                extract_range(
+                    movement_rule.dest_from_account,
+                    movement_rule.dest_to_account,
+                    false,
+                    &all_accounts_sorted,
+                )
+            }
+        };
+        let from_departments = if is_separator {
+            HashSet::new()
         } else {
             extract_range(
-                movement_rule.dest_from_account,
-                movement_rule.dest_to_account,
+                movement_rule.source_from_department,
+                movement_rule.source_to_department,
                 false,
-                &all_accounts_sorted,
+                // Don't use all departments, as we only need ones that actually have costs
+                &line_departments_sorted,
+            )
+        };
+        let to_departments = if is_separator {
+            HashSet::new()
+        } else {
+            extract_range(
+                movement_rule.dest_from_department,
+                movement_rule.dest_to_department,
+                false,
+                &all_departments_sorted,
             )
         };
-        let from_departments = extract_range(
-            movement_rule.source_from_department,
-            movement_rule.source_to_department,
-            false,
-            &all_departments_sorted,
-        );
-        let to_departments = extract_range(
-            movement_rule.dest_from_department,
-            movement_rule.dest_to_department,
-            false,
-            &all_departments_sorted,
-        );
         rules.push(MovementRule {
             from_departments,
             to_departments,
@@ -242,7 +290,7 @@ where
                     1.
                 }),
             is_percent: movement_rule.is_percent == "%",
-            is_separator: movement_rule.apply == "-DIVIDER-",
+            is_separator,
         })
     }
 
@@ -264,9 +312,9 @@ where
     Ok(())
 }
 
-fn extract_range(from: String, to: String, all: bool, options: &Vec<String>) -> Vec<String> {
-    if all {
-        return vec![];
+fn extract_range(from: String, to: String, all: bool, options: &Vec<String>) -> HashSet<String> {
+    if all || (from.is_empty() && to.is_empty()) {
+        return options.clone().into_iter().collect();
     }
     let start_index = options
         .iter()
@@ -280,14 +328,14 @@ fn extract_range(from: String, to: String, all: bool, options: &Vec<String>) ->
         .map(|end| end.0);
     if let Some(start) = start_index {
         if let Some(end) = end_index {
-            return Vec::from(&options[start..end + 1]);
+            return Vec::from(&options[start..end + 1]).into_iter().collect();
         } else {
-            return vec![options[start].clone()];
+            return vec![options[start].clone()].into_iter().collect();
         }
     } else if let Some(end) = end_index {
-        return vec![options[end].clone()];
+        return vec![options[end].clone()].into_iter().collect();
     }
-    return vec![];
+    return HashSet::new();
 }
 
 // Approach 1:
@@ -301,6 +349,7 @@ pub fn move_money_1() {}
 
 pub struct MoveMoneyResult {
     pass: i32,
+    // TODO: We want the from account and the to account
     totals: HashMap<Unit, f64>,
 }
 
@@ -322,6 +371,8 @@ pub fn move_money_2(
 ) -> Vec<MoveMoneyResult> {
     // Note: It's potentially a bit more intensive to use cloned totals (rather than just update temp_total per rule),
     // but it's much simpler code and, and since we're only working line-by-line, it isn't really that much memory in practice
+    // TODO: This is initial totals is problematic, as we may move into a cc that wasn't in the list of lines. In which case we'd need
+    // to add a new line
     let mut running_total = HashMap::from(initial_totals);
     let mut temp_total = running_total.clone();
     let mut moveMoneyResult: Vec<MoveMoneyResult> = vec![];
@@ -335,48 +386,49 @@ pub fn move_money_2(
     for rule in rules {
         if rule.is_separator {
             if flush_pass {
+                current_pass += 1;
                 // Flush the totals at the end of this pass (more specifically the change)
                 moveMoneyResult.push(MoveMoneyResult {
                     pass: current_pass,
-                    totals: running_total
+                    totals: temp_total
                         .iter()
-                        .map(|(unit, value)| (unit.clone(), temp_total.get(unit).unwrap() - value))
+                        .map(|(unit, value)| {
+                            (unit.clone(), value - running_total.get(unit).unwrap_or(&0.))
+                        })
                         .filter(|(_, value)| *value != 0.)
                         .collect(),
                 });
-                current_pass += 1;
             }
             running_total = temp_total.clone();
         } else {
-            let mut sum_from = 0.;
-            for unit in &running_total {
-                if (rule.all_from_departments || rule.from_departments.contains(&unit.0.department))
-                    && (rule.all_from_accounts || rule.from_accounts.contains(&unit.0.account))
+            for (unit, amount) in &running_total {
+                if (rule.all_from_departments || rule.from_departments.contains(&unit.department))
+                    && (rule.all_from_accounts || rule.from_accounts.contains(&unit.account))
                 {
-                    let previous_temp = unit.1;
+                    let previous_temp = amount;
                     let added_amount = if rule.is_percent {
                         previous_temp * rule.amount
                     } else {
                         rule.amount
                     };
-                    sum_from += added_amount;
-                    *temp_total.get_mut(&unit.0).unwrap() -= added_amount;
-                }
-            }
-
-            let num_to_units = running_total
-                .keys()
-                .filter(|key| {
-                    (rule.all_to_departments || rule.to_departments.contains(&key.department))
-                        && (rule.all_to_accounts || rule.to_accounts.contains(&key.account))
-                })
-                .count();
-            let value_per_unit = sum_from / num_to_units as f64;
-            for unit in running_total.keys() {
-                if (rule.all_to_departments || rule.to_departments.contains(&unit.department))
-                    && (rule.all_to_accounts || rule.to_accounts.contains(&unit.account))
-                {
-                    *temp_total.get_mut(&unit).unwrap() += value_per_unit;
+                    // TODO: Track the from department/account, maintained in a separate list if flush_pass is set
+                    *temp_total.get_mut(&unit).unwrap() -= added_amount;
+                    let department = if rule.to_departments.len() == 1 {
+                        rule.to_departments.iter().next().unwrap().clone()
+                    } else {
+                        unit.department.clone()
+                    };
+                    let account = if rule.to_accounts.len() == 1 {
+                        rule.to_accounts.iter().next().unwrap().clone()
+                    } else {
+                        unit.account.clone()
+                    };
+                    *temp_total
+                        .entry(Unit {
+                            department,
+                            account,
+                        })
+                        .or_insert(0.) += added_amount;
                 }
             }
         }
@@ -394,9 +446,10 @@ mod tests {
     #[test]
     fn move_money() {
         super::move_money(
-            csv::Reader::from_path("reclassrule.csv").unwrap(),
-            csv::Reader::from_path("line.csv").unwrap(),
-            csv::Reader::from_path("account.csv").unwrap(),
+            &mut csv::Reader::from_path("reclassrule.csv").unwrap(),
+            &mut csv::Reader::from_path("line.csv").unwrap(),
+            &mut csv::Reader::from_path("account.csv").unwrap(),
+            &mut csv::Reader::from_path("costcentres.csv").unwrap(),
             &mut csv::Writer::from_path("output.csv").unwrap(),
             false,
             true,
diff --git a/src/overhead_allocation.rs b/src/overhead_allocation.rs
index 0b3569e..7ecabf5 100644
--- a/src/overhead_allocation.rs
+++ b/src/overhead_allocation.rs
@@ -39,8 +39,6 @@ pub struct AllocationStatisticAccountRange {
     end: usize,
 }
 
-type CsvCostCentre = HashMap<String, String>;
-
 type CsvArea = HashMap<String, String>;
 
 // Note: remember these are overhead departments only when calculating the lu decomposition or pseudoinverse, and for each department,