439 lines
15 KiB
Rust
439 lines
15 KiB
Rust
use std::collections::{HashMap, HashSet};
|
|
|
|
use itertools::Itertools;
|
|
use serde::{Deserialize, Serialize, Serializer};
|
|
|
|
use crate::CsvAccount;
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
struct CsvMovementRule {
|
|
#[serde(rename = "CostCentreSourceFrom", default)]
|
|
source_from_department: String,
|
|
#[serde(rename = "CostCentreSourceTo", default)]
|
|
source_to_department: String,
|
|
#[serde(rename = "CostCentreDestFrom", default)]
|
|
dest_from_department: String,
|
|
#[serde(rename = "CostCentreDestTo", default)]
|
|
dest_to_department: String,
|
|
#[serde(rename = "AccountSourceFrom", default)]
|
|
source_from_account: String,
|
|
#[serde(rename = "AccountSourceTo", default)]
|
|
source_to_account: String,
|
|
#[serde(rename = "AccountDestFrom", default)]
|
|
dest_from_account: String,
|
|
#[serde(rename = "AccountDestTo", default)]
|
|
dest_to_account: String,
|
|
#[serde(rename = "AmountValue")]
|
|
amount: Option<f64>,
|
|
#[serde(rename = "AmountType", default)]
|
|
is_percent: String,
|
|
#[serde(rename = "Apply", default)]
|
|
apply: String,
|
|
#[serde(rename = "CostOutput")]
|
|
cost_output: Option<String>,
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
pub struct PartialCostCentre {
|
|
#[serde(rename = "Code")]
|
|
pub code: String,
|
|
#[serde(rename = "Area")]
|
|
pub area: Option<String>,
|
|
}
|
|
|
|
#[derive(Default)]
|
|
pub struct MovementRule {
|
|
// If the vectors are empty, then it means 'all'
|
|
pub from_departments: HashSet<String>,
|
|
pub to_departments: HashSet<String>,
|
|
pub all_from_departments: bool,
|
|
pub all_to_departments: bool,
|
|
pub from_accounts: HashSet<String>,
|
|
pub to_accounts: HashSet<String>,
|
|
pub all_from_accounts: bool,
|
|
pub all_to_accounts: bool,
|
|
pub amount: f64,
|
|
pub is_percent: bool,
|
|
pub is_separator: bool,
|
|
}
|
|
|
|
impl MovementRule {
|
|
pub fn pass_break() -> MovementRule {
|
|
MovementRule {
|
|
is_separator: true,
|
|
..MovementRule::default()
|
|
}
|
|
}
|
|
|
|
pub fn validate(&self) -> bool {
|
|
if self.from_departments.is_empty() && self.to_departments.is_empty() {
|
|
// Would be nice to have a decent message/error here as well
|
|
return false;
|
|
}
|
|
if self.is_percent && (self.amount < 0.0 || self.amount > 100.0) {
|
|
return false;
|
|
}
|
|
true
|
|
}
|
|
}
|
|
|
|
#[derive(Hash, Clone, Default, PartialEq, Eq)]
|
|
pub struct Unit {
|
|
pub department: String,
|
|
pub account: String,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
pub struct CsvCost {
|
|
#[serde(rename = "ACCOUNT")]
|
|
pub account: String,
|
|
#[serde(rename = "COSTCENTRE")]
|
|
pub department: String,
|
|
#[serde(serialize_with = "round_serialize")]
|
|
pub value: f64,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
pub struct MovedMoneyAmount {
|
|
pub account: String,
|
|
pub department: String,
|
|
pub value: f64,
|
|
pub from_account: String,
|
|
pub from_department: String,
|
|
pub pass: i32,
|
|
}
|
|
|
|
fn round_serialize<S>(x: &f64, s: S) -> Result<S::Ok, S::Error>
|
|
where
|
|
S: Serializer,
|
|
{
|
|
s.serialize_f64((x * 100000.).round() / 100000.)
|
|
}
|
|
|
|
pub fn move_money<R, L, A, C, O>(
|
|
rules_reader: &mut csv::Reader<R>,
|
|
lines_reader: &mut csv::Reader<L>,
|
|
accounts_reader: &mut csv::Reader<A>,
|
|
cost_centres_reader: &mut csv::Reader<C>,
|
|
output: &mut csv::Writer<O>,
|
|
use_numeric_accounts: bool,
|
|
flush_pass: bool,
|
|
) -> anyhow::Result<()>
|
|
where
|
|
R: std::io::Read,
|
|
L: std::io::Read,
|
|
A: std::io::Read,
|
|
C: std::io::Read,
|
|
O: std::io::Write,
|
|
{
|
|
let headers = lines_reader.headers()?;
|
|
let mut account_index = 0;
|
|
let mut department_index = 0;
|
|
let mut account_type_index = 0;
|
|
for (index, field) in headers.iter().enumerate() {
|
|
if field.eq_ignore_ascii_case("account") {
|
|
account_index = index;
|
|
} else if field.eq_ignore_ascii_case("costcentre") {
|
|
department_index = index;
|
|
} else if field.eq_ignore_ascii_case("accounttype") {
|
|
account_type_index = index;
|
|
}
|
|
}
|
|
|
|
let lines: HashMap<Unit, f64> = lines_reader
|
|
.records()
|
|
.map(|record| {
|
|
let record = record.unwrap();
|
|
let account = record.get(account_index).unwrap();
|
|
let department = record.get(department_index).unwrap();
|
|
let sum = record
|
|
.iter()
|
|
.enumerate()
|
|
.filter(|(i, _)| {
|
|
*i != account_index && *i != department_index && *i != account_type_index
|
|
})
|
|
.map(|(_, f)| f.parse::<f64>().unwrap_or(0.))
|
|
.sum();
|
|
(
|
|
Unit {
|
|
account: account.into(),
|
|
department: department.into(),
|
|
},
|
|
sum,
|
|
)
|
|
})
|
|
.collect();
|
|
let accounts_reader = accounts_reader;
|
|
let all_accounts = accounts_reader
|
|
.deserialize()
|
|
.collect::<Result<Vec<CsvAccount>, csv::Error>>()?;
|
|
let account_mappings: HashMap<String, CsvAccount> = all_accounts
|
|
.into_iter()
|
|
.map(|account| (account.code.clone(), account))
|
|
.collect();
|
|
|
|
let all_accounts_sorted = if use_numeric_accounts {
|
|
lines
|
|
.keys()
|
|
.map(|key| key.account.clone().parse::<i32>().unwrap())
|
|
.unique()
|
|
.sorted()
|
|
.map(|account| account.to_string())
|
|
.collect()
|
|
} else {
|
|
lines
|
|
.keys()
|
|
.map(|key| key.account.clone())
|
|
.unique()
|
|
.sorted()
|
|
.collect()
|
|
};
|
|
let all_departments_sorted = cost_centres_reader
|
|
.deserialize::<PartialCostCentre>()
|
|
.map(|cc| cc.unwrap().code)
|
|
.unique()
|
|
.sorted()
|
|
.collect();
|
|
let mut rules: Vec<MovementRule> = vec![];
|
|
for movement_rule in rules_reader.deserialize() {
|
|
// TODO: Consider reclass rule group, how does that even work?
|
|
let movement_rule: CsvMovementRule = movement_rule?;
|
|
let is_separator = movement_rule.apply == "-DIVIDER-";
|
|
let from_accounts = if is_separator {
|
|
HashSet::new()
|
|
} else if movement_rule.cost_output.is_some() {
|
|
account_mappings
|
|
.iter()
|
|
.filter(|(_, account)| {
|
|
account.cost_output.is_some()
|
|
&& account.cost_output.clone().unwrap()
|
|
== movement_rule.cost_output.clone().unwrap()
|
|
})
|
|
.map(|(code, _)| code.clone())
|
|
.collect()
|
|
} else {
|
|
extract_range(
|
|
movement_rule.source_from_account,
|
|
movement_rule.source_to_account,
|
|
&all_accounts_sorted,
|
|
)
|
|
};
|
|
let to_accounts = if is_separator {
|
|
HashSet::new()
|
|
} else if movement_rule.cost_output.is_some() {
|
|
account_mappings
|
|
.iter()
|
|
.filter(|(_, account)| {
|
|
account.cost_output.is_some()
|
|
&& account.cost_output.clone().unwrap()
|
|
== movement_rule.cost_output.clone().unwrap()
|
|
})
|
|
.map(|(code, _)| code.clone())
|
|
.collect()
|
|
} else {
|
|
extract_range(
|
|
movement_rule.dest_from_account,
|
|
movement_rule.dest_to_account,
|
|
&all_accounts_sorted,
|
|
)
|
|
};
|
|
let from_departments = if is_separator {
|
|
HashSet::new()
|
|
} else {
|
|
extract_range(
|
|
movement_rule.source_from_department,
|
|
movement_rule.source_to_department,
|
|
&all_departments_sorted,
|
|
)
|
|
};
|
|
let to_departments = if is_separator {
|
|
HashSet::new()
|
|
} else {
|
|
extract_range(
|
|
movement_rule.dest_from_department,
|
|
movement_rule.dest_to_department,
|
|
&all_departments_sorted,
|
|
)
|
|
};
|
|
rules.push(MovementRule {
|
|
all_from_departments: from_departments.is_empty(),
|
|
all_to_departments: to_departments.is_empty(),
|
|
from_departments,
|
|
to_departments,
|
|
all_from_accounts: from_accounts.is_empty(),
|
|
all_to_accounts: to_accounts.is_empty(),
|
|
from_accounts,
|
|
to_accounts,
|
|
amount: movement_rule.amount.unwrap_or(0.)
|
|
* (if movement_rule.is_percent == "%" {
|
|
0.01
|
|
} else {
|
|
1.
|
|
}),
|
|
is_percent: movement_rule.is_percent == "%",
|
|
is_separator,
|
|
})
|
|
}
|
|
|
|
// Then run move_money
|
|
let moved = move_money_2(lines, &rules, flush_pass, output);
|
|
|
|
if !flush_pass {
|
|
// Ouput the final moneys
|
|
for (unit, value) in moved {
|
|
output.serialize(CsvCost {
|
|
account: unit.account,
|
|
department: unit.department,
|
|
value,
|
|
})?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn extract_range(from: String, to: String, options: &Vec<String>) -> HashSet<String> {
|
|
if from.is_empty() && to.is_empty() {
|
|
return HashSet::new();
|
|
}
|
|
let start_index = options
|
|
.iter()
|
|
.enumerate()
|
|
.find(|option| option.1 == &from)
|
|
.map(|start| start.0);
|
|
let end_index = options
|
|
.iter()
|
|
.enumerate()
|
|
.find(|option| option.1 == &to)
|
|
.map(|end| end.0);
|
|
if let Some(start) = start_index {
|
|
if let Some(end) = end_index {
|
|
return Vec::from(&options[start..end + 1]).into_iter().collect();
|
|
} else {
|
|
return vec![options[start].clone()].into_iter().collect();
|
|
}
|
|
} else if let Some(end) = end_index {
|
|
return vec![options[end].clone()].into_iter().collect();
|
|
}
|
|
return HashSet::new();
|
|
}
|
|
|
|
// Approach 1:
|
|
// Use math (linear algebra) to move between departments. Memory/computationally it's equivalent
|
|
// to the worst case of approach one, however can take advantage of auto parallelisation/simd
|
|
// to perform fast, particularly on larger datasets.
|
|
// This basically just involves smushing all the rules, then doing a matrix multiple and matrix addition
|
|
// on the initial set. Can't record passes, but can record the smushed rules if only the data changes later
|
|
// Advantage of this approach is it can be easily extended to run on the gpu.
|
|
pub fn move_money_1() {}
|
|
|
|
// Approach 2:
|
|
// Traditinoal/naive, total for each department is stored in an initial map (department -> total amount).
|
|
// Another map is built up for each rule, and each rule is processed based on the amount in the current total
|
|
// map.
|
|
// Upon a pass break (separator), the temp map will assign the values into the total map.
|
|
// Once done, do a final assignment back to the total, and return that.
|
|
// Advantage of this is the required code is tiny, and no third-party math library is required.
|
|
// Note that the movement happens on a line-by-line level. So we can stream the data from disk, and potentially apply this
|
|
// to every. It's also much more memory efficient than approach 1.
|
|
// TODO: Time both approaches to seee which is faster depending on the size of the input data/number of rules
|
|
// Verdict: This is already pretty fast, at least much faster than ppm for BigDataset
|
|
pub fn move_money_2<W>(
|
|
initial_totals: HashMap<Unit, f64>,
|
|
rules: &Vec<MovementRule>,
|
|
flush_pass: bool,
|
|
output: &mut csv::Writer<W>,
|
|
) -> HashMap<Unit, f64>
|
|
where
|
|
W: std::io::Write,
|
|
{
|
|
// Note: It's potentially a bit more intensive to use cloned totals (rather than just update temp_total per rule),
|
|
// but it's much simpler code and, and since we're only working line-by-line, it isn't really that much memory in practice
|
|
// TODO: This is initial totals is problematic, as we may move into a cc that wasn't in the list of lines. In which case we'd need
|
|
// to add a new line
|
|
let mut running_total = HashMap::from(initial_totals);
|
|
let mut temp_total = running_total.clone();
|
|
let mut current_pass = 0;
|
|
if flush_pass {
|
|
for (unit, value) in running_total.iter() {
|
|
output
|
|
.serialize(MovedMoneyAmount {
|
|
account: unit.account.clone(),
|
|
department: unit.department.clone(),
|
|
value: *value,
|
|
from_account: unit.account.clone(),
|
|
from_department: unit.department.clone(),
|
|
pass: current_pass,
|
|
})
|
|
.expect("Failed to write moved amount")
|
|
}
|
|
}
|
|
for rule in rules {
|
|
if rule.is_separator {
|
|
current_pass += 1;
|
|
running_total = temp_total.clone();
|
|
} else {
|
|
for (unit, amount) in &running_total {
|
|
if (rule.all_from_departments || rule.from_departments.contains(&unit.department))
|
|
&& (rule.all_from_accounts || rule.from_accounts.contains(&unit.account))
|
|
{
|
|
let previous_temp = amount;
|
|
let added_amount = if rule.is_percent {
|
|
previous_temp * rule.amount
|
|
} else {
|
|
rule.amount
|
|
};
|
|
// TODO: Track the from department/account, maintained in a separate list if flush_pass is set
|
|
*temp_total.get_mut(&unit).unwrap() -= added_amount;
|
|
let department = if rule.to_departments.len() == 1 {
|
|
rule.to_departments.iter().next().unwrap().clone()
|
|
} else {
|
|
unit.department.clone()
|
|
};
|
|
let account = if rule.to_accounts.len() == 1 {
|
|
rule.to_accounts.iter().next().unwrap().clone()
|
|
} else {
|
|
unit.account.clone()
|
|
};
|
|
if flush_pass {
|
|
output
|
|
.serialize(MovedMoneyAmount {
|
|
department: department.clone(),
|
|
account: department.clone(),
|
|
value: added_amount,
|
|
from_department: unit.department.clone(),
|
|
from_account: unit.account.clone(),
|
|
pass: current_pass,
|
|
})
|
|
.expect("Failed to write moved amount");
|
|
}
|
|
*temp_total
|
|
.entry(Unit {
|
|
department,
|
|
account,
|
|
})
|
|
.or_insert(0.) += added_amount;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
temp_total
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
#[test]
|
|
fn move_money() {
|
|
super::move_money(
|
|
&mut csv::Reader::from_path("reclassrule.csv").unwrap(),
|
|
&mut csv::Reader::from_path("line.csv").unwrap(),
|
|
&mut csv::Reader::from_path("account.csv").unwrap(),
|
|
&mut csv::Reader::from_path("costcentres.csv").unwrap(),
|
|
&mut csv::Writer::from_path("output.csv").unwrap(),
|
|
false,
|
|
true,
|
|
)
|
|
.unwrap();
|
|
}
|
|
}
|