Start adding linking

Piv
2023-06-04 21:47:02 +09:30
parent bd7e3590c0
commit befce9f60a
2 changed files with 149 additions and 68 deletions

@@ -13,6 +13,8 @@ pub use self::products::create_products;
 mod shared_models;
 pub use self::shared_models::*;
 
+pub mod link;
+
 #[no_mangle]
 pub extern "C" fn move_money_from_text(
     rules: *const c_char,
@@ -22,22 +24,10 @@ pub extern "C" fn move_money_from_text(
     use_numeric_accounts: bool,
 ) -> *mut c_char {
     let mut output_writer = csv::Writer::from_writer(vec![]);
-    let safe_rules = unsafe {
-        assert!(!rules.is_null());
-        CStr::from_ptr(rules)
-    };
-    let safe_lines = unsafe {
-        assert!(!lines.is_null());
-        CStr::from_ptr(lines)
-    };
-    let safe_accounts = unsafe {
-        assert!(!accounts.is_null());
-        CStr::from_ptr(accounts)
-    };
-    let safe_cost_centres = unsafe {
-        assert!(!cost_centres.is_null());
-        CStr::from_ptr(cost_centres)
-    };
+    let safe_rules = unwrap_c_char(rules);
+    let safe_lines = unwrap_c_char(lines);
+    let safe_accounts = unwrap_c_char(accounts);
+    let safe_cost_centres = unwrap_c_char(cost_centres);
     move_money(
         &mut csv::Reader::from_reader(safe_rules.to_bytes()),
         &mut csv::Reader::from_reader(safe_lines.to_bytes()),
@@ -81,30 +71,12 @@ pub extern "C" fn allocate_overheads_from_text(
     account_type: *const c_char,
     use_numeric_accounts: bool,
 ) -> *mut c_char {
-    let lines = unsafe {
-        assert!(!lines.is_null());
-        CStr::from_ptr(lines)
-    };
-    let accounts = unsafe {
-        assert!(!accounts.is_null());
-        CStr::from_ptr(accounts)
-    };
-    let allocation_statistics = unsafe {
-        assert!(!allocation_statistics.is_null());
-        CStr::from_ptr(allocation_statistics)
-    };
-    let areas = unsafe {
-        assert!(!areas.is_null());
-        CStr::from_ptr(areas)
-    };
-    let cost_centres = unsafe {
-        assert!(!cost_centres.is_null());
-        CStr::from_ptr(cost_centres)
-    };
-    let account_type = unsafe {
-        assert!(!account_type.is_null());
-        CStr::from_ptr(account_type)
-    };
+    let lines = unwrap_c_char(lines);
+    let accounts = unwrap_c_char(accounts);
+    let allocation_statistics = unwrap_c_char(allocation_statistics);
+    let areas = unwrap_c_char(areas);
+    let cost_centres = unwrap_c_char(cost_centres);
+    let account_type = unwrap_c_char(account_type);
     let mut output_writer = csv::Writer::from_writer(vec![]);
     reciprocal_allocation(
         &mut csv::Reader::from_reader(lines.to_bytes()),
@@ -142,34 +114,13 @@ pub extern "C" fn allocate_overheads_from_text_to_file(
     use_numeric_accounts: bool,
     show_from: bool,
 ) {
-    let lines = unsafe {
-        assert!(!lines.is_null());
-        CStr::from_ptr(lines)
-    };
-    let accounts = unsafe {
-        assert!(!accounts.is_null());
-        CStr::from_ptr(accounts)
-    };
-    let allocation_statistics = unsafe {
-        assert!(!allocation_statistics.is_null());
-        CStr::from_ptr(allocation_statistics)
-    };
-    let areas = unsafe {
-        assert!(!areas.is_null());
-        CStr::from_ptr(areas)
-    };
-    let cost_centres = unsafe {
-        assert!(!cost_centres.is_null());
-        CStr::from_ptr(cost_centres)
-    };
-    let account_type = unsafe {
-        assert!(!account_type.is_null());
-        CStr::from_ptr(account_type)
-    };
-    let output_path = unsafe {
-        assert!(!output_path.is_null());
-        CStr::from_ptr(output_path)
-    };
+    let lines = unwrap_c_char(lines);
+    let accounts = unwrap_c_char(accounts);
+    let allocation_statistics = unwrap_c_char(allocation_statistics);
+    let areas = unwrap_c_char(areas);
+    let cost_centres = unwrap_c_char(cost_centres);
+    let account_type = unwrap_c_char(account_type);
+    let output_path = unwrap_c_char(output_path);
     reciprocal_allocation(
         &mut csv::Reader::from_reader(lines.to_bytes()),
         &mut csv::Reader::from_reader(accounts.to_bytes()),
@@ -187,6 +138,13 @@ pub extern "C" fn allocate_overheads_from_text_to_file(
         .expect("Failed to allocate overheads");
 }
 
+fn unwrap_c_char<'a>(s: *const c_char) -> &'a CStr {
+    unsafe {
+        assert!(!s.is_null());
+        CStr::from_ptr(s)
+    }
+}
+
 #[no_mangle]
 pub extern "C" fn allocate_overheads_from_text_free(s: *mut c_char) {
     unsafe {
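The hunks above factor the repeated unsafe null-check-and-borrow blocks into a single unwrap_c_char helper. As a minimal standalone sketch of that pattern (the string_length entry point below is hypothetical, for illustration only):

use std::ffi::CStr;
use std::os::raw::c_char;

// Borrow a NUL-terminated C string; panics on a null pointer.
// Note the returned lifetime is unconstrained, so the caller must keep
// the pointer alive for as long as the &CStr is used.
fn unwrap_c_char<'a>(s: *const c_char) -> &'a CStr {
    unsafe {
        assert!(!s.is_null());
        CStr::from_ptr(s)
    }
}

// Hypothetical FFI entry point using the helper, mirroring move_money_from_text.
#[no_mangle]
pub extern "C" fn string_length(s: *const c_char) -> usize {
    unwrap_c_char(s).to_bytes().len()
}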

src/link.rs Normal file

@@ -0,0 +1,123 @@
+// Given an encounter + service file, assign an encounter to each service with the given rules
+// Algorithm:
+// 1. Read all linking rules into memory
+// 2. Scan through the list of encounters and create indexes for match columns, based on what columns get linked
+// 3. For each service, check if there's an encounter that matches the linking rules
+//    Preferably do this without needing to continually scan through all encounters
+// Data spec:
+// Linking rules specify match columns, date columns, a source number column, and a target-source number column
+// Match columns can be anything, but must come in pairs of source column = target column
+// Date columns must again be a pair, and include a look-back/forward range
+// The source number column is the column containing the source id (e.g. encounter number)
+// The target-source number column is the column in the target file that contains the source id (e.g. linked encounter number)
+
+use std::{
+    collections::HashMap,
+    io::{Read, Write},
+};
+
+use itertools::Itertools;
+
+pub struct MatchColumn {
+    source_column: String,
+    target_column: String,
+}
+
+pub struct DateMatchColumn {
+    source_column: String,
+    target_column: String,
+    search_back_days: i32,
+    search_forward_days: i32,
+}
+
+pub struct LinkingRule {
+    match_columns: Vec<MatchColumn>,
+    date_match_columns: Vec<DateMatchColumn>,
+}
+
+pub struct ProcessLinkingRule {
+    linking_rules: Vec<LinkingRule>,
+    source_number_column: String,
+    target_source_number_column: String,
+}
+
+// TODO: return thiserror or something rather than anyhow
+pub fn link(
+    // TODO: Make these readers/writers not coupled with csv reader/writer
+    source_reader: &mut csv::Reader<impl Read>,
+    target_reader: &mut csv::Reader<impl Read>,
+    linking_rule: ProcessLinkingRule,
+    linked_writer: &mut csv::Writer<impl Write>,
+) -> anyhow::Result<()> {
+    let mut source_columns: Vec<&String> = linking_rule
+        .linking_rules
+        .iter()
+        .flat_map(|rule| {
+            rule.match_columns
+                .iter()
+                .map(|match_column| &match_column.source_column)
+                .collect::<Vec<&String>>()
+        })
+        // TODO: Check this filters out correctly, as it's filtering on a reference, not a value
+        .unique()
+        .collect();
+
+    let mut source_date_columns: Vec<&String> = linking_rule
+        .linking_rules
+        .iter()
+        .flat_map(|rule| {
+            rule.date_match_columns
+                .iter()
+                .map(|match_column| &match_column.source_column)
+                .collect::<Vec<&String>>()
+        })
+        // TODO: Check this filters out correctly, as it's filtering on a reference, not a value
+        .unique()
+        .collect();
+
+    // Indexes of encounter ids for the given match column values (index in vec = index in source_columns)
+    // i.e. a list of maps of match column value -> source ids that have that value for the match column
+    // TODO: Can save more memory by storing values in the match columns in a vec of vecs
+    // Note: not as memory efficient as just continually scanning through the encounter file each time,
+    // but it's way faster and will scale better
+    let mut source_indexes: Vec<HashMap<String, Vec<usize>>> =
+        vec![HashMap::new(); source_columns.len()];
+    let mut source_ids: Vec<String> = Vec::new();
+
+    // TODO: Merge with source_indexes?
+    // Also store the actual date value rather than a string, so we
+    // don't need to convert as much later
+    let mut source_dates: Vec<HashMap<String, Vec<usize>>>;
+
+    for source_record in source_reader.deserialize() {
+        let source_record: HashMap<String, String> = source_record?;
+        let current_idx = source_ids.len();
+        // Make indexes of the parts we need.
+        source_ids.push(
+            source_record
+                .get(&linking_rule.source_number_column)
+                .unwrap()
+                .clone(),
+        );
+        for (i, source_column) in source_columns.iter().enumerate() {
+            let source_column_value = source_record.get(*source_column);
+            if source_column_value.is_none() || source_column_value.unwrap().is_empty() {
+                continue;
+            }
+            let source_column_value = source_column_value.unwrap();
+            source_indexes[i]
+                .entry(source_column_value.clone())
+                .or_insert(Vec::new())
+                .push(current_idx);
+        }
+    }
+
+    for target_record in target_reader.deserialize() {
+        let target_record: HashMap<String, String> = target_record?;
+        // For each target record, get the source records that match each criterion in the match columns,
+        // then filter down to the date columns... how to do this quickly (without scanning again)? The easiest thing
+        // is to just store a list of a list of all the dates + source ids. Not perfectly efficient, but we can
+        // sort this to make it easier to find dates within the forward/back lookup
+    }
+    Ok(())
+}
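For orientation, here is a minimal sketch of how link might be driven. The CSV data, column names, and rule values are invented for illustration; since the struct fields above are private, a call like this would have to live inside src/link.rs, e.g. as a unit test:

#[cfg(test)]
mod tests {
    use super::*;

    // Hypothetical data: encounters are the source file, services the target.
    #[test]
    fn runs_link_over_tiny_inputs() -> anyhow::Result<()> {
        let encounters = "EncounterNumber,PatientId,AdmitDate\nE1,P1,2023-01-01\n";
        let services = "ServiceId,PatientId,ServiceDate\nS1,P1,2023-01-05\n";
        let rule = ProcessLinkingRule {
            linking_rules: vec![LinkingRule {
                match_columns: vec![MatchColumn {
                    source_column: "PatientId".to_string(),
                    target_column: "PatientId".to_string(),
                }],
                date_match_columns: vec![DateMatchColumn {
                    source_column: "AdmitDate".to_string(),
                    target_column: "ServiceDate".to_string(),
                    search_back_days: 0,
                    search_forward_days: 30,
                }],
            }],
            source_number_column: "EncounterNumber".to_string(),
            target_source_number_column: "LinkedEncounterNumber".to_string(),
        };
        let mut linked = csv::Writer::from_writer(vec![]);
        link(
            &mut csv::Reader::from_reader(encounters.as_bytes()),
            &mut csv::Reader::from_reader(services.as_bytes()),
            rule,
            &mut linked,
        )
    }
}

With the target-matching loop still a TODO, this only exercises the index-building pass over the source file.

The closing comment sketches the date strategy: keep all (date, source index) pairs in a list sorted by date, then binary-search the look-back/forward window instead of rescanning. A rough sketch of that lookup, assuming dates are already parsed to day numbers (not part of this commit):

// dates must be sorted by day number; returns the source indexes whose
// date falls within [target - back, target + forward].
fn ids_in_window(dates: &[(i64, usize)], target: i64, back: i64, forward: i64) -> Vec<usize> {
    let lo = dates.partition_point(|&(d, _)| d < target - back);
    let hi = dates.partition_point(|&(d, _)| d <= target + forward);
    dates[lo..hi].iter().map(|&(_, idx)| idx).collect()
}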