Merge branch 'linking' into 'main'
Start adding linking See merge request vato007/coster-rs!4
This commit is contained in:
94
src/lib.rs
94
src/lib.rs
@@ -13,6 +13,8 @@ pub use self::products::create_products;
|
|||||||
mod shared_models;
|
mod shared_models;
|
||||||
pub use self::shared_models::*;
|
pub use self::shared_models::*;
|
||||||
|
|
||||||
|
pub mod link;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn move_money_from_text(
|
pub extern "C" fn move_money_from_text(
|
||||||
rules: *const c_char,
|
rules: *const c_char,
|
||||||
@@ -22,22 +24,10 @@ pub extern "C" fn move_money_from_text(
|
|||||||
use_numeric_accounts: bool,
|
use_numeric_accounts: bool,
|
||||||
) -> *mut c_char {
|
) -> *mut c_char {
|
||||||
let mut output_writer = csv::Writer::from_writer(vec![]);
|
let mut output_writer = csv::Writer::from_writer(vec![]);
|
||||||
let safe_rules = unsafe {
|
let safe_rules = unwrap_c_char(rules);
|
||||||
assert!(!rules.is_null());
|
let safe_lines = unwrap_c_char(lines);
|
||||||
CStr::from_ptr(rules)
|
let safe_accounts = unwrap_c_char(accounts);
|
||||||
};
|
let safe_cost_centres = unwrap_c_char(cost_centres);
|
||||||
let safe_lines = unsafe {
|
|
||||||
assert!(!lines.is_null());
|
|
||||||
CStr::from_ptr(lines)
|
|
||||||
};
|
|
||||||
let safe_accounts = unsafe {
|
|
||||||
assert!(!accounts.is_null());
|
|
||||||
CStr::from_ptr(accounts)
|
|
||||||
};
|
|
||||||
let safe_cost_centres = unsafe {
|
|
||||||
assert!(!cost_centres.is_null());
|
|
||||||
CStr::from_ptr(cost_centres)
|
|
||||||
};
|
|
||||||
move_money(
|
move_money(
|
||||||
&mut csv::Reader::from_reader(safe_rules.to_bytes()),
|
&mut csv::Reader::from_reader(safe_rules.to_bytes()),
|
||||||
&mut csv::Reader::from_reader(safe_lines.to_bytes()),
|
&mut csv::Reader::from_reader(safe_lines.to_bytes()),
|
||||||
@@ -81,30 +71,12 @@ pub extern "C" fn allocate_overheads_from_text(
|
|||||||
account_type: *const c_char,
|
account_type: *const c_char,
|
||||||
use_numeric_accounts: bool,
|
use_numeric_accounts: bool,
|
||||||
) -> *mut c_char {
|
) -> *mut c_char {
|
||||||
let lines = unsafe {
|
let lines = unwrap_c_char(lines);
|
||||||
assert!(!lines.is_null());
|
let accounts = unwrap_c_char(accounts);
|
||||||
CStr::from_ptr(lines)
|
let allocation_statistics = unwrap_c_char(allocation_statistics);
|
||||||
};
|
let areas = unwrap_c_char(areas);
|
||||||
let accounts = unsafe {
|
let cost_centres = unwrap_c_char(cost_centres);
|
||||||
assert!(!accounts.is_null());
|
let account_type = unwrap_c_char(account_type);
|
||||||
CStr::from_ptr(accounts)
|
|
||||||
};
|
|
||||||
let allocation_statistics = unsafe {
|
|
||||||
assert!(!allocation_statistics.is_null());
|
|
||||||
CStr::from_ptr(allocation_statistics)
|
|
||||||
};
|
|
||||||
let areas = unsafe {
|
|
||||||
assert!(!areas.is_null());
|
|
||||||
CStr::from_ptr(areas)
|
|
||||||
};
|
|
||||||
let cost_centres = unsafe {
|
|
||||||
assert!(!cost_centres.is_null());
|
|
||||||
CStr::from_ptr(cost_centres)
|
|
||||||
};
|
|
||||||
let account_type = unsafe {
|
|
||||||
assert!(!account_type.is_null());
|
|
||||||
CStr::from_ptr(account_type)
|
|
||||||
};
|
|
||||||
let mut output_writer = csv::Writer::from_writer(vec![]);
|
let mut output_writer = csv::Writer::from_writer(vec![]);
|
||||||
reciprocal_allocation(
|
reciprocal_allocation(
|
||||||
&mut csv::Reader::from_reader(lines.to_bytes()),
|
&mut csv::Reader::from_reader(lines.to_bytes()),
|
||||||
@@ -142,34 +114,13 @@ pub extern "C" fn allocate_overheads_from_text_to_file(
|
|||||||
use_numeric_accounts: bool,
|
use_numeric_accounts: bool,
|
||||||
show_from: bool,
|
show_from: bool,
|
||||||
) {
|
) {
|
||||||
let lines = unsafe {
|
let lines = unwrap_c_char(lines);
|
||||||
assert!(!lines.is_null());
|
let accounts = unwrap_c_char(accounts);
|
||||||
CStr::from_ptr(lines)
|
let allocation_statistics = unwrap_c_char(allocation_statistics);
|
||||||
};
|
let areas = unwrap_c_char(areas);
|
||||||
let accounts = unsafe {
|
let cost_centres = unwrap_c_char(cost_centres);
|
||||||
assert!(!accounts.is_null());
|
let account_type = unwrap_c_char(account_type);
|
||||||
CStr::from_ptr(accounts)
|
let output_path = unwrap_c_char(output_path);
|
||||||
};
|
|
||||||
let allocation_statistics = unsafe {
|
|
||||||
assert!(!allocation_statistics.is_null());
|
|
||||||
CStr::from_ptr(allocation_statistics)
|
|
||||||
};
|
|
||||||
let areas = unsafe {
|
|
||||||
assert!(!areas.is_null());
|
|
||||||
CStr::from_ptr(areas)
|
|
||||||
};
|
|
||||||
let cost_centres = unsafe {
|
|
||||||
assert!(!cost_centres.is_null());
|
|
||||||
CStr::from_ptr(cost_centres)
|
|
||||||
};
|
|
||||||
let account_type = unsafe {
|
|
||||||
assert!(!account_type.is_null());
|
|
||||||
CStr::from_ptr(account_type)
|
|
||||||
};
|
|
||||||
let output_path = unsafe {
|
|
||||||
assert!(!output_path.is_null());
|
|
||||||
CStr::from_ptr(output_path)
|
|
||||||
};
|
|
||||||
reciprocal_allocation(
|
reciprocal_allocation(
|
||||||
&mut csv::Reader::from_reader(lines.to_bytes()),
|
&mut csv::Reader::from_reader(lines.to_bytes()),
|
||||||
&mut csv::Reader::from_reader(accounts.to_bytes()),
|
&mut csv::Reader::from_reader(accounts.to_bytes()),
|
||||||
@@ -187,6 +138,13 @@ pub extern "C" fn allocate_overheads_from_text_to_file(
|
|||||||
.expect("Failed to allocate overheads");
|
.expect("Failed to allocate overheads");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Borrows a C string received over the FFI boundary as a [`CStr`].
///
/// The returned lifetime `'a` is chosen by the caller and is not tied to any
/// Rust-owned value: the caller must keep the underlying buffer alive and
/// unmodified for as long as the returned reference is used.
///
/// # Panics
///
/// Panics if `s` is a null pointer.
fn unwrap_c_char<'a>(s: *const c_char) -> &'a CStr {
    assert!(
        !s.is_null(),
        "received a null pointer where a C string was expected"
    );
    // SAFETY: `s` is non-null (checked above). The FFI caller is responsible
    // for passing a pointer to a NUL-terminated string that remains valid for
    // the lifetime `'a`; that part of the contract cannot be verified here.
    unsafe { CStr::from_ptr(s) }
}
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn allocate_overheads_from_text_free(s: *mut c_char) {
|
pub extern "C" fn allocate_overheads_from_text_free(s: *mut c_char) {
|
||||||
unsafe {
|
unsafe {
|
||||||
|
|||||||
123
src/link.rs
Normal file
123
src/link.rs
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
// Given encounter + service file, assign an encounter to each service with the given rules
|
||||||
|
|
||||||
|
// Algorithm:
|
||||||
|
// 1. Read all linking rules into memory
|
||||||
|
// 2. Scan through list of encounters and create indexes for match columns, based on what columns get linked
|
||||||
|
// 3. For each service, check if there's an encounter that matches the linking rules
|
||||||
|
// Preferably do this without needing to continually scan through all encounters
|
||||||
|
|
||||||
|
// Data spec:
|
||||||
|
// Linking rules specify match columns, date columns, source number column, target-source number column
|
||||||
|
// Match columns can be anything. Must come in a pair of source column=target column.
|
||||||
|
// Date columns must again be a pair, and include a look back/forward range
|
||||||
|
// Source number column is the column containing the source id (e.g. encounter number)
|
||||||
|
// Target-source number column is the column in the target file that contains the source id (e.g. linked encounter number)
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
io::{Read, Write},
|
||||||
|
};
|
||||||
|
|
||||||
|
use itertools::Itertools;
|
||||||
|
|
||||||
|
/// A pair of columns whose values are compared when linking: one column in
/// the source file and the corresponding column in the target file.
///
/// Fields are public so that callers outside this module can build rules.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MatchColumn {
    /// Name of the column in the source file.
    pub source_column: String,
    /// Name of the column in the target file paired with `source_column`.
    pub target_column: String,
}
|
||||||
|
|
||||||
|
/// A pair of date columns (source and target) together with the look
/// back/forward range used when matching on them.
///
/// Fields are public so that callers outside this module can build rules.
// NOTE(review): the matching code does not use the ranges yet, so whether the
// bounds are inclusive and which side they are measured from is still to be
// defined.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DateMatchColumn {
    /// Name of the date column in the source file.
    pub source_column: String,
    /// Name of the date column in the target file paired with `source_column`.
    pub target_column: String,
    /// Number of days to look back when matching dates.
    pub search_back_days: i32,
    /// Number of days to look forward when matching dates.
    pub search_forward_days: i32,
}
|
||||||
|
|
||||||
|
pub struct LinkingRule {
|
||||||
|
match_columns: Vec<MatchColumn>,
|
||||||
|
date_match_columns: Vec<DateMatchColumn>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ProcessLinkingRule {
|
||||||
|
linking_rules: Vec<LinkingRule>,
|
||||||
|
source_number_column: String,
|
||||||
|
target_source_number_column: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: return thiserror or something rather than anyhow
|
||||||
|
pub fn link(
|
||||||
|
// TODO: Make these readers/writers not coupled with csv reader/writer
|
||||||
|
source_reader: &mut csv::Reader<impl Read>,
|
||||||
|
target_reader: &mut csv::Reader<impl Read>,
|
||||||
|
linking_rule: ProcessLinkingRule,
|
||||||
|
linked_writer: &mut csv::Writer<impl Write>,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let mut source_columns: Vec<&String> = linking_rule
|
||||||
|
.linking_rules
|
||||||
|
.iter()
|
||||||
|
.flat_map(|rule| {
|
||||||
|
rule.match_columns
|
||||||
|
.iter()
|
||||||
|
.map(|match_column| &match_column.source_column)
|
||||||
|
.collect::<Vec<&String>>()
|
||||||
|
})
|
||||||
|
// TODO: Check this filters out correctly, as it's filtering on a reference, not a value
|
||||||
|
.unique()
|
||||||
|
.collect();
|
||||||
|
let mut source_date_columns: Vec<&String> = linking_rule
|
||||||
|
.linking_rules
|
||||||
|
.iter()
|
||||||
|
.flat_map(|rule| {
|
||||||
|
rule.date_match_columns
|
||||||
|
.iter()
|
||||||
|
.map(|match_column| &match_column.source_column)
|
||||||
|
.collect::<Vec<&String>>()
|
||||||
|
})
|
||||||
|
// TODO: Check this filters out correctly, as it's filtering on a reference, not a value
|
||||||
|
.unique()
|
||||||
|
.collect();
|
||||||
|
// Indexes of encounter ids for the given match column values (index in vec = index in source_columns)
|
||||||
|
// i.e List of Map of match column values -> source id, or source id with given values for each match column
|
||||||
|
// TODO: Can save more memory by storing values in the match columns in a vec of vecs
|
||||||
|
// Note: not as memory efficient as just continually scanning through encounter file each time,
|
||||||
|
// but it's way faster and will scale better
|
||||||
|
let mut source_indexes: Vec<HashMap<String, Vec<usize>>> =
|
||||||
|
vec![HashMap::new(); source_columns.len()];
|
||||||
|
let mut source_ids: Vec<String> = Vec::new();
|
||||||
|
// TODO: Merge with source_indexes?
|
||||||
|
// Also store the actual date value rather than string, so we
|
||||||
|
// don't need to convert as much later
|
||||||
|
let mut source_dates: Vec<HashMap<String, Vec<usize>>>;
|
||||||
|
for source_record in source_reader.deserialize() {
|
||||||
|
let source_record: HashMap<String, String> = source_record?;
|
||||||
|
let current_idx = source_ids.len();
|
||||||
|
// Make indexes of parts we need.
|
||||||
|
source_ids.push(
|
||||||
|
source_record
|
||||||
|
.get(&linking_rule.source_number_column)
|
||||||
|
.unwrap()
|
||||||
|
.clone(),
|
||||||
|
);
|
||||||
|
for (i, source_column) in source_columns.iter().enumerate() {
|
||||||
|
let source_column_value = source_record.get(*source_column);
|
||||||
|
if source_column_value.is_none() || source_column_value.unwrap().is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let source_column_value = source_column_value.unwrap();
|
||||||
|
source_indexes[i]
|
||||||
|
.entry(source_column_value.clone())
|
||||||
|
.or_insert(Vec::new())
|
||||||
|
.push(current_idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for target_record in target_reader.deserialize() {
|
||||||
|
let target_record: HashMap<String, String> = target_record?;
|
||||||
|
|
||||||
|
// For each target record, get the source records that match each criteria in the match columns,
|
||||||
|
// then filter down to the date columns... how to do this quickly (without scanning again). Easiest thing
|
||||||
|
// is to just store a list of a list of all the dates + source ids. Not perfectly efficient, but can
|
||||||
|
// sort this to make it easier to find dates within forward/back lookup
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user