Refactor product creator, remove threading for writing to disk
src/products/create_products.rs (new file, 139 lines)
@@ -0,0 +1,139 @@
use std::{
    collections::HashMap,
    io::{Read, Write},
};

use chrono::NaiveDateTime;
use csv::Position;
use serde::Serialize;

use super::csv::{read_definitions, BuildFrom, ConstraintType, Definition};

#[derive(Debug, Serialize, Default)]
struct Product {
    // Parse datetime from string: https://rust-lang-nursery.github.io/rust-cookbook/datetime/parse.html#parse-string-into-datetime-struct
    // TODO: Serialisers.
    start_date_time: NaiveDateTime,
    end_date_time: NaiveDateTime,
    encounter_start_date_time: Option<NaiveDateTime>,
    encounter: Option<String>,
    service: Option<String>,
    transfer: Option<String>,
    quantity: Option<f64>,
    duration: Option<f64>,
    actual_charge: Option<f64>,
    standard_cost: Option<f64>,
    // TODO: Enum this?
    day_of_stay: Option<u8>,
    source_allocated_amount: Option<f64>,
}
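
// One possible shape for the "TODO: Serialisers" above. This is only a sketch: the format string is
// an assumption and would need to match whatever the downstream consumer of the product CSV expects,
// and the Option<NaiveDateTime> fields would need a variant that handles None. It could be wired up
// with `#[serde(serialize_with = "serialize_date_time")]` on the fields above.
#[allow(dead_code)]
fn serialize_date_time<S>(value: &NaiveDateTime, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    serializer.serialize_str(&value.format("%Y-%m-%d %H:%M:%S").to_string())
}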

// TODO: Building from a linked dataset is pretty hard; it potentially requires knowing everything about the previous year's
// costing run (BSCO, Dataset_Encounter_Cache, etc.).
pub fn create_products<D, E, S, T, P, Di, O>(
    definitions: &mut csv::Reader<D>,
    encounters: &mut csv::Reader<E>,
    services: &mut csv::Reader<S>,
    transfers: &mut csv::Reader<T>,
    procedures: &mut csv::Reader<P>,
    diagnoses: &mut csv::Reader<Di>,
    // TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
    output: &mut csv::Writer<O>,
    // TODO: Default to 10 million or something sane
    batch_size: usize,
) -> anyhow::Result<()>
where
    D: Read,
    E: Read,
    S: Read,
    T: Read,
    P: Read,
    Di: Read,
    // TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
    O: Write + Send + 'static,
{
    let all_definitions: HashMap<String, Definition> = read_definitions(definitions)?;
    // Partition the rules by their build-from type so that we run all the rules for a particular
    // file in one pass, which should be much faster than opening and scanning the files one rule
    // at a time. We could also process each file in batches.

    let mut mapped_definitions: HashMap<BuildFrom, Vec<Definition>> = HashMap::new();
    for definition in all_definitions.into_values() {
        mapped_definitions
            .entry(definition.build_from)
            .or_default()
            .push(definition);
    }

    // Whenever we produce a built service, serialise it straight to the output writer.

    // Note that the csv crate can seek to a saved position, so we can read a batch from a reader
    // and then seek back to a given position (or to position 0) if we couldn't find a particular
    // record. Alternatively, we can store an index of all records (e.g. encounter numbers) mapped
    // to their positions in the reader, so we can seek straight to the right record and read it.
    // https://docs.rs/csv/latest/csv/struct.Reader.html#method.seek
    // Store encounter positions here, so that later, when we read through transfers and the other
    // files, we can seek to the correct position quickly on a cache miss.
    let mut encounter_positions: HashMap<String, Position> = HashMap::new();

    // TODO: An alternative to storing encounter positions would be to sort the file in chunks on
    // disk (an external merge sort).

    // TODO: Try with and without rayon; it should help since we go through so much data
    // sequentially, although we're still likely to be bottlenecked by write speed.
    let headers = encounters.headers()?.clone();

    for encounter in encounters.records() {
        let encounter = encounter?;
        let position = encounter.position().unwrap();
        let encounter: HashMap<String, String> = encounter.deserialize(Some(&headers))?;
        encounter_positions.insert(
            encounter.get("EncounterNumber").unwrap().to_string(),
            position.clone(),
        );
        // TODO: For each encounter definition, check this fits the filter criteria/constraints,
        // and
        let definitions = mapped_definitions.get(&BuildFrom::Encounter).unwrap();
        for definition in definitions {
            let matching_filter = (definition.filters.is_empty()
                || definition.filters.iter().any(|filter| {
                    let Some(field) = encounter.get(filter.field.as_str()) else {
                        return false;
                    };
                    if filter.equal {
                        filter.value == *field
                    } else {
                        filter.value != *field
                    }
                }))
                && (definition.constraints.is_empty()
                    || definition.constraints.iter().any(|constraint| {
                        let Some(field) = encounter.get(constraint.field.as_str()) else {
                            return false;
                        };
                        // TODO: Is this just number/datetime? Should probably be an enum? It's not, seems to be E in the test data
                        let _field_type = &constraint.source_type;
                        match constraint.constraint_type {
                            ConstraintType::Equal => *field == constraint.value,
                            _ => false,
                        }
                    }));
            if matching_filter {
                // Generate the service code
            }
        }

        // TODO: Generate the built service
        output.serialize(Product::default())?;
    }

    // Now do the same with transfers, services, etc., referencing the encounter reader via the
    // positions stored in encounter_positions (see the lookup sketch after this function).
    Ok(())
}
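
The "do the same with transfers" TODO above relies on the encounter_positions index together with
csv::Reader::seek. Below is a minimal sketch of that lookup, assuming the encounter reader wraps
something seekable such as a File; the helper name, signature, and error handling are illustrative
rather than part of this module.

use std::collections::HashMap;
use std::io::{Read, Seek};

// Seek straight to a previously indexed encounter record on a cache miss instead of rescanning
// the whole file.
fn lookup_encounter<R: Read + Seek>(
    encounters: &mut csv::Reader<R>,
    positions: &HashMap<String, csv::Position>,
    encounter_number: &str,
) -> anyhow::Result<Option<csv::StringRecord>> {
    let Some(pos) = positions.get(encounter_number) else {
        // The record was never indexed.
        return Ok(None);
    };
    // Jump the reader to the saved record position.
    encounters.seek(pos.clone())?;
    let mut record = csv::StringRecord::new();
    // Read the single record found at that position.
    if encounters.read_record(&mut record)? {
        Ok(Some(record))
    } else {
        Ok(None)
    }
}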