Start adding product builder

This commit is contained in:
Piv
2023-02-18 23:05:32 +10:30
parent 37c95909db
commit 3d6042d929
4 changed files with 493 additions and 1 deletions

148
src/create_products.rs Normal file

@@ -0,0 +1,148 @@
use std::{
collections::HashMap,
io::{Read, Write},
sync::{mpsc, Arc, Mutex},
thread,
};
use chrono::NaiveDateTime;
use rayon::prelude::{IntoParallelRefIterator, ParallelBridge, ParallelIterator};
use serde::Serialize;
struct Filter {}
struct Constraint {}
enum Component {
Constant(String),
Field(String),
}
#[derive(PartialEq, Eq, Hash)] // used as a HashMap key further down
enum BuildFrom {
Service,
Transfer,
Encounter,
CodingProcedure,
CodingDiagnosis,
// TODO: This is hard/expensive, ignore for now as we don't have test data
LinkedDataset,
}
enum Frequency {
OnePerSource,
DailyOrChangeInWard,
Daily,
}
enum Quantity {
Constant(f64),
Extra(String),
}
enum DurationFallback {
BuiltService,
Encounter,
Service,
}
struct Definition {
name: String,
components: Vec<Component>,
filters: Vec<Filter>,
constraints: Vec<Constraint>,
build_from: BuildFrom,
frequency: Frequency,
quantity: Quantity,
duration_fallback: DurationFallback,
}
#[derive(Debug, Serialize, Default)]
struct Product {
// Parse datetime from string: https://rust-lang-nursery.github.io/rust-cookbook/datetime/parse.html#parse-string-into-datetime-struct
// TODO: Serialisers (see the sketch after this struct).
start_date_time: NaiveDateTime,
end_date_time: NaiveDateTime,
encounter_start_date_time: Option<NaiveDateTime>,
encounter: Option<String>,
service: Option<String>,
transfer: Option<String>,
quantity: Option<f64>,
duration: Option<f64>,
actual_charge: Option<f64>,
standard_cost: Option<f64>,
// TODO: Enum this?
day_of_stay: Option<u8>,
source_allocated_amount: Option<f64>,
}
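// A minimal sketch of the kind of serialiser the TODO above is about, not wired
// up via #[serde(serialize_with = "...")] yet; the "%Y-%m-%d %H:%M:%S" format is
// an assumption, not taken from any spec.
mod naive_date_time_text {
use chrono::NaiveDateTime;
use serde::Serializer;
pub fn serialize<S: Serializer>(value: &NaiveDateTime, serializer: S) -> Result<S::Ok, S::Error> {
serializer.serialize_str(&value.format("%Y-%m-%d %H:%M:%S").to_string())
}
}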
// TODO: Build from linked dataset is pretty hard; it potentially requires knowing everything about the previous year's
// costing run (BSCO, Dataset_Encounter_Cache, etc).
fn create_products<D, E, S, T, P, Di, O>(
definitions: csv::Reader<D>,
encounters: csv::Reader<E>,
services: csv::Reader<S>,
transfers: csv::Reader<T>,
procedures: csv::Reader<P>,
diagnoses: csv::Reader<Di>,
// TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
output: &'static mut csv::Writer<O>,
// TODO: Default to 10 million or something sane
batch_size: usize,
) -> anyhow::Result<()>
where
D: Read,
E: Read,
S: Read,
T: Read,
P: Read,
Di: Read,
// TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well (see the scoped-thread sketch after this function)
O: Write + Send + 'static,
{
let mapped_definitions: HashMap<BuildFrom, Definition> = HashMap::new();
// Partition the rules by their build-from type, so that we run all the rules for a particular file at once, which should be much faster
// than opening files and scanning them one at a time. Could also process each file in batches.
let mut definitions = definitions;
// First read in all the definitions. Read them into a map <build from type> -> definition
for record in definitions.deserialize::<HashMap<String, String>>() {
let record = record?;
// Get the type, then switch based on that, as that's how we determine whether we've got a definition/filter/component/constraint (definition should always come first)
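// A hedged sketch of that switch; the "Type" column name and its values are
// assumptions here, not confirmed against real definition files.
match record.get("Type").map(|t| t.as_str()) {
Some("Definition") => { /* start a new Definition, keyed by its BuildFrom, in mapped_definitions */ }
Some("Component") | Some("Filter") | Some("Constraint") => { /* attach to the most recently started Definition */ }
_ => {}
}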
}
// Then read through each file type line by line if there are definitions for that type, and process all records (read into memory the batch size)
// Probably output to a separate thread (or maybe some kind of limited queue?) to write to disk. Try on same thread to start, then if it's too slow
// or I want to experiment, split to a separate thread. Probably want to use sync_sender to limit number of messages to a max size in case
// writing to disk takes longer than reading (unlikely but possible): https://doc.rust-lang.org/std/sync/mpsc/fn.sync_channel.html
let (tx, rx) = mpsc::sync_channel::<Product>(batch_size);
let write_thread = thread::spawn(move || {
let mut output = output;
while let Ok(product) = rx.recv() {
output.serialize(product).unwrap();
}
});
// Now whenever we want to produce a built service, just write it to tx.
// TODO: Try with and without rayon, should be able to help I think as we're going through so much data sequentially,
// although we're still likely to be bottlenecked by just write-speed
let mut encounters = encounters;
encounters
.deserialize::<HashMap<String, String>>()
.map(|encounter| encounter.unwrap())
// TODO: Rayon can't be used directly with csv's reader; consider just batching reads (see the sketch after the par_iter experiment below)
// .par_bridge()
.for_each(|encounter| {
// TODO: Calculate quantity for this encounter
tx.send(Product::default()).unwrap();
});
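// Presumably a scaffold for the batched approach: once records are buffered in a Vec, rayon's
// par_iter can feed the sync_sender directly (the Vec is empty for now, so nothing is sent here).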
let encounters: Vec<Product> = vec![];
encounters.par_iter().for_each(|encounter| {
println!("{:?}", encounter);
tx.send(Product::default()).unwrap();
});
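// A hedged sketch of the batching idea from the rayon TODO above, illustrated on the
// still-unread `services` file; the per-record handling is a placeholder.
// let mut services = services;
// let mut batch: Vec<HashMap<String, String>> = Vec::with_capacity(batch_size);
// for record in services.deserialize::<HashMap<String, String>>() {
// batch.push(record?);
// if batch.len() == batch_size {
// batch.par_iter().for_each(|_service| tx.send(Product::default()).unwrap());
// batch.clear();
// }
// }
// if !batch.is_empty() {
// batch.par_iter().for_each(|_service| tx.send(Product::default()).unwrap());
// }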
// Drop the sender before joining, otherwise rx.recv() never returns Err and the writer thread never exits.
drop(tx);
write_thread.join().unwrap();
Ok(())
}
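
// A hedged alternative to the `&'static mut` / crossbeam TODOs above: std::thread::scope
// (stable since Rust 1.63) lets a writer thread borrow the csv::Writer directly, with no
// 'static bound. Sketch only, not called from create_products yet.
fn write_products_scoped<O: Write + Send>(output: &mut csv::Writer<O>, rx: mpsc::Receiver<Product>) {
thread::scope(|s| {
s.spawn(move || {
while let Ok(product) = rx.recv() {
output.serialize(product).unwrap();
}
});
});
}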


@@ -11,6 +11,9 @@ pub use self::smush_rules::*;
mod overhead_allocation;
pub use self::overhead_allocation::*;
mod create_products;
pub use self::create_products::*;
#[no_mangle]
pub extern "C" fn move_money_from_text(
rules: *const c_char,