Start adding product builder
src/create_products.rs (new file, 148 lines added)
@@ -0,0 +1,148 @@
use std::{
    collections::HashMap,
    io::{Read, Write},
    sync::{mpsc, Arc, Mutex},
    thread,
};

use chrono::NaiveDateTime;
use rayon::prelude::{IntoParallelRefIterator, ParallelBridge, ParallelIterator};
use serde::Serialize;

struct Filter {}

struct Constraint {}

enum Component {
    Constant(String),
    Field(String),
}

// `BuildFrom` is used as a `HashMap` key below, so it needs `PartialEq` + `Eq` + `Hash`.
#[derive(PartialEq, Eq, Hash)]
enum BuildFrom {
    Service,
    Transfer,
    Encounter,
    CodingProcedure,
    CodingDiagnosis,
    // TODO: This is hard/expensive, ignore for now as we don't have test data
    LinkedDataset,
}

enum Frequency {
    OnePerSource,
    DailyOrChangeInWard,
    Daily,
}

enum Quantity {
    Constant(f64),
    Extra(String),
}

enum DurationFallback {
    BuiltService,
    Encounter,
    Service,
}

struct Definition {
    name: String,
    components: Vec<Component>,
    filters: Vec<Filter>,
    constraints: Vec<Constraint>,
    build_from: BuildFrom,
    frequency: Frequency,
    quantity: Quantity,
    duration_fallback: DurationFallback,
}

#[derive(Debug, Serialize, Default)]
struct Product {
    // Parse datetime from string: https://rust-lang-nursery.github.io/rust-cookbook/datetime/parse.html#parse-string-into-datetime-struct
    // TODO: Serialisers.
    start_date_time: NaiveDateTime,
    end_date_time: NaiveDateTime,
    encounter_start_date_time: Option<NaiveDateTime>,
    encounter: Option<String>,
    service: Option<String>,
    transfer: Option<String>,
    quantity: Option<f64>,
    duration: Option<f64>,
    actual_charge: Option<f64>,
    standard_cost: Option<f64>,
    // TODO: Enum this?
    day_of_stay: Option<u8>,
    source_allocated_amount: Option<f64>,
}
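
// A minimal sketch of the cookbook-style datetime parsing mentioned above; the format string is an assumption
// about how the source extracts will look, not something this commit pins down.
#[allow(dead_code)]
fn parse_date_time(raw: &str) -> anyhow::Result<NaiveDateTime> {
    Ok(NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S")?)
}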

// TODO: Building from a linked dataset is pretty hard; it potentially requires knowing everything about the
// previous year's costing run (BSCO, Dataset_Encounter_Cache, etc.).
fn create_products<D, E, S, T, P, Di, O>(
    definitions: csv::Reader<D>,
    encounters: csv::Reader<E>,
    services: csv::Reader<S>,
    transfers: csv::Reader<T>,
    procedures: csv::Reader<P>,
    diagnoses: csv::Reader<Di>,
    // TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
    output: &'static mut csv::Writer<O>,
    // TODO: Default to 10 million or something sane
    batch_size: usize,
) -> anyhow::Result<()>
where
    D: Read,
    E: Read,
    S: Read,
    T: Read,
    P: Read,
    Di: Read,
    // TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
    O: Write + Send + 'static,
{
    let mapped_definitions: HashMap<BuildFrom, Definition> = HashMap::new();
    // Partition the rules by the build-from type, so that we run all the rules at once for a particular file,
    // which should be much faster than opening and scanning files one at a time. Could also do batches within files.

    let mut definitions = definitions;
    // First read in all the definitions, into a map of <build from type> -> definition.
    for record in definitions.deserialize::<HashMap<String, String>>() {
        let record = record?;
        // Get the type, then switch based on that, as that's how we determine whether we've got a
        // definition/filter/component/constraint (the definition should always come first).
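        // A possible shape for that switch (sketch only: the "Type" column name and its values are assumptions
        // about the definitions extract, which this commit doesn't pin down yet):
        match record.get("Type").map(|t| t.as_str()) {
            Some("Definition") => { /* start a new Definition and slot it into mapped_definitions */ }
            Some("Component") => { /* attach a Component to the current Definition */ }
            Some("Filter") => { /* attach a Filter */ }
            Some("Constraint") => { /* attach a Constraint */ }
            _ => { /* unknown row type; skip for now */ }
        }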
    }

    // Then read through each file type line by line if there are definitions for that type, and process all the
    // records (reading them into memory up to the batch size). Probably output to a separate thread (or maybe some
    // kind of limited queue?) to write to disk. Try on the same thread to start, then split to a separate thread if
    // it's too slow or I want to experiment. Probably want to use a sync_sender to cap the number of in-flight
    // messages in case writing to disk takes longer than reading (unlikely but possible):
    // https://doc.rust-lang.org/std/sync/mpsc/fn.sync_channel.html
    let (tx, rx) = mpsc::sync_channel::<Product>(batch_size);

    // Dedicated writer thread: drain the channel and serialise each product to the CSV output.
    let write_thread = thread::spawn(move || {
        let mut output = output;
        while let Ok(product) = rx.recv() {
            output.serialize(product).unwrap();
        }
    });
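
    // Re the `&'static mut` TODO in the signature: a possible crossbeam-free alternative (sketch only, not wired
    // in here) is std::thread::scope (stable since Rust 1.63), which lets the writer thread borrow a plain
    // `&mut csv::Writer<O>` instead of a 'static one:
    //
    //     thread::scope(|s| {
    //         s.spawn(|| {
    //             while let Ok(product) = rx.recv() {
    //                 output.serialize(product).unwrap();
    //             }
    //         });
    //         // ...read the source files and send Products down tx from this thread...
    //     });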

    // Now, whenever we want to produce a built service, just write it to tx.

    // TODO: Try with and without rayon. It should help, I think, as we're going through so much data sequentially,
    // although we're still likely to be bottlenecked by write speed alone.
    let mut encounters = encounters;
    encounters
        .deserialize::<HashMap<String, String>>()
        .map(|encounter| encounter.unwrap())
        // TODO: Rayon can't be used with csv directly; consider just batching reads perhaps? (See the batching
        // sketch at the bottom of this file.)
        // .par_bridge()
        .for_each(|encounter| {
            // TODO: Calculate the quantity for this encounter
            tx.send(Product::default()).unwrap();
        });
    // Placeholder: exercise rayon's par_iter over an in-memory batch (empty for now, so this does nothing yet).
    let encounters: Vec<Product> = vec![];
    encounters.par_iter().for_each(|encounter| {
        println!("{:?}", encounter);
        tx.send(Product::default()).unwrap();
    });

    // Drop the sender so the writer thread's recv() loop can end; otherwise the join below blocks forever.
    drop(tx);
    write_thread.join().unwrap();
    Ok(())
}
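
// Sketch of the "batch the reads" idea from the TODO above: csv's readers are inherently sequential, so one option
// is to pull up to `batch_size` rows into memory and only then fan the batch out (e.g. with rayon's par_iter).
// Illustrative only; nothing in this commit calls it yet, and it reuses the same loose
// HashMap<String, String> record shape used above.
#[allow(dead_code)]
fn for_each_batch<R, F>(
    reader: &mut csv::Reader<R>,
    batch_size: usize,
    mut handle_batch: F,
) -> anyhow::Result<()>
where
    R: Read,
    F: FnMut(&[HashMap<String, String>]),
{
    let mut batch = Vec::with_capacity(batch_size);
    for record in reader.deserialize::<HashMap<String, String>>() {
        batch.push(record?);
        if batch.len() == batch_size {
            handle_batch(&batch);
            batch.clear();
        }
    }
    if !batch.is_empty() {
        handle_batch(&batch);
    }
    Ok(())
}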
@@ -11,6 +11,9 @@ pub use self::smush_rules::*;
mod overhead_allocation;
pub use self::overhead_allocation::*;

mod create_products;
pub use self::create_products::*;

#[no_mangle]
pub extern "C" fn move_money_from_text(
    rules: *const c_char,