Start adding product builder

This commit is contained in:
Piv
2023-02-18 23:05:32 +10:30
parent 37c95909db
commit 3d6042d929
4 changed files with 493 additions and 1 deletions

148
src/create_products.rs Normal file

@@ -0,0 +1,148 @@
use std::{
collections::HashMap,
io::{Read, Write},
sync::{mpsc, Arc, Mutex},
thread,
};
use chrono::NaiveDateTime;
use rayon::prelude::{IntoParallelRefIterator, ParallelBridge, ParallelIterator};
use serde::Serialize;
struct Filter {}
struct Constraint {}
enum Component {
Constant(String),
Field(String),
}
#[derive(PartialEq, Eq, Hash)] // used as a HashMap key further down
enum BuildFrom {
Service,
Transfer,
Encounter,
CodingProcedure,
CodingDiagnosis,
// TODO: This is hard/expensive, ignore for now as we don't have test data
LinkedDataset,
}
enum Frequency {
OnePerSource,
DailyOrChangeInWard,
Daily,
}
enum Quantity {
Constant(f64),
Extra(String),
}
enum DurationFallback {
BuiltService,
Encounter,
Service,
}
struct Definition {
name: String,
components: Vec<Component>,
filters: Vec<Filter>,
constraints: Vec<Constraint>,
build_from: BuildFrom,
frequency: Frequency,
quantity: Quantity,
duration_fallback: DurationFallback,
}
#[derive(Debug, Serialize, Default)]
struct Product {
// Parse datetime from string: https://rust-lang-nursery.github.io/rust-cookbook/datetime/parse.html#parse-string-into-datetime-struct
// TODO: Serialisers (see the sketch after this struct).
start_date_time: NaiveDateTime,
end_date_time: NaiveDateTime,
encounter_start_date_time: Option<NaiveDateTime>,
encounter: Option<String>,
service: Option<String>,
transfer: Option<String>,
quantity: Option<f64>,
duration: Option<f64>,
actual_charge: Option<f64>,
standard_cost: Option<f64>,
// TODO: Enum this?
day_of_stay: Option<u8>,
source_allocated_amount: Option<f64>,
}
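// A minimal sketch of the kind of serialiser the TODO above is about, not wired
// up via #[serde(serialize_with = "...")] yet; the "%Y-%m-%d %H:%M:%S" format is
// an assumption, not taken from any spec.
mod naive_date_time_text {
use chrono::NaiveDateTime;
use serde::Serializer;
pub fn serialize<S: Serializer>(value: &NaiveDateTime, serializer: S) -> Result<S::Ok, S::Error> {
serializer.serialize_str(&value.format("%Y-%m-%d %H:%M:%S").to_string())
}
}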
// TODO: Build from linked dataset is pretty hard; it potentially requires knowing everything about the previous year's
// costing run (BSCO, Dataset_Encounter_Cache, etc).
fn create_products<D, E, S, T, P, Di, O>(
definitions: csv::Reader<D>,
encounters: csv::Reader<E>,
services: csv::Reader<S>,
transfers: csv::Reader<T>,
procedures: csv::Reader<P>,
diagnoses: csv::Reader<Di>,
// TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
output: &'static mut csv::Writer<O>,
// TODO: Default to 10 million or something sane
batch_size: usize,
) -> anyhow::Result<()>
where
D: Read,
E: Read,
S: Read,
T: Read,
P: Read,
Di: Read,
// TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well (see the scoped-thread sketch after this function)
O: Write + Send + 'static,
{
let mapped_definitions: HashMap<BuildFrom, Definition> = HashMap::new();
// Partition the rules by their build-from type, so that we run all the rules for a particular file at once, which should be much faster
// than opening files and scanning them one at a time. Could also process each file in batches.
let mut definitions = definitions;
// First read in all the definitions. Read them into a map <build from type> -> definition
for record in definitions.deserialize::<HashMap<String, String>>() {
let record = record?;
// Get the type, then switch based on that, as that's how we determine whether we've got a definition/filter/component/constraint (definition should always come first)
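// A hedged sketch of that switch; the "Type" column name and its values are
// assumptions here, not confirmed against real definition files.
match record.get("Type").map(|t| t.as_str()) {
Some("Definition") => { /* start a new Definition, keyed by its BuildFrom, in mapped_definitions */ }
Some("Component") | Some("Filter") | Some("Constraint") => { /* attach to the most recently started Definition */ }
_ => {}
}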
}
// Then read through each file type line by line if there are definitions for that type, and process all records (read into memory the batch size)
// Probably output to a separate thread (or maybe some kind of limited queue?) to write to disk. Try on same thread to start, then if it's too slow
// or I want to experiment, split to a separate thread. Probably want to use sync_sender to limit number of messages to a max size in case
// writing to disk takes longer than reading (unlikely but possible): https://doc.rust-lang.org/std/sync/mpsc/fn.sync_channel.html
let (tx, rx) = mpsc::sync_channel::<Product>(batch_size);
let write_thread = thread::spawn(move || {
let mut output = output;
while let Ok(product) = rx.recv() {
output.serialize(product).unwrap();
}
});
// Now whenever we want to produce a built service, just write it to tx.
// TODO: Try with and without rayon, should be able to help I think as we're going through so much data sequentially,
// although we're still likely to be bottlenecked by just write-speed
let mut encounters = encounters;
encounters
.deserialize::<HashMap<String, String>>()
.map(|encounter| encounter.unwrap())
// TODO: Rayon can't be used directly with csv's reader; consider just batching reads (see the sketch after the par_iter experiment below)
// .par_bridge()
.for_each(|encounter| {
// TODO: Calculate quantity for this encounter
tx.send(Product::default()).unwrap();
});
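// Presumably a scaffold for the batched approach: once records are buffered in a Vec, rayon's
// par_iter can feed the sync_sender directly (the Vec is empty for now, so nothing is sent here).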
let encounters: Vec<Product> = vec![];
encounters.par_iter().for_each(|encounter| {
println!("{:?}", encounter);
tx.send(Product::default()).unwrap();
});
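// A hedged sketch of the batching idea from the rayon TODO above, illustrated on the
// still-unread `services` file; the per-record handling is a placeholder.
// let mut services = services;
// let mut batch: Vec<HashMap<String, String>> = Vec::with_capacity(batch_size);
// for record in services.deserialize::<HashMap<String, String>>() {
// batch.push(record?);
// if batch.len() == batch_size {
// batch.par_iter().for_each(|_service| tx.send(Product::default()).unwrap());
// batch.clear();
// }
// }
// if !batch.is_empty() {
// batch.par_iter().for_each(|_service| tx.send(Product::default()).unwrap());
// }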
// Drop the sender before joining, otherwise rx.recv() never returns Err and the writer thread never exits.
drop(tx);
write_thread.join().unwrap();
Ok(())
}
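
// A hedged alternative to the `&'static mut` / crossbeam TODOs above: std::thread::scope
// (stable since Rust 1.63) lets a writer thread borrow the csv::Writer directly, with no
// 'static bound. Sketch only, not called from create_products yet.
fn write_products_scoped<O: Write + Send>(output: &mut csv::Writer<O>, rx: mpsc::Receiver<Product>) {
thread::scope(|s| {
s.spawn(move || {
while let Ok(product) = rx.recv() {
output.serialize(product).unwrap();
}
});
});
}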


@@ -11,6 +11,9 @@ pub use self::smush_rules::*;
mod overhead_allocation;
pub use self::overhead_allocation::*;
mod create_products;
pub use self::create_products::*;
#[no_mangle]
pub extern "C" fn move_money_from_text(
rules: *const c_char,