Start adding product builder

This commit is contained in:
Piv
2023-02-18 23:05:32 +10:30
parent 37c95909db
commit 3d6042d929
4 changed files with 493 additions and 1 deletions

340
Cargo.lock generated
View File

@@ -2,6 +2,15 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "anyhow" name = "anyhow"
version = "1.0.68" version = "1.0.68"
@@ -23,7 +32,7 @@ version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [ dependencies = [
"hermit-abi", "hermit-abi 0.1.19",
"libc", "libc",
"winapi", "winapi",
] ]
@@ -52,12 +61,46 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "bumpalo"
version = "3.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535"
[[package]] [[package]]
name = "bytemuck" name = "bytemuck"
version = "1.9.1" version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc" checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc"
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f"
dependencies = [
"iana-time-zone",
"js-sys",
"num-integer",
"num-traits",
"serde",
"time",
"wasm-bindgen",
"winapi",
]
[[package]] [[package]]
name = "clap" name = "clap"
version = "3.2.23" version = "3.2.23"
@@ -97,18 +140,79 @@ dependencies = [
"os_str_bytes", "os_str_bytes",
] ]
[[package]]
name = "codespan-reporting"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
dependencies = [
"termcolor",
"unicode-width",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]] [[package]]
name = "coster-rs" name = "coster-rs"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"chrono",
"clap", "clap",
"csv", "csv",
"itertools", "itertools",
"nalgebra", "nalgebra",
"rayon",
"serde", "serde",
] ]
[[package]]
name = "crossbeam-channel"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "csv" name = "csv"
version = "1.1.6" version = "1.1.6"
@@ -131,6 +235,50 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "cxx"
version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86d3488e7665a7a483b57e25bdd90d0aeb2bc7608c8d0346acf2ad3f1caf1d62"
dependencies = [
"cc",
"cxxbridge-flags",
"cxxbridge-macro",
"link-cplusplus",
]
[[package]]
name = "cxx-build"
version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48fcaf066a053a41a81dfb14d57d99738b767febb8b735c3016e469fac5da690"
dependencies = [
"cc",
"codespan-reporting",
"once_cell",
"proc-macro2",
"quote",
"scratch",
"syn",
]
[[package]]
name = "cxxbridge-flags"
version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2ef98b8b717a829ca5603af80e1f9e2e48013ab227b68ef37872ef84ee479bf"
[[package]]
name = "cxxbridge-macro"
version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "086c685979a698443656e5cf7856c95c642295a38599f12fb1ff76fb28d19892"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "either" name = "either"
version = "1.6.1" version = "1.6.1"
@@ -158,6 +306,39 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "hermit-abi"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
dependencies = [
"libc",
]
[[package]]
name = "iana-time-zone"
version = "0.1.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca"
dependencies = [
"cxx",
"cxx-build",
]
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "1.9.2" version = "1.9.2"
@@ -183,6 +364,15 @@ version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "js-sys"
version = "0.3.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730"
dependencies = [
"wasm-bindgen",
]
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.4.0" version = "1.4.0"
@@ -195,6 +385,24 @@ version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "link-cplusplus"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5"
dependencies = [
"cc",
]
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "matrixmultiply" name = "matrixmultiply"
version = "0.3.2" version = "0.3.2"
@@ -210,6 +418,15 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memoffset"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
dependencies = [
"autocfg",
]
[[package]] [[package]]
name = "nalgebra" name = "nalgebra"
version = "0.31.0" version = "0.31.0"
@@ -276,6 +493,16 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "num_cpus"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
dependencies = [
"hermit-abi 0.2.6",
"libc",
]
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.12.0" version = "1.12.0"
@@ -342,6 +569,28 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "rayon"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"num_cpus",
]
[[package]] [[package]]
name = "regex-automata" name = "regex-automata"
version = "0.1.10" version = "0.1.10"
@@ -363,6 +612,18 @@ dependencies = [
"bytemuck", "bytemuck",
] ]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "scratch"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.137" version = "1.0.137"
@@ -428,6 +689,17 @@ version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
[[package]]
name = "time"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
dependencies = [
"libc",
"wasi",
"winapi",
]
[[package]] [[package]]
name = "typenum" name = "typenum"
version = "1.15.0" version = "1.15.0"
@@ -440,12 +712,78 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.4" version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasm-bindgen"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d"
[[package]] [[package]]
name = "wide" name = "wide"
version = "0.7.4" version = "0.7.4"

View File

@@ -18,6 +18,9 @@ clap = { version = "3.1.18", features = ["derive"] }
anyhow = "1.0" anyhow = "1.0"
itertools = "0.10.3" itertools = "0.10.3"
chrono = {version = "0.4.23", features = ["default", "serde"]}
rayon = "1.6.0"
[lib] [lib]
crate-type = ["cdylib", "staticlib", "lib"] crate-type = ["cdylib", "staticlib", "lib"]

148
src/create_products.rs Normal file
View File

@@ -0,0 +1,148 @@
use std::{
collections::HashMap,
io::{Read, Write},
sync::{mpsc, Arc, Mutex},
thread,
};
use chrono::NaiveDateTime;
use rayon::prelude::{IntoParallelRefIterator, ParallelBridge, ParallelIterator};
use serde::Serialize;
struct Filter {}
struct Constraint {}
enum Component {
Constant(String),
Field(String),
}
enum BuildFrom {
Service,
Transfer,
Encounter,
CodingProcedure,
CodingDiagnosis,
// TODO: This is hard/expensive, ignore for now as we don't have test data
LinkedDataset,
}
enum Frequency {
OnePerSource,
DailyOrChangeInWard,
Daily,
}
enum Quantity {
Constant(f64),
Extra(String),
}
enum DurationFallback {
BuiltService,
Encounter,
Service,
}
struct Definition {
name: String,
components: Vec<Component>,
filters: Vec<Filter>,
constraints: Vec<Constraint>,
build_from: BuildFrom,
frequency: Frequency,
quantity: Quantity,
duration_fallback: DurationFallback,
}
#[derive(Debug, Serialize, Default)]
struct Product {
// Parse datetime from string: https://rust-lang-nursery.github.io/rust-cookbook/datetime/parse.html#parse-string-into-datetime-struct
// TODO: Serialisers.
start_date_time: NaiveDateTime,
end_date_time: NaiveDateTime,
encounter_start_date_time: Option<NaiveDateTime>,
encounter: Option<String>,
service: Option<String>,
transfer: Option<String>,
quantity: Option<f64>,
duration: Option<f64>,
actual_charge: Option<f64>,
standard_cost: Option<f64>,
// TODO: Enum this?
day_of_stay: Option<u8>,
source_allocated_amount: Option<f64>,
}
// TODO: Build from linked dataset is pretty hard, it potentially requires knowing everything abuot the previous year's
// cosing run (BSCO, Dataset_Encounter_Cache, etc).
fn create_products<D, E, S, T, P, Di, O>(
definitions: csv::Reader<D>,
encounters: csv::Reader<E>,
services: csv::Reader<S>,
transfers: csv::Reader<T>,
procedures: csv::Reader<P>,
diagnoses: csv::Reader<Di>,
// TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
output: &'static mut csv::Writer<O>,
// TODO: Default to 10 million or something sane
batch_size: usize,
) -> anyhow::Result<()>
where
D: Read,
E: Read,
S: Read,
T: Read,
P: Read,
Di: Read,
// TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
O: Write + Send + 'static,
{
let mapped_definitions: HashMap<BuildFrom, Definition> = HashMap::new();
// Partition the rules by the build from type, so that we'll run all the rules at once for a particular file, which should be much faster
// then opening files and scanning one at a time. Could also do batches in files
let mut definitions = definitions;
// First read in all the definitions. Read them into a map <build from type> -> definition
for record in definitions.deserialize::<HashMap<String, String>>() {
let record = record?;
// Get the type, then switch based on that, as that's how we determine whether we've got a definition/filter/component/constraint (definition should always come first)
}
// Then read through each file type line by line if there are definitions for that type, and process all records (read into memory the batch size)
// Probably output to a separate thread (or maybe some kind of limited queue?) to write to disk. Try on same thread to start, then if it's too slow
// or I want to experiment, split to a separate thread. Probably want to use sync_sender to limit number of messages to a max size in case
// writing to disk takes longer than reading (unlikely but possible): https://doc.rust-lang.org/std/sync/mpsc/fn.sync_channel.html
let (tx, rx) = mpsc::sync_channel::<Product>(batch_size);
let write_thread = thread::spawn(move || {
let mut output = output;
while let Ok(product) = rx.recv() {
output.serialize(product).unwrap();
}
});
// Now whenever we want to produce a built service, just write it to tx.
// TODO: Try with and without rayon, should be able to help I think as we're going through so much data sequentially,
// although we're still likely to be bottlenecked by just write-speed
let mut encounters = encounters;
encounters
.deserialize::<HashMap<String, String>>()
.map(|encounter| encounter.unwrap())
//TODO: Rayon can't be used with csv, consider just batching reads perhaps?
// .par_bridge()
.for_each(|encounter| {
// TODO: Calculate quantitty for this encounter
tx.send(Product::default()).unwrap();
});
let encounters: Vec<Product> = vec![];
encounters.par_iter().for_each(|encounter| {
println!("{:?}", encounter);
tx.send(Product::default()).unwrap();
});
write_thread.join().unwrap();
Ok(())
}

View File

@@ -11,6 +11,9 @@ pub use self::smush_rules::*;
mod overhead_allocation; mod overhead_allocation;
pub use self::overhead_allocation::*; pub use self::overhead_allocation::*;
mod create_products;
pub use self::create_products::*;
#[no_mangle] #[no_mangle]
pub extern "C" fn move_money_from_text( pub extern "C" fn move_money_from_text(
rules: *const c_char, rules: *const c_char,