diff --git a/Cargo.lock b/Cargo.lock index 6757a52..613b5f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anyhow" version = "1.0.68" @@ -23,7 +32,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -52,12 +61,46 @@ dependencies = [ "serde", ] +[[package]] +name = "bumpalo" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" + [[package]] name = "bytemuck" version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc" +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-integer", + "num-traits", + "serde", + "time", + "wasm-bindgen", + "winapi", +] + [[package]] name = "clap" version = "3.2.23" @@ -97,18 +140,79 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" + [[package]] name = "coster-rs" version = "0.1.0" dependencies = [ "anyhow", + "chrono", "clap", "csv", "itertools", "nalgebra", + "rayon", "serde", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" +dependencies = [ + "cfg-if", +] + [[package]] name = "csv" version = "1.1.6" @@ -131,6 +235,50 @@ dependencies = [ "memchr", ] +[[package]] +name = "cxx" +version = "1.0.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86d3488e7665a7a483b57e25bdd90d0aeb2bc7608c8d0346acf2ad3f1caf1d62" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxx-build" +version = "1.0.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48fcaf066a053a41a81dfb14d57d99738b767febb8b735c3016e469fac5da690" +dependencies = [ + "cc", + "codespan-reporting", + "once_cell", + "proc-macro2", + "quote", + "scratch", + "syn", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2ef98b8b717a829ca5603af80e1f9e2e48013ab227b68ef37872ef84ee479bf" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "086c685979a698443656e5cf7856c95c642295a38599f12fb1ff76fb28d19892" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.6.1" @@ -158,6 +306,39 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" +dependencies = [ + "libc", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "winapi", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +dependencies = [ + "cxx", + "cxx-build", +] + [[package]] name = "indexmap" version = "1.9.2" @@ -183,6 +364,15 @@ version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" +[[package]] +name = "js-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -195,6 +385,24 @@ version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +[[package]] +name = "link-cplusplus" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +dependencies = [ + "cc", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + [[package]] name = "matrixmultiply" version = "0.3.2" @@ -210,6 +418,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + [[package]] name = "nalgebra" version = "0.31.0" @@ -276,6 +493,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" +dependencies = [ + "hermit-abi 0.2.6", + "libc", +] + [[package]] name = "once_cell" version = "1.12.0" @@ -342,6 +569,28 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" +[[package]] +name = "rayon" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + [[package]] name = "regex-automata" version = "0.1.10" @@ -363,6 +612,18 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "scratch" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" + [[package]] name = "serde" version = "1.0.137" @@ -428,6 +689,17 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" +[[package]] +name = "time" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +dependencies = [ + "libc", + "wasi", + "winapi", +] + [[package]] name = "typenum" version = "1.15.0" @@ -440,12 +712,78 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "wasm-bindgen" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" + [[package]] name = "wide" version = "0.7.4" diff --git a/Cargo.toml b/Cargo.toml index 6a82117..5904b58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,9 @@ clap = { version = "3.1.18", features = ["derive"] } anyhow = "1.0" itertools = "0.10.3" +chrono = {version = "0.4.23", features = ["default", "serde"]} + +rayon = "1.6.0" [lib] crate-type = ["cdylib", "staticlib", "lib"] diff --git a/src/create_products.rs b/src/create_products.rs new file mode 100644 index 0000000..9dc30ec --- /dev/null +++ b/src/create_products.rs @@ -0,0 +1,148 @@ +use std::{ + collections::HashMap, + io::{Read, Write}, + sync::{mpsc, Arc, Mutex}, + thread, +}; + +use chrono::NaiveDateTime; +use rayon::prelude::{IntoParallelRefIterator, ParallelBridge, ParallelIterator}; +use serde::Serialize; + +struct Filter {} + +struct Constraint {} + +enum Component { + Constant(String), + Field(String), +} + +enum BuildFrom { + Service, + Transfer, + Encounter, + CodingProcedure, + CodingDiagnosis, + // TODO: This is hard/expensive, ignore for now as we don't have test data + LinkedDataset, +} + +enum Frequency { + OnePerSource, + DailyOrChangeInWard, + Daily, +} + +enum Quantity { + Constant(f64), + Extra(String), +} + +enum DurationFallback { + BuiltService, + Encounter, + Service, +} + +struct Definition { + name: String, + components: Vec, + filters: Vec, + constraints: Vec, + build_from: BuildFrom, + frequency: Frequency, + quantity: Quantity, + duration_fallback: DurationFallback, +} + +#[derive(Debug, Serialize, Default)] +struct Product { + // Parse datetime from string: https://rust-lang-nursery.github.io/rust-cookbook/datetime/parse.html#parse-string-into-datetime-struct + // TODO: Serialisers. + start_date_time: NaiveDateTime, + end_date_time: NaiveDateTime, + encounter_start_date_time: Option, + encounter: Option, + service: Option, + transfer: Option, + quantity: Option, + duration: Option, + actual_charge: Option, + standard_cost: Option, + // TODO: Enum this? + day_of_stay: Option, + source_allocated_amount: Option, +} + +// TODO: Build from linked dataset is pretty hard, it potentially requires knowing everything abuot the previous year's +// cosing run (BSCO, Dataset_Encounter_Cache, etc). +fn create_products( + definitions: csv::Reader, + encounters: csv::Reader, + services: csv::Reader, + transfers: csv::Reader, + procedures: csv::Reader

, + diagnoses: csv::Reader, + // TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well + output: &'static mut csv::Writer, + // TODO: Default to 10 million or something sane + batch_size: usize, +) -> anyhow::Result<()> +where + D: Read, + E: Read, + S: Read, + T: Read, + P: Read, + Di: Read, + // TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well + O: Write + Send + 'static, +{ + let mapped_definitions: HashMap = HashMap::new(); + // Partition the rules by the build from type, so that we'll run all the rules at once for a particular file, which should be much faster + // then opening files and scanning one at a time. Could also do batches in files + + let mut definitions = definitions; + // First read in all the definitions. Read them into a map -> definition + for record in definitions.deserialize::>() { + let record = record?; + // Get the type, then switch based on that, as that's how we determine whether we've got a definition/filter/component/constraint (definition should always come first) + } + + // Then read through each file type line by line if there are definitions for that type, and process all records (read into memory the batch size) + // Probably output to a separate thread (or maybe some kind of limited queue?) to write to disk. Try on same thread to start, then if it's too slow + // or I want to experiment, split to a separate thread. Probably want to use sync_sender to limit number of messages to a max size in case + // writing to disk takes longer than reading (unlikely but possible): https://doc.rust-lang.org/std/sync/mpsc/fn.sync_channel.html + let (tx, rx) = mpsc::sync_channel::(batch_size); + + let write_thread = thread::spawn(move || { + let mut output = output; + while let Ok(product) = rx.recv() { + output.serialize(product).unwrap(); + } + }); + + // Now whenever we want to produce a built service, just write it to tx. + + // TODO: Try with and without rayon, should be able to help I think as we're going through so much data sequentially, + // although we're still likely to be bottlenecked by just write-speed + let mut encounters = encounters; + encounters + .deserialize::>() + .map(|encounter| encounter.unwrap()) + //TODO: Rayon can't be used with csv, consider just batching reads perhaps? + // .par_bridge() + .for_each(|encounter| { + // TODO: Calculate quantitty for this encounter + tx.send(Product::default()).unwrap(); + }); + let encounters: Vec = vec![]; + encounters.par_iter().for_each(|encounter| { + println!("{:?}", encounter); + tx.send(Product::default()).unwrap(); + }); + + write_thread.join().unwrap(); + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index f9608eb..69c4477 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,9 @@ pub use self::smush_rules::*; mod overhead_allocation; pub use self::overhead_allocation::*; +mod create_products; +pub use self::create_products::*; + #[no_mangle] pub extern "C" fn move_money_from_text( rules: *const c_char,