diff --git a/src/products/create_products.rs b/src/products/create_products.rs
index 023d68b..40c5036 100644
--- a/src/products/create_products.rs
+++ b/src/products/create_products.rs
@@ -3,6 +3,7 @@ use std::{
     io::{Read, Write},
 };
 
+use anyhow::bail;
 use chrono::NaiveDateTime;
 use csv::Position;
 // including dsl works better for completion with rust analyzer
@@ -104,8 +105,7 @@ where
     T: Read,
     P: Read,
     Di: Read,
-    // TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
-    O: Write + Send + 'static,
+    O: Write + Send,
 {
     let mut all_definitions: HashMap = read_definitions(definitions)?;
     // Partition the rules by the build-from type, so that we'll run all the rules at once for a particular file, which should be much faster
@@ -143,7 +143,11 @@ where
     for encounter in encounters.records() {
         let encounter = encounter?;
-        let position = encounter.position().unwrap();
+        let position = encounter.position();
+        if position.is_none() {
+            bail!("Position in encounter file not found")
+        }
+        let position = position.unwrap();
         let encounter: HashMap<String, String> = encounter.deserialize(Some(&headers))?;
         encounter_positions.insert(
             encounter.get("EncounterNumber").unwrap().to_string(),
@@ -197,6 +201,9 @@ where
 // TODO: This will iterate over the file multiple times, which could technically be
 // slower than just going through the file once since reading from disk is slower
 // than reading from memory. However, reading from
+// Also, we can use a custom definition format that is translated from the
+// ppm format, so things like constraints/filters are one thing, and way more generic
+// (i.e. a filter can be based on a join between files).
 pub fn build_encounters_polars(
     definition: Definition,
     encounters_path: String,
@@ -211,14 +218,23 @@ pub fn build_encounters_polars(
             // TODO: The filter field depends on the type, as extra/classification need to append extra/classification,
             // but how do we check this?
             let col = col(&filter.field);
+            match filter.source_type {
+                SourceType::CodingDiagnosis => todo!(),
+                SourceType::CodingProcedure => todo!(),
+                SourceType::Encounter => todo!(),
+                SourceType::Incident => todo!(),
+                SourceType::Patient => todo!(),
+                SourceType::Revenue => todo!(),
+                SourceType::Service => todo!(),
+                SourceType::Transfer => todo!(),
+            }
             if filter.equal {
                 col.eq(lit(filter.value.clone()))
             } else {
                 col.neq(lit(filter.value.clone()))
             }
         })
-        .reduce(|prev, next| prev.and(next))
-        .unwrap();
+        .reduce(|prev, next| prev.and(next));
     let constraint = definition
         .constraints
         .iter()
@@ -236,8 +252,7 @@ pub fn build_encounters_polars(
                 ConstraintType::EqualTo => col.eq(lit(constraint.value.clone())),
                 ConstraintType::NotEqualTo => col.neq(lit(constraint.value.clone())),
             }
         })
-        .reduce(|prev, next| prev.and(next))
-        .unwrap();
+        .reduce(|prev, next| prev.and(next));
     // TODO: If the constraints or components include a patient field, then we need to join onto
     // the patient file.
@@ -245,6 +260,10 @@ pub fn build_encounters_polars(
         .has_header(true)
         .finish()?;
     // TODO: Refactor me
+    // TODO: Could also make this really generic. Instead of a patient/whatever-type field, we make it some
+    // file and specify the join field in the file. Then we could add arbitrary files when building.
+    // This would require reworking the constraint/filter to specify a filename rather than a sourcetype,
+    // and would then have some common filename for ppm's sourcetype or whatever.
     if definition
         .constraints
         .iter()
@@ -268,10 +287,17 @@ pub fn build_encounters_polars(
             JoinArgs::new(JoinType::Inner),
         );
     }
 
-    let filtered = reader
-        .filter(filter.and(constraint))
-        .with_streaming(true)
-        .collect()?;
+    let filter = match constraint {
+        Some(constraint) => filter.map(|f| f.and(constraint)),
+        None => filter,
+    };
+
+    let filtered = (match filter {
+        Some(filter) => reader.filter(filter),
+        None => reader,
+    })
+    .with_streaming(true)
+    .collect()?;
     // TODO: Now for each of the filtered records, create a new record that is the built record, based on the components
     // quantity, etc. from the definition
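
Review note (a suggestion, not part of the diff): the new is_none()/bail!()/unwrap()
sequence around encounter.position() can collapse into a single fallible lookup.
A minimal sketch, assuming the enclosing function returns anyhow::Result:

    // Same error message, but no unwrap() left to protect with a guard.
    let position = encounter
        .position()
        .ok_or_else(|| anyhow::anyhow!("Position in encounter file not found"))?;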
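
Similarly, the Option that reduce() now returns is threaded through by hand at the
call site. A minimal sketch of the same logic factored into helpers; combine_predicates
and apply_filter are hypothetical names, assuming only the polars lazy Expr/LazyFrame
API already used in this diff:

    use polars::prelude::*;

    // AND together zero or more predicates; None when the iterator is empty.
    fn combine_predicates(exprs: impl IntoIterator<Item = Expr>) -> Option<Expr> {
        exprs.into_iter().reduce(|prev, next| prev.and(next))
    }

    // Apply a predicate only when one exists, leaving the plan untouched otherwise.
    fn apply_filter(lf: LazyFrame, predicate: Option<Expr>) -> LazyFrame {
        match predicate {
            Some(predicate) => lf.filter(predicate),
            None => lf,
        }
    }

Call-site usage, chaining the two Option<Expr> values so that an absent filter or
constraint simply drops out instead of needing its own match:

    let filtered = apply_filter(reader, combine_predicates(filter.into_iter().chain(constraint)))
        .with_streaming(true)
        .collect()?;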