110 lines
4.0 KiB
Rust
110 lines
4.0 KiB
Rust
use std::{collections::HashMap, path::PathBuf};
|
|
|
|
use anyhow::anyhow;
|
|
use chrono::NaiveDateTime;
|
|
use itertools::Itertools;
|
|
// including dsl works better for completion with rust analyzer
|
|
use polars::lazy::dsl::*;
|
|
use polars::prelude::*;
|
|
use serde::Serialize;
|
|
|
|
use super::csv::{read_definitions, Definition, FileJoin, SourceType};
|
|
|
|
// TODO: Polars suggests this, but docs suggest it doesn't have very good platform support
|
|
//use jemallocator::Jemalloc;
|
|
// #[global_allocator]
|
|
// static GLOBAL: Jemalloc = Jemalloc;
|
|
|
|
#[derive(Debug, Serialize, Default)]
|
|
struct Product {
|
|
// Parse datetime from string: https://rust-lang-nursery.github.io/rust-cookbook/datetime/parse.html#parse-string-into-datetime-struct
|
|
// TODO: Serialisers.
|
|
start_date_time: NaiveDateTime,
|
|
end_date_time: NaiveDateTime,
|
|
encounter_start_date_time: Option<NaiveDateTime>,
|
|
encounter: Option<String>,
|
|
service: Option<String>,
|
|
transfer: Option<String>,
|
|
quantity: Option<f64>,
|
|
duration: Option<f64>,
|
|
actual_charge: Option<f64>,
|
|
standard_cost: Option<f64>,
|
|
// TODO: Enum this?
|
|
day_of_stay: Option<u8>,
|
|
source_allocated_amount: Option<f64>,
|
|
}
|
|
|
|
pub struct InputFile {
|
|
pub file_path: PathBuf,
|
|
pub joins: Vec<FileJoin>,
|
|
}
|
|
|
|
pub fn create_products_polars(
|
|
definitions_path: PathBuf,
|
|
inputs: HashMap<SourceType, InputFile>,
|
|
output_path: PathBuf,
|
|
) -> anyhow::Result<()> {
|
|
let definitions = read_definitions(&mut csv::Reader::from_path(definitions_path)?)?;
|
|
let definitions = definitions.values().collect_vec();
|
|
for definition in definitions {
|
|
build_polars(definition, &inputs, &output_path)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// TODO: This will iterate over the file multiple times, which could technically be
// slower than just going through the file once, since reading from disk is slower
// than reading from memory. However, reading from ... (NOTE: this sentence was left
// unfinished in the original comment; complete or remove it.)
// Also, we can use a custom definition format that is translated from the
// ppm format, so things like constraints/filters are one thing, and way more generic
// (i.e. filter can be based on a join between files).
|
|
pub fn build_polars(
|
|
definition: &Definition,
|
|
inputs: &HashMap<SourceType, InputFile>,
|
|
output_path: &PathBuf,
|
|
) -> anyhow::Result<()> {
|
|
// 1. Apply filters to limit encounters
|
|
let filter = definition
|
|
.filters
|
|
.iter()
|
|
.map(|filter| {
|
|
let col = col(&filter.field);
|
|
match filter.filter_type {
|
|
super::csv::FilterType::Equal => col.eq(lit(filter.value.clone())),
|
|
super::csv::FilterType::GreaterThan => col.gt(lit(filter.value.clone())),
|
|
super::csv::FilterType::GreaterThanOrEqualTo => {
|
|
col.gt_eq(lit(filter.value.clone()))
|
|
}
|
|
super::csv::FilterType::LessThan => col.lt(lit(filter.value.clone())),
|
|
super::csv::FilterType::LessThanOrEqualTo => col.lt_eq(lit(filter.value.clone())),
|
|
super::csv::FilterType::NotEqualTo => col.neq(lit(filter.value.clone())),
|
|
}
|
|
})
|
|
.reduce(|prev, next| prev.and(next));
|
|
|
|
let input_file = inputs
|
|
.get(&definition.source_type)
|
|
.ok_or(anyhow!("Failed to find valid file"))?;
|
|
let reader = LazyCsvReader::new(&input_file.file_path)
|
|
.has_header(true)
|
|
.finish()?;
|
|
// TODO: Do joins based on usage in definitions components and filters. Ideally just join the columns that are actually wanted.
|
|
// Can do this by first going over each component/filter, and
|
|
|
|
let mut filtered = match filter {
|
|
Some(filter) => reader.filter(filter),
|
|
None => reader,
|
|
}
|
|
.with_streaming(true)
|
|
.collect()?;
|
|
|
|
// TODO: Now for each of the filtered records, create a new record that is the built record, based on the components
|
|
// quantity, etc. from the definition
|
|
|
|
let mut file = std::fs::File::create(output_path).unwrap();
|
|
// TODO: Don't use filtered, but the results that outputs created product columns
|
|
CsvWriter::new(&mut file).finish(&mut filtered)?;
|
|
Ok(())
|
|
}
|