ingey/src/products/create_products.rs

use std::{collections::HashMap, path::{Path, PathBuf}};
use anyhow::anyhow;
use chrono::NaiveDateTime;
use itertools::Itertools;
// Including dsl works better for completion with rust-analyzer.
use polars::lazy::dsl::*;
use polars::prelude::*;
use serde::Serialize;
use super::csv::{read_definitions, Definition, FileJoin, FilterType, SourceType};
// TODO: Polars recommends jemalloc, but its docs note that platform support is limited.
//use jemallocator::Jemalloc;
// #[global_allocator]
// static GLOBAL: Jemalloc = Jemalloc;
#[derive(Debug, Serialize, Default)]
struct Product {
    // Parse datetime from string: https://rust-lang-nursery.github.io/rust-cookbook/datetime/parse.html#parse-string-into-datetime-struct
    // TODO: Serialisers (see the sketch after this struct).
    start_date_time: NaiveDateTime,
    end_date_time: NaiveDateTime,
    encounter_start_date_time: Option<NaiveDateTime>,
    encounter: Option<String>,
    service: Option<String>,
    transfer: Option<String>,
    quantity: Option<f64>,
    duration: Option<f64>,
    actual_charge: Option<f64>,
    standard_cost: Option<f64>,
    // TODO: Enum this?
    day_of_stay: Option<u8>,
    source_allocated_amount: Option<f64>,
}
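// A minimal sketch of the parsing and serialising the TODOs above refer to.
// The "%Y-%m-%d %H:%M:%S" format string is an assumption about the source
// data; the real inputs may use a different layout.
#[allow(dead_code)]
fn parse_date_time(raw: &str) -> anyhow::Result<NaiveDateTime> {
    Ok(NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S")?)
}

// A serde serialiser for the Option<NaiveDateTime> fields could then be wired
// up per field with #[serde(serialize_with = "ser_opt_date_time")].
#[allow(dead_code)]
fn ser_opt_date_time<S: serde::Serializer>(
    value: &Option<NaiveDateTime>,
    serializer: S,
) -> Result<S::Ok, S::Error> {
    match value {
        // Format the datetime as a string; fall back to a serde "none" unit.
        Some(dt) => serializer.serialize_str(&dt.format("%Y-%m-%d %H:%M:%S").to_string()),
        None => serializer.serialize_none(),
    }
}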
pub struct InputFile {
    pub file_path: PathBuf,
    pub joins: Vec<FileJoin>,
}
pub fn create_products_polars(
    definitions_path: PathBuf,
    inputs: HashMap<SourceType, InputFile>,
    output_path: PathBuf,
) -> anyhow::Result<()> {
    let definitions = read_definitions(&mut csv::Reader::from_path(definitions_path)?)?;
    let definitions = definitions.values().collect_vec();
    for definition in definitions {
        build_polars(definition, &inputs, &output_path)?;
    }
    Ok(())
}
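// Example wiring, as a sketch only. The SourceType variant and file names are
// illustrative; the real variants are defined in super::csv.
//
// let mut inputs = HashMap::new();
// inputs.insert(
//     SourceType::Theatre, // hypothetical variant
//     InputFile { file_path: PathBuf::from("theatre.csv"), joins: vec![] },
// );
// create_products_polars(PathBuf::from("definitions.csv"), inputs, PathBuf::from("products.csv"))?;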
// TODO: This iterates over the input file once per definition, which could be
// slower than a single pass, since reading from disk is slower than reading
// from memory.
// We could also use a custom definition format translated from the ppm format,
// so that constraints/filters become a single, more generic concept
// (e.g. a filter could be based on a join between files).
pub fn build_polars(
    definition: &Definition,
    inputs: &HashMap<SourceType, InputFile>,
    output_path: &Path,
) -> anyhow::Result<()> {
    // 1. Apply filters to limit encounters.
    let filter = definition
        .filters
        .iter()
        .map(|filter| {
            let col = col(&filter.field);
            let value = lit(filter.value.clone());
            match filter.filter_type {
                FilterType::Equal => col.eq(value),
                FilterType::GreaterThan => col.gt(value),
                FilterType::GreaterThanOrEqualTo => col.gt_eq(value),
                FilterType::LessThan => col.lt(value),
                FilterType::LessThanOrEqualTo => col.lt_eq(value),
                FilterType::NotEqualTo => col.neq(value),
            }
        })
        .reduce(|prev, next| prev.and(next));
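    // For example, two filters [ward == "ICU", los > "3"] reduce to the single
    // expression col("ward").eq(lit("ICU")).and(col("los").gt(lit("3"))),
    // while an empty filter list yields None and the frame passes through
    // unfiltered below.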
    let input_file = inputs
        .get(&definition.source_type)
        .ok_or_else(|| anyhow!("failed to find an input file for the definition's source type"))?;
    let reader = LazyCsvReader::new(&input_file.file_path)
        .has_header(true)
        .finish()?;
    // TODO: Do joins based on usage in the definition's components and filters,
    // ideally joining only the columns that are actually wanted. This could be
    // done by first scanning each component/filter for the columns it references.
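    // A rough sketch of what the join pass could look like. This assumes,
    // hypothetically, that FileJoin carries `file_path`, `left_on` and
    // `right_on` fields; the real shape of FileJoin is defined in super::csv.
    //
    // let mut joined = reader;
    // for join in &input_file.joins {
    //     let other = LazyCsvReader::new(&join.file_path).has_header(true).finish()?;
    //     joined = joined.left_join(other, col(&join.left_on), col(&join.right_on));
    // }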
    let mut filtered = match filter {
        Some(filter) => reader.filter(filter),
        None => reader,
    }
    .with_streaming(true)
    .collect()?;
    // TODO: For each filtered record, build the output record (quantity and so
    // on) from the definition's components.
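    // A sketch of the shape that step could take, applied before the
    // .collect() above. The column names here are hypothetical; the real
    // component fields are defined on Definition in super::csv.
    //
    // let built = lazy_frame.select([
    //     col("admit_time").alias("start_date_time"),
    //     col("units").alias("quantity"),
    //     col("charge").alias("actual_charge"),
    // ]);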
    let mut file = std::fs::File::create(output_path)?;
    // TODO: Write the built product columns rather than the filtered input.
    CsvWriter::new(&mut file).finish(&mut filtered)?;
    Ok(())
}