Add schema generation, refactor CLI, add most of the derive operations

2024-08-03 16:33:16 +09:30
parent fbf74d5299
commit 3cdaa81da1
11 changed files with 567 additions and 328 deletions

src/cli/commands.rs (new file, 107 lines)

@@ -0,0 +1,107 @@
use std::path::PathBuf;
use clap::Subcommand;
#[derive(Subcommand)]
pub enum Commands {
/// Moves money between accounts and departments, using the given rules and lines
MoveMoney {
#[arg(short = 'r', long, value_name = "FILE")]
rules: PathBuf,
#[arg(short = 'l', long, value_name = "FILE")]
lines: PathBuf,
#[arg(short = 'a', long, value_name = "FILE")]
accounts: PathBuf,
#[arg(short = 'c', long, value_name = "FILE")]
cost_centres: PathBuf,
#[arg(short, long, value_name = "FILE")]
output: Option<PathBuf>,
#[arg(short, long)]
use_numeric_accounts: bool,
#[arg(short, long)]
flush_pass: bool,
},
/// Allocates servicing department amounts to operating departments
AllocateOverheads {
#[arg(short, long, value_name = "FILE")]
lines: PathBuf,
#[arg(short, long, value_name = "FILE")]
accounts: PathBuf,
#[arg(short = 's', long, value_name = "FILE")]
allocation_statistics: PathBuf,
#[arg(long, value_name = "FILE")]
areas: PathBuf,
#[arg(short, long, value_name = "FILE")]
cost_centres: PathBuf,
#[arg(short, long)]
use_numeric_accounts: bool,
#[arg(long, default_value = "E")]
account_type: String,
#[arg(short, long)]
exclude_negative_allocation_statistics: bool,
#[arg(short = 'f', long)]
show_from: bool,
#[arg(short, long, default_value = "0.00000000000000001")]
zero_threshold: f64,
#[arg(short, long, value_name = "FILE", default_value = "alloc_output.csv")]
output: PathBuf,
#[arg(short, long)]
msgpack_serialisation: bool,
},
CreateProducts {
#[arg(short, long, value_name = "FILE")]
definitions: PathBuf,
#[arg(short, long, value_name = "FILE")]
encounters: PathBuf,
#[arg(short, long, value_name = "FILE")]
services: PathBuf,
#[arg(short, long, value_name = "FILE")]
transfers: PathBuf,
#[arg(short, long, value_name = "FILE")]
procedures: PathBuf,
#[arg(long, value_name = "FILE")]
diagnoses: PathBuf,
#[arg(long, value_name = "FILE")]
patients: PathBuf,
#[arg(short, long, value_name = "FILE")]
revenues: PathBuf,
#[arg(short, long, value_name = "FILE")]
output: PathBuf,
},
RunGraph {
#[arg(short, long, value_name = "FILE")]
graph: PathBuf,
#[arg(short, long, default_value_t = num_cpus::get())]
threads: usize,
},
GenerateSchema {
#[arg(short, long, value_name = "FILE", default_value = "schema.json")]
output: PathBuf,
},
}
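
A hypothetical smoke test (not part of this commit) for the wiring above: clap's derive lowers `MoveMoney` to the `move-money` subcommand and each snake_case field to a kebab-case long flag, so a sketch like the following, placed alongside the enum and assuming the `Cli` type from src/cli/mod.rs, should parse cleanly.

#[cfg(test)]
mod tests {
    use clap::Parser;

    use crate::cli::Cli;

    // Kebab-case renaming is clap's default for derive, so `MoveMoney`
    // becomes `move-money` and `cost_centres` becomes `--cost-centres`.
    #[test]
    fn move_money_args_parse() {
        let parsed = Cli::try_parse_from([
            "coster-rs",
            "move-money",
            "--rules", "rules.csv",
            "--lines", "lines.csv",
            "--accounts", "accounts.csv",
            "--cost-centres", "cost_centres.csv",
        ]);
        assert!(parsed.is_ok());
    }
}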

src/cli/mod.rs (new file, 187 lines)

@@ -0,0 +1,187 @@
use std::{
collections::HashMap,
fs::File,
io::{BufReader, BufWriter},
path::PathBuf,
};
use std::io::Write;
use clap::{command, Parser};
pub use commands::Commands;
use coster_rs::{
create_products::InputFile,
graph::{Graph, RunnableGraph},
SourceType,
};
use log::info;
use schemars::schema_for;
mod commands;
#[derive(Parser)]
#[command(name = "coster-rs")]
#[command(author = "Pivato M. <mpivato4@gmail.com>")]
#[command(version = "0.0.1")]
#[command(about = "Simple, fast, efficient costing tool", long_about = None)]
pub struct Cli {
#[clap(subcommand)]
pub command: Commands,
}
impl Cli {
pub fn run(self) -> anyhow::Result<()> {
match self.command {
Commands::MoveMoney {
rules,
lines,
accounts,
cost_centres,
output,
use_numeric_accounts,
flush_pass,
} => coster_rs::move_money(
&mut csv::Reader::from_path(rules)?,
&mut csv::Reader::from_path(lines)?,
&mut csv::Reader::from_path(accounts)?,
&mut csv::Reader::from_path(cost_centres)?,
&mut csv::Writer::from_path(output.unwrap_or(PathBuf::from("output.csv")))?,
use_numeric_accounts,
flush_pass,
),
Commands::AllocateOverheads {
lines,
accounts,
allocation_statistics,
areas,
cost_centres,
use_numeric_accounts,
account_type,
exclude_negative_allocation_statistics,
show_from,
zero_threshold,
output,
msgpack_serialisation,
} => {
if msgpack_serialisation {
let mut file = BufWriter::new(File::create(output)?);
coster_rs::reciprocal_allocation(
&mut csv::Reader::from_path(lines)?,
&mut csv::Reader::from_path(accounts)?,
&mut csv::Reader::from_path(allocation_statistics)?,
&mut csv::Reader::from_path(areas)?,
&mut csv::Reader::from_path(cost_centres)?,
&mut rmp_serde::Serializer::new(&mut file),
use_numeric_accounts,
exclude_negative_allocation_statistics,
true,
account_type,
show_from,
zero_threshold,
)
} else {
coster_rs::reciprocal_allocation(
&mut csv::Reader::from_path(lines)?,
&mut csv::Reader::from_path(accounts)?,
&mut csv::Reader::from_path(allocation_statistics)?,
&mut csv::Reader::from_path(areas)?,
&mut csv::Reader::from_path(cost_centres)?,
&mut csv::Writer::from_path(output)?,
use_numeric_accounts,
exclude_negative_allocation_statistics,
true,
account_type,
show_from,
zero_threshold,
)
}
}
Commands::CreateProducts {
definitions,
encounters,
services,
transfers,
procedures,
diagnoses,
patients,
revenues,
output,
} => {
let mut inputs = HashMap::new();
inputs.insert(
SourceType::Encounter,
InputFile {
file_path: encounters,
joins: HashMap::new(),
date_order_column: Some("StartDateTime".to_owned()),
},
);
inputs.insert(
SourceType::Service,
InputFile {
file_path: services,
joins: HashMap::new(),
date_order_column: Some("StartDateTime".to_owned()),
},
);
inputs.insert(
SourceType::Transfer,
InputFile {
file_path: transfers,
joins: HashMap::new(),
date_order_column: Some("StartDateTime".to_owned()),
},
);
inputs.insert(
SourceType::CodingProcedure,
InputFile {
file_path: procedures,
joins: HashMap::new(),
date_order_column: Some("ProcedureDateTime".to_owned()),
},
);
inputs.insert(
SourceType::CodingDiagnosis,
InputFile {
file_path: diagnoses,
joins: HashMap::new(),
date_order_column: None,
},
);
inputs.insert(
SourceType::Patient,
InputFile {
file_path: patients,
joins: HashMap::new(),
date_order_column: None,
},
);
inputs.insert(
SourceType::Revenue,
InputFile {
file_path: revenues,
joins: HashMap::new(),
date_order_column: None,
},
);
// TODO: `inputs` is assembled above but not yet passed through to
// create_products_polars.
coster_rs::create_products::create_products_polars(definitions, vec![], output)
}
Commands::RunGraph { graph, threads } => {
let file = File::open(graph)?;
let reader = BufReader::new(file);
let graph = serde_json::from_reader(reader)?;
let graph = RunnableGraph::from_graph(graph);
graph.run_default_tasks(threads, |id, status| {
info!("Node with id {} finished with status {:?}", id, status)
})
}
Commands::GenerateSchema { output } => {
let schema = schema_for!(Graph);
let mut output = File::create(output)?;
write!(output, "{}", serde_json::to_string_pretty(&schema)?)?;
Ok(())
}
}
}
}
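
For reference, a minimal sketch (a hypothetical helper, assuming the schemars 0.8 API imported above) of what the GenerateSchema arm is doing: schema_for! expands to a RootSchema value that is itself serde-serializable, so emitting the schema is just pretty-printing.

use coster_rs::graph::Graph;
use schemars::{schema::RootSchema, schema_for};

// Build the graph schema in memory instead of writing it to a file.
fn graph_schema_json() -> anyhow::Result<String> {
    let schema: RootSchema = schema_for!(Graph);
    Ok(serde_json::to_string_pretty(&schema)?)
}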

src/derive.rs

@@ -1,5 +1,8 @@
use std::{collections::BTreeMap, str::FromStr};
use anyhow::bail;
use itertools::Itertools;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use crate::{
@@ -7,18 +10,13 @@ use crate::{
node::RunnableNode,
};
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub enum DeriveColumnType {
Column(String),
Constant(String),
}
#[derive(Serialize, Deserialize, Clone)]
pub struct MapOperation {
pub mapped_value: String,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub enum DatePart {
Year,
Month,
@@ -29,13 +27,13 @@ pub enum DatePart {
Second,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub enum SplitType {
DateTime(String, DatePart),
Numeric(String, isize),
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub enum MatchComparisonType {
Equal,
GreaterThan,
@@ -43,18 +41,22 @@ pub enum MatchComparisonType {
NotEqual,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub enum DeriveOperation {
Concat(Vec<DeriveColumnType>),
Add(Vec<DeriveColumnType>),
Multiply(Vec<DeriveColumnType>),
Subtract(DeriveColumnType, DeriveColumnType),
Divide(DeriveColumnType, DeriveColumnType),
Map(String, Vec<MapOperation>),
Subtract(Vec<DeriveColumnType>),
Divide(Vec<DeriveColumnType>),
Map(String),
// Might be better putting this into its own node, then we can do sorting operations
// and ensure the split only happens when a particular column changes value. Could
// also just leave these more complex use cases for SQL/Code nodes instead (if even possible
// in an SQL node, and code nodes aren't even implemented yet)
Split(String, SplitType),
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub enum ValueType {
String,
Integer,
@@ -62,7 +64,7 @@ pub enum ValueType {
Boolean,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct DeriveFilter {
pub column_name: String,
pub comparator: MatchComparisonType,
@@ -158,19 +160,19 @@ fn get_filter_rule<T: PartialOrd>(filter: &DeriveFilter, value: T) -> FilterRule
}
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct DeriveColumnOperation {
pub column_name: String,
pub operation: DeriveOperation,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct DeriveRule {
pub operations: Vec<DeriveColumnOperation>,
pub filters: Vec<DeriveFilter>,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct DeriveNode {
pub rules: Vec<DeriveRule>,
pub input_file_path: String,
@@ -192,6 +194,120 @@ impl DeriveRule {
}
}
pub fn is_line_valid(line: &BTreeMap<String, String>, rules: &DataValidators) -> bool {
rules.iter().all(|rule| {
line.get(&rule.get_field_name()).map_or(true, |value| {
if value.trim().is_empty() {
true
} else {
rule.is_valid(value)
}
})
})
}
fn concat_columns(line: &BTreeMap<String, String>, columns: &Vec<DeriveColumnType>) -> String {
columns
.iter()
.map(|col| match col {
DeriveColumnType::Column(column) => line.get(column).cloned().unwrap_or_default(),
DeriveColumnType::Constant(constant) => constant.clone(),
})
.collect()
}
fn reduce_numeric_columns<F>(
line: &BTreeMap<String, String>,
columns: &Vec<DeriveColumnType>,
reducer: F,
) -> String
where
F: Fn(f64, f64) -> f64,
{
columns
.iter()
.filter_map(|col| match col {
DeriveColumnType::Column(column) => {
line.get(column).and_then(|value| value.parse::<f64>().ok())
}
DeriveColumnType::Constant(constant) => constant.parse().ok(),
})
.reduce(reducer)
.map(|value| value.to_string())
.unwrap_or_default()
}
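
A small hypothetical test pinning the fold semantics down (it would need to live in this module, since the helper is private): values reduce left to right, and missing or unparseable columns are skipped rather than treated as zero, so subtracting [a, 2, 3] with a = 10 gives (10 - 2) - 3 = 5.

#[cfg(test)]
mod reduce_tests {
    use std::collections::BTreeMap;

    use super::*;

    #[test]
    fn subtract_folds_left_to_right() {
        let mut line = BTreeMap::new();
        line.insert("a".to_owned(), "10".to_owned());
        let columns = vec![
            DeriveColumnType::Column("a".to_owned()),
            DeriveColumnType::Constant("2".to_owned()),
            DeriveColumnType::Constant("3".to_owned()),
        ];
        // f64's Display renders 5.0 as "5".
        assert_eq!(reduce_numeric_columns(&line, &columns, |a, b| a - b), "5");
    }
}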
fn derive_line(
mut line: BTreeMap<String, String>,
rules: &Vec<RunnableDeriveRule>,
output: &mut impl RecordSerializer,
) -> anyhow::Result<()> {
for rule in rules {
if !is_line_valid(&line, &rule.filters) {
continue;
}
for operation in &rule.operations {
if let DeriveOperation::Split(_, _) = operation.operation {
continue;
}
let value = match &operation.operation {
DeriveOperation::Concat(concat) => concat_columns(&line, concat),
DeriveOperation::Add(columns) => {
reduce_numeric_columns(&line, columns, |a, b| a + b)
}
DeriveOperation::Multiply(columns) => {
reduce_numeric_columns(&line, columns, |a, b| a * b)
}
DeriveOperation::Subtract(columns) => {
reduce_numeric_columns(&line, columns, |a, b| a - b)
}
DeriveOperation::Divide(columns) => {
reduce_numeric_columns(&line, columns, |a, b| a / b)
}
DeriveOperation::Map(mapped_value) => mapped_value.clone(),
DeriveOperation::Split(_, _) => {
bail!("Invalid state, split type must be checked after other operations")
}
};
line.insert(operation.column_name.clone(), value);
}
}
let split_operations = rules
.iter()
.flat_map(|rule| {
if !is_line_valid(&line, &rule.filters) {
return vec![];
}
rule.operations
.iter()
.filter(|operation| matches!(operation.operation, DeriveOperation::Split(_, _)))
.collect_vec()
})
.collect_vec();
if split_operations.is_empty() {
output.serialize(line)?;
} else {
// TODO: expand the line into one output record per split value; split
// handling is not implemented yet.
}
Ok(())
}
fn derive(
rules: &Vec<RunnableDeriveRule>,
input: &mut impl RecordDeserializer,
@@ -210,23 +326,6 @@ fn derive(
Ok(())
}
fn derive_line(
line: BTreeMap<String, String>,
rules: &Vec<RunnableDeriveRule>,
output: &mut impl RecordSerializer,
) -> anyhow::Result<()> {
for rule in rules {
// First check the filter works. If there are no filters, the rule applies to all rows
for filter in &rule.filters {}
// TODO: Split operations should be processed separately, after all the other operations have been applied
// Apply all operations individually, adding as a column to the record map
for operation in &rule.operations {}
}
// for line in line {
output.serialize(line)
// }
}
pub struct DeriveNodeRunner {
derive_node: DeriveNode,
}
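
Since JsonSchema describes the same shape serde already emits, here is a hedged sketch (the column names are illustrative) of what a DeriveRule looks like on the wire with serde's default externally tagged enum encoding, i.e. the shape the generated schema will validate.

// Hypothetical example; serializes to:
// {"operations":[{"column_name":"Total","operation":
//     {"Add":[{"Column":"Price"},{"Constant":"1.5"}]}}],"filters":[]}
fn example_rule_json() -> anyhow::Result<String> {
    let rule = DeriveRule {
        operations: vec![DeriveColumnOperation {
            column_name: "Total".to_owned(),
            operation: DeriveOperation::Add(vec![
                DeriveColumnType::Column("Price".to_owned()),
                DeriveColumnType::Constant("1.5".to_owned()),
            ]),
        }],
        filters: vec![],
    };
    Ok(serde_json::to_string(&rule)?)
}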

src/filter.rs

@@ -1,25 +1,14 @@
use std::collections::BTreeMap;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use crate::{
derive::{to_filter_rules, DataValidator, DataValidators, DeriveFilter},
derive::{is_line_valid, to_filter_rules, DataValidators, DeriveFilter},
io::{RecordDeserializer, RecordSerializer},
node::RunnableNode,
};
fn is_line_valid(line: &BTreeMap<String, String>, rules: &DataValidators) -> bool {
rules.iter().all(|rule| {
line.get(&rule.get_field_name()).map_or(true, |value| {
if value.trim().is_empty() {
true
} else {
rule.is_valid(value)
}
})
})
}
/**
* Write all lines from the input file to the output file, skipping records
* that don't satisfy the filter criteria
@@ -33,7 +22,7 @@ pub fn filter_file(
let line: BTreeMap<String, String> = line;
output.write_header(&line)?;
if (is_line_valid(&line, &rules)) {
if is_line_valid(&line, &rules) {
output.write_record(&line)?;
}
@@ -48,7 +37,7 @@ pub fn filter_file(
Ok(())
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct FilterNode {
pub filters: Vec<DeriveFilter>,
pub input_file_path: String,

src/graph.rs

@@ -9,6 +9,7 @@ use std::{
};
use chrono::Local;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use crate::{
@@ -19,7 +20,7 @@ use crate::{
upload_to_db::{UploadNode, UploadNodeRunner},
};
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub enum NodeConfiguration {
FileNode,
MoveMoneyNode(MoveMoneyNode),
@@ -32,13 +33,13 @@ pub enum NodeConfiguration {
Dynamic,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct DynamicConfiguration {
pub node_type: String,
pub parameters: HashMap<String, String>,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct NodeInfo {
pub name: String,
pub output_files: Vec<String>,
@@ -46,13 +47,13 @@ pub struct NodeInfo {
pub dynamic_configuration: Option<DynamicConfiguration>,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub enum MoveMoneyAmountType {
Percent,
Amount,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct MoveMoneyRule {
pub from_account: String,
pub from_cc: String,
@@ -62,7 +63,7 @@ pub struct MoveMoneyRule {
pub amount_type: MoveMoneyAmountType,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct MoveMoneyNode {
pub departments_path: String,
pub accounts_path: String,
@@ -70,40 +71,40 @@ pub struct MoveMoneyNode {
pub rules: Vec<MoveMoneyRule>,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub enum JoinType {
Left,
Inner,
Right,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct MergeJoin {
pub join_type: JoinType,
pub left_column_name: String,
pub right_column_name: String,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct MergeNode {
pub input_files: Vec<String>,
pub joins: Vec<MergeJoin>,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub enum CodeRuleLanguage {
Javascript,
Rust,
Go,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct CodeRuleNode {
pub language: CodeRuleLanguage,
pub text: String,
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct Node {
pub id: i64,
pub info: NodeInfo,
@@ -135,17 +136,18 @@ fn get_runnable_node(node: Node) -> Box<dyn RunnableNode> {
}
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct Graph {
pub name: String,
pub nodes: Vec<Node>,
}
#[derive(Debug)]
pub enum NodeStatus {
Completed,
Running,
// TODO: Error code?
Failed,
// Error code
Failed(anyhow::Error),
}
pub struct RunnableGraph {
@@ -157,11 +159,14 @@ impl RunnableGraph {
RunnableGraph { graph }
}
pub fn run_default_tasks(&mut self, num_threads: usize) -> anyhow::Result<()> {
pub fn run_default_tasks<F>(&self, num_threads: usize, status_changed: F) -> anyhow::Result<()>
where
F: Fn(i64, NodeStatus),
{
self.run(
num_threads,
Box::new(|node| get_runnable_node(node)),
|id, status| {},
status_changed,
)
}
@@ -196,7 +201,7 @@ impl RunnableGraph {
node_status_changed_fn(node.id, NodeStatus::Running);
match get_node_fn(node.clone()).run() {
Ok(_) => node_status_changed_fn(node.id, NodeStatus::Completed),
Err(_) => node_status_changed_fn(node.id, NodeStatus::Failed),
Err(err) => node_status_changed_fn(node.id, NodeStatus::Failed(err)),
};
}
return Ok(());
@@ -216,8 +221,13 @@ impl RunnableGraph {
let node_fn = node_fn.clone();
let handle = thread::spawn(move || {
for node in rx {
node_fn(node.clone()).run();
finish_task.send((n, node));
let status = match node_fn(node.clone()).run() {
Ok(_) => NodeStatus::Completed,
Err(err) => NodeStatus::Failed(err),
};
finish_task
.send((n, node, status))
.expect("Failed to notify node status completion");
}
println!("Thread {} finished", n);
});
@@ -238,15 +248,14 @@ impl RunnableGraph {
let node = nodes.remove(i);
node_status_changed_fn(node.id, NodeStatus::Running);
running_nodes.insert(node.id);
senders[i % senders.len()].send(node);
senders[i % senders.len()].send(node)?;
}
}
// Run each dependent node after a graph above finishes.
for (n, node) in listen_finish_task {
for (n, node, status) in listen_finish_task {
running_threads.remove(&n);
// TODO: Add error check here
node_status_changed_fn(node.id, NodeStatus::Completed);
node_status_changed_fn(node.id, status);
running_nodes.remove(&node.id);
completed_nodes.insert(node.id);
// Run all the nodes that can be run and aren't in completed
@@ -261,7 +270,7 @@ impl RunnableGraph {
let node = nodes.remove(i);
for i in 0..num_threads {
if !running_threads.contains(&i) {
senders[i].send(node);
senders[i].send(node)?;
break;
}
}
@@ -293,7 +302,7 @@ mod tests {
#[test]
fn test_basic() -> anyhow::Result<()> {
let mut graph = RunnableGraph {
let graph = RunnableGraph {
graph: super::Graph {
name: "Test".to_owned(),
nodes: vec![super::Node {
@@ -313,7 +322,7 @@ mod tests {
}],
},
};
graph.run_default_tasks(2)?;
graph.run_default_tasks(2, |_, _| {})?;
Ok(())
}
}
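
With Failed now carrying the anyhow::Error, callers can surface the cause instead of a bare status; a minimal sketch against the public API above (the function name is illustrative):

use coster_rs::graph::{NodeStatus, RunnableGraph};

// Report failures with their full anyhow context chain ({:#}), and fall
// back to the Debug representation for the other statuses.
fn run_with_logging(graph: RunnableGraph) -> anyhow::Result<()> {
    graph.run_default_tasks(num_cpus::get(), |id, status| match status {
        NodeStatus::Failed(err) => eprintln!("node {id} failed: {err:#}"),
        other => println!("node {id}: {other:?}"),
    })
}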

src/lib.rs

@@ -14,7 +14,7 @@ pub use self::shared_models::*;
pub mod code_rule;
pub mod derive;
pub mod filter;
mod graph;
pub mod graph;
mod io;
pub mod link;
pub mod node;

src/main.rs

@@ -1,248 +1,9 @@
use std::{collections::HashMap, fs::File, io::BufWriter, path::PathBuf};
use clap::{Parser, Subcommand};
use coster_rs::{create_products::InputFile, SourceType};
#[derive(Parser)]
#[command(name = "coster-rs")]
#[command(author = "Pivato M. <mpivato4@gmail.com>")]
#[command(version = "0.0.1")]
#[command(about = "Simple, fast, efficient costing tool", long_about = None)]
struct Cli {
#[clap(subcommand)]
command: Commands,
}
#[derive(Subcommand)]
enum Commands {
/// Moves money between accounts and departments, using the given rules and lines
MoveMoney {
#[arg(short = 'r', long, value_name = "FILE")]
rules: PathBuf,
#[arg(short = 'l', long, value_name = "FILE")]
lines: PathBuf,
#[arg(short = 'a', long, value_name = "FILE")]
accounts: PathBuf,
#[arg(short = 'c', long, value_name = "FILE")]
cost_centres: PathBuf,
#[arg(short, long, value_name = "FILE")]
output: Option<PathBuf>,
#[arg(short, long)]
use_numeric_accounts: bool,
#[arg(short, long)]
flush_pass: bool,
},
/// Allocates servicing department amounts to operating departments
AllocateOverheads {
#[arg(short, long, value_name = "FILE")]
lines: PathBuf,
#[arg(short, long, value_name = "FILE")]
accounts: PathBuf,
#[arg(short = 's', long, value_name = "FILE")]
allocation_statistics: PathBuf,
#[arg(short, long, value_name = "FILE")]
areas: PathBuf,
#[arg(short, long, value_name = "FILE")]
cost_centres: PathBuf,
#[arg(short, long)]
use_numeric_accounts: bool,
#[arg(long, default_value = "E")]
account_type: String,
#[arg(short, long)]
exclude_negative_allocation_statistics: bool,
#[arg(short = 'f', long)]
show_from: bool,
#[arg(short, long, default_value = "0.00000000000000001")]
zero_threshold: f64,
#[arg(short, long, value_name = "FILE", default_value = "alloc_output.csv")]
output: PathBuf,
#[arg(short, long)]
msgpack_serialisation: bool,
},
CreateProducts {
#[arg(short, long, value_name = "FILE")]
definitions: PathBuf,
#[arg(short, long, value_name = "FILE")]
encounters: PathBuf,
#[arg(short, long, value_name = "FILE")]
services: PathBuf,
#[arg(short, long, value_name = "FILE")]
transfers: PathBuf,
#[arg(short, long, value_name = "FILE")]
procedures: PathBuf,
#[arg(short, long, value_name = "FILE")]
diagnoses: PathBuf,
#[arg(short, long, value_name = "FILE")]
patients: PathBuf,
#[arg(short, long, value_name = "FILE")]
revenues: PathBuf,
#[arg(short, long, value_name = "FILE")]
output: PathBuf,
},
}
use clap::Parser;
use cli::Cli;
mod cli;
fn main() -> anyhow::Result<()> {
env_logger::init();
let cli = Cli::parse();
match cli.command {
Commands::MoveMoney {
rules,
lines,
accounts,
cost_centres,
output,
use_numeric_accounts,
flush_pass,
} => coster_rs::move_money(
&mut csv::Reader::from_path(rules)?,
&mut csv::Reader::from_path(lines)?,
&mut csv::Reader::from_path(accounts)?,
&mut csv::Reader::from_path(cost_centres)?,
&mut csv::Writer::from_path(output.unwrap_or(PathBuf::from("output.csv")))?,
use_numeric_accounts,
flush_pass,
),
Commands::AllocateOverheads {
lines,
accounts,
allocation_statistics,
areas,
cost_centres,
use_numeric_accounts,
account_type,
exclude_negative_allocation_statistics,
show_from,
zero_threshold,
output,
msgpack_serialisation,
} => {
if msgpack_serialisation {
let mut file = BufWriter::new(File::create(output)?);
coster_rs::reciprocal_allocation(
&mut csv::Reader::from_path(lines)?,
&mut csv::Reader::from_path(accounts)?,
&mut csv::Reader::from_path(allocation_statistics)?,
&mut csv::Reader::from_path(areas)?,
&mut csv::Reader::from_path(cost_centres)?,
&mut rmp_serde::Serializer::new(&mut file),
use_numeric_accounts,
exclude_negative_allocation_statistics,
true,
account_type,
show_from,
zero_threshold,
)
} else {
coster_rs::reciprocal_allocation(
&mut csv::Reader::from_path(lines)?,
&mut csv::Reader::from_path(accounts)?,
&mut csv::Reader::from_path(allocation_statistics)?,
&mut csv::Reader::from_path(areas)?,
&mut csv::Reader::from_path(cost_centres)?,
&mut csv::Writer::from_path(output)?,
use_numeric_accounts,
exclude_negative_allocation_statistics,
true,
account_type,
show_from,
zero_threshold,
)
}
}
Commands::CreateProducts {
definitions,
encounters,
services,
transfers,
procedures,
diagnoses,
patients,
revenues,
output,
} => {
let mut inputs = HashMap::new();
inputs.insert(
SourceType::Encounter,
InputFile {
file_path: encounters,
joins: HashMap::new(),
date_order_column: Some("StartDateTime".to_owned()),
},
);
inputs.insert(
SourceType::Service,
InputFile {
file_path: services,
joins: HashMap::new(),
date_order_column: Some("StartDateTime".to_owned()),
},
);
inputs.insert(
SourceType::Transfer,
InputFile {
file_path: transfers,
joins: HashMap::new(),
date_order_column: Some("StartDateTime".to_owned()),
},
);
inputs.insert(
SourceType::CodingProcedure,
InputFile {
file_path: procedures,
joins: HashMap::new(),
date_order_column: Some("ProcedureDateTime".to_owned()),
},
);
inputs.insert(
SourceType::CodingDiagnosis,
InputFile {
file_path: diagnoses,
joins: HashMap::new(),
date_order_column: None,
},
);
inputs.insert(
SourceType::Patient,
InputFile {
file_path: patients,
joins: HashMap::new(),
date_order_column: None,
},
);
inputs.insert(
SourceType::Revenue,
InputFile {
file_path: revenues,
joins: HashMap::new(),
date_order_column: None,
},
);
coster_rs::create_products::create_products_polars(definitions, vec![], output)
}
}
cli.run()
}

src/sql.rs

@@ -5,11 +5,12 @@ use polars::{
prelude::{CsvWriter, LazyCsvReader, LazyFileListReader},
};
use polars_sql::SQLContext;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use crate::node::RunnableNode;
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct CSVFile {
name: String,
path: String,
@@ -30,7 +31,7 @@ fn run_sql(files: &Vec<CSVFile>, output_path: &String, query: &String) -> anyhow
Ok(())
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct SQLNode {
pub files: Vec<CSVFile>,
pub output_file: String,

src/upload_to_db.rs

@@ -1,6 +1,7 @@
use std::collections::HashMap;
use anyhow::bail;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use sqlx::{Any, Pool, QueryBuilder};
@@ -69,7 +70,7 @@ pub async fn upload_file_bulk(pool: &Pool<Any>, upload_node: &UploadNode) -> any
}
}
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct UploadNode {
file_path: String,
table_name: String,