Merge pull request 'create_encounter_products' (#1) from create_encounter_products into main
Reviewed-on: vato007/coster-rs#1
5  .gitignore  vendored
@@ -1,3 +1,8 @@
/target
.DS_Store
*.xcuserdatad
.venv
*.csv
*.h
*.py
.idea
1365  Cargo.lock  generated
File diff suppressed because it is too large
Cargo.toml
@@ -18,12 +18,14 @@ clap = { version = "4.1.8", features = ["derive"] }
anyhow = "1.0"

itertools = "0.10.3"
chrono = {version = "0.4.23", features = ["default", "serde"]}
chrono = {version = "0.4.31", features = ["default", "serde"]}

rayon = "1.6.0"
tokio = { version = "1.26.0", features = ["full"] }
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls", "mssql" ] }
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls", "mssql", "any" ] }
rmp-serde = "1.1.1"
tempfile = "3.7.0"
polars = {version = "0.32.1", features = ["lazy", "performant", "streaming", "cse", "dtype-datetime"]}

# More info on targets: https://doc.rust-lang.org/cargo/reference/cargo-targets.html#configuring-a-target
[lib]
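The chrono bump keeps its serde feature enabled, which is what lets date-time fields round-trip through the serde-based CSV readers used later in this change. A minimal sketch, with an illustrative record type (the struct and field names below are not from this PR):

use chrono::NaiveDateTime;
use serde::{Deserialize, Serialize};

// chrono's "serde" feature supplies the Serialize/Deserialize impls
// for NaiveDateTime used by this otherwise ordinary record type.
#[derive(Serialize, Deserialize)]
struct ServiceRow {
    encounter_number: String,
    start_date_time: NaiveDateTime,
}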
@@ -12,6 +12,14 @@
|
||||
5A1986FB2996502C00FA0471 /* FileButtonSelector.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5A1986FA2996502C00FA0471 /* FileButtonSelector.swift */; };
|
||||
5A450751298CE6D500E3D402 /* CsvDocument.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5A450750298CE6D500E3D402 /* CsvDocument.swift */; };
|
||||
5A45075B298D01EF00E3D402 /* libcoster_rs.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 5A45075A298D01EF00E3D402 /* libcoster_rs.a */; };
|
||||
5A53D5742BE4B4FB00563893 /* FileNodeView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5A53D5732BE4B4FB00563893 /* FileNodeView.swift */; };
|
||||
5A53D5772BE4B98300563893 /* SwiftCSV in Frameworks */ = {isa = PBXBuildFile; productRef = 5A53D5762BE4B98300563893 /* SwiftCSV */; };
|
||||
5A53D5792BE4C0C300563893 /* CsvEditor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5A53D5782BE4C0C300563893 /* CsvEditor.swift */; };
|
||||
5A53D57B2BE4C1D400563893 /* OutputFilesView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5A53D57A2BE4C1D400563893 /* OutputFilesView.swift */; };
|
||||
5A53D5822BE507AD00563893 /* ChartEditor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5A53D5812BE507AD00563893 /* ChartEditor.swift */; };
|
||||
5A53D5842BE507FF00563893 /* ChartView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5A53D5832BE507FF00563893 /* ChartView.swift */; };
|
||||
5A53D5892BE5182C00563893 /* Tasks.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5A53D5882BE5182C00563893 /* Tasks.swift */; };
|
||||
5A53D58B2BE518CA00563893 /* Graph.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5A53D58A2BE518CA00563893 /* Graph.swift */; };
|
||||
5ADD9F2D298A713300F998F5 /* FastCosterApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5ADD9F2C298A713300F998F5 /* FastCosterApp.swift */; };
|
||||
5ADD9F2F298A713300F998F5 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5ADD9F2E298A713300F998F5 /* ContentView.swift */; };
|
||||
5ADD9F31298A713400F998F5 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 5ADD9F30298A713400F998F5 /* Assets.xcassets */; };
|
||||
@@ -48,6 +56,13 @@
|
||||
5A450755298CFFE400E3D402 /* create-lib.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = "create-lib.sh"; sourceTree = "<group>"; };
|
||||
5A450756298D00AE00E3D402 /* remove-lib.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = "remove-lib.sh"; sourceTree = "<group>"; };
|
||||
5A45075A298D01EF00E3D402 /* libcoster_rs.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libcoster_rs.a; path = "../costerrs/target/aarch64-apple-ios/release/libcoster_rs.a"; sourceTree = "<group>"; };
|
||||
5A53D5732BE4B4FB00563893 /* FileNodeView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FileNodeView.swift; sourceTree = "<group>"; };
|
||||
5A53D5782BE4C0C300563893 /* CsvEditor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CsvEditor.swift; sourceTree = "<group>"; };
|
||||
5A53D57A2BE4C1D400563893 /* OutputFilesView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OutputFilesView.swift; sourceTree = "<group>"; };
|
||||
5A53D5812BE507AD00563893 /* ChartEditor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChartEditor.swift; sourceTree = "<group>"; };
|
||||
5A53D5832BE507FF00563893 /* ChartView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChartView.swift; sourceTree = "<group>"; };
|
||||
5A53D5882BE5182C00563893 /* Tasks.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Tasks.swift; sourceTree = "<group>"; };
|
||||
5A53D58A2BE518CA00563893 /* Graph.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Graph.swift; sourceTree = "<group>"; };
|
||||
5ADD9F29298A713300F998F5 /* FastCoster.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = FastCoster.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
5ADD9F2C298A713300F998F5 /* FastCosterApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FastCosterApp.swift; sourceTree = "<group>"; };
|
||||
5ADD9F2E298A713300F998F5 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||
@@ -67,6 +82,7 @@
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
5A45075B298D01EF00E3D402 /* libcoster_rs.a in Frameworks */,
|
||||
5A53D5772BE4B98300563893 /* SwiftCSV in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@@ -106,6 +122,24 @@
|
||||
name = Frameworks;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
5A53D5802BE4C26A00563893 /* Charts */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
5A53D5812BE507AD00563893 /* ChartEditor.swift */,
|
||||
5A53D5832BE507FF00563893 /* ChartView.swift */,
|
||||
);
|
||||
path = Charts;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
5A53D5852BE50C7B00563893 /* Model */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
5A53D5882BE5182C00563893 /* Tasks.swift */,
|
||||
5A53D58A2BE518CA00563893 /* Graph.swift */,
|
||||
);
|
||||
path = Model;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
5ADD9F20298A713300F998F5 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
@@ -131,6 +165,8 @@
|
||||
5ADD9F2B298A713300F998F5 /* FastCoster */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
5A53D5852BE50C7B00563893 /* Model */,
|
||||
5A53D5802BE4C26A00563893 /* Charts */,
|
||||
5ADD9F2C298A713300F998F5 /* FastCosterApp.swift */,
|
||||
5ADD9F2E298A713300F998F5 /* ContentView.swift */,
|
||||
5ADD9F30298A713400F998F5 /* Assets.xcassets */,
|
||||
@@ -140,6 +176,9 @@
|
||||
5A1986F62996436500FA0471 /* OverheadAllocation.swift */,
|
||||
5A1986F82996436D00FA0471 /* MoveMoney.swift */,
|
||||
5A1986FA2996502C00FA0471 /* FileButtonSelector.swift */,
|
||||
5A53D5732BE4B4FB00563893 /* FileNodeView.swift */,
|
||||
5A53D5782BE4C0C300563893 /* CsvEditor.swift */,
|
||||
5A53D57A2BE4C1D400563893 /* OutputFilesView.swift */,
|
||||
);
|
||||
path = FastCoster;
|
||||
sourceTree = "<group>";
|
||||
@@ -187,6 +226,9 @@
|
||||
dependencies = (
|
||||
);
|
||||
name = FastCoster;
|
||||
packageProductDependencies = (
|
||||
5A53D5762BE4B98300563893 /* SwiftCSV */,
|
||||
);
|
||||
productName = FastCoster;
|
||||
productReference = 5ADD9F29298A713300F998F5 /* FastCoster.app */;
|
||||
productType = "com.apple.product-type.application";
|
||||
@@ -259,6 +301,9 @@
|
||||
Base,
|
||||
);
|
||||
mainGroup = 5ADD9F20298A713300F998F5;
|
||||
packageReferences = (
|
||||
5A53D5752BE4B98300563893 /* XCRemoteSwiftPackageReference "SwiftCSV" */,
|
||||
);
|
||||
productRefGroup = 5ADD9F2A298A713300F998F5 /* Products */;
|
||||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
@@ -344,11 +389,18 @@
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
5A1986FB2996502C00FA0471 /* FileButtonSelector.swift in Sources */,
|
||||
5A53D58B2BE518CA00563893 /* Graph.swift in Sources */,
|
||||
5ADD9F2F298A713300F998F5 /* ContentView.swift in Sources */,
|
||||
5A1986F92996436D00FA0471 /* MoveMoney.swift in Sources */,
|
||||
5A53D57B2BE4C1D400563893 /* OutputFilesView.swift in Sources */,
|
||||
5ADD9F2D298A713300F998F5 /* FastCosterApp.swift in Sources */,
|
||||
5A450751298CE6D500E3D402 /* CsvDocument.swift in Sources */,
|
||||
5A53D5822BE507AD00563893 /* ChartEditor.swift in Sources */,
|
||||
5A53D5842BE507FF00563893 /* ChartView.swift in Sources */,
|
||||
5A53D5792BE4C0C300563893 /* CsvEditor.swift in Sources */,
|
||||
5A53D5892BE5182C00563893 /* Tasks.swift in Sources */,
|
||||
5A1986F72996436500FA0471 /* OverheadAllocation.swift in Sources */,
|
||||
5A53D5742BE4B4FB00563893 /* FileNodeView.swift in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@@ -524,7 +576,7 @@
|
||||
"LIBRARY_SEARCH_PATHS[arch=*]" = "${DERIVED_FILES_DIR}";
|
||||
MACOSX_DEPLOYMENT_TARGET = 13.1;
|
||||
MARKETING_VERSION = 1.0;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = com.Vato.FastCoster;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = dev.michaelpivato.FastCoster;
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SDKROOT = auto;
|
||||
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
|
||||
@@ -564,7 +616,7 @@
|
||||
"LIBRARY_SEARCH_PATHS[arch=*]" = "${DERIVED_FILES_DIR}";
|
||||
MACOSX_DEPLOYMENT_TARGET = 13.1;
|
||||
MARKETING_VERSION = 1.0;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = com.Vato.FastCoster;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = dev.michaelpivato.FastCoster;
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SDKROOT = auto;
|
||||
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
|
||||
@@ -705,6 +757,25 @@
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
|
||||
/* Begin XCRemoteSwiftPackageReference section */
|
||||
5A53D5752BE4B98300563893 /* XCRemoteSwiftPackageReference "SwiftCSV" */ = {
|
||||
isa = XCRemoteSwiftPackageReference;
|
||||
repositoryURL = "https://github.com/swiftcsv/SwiftCSV.git";
|
||||
requirement = {
|
||||
kind = upToNextMajorVersion;
|
||||
minimumVersion = 0.9.1;
|
||||
};
|
||||
};
|
||||
/* End XCRemoteSwiftPackageReference section */
|
||||
|
||||
/* Begin XCSwiftPackageProductDependency section */
|
||||
5A53D5762BE4B98300563893 /* SwiftCSV */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = 5A53D5752BE4B98300563893 /* XCRemoteSwiftPackageReference "SwiftCSV" */;
|
||||
productName = SwiftCSV;
|
||||
};
|
||||
/* End XCSwiftPackageProductDependency section */
|
||||
};
|
||||
rootObject = 5ADD9F21298A713300F998F5 /* Project object */;
|
||||
}
|
||||
|
||||
18  FastCoster/FastCoster/Charts/ChartEditor.swift  Normal file
@@ -0,0 +1,18 @@
|
||||
//
|
||||
// ChartEditor.swift
|
||||
// FastCoster
|
||||
//
|
||||
// Created by Michael Pivato on 3/5/2024.
|
||||
//
|
||||
|
||||
import SwiftUI
|
||||
|
||||
struct ChartEditor: View {
|
||||
var body: some View {
|
||||
Text(/*@START_MENU_TOKEN@*/"Hello, World!"/*@END_MENU_TOKEN@*/)
|
||||
}
|
||||
}
|
||||
|
||||
#Preview {
|
||||
ChartEditor()
|
||||
}
|
||||
19  FastCoster/FastCoster/Charts/ChartView.swift  Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// ChartView.swift
|
||||
// FastCoster
|
||||
//
|
||||
// Created by Michael Pivato on 3/5/2024.
|
||||
//
|
||||
|
||||
import SwiftUI
|
||||
|
||||
struct ChartView: View {
|
||||
// View the chart for the given file and configuration: https://developer.apple.com/documentation/Charts
|
||||
var body: some View {
|
||||
Text(/*@START_MENU_TOKEN@*/"Hello, World!"/*@END_MENU_TOKEN@*/)
|
||||
}
|
||||
}
|
||||
|
||||
#Preview {
|
||||
ChartView()
|
||||
}
|
||||
@@ -11,6 +11,7 @@ enum ProcessType: String, Hashable {
case MoveMoney = "Move Money"
case OverheadAllocation = "Overhead Allocation"

// TODO: This needs to be the list of graphs
static let values = [MoveMoney, OverheadAllocation]
}

@@ -37,6 +38,7 @@ struct ContentView: View {
}
}
}
// TODO: Button to add a new graph

}
20  FastCoster/FastCoster/CsvEditor.swift  Normal file
@@ -0,0 +1,20 @@
|
||||
//
|
||||
// CsvEditor.swift
|
||||
// FastCoster
|
||||
//
|
||||
// Created by Michael Pivato on 3/5/2024.
|
||||
//
|
||||
|
||||
import SwiftUI
|
||||
|
||||
struct CsvEditor: View {
|
||||
// A table to view data in a file: https://developer.apple.com/documentation/SwiftUI/Table
|
||||
// It's fine to load it all into memory to begin with, we'll probably want to change that later though.
|
||||
var body: some View {
|
||||
Text(/*@START_MENU_TOKEN@*/"Hello, World!"/*@END_MENU_TOKEN@*/)
|
||||
}
|
||||
}
|
||||
|
||||
#Preview {
|
||||
CsvEditor()
|
||||
}
|
||||
33  FastCoster/FastCoster/FileNodeView.swift  Normal file
@@ -0,0 +1,33 @@
|
||||
//
|
||||
// FileNode.swift
|
||||
// FastCoster
|
||||
//
|
||||
// Created by Michael Pivato on 3/5/2024.
|
||||
//
|
||||
|
||||
import SwiftUI
|
||||
|
||||
struct FileNodeView: View {
|
||||
@State private var showPicker = false
|
||||
@State private var selectedFileUrl: URL?
|
||||
var body: some View {
|
||||
// Should basically show a file selector.
|
||||
Button {
|
||||
showPicker.toggle()
|
||||
} label: {
|
||||
Text("Select File")
|
||||
}.fileImporter(isPresented: $showPicker, allowedContentTypes: [.commaSeparatedText]) { result in
|
||||
|
||||
switch result {
|
||||
case .success(let fileUrl):
|
||||
selectedFileUrl = fileUrl
|
||||
case .failure(let error):
|
||||
print(error)
|
||||
}
|
||||
}.padding()
|
||||
}
|
||||
}
|
||||
|
||||
#Preview {
|
||||
FileNodeView()
|
||||
}
|
||||
23  FastCoster/FastCoster/Model/Graph.swift  Normal file
@@ -0,0 +1,23 @@
|
||||
//
|
||||
// Graph.swift
|
||||
// FastCoster
|
||||
//
|
||||
// Created by Michael Pivato on 3/5/2024.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
// JSON for saving/loading configuration: https://www.avanderlee.com/swift/json-parsing-decoding/
|
||||
struct Node: Codable {
|
||||
var id: Int
|
||||
var info: NodeInfo
|
||||
var dependentNodeIds: [Int]
|
||||
|
||||
func hasDependentNodes() -> Bool {
|
||||
return !dependentNodeIds.isEmpty
|
||||
}
|
||||
}
|
||||
|
||||
struct Graph: Codable {
|
||||
var name: String
|
||||
var nodes: [Node]
|
||||
}
|
||||
118  FastCoster/FastCoster/Model/Tasks.swift  Normal file
@@ -0,0 +1,118 @@
|
||||
//
|
||||
// InputFile.swift
|
||||
// FastCoster
|
||||
//
|
||||
// Created by Michael Pivato on 3/5/2024.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
struct NodeInfo: Codable {
|
||||
var name: String;
|
||||
var outputFiles: [String]
|
||||
var configuration: NodeConfiguration
|
||||
}
|
||||
|
||||
// Need to check if enums with data actually work with JSON serialisation/deserialisation, otherwise
|
||||
// can look into binary serialisation/deserialisation instead
|
||||
enum NodeConfiguration: Codable {
|
||||
case FileNode
|
||||
case MoveMoneyNode(MoveMoneyNode)
|
||||
case MergeNode(MergeNode)
|
||||
case DeriveNode(DeriveNode)
|
||||
}
|
||||
|
||||
enum MoveMoneyAmountType: String, Codable {
|
||||
case Percent, Amount
|
||||
}
|
||||
|
||||
struct MoveMoneyRule: Codable {
|
||||
let fromAccount: String
|
||||
let fromCC: String
|
||||
let toAccount: String
|
||||
let toCC: String
|
||||
let value: Double
|
||||
let type: MoveMoneyAmountType
|
||||
}
|
||||
|
||||
struct MoveMoneyNode: Codable {
|
||||
var departmentsPath: String
|
||||
var accountsPath: String
|
||||
var glPath: String
|
||||
var rules: [MoveMoneyRule]
|
||||
}
|
||||
|
||||
enum JoinType: Codable {
|
||||
case Left, Inner, Right
|
||||
}
|
||||
|
||||
struct MergeJoin: Codable {
|
||||
var type: JoinType
|
||||
var leftColumnName: String
|
||||
var rightColumnName: String
|
||||
}
|
||||
|
||||
struct MergeNode: Codable {
|
||||
var inputFiles: [String]
|
||||
var joins: [MergeJoin]
|
||||
}
|
||||
|
||||
enum DeriveColumnType: Codable {
|
||||
case Column(String)
|
||||
case Constant(String)
|
||||
}
|
||||
|
||||
struct MapOperation: Codable {
|
||||
var mappedValue: String
|
||||
}
|
||||
|
||||
enum DatePart: Codable {
|
||||
case Year, Month, Week, Day, Hour, Minute, Second
|
||||
}
|
||||
|
||||
enum SplitType: Codable {
|
||||
case DateTime(String, DatePart)
|
||||
case Numeric(String, Int)
|
||||
}
|
||||
|
||||
enum MatchComparisonType: Codable {
|
||||
case Equal, GreaterThan, LessThan
|
||||
}
|
||||
|
||||
enum DeriveOperation: Codable {
|
||||
case Concat([DeriveColumnType])
|
||||
case Add([DeriveColumnType])
|
||||
case Multiply([DeriveColumnType])
|
||||
case Subtract(DeriveColumnType, DeriveColumnType)
|
||||
case Divide(DeriveColumnType, DeriveColumnType)
|
||||
case Map(String, [MapOperation])
|
||||
case Split(String, SplitType)
|
||||
}
|
||||
|
||||
struct DeriveFilter: Codable {
|
||||
var columnName: String
|
||||
var comparator: MatchComparisonType
|
||||
var matchValue: String
|
||||
}
|
||||
|
||||
struct DeriveRule: Codable {
|
||||
// Should this actually be an array though? I think it's fine.
|
||||
var operations: [DeriveOperation]
|
||||
// Filter to only specific values if required, if empty every value is considered a match
|
||||
var filters: [DeriveFilter]
|
||||
}
|
||||
|
||||
struct DeriveNode: Codable {
|
||||
var rules: [DeriveRule]
|
||||
}
|
||||
|
||||
|
||||
// Example json serialisation
|
||||
func tryJson() {
|
||||
do {
|
||||
let json = try JSONEncoder().encode(NodeInfo(name: "", outputFiles: [], configuration: NodeConfiguration.FileNode))
|
||||
let decoded = try JSONDecoder().decode(NodeInfo.self, from: json)
|
||||
}catch {
|
||||
|
||||
}
|
||||
}
|
||||
19  FastCoster/FastCoster/OutputFilesView.swift  Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// OutputFilesView.swift
|
||||
// FastCoster
|
||||
//
|
||||
// Created by Michael Pivato on 3/5/2024.
|
||||
//
|
||||
|
||||
import SwiftUI
|
||||
|
||||
struct OutputFilesView: View {
|
||||
// List of files, with links to open a file editor to edit the linked files
|
||||
var body: some View {
|
||||
Text(/*@START_MENU_TOKEN@*/"Hello, World!"/*@END_MENU_TOKEN@*/)
|
||||
}
|
||||
}
|
||||
|
||||
#Preview {
|
||||
OutputFilesView()
|
||||
}
|
||||
@@ -8,6 +8,7 @@
import SwiftUI

struct OverheadAllocation: View {
// TODO: Refactor to take inputs from another task instead
@State private var lines: String?
@State private var accounts: String?
@State private var areas: String?
@@ -1,4 +1,5 @@
use sqlx::mssql::MssqlPoolOptions;
use coster_rs::upload_to_db;
use sqlx::{any::AnyPoolOptions, mssql::MssqlPoolOptions};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
@@ -8,11 +9,11 @@ async fn main() -> anyhow::Result<()> {
let database = "";
// Using sqlx: https://github.com/launchbadge/sqlx
let connection_string = format!("mssql://{}:{}@{}/{}", user, password, host, database);
let pool = MssqlPoolOptions::new()
let pool = AnyPoolOptions::new()
.max_connections(20)
.connect(&connection_string)
.await?;
// sqlx::query_as("")
// connection.

// upload_to_db::upload_file_bulk(&pool, &"".to_owned(), &"".to_owned(), None, "".to_owned()).await?;
Ok(())
}
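With the "any" feature now enabled on sqlx, the same pool-building code can select its driver from the URL scheme at runtime rather than being tied to MssqlPoolOptions. A minimal sketch under that assumption (credentials and host are placeholders):

use sqlx::any::AnyPoolOptions;

async fn connect(connection_string: &str) -> anyhow::Result<sqlx::AnyPool> {
    // The scheme (e.g. mssql://user:pass@host/db) picks the backend when
    // the "any" feature is enabled; no code change is needed per database.
    let pool = AnyPoolOptions::new()
        .max_connections(20)
        .connect(connection_string)
        .await?;
    Ok(pool)
}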
32  src/lib.rs
@@ -1,3 +1,4 @@
|
||||
// TODO: Module api can probably use a cleanup
|
||||
mod move_money;
|
||||
pub use self::move_money::*;
|
||||
use std::ffi::c_char;
|
||||
@@ -9,7 +10,7 @@ pub use self::overhead_allocation::*;
|
||||
|
||||
mod products;
|
||||
pub use self::products::create_products;
|
||||
pub use self::products::CreateProductInputs;
|
||||
pub use self::products::csv::SourceType;
|
||||
|
||||
mod shared_models;
|
||||
pub use self::shared_models::*;
|
||||
@@ -18,6 +19,8 @@ pub mod link;
|
||||
|
||||
pub mod filter;
|
||||
|
||||
pub mod upload_to_db;
|
||||
|
||||
mod io;
|
||||
|
||||
#[no_mangle]
|
||||
@@ -56,6 +59,33 @@ pub extern "C" fn move_money_from_text(
|
||||
// This looks like exactly what I'm doing too: https://mozilla.github.io/firefox-browser-architecture/experiments/2017-09-06-rust-on-ios.html
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn move_money_from_file(
|
||||
rules_file: *const c_char,
|
||||
lines: *const c_char,
|
||||
accounts: *const c_char,
|
||||
cost_centres: *const c_char,
|
||||
output_path: *const c_char,
|
||||
use_numeric_accounts: bool,
|
||||
) {
|
||||
let mut output_writer = csv::Writer::from_writer(vec![]);
|
||||
let safe_rules = unwrap_c_char(rules_file);
|
||||
let safe_lines = unwrap_c_char(lines);
|
||||
let safe_accounts = unwrap_c_char(accounts);
|
||||
let safe_cost_centres = unwrap_c_char(cost_centres);
|
||||
move_money_2()
|
||||
// move_money(
|
||||
// ,
|
||||
// &mut csv::Reader::from_reader(safe_lines.to_str().unwrap()),
|
||||
// &mut csv::Reader::from_reader(safe_accounts.to_bytes()),
|
||||
// &mut csv::Reader::from_reader(safe_cost_centres.to_bytes()),
|
||||
// &mut output_writer,
|
||||
// use_numeric_accounts,
|
||||
// false,
|
||||
// )
|
||||
// .expect("Failed to move money");
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn move_money_from_text_free(s: *mut c_char) {
|
||||
unsafe {
|
||||
|
||||
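The exported move_money_* functions above hand strings across the C boundary so the Swift app can link against libcoster_rs.a. A minimal sketch of that pattern (the function names below are illustrative, not part of the crate's API; the crate's own helper is unwrap_c_char):

use std::ffi::{c_char, CStr, CString};

// Borrowed C string in, owned C string out; the caller must release the
// result through the matching *_free function, mirroring move_money_from_text_free.
#[no_mangle]
pub extern "C" fn coster_echo(input: *const c_char) -> *mut c_char {
    let text = unsafe { CStr::from_ptr(input) }.to_string_lossy().into_owned();
    CString::new(text).unwrap().into_raw()
}

#[no_mangle]
pub unsafe extern "C" fn coster_echo_free(s: *mut c_char) {
    if !s.is_null() {
        drop(CString::from_raw(s));
    }
}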
82  src/main.rs
@@ -1,7 +1,7 @@
|
||||
use std::{fs::File, io::BufWriter, path::PathBuf};
|
||||
use std::{collections::HashMap, fs::File, io::BufWriter, path::PathBuf};
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use coster_rs::CreateProductInputs;
|
||||
use coster_rs::{create_products::InputFile, SourceType};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "coster-rs")]
|
||||
@@ -95,6 +95,12 @@ enum Commands {
|
||||
#[arg(short, long, value_name = "FILE")]
|
||||
diagnoses: PathBuf,
|
||||
|
||||
#[arg(short, long, value_name = "FILE")]
|
||||
patients: PathBuf,
|
||||
|
||||
#[arg(short, long, value_name = "FILE")]
|
||||
revenues: PathBuf,
|
||||
|
||||
#[arg(short, long, value_name = "FILE")]
|
||||
output: PathBuf,
|
||||
},
|
||||
@@ -175,18 +181,68 @@ fn main() -> anyhow::Result<()> {
|
||||
transfers,
|
||||
procedures,
|
||||
diagnoses,
|
||||
patients,
|
||||
revenues,
|
||||
output,
|
||||
} => coster_rs::create_products(
|
||||
&mut csv::Reader::from_path(definitions)?,
|
||||
CreateProductInputs {
|
||||
encounters: csv::Reader::from_path(encounters)?,
|
||||
services: csv::Reader::from_path(services)?,
|
||||
transfers: csv::Reader::from_path(transfers)?,
|
||||
procedures: csv::Reader::from_path(procedures)?,
|
||||
diagnoses: csv::Reader::from_path(diagnoses)?,
|
||||
} => {
|
||||
let mut inputs = HashMap::new();
|
||||
inputs.insert(
|
||||
SourceType::Encounter,
|
||||
InputFile {
|
||||
file_path: encounters,
|
||||
joins: HashMap::new(),
|
||||
date_order_column: Some("StartDateTime".to_owned()),
|
||||
},
|
||||
&mut csv::Writer::from_path(output)?,
|
||||
1000000,
|
||||
),
|
||||
);
|
||||
inputs.insert(
|
||||
SourceType::Service,
|
||||
InputFile {
|
||||
file_path: services,
|
||||
joins: HashMap::new(),
|
||||
date_order_column: Some("StartDateTime".to_owned()),
|
||||
},
|
||||
);
|
||||
inputs.insert(
|
||||
SourceType::Transfer,
|
||||
InputFile {
|
||||
file_path: transfers,
|
||||
joins: HashMap::new(),
|
||||
date_order_column: Some("StartDateTime".to_owned()),
|
||||
},
|
||||
);
|
||||
inputs.insert(
|
||||
SourceType::CodingProcedure,
|
||||
InputFile {
|
||||
file_path: procedures,
|
||||
joins: HashMap::new(),
|
||||
date_order_column: Some("ProcedureDateTime".to_owned()),
|
||||
},
|
||||
);
|
||||
inputs.insert(
|
||||
SourceType::CodingDiagnosis,
|
||||
InputFile {
|
||||
file_path: diagnoses,
|
||||
joins: HashMap::new(),
|
||||
date_order_column: None,
|
||||
},
|
||||
);
|
||||
inputs.insert(
|
||||
SourceType::Patient,
|
||||
InputFile {
|
||||
file_path: patients,
|
||||
joins: HashMap::new(),
|
||||
date_order_column: None,
|
||||
},
|
||||
);
|
||||
inputs.insert(
|
||||
SourceType::Revenue,
|
||||
InputFile {
|
||||
file_path: revenues,
|
||||
joins: HashMap::new(),
|
||||
date_order_column: None,
|
||||
},
|
||||
);
|
||||
coster_rs::create_products::create_products_polars(definitions, vec![], output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::{
|
||||
io::Read,
|
||||
};
|
||||
|
||||
use csv::Reader;
|
||||
use itertools::Itertools;
|
||||
use nalgebra::{DMatrix, Dynamic, LU};
|
||||
use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
|
||||
@@ -93,9 +94,6 @@ pub fn reciprocal_allocation<Lines, Account, AllocationStatistic, Area, CostCent
|
||||
allocation_statistics: &mut csv::Reader<AllocationStatistic>,
|
||||
areas: &mut csv::Reader<Area>,
|
||||
cost_centres: &mut csv::Reader<CostCentre>,
|
||||
// TODO: Receiver method rather than this writer that can accept
|
||||
// the raw float results, so we can write in an alternate format
|
||||
// that more accurately represents the values on disk
|
||||
output: &mut impl RecordSerializer,
|
||||
use_numeric_accounts: bool,
|
||||
exclude_negative_allocation_statistics: bool,
|
||||
@@ -115,28 +113,8 @@ where
|
||||
.deserialize()
|
||||
.collect::<Result<Vec<CsvCost>, csv::Error>>()?;
|
||||
|
||||
let all_accounts_sorted: Vec<String> = if use_numeric_accounts {
|
||||
accounts
|
||||
.deserialize::<CsvAccount>()
|
||||
.filter(|account| {
|
||||
account.is_ok() && account.as_ref().unwrap().account_type == account_type
|
||||
})
|
||||
.map(|line| line.unwrap().code.clone().parse::<i32>().unwrap())
|
||||
.unique()
|
||||
.sorted()
|
||||
.map(|account| account.to_string())
|
||||
.collect()
|
||||
} else {
|
||||
accounts
|
||||
.deserialize::<CsvAccount>()
|
||||
.filter(|account| {
|
||||
account.is_ok() && account.as_ref().unwrap().account_type == account_type
|
||||
})
|
||||
.map(|line| line.unwrap().code.clone())
|
||||
.unique()
|
||||
.sorted()
|
||||
.collect()
|
||||
};
|
||||
let all_accounts_sorted: Vec<String> =
|
||||
get_accounts_sorted(use_numeric_accounts, &account_type, accounts);
|
||||
|
||||
let allocation_statistics = allocation_statistics
|
||||
.deserialize::<CsvAllocationStatistic>()
|
||||
@@ -266,7 +244,8 @@ where
|
||||
let mut limited_ccs: Vec<String> = Vec::new();
|
||||
for limit_to in limit_tos.iter() {
|
||||
// TODO: It is technically possible to have more than one limit to (I think?) for a slot, so consider eventually splitting this and doing a foreach
|
||||
let limit_value = area.get(&("LimitTo:".to_owned() + limit_to)).unwrap();
|
||||
// Also there's an exclude criteria that needs to be considered, which can exclude a rollup that would normally get included
|
||||
let limit_value = area.get(&(format!("LimitTo:{}", limit_to))).unwrap();
|
||||
if limit_value.is_empty() {
|
||||
continue;
|
||||
}
|
||||
@@ -274,7 +253,7 @@ where
|
||||
limited_ccs.push(limit_value.clone());
|
||||
} else {
|
||||
let mut found_ccs = rollups
|
||||
.get(&("RollupSlot:".to_owned() + limit_to))
|
||||
.get(&(format!("RollupSlot:{}", limit_to)))
|
||||
.map(|rollups| rollups.get(limit_value))
|
||||
.flatten()
|
||||
.unwrap()
|
||||
@@ -293,35 +272,24 @@ where
|
||||
let mut totals: Vec<(String, String, f64)> = overhead_ccs
|
||||
.par_iter()
|
||||
.flat_map(|overhead_cc| {
|
||||
let limited = limited_ccs
|
||||
limited_ccs
|
||||
.iter()
|
||||
.filter(|other_cc| {
|
||||
totals.contains_key(&(
|
||||
// TODO: This looks terrible
|
||||
other_cc.clone().clone(),
|
||||
allocation_statistic.clone(),
|
||||
))
|
||||
})
|
||||
.map(|other_cc| {
|
||||
(
|
||||
.map(|other_cc| (other_cc.clone(), allocation_statistic.clone()))
|
||||
.filter_map(|(other_cc, allocation_statistic)| {
|
||||
let combined_stat = (other_cc, allocation_statistic);
|
||||
if !totals.contains_key(&combined_stat) {
|
||||
None
|
||||
} else {
|
||||
Some((
|
||||
overhead_cc.clone(),
|
||||
other_cc.clone(),
|
||||
totals
|
||||
.get(&(other_cc.clone(), allocation_statistic.clone()))
|
||||
.map(|f| *f)
|
||||
.unwrap(),
|
||||
)
|
||||
combined_stat.0.clone(),
|
||||
totals.get(&combined_stat).map(|f| *f).unwrap(),
|
||||
))
|
||||
}
|
||||
})
|
||||
.filter(|(_, _, value)| *value != 0.)
|
||||
.filter(|(from_cc, to_cc, _)| from_cc != to_cc)
|
||||
.collect_vec();
|
||||
// TODO: Put me back if rayon proves problematic
|
||||
// Insert is safe, since an overhead cc can only be a part of one area
|
||||
// overhead_cc_totals.insert(
|
||||
// overhead_cc.clone(),
|
||||
// limited.iter().map(|(_, _, value)| value).sum(),
|
||||
// );
|
||||
limited
|
||||
.collect_vec()
|
||||
})
|
||||
.collect();
|
||||
overhead_other_total.append(&mut totals);
|
||||
@@ -355,24 +323,41 @@ where
|
||||
}
|
||||
|
||||
// Export initial totals for operating departments
|
||||
if show_from {
|
||||
for line in lines.iter() {
|
||||
if !overhead_ccs.contains(&line.department) {
|
||||
// TODO: Should we still output accounts that aren't in the accounts file anyway?
|
||||
if all_accounts_sorted
|
||||
.iter()
|
||||
.find(|account| **account == line.account)
|
||||
.is_some()
|
||||
&& !overhead_ccs.contains(&line.department)
|
||||
&& (show_from
|
||||
// When we write out the final amounts rather than changes,
|
||||
// ensure we still output departments that won't be receiving
|
||||
// any costs.
|
||||
|| !overhead_other_total
|
||||
.iter()
|
||||
.any(|(_, to_department, _)| *to_department == line.department))
|
||||
{
|
||||
if show_from {
|
||||
output.serialize(MovedAmount {
|
||||
account: line.account.clone(),
|
||||
cost_centre: line.department.clone(),
|
||||
value: line.value,
|
||||
from_cost_centre: line.department.clone(),
|
||||
})?;
|
||||
} else {
|
||||
output.serialize(CsvCost {
|
||||
account: line.account.clone(),
|
||||
department: line.department.clone(),
|
||||
value: line.value,
|
||||
})?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, for each cc match total produced previously, sum the overhead cc where overhead cc appears in other cc, then
|
||||
// divide the other cc by this summed amount (thus getting the relative cost)
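As a self-contained sketch of the step that comment describes (the shapes here are assumed, not lifted from the surrounding code): sum each overhead cost centre's statistic totals, then divide every individual total by that sum to get its relative share.

use std::collections::HashMap;

// (overhead_cc, other_cc, statistic_total) -> relative share per pair.
fn relative_shares(totals: &[(String, String, f64)]) -> HashMap<(String, String), f64> {
    let mut sums: HashMap<&str, f64> = HashMap::new();
    for (from_cc, _, value) in totals {
        *sums.entry(from_cc.as_str()).or_insert(0.0) += *value;
    }
    totals
        .iter()
        .map(|(from_cc, to_cc, value)| {
            ((from_cc.clone(), to_cc.clone()), *value / sums[from_cc.as_str()])
        })
        .collect()
}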
|
||||
|
||||
// At this point we convert to our format that's actually used, need to somehow recover the to_cc_type... could build that out from the areas
|
||||
|
||||
// At this point we convert to our format that's actually used in overhead allocation
|
||||
let allocation_rules: Vec<OverheadAllocationRule> = overhead_other_total
|
||||
.iter()
|
||||
.map(
|
||||
@@ -389,6 +374,8 @@ where
|
||||
)
|
||||
.collect();
|
||||
|
||||
// TODO: THIS CAN BE WRONG WHEN USING A FILE WITH ALL PASSES, for now ensure the input movement
|
||||
// file only contains the final pass/outputs.
|
||||
let mut initial_account_costs: HashMap<String, Vec<TotalDepartmentCost>> = HashMap::new();
|
||||
for line in lines {
|
||||
// Only include accounts we've already filtered on (i.e. by account type)
|
||||
@@ -430,7 +417,7 @@ where
|
||||
for cost in results {
|
||||
for department in cost.summed_department_costs {
|
||||
// Any consumers should assume missing cc/account value was 0 (we already ignore overhead, as they all 0 out)
|
||||
if department.value > 0.00001 || department.value < -0.00001 {
|
||||
if department.value != 0_f64 {
|
||||
output.serialize(CsvCost {
|
||||
account: cost.account.clone(),
|
||||
department: department.department,
|
||||
@@ -443,6 +430,35 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_accounts_sorted(
|
||||
use_numeric_accounts: bool,
|
||||
account_type: &String,
|
||||
accounts: &mut Reader<impl Read>,
|
||||
) -> Vec<String> {
|
||||
if use_numeric_accounts {
|
||||
accounts
|
||||
.deserialize::<CsvAccount>()
|
||||
.filter(|account| {
|
||||
account.is_ok() && account.as_ref().unwrap().account_type == *account_type
|
||||
})
|
||||
.map(|line| line.unwrap().code.clone().parse::<i32>().unwrap())
|
||||
.unique()
|
||||
.sorted()
|
||||
.map(|account| account.to_string())
|
||||
.collect()
|
||||
} else {
|
||||
accounts
|
||||
.deserialize::<CsvAccount>()
|
||||
.filter(|account| {
|
||||
account.is_ok() && account.as_ref().unwrap().account_type == *account_type
|
||||
})
|
||||
.map(|line| line.unwrap().code.clone())
|
||||
.unique()
|
||||
.sorted()
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
fn split_allocation_statistic_range(
|
||||
allocation_statistic: &CsvAllocationStatistic,
|
||||
accounts_sorted: &Vec<String>,
|
||||
@@ -661,7 +677,7 @@ fn solve_reciprocal_no_from(
|
||||
&operating_slice_costs,
|
||||
);
|
||||
|
||||
// // Borrow so we don't move between loops
|
||||
// Borrow so we don't move between loops
|
||||
let operating_overhead_mappings = &operating_overhead_mappings_mat;
|
||||
let calculated_overheads = &calculated_overheads;
|
||||
|
||||
@@ -682,7 +698,6 @@ fn solve_reciprocal_no_from(
|
||||
// Redistribute floating point errors (only for ccs we actually allocated from/to)
|
||||
// Considered removing this since redistribution should be done in cost driver calculations, however since that usually
|
||||
// does nothing, we may as well keep this just in case.
|
||||
|
||||
// TODO: Not sure we actually need this, would probably be better to have a better storage format than
|
||||
// csv/string conversions
|
||||
// let initial_cost: f64 = total_costs
|
||||
@@ -696,7 +711,6 @@ fn solve_reciprocal_no_from(
|
||||
// .sum();
|
||||
// let new_cost: f64 = converted_result.iter().map(|cost| cost.value).sum();
|
||||
// let diff = initial_cost - new_cost;
|
||||
|
||||
AccountCost {
|
||||
account: total_costs.account.clone(),
|
||||
summed_department_costs: converted_result
|
||||
@@ -753,7 +767,7 @@ fn solve_reciprocal_with_from<T: ReciprocalAllocationSolver + Sync + Send>(
|
||||
account: total_costs.account.clone(),
|
||||
cost_centre: department.clone(),
|
||||
value,
|
||||
from_cost_centre: department.clone(),
|
||||
from_cost_centre: overhead_department_cost.department.clone(),
|
||||
})
|
||||
.filter(|cost| cost.value != 0_f64)
|
||||
.collect::<Vec<MovedAmount>>()
|
||||
|
||||
@@ -1,13 +1,22 @@
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
io::{Read, Write},
|
||||
collections::{HashMap, HashSet},
|
||||
path::PathBuf,
|
||||
};
|
||||
|
||||
use anyhow::anyhow;
|
||||
use chrono::NaiveDateTime;
|
||||
use csv::Position;
|
||||
use itertools::Itertools;
|
||||
// including dsl works better for completion with rust analyzer
|
||||
use polars::lazy::dsl::*;
|
||||
use polars::prelude::*;
|
||||
use serde::Serialize;
|
||||
|
||||
use super::csv::{read_definitions, BuildFrom, ConstraintType, Definition};
|
||||
use super::csv::{read_definitions, Component, Definition, FileJoin, SourceType};
|
||||
|
||||
// TODO: Polars suggests this, but docs suggest it doesn't have very good platform support
|
||||
//use jemallocator::Jemalloc;
|
||||
// #[global_allocator]
|
||||
// static GLOBAL: Jemalloc = Jemalloc;
|
||||
|
||||
#[derive(Debug, Serialize, Default)]
|
||||
struct Product {
|
||||
@@ -28,120 +37,120 @@ struct Product {
|
||||
source_allocated_amount: Option<f64>,
|
||||
}
|
||||
|
||||
pub struct CreateProductInputs<E, S, T, P, Di>
|
||||
where
|
||||
E: Read,
|
||||
S: Read,
|
||||
T: Read,
|
||||
P: Read,
|
||||
Di: Read,
|
||||
{
|
||||
pub encounters: csv::Reader<E>,
|
||||
pub services: csv::Reader<S>,
|
||||
pub transfers: csv::Reader<T>,
|
||||
pub procedures: csv::Reader<P>,
|
||||
pub diagnoses: csv::Reader<Di>,
|
||||
pub struct InputFile {
|
||||
pub file_path: PathBuf,
|
||||
pub joins: HashMap<PathBuf, String>,
|
||||
// if not specified, then don't allow change in type builds, as there's no way to detect changes over time
|
||||
pub date_order_column: Option<String>,
|
||||
}
|
||||
|
||||
// TODO: Build from linked dataset is pretty hard, it potentially requires knowing everything about the previous year's
// costing run (BSCO, Dataset_Encounter_Cache, etc).
|
||||
pub fn create_products<D, E, S, T, P, Di, O>(
|
||||
definitions: &mut csv::Reader<D>,
|
||||
product_inputs: CreateProductInputs<E, S, T, P, Di>,
|
||||
// TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
|
||||
output: &mut csv::Writer<O>,
|
||||
// TODO: Default to 10 million or something sane
|
||||
batch_size: usize,
|
||||
) -> anyhow::Result<()>
|
||||
where
|
||||
D: Read,
|
||||
E: Read,
|
||||
S: Read,
|
||||
T: Read,
|
||||
P: Read,
|
||||
Di: Read,
|
||||
// TODO: Looks kind of bad, any other way around it? I'd rather not have to depend on crossbeam as well
|
||||
O: Write + Send + 'static,
|
||||
{
|
||||
let mut all_definitions: HashMap<String, Definition> = read_definitions(definitions)?;
|
||||
// Partition the rules by the build from type, so that we'll run all the rules at once for a particular file, which should be much faster
|
||||
// than opening files and scanning one at a time. Could also do batches in files
|
||||
|
||||
let mut mapped_definitions: HashMap<BuildFrom, Vec<Definition>> = HashMap::new();
|
||||
for (_, definition) in all_definitions {
|
||||
mapped_definitions
|
||||
.entry(definition.build_from)
|
||||
.or_insert(vec![])
|
||||
.push(definition);
|
||||
}
|
||||
|
||||
// Now whenever we want to produce a built service, just write it to tx.
|
||||
|
||||
// Note that rust csv can seek to a certain position, so we can read in a batch from a reader, then
|
||||
// seek to that position in the reader (or position 0) if we couldn't find a particular record.
|
||||
// Alternatively, we could store an index of all records (e.g. encounter numbers) that map to their position in the reader,
|
||||
// so we can quickly seek to the appropriate index and read the record.
|
||||
// https://docs.rs/csv/latest/csv/struct.Reader.html#method.seek
|
||||
// Store encounter positions in file, so that later when we read through transfers/whatever we can easily
|
||||
// seek to the correct position quickly in case we have a cache miss
|
||||
let mut encounter_positions: HashMap<String, Position> = HashMap::new();
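The seek idea from the comments above, as a small sketch (the Position is assumed to have been saved from record.position() during the indexing loop that follows):

// Jump straight back to a previously indexed record instead of rescanning.
fn reread_at(path: &std::path::Path, pos: &csv::Position) -> anyhow::Result<csv::StringRecord> {
    let mut reader = csv::Reader::from_path(path)?;
    reader.seek(pos.clone())?;
    let mut record = csv::StringRecord::new();
    reader.read_record(&mut record)?;
    Ok(record)
}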
|
||||
|
||||
// TODO: Alternative to storing encounter positions would be to sort portions of the file bits at a time (I think it's called a merge sort?).
|
||||
|
||||
// TODO: Try with and without rayon, should be able to help I think as we're going through so much data sequentially,
|
||||
// although we're still likely to be bottlenecked by just write-speed
|
||||
let mut encounters = product_inputs.encounters;
|
||||
let headers = encounters.headers()?.clone();
|
||||
|
||||
for encounter in encounters.records() {
|
||||
let encounter = encounter?;
|
||||
let position = encounter.position().unwrap();
|
||||
let encounter: HashMap<String, String> = encounter.deserialize(Some(&headers))?;
|
||||
encounter_positions.insert(
|
||||
encounter.get("EncounterNumber").unwrap().to_string(),
|
||||
position.clone(),
|
||||
);
|
||||
// TODO: For each encounter definition, check this fits the filter criteria/constraints,
|
||||
// and
|
||||
let definitions = mapped_definitions.get(&BuildFrom::Encounter).unwrap();
|
||||
pub fn create_products_polars(
|
||||
definitions_path: PathBuf,
|
||||
inputs: Vec<InputFile>,
|
||||
output_path: PathBuf,
|
||||
) -> anyhow::Result<()> {
|
||||
let definitions = read_definitions(&mut csv::Reader::from_path(definitions_path)?)?;
|
||||
let definitions = definitions.values().collect_vec();
|
||||
for definition in definitions {
|
||||
let matching_filter = (definition.filters.is_empty()
|
||||
|| definition.filters.iter().any(|filter| {
|
||||
let field = encounter.get(filter.field.as_str());
|
||||
if field.is_none() {
|
||||
return false;
|
||||
build_polars(definition, &inputs, &output_path)?;
|
||||
}
|
||||
let field = field.unwrap();
|
||||
if filter.equal {
|
||||
filter.value == *field
|
||||
} else {
|
||||
filter.value != *field
|
||||
}
|
||||
}))
|
||||
&& (definition.constraints.is_empty()
|
||||
|| definition.constraints.iter().any(|constraint| {
|
||||
let field = encounter.get(constraint.field.as_str());
|
||||
if field.is_none() {
|
||||
return false;
|
||||
}
|
||||
let field = field.unwrap();
|
||||
// TODO: Is this just number/datetime? Should probably be an enum? It's not, seems to be E in the test data
|
||||
let field_type = &constraint.source_type;
|
||||
match constraint.constraint_type {
|
||||
ConstraintType::Equal => *field == constraint.value,
|
||||
_ => false,
|
||||
}
|
||||
}));
|
||||
if matching_filter {
|
||||
// Generate the service code
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Generate the built service
|
||||
output.serialize(Product::default())?;
|
||||
}
|
||||
|
||||
// Now do the same with transfers, services, etc, referencing the encounter reader by using the
|
||||
// indexes in encounter_positions
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn build_polars(
|
||||
definition: &Definition,
|
||||
inputs: &Vec<InputFile>,
|
||||
output_path: &PathBuf,
|
||||
) -> anyhow::Result<()> {
|
||||
// 1. Apply filters to limit encounters
|
||||
let filter = definition
|
||||
.filters
|
||||
.iter()
|
||||
.map(|filter| {
|
||||
let col = col(&filter.field);
|
||||
match filter.filter_type {
|
||||
super::csv::FilterType::Equal => col.eq(lit(filter.value.clone())),
|
||||
super::csv::FilterType::GreaterThan => col.gt(lit(filter.value.clone())),
|
||||
super::csv::FilterType::GreaterThanOrEqualTo => {
|
||||
col.gt_eq(lit(filter.value.clone()))
|
||||
}
|
||||
super::csv::FilterType::LessThan => col.lt(lit(filter.value.clone())),
|
||||
super::csv::FilterType::LessThanOrEqualTo => col.lt_eq(lit(filter.value.clone())),
|
||||
super::csv::FilterType::NotEqualTo => col.neq(lit(filter.value.clone())),
|
||||
}
|
||||
})
|
||||
.reduce(|prev, next| prev.and(next));
|
||||
|
||||
let input_file = inputs.iter().find(|input| input.file_path == definition.source)
|
||||
.ok_or(anyhow!("Failed to find valid file"))?;
|
||||
let mut reader = LazyCsvReader::new(&input_file.file_path)
|
||||
.has_header(true)
|
||||
.finish()?;
|
||||
let mut required_files = HashSet::new();
|
||||
for component in &definition.components {
|
||||
if let Component::Field(file, field) = component {
|
||||
required_files.insert(file);
|
||||
}
|
||||
}
|
||||
for filter in &definition.filters {
|
||||
required_files.insert(&filter.file);
|
||||
}
|
||||
for source_type in required_files {
|
||||
// TODO: Better error messages
|
||||
if source_type != &definition.source {
|
||||
let source_file = inputs.iter()
|
||||
.find(|input| input.file_path == definition.source)
|
||||
.ok_or(anyhow!("Input file was not specified for source type"))?;
|
||||
// TODO: Alias the joined columns so they don't potentially clash with the current column
|
||||
let join_reader = LazyCsvReader::new(source_file.file_path.clone()).finish()?;
|
||||
let left_column = input_file
|
||||
.joins
|
||||
.get(source_type)
|
||||
.ok_or(anyhow!("Failed to get left join column"))?;
|
||||
let right_column = source_file
|
||||
.joins
|
||||
.get(&definition.source)
|
||||
.ok_or(anyhow!("Failed to get right join column"))?;
|
||||
reader = reader.inner_join(join_reader, col(&left_column), col(&right_column));
|
||||
}
|
||||
}
|
||||
// TODO: Also work out how to expand rows, so that transfers can have stuff like daily or change in x expanded into multiple rows
|
||||
// Since it's related to time it is probably related to this: https://docs.pola.rs/user-guide/transformations/time-series/parsing/
|
||||
// I'm guessing upsampling is what I'm looking for: https://docs.pola.rs/user-guide/transformations/time-series/resampling/#upsampling-to-a-higher-frequency
|
||||
// Can use different strategies to break the time period down, range can be calculated by using start/end datetime
|
||||
// Wonder if this can be done more generally (e.g. splitting up based on a number?)
|
||||
// Note: This must occur before creating the components, since we'll need to create one for every upsampled row
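One way to picture the expansion described above, independent of whether polars upsampling is used in the end (a sketch, not part of this change): split a row's start/end span into one timestamp per day, each of which then becomes its own built row once the component columns are added.

use chrono::{Duration, NaiveDateTime};

// Expand a [start, end) span into daily steps.
fn daily_steps(start: NaiveDateTime, end: NaiveDateTime) -> Vec<NaiveDateTime> {
    let mut steps = Vec::new();
    let mut current = start;
    while current < end {
        steps.push(current);
        current = current + Duration::days(1);
    }
    steps
}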
|
||||
let mut built_expression = lit("");
|
||||
// Create component columns
|
||||
for component in &definition.components {
|
||||
match component {
|
||||
Component::Constant(constant) => {
|
||||
built_expression = built_expression + lit(constant.clone())
|
||||
|
||||
}
|
||||
// TODO: Do we need to worry about the source type? Might be clashing column names we need to think about earlier then address here?
|
||||
// TODO: What I really want to do is not use source type, instead I want to be referring to a file, which we translate from the sourcetype
|
||||
// to an actual filename. I don't want to be limited by a concept of 'sourcetype' at all, instead the definition should treat everything
|
||||
// the same, and just translate the imported csv format to the necessary files and columns in files that are expected to be input.
|
||||
Component::Field(source_type, column) => {
|
||||
built_expression = built_expression + col(&column)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Build out the rest of the product definition, depending on the input definition
|
||||
let select_columns = [built_expression];
|
||||
|
||||
// Filter and select the required data in one step, so optimiser can speed things up if necessary
|
||||
let mut filtered = match filter {
|
||||
Some(filter) => reader.filter(filter),
|
||||
None => reader,
|
||||
}
|
||||
.select(select_columns)
|
||||
.with_streaming(true)
|
||||
.collect()?;
|
||||
|
||||
let mut file = std::fs::File::create(output_path).unwrap();
|
||||
CsvWriter::new(&mut file).finish(&mut filtered)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1,17 +1,83 @@
|
||||
use std::{collections::HashMap, io::Read};
|
||||
use std::{collections::HashMap, io::Read, path::PathBuf};
|
||||
|
||||
#[derive(Hash, PartialEq, PartialOrd, Ord, Eq)]
|
||||
use anyhow::bail;
|
||||
use chrono::NaiveDateTime;
|
||||
|
||||
#[derive(Hash, PartialEq, PartialOrd)]
|
||||
pub struct Filter {
|
||||
// Equal/not equal
|
||||
pub equal: bool,
|
||||
pub filter_type: FilterType,
|
||||
pub file: PathBuf,
|
||||
pub field: String,
|
||||
pub value: String,
|
||||
// TODO: Probably want to enum this. Source type determines things like filtering
|
||||
// on encounter/patient fields when using something like a transfer
|
||||
pub source_type: String,
|
||||
}
|
||||
|
||||
pub enum ConstraintType {
|
||||
#[derive(Hash, PartialEq, PartialOrd, Eq, Ord, Clone)]
|
||||
pub enum SourceType {
|
||||
CodingDiagnosis,
|
||||
CodingProcedure,
|
||||
Encounter,
|
||||
// TODO: Incident isn't used right now
|
||||
// Incident,
|
||||
Patient,
|
||||
Revenue,
|
||||
Service,
|
||||
Transfer,
|
||||
}
|
||||
|
||||
impl TryFrom<&String> for SourceType {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(value: &String) -> Result<Self, Self::Error> {
|
||||
match value.as_str() {
|
||||
"CD" => Ok(SourceType::CodingDiagnosis),
|
||||
"CP" => Ok(SourceType::CodingProcedure),
|
||||
"E" => Ok(SourceType::Encounter),
|
||||
"P" => Ok(SourceType::Patient),
|
||||
"R" => Ok(SourceType::Revenue),
|
||||
"S" => Ok(SourceType::Service),
|
||||
"T" => Ok(SourceType::Transfer),
|
||||
_ => bail!("Source Type is not valid"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SourceType {
|
||||
fn from_component_source_type(value: &str) -> anyhow::Result<Self> {
|
||||
match value {
|
||||
"CD" => Ok(SourceType::CodingDiagnosis),
|
||||
"CP" => Ok(SourceType::CodingProcedure),
|
||||
"E" => Ok(SourceType::Encounter),
|
||||
"P" => Ok(SourceType::Patient),
|
||||
"R" => Ok(SourceType::Revenue),
|
||||
"S" => Ok(SourceType::Service),
|
||||
"T" => Ok(SourceType::Transfer),
|
||||
"EC" => Ok(SourceType::Encounter),
|
||||
"CDX" => Ok(SourceType::CodingDiagnosis),
|
||||
"CPX" => Ok(SourceType::CodingProcedure),
|
||||
"EX" => Ok(SourceType::Encounter),
|
||||
"PX" => Ok(SourceType::Patient),
|
||||
"RX" => Ok(SourceType::Revenue),
|
||||
"SX" => Ok(SourceType::Service),
|
||||
"TX" => Ok(SourceType::Transfer),
|
||||
_ => bail!("Invalid ComponentSourceType found: {}", value),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_file_path(&self) -> String {
|
||||
match self {
|
||||
SourceType::CodingDiagnosis => "coding_diagnoses.csv".to_owned(),
|
||||
SourceType::CodingProcedure => "coding_procedures.csv".to_owned(),
|
||||
SourceType::Encounter => "encounters.csv".to_owned(),
|
||||
SourceType::Patient => "patients.csv".to_owned(),
|
||||
SourceType::Revenue => "revenues.csv".to_owned(),
|
||||
SourceType::Service => "services.csv".to_owned(),
|
||||
SourceType::Transfer => "transfers.csv".to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Hash, PartialEq, PartialOrd)]
|
||||
pub enum FilterType {
|
||||
Equal,
|
||||
GreaterThan,
|
||||
GreaterThanOrEqualTo,
|
||||
@@ -20,59 +86,37 @@ pub enum ConstraintType {
|
||||
NotEqualTo,
|
||||
}
|
||||
|
||||
impl From<&String> for ConstraintType {
|
||||
fn from(string: &String) -> Self {
|
||||
match string.as_str() {
|
||||
"=" => ConstraintType::Equal,
|
||||
">" => ConstraintType::GreaterThan,
|
||||
">=" => ConstraintType::GreaterThanOrEqualTo,
|
||||
"<" => ConstraintType::LessThan,
|
||||
"<=" => ConstraintType::LessThanOrEqualTo,
|
||||
"!=" => ConstraintType::NotEqualTo,
|
||||
_ => panic!(),
|
||||
impl TryFrom<&String> for FilterType {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(value: &String) -> Result<Self, Self::Error> {
|
||||
match value.as_str() {
|
||||
"=" => Ok(FilterType::Equal),
|
||||
">" => Ok(FilterType::GreaterThan),
|
||||
">=" => Ok(FilterType::GreaterThanOrEqualTo),
|
||||
"<" => Ok(FilterType::LessThan),
|
||||
"<=" => Ok(FilterType::LessThanOrEqualTo),
|
||||
"!=" => Ok(FilterType::NotEqualTo),
|
||||
_ => bail!("Invalid FilterType found: {}", value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Constraint {
|
||||
pub source_type: String,
|
||||
pub field: String,
|
||||
pub constraint_type: ConstraintType,
|
||||
pub value: String,
|
||||
#[derive(PartialEq)]
|
||||
pub enum ExtraType {
|
||||
CodingDiagnosis,
|
||||
CodingProcedure,
|
||||
Encounter,
|
||||
Patient,
|
||||
Revenue,
|
||||
Service,
|
||||
Transfer,
|
||||
}
|
||||
|
||||
pub enum Component {
|
||||
Constant(String),
|
||||
// Even extras are allowed here, just specify the field type (encounter, service, etc) and the field name (incl Extra: or Classification: as appropriate)
|
||||
// TODO: This first string should also be some kind of source type enum, probably shared with source types on filter/constraint
|
||||
Field(String, String),
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)]
|
||||
pub enum BuildFrom {
|
||||
Service,
|
||||
Transfer,
|
||||
Encounter,
|
||||
CodingProcedure,
|
||||
CodingDiagnosis,
|
||||
// TODO: This is hard/expensive, ignore for now as we don't have test data
|
||||
LinkedDataset,
|
||||
Revenue,
|
||||
}
|
||||
|
||||
impl From<&String> for BuildFrom {
|
||||
fn from(string: &String) -> Self {
|
||||
match string.as_str() {
|
||||
"S" => BuildFrom::Service,
|
||||
"E" => BuildFrom::Encounter,
|
||||
"CP" => BuildFrom::CodingProcedure,
|
||||
"CD" => BuildFrom::CodingDiagnosis,
|
||||
"T" => BuildFrom::Transfer,
|
||||
"BS" => BuildFrom::LinkedDataset,
|
||||
"R" => BuildFrom::Revenue,
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
// File, column_name
|
||||
Field(PathBuf, String),
|
||||
}
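As a small illustration of the earlier comment about extras (the field name below is made up; only the Extra: prefix convention comes from this change), a definition's components mixing a constant with an extra-backed field could be assembled like this, written as if inside the products module:

use std::path::PathBuf;

fn example_components() -> Vec<Component> {
    vec![
        // Literal prefix for the built service code.
        Component::Constant("SVC-".to_owned()),
        // Field sourced from another input file, addressed by its Extra: name.
        Component::Field(PathBuf::from("encounters.csv"), "Extra:AdmissionWard".to_owned()),
    ]
}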
|
||||
|
||||
// Frequency per type:
|
||||
@@ -104,17 +148,20 @@ pub enum Frequency {
|
||||
OnePerSource,
|
||||
}
|
||||
|
||||
impl From<&String> for Frequency {
|
||||
fn from(frequency: &String) -> Self {
|
||||
impl TryFrom<&String> for Frequency {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(frequency: &String) -> Result<Self, Self::Error> {
|
||||
match frequency.as_str() {
|
||||
"O" => Frequency::OnePerSource,
|
||||
"DOCW" => Frequency::DailyOrChangeInWard,
|
||||
"D" => Frequency::Daily,
|
||||
"DOCC" => Frequency::DailyOrChangeInClinic,
|
||||
"DEAD" => Frequency::DailyExceptOnAdmissionDay,
|
||||
"OAL" => Frequency::OnlyAdmissionLocation,
|
||||
"CIW" => Frequency::ChangeInWard,
|
||||
_ => panic!(),
|
||||
"O" => Ok(Frequency::OnePerSource),
|
||||
"DOCW" => Ok(Frequency::DailyOrChangeInWard),
|
||||
"D" => Ok(Frequency::Daily),
|
||||
"DOCC" => Ok(Frequency::DailyOrChangeInClinic),
|
||||
"DEAD" => Ok(Frequency::DailyExceptOnAdmissionDay),
|
||||
"OAL" => Ok(Frequency::OnlyAdmissionLocation),
|
||||
"CIW" => Ok(Frequency::ChangeInWard),
|
||||
"DDSD" => Ok(Frequency::DailyExceptOnDischargeDay),
|
||||
_ => bail!("Invalid Frequency found: {}", frequency),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -126,29 +173,30 @@ pub enum RoundingMode {
|
||||
None,
|
||||
}
|
||||
|
||||
impl From<&String> for RoundingMode {
|
||||
fn from(rounding: &String) -> Self {
|
||||
impl TryFrom<&String> for RoundingMode {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(rounding: &String) -> Result<Self, Self::Error> {
|
||||
match rounding.as_str() {
|
||||
"U" => RoundingMode::UpToClosestWhole,
|
||||
"N" => RoundingMode::None,
|
||||
"D" => RoundingMode::DownToClosestWhole,
|
||||
"T" => RoundingMode::ToClosestWhole,
|
||||
// TODO: Just use none when unknown?
|
||||
_ => panic!(),
|
||||
"U" => Ok(RoundingMode::UpToClosestWhole),
|
||||
"N" => Ok(RoundingMode::None),
|
||||
"D" => Ok(RoundingMode::DownToClosestWhole),
|
||||
"T" => Ok(RoundingMode::ToClosestWhole),
|
||||
_ => bail!("Invalid rounding mode found: {}", rounding),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// enum ExtraValue {
|
||||
// string(String),
|
||||
// numeric(f64),
|
||||
// datetime(NaiveDateTime),
|
||||
// }
|
||||
enum ExtraValue {
|
||||
String(String),
|
||||
Numeric(f64),
|
||||
Datetime(NaiveDateTime),
|
||||
}
|
||||
|
||||
// struct Extra {
|
||||
// extraType: String,
|
||||
// value: ExtraValue,
|
||||
// }
|
||||
struct Extra {
|
||||
extra_type: String,
|
||||
value: ExtraValue,
|
||||
}
|
||||
|
||||
// Quantities per type:
|
||||
// Built Service: Constant, SourceQuantity
|
||||
@@ -187,15 +235,22 @@ pub enum DurationFallback {
|
||||
Service,
|
||||
}
|
||||
|
||||
pub struct FileJoin {
|
||||
join_column: String,
|
||||
file: String,
|
||||
file_join_column: String,
|
||||
}
|
||||
|
||||
pub struct Definition {
|
||||
pub name: String,
|
||||
pub components: Vec<Component>,
|
||||
pub filters: Vec<Filter>,
|
||||
pub constraints: Vec<Constraint>,
|
||||
pub build_from: BuildFrom,
|
||||
pub source: PathBuf,
|
||||
pub frequency: Frequency,
|
||||
pub quantity: BuiltQuantity,
|
||||
pub duration_fallback: DurationFallback,
|
||||
// TODO: Need a way to define joins between different files. Or put that at some higher level might be better
|
||||
// At the very least we still need a source/file type, and there should be one file supplied for each type
|
||||
}
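Against that TODO, the join wiring currently lives on InputFile in the products module; a sketch of supplying one (the file names and join column reuse values that appear elsewhere in this change, but the wiring itself is illustrative):

use std::collections::HashMap;
use std::path::PathBuf;

use coster_rs::create_products::InputFile;

// A services input joined back to encounters.csv on EncounterNumber, with
// StartDateTime available for date-ordered builds.
fn service_input() -> InputFile {
    let mut joins = HashMap::new();
    joins.insert(PathBuf::from("encounters.csv"), "EncounterNumber".to_owned());
    InputFile {
        file_path: PathBuf::from("services.csv"),
        joins,
        date_order_column: Some("StartDateTime".to_owned()),
    }
}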
|
||||
|
||||
pub fn read_definitions<R>(
|
||||
@@ -213,33 +268,36 @@ where
"Definition" => {
let quantity_type = record.get("BuiltQuantity").unwrap();
let rounding_mode =
RoundingMode::from(record.get("BuiltQuantityRounding").unwrap());
let quantity = match quantity_type.as_str() {
"S" => Quantity::SourceQuantity,
"C" => Quantity::Constant(
record
.get("BuiltQuantityConstant")
.unwrap()
.parse()
.unwrap(),
),
"H" => Quantity::Hours,
RoundingMode::try_from(record.get("BuiltQuantityRounding").unwrap())?;
let quantity: anyhow::Result<Quantity> = match quantity_type.as_str() {
"S" => Ok(Quantity::SourceQuantity),
"C" => {
let constant_value =
record.get("BuiltQuantityConstant").unwrap().parse()?;
Ok(Quantity::Constant(constant_value))
}
"H" => Ok(Quantity::Hours),
"D" => Ok(Quantity::Days),
// Above 3 are all that's needed for now
_ => panic![],
invalid_quantity => {
anyhow::bail!("Invalid quantity found: {}", invalid_quantity)
}
};
let quantity = quantity?;
let built_quantity = BuiltQuantity {
quantity,
rounding_mode,
};
let build_from = SourceType::try_from(record.get("BuildFrom").unwrap())?;
let frequency = Frequency::try_from(record.get("Frequency").unwrap())?;
all_definitions.insert(
record.get("Name").unwrap().to_owned(),
Definition {
name: record.get("Name").unwrap().to_owned(),
components: vec![],
filters: vec![],
constraints: vec![],
build_from: BuildFrom::from(record.get("BuildFrom").unwrap()),
frequency: Frequency::from(record.get("Frequency").unwrap()),
source: build_from.to_file_path().into(),
frequency,
quantity: built_quantity,
// TODO: Figure this out
// Not even in use, can ignore, or will BuiltService always be the default?
@@ -248,11 +306,21 @@ where
);
}
"Filter" => {
let new_filter = Filter {
equal: record.get("FilterNotIn").unwrap() != "",
let new_filter = {
let source_type =
SourceType::try_from(record.get("FilterSourceType").unwrap())?;
Filter {
// TODO: This looks wrong
filter_type: if record.get("FilterNotIn").unwrap() != "" {
FilterType::Equal
} else {
FilterType::NotEqualTo
},
// TODO: extra/classification types need to append Extra:/Classification: to the start of the field
field: record.get("FilterField").unwrap().clone(),
value: record.get("FilterValue").unwrap().clone(),
source_type: record.get("FilterSourceType").unwrap().clone(),
file: source_type.to_file_path().into(),
}
};
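The TODO above flags the filter_type mapping as suspect. Purely as a guess at the intended behaviour (nothing in the diff establishes this), a non-empty FilterNotIn column would more naturally select the negated filter:

// Guess only, not the committed logic: invert the mapping so a populated
// FilterNotIn column yields the "not equal" variant.
fn filter_type_from_not_in(not_in: &str) -> FilterType {
    if not_in != "" {
        FilterType::NotEqualTo
    } else {
        FilterType::Equal
    }
}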
let all_filters = &mut all_definitions
.get_mut(record.get("Name").unwrap())
@@ -265,11 +333,16 @@ where
"C" => {
Component::Constant(record.get("ComponentValueOrField").unwrap().to_owned())
}
source => Component::Field(
// TODO: Parse into source type enum
source.to_owned(),
"MC" => {
Component::Constant(record.get("ComponentValueOrField").unwrap().to_owned())
}
source => {
let component_source_type = SourceType::from_component_source_type(source)?;
Component::Field(
component_source_type.to_file_path().into(),
record.get("ComponentValueOrField").unwrap().to_owned(),
),
)
}
};
let all_components = &mut all_definitions
.get_mut(record.get("Name").unwrap())
@@ -278,20 +351,41 @@ where
all_components.push(component);
}
"Constraint" => {
let constraint = Constraint {
source_type: record.get("ConstraintSourceType").unwrap().to_owned(),
let constraint = {
let filter_type = FilterType::try_from(record.get("FilterType").unwrap())?;
let source_type =
SourceType::try_from(record.get("ConstraintSourceType").unwrap())?;
Filter {
field: record.get("ConstraintColumn").unwrap().to_owned(),
constraint_type: ConstraintType::from(record.get("ConstraintType").unwrap()),
filter_type,
value: record.get("ConstraintValue").unwrap().to_owned(),
file: source_type.to_file_path().into(),
}
};
let all_constraints = &mut all_definitions
let all_filters = &mut all_definitions
.get_mut(record.get("Name").unwrap())
.unwrap()
.constraints;
all_constraints.push(constraint);
.filters;
all_filters.push(constraint);
}
unknown => println!("Invalid type found: {}", unknown),
}
}
Ok(all_definitions)
}

#[cfg(test)]
mod tests {
use super::read_definitions;

#[test]
fn test_read_definitions() {
let definitions = read_definitions(
&mut csv::Reader::from_path("service_builder_definitions.csv").unwrap(),
);
if let Err(error) = &definitions {
println!("{}", error)
}
assert!(definitions.is_ok())
}
}
@@ -1,5 +1,4 @@
mod create_products;
pub use create_products::*;
pub mod create_products;

// Don't re-export anything in csv atm, it's only used for internal processing
mod csv;
pub mod csv;
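A small sketch of what this visibility change implies for callers, assuming the library crate is named coster_rs (as the libcoster_rs.a artifact suggests); SomeItem is a placeholder name, not an item the diff defines:

// Old (glob re-export at the crate root):
// use coster_rs::SomeItem;
// New (module is public, items are addressed by their module path):
use coster_rs::create_products::SomeItem;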
71
src/upload_to_db.rs
Normal file
@@ -0,0 +1,71 @@
use std::{collections::HashMap, io::Read};

use csv::Reader;
use sqlx::{query, query_builder, Any, Mssql, Pool, QueryBuilder};
// Note: right now this is hard-coded to mssql, because using the Any database type with
// QueryBuilder requires sqlx 0.7 (0.6 and earlier hit a query_builder lifetime issue),
// while sqlx >= 0.7 currently doesn't support mssql.

// Upload data in a file to a db table, with an optional post-script to run,
// such as to move data from the upload table into other tables
// TODO: Add bulk insert options for non-mssql dbs
// TODO: Add fallback insert when bulk insert fails (e.g. due to
// permission errors)
pub async fn upload_file_bulk(
pool: &Pool<sqlx::Mssql>,
file_name: &String,
table_name: &String,
// Mappings from column in file -> column in db
column_mappings: Option<HashMap<String, String>>,
post_script: Option<String>,
) -> anyhow::Result<u64> {
// TODO: Test if the table already exists. If it doesn't, try creating the table

// First try a bulk insert command
// let result = match pool.any_kind() {
// sqlx::any::AnyKind::Mssql => {
let result = sqlx::query(&format!("BULK INSERT {} FROM '{}'", table_name, file_name))
.execute(pool)
.await?;
// }
// };

let mut rows_affected = result.rows_affected();

// let mut rows_affected = match &result {
// Result::Ok(result) => result.rows_affected(),
// // TODO: Log error
// Err(error) => 0_u64,
// };

// TODO: Adjust for various dbmss
if rows_affected == 0 {
let rows: Vec<HashMap<String, String>> = vec![];

let BIND_LIMIT: usize = 65535;
// TODO: Use csv to read from file

// TODO: When bulk insert fails, fall back to a batched SQL insert
// TODO: Columns to insert... needs some kind of mapping from file column name <-> db column
let mut query_builder = QueryBuilder::new(format!("INSERT INTO {}({}) ", table_name, ""));
// TODO: Iterate over all values in file, not the limit
query_builder.push_values(&rows[0..BIND_LIMIT], |mut b, row| {
b.push_bind(row.get("s"));
});
let mut query_builder = query_builder;
// TODO: Looks like this issue: https://github.com/launchbadge/sqlx/issues/1978
// Turns out we need v0.7 for this to not bug out, but mssql is only supported in versions before v0.7,
// so right now we can't use sqlx for this unless we explicitly specify mssql only (not Any) as the db type...
let query = query_builder.build();
let result = query.execute(pool).await?;
rows_affected = result.rows_affected();
}

if let Some(post_script) = post_script {
sqlx::query(&post_script).execute(pool).await?;
}

Ok(rows_affected)
}
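A sketch only, not the committed code, of how the fallback insert flagged by the TODOs above might walk every row in BIND_LIMIT-sized chunks rather than slicing the first chunk; the function name, target column, and row key are placeholders, and sqlx 0.6 with the Mssql driver is assumed, matching the pool type used above:

// Hypothetical helper: batched INSERT that respects the bind limit per statement.
async fn batched_insert(
    pool: &Pool<Mssql>,
    table_name: &str,
    rows: &[HashMap<String, String>],
) -> anyhow::Result<u64> {
    let bind_limit: usize = 65535;
    let mut total = 0_u64;
    for chunk in rows.chunks(bind_limit) {
        // "Value" and "s" are placeholder column/key names for the example.
        let mut builder = QueryBuilder::<Mssql>::new(format!("INSERT INTO {}(Value) ", table_name));
        builder.push_values(chunk, |mut b, row| {
            b.push_bind(row.get("s").cloned().unwrap_or_default());
        });
        let result = builder.build().execute(pool).await?;
        total += result.rows_affected();
    }
    Ok(total)
}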