Fix overhead allocation tests, clean up some imports

This commit is contained in:
2024-12-30 11:08:25 +10:30
parent a9a9b1bec2
commit 7d985977f2
12 changed files with 64200 additions and 46 deletions

View File

@@ -7,7 +7,6 @@ use std::{
};
use chrono::Local;
use futures::lock::Mutex;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use split::{SplitNode, SplitNodeRunner};

View File

@@ -1,14 +1,12 @@
use super::sql::QueryExecutor;
use crate::graph::node::RunnableNode;
use crate::graph::upload_to_db::{upload_file_bulk, DBType};
use crate::graph::upload_to_db::DBType;
use crate::io::{DataSource, RecordSerializer};
use async_trait::async_trait;
use polars::prelude::CsvWriter;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use sqlx::AnyPool;
use std::collections::BTreeMap;
use tiberius::{AuthMethod, Config, EncryptionLevel};
use tiberius::{Config, EncryptionLevel};
use tokio_util::compat::TokioAsyncWriteCompatExt;
/**
@@ -90,7 +88,6 @@ mod tests {
.await?;
let host = container.get_host().await?;
let port = container.get_host_port_ipv4(1433).await?;
let port = 1433;
let connection_string = format!("jdbc:sqlserver://{}:{};username=sa;password=TestOnlyContainer123", host, port).to_owned();
let connection_string = "jdbc:sqlserver://localhost:1433;username=sa;password=TestOnlyContainer123;Encrypt=False".to_owned();
@@ -108,7 +105,7 @@ mod tests {
};
runner.run().await?;
let mut result_contents = String::new();
let result_length = File::open("test_pull.csv")?.read_to_string(&mut result_contents)?;
let _ = File::open("test_pull.csv")?.read_to_string(&mut result_contents)?;
assert_eq!(
"Test
1

View File

@@ -1,6 +1,5 @@
use futures::TryStreamExt;
use futures_io::{AsyncRead, AsyncWrite};
use itertools::Itertools;
use sqlx::{Any, Column, Pool, Row};
use std::borrow::Borrow;
use std::collections::BTreeMap;

View File

@@ -3,8 +3,7 @@ use std::fs::File;
use super::node::RunnableNode;
use crate::io::{DataSource, SourceType};
use async_trait::async_trait;
use polars::io::SerReader;
use polars::prelude::{IntoLazy, LazyFrame, ParquetReader, ScanArgsParquet};
use polars::prelude::{LazyFrame, ScanArgsParquet};
use polars::{
io::SerWriter,
prelude::{CsvWriter, LazyCsvReader, LazyFileListReader},

View File

@@ -51,7 +51,7 @@ pub fn link(
linking_rule: ProcessLinkingRule,
linked_writer: &mut csv::Writer<impl Write>,
) -> anyhow::Result<()> {
let mut source_columns: Vec<&String> = linking_rule
let source_columns: Vec<&String> = linking_rule
.linking_rules
.iter()
.flat_map(|rule| {
@@ -83,10 +83,6 @@ pub fn link(
let mut source_indexes: Vec<HashMap<String, Vec<usize>>> =
vec![HashMap::new(); source_columns.len()];
let mut source_ids: Vec<String> = Vec::new();
// TODO: Merge with source_indexes?
// Also store the actual date value rather than string, so we
// don't need to convert as much later
let source_dates: Vec<HashMap<String, Vec<usize>>>;
for source_record in source_reader.deserialize() {
let source_record: HashMap<String, String> = source_record?;
let current_idx = source_ids.len();

View File

@@ -4,9 +4,8 @@ use std::{
};
use csv::Reader;
use futures::SinkExt;
use itertools::Itertools;
use nalgebra::{DMatrix, Dynamic, LU};
use nalgebra::{DMatrix, Dyn, LU};
use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
use serde::{Deserialize, Serialize};
@@ -77,7 +76,7 @@ pub trait ReciprocalAllocationSolver {
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64>;
}
impl ReciprocalAllocationSolver for LU<f64, Dynamic, Dynamic> {
impl ReciprocalAllocationSolver for LU<f64, Dyn, Dyn> {
fn solve(&self, costs: &DMatrix<f64>) -> DMatrix<f64> {
self.solve(costs).unwrap()
}
@@ -882,37 +881,33 @@ mod tests {
0.00001,
)
.unwrap();
assert_eq!(expected_final_allocations, result);
}
#[test]
fn test_basic_real() {
let result = reciprocal_allocation(
&mut csv::Reader::from_path("test_line.csv").unwrap(),
&mut csv::Reader::from_path("test_account.csv").unwrap(),
&mut csv::Reader::from_path("test_alloc_stat.csv").unwrap(),
&mut csv::Reader::from_path("test_area.csv").unwrap(),
&mut csv::Reader::from_path("test_costcentre.csv").unwrap(),
&mut csv::Writer::from_path("test_output_alloc_stat.csv").unwrap(),
true,
false,
true,
"E".to_owned(),
false,
0.1,
);
assert!(result.is_ok());
assert_eq!(expected_final_allocations.len(), result.len());
let expected_account = &expected_final_allocations[0];
let final_account = &result[0];
assert_eq!(expected_account.account, final_account.account);
let expected_department_a = &expected_account.summed_department_costs[0];
let expected_department_b = &expected_account.summed_department_costs[1];
for final_department in &final_account.summed_department_costs {
if final_department.department == expected_department_a.department {
assert_eq!(*expected_department_a, *final_department);
}else if final_department.department == expected_department_b.department {
assert_eq!(*expected_department_b, *final_department);
}
else {
panic!("Unknown department found!");
}
}
}
#[test]
fn test_real() {
let result = reciprocal_allocation(
&mut csv::Reader::from_path("output.csv").unwrap(),
&mut csv::Reader::from_path("account.csv").unwrap(),
&mut csv::Reader::from_path("allocstat.csv").unwrap(),
&mut csv::Reader::from_path("area.csv").unwrap(),
&mut csv::Reader::from_path("costcentre.csv").unwrap(),
&mut csv::Writer::from_path("output_alloc_stat.csv").unwrap(),
&mut csv::Reader::from_path("testing/input/moved_lines.csv").unwrap(),
&mut csv::Reader::from_path("testing/input/account.csv").unwrap(),
&mut csv::Reader::from_path("testing/input/allocstat.csv").unwrap(),
&mut csv::Reader::from_path("testing/input/area.csv").unwrap(),
&mut csv::Reader::from_path("testing/input/costcentre.csv").unwrap(),
&mut csv::Writer::from_path("testing/output/output_alloc_stat.csv").unwrap(),
false,
false,
true,

View File

@@ -90,7 +90,7 @@ pub fn build_polars(
.finish()?;
let mut required_files = HashSet::new();
for component in &definition.components {
if let Component::Field(file, field) = component {
if let Component::Field(file, _) = component {
required_files.insert(file);
}
}
@@ -134,7 +134,7 @@ pub fn build_polars(
// TODO: Stop relying on the source type here. A definition should refer to a file directly, and the source type should be translated
// to an actual filename beforehand. The definition shouldn't be limited by the concept of a 'source type' at all; it should treat every
// input the same, and just translate the imported CSV format to the files (and the columns in those files) that are expected as input.
Component::Field(source_type, column) => {
Component::Field(_, column) => {
built_expression = built_expression + col(column)
}
}