Add custom graph executor and implement filter node to test it (#2)

Reviewed-on: vato007/coster-rs#2
This commit is contained in:
2024-07-28 16:41:49 +09:30
parent 25180d3616
commit 5acee8c889
12 changed files with 1123 additions and 500 deletions

111
src/io.rs
View File

@@ -1,12 +1,21 @@
use std::io::{Read, Seek, Write};
use std::{
collections::BTreeMap,
io::{Read, Seek, Write},
};
use anyhow::bail;
use csv::Position;
use rmp_serde::{decode::ReadReader, Deserializer, Serializer};
use serde::{de::DeserializeOwned, Deserialize, Serialize};
/// A sink that records are written to one at a time.
///
/// This file implements it for `csv::Writer<W>` and for `rmp_serde::Serializer<W>`.
pub trait RecordSerializer {
/// Writes a single record through its serde `Serialize` implementation.
fn serialize(&mut self, record: impl Serialize) -> anyhow::Result<()>;
// For when serde serialization can't be used. Forcing BTreeMap to ensure keys/values are
// sorted consistently
/// Writes a header derived from `record`. The `csv::Writer` implementation emits the map's
/// keys; a `BTreeMap` iterates in key order, so keys and values line up column for column.
fn write_header(&mut self, record: &BTreeMap<String, String>) -> anyhow::Result<()>;
/// Writes one record supplied as a string map. The `csv::Writer` implementation emits the
/// map's values, in the same key order used by `write_header`.
fn write_record(&mut self, record: &BTreeMap<String, String>) -> anyhow::Result<()>;
/// Flushes the serializer. The `csv::Writer` implementation forwards to the writer's own
/// `flush`.
fn flush(&mut self) -> anyhow::Result<()>;
}
impl<W: Write> RecordSerializer for csv::Writer<W> {
@@ -14,6 +23,21 @@ impl<W: Write> RecordSerializer for csv::Writer<W> {
self.serialize(record)?;
Ok(())
}
fn flush(&mut self) -> anyhow::Result<()> {
self.flush()?;
Ok(())
}
fn write_header(&mut self, record: &BTreeMap<String, String>) -> anyhow::Result<()> {
self.write_record(record.keys())?;
Ok(())
}
fn write_record(&mut self, record: &BTreeMap<String, String>) -> anyhow::Result<()> {
self.write_record(record.values())?;
Ok(())
}
}
impl<W: Write> RecordSerializer for Serializer<W> {
@@ -21,34 +45,28 @@ impl<W: Write> RecordSerializer for Serializer<W> {
record.serialize(self)?;
Ok(())
}
}
// TODO: I still don't like this API; we should split deserialize and position at the least,
// and we need a way to get the current position (otherwise it's left to consumers to track
// the current position themselves)
pub trait RecordDeserializer<P> {
fn deserialize<D: DeserializeOwned>(&mut self) -> Result<Option<D>, anyhow::Error>;
// No-op flush: nothing is written or flushed here, and the call always succeeds.
// NOTE(review): the enclosing impl is not contiguous in this view; presumably this belongs to
// the `RecordSerializer` impl for `rmp_serde::Serializer<W>` — confirm.
fn flush(&mut self) -> anyhow::Result<()> {
Ok(())
}
// Move the deserializer to the specified position in the underlying reader
fn position(&mut self, record: P) -> anyhow::Result<()>;
}
// No-op header: the supplied map is ignored and nothing is written; always succeeds.
// NOTE(review): the enclosing impl is not contiguous in this view; presumably this belongs to
// the `RecordSerializer` impl for `rmp_serde::Serializer<W>` — confirm.
fn write_header(&mut self, _: &BTreeMap<String, String>) -> anyhow::Result<()> {
Ok(())
}
/// Thin wrapper that owns a `csv::Reader<R>`.
struct CsvMessagePackDeserializer<R> {
// The wrapped CSV reader; `new` builds it with `csv::Reader::from_reader`.
reader: csv::Reader<R>,
}
impl<R: Read> CsvMessagePackDeserializer<R> {
fn new(reader: R) -> CsvMessagePackDeserializer<R> {
CsvMessagePackDeserializer {
reader: csv::Reader::from_reader(reader),
}
// Writes the whole string map as a single record by handing it to `self.serialize`.
// NOTE(review): which `serialize` this resolves to depends on the enclosing impl, which is not
// contiguous in this view — presumably `RecordSerializer::serialize` on
// `rmp_serde::Serializer<W>`; confirm.
fn write_record(&mut self, record: &BTreeMap<String, String>) -> anyhow::Result<()> {
self.serialize(record)?;
Ok(())
}
}
impl<R: Read + Seek> RecordDeserializer<Position> for CsvMessagePackDeserializer<R> {
/// A source that records are read from one at a time.
pub trait RecordDeserializer {
/// Reads the next record as a `D`.
///
/// In the `csv::Reader` implementation in this file, `Ok(Some(record))` is returned for a
/// successfully decoded record and `Ok(None)` once the reader yields no further records.
fn deserialize<D: DeserializeOwned>(&mut self) -> Result<Option<D>, anyhow::Error>;
}
impl<R: Read> RecordDeserializer for csv::Reader<R> {
fn deserialize<D: DeserializeOwned>(&mut self) -> Result<Option<D>, anyhow::Error> {
// TODO: This isn't great, need to somehow maintain the state/position
match self.reader.deserialize().next() {
match self.deserialize().next() {
None => Ok(Option::None),
Some(result) => match result {
Ok(ok) => Ok(Option::Some(ok)),
@@ -56,56 +74,13 @@ impl<R: Read + Seek> RecordDeserializer<Position> for CsvMessagePackDeserializer
},
}
}
fn position(&mut self, record: Position) -> anyhow::Result<()> {
self.reader.seek(record)?;
Ok(())
}
}
/// MessagePack record reader that remembers the byte offset at which each record started.
struct MessagePackDeserializer<R: Read> {
// `rmp_serde` deserializer reading from the underlying `R`.
reader: Deserializer<ReadReader<R>>,
// Stream offsets captured by `deserialize` before it reads a record; `position` indexes into
// this list to seek back to a record that has already been seen.
record_positions: Vec<u64>,
}
impl<R: Read + Seek> MessagePackDeserializer<R> {
    /// Wraps `reader` in an `rmp_serde` deserializer, starting with no recorded record offsets.
    fn new(reader: R) -> MessagePackDeserializer<R> {
        let record_positions = Vec::new();
        let reader = Deserializer::new(reader);
        Self {
            reader,
            record_positions,
        }
    }
}
// TODO: These need tests
impl<R: Read + Seek> RecordDeserializer<usize> for MessagePackDeserializer<R> {
impl<R: Read + Seek> RecordDeserializer for Deserializer<ReadReader<R>> {
fn deserialize<D: DeserializeOwned>(&mut self) -> Result<Option<D>, anyhow::Error> {
// Keep track of byte position of each record, in case we want to go back later
let current_position = self.reader.get_mut().stream_position()?;
if self
.record_positions
.last()
.map_or(true, |position| *position < current_position)
{
self.record_positions.push(current_position);
}
match Deserialize::deserialize(&mut self.reader) {
match Deserialize::deserialize(self) {
Ok(value) => Ok(value),
Err(value) => Err(anyhow::Error::from(value)),
}
}
/// Seeks the underlying reader back to the start of record number `record`.
///
/// Only records whose offset has already been captured in `record_positions` can be targeted;
/// any other index yields an error and leaves the reader where it was.
fn position(&mut self, record: usize) -> anyhow::Result<()> {
    match self.record_positions.get(record).copied() {
        // Offset is known: jump straight to it in the underlying reader.
        Some(offset) => {
            self.reader
                .get_mut()
                .seek(std::io::SeekFrom::Start(offset))?;
            Ok(())
        }
        // Offset not captured yet: the caller has to read forward with `deserialize` first.
        None => bail!("Record hasn't been read yet, please use deserialize to find the record"),
    }
}
}