Add custom graph executor and implement filter node to test it (#2)
Reviewed-on: vato007/coster-rs#2
This commit is contained in:
111
src/io.rs
111
src/io.rs
@@ -1,12 +1,21 @@
|
||||
use std::io::{Read, Seek, Write};
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
io::{Read, Seek, Write},
|
||||
};
|
||||
|
||||
use anyhow::bail;
|
||||
use csv::Position;
|
||||
use rmp_serde::{decode::ReadReader, Deserializer, Serializer};
|
||||
use serde::{de::DeserializeOwned, Deserialize, Serialize};
|
||||
|
||||
/// Sink for writing records, either through serde or as string key/value maps.
///
/// Implemented in this file for `csv::Writer<W>` and for the MessagePack `Serializer<W>`.
pub trait RecordSerializer {
|
||||
/// Serializes a single record through its `Serialize` implementation.
fn serialize(&mut self, record: impl Serialize) -> anyhow::Result<()>;
|
||||
|
||||
// For when serde serialization can't be used. Forcing BTreeMap to ensure keys/values are
|
||||
// sorted consistently
|
||||
/// Writes a header derived from `record`; the csv implementation emits the map's keys as a row.
fn write_header(&mut self, record: &BTreeMap<String, String>) -> anyhow::Result<()>;
|
||||
/// Writes the contents of `record`; the csv implementation emits the map's values as a row.
fn write_record(&mut self, record: &BTreeMap<String, String>) -> anyhow::Result<()>;
|
||||
|
||||
/// Flushes pending output; the csv implementation forwards to `csv::Writer`'s own `flush`.
fn flush(&mut self) -> anyhow::Result<()>;
|
||||
}
|
||||
|
||||
impl<W: Write> RecordSerializer for csv::Writer<W> {
|
||||
@@ -14,6 +23,21 @@ impl<W: Write> RecordSerializer for csv::Writer<W> {
|
||||
self.serialize(record)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Flushes the csv writer.
///
/// `self.flush()` resolves to `csv::Writer`'s inherent `flush` (inherent methods take
/// precedence over trait methods), so this does not recurse; `?` converts its error
/// into `anyhow::Error`.
fn flush(&mut self) -> anyhow::Result<()> {
|
||||
self.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes the keys of `record` as one csv row, in the map's (sorted) key order.
fn write_header(&mut self, record: &BTreeMap<String, String>) -> anyhow::Result<()> {
|
||||
// Calls `csv::Writer`'s inherent `write_record`, which takes an iterator of fields.
self.write_record(record.keys())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes the values of `record` as one csv row, ordered by their keys.
fn write_record(&mut self, record: &BTreeMap<String, String>) -> anyhow::Result<()> {
|
||||
// Resolves to `csv::Writer`'s inherent `write_record` (iterator of fields), not this
// trait method, so there is no recursion.
self.write_record(record.values())?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Write> RecordSerializer for Serializer<W> {
|
||||
@@ -21,34 +45,28 @@ impl<W: Write> RecordSerializer for Serializer<W> {
|
||||
record.serialize(self)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: I still don't like this API; deserialize and position should be split at the least,
|
||||
// and we need a way to get the current position (otherwise it's left to consumers to track the
|
||||
// current position)
|
||||
pub trait RecordDeserializer<P> {
|
||||
fn deserialize<D: DeserializeOwned>(&mut self) -> Result<Option<D>, anyhow::Error>;
|
||||
fn flush(&mut self) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Move the deserializer to the specified position in the underlying reader
|
||||
fn position(&mut self, record: P) -> anyhow::Result<()>;
|
||||
}
|
||||
fn write_header(&mut self, _: &BTreeMap<String, String>) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct CsvMessagePackDeserializer<R> {
|
||||
reader: csv::Reader<R>,
|
||||
}
|
||||
|
||||
impl<R: Read> CsvMessagePackDeserializer<R> {
|
||||
fn new(reader: R) -> CsvMessagePackDeserializer<R> {
|
||||
CsvMessagePackDeserializer {
|
||||
reader: csv::Reader::from_reader(reader),
|
||||
}
|
||||
fn write_record(&mut self, record: &BTreeMap<String, String>) -> anyhow::Result<()> {
|
||||
self.serialize(record)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> RecordDeserializer<Position> for CsvMessagePackDeserializer<R> {
|
||||
/// Source of records that are decoded one at a time with serde.
pub trait RecordDeserializer {
|
||||
/// Attempts to read the next record as a `D`.
///
/// The csv implementation in this file yields `Ok(None)` once its record iterator is
/// exhausted; failures are reported as `anyhow::Error`.
fn deserialize<D: DeserializeOwned>(&mut self) -> Result<Option<D>, anyhow::Error>;
|
||||
}
|
||||
|
||||
impl<R: Read> RecordDeserializer for csv::Reader<R> {
|
||||
fn deserialize<D: DeserializeOwned>(&mut self) -> Result<Option<D>, anyhow::Error> {
|
||||
// TODO: This isn't great, need to somehow maintain the state/position
|
||||
match self.reader.deserialize().next() {
|
||||
match self.deserialize().next() {
|
||||
None => Ok(Option::None),
|
||||
Some(result) => match result {
|
||||
Ok(ok) => Ok(Option::Some(ok)),
|
||||
@@ -56,56 +74,13 @@ impl<R: Read + Seek> RecordDeserializer<Position> for CsvMessagePackDeserializer
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn position(&mut self, record: Position) -> anyhow::Result<()> {
|
||||
self.reader.seek(record)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// MessagePack reader that remembers where the records it has read begin.
struct MessagePackDeserializer<R: Read> {
|
||||
/// `rmp_serde` deserializer wrapping the underlying reader.
reader: Deserializer<ReadReader<R>>,
|
||||
/// Stream positions captured at the start of `deserialize` calls; indexed by record
/// number when seeking back with `position`.
record_positions: Vec<u64>,
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> MessagePackDeserializer<R> {
|
||||
/// Wraps `reader` in a MessagePack deserializer with no record positions recorded yet.
fn new(reader: R) -> MessagePackDeserializer<R> {
|
||||
MessagePackDeserializer {
|
||||
reader: Deserializer::new(reader),
|
||||
record_positions: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: These need tests
|
||||
impl<R: Read + Seek> RecordDeserializer<usize> for MessagePackDeserializer<R> {
|
||||
impl<R: Read + Seek> RecordDeserializer for Deserializer<ReadReader<R>> {
|
||||
fn deserialize<D: DeserializeOwned>(&mut self) -> Result<Option<D>, anyhow::Error> {
|
||||
// Keep track of byte position of each record, in case we want to go back later
|
||||
let current_position = self.reader.get_mut().stream_position()?;
|
||||
if self
|
||||
.record_positions
|
||||
.last()
|
||||
.map_or(true, |position| *position < current_position)
|
||||
{
|
||||
self.record_positions.push(current_position);
|
||||
}
|
||||
match Deserialize::deserialize(&mut self.reader) {
|
||||
match Deserialize::deserialize(self) {
|
||||
Ok(value) => Ok(value),
|
||||
Err(value) => Err(anyhow::Error::from(value)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Seeks the underlying reader to the recorded start of the record at index `record`.
///
/// # Errors
/// Fails if no position has been recorded for `record` yet (it has not been reached by
/// `deserialize`), or if the seek itself fails.
fn position(&mut self, record: usize) -> anyhow::Result<()> {
|
||||
let reader = self.reader.get_mut();
|
||||
// Unsigned so can't be less than 0
|
||||
if self.record_positions.len() > record {
|
||||
// Go to position in reader
|
||||
let position = self.record_positions[record];
|
||||
reader.seek(std::io::SeekFrom::Start(position))?;
|
||||
} else {
|
||||
// Scanning forward to a record that hasn't been read is not implemented here;
// report it so the caller can advance with `deserialize` first
|
||||
bail!("Record hasn't been read yet, please use deserialize to find the record")
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user