Skip to content

Commit

Permalink
wip: port VCD parser to use BufRead trait
Browse files Browse the repository at this point in the history
  • Loading branch information
ekiwi committed Oct 31, 2024
1 parent 2fa9677 commit 296e35c
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 102 deletions.
13 changes: 13 additions & 0 deletions wellen/src/simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::{
};
use std::collections::HashMap;
use std::fmt::{Debug, Formatter};
use std::io::{BufRead, Seek};

/// Read a waveform file with the default options. Reads in header and body at once.
pub fn read<P: AsRef<std::path::Path>>(filename: P) -> Result<Waveform> {
Expand All @@ -30,6 +31,18 @@ pub fn read_with_options<P: AsRef<std::path::Path>>(
))
}

/// Read a waveform from any `BufRead + Seek` input instead of a file path.
///
/// Uses [`LoadOptions::default`] and loads the header and the body in a single call.
/// `None` is passed as the last argument of `viewers::read_body`
/// (presumably the optional progress tracker — confirm against `viewers`).
///
/// # Errors
/// Propagates any error returned by `viewers::read_header` or `viewers::read_body`.
pub fn read_from_reader<R: BufRead + Seek + Send + Sync + 'static>(input: R) -> Result<Waveform> {
    let default_options = LoadOptions::default();
    let parsed_header = viewers::read_header(input, &default_options)?;
    // Bind the hierarchy once: it is borrowed while the body is read and then
    // handed over to the resulting `Waveform`.
    let hierarchy = parsed_header.hierarchy;
    let parsed_body = viewers::read_body(parsed_header.body, &hierarchy, None)?;
    Ok(Waveform::new(hierarchy, parsed_body.source, parsed_body.time_table))
}

/// Provides file format independent access to a waveform file.
pub struct Waveform {
hierarchy: Hierarchy,
Expand Down
203 changes: 108 additions & 95 deletions wellen/src/vcd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,26 @@ pub fn read_body<R: BufRead + Seek>(
progress: Option<ProgressCount>,
) -> Result<(SignalSource, TimeTable)> {
let (source, time_table) = match data.input {
Input::Reader(input) => todo!("parse VCD from reader"),
Input::Reader(mut input) => {
// determine body length
let start = input.stream_position()?;
input.seek(SeekFrom::End(0))?;
let end = input.stream_position()?;
input.seek(SeekFrom::Start(start))?;
let input_len = (end - start) as usize;

// encode signals
let encoder = read_single_stream_of_values(
&mut input,
input_len - 1,
true,
true,
hierarchy,
&data.lookup,
progress,
)?;
encoder.finish()
}
Input::Mmap(mmap) => read_values(
&mmap[data.header_len..],
data.multi_thread,
Expand Down Expand Up @@ -963,7 +982,7 @@ fn read_values(
) -> Result<(SignalSource, TimeTable)> {
if multi_thread {
let chunks = determine_thread_chunks(input.len());
let encoders: Vec<crate::wavemem::Encoder> = chunks
let encoders: Result<Vec<crate::wavemem::Encoder>> = chunks
.par_iter()
.map(|(start, len)| {
let is_first = *start == 0;
Expand All @@ -975,8 +994,9 @@ fn read_values(
// TODO: deal with \n\r
before == b'\n'
};
let mut inp = std::io::Cursor::new(&input[*start..]);
read_single_stream_of_values(
&input[*start..],
&mut inp,
*len - 1,
is_first,
starts_on_new_line,
Expand All @@ -986,6 +1006,7 @@ fn read_values(
)
})
.collect();
let encoders = encoders?;

// combine encoders
let mut encoder_iter = encoders.into_iter();
Expand All @@ -995,42 +1016,43 @@ fn read_values(
}
Ok(encoder.finish())
} else {
let mut inp = std::io::Cursor::new(input);
let encoder = read_single_stream_of_values(
input,
&mut inp,
input.len() - 1,
true,
true,
hierarchy,
lookup,
progress,
);
)?;
Ok(encoder.finish())
}
}

fn read_single_stream_of_values(
input: &[u8],
fn read_single_stream_of_values<R: BufRead + Seek>(
input: &mut R,
stop_pos: usize,
is_first: bool,
starts_on_new_line: bool,
hierarchy: &Hierarchy,
lookup: &IdLookup,
progress: Option<ProgressCount>,
) -> crate::wavemem::Encoder {
) -> Result<crate::wavemem::Encoder> {
let mut encoder = crate::wavemem::Encoder::new(hierarchy);

let (input2, offset) = if starts_on_new_line {
(input, 0)
} else {
advance_to_first_newline(input)
};
let mut reader = BodyReader::new(input2);
if !starts_on_new_line {
// if we start in the middle of a line, we need to skip it
let mut dummy = Vec::new();
input.read_until(b'\n', &mut dummy)?;
}
let mut reader = BodyReader::new(input);
// We only start recording once we have encountered our first time step
let mut found_first_time_step = false;

// progress tracking
let mut last_reported_pos = 0;
let report_increments = std::cmp::max(input2.len() as u64 / 1000, 512);
let report_increments = std::cmp::max(stop_pos as u64 / 1000, 512);

loop {
if let Some((pos, cmd)) = reader.next() {
Expand Down Expand Up @@ -1094,105 +1116,101 @@ fn advance_to_first_newline(input: &[u8]) -> (&[u8], usize) {
(&[], 0) // no whitespaces found
}

struct BodyReader<'a> {
input: &'a [u8],
struct BodyReader<'a, R: BufRead> {
input: &'a R,
// state
pos: usize,
token: Vec<u8>,
prev_token: Vec<u8>,
// statistics
lines_read: usize,
}

const ASCII_ZERO: &[u8] = b"0";

impl<'a> BodyReader<'a> {
fn new(input: &'a [u8]) -> Self {
impl<'a, R: BufRead> BodyReader<'a, R> {
fn new(input: &'a mut R) -> Self {
BodyReader {
input,
pos: 0,
token: Vec::with_capacity(64),
prev_token: Vec::with_capacity(64),
lines_read: 0,
}
}

#[inline]
fn try_finish_token(
&mut self,
pos: usize,
token_start: &mut Option<usize>,
prev_token: &mut Option<&'a [u8]>,
search_for_end: &mut bool,
) -> Option<BodyCmd<'a>> {
match *token_start {
None => None,
Some(start) => {
let token = &self.input[start..pos];
if token.is_empty() {
return None;
}
if *search_for_end {
*search_for_end = token != b"$end";
// consume token and return
*token_start = None;
return None;
fn try_finish_token(&mut self, pos: usize, search_for_end: &mut bool) -> Option<BodyCmd<'a>> {
// no token means that there is nothing to do
if self.token.is_empty() {
return None;
}

// if we are looking for the $end token, we discard everything else
if *search_for_end {
// did we find the end token?
*search_for_end = self.token != b"$end";
// consume token and return
self.token.clear();
return None;
}

// if there was no previous token
if self.prev_token.is_empty() {
if self.token.len() == 1 {
// too short, wait for more input
return None;
}

// 1-token commands are binary changes or time commands
match self.token[0] {
b'#' => Some(BodyCmd::Time(&self.token[1..])),
b'0' | b'1' | b'z' | b'Z' | b'x' | b'X' | b'h' | b'H' | b'u' | b'U' | b'w'
| b'W' | b'l' | b'L' | b'-' => {
Some(BodyCmd::Value(&self.token[0..1], &self.token[1..]))
}
let ret = match *prev_token {
None => {
if token.len() == 1 {
// too short
return None;
_ => {
// parse command tokens
match self.token.as_slice() {
b"$dumpall" => {
// interpret dumpall as indicating timestep zero
self.token.clear();
return Some(BodyCmd::Time(ASCII_ZERO));
}
// 1-token commands are binary changes or time commands
match token[0] {
b'#' => Some(BodyCmd::Time(&token[1..])),
b'0' | b'1' | b'z' | b'Z' | b'x' | b'X' | b'h' | b'H' | b'u' | b'U'
| b'w' | b'W' | b'l' | b'L' | b'-' => {
Some(BodyCmd::Value(&token[0..1], &token[1..]))
}
_ => {
if token == b"$dumpall" {
// interpret dumpall as indicating timestep zero
return Some(BodyCmd::Time(ASCII_ZERO));
}
if token == b"$comment" {
// drop token, but start searching for $end in order to skip the comment
*search_for_end = true;
} else if token != b"$dumpvars"
&& token != b"$end"
&& token != b"$dumpoff"
&& token != b"$dumpon"
{
// ignore dumpvars, dumpoff, dumpon, and end command
*prev_token = Some(token);
}
None
}
b"$comment" => {
// drop token, but start searching for $end in order to skip the comment
*search_for_end = true;
}
b"$dumpvars" | b"$end" | b"$dumpoff" | b"$dumpon" => {
// ignore dumpvars, dumpoff, dumpon, and end command
self.prev_token.copy_from_slice(self.token.as_slice());
}
_ => {} // do nothing
}
Some(first) => {
let cmd = match first[0] {
b'b' | b'B' | b'r' | b'R' | b's' | b'S' => {
BodyCmd::Value(&first[0..], token)
}
_ => {
panic!(
"Unexpected tokens: `{}` and `{}` ({} lines after header)",
String::from_utf8_lossy(first),
String::from_utf8_lossy(token),
self.lines_read
);
}
};
*prev_token = None;
Some(cmd)
}
};
*token_start = None;
ret
// wait for more input
None
}
}
} else {
let cmd = match self.prev_token[0] {
b'b' | b'B' | b'r' | b'R' | b's' | b'S' => {
BodyCmd::Value(&self.prev_token[0..], self.token.as_slice())
}
_ => {
panic!(
"Unexpected tokens: `{}` and `{}` ({} lines after header)",
String::from_utf8_lossy(self.prev_token.as_slice()),
String::from_utf8_lossy(self.token.as_slice()),
self.lines_read
);
}
};
Some(cmd)
}
}
}

impl<'a> Iterator for BodyReader<'a> {
impl<'a, R: BufRead> Iterator for BodyReader<'a, R> {
type Item = (usize, BodyCmd<'a>);

/// returns the starting position and the body of the command
Expand Down Expand Up @@ -1253,12 +1271,7 @@ impl<'a> Iterator for BodyReader<'a> {
// update final position
self.pos = self.input.len();
// check to see if there is a final token at the end
match self.try_finish_token(
self.pos,
&mut token_start,
&mut prev_token,
&mut search_for_end,
) {
match self.try_finish_token(self.pos, &mut search_for_end) {
None => {}
Some(cmd) => {
return Some((start_pos, cmd));
Expand Down
Loading

0 comments on commit 296e35c

Please sign in to comment.