From 296e35ce6cf4db12dd18b3c83ca21b59a9c60c6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20L=C3=A4ufer?= Date: Thu, 31 Oct 2024 13:13:59 -0400 Subject: [PATCH] wip: port VCD parser to use ReadBuf trait --- wellen/src/simple.rs | 13 +++ wellen/src/vcd.rs | 203 ++++++++++++++++++++----------------- wellen/tests/diff_tests.rs | 36 +++++-- 3 files changed, 150 insertions(+), 102 deletions(-) diff --git a/wellen/src/simple.rs b/wellen/src/simple.rs index c396544..e38629f 100644 --- a/wellen/src/simple.rs +++ b/wellen/src/simple.rs @@ -10,6 +10,7 @@ use crate::{ }; use std::collections::HashMap; use std::fmt::{Debug, Formatter}; +use std::io::{BufRead, Seek}; /// Read a waveform file with the default options. Reads in header and body at once. pub fn read>(filename: P) -> Result { @@ -30,6 +31,18 @@ pub fn read_with_options>( )) } +/// Read from something that is not a file. +pub fn read_from_reader(input: R) -> Result { + let options = LoadOptions::default(); + let header = viewers::read_header(input, &options)?; + let body = viewers::read_body(header.body, &header.hierarchy, None)?; + Ok(Waveform::new( + header.hierarchy, + body.source, + body.time_table, + )) +} + /// Provides file format independent access to a waveform file. pub struct Waveform { hierarchy: Hierarchy, diff --git a/wellen/src/vcd.rs b/wellen/src/vcd.rs index f0a1d8a..9cb56fa 100644 --- a/wellen/src/vcd.rs +++ b/wellen/src/vcd.rs @@ -117,7 +117,26 @@ pub fn read_body( progress: Option, ) -> Result<(SignalSource, TimeTable)> { let (source, time_table) = match data.input { - Input::Reader(input) => todo!("parse VCD from reader"), + Input::Reader(mut input) => { + // determine body length + let start = input.stream_position()?; + input.seek(SeekFrom::End(0))?; + let end = input.stream_position()?; + input.seek(SeekFrom::Start(start))?; + let input_len = (end - start) as usize; + + // encode signals + let encoder = read_single_stream_of_values( + &mut input, + input_len - 1, + true, + true, + hierarchy, + &data.lookup, + progress, + )?; + encoder.finish() + } Input::Mmap(mmap) => read_values( &mmap[data.header_len..], data.multi_thread, @@ -963,7 +982,7 @@ fn read_values( ) -> Result<(SignalSource, TimeTable)> { if multi_thread { let chunks = determine_thread_chunks(input.len()); - let encoders: Vec = chunks + let encoders: Result> = chunks .par_iter() .map(|(start, len)| { let is_first = *start == 0; @@ -975,8 +994,9 @@ fn read_values( // TODO: deal with \n\r before == b'\n' }; + let mut inp = std::io::Cursor::new(&input[*start..]); read_single_stream_of_values( - &input[*start..], + &mut inp, *len - 1, is_first, starts_on_new_line, @@ -986,6 +1006,7 @@ fn read_values( ) }) .collect(); + let encoders = encoders?; // combine encoders let mut encoder_iter = encoders.into_iter(); @@ -995,42 +1016,43 @@ fn read_values( } Ok(encoder.finish()) } else { + let mut inp = std::io::Cursor::new(input); let encoder = read_single_stream_of_values( - input, + &mut inp, input.len() - 1, true, true, hierarchy, lookup, progress, - ); + )?; Ok(encoder.finish()) } } -fn read_single_stream_of_values( - input: &[u8], +fn read_single_stream_of_values( + input: &mut R, stop_pos: usize, is_first: bool, starts_on_new_line: bool, hierarchy: &Hierarchy, lookup: &IdLookup, progress: Option, -) -> crate::wavemem::Encoder { +) -> Result { let mut encoder = crate::wavemem::Encoder::new(hierarchy); - let (input2, offset) = if starts_on_new_line { - (input, 0) - } else { - advance_to_first_newline(input) - }; - let mut reader = BodyReader::new(input2); + if !starts_on_new_line { + // if we start in the middle of a line, we need to skip it + let mut dummy = Vec::new(); + input.read_until(b'\n', &mut dummy)?; + } + let mut reader = BodyReader::new(input); // We only start recording once we have encountered our first time step let mut found_first_time_step = false; // progress tracking let mut last_reported_pos = 0; - let report_increments = std::cmp::max(input2.len() as u64 / 1000, 512); + let report_increments = std::cmp::max(stop_pos as u64 / 1000, 512); loop { if let Some((pos, cmd)) = reader.next() { @@ -1094,105 +1116,101 @@ fn advance_to_first_newline(input: &[u8]) -> (&[u8], usize) { (&[], 0) // no whitespaces found } -struct BodyReader<'a> { - input: &'a [u8], +struct BodyReader<'a, R: BufRead> { + input: &'a R, // state pos: usize, + token: Vec, + prev_token: Vec, // statistics lines_read: usize, } const ASCII_ZERO: &[u8] = b"0"; -impl<'a> BodyReader<'a> { - fn new(input: &'a [u8]) -> Self { +impl<'a, R: BufRead> BodyReader<'a, R> { + fn new(input: &'a mut R) -> Self { BodyReader { input, pos: 0, + token: Vec::with_capacity(64), + prev_token: Vec::with_capacity(64), lines_read: 0, } } #[inline] - fn try_finish_token( - &mut self, - pos: usize, - token_start: &mut Option, - prev_token: &mut Option<&'a [u8]>, - search_for_end: &mut bool, - ) -> Option> { - match *token_start { - None => None, - Some(start) => { - let token = &self.input[start..pos]; - if token.is_empty() { - return None; - } - if *search_for_end { - *search_for_end = token != b"$end"; - // consume token and return - *token_start = None; - return None; + fn try_finish_token(&mut self, pos: usize, search_for_end: &mut bool) -> Option> { + // no token means that there is nothing to do + if self.token.is_empty() { + return None; + } + + // if we are looking for the $end token, we discard everything else + if *search_for_end { + // did we find the end token? + *search_for_end = self.token != b"$end"; + // consume token and return + self.token.clear(); + return None; + } + + // if there was no previous token + if self.prev_token.is_empty() { + if self.token.len() == 1 { + // too short, wait for more input + return None; + } + + // 1-token commands are binary changes or time commands + match self.token[0] { + b'#' => Some(BodyCmd::Time(&self.token[1..])), + b'0' | b'1' | b'z' | b'Z' | b'x' | b'X' | b'h' | b'H' | b'u' | b'U' | b'w' + | b'W' | b'l' | b'L' | b'-' => { + Some(BodyCmd::Value(&self.token[0..1], &self.token[1..])) } - let ret = match *prev_token { - None => { - if token.len() == 1 { - // too short - return None; + _ => { + // parse command tokens + match self.token.as_slice() { + b"$dumpall" => { + // interpret dumpall as indicating timestep zero + self.token.clear(); + return Some(BodyCmd::Time(ASCII_ZERO)); } - // 1-token commands are binary changes or time commands - match token[0] { - b'#' => Some(BodyCmd::Time(&token[1..])), - b'0' | b'1' | b'z' | b'Z' | b'x' | b'X' | b'h' | b'H' | b'u' | b'U' - | b'w' | b'W' | b'l' | b'L' | b'-' => { - Some(BodyCmd::Value(&token[0..1], &token[1..])) - } - _ => { - if token == b"$dumpall" { - // interpret dumpall as indicating timestep zero - return Some(BodyCmd::Time(ASCII_ZERO)); - } - if token == b"$comment" { - // drop token, but start searching for $end in order to skip the comment - *search_for_end = true; - } else if token != b"$dumpvars" - && token != b"$end" - && token != b"$dumpoff" - && token != b"$dumpon" - { - // ignore dumpvars, dumpoff, dumpon, and end command - *prev_token = Some(token); - } - None - } + b"$comment" => { + // drop token, but start searching for $end in order to skip the comment + *search_for_end = true; } + b"$dumpvars" | b"$end" | b"$dumpoff" | b"$dumpon" => { + // ignore dumpvars, dumpoff, dumpon, and end command + self.prev_token.copy_from_slice(self.token.as_slice()); + } + _ => {} // do nothing } - Some(first) => { - let cmd = match first[0] { - b'b' | b'B' | b'r' | b'R' | b's' | b'S' => { - BodyCmd::Value(&first[0..], token) - } - _ => { - panic!( - "Unexpected tokens: `{}` and `{}` ({} lines after header)", - String::from_utf8_lossy(first), - String::from_utf8_lossy(token), - self.lines_read - ); - } - }; - *prev_token = None; - Some(cmd) - } - }; - *token_start = None; - ret + // wait for more input + None + } } + } else { + let cmd = match self.prev_token[0] { + b'b' | b'B' | b'r' | b'R' | b's' | b'S' => { + BodyCmd::Value(&self.prev_token[0..], self.token.as_slice()) + } + _ => { + panic!( + "Unexpected tokens: `{}` and `{}` ({} lines after header)", + String::from_utf8_lossy(self.prev_token.as_slice()), + String::from_utf8_lossy(self.token.as_slice()), + self.lines_read + ); + } + }; + Some(cmd) } } } -impl<'a> Iterator for BodyReader<'a> { +impl<'a, R: BufRead> Iterator for BodyReader<'a, R> { type Item = (usize, BodyCmd<'a>); /// returns the starting position and the body of the command @@ -1253,12 +1271,7 @@ impl<'a> Iterator for BodyReader<'a> { // update final position self.pos = self.input.len(); // check to see if there is a final token at the end - match self.try_finish_token( - self.pos, - &mut token_start, - &mut prev_token, - &mut search_for_end, - ) { + match self.try_finish_token(self.pos, &mut search_for_end) { None => {} Some(cmd) => { return Some((start_pos, cmd)); diff --git a/wellen/tests/diff_tests.rs b/wellen/tests/diff_tests.rs index 99158ac..64c30f2 100644 --- a/wellen/tests/diff_tests.rs +++ b/wellen/tests/diff_tests.rs @@ -8,27 +8,32 @@ use wellen::simple::*; use wellen::*; fn run_diff_test(vcd_filename: &str, fst_filename: &str) { - run_diff_test_internal(vcd_filename, Some(fst_filename), false); + run_diff_test_internal(vcd_filename, Some(fst_filename), false, false); +} + +fn run_diff_test_from_bytes(vcd_filename: &str, fst_filename: &str) { + run_diff_test_internal(vcd_filename, Some(fst_filename), false, true); } fn run_diff_test_vcd_only(vcd_filename: &str) { - run_diff_test_internal(vcd_filename, None, false); + run_diff_test_internal(vcd_filename, None, false, false); } /// Skips trying to load the content with the `vcd` library. This is important for files /// with 9-state values since these cannot be read by the `vcd` library. fn run_load_test(vcd_filename: &str, fst_filename: &str) { - run_diff_test_internal(vcd_filename, Some(fst_filename), true); + run_diff_test_internal(vcd_filename, Some(fst_filename), true, false); } fn run_load_test_vcd(vcd_filename: &str) { - run_diff_test_internal(vcd_filename, None, true); + run_diff_test_internal(vcd_filename, None, true, false); } fn run_diff_test_internal( vcd_filename: &str, fst_filename: Option<&str>, skip_content_comparison: bool, + load_from_bytes_instead_of_file: bool, ) { { let single_thread = LoadOptions { @@ -39,13 +44,25 @@ fn run_diff_test_internal( .expect("Failed to load VCD with a single thread"); diff_test_one(vcd_filename, wave, skip_content_comparison); } - { + if load_from_bytes_instead_of_file { + let bytes = std::io::Cursor::new(std::fs::read(vcd_filename).expect("failed")); + let wave = + read_from_reader(bytes).expect("Failed to load VCD with multiple threads from bytes"); + diff_test_one(vcd_filename, wave, skip_content_comparison); + } else { let wave = read(vcd_filename).expect("Failed to load VCD with multiple threads"); diff_test_one(vcd_filename, wave, skip_content_comparison); } if let Some(fst_filename) = fst_filename { - let wave = read(fst_filename).expect("Failed to load FST"); - diff_test_one(vcd_filename, wave, skip_content_comparison); + if load_from_bytes_instead_of_file { + let bytes = std::io::Cursor::new(std::fs::read(fst_filename).expect("failed")); + let wave = read_from_reader(bytes) + .expect("Failed to load FST with multiple threads from bytes"); + diff_test_one(fst_filename, wave, skip_content_comparison); + } else { + let wave = read(fst_filename).expect("Failed to load FST"); + diff_test_one(vcd_filename, wave, skip_content_comparison); + } } } @@ -470,6 +487,11 @@ fn diff_icarus_test1() { run_diff_test("inputs/icarus/test1.vcd", "inputs/icarus/test1.vcd.fst"); } +#[test] +fn diff_icarus_test1_from_bytes() { + run_diff_test_from_bytes("inputs/icarus/test1.vcd", "inputs/icarus/test1.vcd.fst"); +} + #[test] fn diff_model_sim_clkdiv2n_tb() { run_diff_test(