Skip to content

Add buffering during encode #138

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 6, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 63 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -168,6 +168,12 @@ pub use {
crate::primitives::{Bech32, Bech32m, NoChecksum},
};

// Length of the stack buffer that the encode functions fill before flushing it to the underlying
// `fmt::Write`/`io::Write` sink. Deliberately small during testing so that the flush-when-full
// code path is exercised.
#[cfg(not(test))]
const BUF_LENGTH: usize = 1024;
#[cfg(test)]
const BUF_LENGTH: usize = 10;

/// Decodes a bech32 encoded string.
///
/// If this function succeeds the input string was found to be well formed (hrp, separator, bech32
@@ -276,11 +282,26 @@ pub fn encode_lower_to_fmt<Ck: Checksum, W: fmt::Write>(
) -> Result<(), EncodeError> {
let _ = encoded_length::<Ck>(hrp, data)?;

let mut buf = [0u8; BUF_LENGTH];
let mut pos = 0;

let iter = data.iter().copied().bytes_to_fes();
let chars = iter.with_checksum::<Ck>(&hrp).chars();

for c in chars {
fmt.write_char(c)?;
buf[pos] = c as u8;
pos += 1;

if pos == BUF_LENGTH {
let s = core::str::from_utf8(&buf).expect("we only write ASCII");
fmt.write_str(s)?;
pos = 0;
}
}

let s = core::str::from_utf8(&buf[..pos]).expect("we only write ASCII");
fmt.write_str(s)?;

Ok(())
}

@@ -296,11 +317,25 @@ pub fn encode_upper_to_fmt<Ck: Checksum, W: fmt::Write>(
) -> Result<(), EncodeError> {
let _ = encoded_length::<Ck>(hrp, data)?;

let mut buf = [0u8; BUF_LENGTH];
let mut pos = 0;

let iter = data.iter().copied().bytes_to_fes();
let chars = iter.with_checksum::<Ck>(&hrp).chars();

for c in chars {
fmt.write_char(c.to_ascii_uppercase())?;
buf[pos] = c.to_ascii_uppercase() as u8;
pos += 1;
if pos == BUF_LENGTH {
let s = core::str::from_utf8(&buf).expect("we only write ASCII");
fmt.write_str(s)?;
pos = 0;
}
}

let s = core::str::from_utf8(&buf[..pos]).expect("we only write ASCII");
fmt.write_str(s)?;

Ok(())
}

@@ -331,11 +366,23 @@ pub fn encode_lower_to_writer<Ck: Checksum, W: std::io::Write>(
) -> Result<(), EncodeIoError> {
let _ = encoded_length::<Ck>(hrp, data)?;

let mut buf = [0u8; BUF_LENGTH];
let mut pos = 0;

let iter = data.iter().copied().bytes_to_fes();
let chars = iter.with_checksum::<Ck>(&hrp).chars();

for c in chars {
w.write_all(&[c as u8])?;
buf[pos] = c as u8;
pos += 1;
if pos == BUF_LENGTH {
w.write_all(&buf)?;
pos = 0;
}
}

w.write_all(&buf[..pos])?;

Ok(())
}

@@ -352,11 +399,23 @@ pub fn encode_upper_to_writer<Ck: Checksum, W: std::io::Write>(
) -> Result<(), EncodeIoError> {
let _ = encoded_length::<Ck>(hrp, data)?;

let mut buf = [0u8; BUF_LENGTH];
let mut pos = 0;

let iter = data.iter().copied().bytes_to_fes();
let chars = iter.with_checksum::<Ck>(&hrp).chars();

for c in chars {
w.write_all(&[c.to_ascii_uppercase() as u8])?;
buf[pos] = c.to_ascii_uppercase() as u8;
pos += 1;
if pos == BUF_LENGTH {
w.write_all(&buf)?;
pos = 0;
}
}

w.write_all(&buf[..pos])?;

Ok(())
}

60 changes: 60 additions & 0 deletions src/primitives/encode.rs
Original file line number Diff line number Diff line change
@@ -113,6 +113,14 @@ where
CharIter::new(self.hrp, witver_iter)
}

/// Returns an iterator that yields the bech32 encoded string one ASCII character at a time,
/// with each character given as its byte value.
///
/// This is the character iterator from `chars` wrapped in a [`ByteIter`].
#[inline]
pub fn bytes(self) -> ByteIter<'hrp, I, Ck> { ByteIter::new(self.chars()) }

/// Returns an iterator that yields the field elements that go into the checksum, as well as the checksum at the end.
///
/// Each field element yielded has been input into the checksum algorithm (including the HRP as it is fed into the algorithm).
@@ -237,6 +245,43 @@ where
}
}

/// Iterator adaptor which takes a stream of ASCII field elements (an encoded string) and yields a stream of bytes.
///
/// This is equivalent to using the `CharIter` and then casting each character to a byte. Doing
/// so is technically sound because we only yield ASCII characters but it makes for ugly code so
/// we provide this iterator also.
pub struct ByteIter<'hrp, I, Ck>
where
I: Iterator<Item = Fe32>,
Ck: Checksum,
{
char_iter: CharIter<'hrp, I, Ck>,
}

impl<'hrp, I, Ck> ByteIter<'hrp, I, Ck>
where
I: Iterator<Item = Fe32>,
Ck: Checksum,
{
/// Adapts the `CharIter` iterator to yield bytes representing the bech32 encoding as ASCII bytes.
#[inline]
pub fn new(char_iter: CharIter<'hrp, I, Ck>) -> Self { Self { char_iter } }
}

impl<'hrp, I, Ck> Iterator for ByteIter<'hrp, I, Ck>
where
    I: Iterator<Item = Fe32>,
    Ck: Checksum,
{
    type Item = u8;

    /// Pulls the next character from the wrapped `CharIter` and narrows it to a byte.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let c = self.char_iter.next()?;
        Some(c as u8)
    }

    /// Forwards the wrapped iterator's hint: exactly one byte is produced per character.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.char_iter.size_hint()
    }
}

/// Iterator adaptor for a checksummed iterator that inputs the HRP into the checksum algorithm
/// before yielding the HRP as field elements followed by the data then checksum.
pub struct Fe32Iter<'hrp, I, Ck>
@@ -344,4 +389,19 @@ mod tests {
let checksummed_len = 2 + 1 + 1 + char_len + 6; // bc + SEP + Q + chars + checksum
assert_eq!(iter.size_hint().0, checksummed_len);
}

// Checks that `bytes()` yields exactly the same sequence as `chars()` with each character cast
// to `u8`, including the same number of items.
#[test]
#[cfg(feature = "alloc")]
fn hrpstring_iter_bytes() {
    let hrp = Hrp::parse_unchecked("bc");
    let fes = DATA.iter().copied().bytes_to_fes();
    let iter = fes.with_checksum::<Bech32>(&hrp).with_witness_version(Fe32::Q);

    let mut chars = iter.clone().chars();
    let mut bytes = iter.bytes();

    // Step both iterators together rather than using `zip`: `zip` stops at the shorter input, so
    // a `bytes()` implementation that dropped or added trailing items would go unnoticed.
    loop {
        match (chars.next(), bytes.next()) {
            (Some(c), Some(b)) => assert_eq!(c as u8, b),
            (None, None) => break,
            (c, b) => panic!(
                "iterators differ in length: chars yielded {:?}, bytes yielded {:?}",
                c, b
            ),
        }
    }
}
}
88 changes: 64 additions & 24 deletions src/segwit.rs
Original file line number Diff line number Diff line change
@@ -156,19 +156,30 @@ pub fn encode_lower_to_fmt_unchecked<W: fmt::Write>(
witness_version: Fe32,
witness_program: &[u8],
) -> fmt::Result {
let mut buf = [0u8; MAX_STRING_LENGTH];
let mut pos = 0;

let iter = witness_program.iter().copied().bytes_to_fes();
match witness_version {
VERSION_0 => {
for c in iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).chars() {
fmt.write_char(c)?;
}
let bytes = iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).bytes();
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
*dst = src;
pos += 1;
});
}
version => {
for c in iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).chars() {
fmt.write_char(c)?;
}
let bytes = iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).bytes();
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
*dst = src;
pos += 1;
});
}
}

let s = core::str::from_utf8(&buf[..pos]).expect("we only write ASCII");
fmt.write_str(s)?;

Ok(())
}

@@ -185,20 +196,30 @@ pub fn encode_upper_to_fmt_unchecked<W: fmt::Write>(
witness_version: Fe32,
witness_program: &[u8],
) -> fmt::Result {
let mut buf = [0u8; MAX_STRING_LENGTH];
let mut pos = 0;

let iter = witness_program.iter().copied().bytes_to_fes();
match witness_version {
VERSION_0 => {
for c in iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).chars() {
fmt.write_char(c.to_ascii_uppercase())?;
}
let bytes = iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).bytes();
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
*dst = src.to_ascii_uppercase();
pos += 1;
});
}
version => {
for c in iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).chars() {
fmt.write_char(c.to_ascii_uppercase())?;
}
let bytes = iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).bytes();
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
*dst = src.to_ascii_uppercase();
pos += 1;
});
}
}

let s = core::str::from_utf8(&buf[..pos]).expect("we only write ASCII");
fmt.write_str(s)?;

Ok(())
}

@@ -229,19 +250,29 @@ pub fn encode_lower_to_writer_unchecked<W: std::io::Write>(
witness_version: Fe32,
witness_program: &[u8],
) -> std::io::Result<()> {
let mut buf = [0u8; MAX_STRING_LENGTH];
let mut pos = 0;

let iter = witness_program.iter().copied().bytes_to_fes();
match witness_version {
VERSION_0 => {
for c in iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).chars() {
w.write_all(&[c.to_ascii_lowercase() as u8])?;
}
let bytes = iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).bytes();
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
*dst = src;
pos += 1;
});
}
version => {
for c in iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).chars() {
w.write_all(&[c.to_ascii_lowercase() as u8])?;
}
let bytes = iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).bytes();
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
*dst = src;
pos += 1;
});
}
}

w.write_all(&buf[..pos])?;

Ok(())
}

@@ -259,20 +290,29 @@ pub fn encode_upper_to_writer_unchecked<W: std::io::Write>(
witness_version: Fe32,
witness_program: &[u8],
) -> std::io::Result<()> {
let mut buf = [0u8; MAX_STRING_LENGTH];
let mut pos = 0;

let iter = witness_program.iter().copied().bytes_to_fes();
match witness_version {
VERSION_0 => {
for c in iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).chars() {
w.write_all(&[c.to_ascii_uppercase() as u8])?;
}
let bytes = iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).bytes();
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
*dst = src.to_ascii_uppercase();
pos += 1;
});
}
version => {
for c in iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).chars() {
w.write_all(&[c.to_ascii_uppercase() as u8])?;
}
let bytes = iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).bytes();
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
*dst = src.to_ascii_uppercase();
pos += 1;
});
}
}

w.write_all(&buf[..pos])?;

Ok(())
}