-
Notifications
You must be signed in to change notification settings - Fork 752
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
704 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
// Copyright (c) 2020 Ritchie Vink | ||
// Copyright 2021 Datafuse Labs | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
use std::fmt::Debug; | ||
|
||
use serde::Deserialize; | ||
use serde::Serialize; | ||
|
||
use super::FixedSizeBinaryColumn; | ||
|
||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] | ||
pub struct FixedSizeBinaryColumnBuilder { | ||
pub data: Vec<u8>, | ||
pub value_length: usize, | ||
} | ||
|
||
impl FixedSizeBinaryColumnBuilder { | ||
pub fn with_capacity(data_capacity: usize, value_length: usize) -> Self { | ||
FixedSizeBinaryColumnBuilder { | ||
data: Vec::with_capacity(data_capacity), | ||
value_length, | ||
} | ||
} | ||
|
||
pub fn from_column(col: FixedSizeBinaryColumn) -> Self { | ||
FixedSizeBinaryColumnBuilder { | ||
data: col.data.make_mut(), | ||
value_length: col.value_length, | ||
} | ||
} | ||
|
||
pub fn from_data(data: Vec<u8>) -> Self { | ||
let value_length = data.len(); | ||
FixedSizeBinaryColumnBuilder { data, value_length } | ||
} | ||
|
||
pub fn repeat(scalar: &[u8], n: usize) -> Self { | ||
let len = scalar.len(); | ||
let data = scalar.repeat(n); | ||
FixedSizeBinaryColumnBuilder { | ||
data, | ||
value_length: len, | ||
} | ||
} | ||
|
||
pub fn repeat_default(n: usize, value_length: usize) -> Self { | ||
FixedSizeBinaryColumnBuilder { | ||
data: vec![0; n * value_length], | ||
value_length, | ||
} | ||
} | ||
|
||
pub fn len(&self) -> usize { | ||
if self.data.is_empty() { | ||
0 | ||
} else { | ||
self.data.len() / self.value_length | ||
} | ||
} | ||
|
||
pub fn is_empty(&self) -> bool { | ||
self.len() <= 1 | ||
} | ||
|
||
pub fn memory_size(&self) -> usize { | ||
self.data.len() | ||
} | ||
|
||
pub fn put_u8(&mut self, item: u8) { | ||
self.data.push(item); | ||
} | ||
|
||
pub fn push_default(&mut self) { | ||
self.data.extend_from_slice(&vec![0; self.value_length]); | ||
} | ||
|
||
pub fn put_char(&mut self, item: char) { | ||
self.data | ||
.extend_from_slice(item.encode_utf8(&mut [0; 4]).as_bytes()); | ||
} | ||
|
||
#[inline] | ||
pub fn put_str(&mut self, item: &str) { | ||
debug_assert!(self.value_length == item.as_bytes().len()); | ||
self.data.extend_from_slice(item.as_bytes()); | ||
} | ||
|
||
#[inline] | ||
pub fn put_slice(&mut self, item: &[u8]) { | ||
debug_assert!(self.value_length == item.len()); | ||
self.data.extend_from_slice(item); | ||
} | ||
|
||
#[inline] | ||
pub fn commit_row(&mut self) { | ||
self.data.reserve(self.data.capacity()); | ||
} | ||
|
||
pub fn put_char_iter(&mut self, iter: impl Iterator<Item = char>) { | ||
for c in iter { | ||
let mut buf = [0; 4]; | ||
let result = c.encode_utf8(&mut buf); | ||
self.data.extend_from_slice(result.as_bytes()); | ||
} | ||
} | ||
|
||
pub fn put(&mut self, item: &[u8]) { | ||
self.data.extend_from_slice(item); | ||
} | ||
|
||
pub fn append_column(&mut self, other: &FixedSizeBinaryColumn) { | ||
debug_assert!(other.value_length == self.value_length); | ||
self.data.extend_from_slice(&other.data); | ||
} | ||
|
||
pub fn build(self) -> FixedSizeBinaryColumn { | ||
FixedSizeBinaryColumn::new(self.data.into(), self.value_length) | ||
} | ||
|
||
pub fn build_scalar(self) -> Vec<u8> { | ||
if self.data.is_empty() { | ||
vec![] | ||
} else { | ||
self.data[0..self.value_length].to_vec() | ||
} | ||
} | ||
|
||
#[inline] | ||
pub fn may_resize(&self, add_size: usize) -> bool { | ||
self.data.len() + add_size > self.data.capacity() | ||
} | ||
|
||
/// # Safety | ||
/// | ||
/// Calling this method with an out-of-bounds index is *[undefined behavior]* | ||
pub unsafe fn index_unchecked(&self, row: usize) -> &[u8] { | ||
debug_assert!((row + 1) * self.value_length < self.data.len()); | ||
|
||
self.data | ||
.get_unchecked(row * self.value_length..(row + 1) * self.value_length) | ||
} | ||
|
||
pub fn push_repeat(&mut self, item: &[u8], n: usize) { | ||
debug_assert!(item.len() / n == self.data.len()); | ||
self.data.reserve(item.len() * n); | ||
for _ in 0..n { | ||
self.data.extend_from_slice(item) | ||
} | ||
} | ||
|
||
pub fn pop(&mut self) -> Option<Vec<u8>> { | ||
if !self.is_empty() { | ||
let val = self.data.split_off(self.len() - 1); | ||
Some(val) | ||
} else { | ||
None | ||
} | ||
} | ||
|
||
/// Extends the [`MutableBinaryArray`] from an iterator of values. | ||
/// This differs from `extended_trusted_len` which accepts iterator of optional values. | ||
#[inline] | ||
pub fn extend_values<I, P>(&mut self, iterator: I) | ||
where | ||
P: AsRef<[u8]>, | ||
I: Iterator<Item = P>, | ||
{ | ||
for item in iterator { | ||
self.put_slice(item.as_ref()); | ||
self.commit_row(); | ||
} | ||
} | ||
} | ||
|
||
impl<P: AsRef<[u8]>> FromIterator<P> for FixedSizeBinaryColumnBuilder { | ||
fn from_iter<I: IntoIterator<Item = P>>(iter: I) -> Self { | ||
let iter = iter.into_iter(); | ||
let (lower, _) = iter.size_hint(); | ||
let mut builder = FixedSizeBinaryColumnBuilder::with_capacity(lower, 0); | ||
builder.extend_values(iter); | ||
builder | ||
} | ||
} | ||
|
||
impl From<FixedSizeBinaryColumnBuilder> for FixedSizeBinaryColumn { | ||
fn from(value: FixedSizeBinaryColumnBuilder) -> Self { | ||
value.build() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// Copyright (c) 2020 Ritchie Vink | ||
// Copyright 2021 Datafuse Labs | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
use std::fmt::Debug; | ||
use std::fmt::Formatter; | ||
use std::fmt::Result; | ||
use std::fmt::Write; | ||
|
||
use super::FixedSizeBinaryColumn; | ||
use crate::fmt::write_vec; | ||
|
||
pub fn write_value<W: Write>(array: &FixedSizeBinaryColumn, index: usize, f: &mut W) -> Result { | ||
let bytes = array.value(index); | ||
let writer = |f: &mut W, index| write!(f, "{}", bytes[index]); | ||
|
||
write_vec(f, writer, None, bytes.len(), "None", false) | ||
} | ||
|
||
impl Debug for FixedSizeBinaryColumn { | ||
fn fmt(&self, f: &mut Formatter) -> Result { | ||
f.debug_struct("FixedSizeBinaryColumn") | ||
.field( | ||
"data", | ||
&format_args!("0x{}", &hex::encode(self.data().as_slice())), | ||
) | ||
.field("value_length", &self.value_length()) | ||
.finish() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// Copyright 2021 Datafuse Labs | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
use super::builder::FixedSizeBinaryColumnBuilder; | ||
use super::FixedSizeBinaryColumn; | ||
use crate::iterator::ColumnAccessor; | ||
use crate::iterator::ColumnValuesIter; | ||
|
||
unsafe impl<'a> ColumnAccessor<'a> for FixedSizeBinaryColumn { | ||
type Item = &'a [u8]; | ||
|
||
#[inline] | ||
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { | ||
self.index_unchecked(index) | ||
} | ||
|
||
#[inline] | ||
fn len(&self) -> usize { | ||
self.len() | ||
} | ||
} | ||
|
||
/// Iterator over the values of a [`FixedSizeBinaryColumn`].
pub type FixedSizeBinaryColumnIter<'a> = ColumnValuesIter<'a, FixedSizeBinaryColumn>; | ||
|
||
impl<'a> IntoIterator for &'a FixedSizeBinaryColumn { | ||
type Item = &'a [u8]; | ||
type IntoIter = FixedSizeBinaryColumnIter<'a>; | ||
|
||
fn into_iter(self) -> Self::IntoIter { | ||
self.iter() | ||
} | ||
} | ||
|
||
unsafe impl<'a> ColumnAccessor<'a> for FixedSizeBinaryColumnBuilder { | ||
type Item = &'a [u8]; | ||
|
||
#[inline] | ||
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { | ||
self.index_unchecked(index) | ||
} | ||
|
||
#[inline] | ||
fn len(&self) -> usize { | ||
self.len() | ||
} | ||
} | ||
|
||
/// Iterator over the values of a [`FixedSizeBinaryColumnBuilder`].
pub type FixedSizeBinaryColumnBuilderIter<'a> = ColumnValuesIter<'a, FixedSizeBinaryColumnBuilder>; |
Oops, something went wrong.