Skip to content

Commit

Permalink
add some basic structure for quantizer, io and simd operators
Browse files Browse the repository at this point in the history
Signed-off-by: LHT129 <[email protected]>
  • Loading branch information
LHT129 committed Sep 12, 2024
1 parent 641d10b commit f1ae5ea
Show file tree
Hide file tree
Showing 20 changed files with 1,297 additions and 12 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ endif ()

# Header search paths: the spdlog headers installed under the build tree,
# the project's include/ directory, and src/ (the line added by this change)
# so that headers living under src/ can be found by the compiler.
include_directories (${CMAKE_CURRENT_BINARY_DIR}/spdlog/install/include)
include_directories (include)
include_directories (src)

# Build the project as C++17.
set (CMAKE_CXX_STANDARD 17)

Expand Down
65 changes: 65 additions & 0 deletions src/io/basic_io.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>

namespace vsag {

/**
 * Static-polymorphism (CRTP) front-end for IO backends.
 *
 * A backend derives as `class Backend : public BasicIO<Backend>` and supplies
 * the `*Impl` member functions; every public method here simply forwards to
 * the matching `*Impl` on the derived object, so no virtual dispatch is used.
 *
 * Required members of IOTmpl (only those actually called get instantiated):
 *   void           WriteImpl(const uint8_t*, uint64_t size, uint64_t offset);
 *   bool           ReadImpl(uint8_t*, uint64_t size, uint64_t offset) const;
 *   const uint8_t* ReadImpl(uint64_t size, uint64_t offset) const;
 *   bool           MultiReadImpl(uint8_t*, uint64_t*, uint64_t*, uint64_t) const;
 *   void           PrefetchImpl(uint64_t offset, uint64_t cacheLine);
 */
template <typename IOTmpl>
class BasicIO {
public:
    // Declared with the injected class name: spelling the constructor as
    // `BasicIO<IOTmpl>()` is no longer valid from C++20 onwards.
    BasicIO() = default;

    ~BasicIO() = default;

    /// Forwards to IOTmpl::WriteImpl: store `size` bytes from `data` at `offset`.
    inline void
    Write(const uint8_t* data, uint64_t size, uint64_t offset) {
        return cast().WriteImpl(data, size, offset);
    }

    /// Forwards to IOTmpl::ReadImpl: copy `size` bytes at `offset` into `data`.
    /// @return whatever the backend reports (a bool success flag).
    inline bool
    Read(uint8_t* data, uint64_t size, uint64_t offset) const {
        return cast().ReadImpl(data, size, offset);
    }

    /// Forwards to the pointer-returning IOTmpl::ReadImpl overload.
    /// @return the pointer produced by the backend for [offset, offset + size).
    inline const uint8_t*
    Read(uint64_t size, uint64_t offset) const {
        return cast().ReadImpl(size, offset);
    }

    /// Forwards to IOTmpl::MultiReadImpl with `count` (size, offset) pairs.
    inline bool
    MultiRead(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const {
        return cast().MultiReadImpl(datas, sizes, offsets, count);
    }

    /// Forwards to IOTmpl::PrefetchImpl; `cacheLine` defaults to 64.
    inline void
    Prefetch(uint64_t offset, uint64_t cacheLine = 64) {
        return cast().PrefetchImpl(offset, cacheLine);
    }

private:
    // Downcast to the concrete backend; valid because IOTmpl derives from
    // BasicIO<IOTmpl> by the CRTP contract described above.
    inline IOTmpl&
    cast() {
        return static_cast<IOTmpl&>(*this);
    }

    inline const IOTmpl&
    cast() const {
        return static_cast<const IOTmpl&>(*this);
    }
};
} // namespace vsag
24 changes: 24 additions & 0 deletions src/io/file_aio.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "basic_io.h"

namespace vsag {

// Empty CRTP derivative of BasicIO. It defines none of the *Impl hooks that
// BasicIO forwards to, so it cannot yet be used through the BasicIO methods.
// NOTE(review): presumably a placeholder for an asynchronous file backend —
// confirm intent.
class FileAIO : public BasicIO<FileAIO> {
};

} // namespace vsag
112 changes: 112 additions & 0 deletions src/io/memory_io.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <xmmintrin.h>

#include <cstring>

#include "basic_io.h"
#include "vsag/allocator.h"

namespace vsag {

/**
 * BasicIO backend that keeps all data in one contiguous heap buffer obtained
 * from a caller-supplied Allocator.
 *
 * The buffer starts at MIN_SIZE bytes and is grown with Allocator::Reallocate
 * whenever a write ends past the current size. The allocator is borrowed, not
 * owned: it must outlive this object because the destructor hands the buffer
 * back to it.
 */
class MemoryIO : public BasicIO<MemoryIO> {
public:
    /// Allocates the initial MIN_SIZE-byte buffer from `allocator`.
    explicit MemoryIO(Allocator* allocator) : allocator_(allocator) {
        start_ = static_cast<uint8_t*>(allocator_->Allocate(MIN_SIZE));
        currentSize_ = MIN_SIZE;
    }

    /// Returns the buffer to the allocator it came from.
    ~MemoryIO() {
        allocator_->Deallocate(start_);
    }

    // The object owns start_ through a raw pointer; an implicit copy would
    // leave two objects deallocating the same buffer. Copying is therefore
    // disabled (which also suppresses the implicit move operations).
    MemoryIO(const MemoryIO&) = delete;
    MemoryIO&
    operator=(const MemoryIO&) = delete;

    /// Copies `size` bytes from `data` to `offset`, growing the buffer if needed.
    inline void
    WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset);

    /// Copies `size` bytes at `offset` into `data`; false if out of range.
    inline bool
    ReadImpl(uint8_t* data, uint64_t size, uint64_t offset) const;

    /// Returns a pointer into the buffer at `offset`, or nullptr if out of range.
    [[nodiscard]] inline const uint8_t*
    ReadImpl(uint64_t size, uint64_t offset) const;

    /// Performs `count` copying reads back-to-back into `datas`.
    inline bool
    MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const;

    /// Issues a prefetch hint for the byte at `offset`.
    inline void
    PrefetchImpl(uint64_t offset, uint64_t cacheLine = 64);

private:
    // True when a range ending at byte `size` lies within the current buffer.
    [[nodiscard]] inline bool
    check_valid_offset(uint64_t size) const {
        return size <= currentSize_;
    }

    // Grows the buffer to exactly `size` bytes when it is currently smaller.
    // NOTE(review): the Reallocate result is not checked for failure, and the
    // content of newly added bytes depends on the allocator — confirm both
    // are acceptable to callers.
    void
    check_and_realloc(uint64_t size) {
        if (check_valid_offset(size)) {
            return;
        }
        start_ = static_cast<uint8_t*>(allocator_->Reallocate(start_, size));
        currentSize_ = size;
    }

    // Borrowed allocator used for every allocation made by this object.
    Allocator* allocator_{nullptr};

    // Base address of the buffer.
    uint8_t* start_{nullptr};

    // Current buffer size in bytes.
    uint64_t currentSize_ = 0;

    // Initial buffer size in bytes.
    static constexpr uint64_t MIN_SIZE = 1024;
};

// Stores `size` bytes from `data` at byte position `offset`, first enlarging
// the buffer so that it covers the end of the written range.
void
MemoryIO::WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset) {
    const uint64_t range_end = size + offset;
    check_and_realloc(range_end);
    // start_ must be read after the (possible) reallocation above.
    uint8_t* destination = start_ + offset;
    memcpy(destination, data, size);
}

// Copies `size` bytes starting at `offset` into `data`.
// Returns false, leaving `data` untouched, when the requested range ends
// beyond the current buffer size; returns true after a successful copy.
bool
MemoryIO::ReadImpl(uint8_t* data, uint64_t size, uint64_t offset) const {
    if (!check_valid_offset(size + offset)) {
        return false;
    }
    memcpy(data, start_ + offset, size);
    return true;
}

// Zero-copy read: yields a pointer directly into the internal buffer at
// `offset`, or nullptr when the range [offset, offset + size) ends beyond the
// current buffer size.
const uint8_t*
MemoryIO::ReadImpl(uint64_t size, uint64_t offset) const {
    const bool in_range = check_valid_offset(size + offset);
    return in_range ? start_ + offset : nullptr;
}
bool
MemoryIO::MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const {
bool ret = true;
for (uint64_t i = 0; i < count; ++i) {
ret &= this->ReadImpl(datas, sizes[i], offsets[i]);
datas += sizes[i];
}
return ret;
}
void
MemoryIO::PrefetchImpl(uint64_t offset, uint64_t cacheLine) {
_mm_prefetch(this->start_ + offset, _MM_HINT_T0); // todo
}

} // namespace vsag
50 changes: 50 additions & 0 deletions src/io/memory_io_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "memory_io.h"

#include <catch2/catch_test_macros.hpp>
#include <cstdlib>
#include <memory>
#include <unordered_map>

#include "default_allocator.h"
#include "fixtures.h"
using namespace vsag;

// Round-trip check for any BasicIO backend: writes 100 float vectors of
// dimension 32 at pseudo-random offsets, then reads each one back through the
// pointer-returning Read and compares it element by element.
template <typename T>
void
TestReadWrite(BasicIO<T>* basicIo) {
    constexpr int count = 100;
    constexpr int dim = 32;
    auto vector = fixtures::generate_vectors(count, dim);
    const uint64_t codesize = dim * sizeof(float);
    // Offsets are multiples of the record size so two writes either land on
    // the same slot (the later one wins in both the IO and the map) or do not
    // overlap at all; arbitrary byte offsets could partially overwrite an
    // earlier record and invalidate its expectation. This also keeps offsets
    // float-aligned relative to the start of the backend's storage.
    const uint64_t slot_count = 10000000 / codesize;
    std::unordered_map<uint64_t, float*> maps;
    for (int i = 0; i < count; ++i) {
        const uint64_t offset = (static_cast<uint64_t>(random()) % slot_count) * codesize;
        float* source = vector.data() + i * dim;
        basicIo->Write(reinterpret_cast<const uint8_t*>(source), codesize, offset);
        maps[offset] = source;
    }

    for (const auto& [offset, gt] : maps) {
        const uint8_t* raw = basicIo->Read(codesize, offset);
        // Fail the test cleanly instead of dereferencing a null pointer.
        REQUIRE(raw != nullptr);
        const auto* result = reinterpret_cast<const float*>(raw);
        for (int i = 0; i < dim; ++i) {
            REQUIRE(result[i] == gt[i]);
        }
    }
}

// Exercises MemoryIO through the generic BasicIO round-trip test.
// Tags are passed as the second argument so that tag filters select this
// test; embedded in the first string they are only part of the test name.
TEST_CASE("read&write", "[ut][memory_io]") {
    // MemoryIO only borrows the allocator, so it is kept on the stack and
    // declared first: it is destroyed after `io`, whose destructor still
    // needs it to release the buffer.
    DefaultAllocator allocator;
    MemoryIO io(&allocator);
    TestReadWrite(&io);
}
20 changes: 20 additions & 0 deletions src/metric_type.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
namespace vsag {
// Identifiers for the supported metrics, with fixed numeric values.
// NOTE(review): names suggest squared L2 distance, inner product and cosine —
// confirm exact semantics where the values are consumed.
enum class MetricType {
    METRIC_TYPE_L2SQR = 0,
    METRIC_TYPE_IP = 1,
    METRIC_TYPE_COSINE = 2
};

} // namespace vsag
Loading

0 comments on commit f1ae5ea

Please sign in to comment.