Skip to content

Commit

Permalink
add some basic structure for quantizer, io and simd operators
Browse files Browse the repository at this point in the history
Signed-off-by: LHT129 <[email protected]>
  • Loading branch information
LHT129 committed Sep 14, 2024
1 parent d262654 commit 0e8c538
Show file tree
Hide file tree
Showing 22 changed files with 1,354 additions and 15 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ endif ()

include_directories (${CMAKE_CURRENT_BINARY_DIR}/spdlog/install/include)
include_directories (include)
include_directories (src)

set (CMAKE_CXX_STANDARD 17)

Expand Down
1 change: 1 addition & 0 deletions extern/diskann/DiskANN/include/ann_exception.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#pragma once
#include <string>
#include <cstdint>
#include <stdexcept>
#include <system_error>
#include "windows_customizations.h"
Expand Down
1 change: 1 addition & 0 deletions src/algorithm/hnswlib/algorithm_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#pragma once

#include <cstdint>
#include <functional>
#include <queue>
#include <string>
Expand Down
65 changes: 65 additions & 0 deletions src/io/basic_io.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>

namespace vsag {

/**
 * @brief Static-polymorphism (CRTP) front end for IO backends.
 *
 * A backend derives from BasicIO<Backend> and supplies the `*Impl` member
 * functions; every public method here simply forwards to the matching
 * `*Impl` on the derived object, so no virtual dispatch is involved.
 * Only the `*Impl` functions that are actually called need to exist,
 * because member functions of a class template are instantiated on use.
 *
 * @tparam IOTmpl the derived backend type (must inherit from BasicIO<IOTmpl>).
 */
template <typename IOTmpl>
class BasicIO {
public:
    // Use the injected class name: declaring a constructor with a template-id
    // (`BasicIO<IOTmpl>()`) is no longer valid from C++20 onwards.
    BasicIO() = default;

    ~BasicIO() = default;

    /// Forwards to `IOTmpl::WriteImpl(data, size, offset)`.
    inline void
    Write(const uint8_t* data, uint64_t size, uint64_t offset) {
        return cast().WriteImpl(data, size, offset);
    }

    /// Forwards to `IOTmpl::ReadImpl(data, size, offset)` and returns its result.
    inline bool
    Read(uint8_t* data, uint64_t size, uint64_t offset) const {
        return cast().ReadImpl(data, size, offset);
    }

    /// Forwards to `IOTmpl::ReadImpl(size, offset)` and returns the pointer it yields.
    [[nodiscard]] inline const uint8_t*
    Read(uint64_t size, uint64_t offset) const {
        return cast().ReadImpl(size, offset);
    }

    /// Forwards to `IOTmpl::MultiReadImpl(datas, sizes, offsets, count)`.
    inline bool
    MultiRead(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const {
        return cast().MultiReadImpl(datas, sizes, offsets, count);
    }

    /// Forwards to `IOTmpl::PrefetchImpl(offset, cacheLine)`; `cacheLine` defaults to 64.
    inline void
    Prefetch(uint64_t offset, uint64_t cacheLine = 64) {
        return cast().PrefetchImpl(offset, cacheLine);
    }

private:
    /// Downcast to the derived backend (mutable access).
    inline IOTmpl&
    cast() {
        return static_cast<IOTmpl&>(*this);
    }

    /// Downcast to the derived backend (read-only access).
    inline const IOTmpl&
    cast() const {
        return static_cast<const IOTmpl&>(*this);
    }
};
} // namespace vsag
24 changes: 24 additions & 0 deletions src/io/file_aio.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "basic_io.h"

namespace vsag {

// Placeholder IO backend: derives from BasicIO<FileAIO> but does not yet
// define any of the `*Impl` member functions, so calling a BasicIO method
// (Write/Read/MultiRead/Prefetch) on it will not compile until they are added.
// NOTE(review): the name suggests asynchronous file IO — confirm the intended design.
class FileAIO : public BasicIO<FileAIO> {};

} // namespace vsag
115 changes: 115 additions & 0 deletions src/io/memory_io.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#if defined(ENABLE_SSE)
#include <xmmintrin.h> //todo
#endif
#include <cstring>

#include "basic_io.h"
#include "vsag/allocator.h"

namespace vsag {

/**
 * @brief BasicIO backend that keeps all data in one contiguous heap buffer.
 *
 * The buffer is obtained from the supplied Allocator, starts at MIN_SIZE
 * bytes and is grown with Allocator::Reallocate to exactly the size needed
 * whenever a write ends beyond the current end. The Allocator is borrowed,
 * not owned, and must outlive this object because the destructor hands the
 * buffer back to it.
 *
 * The object is the sole owner of its buffer, so it is non-copyable (and
 * therefore non-movable): a copy would share `start_` and free it twice.
 */
class MemoryIO : public BasicIO<MemoryIO> {
public:
    /// Allocates the initial MIN_SIZE-byte buffer from `allocator`.
    explicit MemoryIO(Allocator* allocator)
        : allocator_(allocator),
          start_(reinterpret_cast<uint8_t*>(allocator->Allocate(MIN_SIZE))),
          current_size_(MIN_SIZE) {
    }

    /// Returns the buffer to the allocator it came from.
    ~MemoryIO() {
        allocator_->Deallocate(start_);
    }

    // Owning a raw buffer: forbid copies to rule out a double Deallocate.
    MemoryIO(const MemoryIO&) = delete;
    MemoryIO&
    operator=(const MemoryIO&) = delete;

    // The `inline` specifiers below matter: the definitions live out of class
    // in this header, and `inline` keeps them from violating the ODR when the
    // header is included from several translation units.

    /// Copies `size` bytes from `data` into the buffer at `offset`, growing it if needed.
    inline void
    WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset);

    /// Copies `size` bytes at `offset` into `data`; returns false if the range is out of bounds.
    inline bool
    ReadImpl(uint8_t* data, uint64_t size, uint64_t offset) const;

    /// Returns a pointer into the internal buffer at `offset`, or nullptr if out of bounds.
    /// The pointer refers to `start_`, which a later growing write replaces.
    [[nodiscard]] inline const uint8_t*
    ReadImpl(uint64_t size, uint64_t offset) const;

    /// Performs `count` reads, packing the results back to back into `datas`.
    inline bool
    MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const;

    /// Issues a prefetch hint for the buffer location at `offset` (SSE builds only).
    inline void
    PrefetchImpl(uint64_t offset, uint64_t cacheLine = 64);

private:
    /// True when an access ending at byte position `size` stays inside the buffer.
    [[nodiscard]] inline bool
    checkValidOffset(uint64_t size) const {
        return size <= current_size_;
    }

    /// Grows the buffer to exactly `size` bytes if it is currently smaller.
    /// Bytes between the old and the new end are not initialised by this class.
    void
    checkAndRealloc(uint64_t size) {
        if (checkValidOffset(size)) {
            return;
        }
        start_ = reinterpret_cast<uint8_t*>(allocator_->Reallocate(start_, size));
        current_size_ = size;
    }

    /// Borrowed allocator used for every allocation of the buffer.
    Allocator* allocator_{nullptr};

    /// First byte of the owned buffer.
    uint8_t* start_{nullptr};

    /// Current buffer size in bytes.
    uint64_t current_size_{0};

    /// Initial buffer size in bytes.
    static constexpr uint64_t MIN_SIZE = 1024;
};

void
MemoryIO::WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset) {
checkAndRealloc(size + offset);
memcpy(start_ + offset, data, size);
}

bool
MemoryIO::ReadImpl(uint8_t* data, uint64_t size, uint64_t offset) const {
bool ret = checkValidOffset(size + offset);
if (ret) {
memcpy(data, start_ + offset, size);
}
return ret;
}

const uint8_t*
MemoryIO::ReadImpl(uint64_t size, uint64_t offset) const {
if (checkValidOffset(size + offset)) {
return start_ + offset;
}
return nullptr;
}
bool
MemoryIO::MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const {
bool ret = true;
for (uint64_t i = 0; i < count; ++i) {
ret &= this->ReadImpl(datas, sizes[i], offsets[i]);
datas += sizes[i];
}
return ret;
}
/**
 * Hints the CPU to pull the buffer location at `offset` into cache.
 *
 * Only does something when built with ENABLE_SSE; otherwise it is a no-op.
 * `cacheLine` is currently ignored.
 */
void
MemoryIO::PrefetchImpl(uint64_t offset, uint64_t cacheLine) {
    static_cast<void>(cacheLine);  // not used yet; silences unused-parameter warnings
#if defined(ENABLE_SSE)
    // _mm_prefetch is specified as taking `char const*`; cast explicitly so
    // the call compiles on toolchains that declare it that way.
    _mm_prefetch(reinterpret_cast<const char*>(this->start_ + offset), _MM_HINT_T0);  // todo
#else
    static_cast<void>(offset);
#endif
}

} // namespace vsag
51 changes: 51 additions & 0 deletions src/io/memory_io_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "memory_io.h"

#include <catch2/catch_test_macros.hpp>
#include <memory>

#include "default_allocator.h"
#include "fixtures.h"
using namespace vsag;

/**
 * Round-trip check for any BasicIO backend.
 *
 * Writes 100 generated float vectors at random, vector-aligned offsets and
 * then reads each one back through the zero-copy Read overload, comparing it
 * element by element with what was written. When two vectors draw the same
 * offset the later write wins, which the offset->source map mirrors.
 *
 * NOTE(review): offsets range up to roughly 10,000,000 * code_size bytes, so a
 * memory-backed IO may have to grow to over 1 GB — confirm this is intended.
 */
template <typename T>
void
TestReadWrite(BasicIO<T>* basicIo) {
    int dim = 32;
    auto vector = fixtures::generate_vectors(100, dim);
    auto code_size = dim * sizeof(float);
    std::unordered_map<uint64_t, float*> maps;
    for (int i = 0; i < 100; ++i) {
        auto offset = random() % 10000000 * code_size;
        auto* source = vector.data() + i * dim;
        basicIo->Write(reinterpret_cast<const uint8_t*>(source), code_size, offset);
        maps[offset] = source;
    }

    for (const auto& [offset, gt] : maps) {
        const uint8_t* raw = basicIo->Read(code_size, offset);
        // Fail the test cleanly instead of dereferencing a null pointer
        // when the backend rejects the read.
        REQUIRE(raw != nullptr);
        const auto* result = reinterpret_cast<const float*>(raw);
        for (int i = 0; i < dim; ++i) {
            REQUIRE(result[i] == gt[i]);
        }
    }
}

// Runs the generic read/write round-trip against MemoryIO.
// Tags are passed as Catch2's second argument so that `[ut]` / `[memory_io]`
// filters select this test; embedded in the name they are plain text.
TEST_CASE("read&write", "[ut][memory_io]") {
    // `io` is declared after `allocator`, so it is destroyed first and can
    // still return its buffer to a live allocator.
    auto allocator = std::make_unique<DefaultAllocator>();
    auto io = std::make_unique<MemoryIO>(allocator.get());
    TestReadWrite(io.get());
}
20 changes: 20 additions & 0 deletions src/metric_type.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
namespace vsag {
// Selects the metric used to compare vectors. The numeric values are spelled
// out explicitly, so keep them stable when adding new entries.
enum class MetricType {
    METRIC_TYPE_L2SQR = 0,   // presumably squared L2 (Euclidean) distance
    METRIC_TYPE_IP = 1,      // presumably inner product
    METRIC_TYPE_COSINE = 2,  // presumably cosine similarity
};

} // namespace vsag
Loading

0 comments on commit 0e8c538

Please sign in to comment.