diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a4f847c..7d2ceb9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,8 @@ include(GNUInstallDirs) # With GNUInstallDirs we use platform-independent macros add_library(ptrie INTERFACE ${HEADER_FILES}) target_compile_features(ptrie INTERFACE cxx_std_20) # Require C++20 features. +find_package (Boost 1.70 REQUIRED COMPONENTS headers) +target_link_libraries(ptrie INTERFACE Boost::headers) target_include_directories(ptrie INTERFACE $ $ diff --git a/src/ptrie/ptrie_interface.h b/src/ptrie/ptrie_interface.h new file mode 100644 index 0000000..9664714 --- /dev/null +++ b/src/ptrie/ptrie_interface.h @@ -0,0 +1,345 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Copyright Morten K. Schou + */ + +/* + * File: ptrie_interface.h + * Author: Morten K. Schou + * + * Created on 18-12-2020. + */ + +#ifndef PTRIE_INTERFACE_H +#define PTRIE_INTERFACE_H + +#include +#include +#include +#include + +namespace ptrie { + // This file defines interfacing functionality for using more types with ptrie. + // ptrie_interface provides a unified interface for already supported types, std::vector of such types, std::string, + // and types that get support from its specialization of ptrie_interface by conversion to/from std::vector. 
+ // + // This file provides specializations for KEY where: + // a) std::has_unique_object_representations_v is true or ptrie::byte_iterator is explicitly defined + // b) Nested combinations of std::tuple and std::vector, but without vectors nested (somewhere) inside other vectors, + // where the lowest element types satisfy a). The size of std::vector is stored to make parsing consistent. std::basic_string and std::variant also get byte converters further down in this file. + // + // Lastly, for convenience, the file provides ptrie_set and ptrie_map that use ptrie_interface directly. + + /* Primary template: only declared here, so a KEY is usable only when one of the specializations below matches it. */ template struct ptrie_interface; + /* Detection trait: false by default, true through the partial specialization on the next line. */ template struct has_ptrie_interface : std::false_type {}; + template struct has_ptrie_interface>> : std::true_type {}; + template constexpr bool has_ptrie_interface_v = has_ptrie_interface::value; + + template struct has_custom_byte_iterator : std::false_type {}; // Can be specialized by user types. + /* True when KEY has unique object representations or has_custom_byte_iterator was specialized to true for it. */ template constexpr bool has_byte_iterator_v = std::has_unique_object_representations_v || has_custom_byte_iterator::value; + + // First define instances for resp. value and vector of values that trivially fit into a ptrie. 
+ /* Pass-through interface: to_ptrie returns the key unchanged and unpack fills a local through p.unpack(id, &res). */ template + struct ptrie_interface>> { + using elem_type = KEY; + using insert_type = KEY; + using external_type = KEY; + static constexpr insert_type to_ptrie(const external_type& key) { + return key; + } + template + static constexpr external_type unpack(const PT& p, size_t id) { + static_assert(std::is_same_v().unpack(std::declval(),std::declval()))>); + external_type res; + p.unpack(id, &res); + return res; + } + }; + /* Vector interface: the vector itself is the inserted value and unpack forwards the result of p.unpack(id). */ template + struct ptrie_interface, std::enable_if_t>> { + using elem_type = KEY; + using insert_type = std::vector; + using external_type = std::vector; + static insert_type to_ptrie(const external_type& key) { + return key; /* key is a const reference, so std::move(key) could only ever copy; return it directly. */ + } + template + static external_type unpack(const PT& p, size_t id) { + static_assert(std::is_same_v, decltype(std::declval().unpack(std::declval()))>); + return p.unpack(id); + } + }; + /* String interface: inserted as a (data pointer, length) pair and rebuilt from the data and size of the unpacked buffer. */ template + struct ptrie_interface> { + using elem_type = CharT; + using insert_type = std::pair; + using external_type = std::basic_string; + static insert_type to_ptrie(const external_type& key) { + return std::make_pair(key.data(), key.length()); + } + template + static external_type unpack(const PT& p, size_t id) { + static_assert(std::is_same_v, decltype(std::declval().unpack(std::declval()))>); + auto vector = p.unpack(id); + return external_type(vector.data(), vector.size()); + } + }; + + // For vectors, we need to know if elements are fixed size + template struct fixed_byte_size; + template struct has_fixed_byte_size : std::false_type {}; + template struct has_fixed_byte_size>> : std::true_type {}; + template constexpr bool has_fixed_byte_size_v = has_fixed_byte_size::value; + + template struct byte_vector_converter; + template struct has_byte_vector_converter : std::false_type {}; + template struct has_byte_vector_converter>> : std::true_type {}; + template constexpr bool has_byte_vector_converter_v = has_byte_vector_converter::value; + // Convenience definition + template constexpr bool has_fixed_size_converter = 
+ has_fixed_byte_size_v && has_byte_vector_converter_v; + + // If KEY satisfies has_byte_iterator_v it has a fixed size. + template + struct fixed_byte_size>> { + using T = KEY; + static constexpr size_t size() { + return ptrie::byte_iterator::element_size(); + } + }; + // A tuple of fixed-size Args has a fixed size: the sum of the sizes of its elements. + template + struct fixed_byte_size, std::enable_if_t<((has_fixed_byte_size_v) && ...)>> { + using T = std::tuple; + static constexpr size_t size() { + return (fixed_byte_size::size() + ...); + } + }; + + // Byte converter by using byte_iterator directly: element_size() bytes are copied one at a time in index order. + template + struct byte_vector_converter>> { + using T = KEY; + static constexpr size_t size(const T&) { + return ptrie::byte_iterator::element_size(); + } + static constexpr void push_back_bytes(std::vector& result, const T& data){ + for (size_t i = 0; i < ptrie::byte_iterator::element_size(); ++i){ + result.push_back(static_cast<std::byte>(ptrie::byte_iterator::const_access(&data, i))); + } + } + // Reads element_size() bytes starting at bytes_id and advances bytes_id past them. + static constexpr void from_bytes(const std::vector& bytes, size_t& bytes_id, T& data){ + for (size_t i = 0; i < ptrie::byte_iterator::element_size(); ++i, ++bytes_id){ + ptrie::byte_iterator::access(&data, i) = static_cast<unsigned char>(bytes[bytes_id]); + } + } + }; + // Byte converter for tuples of elements that have a byte converter that knows its size (i.e. vector stores size, so it can parse it back correctly). + template + struct byte_vector_converter, std::enable_if_t<((has_byte_vector_converter_v) && ...)>> { + using T = std::tuple; + static constexpr size_t size(const T& data) { + return std::apply([](auto&&... args){return (byte_vector_converter>::size(args) + ...);}, data); + } + static constexpr void push_back_bytes(std::vector& result, const T& data){ + std::apply([&result](auto&&... args){(byte_vector_converter>::push_back_bytes(result, args), ...);}, data); + } + static constexpr void from_bytes(const std::vector& bytes, size_t& bytes_id, T& data){ + std::apply([&bytes, &bytes_id](auto&&... 
+ args){(byte_vector_converter>::from_bytes(bytes, bytes_id, args), ...);}, data); + } + }; + // Vector of fixed size elements. Is not itself fixed_size. It stores extra info of its size, so it only uses the bytes corresponding to it. + // This means multiple vectors can be in a tuple using this approach. + template + struct byte_vector_converter, std::enable_if_t && has_fixed_size_converter::size_type>>> { + using T = std::vector; + using size_type = typename std::vector::size_type; + static constexpr size_t size(const T& data) { + return fixed_byte_size::size() + fixed_byte_size::size() * data.size(); + } + static constexpr void push_back_bytes(std::vector& result, const T& data){ + byte_vector_converter::push_back_bytes(result, data.size()); + for (const auto& elem : data) { + byte_vector_converter::push_back_bytes(result, elem); + } + } + static constexpr void from_bytes(const std::vector& bytes, size_t& bytes_id, T& data){ + size_type size = 0; + byte_vector_converter::from_bytes(bytes, bytes_id, size); + data.clear(); /* Replace previous contents instead of appending, as the string converter does by assignment. */ + data.reserve(size); /* The element count is known up front, so allocate once. */ + for (size_type i = 0; i < size; ++i) { + data.emplace_back(); + byte_vector_converter::from_bytes(bytes, bytes_id, data.back()); + } + } + }; + + // Support strings in tuples and vectors. 
+ /* String converter: the length is stored first, then every character through the CharT converter; from_bytes reads the length and rebuilds the string from the bytes that follow it. */ template + struct byte_vector_converter, std::enable_if_t>> { + using T = std::basic_string; + using size_type = typename T::size_type; + static size_t size(const T& data) { + return fixed_byte_size::size() + fixed_byte_size::size() * data.size(); + } + static void push_back_bytes(std::vector& result, const T& data){ + byte_vector_converter::push_back_bytes(result, data.size()); + for (const auto& elem : data) { + byte_vector_converter::push_back_bytes(result, elem); + } + } + static void from_bytes(const std::vector& bytes, size_t& bytes_id, T& data){ + size_type size = 0; + byte_vector_converter::from_bytes(bytes, bytes_id, size); + /* data() + offset stays valid when an empty string is the last field, where bytes[bytes_id] would index one past the final element. NOTE(review): characters are written through the CharT converter but read back by reinterpreting raw bytes; presumably byte_iterator keeps plain byte order for CharT - confirm. */ + data = T(reinterpret_cast(bytes.data() + bytes_id), size); + bytes_id += fixed_byte_size::size() * size; + } + }; + + // Byte converter for variants of elements that have a byte converter that knows its size (i.e. vector stores size, so it can parse it back correctly). + // Layout: the active index (omitted when the variant has a single alternative) followed by the bytes of the held alternative. + template + struct byte_vector_converter, std::enable_if_t<((has_byte_vector_converter_v) && ...)>> { + static constexpr size_t variant_size = std::variant_size_v>; + using T = std::variant; + using index_type = std::conditional_t<(variant_size <= std::numeric_limits::max()), unsigned char, size_t>; // Just use one byte if variant has less than 256 options, which is common. + static constexpr size_t size(const T& data) { + if constexpr (variant_size == 1) { + return byte_vector_converter>::size(std::get<0>(data)); + } else { + if (data.valueless_by_exception()) { /* index() is variant_npos here, which mp_with_index must not be called with; push_back_bytes writes nothing in this case, so report zero bytes. */ + assert(false); return 0; + } + return fixed_byte_size::size() + + boost::mp11::mp_with_index(data.index(), [&data](auto I) { + return byte_vector_converter>::size(std::get(data)); + }); + } + } + static constexpr void push_back_bytes(std::vector& result, const T& data){ + if constexpr (variant_size == 1) { + byte_vector_converter>::push_back_bytes(result, std::get<0>(data)); + } else { + if (data.valueless_by_exception()) { // This would mess up the static_cast, but should not happen in general. 
+ assert(false); return; + } + byte_vector_converter::push_back_bytes(result, static_cast(data.index())); + boost::mp11::mp_with_index(data.index(), [&result, &data](auto I) { + byte_vector_converter>::push_back_bytes(result, std::get(data)); + }); + } + } + /* Reads the stored index (when there is more than one alternative), emplaces that alternative and lets its converter fill it. */ + static constexpr void from_bytes(const std::vector& bytes, size_t& bytes_id, T& data){ + if constexpr (variant_size == 1) { + byte_vector_converter>::from_bytes(bytes, bytes_id, std::get<0>(data)); + } else { + index_type index = 0; + byte_vector_converter::from_bytes(bytes, bytes_id, index); + boost::mp11::mp_with_index(index, [&bytes,&bytes_id,&data](auto I){ + byte_vector_converter>::from_bytes(bytes, bytes_id, data.template emplace()); + }); + } + } + }; + + /* Helper traits used to exclude types that already have a dedicated ptrie_interface from the generic one below. */ template struct is_vector_of_byte_iterator : std::false_type{}; + template struct is_vector_of_byte_iterator, std::enable_if_t>> : std::true_type{}; + template constexpr bool is_vector_of_byte_iterator_v = is_vector_of_byte_iterator::value; + template struct is_basic_string : std::false_type{}; + template struct is_basic_string> : std::true_type{}; + template constexpr bool is_basic_string_v = is_basic_string::value; + + // Special ptrie_interface for any KEY with a byte vector converter. 
+ /* Generic interface: the key is serialised into a vector of std::byte by byte_vector_converter and parsed back from the unpacked bytes. */ template + struct ptrie_interface && !has_byte_iterator_v && !is_vector_of_byte_iterator_v && !is_basic_string_v>> { + using elem_type = std::byte; + using insert_type = std::vector; + using external_type = KEY; + static insert_type to_ptrie(const external_type& key) { + std::vector result; + result.reserve(byte_vector_converter::size(key)); /* Reserve the size reported by the converter before appending. */ + byte_vector_converter::push_back_bytes(result, key); + return result; + } + template + static external_type unpack(const PT& p, size_t id) { + static_assert(std::is_same_v, decltype(std::declval().unpack(std::declval()))>); + auto bytes = p.unpack(id); + external_type result; + size_t bytes_id = 0; /* Read cursor that from_bytes advances through the buffer. */ + byte_vector_converter::from_bytes(bytes, bytes_id, result); + return result; + } + }; + + template + using ptrie_interface_elem = typename ptrie_interface::elem_type; + + // Next we define ptrie_set and ptrie_map which make using ptrie_interface seamless (except now you should use 'at' instead of 'unpack'). + // ptrie_set privately inherits ptrie::set_stable and converts every key with ptrie_interface::to_ptrie before delegating; at() converts a stored entry back to KEY. + template + class ptrie_set : private ptrie::set_stable> { + static_assert(has_ptrie_interface_v, "KEY does not provide a specialization for ptrie_interface."); + static_assert(std::is_same_v::external_type>, "KEY not matching ptrie_interface::external_type."); + using pt = ptrie::set_stable>; + public: + using elem_type = KEY; + + using typename pt::set_stable; + using pt::unpack; + using pt::size; + + std::pair insert(const KEY& key) { + return pt::insert(ptrie_interface::to_ptrie(key)); + } + [[nodiscard]] std::pair exists(const KEY& key) const { + return pt::exists(ptrie_interface::to_ptrie(key)); + } + bool erase (const KEY& key) { + return pt::erase(ptrie_interface::to_ptrie(key)); + } + KEY at(size_t index) const { + return ptrie_interface::unpack(*this, index); + } + }; + + template + class ptrie_map : private ptrie::map, T> { + static_assert(has_ptrie_interface_v, "KEY does not provide a specialization for ptrie_interface."); + static_assert(std::is_same_v::external_type>, "KEY not matching 
+ ptrie_interface::external_type."); + using pt = ptrie::map, T>; + public: + using elem_type = KEY; + + using typename pt::map; + using pt::unpack; + using pt::size; + using pt::get_data; + + // Yes, insert(), exists(), erase() and at() are same as ptrie_set, but I tried multiple inheritance, and it didn't fly. + std::pair insert(const KEY& key) { + return pt::insert(ptrie_interface::to_ptrie(key)); + } + [[nodiscard]] std::pair exists(const KEY& key) const { + return pt::exists(ptrie_interface::to_ptrie(key)); + } + bool erase (const KEY& key) { + return pt::erase(ptrie_interface::to_ptrie(key)); + } + KEY at(size_t index) const { + return ptrie_interface::unpack(*this, index); + } + T& operator[](const KEY& key) { + return pt::operator[](ptrie_interface::to_ptrie(key)); + } + }; +} + +#endif //PTRIE_INTERFACE_H diff --git a/src/ptrie/ptrie_map.h b/src/ptrie/ptrie_map.h index 7faa46c..eb15440 100644 --- a/src/ptrie/ptrie_map.h +++ b/src/ptrie/ptrie_map.h @@ -62,7 +62,7 @@ namespace ptrie { return get_data(pt::insert(key).second); } - T& operator[](std::pair key) + T& operator[](std::pair key) { return get_data(pt::insert(key.first, key.second).second); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 71881a3..7d82cf6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -12,13 +12,16 @@ add_executable (Delete delete.cpp) add_executable (Set set.cpp) add_executable (Map map.cpp) add_executable (StableSet stable_set.cpp) +add_executable (Interface interface.cpp) target_link_libraries(Set PRIVATE Boost::unit_test_framework ptrie) target_link_libraries(Delete PRIVATE Boost::unit_test_framework ptrie) target_link_libraries(StableSet PRIVATE Boost::unit_test_framework ptrie) target_link_libraries(Map PRIVATE Boost::unit_test_framework ptrie) +target_link_libraries(Interface PRIVATE Boost::unit_test_framework ptrie) add_test(NAME Set COMMAND Set) add_test(NAME Delete COMMAND Delete) add_test(NAME StableSet COMMAND StableSet) add_test(NAME Map COMMAND 
Map) +add_test(NAME Interface COMMAND Interface) diff --git a/test/interface.cpp b/test/interface.cpp new file mode 100644 index 0000000..1844b7a --- /dev/null +++ b/test/interface.cpp @@ -0,0 +1,158 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * Copyright Morten K. Schou + */ + +/* + * File: interface + * Author: Morten K. Schou + * + * Created on 09-03-2022. + */ + +#define BOOST_TEST_MODULE interface + +#include +#include + +using namespace ptrie; + +/* Test key type: two size_t fields with equality and stream output so Boost.Test can compare and print it. */ struct my_struct { + size_t a; + size_t b; + my_struct() = default; + my_struct(size_t a, size_t b) : a(a), b(b) {}; + friend bool operator==(const my_struct& lhs, const my_struct& rhs) { return lhs.a == rhs.a && lhs.b == rhs.b; }; + friend bool operator!=(const my_struct& lhs, const my_struct& rhs) { return !(lhs==rhs); }; + friend std::ostream& operator<<(std::ostream& s, const my_struct& m) { + return s << "{a: " << m.a << ", b: " << m.b << "}"; + } +}; + +/* String keys: fresh inserts get ids 0 and 1, a duplicate returns the existing id, and at() returns the original strings. */ BOOST_AUTO_TEST_CASE(ptrie_interface_string_Test) +{ + ptrie_set string_set; + auto [fresh1, id1] = string_set.insert("A string"); + BOOST_CHECK(fresh1); + BOOST_CHECK_EQUAL(id1, 0); + std::string another_string = "Another string"; + auto [fresh2, id2] = string_set.insert(another_string); + BOOST_CHECK(fresh2); + BOOST_CHECK_EQUAL(id2, 1); + + std::string same_string = "Another string"; + auto [fresh3, id3] = string_set.insert(same_string); + BOOST_CHECK(!fresh3); + 
+ BOOST_CHECK_EQUAL(id3, 1); + + auto first_string = string_set.at(0); + BOOST_CHECK_EQUAL(first_string, "A string"); + + auto second_string = string_set.at(1); + BOOST_CHECK_EQUAL(second_string, another_string); +} + +BOOST_AUTO_TEST_CASE(ptrie_interface_tuple_Test) +{ + ptrie_set> string_set; + my_struct m{42,100}; + auto [fresh1, id1] = string_set.insert(std::make_tuple(1, u"a string", m)); + BOOST_CHECK(fresh1); + BOOST_CHECK_EQUAL(id1, 0); + + auto [i1, s1, m1] = string_set.at(0); + BOOST_CHECK_EQUAL(i1, 1); + BOOST_CHECK(s1 == u"a string"); + BOOST_CHECK_EQUAL(m1, m); +} + +BOOST_AUTO_TEST_CASE(ptrie_interface_vector_Test) +{ + ptrie_set> string_set; + std::vector my_vector; + my_vector.emplace_back(42, 100); + my_vector.emplace_back(9000, 2); + auto [fresh1, id1] = string_set.insert(my_vector); + BOOST_CHECK(fresh1); + BOOST_CHECK_EQUAL(id1, 0); + + auto v = string_set.at(0); + BOOST_CHECK_EQUAL_COLLECTIONS(v.begin(), v.end(), my_vector.begin(), my_vector.end()); +} + +BOOST_AUTO_TEST_CASE(ptrie_interface_variant_Test) +{ + ptrie_set> string_set; + auto [fresh1, id1] = string_set.insert(1); + BOOST_CHECK(fresh1); + BOOST_CHECK_EQUAL(id1, 0); + auto [fresh2, id2] = string_set.insert(u"a string"); + BOOST_CHECK(fresh2); + BOOST_CHECK_EQUAL(id2, 1); + auto [fresh3, id3] = string_set.insert(my_struct(42,100)); + BOOST_CHECK(fresh3); + BOOST_CHECK_EQUAL(id3, 2); + + auto v1 = string_set.at(0); + BOOST_CHECK_EQUAL(v1.index(), 0); + BOOST_CHECK_EQUAL(std::get<0>(v1), 1); + auto v2 = string_set.at(1); + BOOST_CHECK_EQUAL(v2.index(), 1); + BOOST_CHECK(std::get<1>(v2) == u"a string"); + auto v3 = string_set.at(2); + BOOST_CHECK_EQUAL(v3.index(), 2); + BOOST_CHECK_EQUAL(std::get<2>(v3), my_struct(42,100)); +} + +BOOST_AUTO_TEST_CASE(ptrie_interface_tuple_of_vector_Test) +{ + ptrie_set,std::string,std::vector>> string_set; + std::vector v1; + v1.emplace_back(42,100); + v1.emplace_back(9000, 2); + v1.emplace_back(4, 10); + v1.emplace_back(29000,72); + v1.emplace_back(9000, 
+ 2); + std::vector v2; + v2.emplace_back(-5); + v2.emplace_back(123456789); + auto [fresh1, id1] = string_set.insert(std::make_tuple(v1, "a string", v2)); + BOOST_CHECK(fresh1); + BOOST_CHECK_EQUAL(id1, 0); + + auto [ov1, s, ov2] = string_set.at(0); + BOOST_CHECK_EQUAL_COLLECTIONS(ov1.begin(), ov1.end(), v1.begin(), v1.end()); + BOOST_CHECK_EQUAL(s, "a string"); + BOOST_CHECK_EQUAL_COLLECTIONS(ov2.begin(), ov2.end(), v2.begin(), v2.end()); +} + +/* Map with string keys: values are written through get_data(id) and operator[], then read back both ways. */ BOOST_AUTO_TEST_CASE(ptrie_interface_map_string_Test) +{ + ptrie_map string_map; + auto [fresh1, id1] = string_map.insert("A string"); + BOOST_CHECK(fresh1); + BOOST_CHECK_EQUAL(id1, 0); + string_map.get_data(id1) = 42; + + string_map["Another string"] = 9000; + auto [exists, id2] = string_map.exists("Another string"); + BOOST_CHECK(exists); + BOOST_CHECK_EQUAL(id2, 1); + BOOST_CHECK_EQUAL(string_map.get_data(id2), 9000); + + BOOST_CHECK_EQUAL(string_map["A string"], 42); +}