Skip to content

Commit 0ed10fc

Browse files
committed
always pad and align memory
1 parent f57d518 commit 0ed10fc

File tree

2 files changed

+43
-27
lines changed

2 files changed

+43
-27
lines changed

cp-algo/util/big_alloc.hpp

Lines changed: 33 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -13,43 +13,49 @@
1313
#endif
1414

1515
namespace cp_algo {
16-
template <typename T>
17-
class big_alloc: public std::allocator<T> {
18-
public:
16+
template <typename T, std::size_t Align = 32>
17+
class big_alloc {
18+
static_assert( Align >= alignof(void*), "Align must be at least pointer-size");
19+
static_assert(std::popcount(Align) == 1, "Align must be a power of two");
20+
public:
1921
using value_type = T;
20-
using base = std::allocator<T>;
22+
template <class U> struct rebind { using other = big_alloc<U, Align>; };
2123

2224
big_alloc() noexcept = default;
25+
template <typename U, std::size_t A>
26+
big_alloc(const big_alloc<U, A>&) noexcept {}
2327

24-
template <typename U>
25-
big_alloc(const big_alloc<U>&) noexcept {}
26-
27-
#if CP_ALGO_USE_MMAP
2828
[[nodiscard]] T* allocate(std::size_t n) {
29-
if(n * sizeof(T) < 1024 * 1024) {
30-
return base::allocate(n);
29+
std::size_t padded = round_up(n * sizeof(T));
30+
std::size_t align = std::max<std::size_t>(alignof(T), Align);
31+
#if CP_ALGO_USE_MMAP
32+
if (padded >= MEGABYTE) {
33+
void* raw = mmap(nullptr, padded,
34+
PROT_READ | PROT_WRITE,
35+
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
36+
madvise(raw, padded, MADV_HUGEPAGE);
37+
madvise(raw, padded, MADV_POPULATE_WRITE);
38+
return static_cast<T*>(raw);
3139
}
32-
n *= sizeof(T);
33-
void* raw = mmap(nullptr, n,
34-
PROT_READ | PROT_WRITE,
35-
MAP_PRIVATE | MAP_ANONYMOUS,
36-
-1, 0);
37-
madvise(raw, n, MADV_HUGEPAGE);
38-
madvise(raw, n, MADV_POPULATE_WRITE);
39-
return static_cast<T*>(raw);
40-
}
4140
#endif
41+
return static_cast<T*>(::operator new(padded, std::align_val_t(align)));
42+
}
4243

43-
#if CP_ALGO_USE_MMAP
4444
void deallocate(T* p, std::size_t n) noexcept {
45-
if(n * sizeof(T) < 1024 * 1024) {
46-
return base::deallocate(p, n);
47-
}
48-
if(p) {
49-
munmap(p, n * sizeof(T));
50-
}
45+
if (!p) return;
46+
std::size_t padded = round_up(n * sizeof(T));
47+
std::size_t align = std::max<std::size_t>(alignof(T), Align);
48+
#if CP_ALGO_USE_MMAP
49+
if (padded >= MEGABYTE) { munmap(p, padded); return; }
50+
#endif
51+
::operator delete(p, padded, std::align_val_t(align));
52+
}
53+
54+
private:
55+
static constexpr std::size_t MEGABYTE = 1 << 20;
56+
static constexpr std::size_t round_up(std::size_t x) noexcept {
57+
return (x + Align - 1) / Align * Align;
5158
}
52-
#endif
5359
};
5460
}
5561
#endif // CP_ALGO_UTIL_big_alloc_HPP

cp-algo/util/simd.hpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,5 +58,15 @@ namespace cp_algo {
5858
static constexpr u64x4 shuffler = {3, 0, 1, 2};
5959
return __builtin_shuffle(x, shuffler);
6060
}
61+
62+
template<std::size_t Align = 32>
63+
constexpr std::size_t aligned_idx(auto const& c, std::size_t i = 0) {
64+
auto const* p = std::data(c) + i;
65+
using value_type = std::remove_pointer_t<decltype(p)>;
66+
constexpr auto mask = Align - 1;
67+
std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(p);
68+
std::size_t bytes_to_next = (-addr) & mask;
69+
return i + bytes_to_next / sizeof(value_type);
70+
}
6171
}
6272
#endif // CP_ALGO_UTIL_SIMD_HPP

0 commit comments

Comments
 (0)