Skip to content

Commit 9e074cf

Browse files
committed
feat: add IntrusiveStringSet
1 parent 9b36f19 commit 9e074cf

File tree

2 files changed

+161
-94
lines changed

2 files changed

+161
-94
lines changed

src/core/intrusive_string_set.h

Lines changed: 145 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -4,153 +4,221 @@
44

55
#pragma once
66

7+
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <new>
#include <string_view>
#include <vector>

#include "base/hash.h"
14+
1215
namespace dfly {
1316

14-
class ISSEntry {
17+
// Non-owning handle to one node of an intrusive singly linked list of strings.
//
// A node is a single malloc-ed buffer laid out as
//   [next node pointer | uint32_t key size | key bytes]
// The handle stores only the buffer pointer, so copying a handle aliases the same node and
// destroying a handle frees nothing. Creating, linking and destroying nodes is reserved for
// the friend class IntrusiveStringList.
class ISLEntry {
  friend class IntrusiveStringList;

 public:
  // Creates an empty handle that refers to no node.
  ISLEntry() = default;

  // Wraps an existing node buffer that follows the layout above. Ownership is not taken.
  // Kept implicit so that existing call sites continue to compile.
  ISLEntry(char* data) : data_(data) {
  }

  // True when the handle refers to a node.
  operator bool() const {
    return data_ != nullptr;
  }

  // Returns a view of the key stored inside the node. The view is only valid while the node
  // is alive. Must not be called on an empty handle.
  std::string_view Key() const {
    return {GetKeyData(), GetKeySize()};
  }

 private:
  // Byte offsets of the fields inside the node buffer.
  static constexpr std::size_t kNextOffset = 0;
  static constexpr std::size_t kKeySizeOffset = kNextOffset + sizeof(char*);
  static constexpr std::size_t kKeyOffset = kKeySizeOffset + sizeof(uint32_t);

  // Allocates a node holding a copy of `key` with a null next pointer.
  // Throws std::bad_alloc when the allocation fails.
  static ISLEntry Create(std::string_view key) {
    // The size field is 32 bits wide; longer keys cannot be represented.
    assert(key.size() <= UINT32_MAX);
    const auto key_size = static_cast<uint32_t>(key.size());

    char* data = static_cast<char*>(std::malloc(kKeyOffset + key_size));
    if (data == nullptr) {
      throw std::bad_alloc();
    }

    char* const no_next = nullptr;
    std::memcpy(data + kNextOffset, &no_next, sizeof(no_next));
    std::memcpy(data + kKeySizeOffset, &key_size, sizeof(key_size));

    // An empty string_view may carry a null data pointer, which memcpy must not receive.
    if (key_size != 0) {
      std::memcpy(data + kKeyOffset, key.data(), key_size);
    }

    return ISLEntry(data);
  }

  // Releases the node buffer. Every handle that aliases the node becomes dangling.
  static void Destroy(ISLEntry entry) {
    std::free(entry.data_);
  }

  // Reads the successor stored in the node; the result is empty for the last node.
  ISLEntry Next() const {
    ISLEntry next;
    std::memcpy(&next.data_, data_ + kNextOffset, sizeof(next.data_));
    return next;
  }

  // Overwrites the successor stored in the node.
  void SetNext(ISLEntry next) {
    std::memcpy(data_ + kNextOffset, &next.data_, sizeof(next.data_));
  }

  const char* GetKeyData() const {
    return data_ + kKeyOffset;
  }

  // The size field is not necessarily aligned inside the buffer, hence memcpy.
  uint32_t GetKeySize() const {
    uint32_t size = 0;
    std::memcpy(&size, data_ + kKeySizeOffset, sizeof(size));
    return size;
  }

  // TODO consider use SDS strings or other approach
  // TODO add optimization for big keys
  // memory data layout [next node pointer, key_size, key]
  char* data_ = nullptr;
};
6384

64-
class ISMEntry {
65-
public:
66-
ISMEntry(std::string_view key, std::string_view val) {
67-
ISMEntry* next = nullptr;
68-
uint32_t key_size = key.size();
69-
uint32_t val_size = val.size();
85+
class FakePrevISLEntry : public ISLEntry {
86+
FakePrevISLEntry(ISLEntry) {
87+
fake_allocated_mem_ = ;
88+
}
7089

71-
auto size = sizeof(next) + sizeof(key_size) + sizeof(val_size) + key_size + val_size;
90+
private:
91+
void* fake_allocated_mem_;
92+
}
7293

73-
data_ = (char*)malloc(size);
94+
class IntrusiveStringList {
95+
public:
96+
~IntrusiveStringList() {
97+
while (start_) {
98+
auto next = start_.Next();
99+
ISLEntry::Destroy(start_);
100+
start_ = next;
101+
}
102+
}
74103

75-
std::memcpy(data_, &next, sizeof(next));
104+
ISLEntry Insert(ISLEntry e) {
105+
e.SetNext(start_);
106+
start_ = e;
107+
return start_;
108+
}
76109

77-
auto* key_size_pos = data_ + sizeof(next);
78-
std::memcpy(key_size_pos, &key_size, sizeof(key_size));
110+
ISLEntry Emplace(std::string_view key) {
111+
return Insert(ISLEntry::Create(key));
112+
}
79113

80-
auto* val_size_pos = key_size_pos + sizeof(key_size);
81-
std::memcpy(val_size_pos, &val_size, sizeof(val_size));
114+
ISLEntry Find(std::string_view str) {
115+
auto it = start_;
116+
for (; it && it.Key() != str; it = it.Next())
117+
;
118+
return it;
119+
}
82120

83-
auto* key_pos = val_size_pos + sizeof(val_size);
84-
std::memcpy(key_pos, key.data(), key_size);
121+
bool Erase(std::string_view str) {
122+
if (!start_) {
123+
return false;
124+
}
125+
auto it = start_;
126+
if (it.Key() == str) {
127+
start_ = it.Next();
128+
ISLEntry::Destroy(it);
129+
return true;
130+
}
85131

86-
auto* val_pos = key_pos + key_size;
87-
std::memcpy(val_pos, val.data(), val_size);
132+
auto prev = it;
133+
for (it = it.Next(); it; prev = it, it = it.Next()) {
134+
if (it.Key() == str) {
135+
prev.SetNext(it.Next());
136+
ISLEntry::Destroy(it);
137+
return true;
138+
}
139+
}
140+
return false;
88141
}
89142

90-
std::string_view Key() const {
91-
return {GetKeyData(), GetKeySize()};
143+
void MoveNext(ISLEntry& prev) {
144+
auto next = prev.Next();
145+
prev.SetNext(next.Next());
146+
Insert(next);
92147
}
93148

94-
std::string_view Val() const {
95-
return {GetValData(), GetValSize()};
96-
}
149+
private:
150+
ISLEntry start_;
151+
};
97152

98-
ISMEntry* Next() const {
99-
ISMEntry* next = nullptr;
100-
std::memcpy(&next, data_, sizeof(next));
101-
return next;
102-
}
153+
class IntrusiveStringSet {
154+
public:
155+
// TODO add TTL processing
156+
ISLEntry Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) {
157+
if (size_ >= entries_.size()) {
158+
Grow();
159+
}
160+
auto bucket_id = BucketId(Hash(str));
161+
auto& bucket = entries_[bucket_id];
103162

104-
void SetVal(std::string_view val) {
105-
// TODO add optimization for the same size key
106-
uint32_t val_size = val.size();
107-
auto new_size =
108-
sizeof(ISMEntry*) + sizeof(uint32_t) + sizeof(uint32_t) + GetKeySize() + val_size;
163+
if (auto existed_item = bucket.Find(str); existed_item) {
164+
// TODO consider common implementation for key value pair
165+
return ISLEntry();
166+
}
109167

110-
data_ = (char*)realloc(data_, new_size);
168+
++size_;
169+
return bucket.Emplace(str);
170+
}
111171

112-
auto* val_size_pos = data_ + sizeof(ISMEntry*) + sizeof(uint32_t);
113-
std::memcpy(val_size_pos, &val_size, sizeof(val_size));
172+
bool Erase(std::string_view str) {
173+
auto bucket_id = BucketId(Hash(str));
174+
return entries_[bucket_id].Erase(str);
175+
}
114176

115-
auto* val_pos = val_size_pos + sizeof(val_size) + GetKeySize();
116-
std::memcpy(val_pos, val.data(), val_size);
177+
ISLEntry Find(std::string_view member) {
178+
auto bucket_id = BucketId(Hash(member));
179+
return entries_[bucket_id].Find(member);
117180
}
118181

119-
void SetNext(ISMEntry* next) {
120-
std::memcpy(data_, &next, sizeof(next));
182+
// Returns the number of elements in the map. Note that it might be that some of these elements
183+
// have expired and can't be accessed.
184+
size_t UpperBoundSize() const {
185+
return size_;
121186
}
122187

123-
private:
124-
const char* GetKeyData() const {
125-
return data_ + sizeof(ISMEntry*) + sizeof(uint32_t) + sizeof(uint32_t);
188+
bool Empty() const {
189+
return size_ == 0;
126190
}
127191

128-
uint32_t GetKeySize() const {
129-
uint32_t size = 0;
130-
std::memcpy(&size, data_ + sizeof(ISMEntry*), sizeof(size));
131-
return size;
192+
private:
193+
std::uint32_t Capacity() const {
194+
return 1 << capacity_log_;
132195
}
133196

134-
const char* GetValData() const {
135-
return GetKeyData() + GetKeySize();
197+
void Grow() {
198+
++capacity_log_;
199+
entries_.resize(Capacity());
200+
201+
// TODO rehashing
136202
}
137203

138-
uint32_t GetValSize() const {
139-
uint32_t size = 0;
140-
std::memcpy(&size, data_ + sizeof(ISMEntry*) + sizeof(uint32_t), sizeof(size));
141-
return size;
204+
uint32_t BucketId(uint64_t hash) const {
205+
assert(capacity_log_ > 0);
206+
return hash >> (64 - capacity_log_);
142207
}
143208

144-
// TODO consider use SDS strings or other approach
145-
// TODO add optimization for big keys
146-
// memory daya layout [ISMEntry*, key_size, val_size, key, val]
147-
char* data_;
148-
};
209+
uint64_t Hash(std::string_view str) const {
210+
constexpr XXH64_hash_t kHashSeed = 24061983;
211+
return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed);
212+
}
149213

150-
template <class EntryT> class IntrusiveStringSet {
151-
public:
152214
private:
153-
std::vector<EntryT*> entries_;
215+
static constexpr size_t kMinSizeShift = 2;
216+
std::uint32_t capacity_log_ = 1;
217+
std::uint32_t size_ = 0; // number of elements in the set.
218+
219+
static_assert(sizeof(IntrusiveStringList) == sizeof(void*),
220+
"IntrusiveStringList should be just a pointer");
221+
std::vector<IntrusiveStringList> entries_;
154222
};
155223

156224
} // namespace dfly

src/core/intrusive_string_set_test.cc

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,31 +25,30 @@ class IntrusiveStringSetTest : public ::testing::Test {
2525
}
2626
};
2727

28-
TEST_F(IntrusiveStringSetTest, ISSEntryTest) {
29-
ISSEntry test("0123456789");
28+
// Exercises Emplace, Find, Erase and MoveNext of IntrusiveStringList.
TEST_F(IntrusiveStringSetTest, IntrusiveStringListTest) {
  IntrusiveStringList isl;
  ISLEntry test = isl.Emplace("0123456789");

  EXPECT_EQ(test.Key(), "0123456789"sv);

  test = isl.Emplace("123456789");

  EXPECT_EQ(test.Key(), "123456789"sv);

  test = isl.Emplace("23456789");

  EXPECT_EQ(test.Key(), "23456789"sv);

  EXPECT_EQ(isl.Find("0123456789").Key(), "0123456789"sv);
  EXPECT_EQ(isl.Find("23456789").Key(), "23456789"sv);
  EXPECT_EQ(isl.Find("123456789").Key(), "123456789"sv);
  EXPECT_FALSE(isl.Find("test"));

  EXPECT_TRUE(isl.Erase("23456789"));
  EXPECT_FALSE(isl.Find("23456789"));
  EXPECT_FALSE(isl.Erase("test"));
  EXPECT_FALSE(isl.Find("test"));

  // Entries are linked at the head, so the list is now "123456789" -> "0123456789".
  // Moving the successor of "123456789" transfers "0123456789" into the second list.
  IntrusiveStringList isl2;
  ISLEntry prev = isl.Find("123456789");
  ASSERT_TRUE(prev);
  isl2.MoveNext(prev);

  EXPECT_FALSE(isl.Find("0123456789"));
  EXPECT_EQ(isl.Find("123456789").Key(), "123456789"sv);
  EXPECT_EQ(isl2.Find("0123456789").Key(), "0123456789"sv);
}
5453

5554
} // namespace dfly

0 commit comments

Comments
 (0)