Skip to content

Commit 3ccd3bb

Browse files
committed
Refactor the collector to use reference counting
This is around a ~4x speedup, but required a lot of code changes. In terms of public API, only `AtomicGc` has a significant diff. (The old API would still work, but the changes make it work more efficiently with the new internals.)
1 parent 4e1e8e7 commit 3ccd3bb

File tree

22 files changed

+659
-537
lines changed

22 files changed

+659
-537
lines changed

Cargo.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,23 @@ crossbeam = "0.8.1"
1818
dynqueue = { version = "0.3.0", features = ["crossbeam-queue"] }
1919
log = "0.4.14"
2020
once_cell = "1.8"
21+
num_cpus = "1.0"
2122
parking_lot = "0.11.2"
2223
rayon = "1.5"
2324
rental = "0.5.6"
2425
shredder_derive = "0.2.0"
2526
#shredder_derive = { git = "https://github.com/Others/shredder_derive.git" }
2627
#shredder_derive = { path = "../shredder_derive" }
2728
stable_deref_trait = "1.2"
29+
thread_local = "1.1"
2830

2931
[dev-dependencies]
3032
paste = "1.0"
3133
rand = "0.8.3"
3234
trybuild = "1.0"
3335

34-
#[profile.release]
35-
#debug = true
36+
[profile.release]
37+
debug = true
3638

3739
[features]
3840
default = []

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ of the data has unpredictable cycles in it. (So Arc would not be appropriate.)
2626
- guarded access: accessing `Gc` data requires acquiring a guard (although you can use `DerefGc` in many cases to avoid this)
2727
- multiple collectors: only a single global collector is supported
2828
- can't handle `Rc`/`Arc`: requires all `Gc` objects have straightforward ownership semantics
29-
- collection optimized for speed, not memory use: `Gc` and internal metadata is small, but there is bloat during collection (will fix!)
3029
- no no-std support: The collector requires threading and other `std` features (will fix!)
3130

3231
Getting Started

src/atomic.rs

Lines changed: 130 additions & 137 deletions
Large diffs are not rendered by default.

src/collector/alloc.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ impl GcAllocation {
203203
}
204204
}
205205

206-
pub fn scan<F: FnMut(InternalGcRef)>(&self, callback: F) {
206+
pub fn scan<F: FnMut(&InternalGcRef)>(&self, callback: F) {
207207
unsafe {
208208
let mut scanner = Scanner::new(callback);
209209
let to_scan = &*self.scan_ptr;

src/collector/collect_impl.rs

Lines changed: 78 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -1,146 +1,123 @@
1-
use std::sync::atomic::Ordering;
2-
3-
use crossbeam::deque::Injector;
41
use crossbeam::queue::SegQueue;
52
use dynqueue::IntoDynQueue;
6-
use parking_lot::{MutexGuard, RwLock};
73
use rayon::iter::{IntoParallelIterator, ParallelIterator};
4+
use std::sync::atomic::Ordering;
85

96
use crate::collector::dropper::DropMessage;
10-
use crate::collector::{Collector, GcExclusiveWarrant};
7+
use crate::collector::Collector;
118
use crate::concurrency::lockout::Lockout;
129

10+
use parking_lot::MutexGuard;
11+
1312
impl Collector {
1413
pub(super) fn do_collect(&self, gc_guard: MutexGuard<'_, ()>) {
15-
// Be careful modifying this method. The tracked data and tracked handles can change underneath us
14+
// TODO: Improve this comment
15+
// Be careful modifying this method. The tracked data, reference counts, and to some extent
16+
// the graph, can change underneath us.
17+
//
1618
// Currently the state is this, as far as I can tell:
17-
// - New handles are conservatively seen as roots if seen at all while we are touching handles
18-
// (there is nowhere a new "secret root" can be created and then the old root stashed and seen as non-rooted)
19-
// - New data is treated as a special case, and only deallocated if it existed at the start of collection
20-
// - Deleted handles cannot make the graph "more connected" if the deletion was not observed
19+
// - New data is always seen as rooted as long as it is allocated after the graph freezing step
20+
// - After graph freezing (where we take all the Lockouts we can) there is no way to
21+
// smuggle items in or out of the graph
22+
// - The reference count preparation is conservative (if concurrently modified, the graph will simply look more connected)
2123

2224
trace!("Beginning collection");
2325
let _atomic_spinlock_guard = self.atomic_spinlock.lock_exclusive();
2426

25-
let current_collection = self
26-
.tracked_data
27-
.current_collection_number
28-
.load(Ordering::SeqCst);
29-
3027
// Here we synchronize destructors: this ensures that handles in objects in the background thread are dropped
31-
// Otherwise we'd see those handles as rooted and keep them around.
28+
// Otherwise we'd see those handles as rooted and keep them around. (This would not lead to incorrectness, but
29+
// this improves consistency and determinism.)
30+
//
3231
// This makes a lot of sense in the background thread (since it's totally async),
3332
// but may slow direct calls to `collect`.
3433
self.synchronize_destructors();
3534

36-
// The warrant system prevents us from scanning in-use data
37-
let warrants: Injector<GcExclusiveWarrant> = Injector::new();
38-
3935
// eprintln!("tracked data {:?}", tracked_data);
4036
// eprintln!("tracked handles {:?}", tracked_handles);
4137

42-
// In this step we calculate what's not rooted by marking all data definitively in a Gc
43-
self.tracked_data.data.par_iter(|data| {
44-
// If data.last_marked == 0, then it is new data. Update that we've seen this data
45-
// (this step helps synchronize what data is valid to be deallocated)
46-
if data.last_marked.load(Ordering::SeqCst) == 0 {
47-
data.last_marked
48-
.store(current_collection - 1, Ordering::SeqCst);
38+
// First, go through the data, resetting all the reference count trackers,
39+
// and taking exclusive warrants where possible
40+
self.tracked_data.par_iter(|data| {
41+
unsafe {
42+
// Safe as we are the collector
43+
Lockout::try_take_exclusive_access_unsafe(&data);
4944
}
45+
// This can be done concurrently with the `Lockout` management, since the ref-count snapshot is conservative
46+
// TODO: Double check this logic
47+
data.ref_cnt.prepare_for_collection();
48+
});
5049

51-
if let Some(warrant) = Lockout::get_exclusive_warrant(data.clone()) {
52-
// Save that warrant so things can't shift around under us
53-
warrants.push(warrant);
54-
55-
// Now figure out what handles are not rooted
50+
// Then adjust reference counts to figure out what is rooted
51+
self.tracked_data.par_iter(|data| {
52+
if Lockout::unsafe_exclusive_access_taken(&data) {
5653
data.underlying_allocation.scan(|h| {
57-
h.handle_ref
58-
.v
59-
.last_non_rooted
60-
.store(current_collection, Ordering::SeqCst);
54+
h.data_ref.ref_cnt.found_once_internally();
6155
});
6256
} else {
63-
// eprintln!("failed to get warrant!");
64-
// If we can't get the warrant, then this data must be in use, so we can mark it
65-
data.last_marked.store(current_collection, Ordering::SeqCst);
57+
// Someone else had this data during the collection, so it is clearly rooted
58+
data.ref_cnt.override_mark_as_rooted();
6659
}
6760
});
6861

69-
// The handles that were not just marked need to be treated as roots
62+
// Now we need to translate our set of roots into a queue
63+
// TODO: This is the only allocation in the collector at this point, probably is removable or re-usable
7064
let roots = SegQueue::new();
71-
self.tracked_data.handles.par_iter(|handle| {
72-
// If the `last_non_rooted` number was not now, then it is a root
73-
if handle.last_non_rooted.load(Ordering::SeqCst) != current_collection {
74-
roots.push(handle);
65+
self.tracked_data.par_iter(|data| {
66+
if data.ref_cnt.is_rooted() {
67+
// We need to scan data that dynamically becomes rooted, so we use the `override_mark_as_rooted`
68+
// flag to track what we've enqued to scan already
69+
data.ref_cnt.override_mark_as_rooted();
70+
roots.push(data);
7571
}
7672
});
7773

78-
// eprintln!("roots {:?}", roots);
79-
80-
// This step is dfs through the object graph (starting with the roots)
81-
// We mark each object we find
8274
let dfs_stack = roots.into_dyn_queue();
83-
dfs_stack
84-
.into_par_iter()
85-
.for_each(|(queue, handle)| unsafe {
86-
handle.underlying_data.with_data(|data| {
87-
// If this data is new, we don't want to `Scan` it, since we may not have its Lockout
88-
// Any handles inside this could not have been seen in step 1, so they'll be rooted anyway
89-
if data.last_marked.load(Ordering::SeqCst) != 0 {
90-
// Essential note! All non-new non-warranted data is automatically marked
91-
// Thus we will never accidentally scan non-warranted data here
92-
let previous_mark =
93-
data.last_marked.swap(current_collection, Ordering::SeqCst);
94-
95-
// Since we've done an atomic swap, we know we've already scanned this iff it was marked
96-
// (excluding data marked because we couldn't get its warrant, whose handles would be seen as roots)
97-
// This stops us from scanning data more than once and, crucially, concurrently scanning the same data
98-
if previous_mark != current_collection {
99-
data.last_marked.store(current_collection, Ordering::SeqCst);
100-
101-
data.underlying_allocation.scan(|h| {
102-
let mut should_enque = false;
103-
h.handle_ref.v.underlying_data.with_data(|scanned_data| {
104-
if scanned_data.last_marked.load(Ordering::SeqCst)
105-
!= current_collection
106-
{
107-
should_enque = true;
108-
}
109-
});
110-
if should_enque {
111-
queue.enqueue(h.handle_ref.v);
112-
}
113-
});
114-
}
75+
dfs_stack.into_par_iter().for_each(|(queue, data)| {
76+
debug_assert!(!data.deallocated.load(Ordering::SeqCst));
77+
78+
if Lockout::unsafe_exclusive_access_taken(&data) {
79+
data.underlying_allocation.scan(|h| {
80+
let ref_cnt = &h.data_ref.ref_cnt;
81+
// We need to scan data that dynamically becomes rooted, so we use the `override_mark_as_rooted`
82+
// flag to track what we've enqueued to scan already. (So we can't just use `is_rooted` here.)
83+
if !ref_cnt.was_overriden_as_rooted() {
84+
// This is technically racy, since we check the rooting status, THEN mark as rooted/enqueue
85+
// But that doesn't matter since the worst that can happen is that we enqueue the data twice
86+
ref_cnt.override_mark_as_rooted();
87+
queue.enqueue(h.data_ref.clone());
11588
}
116-
})
117-
});
118-
// We're done scanning things, and have established what is marked. Release the warrants
119-
drop(warrants);
89+
});
90+
} else {
91+
// Someone else had this data during the collection, so it is clearly rooted
92+
data.ref_cnt.override_mark_as_rooted();
93+
}
94+
});
95+
96+
// We are done scanning, so release any warrants
97+
self.tracked_data.par_iter(|data| unsafe {
98+
Lockout::try_release_exclusive_access_unsafe(&data);
99+
});
100+
101+
// Since new refcnts are created as rooted, and new data is created with new refcnts, we
102+
// can safely treat the refcnt data as definitive
120103

121104
// Now cleanup by removing all the data that is done for
122-
let to_drop = RwLock::new(Vec::new());
105+
let to_drop = self.dropper.get_buffer();
123106

124-
self.tracked_data.data.par_retain(|data| {
125-
// Mark the new data as in use for now
126-
// This stops us deallocating data that was allocated during collection
127-
if data.last_marked.load(Ordering::SeqCst) == 0 {
128-
data.last_marked.store(current_collection, Ordering::SeqCst);
107+
self.tracked_data.par_retain(|data| {
108+
let is_marked = data.ref_cnt.is_rooted();
109+
if is_marked {
110+
// this is marked so retain it
111+
return true;
129112
}
130113

131-
// If this is true, we just marked this data
132-
if data.last_marked.load(Ordering::SeqCst) == current_collection {
133-
// so retain it
134-
true
135-
} else {
136-
// Otherwise we didn't mark it and it should be deallocated
137-
// eprintln!("deallocating {:?}", data_ptr);
138-
// Send it to the drop thread to be dropped
139-
to_drop.write().push(data.clone());
114+
// Otherwise we didn't mark it and it should be deallocated
115+
// eprintln!("deallocating {:?}", data_ptr);
116+
// Send it to the drop thread to be dropped
117+
to_drop.push(data.clone());
140118

141-
// Don't retain this data
142-
false
143-
}
119+
// Don't retain this data
120+
false
144121
});
145122

146123
// Send off the data to be dropped in the background
@@ -153,11 +130,6 @@ impl Collector {
153130
self.trigger
154131
.set_data_count_after_collection(self.tracked_data_count());
155132

156-
// update collection number
157-
self.tracked_data
158-
.current_collection_number
159-
.fetch_add(1, Ordering::SeqCst);
160-
161133
drop(gc_guard);
162134

163135
trace!("Collection finished");

src/collector/data.rs

Lines changed: 5 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
use std::sync::atomic::{AtomicBool, AtomicPtr, AtomicU64, Ordering};
1+
use std::sync::atomic::AtomicBool;
22
use std::sync::Arc;
33

44
use crate::collector::alloc::GcAllocation;
5+
use crate::collector::ref_cnt::GcRefCount;
56
use crate::concurrency::lockout::{Lockout, LockoutProvider};
67
use crate::Scan;
78

@@ -12,9 +13,8 @@ pub struct GcData {
1213
pub(crate) lockout: Lockout,
1314
/// have we started deallocating this piece of data yet?
1415
pub(crate) deallocated: AtomicBool,
15-
// During what collection was this last marked?
16-
// 0 if this is a new piece of data
17-
pub(crate) last_marked: AtomicU64,
16+
// reference count
17+
pub(crate) ref_cnt: GcRefCount,
1818
/// a wrapper to manage (ie deallocate) the underlying allocation
1919
pub(crate) underlying_allocation: GcAllocation,
2020
}
@@ -26,38 +26,7 @@ impl LockoutProvider for Arc<GcData> {
2626
}
2727

2828
impl GcData {
29-
pub fn scan_ptr(&self) -> *const dyn Scan {
29+
pub(crate) fn scan_ptr(&self) -> *const dyn Scan {
3030
self.underlying_allocation.scan_ptr
3131
}
3232
}
33-
34-
/// There is one `GcHandle` per `Gc<T>`. We need this metadata for collection
35-
#[derive(Debug)]
36-
pub struct GcHandle {
37-
/// what data is backing this handle
38-
pub(crate) underlying_data: UnderlyingData,
39-
// During what collection was this last found in a piece of GcData?
40-
// 0 if this is a new piece of data
41-
pub(crate) last_non_rooted: AtomicU64,
42-
}
43-
44-
#[derive(Debug)]
45-
pub enum UnderlyingData {
46-
Fixed(Arc<GcData>),
47-
DynamicForAtomic(Arc<AtomicPtr<GcData>>),
48-
}
49-
50-
impl UnderlyingData {
51-
// Safe only if called when the data is known to be live, and you know atomics can't be modified
52-
// (Basically only okay to call in the collector itself)
53-
#[inline]
54-
pub unsafe fn with_data<F: FnOnce(&GcData)>(&self, f: F) {
55-
match self {
56-
Self::Fixed(data) => f(&*data),
57-
Self::DynamicForAtomic(ptr) => {
58-
let arc_ptr = ptr.load(Ordering::Relaxed);
59-
f(&*arc_ptr)
60-
}
61-
}
62-
}
63-
}

0 commit comments

Comments
 (0)