Skip to content

Commit 2243fab

Browse files
committed
Auto merge of #54461 - RalfJung:pointer-provenance, r=oli-obk
miri engine: basic support for pointer provenance tracking. This enriches pointers with a new member, `tag`, that can be used to do provenance tracking. This is a new type parameter that propagates up through everything. It defaults to `()` (no tag), which is also the value used by CTFE -- but miri will use another type. The only actually interesting piece here, I think, is what I had to do in the memory's `get`. The problem is that `tcx` (storing the allocations for statics) uses `()` for provenance information. But the machine might need another tag. The machine has a function to do the conversion, but if a conversion actually happened, we need to store the result of this *somewhere* -- we cannot return a pointer into `tcx` as we usually would. So I introduced `MonoHashMap` which uses `RefCell` to be able to insert new entries even when we just have a shared ref. However, it is important that we can also return shared refs into the map without holding the `RefCell` open. This is achieved by boxing the values stored in the map, so their addresses remain stable even when the map's table gets reallocated. This is all implemented in `mono_hash_map.rs`. NOTE: This PR also contains the commits from #54380 (comment). Only the [last two commits](https://github.com/rust-lang/rust/pull/54461/files/8e74ee0998a5b11f28d61600dbb881c7168a4a40..HEAD) are new.
2 parents 71d3a71 + bc9435d commit 2243fab

File tree

19 files changed

+990
-547
lines changed

19 files changed

+990
-547
lines changed

src/librustc/ich/impls_ty.rs

+33-23
Original file line numberDiff line numberDiff line change
@@ -391,10 +391,39 @@ for ::mir::interpret::ConstValue<'gcx> {
391391
}
392392
}
393393

394-
impl_stable_hash_for!(struct mir::interpret::Pointer {
395-
alloc_id,
396-
offset
397-
});
394+
impl<'a, Tag> HashStable<StableHashingContext<'a>>
395+
for ::mir::interpret::Pointer<Tag>
396+
where Tag: HashStable<StableHashingContext<'a>>
397+
{
398+
fn hash_stable<W: StableHasherResult>(&self,
399+
hcx: &mut StableHashingContext<'a>,
400+
hasher: &mut StableHasher<W>) {
401+
let ::mir::interpret::Pointer { alloc_id, offset, tag } = self;
402+
alloc_id.hash_stable(hcx, hasher);
403+
offset.hash_stable(hcx, hasher);
404+
tag.hash_stable(hcx, hasher);
405+
}
406+
}
407+
408+
impl<'a, Tag> HashStable<StableHashingContext<'a>>
409+
for ::mir::interpret::Scalar<Tag>
410+
where Tag: HashStable<StableHashingContext<'a>>
411+
{
412+
fn hash_stable<W: StableHasherResult>(&self,
413+
hcx: &mut StableHashingContext<'a>,
414+
hasher: &mut StableHasher<W>) {
415+
use mir::interpret::Scalar::*;
416+
417+
mem::discriminant(self).hash_stable(hcx, hasher);
418+
match self {
419+
Bits { bits, size } => {
420+
bits.hash_stable(hcx, hasher);
421+
size.hash_stable(hcx, hasher);
422+
},
423+
Ptr(ptr) => ptr.hash_stable(hcx, hasher),
424+
}
425+
}
426+
}
398427

399428
impl<'a> HashStable<StableHashingContext<'a>> for mir::interpret::AllocId {
400429
fn hash_stable<W: StableHasherResult>(
@@ -449,25 +478,6 @@ impl_stable_hash_for!(enum ::syntax::ast::Mutability {
449478
Mutable
450479
});
451480

452-
453-
impl<'a> HashStable<StableHashingContext<'a>>
454-
for ::mir::interpret::Scalar {
455-
fn hash_stable<W: StableHasherResult>(&self,
456-
hcx: &mut StableHashingContext<'a>,
457-
hasher: &mut StableHasher<W>) {
458-
use mir::interpret::Scalar::*;
459-
460-
mem::discriminant(self).hash_stable(hcx, hasher);
461-
match *self {
462-
Bits { bits, size } => {
463-
bits.hash_stable(hcx, hasher);
464-
size.hash_stable(hcx, hasher);
465-
},
466-
Ptr(ptr) => ptr.hash_stable(hcx, hasher),
467-
}
468-
}
469-
}
470-
471481
impl_stable_hash_for!(struct ty::Const<'tcx> {
472482
ty,
473483
val

src/librustc/mir/interpret/mod.rs

+46-18
Original file line numberDiff line numberDiff line change
@@ -138,54 +138,82 @@ impl<T: layout::HasDataLayout> PointerArithmetic for T {}
138138
/// each context.
139139
///
140140
/// Defaults to the index based and loosely coupled AllocId.
141+
///
142+
/// Pointer is also generic over the `Tag` associated with each pointer,
143+
/// which is used to do provenance tracking during execution.
141144
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, RustcEncodable, RustcDecodable, Hash)]
142-
pub struct Pointer<Id=AllocId> {
145+
pub struct Pointer<Tag=(),Id=AllocId> {
143146
pub alloc_id: Id,
144147
pub offset: Size,
148+
pub tag: Tag,
145149
}
146150

147151
/// Produces a `Pointer` which points to the beginning of the Allocation
148152
impl From<AllocId> for Pointer {
153+
#[inline(always)]
149154
fn from(alloc_id: AllocId) -> Self {
150155
Pointer::new(alloc_id, Size::ZERO)
151156
}
152157
}
153158

154-
impl<'tcx> Pointer {
159+
impl<'tcx> Pointer<()> {
160+
#[inline(always)]
155161
pub fn new(alloc_id: AllocId, offset: Size) -> Self {
156-
Pointer { alloc_id, offset }
162+
Pointer { alloc_id, offset, tag: () }
163+
}
164+
165+
#[inline(always)]
166+
pub fn with_default_tag<Tag>(self) -> Pointer<Tag>
167+
where Tag: Default
168+
{
169+
Pointer::new_with_tag(self.alloc_id, self.offset, Default::default())
170+
}
171+
}
172+
173+
impl<'tcx, Tag> Pointer<Tag> {
174+
#[inline(always)]
175+
pub fn new_with_tag(alloc_id: AllocId, offset: Size, tag: Tag) -> Self {
176+
Pointer { alloc_id, offset, tag }
157177
}
158178

159179
pub fn wrapping_signed_offset<C: HasDataLayout>(self, i: i64, cx: C) -> Self {
160-
Pointer::new(
180+
Pointer::new_with_tag(
161181
self.alloc_id,
162182
Size::from_bytes(cx.data_layout().wrapping_signed_offset(self.offset.bytes(), i)),
183+
self.tag,
163184
)
164185
}
165186

166187
pub fn overflowing_signed_offset<C: HasDataLayout>(self, i: i128, cx: C) -> (Self, bool) {
167188
let (res, over) = cx.data_layout().overflowing_signed_offset(self.offset.bytes(), i);
168-
(Pointer::new(self.alloc_id, Size::from_bytes(res)), over)
189+
(Pointer::new_with_tag(self.alloc_id, Size::from_bytes(res), self.tag), over)
169190
}
170191

171192
pub fn signed_offset<C: HasDataLayout>(self, i: i64, cx: C) -> EvalResult<'tcx, Self> {
172-
Ok(Pointer::new(
193+
Ok(Pointer::new_with_tag(
173194
self.alloc_id,
174195
Size::from_bytes(cx.data_layout().signed_offset(self.offset.bytes(), i)?),
196+
self.tag,
175197
))
176198
}
177199

178200
pub fn overflowing_offset<C: HasDataLayout>(self, i: Size, cx: C) -> (Self, bool) {
179201
let (res, over) = cx.data_layout().overflowing_offset(self.offset.bytes(), i.bytes());
180-
(Pointer::new(self.alloc_id, Size::from_bytes(res)), over)
202+
(Pointer::new_with_tag(self.alloc_id, Size::from_bytes(res), self.tag), over)
181203
}
182204

183205
pub fn offset<C: HasDataLayout>(self, i: Size, cx: C) -> EvalResult<'tcx, Self> {
184-
Ok(Pointer::new(
206+
Ok(Pointer::new_with_tag(
185207
self.alloc_id,
186208
Size::from_bytes(cx.data_layout().offset(self.offset.bytes(), i.bytes())?),
209+
self.tag
187210
))
188211
}
212+
213+
#[inline]
214+
pub fn erase_tag(self) -> Pointer {
215+
Pointer { alloc_id: self.alloc_id, offset: self.offset, tag: () }
216+
}
189217
}
190218

191219

@@ -496,15 +524,15 @@ impl<'tcx, M: fmt::Debug + Eq + Hash + Clone> AllocMap<'tcx, M> {
496524
}
497525

498526
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
499-
pub struct Allocation {
527+
pub struct Allocation<Tag=()> {
500528
/// The actual bytes of the allocation.
501529
/// Note that the bytes of a pointer represent the offset of the pointer
502530
pub bytes: Vec<u8>,
503-
/// Maps from byte addresses to allocations.
531+
/// Maps from byte addresses to extra data for each pointer.
504532
/// Only the first byte of a pointer is inserted into the map; i.e.,
505533
/// every entry in this map applies to `pointer_size` consecutive bytes starting
506534
/// at the given offset.
507-
pub relocations: Relocations,
535+
pub relocations: Relocations<Tag>,
508536
/// Denotes undefined memory. Reading from undefined memory is forbidden in miri
509537
pub undef_mask: UndefMask,
510538
/// The alignment of the allocation to detect unaligned reads.
@@ -515,7 +543,7 @@ pub struct Allocation {
515543
pub mutability: Mutability,
516544
}
517545

518-
impl Allocation {
546+
impl<Tag> Allocation<Tag> {
519547
/// Creates a read-only allocation initialized by the given bytes
520548
pub fn from_bytes(slice: &[u8], align: Align) -> Self {
521549
let mut undef_mask = UndefMask::new(Size::ZERO);
@@ -548,29 +576,29 @@ impl Allocation {
548576
impl<'tcx> ::serialize::UseSpecializedDecodable for &'tcx Allocation {}
549577

550578
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, RustcEncodable, RustcDecodable)]
551-
pub struct Relocations<Id=AllocId>(SortedMap<Size, Id>);
579+
pub struct Relocations<Tag=(), Id=AllocId>(SortedMap<Size, (Tag, Id)>);
552580

553-
impl<Id> Relocations<Id> {
581+
impl<Tag, Id> Relocations<Tag, Id> {
554582
pub fn new() -> Self {
555583
Relocations(SortedMap::new())
556584
}
557585

558586
// The caller must guarantee that the given relocations are already sorted
559587
// by address and contain no duplicates.
560-
pub fn from_presorted(r: Vec<(Size, Id)>) -> Self {
588+
pub fn from_presorted(r: Vec<(Size, (Tag, Id))>) -> Self {
561589
Relocations(SortedMap::from_presorted_elements(r))
562590
}
563591
}
564592

565-
impl Deref for Relocations {
566-
type Target = SortedMap<Size, AllocId>;
593+
impl<Tag> Deref for Relocations<Tag> {
594+
type Target = SortedMap<Size, (Tag, AllocId)>;
567595

568596
fn deref(&self) -> &Self::Target {
569597
&self.0
570598
}
571599
}
572600

573-
impl DerefMut for Relocations {
601+
impl<Tag> DerefMut for Relocations<Tag> {
574602
fn deref_mut(&mut self) -> &mut Self::Target {
575603
&mut self.0
576604
}

src/librustc/mir/interpret/value.rs

+44-24
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,47 @@ impl<'tcx> ConstValue<'tcx> {
7979
}
8080
}
8181

82-
impl<'tcx> Scalar {
82+
/// A `Scalar` represents an immediate, primitive value existing outside of a
83+
/// `memory::Allocation`. It is in many ways like a small chunk of an `Allocation`, up to 8 bytes in
84+
/// size. Like a range of bytes in an `Allocation`, a `Scalar` can either represent the raw bytes
85+
/// of a simple value or a pointer into another `Allocation`
86+
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, RustcEncodable, RustcDecodable, Hash)]
87+
pub enum Scalar<Tag=(), Id=AllocId> {
88+
/// The raw bytes of a simple value.
89+
Bits {
90+
/// The first `size` bytes are the value.
91+
/// Do not try to read less or more bytes than that. The remaining bytes must be 0.
92+
size: u8,
93+
bits: u128,
94+
},
95+
96+
/// A pointer into an `Allocation`. An `Allocation` in the `memory` module has a list of
97+
/// relocations, but a `Scalar` is only large enough to contain one, so we just represent the
98+
/// relocation and its associated offset together as a `Pointer` here.
99+
Ptr(Pointer<Tag, Id>),
100+
}
101+
102+
impl<'tcx> Scalar<()> {
103+
#[inline]
104+
pub fn with_default_tag<Tag>(self) -> Scalar<Tag>
105+
where Tag: Default
106+
{
107+
match self {
108+
Scalar::Ptr(ptr) => Scalar::Ptr(ptr.with_default_tag()),
109+
Scalar::Bits { bits, size } => Scalar::Bits { bits, size },
110+
}
111+
}
112+
}
113+
114+
impl<'tcx, Tag> Scalar<Tag> {
115+
#[inline]
116+
pub fn erase_tag(self) -> Scalar {
117+
match self {
118+
Scalar::Ptr(ptr) => Scalar::Ptr(ptr.erase_tag()),
119+
Scalar::Bits { bits, size } => Scalar::Bits { bits, size },
120+
}
121+
}
122+
83123
#[inline]
84124
pub fn ptr_null(cx: impl HasDataLayout) -> Self {
85125
Scalar::Bits {
@@ -208,7 +248,7 @@ impl<'tcx> Scalar {
208248
}
209249

210250
#[inline]
211-
pub fn to_ptr(self) -> EvalResult<'tcx, Pointer> {
251+
pub fn to_ptr(self) -> EvalResult<'tcx, Pointer<Tag>> {
212252
match self {
213253
Scalar::Bits { bits: 0, .. } => err!(InvalidNullPointerUsage),
214254
Scalar::Bits { .. } => err!(ReadBytesAsPointer),
@@ -317,29 +357,9 @@ impl<'tcx> Scalar {
317357
}
318358
}
319359

320-
impl From<Pointer> for Scalar {
360+
impl<Tag> From<Pointer<Tag>> for Scalar<Tag> {
321361
#[inline(always)]
322-
fn from(ptr: Pointer) -> Self {
362+
fn from(ptr: Pointer<Tag>) -> Self {
323363
Scalar::Ptr(ptr)
324364
}
325365
}
326-
327-
/// A `Scalar` represents an immediate, primitive value existing outside of a
328-
/// `memory::Allocation`. It is in many ways like a small chunk of a `Allocation`, up to 8 bytes in
329-
/// size. Like a range of bytes in an `Allocation`, a `Scalar` can either represent the raw bytes
330-
/// of a simple value or a pointer into another `Allocation`
331-
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, RustcEncodable, RustcDecodable, Hash)]
332-
pub enum Scalar<Id=AllocId> {
333-
/// The raw bytes of a simple value.
334-
Bits {
335-
/// The first `size` bytes are the value.
336-
/// Do not try to read less or more bytes that that. The remaining bytes must be 0.
337-
size: u8,
338-
bits: u128,
339-
},
340-
341-
/// A pointer into an `Allocation`. An `Allocation` in the `memory` module has a list of
342-
/// relocations, but a `Scalar` is only large enough to contain one, so we just represent the
343-
/// relocation and its associated offset together as a `Pointer` here.
344-
Ptr(Pointer<Id>),
345-
}

src/librustc_codegen_llvm/mir/constant.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll
9292
let pointer_size = layout.pointer_size.bytes() as usize;
9393

9494
let mut next_offset = 0;
95-
for &(offset, alloc_id) in alloc.relocations.iter() {
95+
for &(offset, ((), alloc_id)) in alloc.relocations.iter() {
9696
let offset = offset.bytes();
9797
assert_eq!(offset as usize as u64, offset);
9898
let offset = offset as usize;
@@ -105,7 +105,7 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll
105105
).expect("const_alloc_to_llvm: could not read relocation pointer") as u64;
106106
llvals.push(scalar_to_llvm(
107107
cx,
108-
Pointer { alloc_id, offset: Size::from_bytes(ptr_offset) }.into(),
108+
Pointer::new(alloc_id, Size::from_bytes(ptr_offset)).into(),
109109
&layout::Scalar {
110110
value: layout::Primitive::Pointer,
111111
valid_range: 0..=!0

0 commit comments

Comments
 (0)