Skip to content

Commit 8dcfc38

Browse files
committed
initial work on adding 'read/write/alloc wide_str' functions
1 parent 49051e0 commit 8dcfc38

File tree

2 files changed

+144
-13
lines changed

2 files changed

+144
-13
lines changed

src/helpers.rs

Lines changed: 122 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::ffi::OsStr;
1+
use std::ffi::{OsStr, OsString};
22
use std::{iter, mem};
33
use std::convert::TryFrom;
44

@@ -456,6 +456,17 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
456456
}
457457
}
458458

459+
/// Dispatches to appropriate implementations for reading an OsString from Memory,
460+
/// depending on the interpretation target.
461+
fn read_os_str_from_target_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, OsString> {
462+
let target_os = self.eval_context_ref().tcx.sess.target.target.target_os.as_str();
463+
match target_os {
464+
"linux" | "macos" => self.read_os_str_from_c_str(scalar).map(|x| x.to_os_string()),
465+
"windows" => self.read_os_str_from_wide_str(scalar),
466+
_ => throw_unsup_format!("OsString support for target OS not yet available"),
467+
}
468+
}
469+
459470
/// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
460471
/// the Unix APIs usually handle.
461472
fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, &'a OsStr>
@@ -471,14 +482,48 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
471482
fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
472483
let s = std::str::from_utf8(bytes)
473484
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
474-
Ok(&OsStr::new(s))
485+
Ok(OsStr::new(s))
475486
}
476487

477488
let this = self.eval_context_ref();
478489
let bytes = this.memory.read_c_str(scalar)?;
479490
bytes_to_os_str(bytes)
480491
}
481492

493+
/// Helper function to read an OsString from a 0x0000-terminated sequence of u16,
494+
/// which is what the Windows APIs usually handle.
495+
fn read_os_str_from_wide_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, OsString> {
496+
#[cfg(target_os = "windows")]
497+
fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
498+
Ok(std::os::windows::ffi::OsStringExt::from_wide(&u16_vec[..]))
499+
}
500+
#[cfg(not(target_os = "windows"))]
501+
fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
502+
let s = String::from_utf16(&u16_vec[..])
503+
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec))?;
504+
Ok(s.into())
505+
}
506+
507+
let u16_vec = self.eval_context_ref().memory.read_wide_str(scalar)?;
508+
u16vec_to_osstring(u16_vec)
509+
}
510+
511+
/// Dispatches to appropriate implementations for writing an OsString to Memory,
512+
/// depending on the interpretation target.
513+
fn write_os_str_to_target_str(
514+
&mut self,
515+
os_str: &OsStr,
516+
mplace: MPlaceTy<'tcx, Tag>,
517+
size: u64,
518+
) -> InterpResult<'tcx, (bool, u64)> {
519+
let target_os = self.eval_context_ref().tcx.sess.target.target.target_os.as_str();
520+
match target_os {
521+
"linux" | "macos" => self.write_os_str_to_c_str(os_str, mplace.ptr, size),
522+
"windows" => self.write_os_str_to_wide_str(os_str, mplace, size),
523+
_ => throw_unsup_format!("OsString support for target OS not yet available"),
524+
}
525+
}
526+
482527
/// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what
483528
/// the Unix APIs usually handle. This function returns `Ok((false, length))` without trying
484529
/// to write if `size` is not large enough to fit the contents of `os_string` plus a null
@@ -518,6 +563,66 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
518563
Ok((true, string_length))
519564
}
520565

566+
/// Helper function to write an OsStr as a 0x0000-terminated u16-sequence, which is what
567+
/// the Windows APIs usually handle. This function returns `Ok((false, length))` without trying
568+
/// to write if `size` is not large enough to fit the contents of `os_str` plus a null
569+
/// terminator. It returns `Ok((true, length))` if the writing process was successful. The
570+
/// string length returned does not include the null terminator.
571+
fn write_os_str_to_wide_str(
572+
&mut self,
573+
os_str: &OsStr,
574+
mplace: MPlaceTy<'tcx, Tag>,
575+
size: u64,
576+
) -> InterpResult<'tcx, (bool, u64)> {
577+
#[cfg(target_os = "windows")]
578+
fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
579+
Ok(std::os::windows::ffi::OsStrExt::encode_wide(os_str).collect())
580+
}
581+
#[cfg(not(target_os = "windows"))]
582+
fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
583+
// On non-Windows hosts the best we can do to transform Vec<u16> from/to OS strings is to do the
584+
// intermediate transformation into strings, which rejects non-UTF-8 paths that are actually
585+
// valid.
586+
os_str
587+
.to_str()
588+
.map(|s| s.encode_utf16().collect())
589+
.ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
590+
}
591+
592+
let u16_vec = os_str_to_u16vec(os_str)?;
593+
// If `size` is smaller than or equal to `u16_vec.len()`, writing `u16_vec` plus the required
594+
// 0x0000 terminator to memory would cause an out-of-bounds access.
595+
let string_length = u16_vec.len() as u64;
596+
if size <= string_length {
597+
return Ok((false, string_length));
598+
}
599+
600+
let this = self.eval_context_mut();
601+
602+
// Store the UTF-16 string.
603+
let char_size = Size::from_bytes(2);
604+
for (idx, c) in u16_vec.into_iter().chain(iter::once(0x0000)).enumerate() {
605+
let place = this.mplace_field(mplace, idx as u64)?;
606+
this.write_scalar(Scalar::from_uint(c, char_size), place.into())?;
607+
}
608+
Ok((true, string_length))
609+
}
610+
611+
/// Dispatches to appropriate implementations for allocating & writing OsString in Memory,
612+
/// depending on the interpretation target.
613+
fn alloc_os_str_as_target_str(
614+
&mut self,
615+
os_str: &OsStr,
616+
memkind: MemoryKind<MiriMemoryKind>,
617+
) -> InterpResult<'tcx, MPlaceTy<'tcx, Tag>> {
618+
let target_os = self.eval_context_ref().tcx.sess.target.target.target_os.as_str();
619+
match target_os {
620+
"linux" | "macos" => self.alloc_os_str_as_c_str(os_str, memkind),
621+
"windows" => self.alloc_os_str_as_wide_str(os_str, memkind),
622+
_ => throw_unsup_format!("OsString support for target OS not yet available"),
623+
}
624+
}
625+
521626
fn alloc_os_str_as_c_str(
522627
&mut self,
523628
os_str: &OsStr,
@@ -529,7 +634,21 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
529634
let arg_type = this.tcx.mk_array(this.tcx.types.u8, size);
530635
let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
531636
self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap();
532-
arg_place.ptr.assert_ptr()
637+
Ok(arg_place)
638+
}
639+
640+
fn alloc_os_str_as_wide_str(
641+
&mut self,
642+
os_str: &OsStr,
643+
memkind: MemoryKind<MiriMemoryKind>,
644+
) -> InterpResult<'tcx, MPlaceTy<'tcx, Tag>> {
645+
let size = os_str.len() as u64 + 1; // Make space for `0x0000` terminator.
646+
let this = self.eval_context_mut();
647+
648+
let arg_type = this.tcx.mk_array(this.tcx.types.u16, size);
649+
let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
650+
self.write_os_str_to_wide_str(os_str, arg_place, size).unwrap();
651+
Ok(arg_place)
533652
}
534653
}
535654

src/shims/env.rs

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ impl<'tcx> EnvVars<'tcx> {
2929
for (name, value) in env::vars() {
3030
if !excluded_env_vars.contains(&name) {
3131
let var_ptr =
32-
alloc_env_var_as_c_str(name.as_ref(), value.as_ref(), ecx);
32+
alloc_env_var_as_target_str(name.as_ref(), value.as_ref(), ecx)?;
3333
ecx.machine.env_vars.map.insert(OsString::from(name), var_ptr);
3434
}
3535
}
@@ -38,15 +38,18 @@ impl<'tcx> EnvVars<'tcx> {
3838
}
3939
}
4040

41-
fn alloc_env_var_as_c_str<'mir, 'tcx>(
41+
fn alloc_env_var_as_target_str<'mir, 'tcx>(
4242
name: &OsStr,
4343
value: &OsStr,
4444
ecx: &mut InterpCx<'mir, 'tcx, Evaluator<'tcx>>,
45-
) -> Pointer<Tag> {
45+
) -> InterpResult<'tcx, Pointer<Tag>> {
4646
let mut name_osstring = name.to_os_string();
4747
name_osstring.push("=");
4848
name_osstring.push(value);
49-
ecx.alloc_os_str_as_c_str(name_osstring.as_os_str(), MiriMemoryKind::Machine.into())
49+
Ok(ecx
50+
.alloc_os_str_as_target_str(name_osstring.as_os_str(), MiriMemoryKind::Machine.into())?
51+
.ptr
52+
.assert_ptr())
5053
}
5154

5255
impl<'mir, 'tcx> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
@@ -55,8 +58,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
5558
let this = self.eval_context_mut();
5659

5760
let name_ptr = this.read_scalar(name_op)?.not_undef()?;
58-
let name = this.read_os_str_from_c_str(name_ptr)?;
59-
Ok(match this.machine.env_vars.map.get(name) {
61+
let name = this.read_os_str_from_target_str(name_ptr)?;
62+
Ok(match this.machine.env_vars.map.get(&name) {
6063
// The offset is used to strip the "{name}=" part of the string.
6164
Some(var_ptr) => {
6265
Scalar::from(var_ptr.offset(Size::from_bytes(u64::try_from(name.len()).unwrap().checked_add(1).unwrap()), this)?)
@@ -65,6 +68,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
6568
})
6669
}
6770

71+
72+
fn getenvironmentvariablew() {
73+
74+
}
75+
6876
fn setenv(
6977
&mut self,
7078
name_op: OpTy<'tcx, Tag>,
@@ -74,16 +82,16 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
7482

7583
let name_ptr = this.read_scalar(name_op)?.not_undef()?;
7684
let value_ptr = this.read_scalar(value_op)?.not_undef()?;
77-
let value = this.read_os_str_from_c_str(value_ptr)?;
85+
let value = this.read_os_str_from_target_str(value_ptr)?;
7886
let mut new = None;
7987
if !this.is_null(name_ptr)? {
80-
let name = this.read_os_str_from_c_str(name_ptr)?;
88+
let name = this.read_os_str_from_target_str(name_ptr)?;
8189
if !name.is_empty() && !name.to_string_lossy().contains('=') {
8290
new = Some((name.to_owned(), value.to_owned()));
8391
}
8492
}
8593
if let Some((name, value)) = new {
86-
let var_ptr = alloc_env_var_as_c_str(&name, &value, &mut this);
94+
let var_ptr = alloc_env_var_as_target_str(&name, &value, &mut this)?;
8795
if let Some(var) = this.machine.env_vars.map.insert(name.to_owned(), var_ptr) {
8896
this.memory
8997
.deallocate(var, None, MiriMemoryKind::Machine.into())?;
@@ -95,13 +103,17 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
95103
}
96104
}
97105

106+
fn setenvironmentvariablew() {
107+
108+
}
109+
98110
fn unsetenv(&mut self, name_op: OpTy<'tcx, Tag>) -> InterpResult<'tcx, i32> {
99111
let this = self.eval_context_mut();
100112

101113
let name_ptr = this.read_scalar(name_op)?.not_undef()?;
102114
let mut success = None;
103115
if !this.is_null(name_ptr)? {
104-
let name = this.read_os_str_from_c_str(name_ptr)?.to_owned();
116+
let name = this.read_os_str_from_target_str(name_ptr)?.to_owned();
105117
if !name.is_empty() && !name.to_string_lossy().contains('=') {
106118
success = Some(this.machine.env_vars.map.remove(&name));
107119
}

0 commit comments

Comments
 (0)