Skip to content

Commit cee15a3

Browse files
authored
Handle non-printable ASCII in str and byte arrays (#169)
`"\xff"` and `'\xff'` are valid string/char literals in C/C++. Previously they were not escaped, so the generated Rust code contained an invalid character that broke compilation. After escaping them, I realized that they cannot be translated to `\xff` in a Rust `str`, because `str` only accepts `\x00 - \x7f`; a byte array accepts values `> \x7f`. I changed `Ptr::from_string_literal` to accept a byte array instead of a `str`. I also changed `GetFmtArg` to refuse to create a Rust `str` that contains chars `> \x7f` and to fall back to `GetRawArg`, which produces escaped byte arrays.
1 parent d424cd1 commit cee15a3

43 files changed

Lines changed: 299 additions & 144 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

cpp2rust/converter/converter.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1332,7 +1332,11 @@ bool Converter::GetFmtArg(clang::Expr *arg, std::string &fmt,
13321332
std::string &fmt_args, const char *&fmt_trait,
13331333
std::string &fmt_width) {
13341334
std::string arg_str = Mapper::ToString(arg);
1335-
if (clang::isa<clang::StringLiteral>(arg->IgnoreImplicit())) {
1335+
if (auto *str_lit =
1336+
clang::dyn_cast<clang::StringLiteral>(arg->IgnoreImplicit())) {
1337+
if (!IsAsciiStringLiteral(str_lit)) {
1338+
return false;
1339+
}
13361340
auto str = GetEscapedStringLiteral(arg);
13371341
std::string_view trim(str);
13381342
// Delete " from string
@@ -1373,6 +1377,8 @@ bool Converter::GetRawArg(clang::Expr *arg, std::string &raw_args) {
13731377
raw_args += "(&(" + str + ")[..(" + str + ").len() - 1]";
13741378
} else if (Mapper::ToString(arg).contains("std::endl")) {
13751379
raw_args += "(&[b'\\n']";
1380+
} else if (clang::isa<clang::StringLiteral>(arg->IgnoreImplicit())) {
1381+
raw_args += "(b" + GetEscapedStringLiteral(arg);
13761382
} else {
13771383
return false;
13781384
}
@@ -1774,8 +1780,9 @@ bool Converter::VisitFloatingLiteral(clang::FloatingLiteral *expr) {
17741780
}
17751781

17761782
bool Converter::VisitCharacterLiteral(clang::CharacterLiteral *expr) {
1783+
auto uc = static_cast<unsigned char>(expr->getValue());
17771784
std::string ch = GetEscapedCharLiteral(expr->getValue());
1778-
ch = "'" + std::move(ch) + "'";
1785+
ch = (uc > 0x7F ? "b'" : "'") + std::move(ch) + '\'';
17791786
{
17801787
PushParen paren(*this);
17811788
StrCat(ch, keyword::kAs, ToStringBase(expr->getType()));
@@ -1802,7 +1809,7 @@ std::string Converter::GetEscapedCharLiteral(char character) const {
18021809
return "\\0";
18031810
}
18041811
auto uc = static_cast<unsigned char>(character);
1805-
if (uc < 0x20 || uc == 0x7F) {
1812+
if (uc < 0x20 || uc >= 0x7F) {
18061813
return std::format("\\x{:02x}", uc);
18071814
}
18081815
return std::string(1, character);

cpp2rust/converter/converter_lib.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,15 @@ bool IsCallToOstream(clang::CallExpr *expr) {
246246
return false;
247247
}
248248

249+
bool IsAsciiStringLiteral(const clang::StringLiteral *str) {
250+
for (unsigned char c : str->getString()) {
251+
if (c > 0x7F) {
252+
return false;
253+
}
254+
}
255+
return true;
256+
}
257+
249258
std::vector<clang::CXXConstructorDecl *>
250259
GetTemplateInstantiatedCtors(clang::CXXRecordDecl *decl) {
251260
std::vector<clang::CXXConstructorDecl *> out;

cpp2rust/converter/converter_lib.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ bool IsUniquePtr(clang::QualType type);
6565

6666
bool IsCallToOstream(clang::CallExpr *expr);
6767

68+
bool IsAsciiStringLiteral(const clang::StringLiteral *str);
69+
6870
std::vector<clang::CXXConstructorDecl *>
6971
GetTemplateInstantiatedCtors(clang::CXXRecordDecl *decl);
7072

cpp2rust/converter/models/converter_refcount.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1044,7 +1044,7 @@ bool ConverterRefCount::VisitStringLiteral(clang::StringLiteral *expr) {
10441044
GetEscapedStringLiteral(expr, pad)));
10451045
return false;
10461046
}
1047-
StrCat(GetEscapedStringLiteral(expr));
1047+
StrCat(std::format("b{}", GetEscapedStringLiteral(expr, 0)));
10481048
return false;
10491049
}
10501050

libcc2rs/src/rc.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -940,9 +940,10 @@ impl fmt::Display for Ptr<u8> {
940940
}
941941
}
942942

943+
type StringLiteralMap = HashMap<&'static [u8], Rc<RefCell<Vec<u8>>>>;
944+
943945
thread_local! {
944-
static STRING_LITERALS: RefCell<HashMap<&'static str, Rc<RefCell<Vec<u8>>>>> =
945-
RefCell::new(HashMap::new());
946+
static STRING_LITERALS: RefCell<StringLiteralMap> = RefCell::new(HashMap::new());
946947
}
947948

948949
impl Ptr<u8> {
@@ -1020,12 +1021,12 @@ impl Ptr<u8> {
10201021
}
10211022

10221023
#[inline]
1023-
pub fn from_string_literal(s: &'static str) -> Self {
1024+
pub fn from_string_literal(s: &'static [u8]) -> Self {
10241025
STRING_LITERALS.with(|literals| {
10251026
let mut literals = literals.borrow_mut();
10261027
let weak = Rc::downgrade(literals.entry(s).or_insert_with(|| {
10271028
Rc::new(RefCell::new({
1028-
let mut v = s.as_bytes().to_vec();
1029+
let mut v = s.to_vec();
10291030
v.push(0);
10301031
v
10311032
}))

tests/unit/out/refcount/bool_condition_logical.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ fn main_0() -> i32 {
127127
if ((*n.borrow()) != 0) || (((*bits.borrow()) & 256_i64) != 0) {
128128
assert!(true);
129129
}
130-
let cp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::from_string_literal("hi")));
130+
let cp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::from_string_literal(b"hi")));
131131
let cnp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::<u8>::null()));
132132
if ((*x.borrow()) > (*y.borrow())) && (!(*cp.borrow()).is_null()) {
133133
assert!(true);

tests/unit/out/refcount/bool_condition_logical_c.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ fn main_0() -> i32 {
152152
{
153153
assert!((1 != 0));
154154
}
155-
let cp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::from_string_literal("hi")));
155+
let cp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::from_string_literal(b"hi")));
156156
let cnp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::<u8>::null()));
157157
if (((((((*x.borrow()) > (*y.borrow())) as i32) != 0) && (!(*cp.borrow()).is_null())) as i32)
158158
!= 0)

tests/unit/out/refcount/char_printing.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ fn main_0() -> i32 {
1313
let vec_: Value<Vec<u8>> = Rc::new(RefCell::new(vec![195_u8, 167_u8]));
1414
let i: Value<i32> = Rc::new(RefCell::new(27));
1515
let str: Value<Vec<u8>> = Rc::new(RefCell::new(
16-
Ptr::from_string_literal("rdas.")
16+
Ptr::from_string_literal(b"rdas.")
1717
.to_c_string_iterator()
1818
.chain(std::iter::once(0))
1919
.collect::<Vec<u8>>(),
@@ -29,12 +29,16 @@ fn main_0() -> i32 {
2929
]
3030
.concat()),
3131
);
32-
write!(
33-
libcc2rs::cout(),
34-
"0x{:x} açordas?\nSim, 0x{:x}.\n",
35-
27,
36-
(*i.borrow()),
32+
write!(libcc2rs::cout(), "0x{:x}", 27,);
33+
libcc2rs::cout().write_all(
34+
&([
35+
(b" a\xc3\xa7ordas?" as &[u8]),
36+
(&[('\n' as u8)] as &[u8]),
37+
(b"Sim, 0x" as &[u8]),
38+
]
39+
.concat()),
3740
);
41+
write!(libcc2rs::cout(), "{:x}.\n", (*i.borrow()),);
3842
write!(libcc2rs::cout(), "Hello, World!\n",);
3943
libcc2rs::cout().write_all(
4044
&([

tests/unit/out/refcount/char_printing_cerr.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ fn main_0() -> i32 {
1313
let vec_: Value<Vec<u8>> = Rc::new(RefCell::new(vec![195_u8, 167_u8]));
1414
let i: Value<i32> = Rc::new(RefCell::new(27));
1515
let str: Value<Vec<u8>> = Rc::new(RefCell::new(
16-
Ptr::from_string_literal("rdas.")
16+
Ptr::from_string_literal(b"rdas.")
1717
.to_c_string_iterator()
1818
.chain(std::iter::once(0))
1919
.collect::<Vec<u8>>(),
@@ -29,12 +29,16 @@ fn main_0() -> i32 {
2929
]
3030
.concat()),
3131
);
32-
write!(
33-
libcc2rs::cerr(),
34-
"0x{:x} açordas?\nSim, 0x{:x}.\n",
35-
27,
36-
(*i.borrow()),
32+
write!(libcc2rs::cerr(), "0x{:x}", 27,);
33+
libcc2rs::cerr().write_all(
34+
&([
35+
(b" a\xc3\xa7ordas?" as &[u8]),
36+
(&[('\n' as u8)] as &[u8]),
37+
(b"Sim, 0x" as &[u8]),
38+
]
39+
.concat()),
3740
);
41+
write!(libcc2rs::cerr(), "{:x}.\n", (*i.borrow()),);
3842
write!(libcc2rs::cerr(), "Hello, World!\n",);
3943
libcc2rs::cerr().write_all(
4044
&([

tests/unit/out/refcount/default_in_statics.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ thread_local!(
111111
);
112112
thread_local!(
113113
pub static static_foo_3: Value<Foo> = Rc::new(RefCell::new(Foo {
114-
s1: Rc::new(RefCell::new(Ptr::from_string_literal("hello"))),
114+
s1: Rc::new(RefCell::new(Ptr::from_string_literal(b"hello"))),
115115
s2: Rc::new(RefCell::new(Ptr::<u8>::null())),
116116
fn1: Rc::new(RefCell::new(FnPtr::null())),
117117
fn2: Rc::new(RefCell::new(FnPtr::null())),
@@ -121,14 +121,14 @@ thread_local!(
121121
thread_local!(
122122
pub static static_foo_array_4: Value<Box<[Foo]>> = Rc::new(RefCell::new(Box::new([
123123
Foo {
124-
s1: Rc::new(RefCell::new(Ptr::from_string_literal("first"))),
124+
s1: Rc::new(RefCell::new(Ptr::from_string_literal(b"first"))),
125125
s2: Rc::new(RefCell::new(Ptr::<u8>::null())),
126126
fn1: Rc::new(RefCell::new(FnPtr::null())),
127127
fn2: Rc::new(RefCell::new(FnPtr::null())),
128128
n: Rc::new(RefCell::new(1)),
129129
},
130130
Foo {
131-
s1: Rc::new(RefCell::new(Ptr::from_string_literal("second"))),
131+
s1: Rc::new(RefCell::new(Ptr::from_string_literal(b"second"))),
132132
s2: Rc::new(RefCell::new(Ptr::<u8>::null())),
133133
fn1: Rc::new(RefCell::new(FnPtr::null())),
134134
fn2: Rc::new(RefCell::new(FnPtr::null())),

0 commit comments

Comments
 (0)