Skip to content

[libc] Implemented wcrtomb internal function and public libc function #144596

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ set(TARGET_LIBC_ENTRYPOINTS

# wchar.h entrypoints
libc.src.wchar.btowc
libc.src.wchar.wcrtomb
libc.src.wchar.wcslen
libc.src.wchar.wctob
libc.src.wchar.wmemmove
Expand Down
8 changes: 8 additions & 0 deletions libc/hdr/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ add_proxy_header_library(
libc.include.uchar
)

# Proxy header target for mbstate_t. hdr/types/mbstate_t.h includes the
# llvm-libc-types definition in full-build mode, hence the dependency below.
add_proxy_header_library(
mbstate_t
HDRS
mbstate_t.h
DEPENDS
libc.include.llvm-libc-types.mbstate_t
)

add_proxy_header_library(
div_t
HDRS
Expand Down
22 changes: 22 additions & 0 deletions libc/hdr/types/mbstate_t.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Proxy header for mbstate_t ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
#define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H

#ifdef LIBC_FULL_BUILD

#include "include/llvm-libc-types/mbstate_t.h"

#else // Overlay mode

#include "hdr/wchar_overlay.h"

#endif // LIBC_FULL_BUILD

#endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
6 changes: 5 additions & 1 deletion libc/include/llvm-libc-types/mbstate_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
#ifndef LLVM_LIBC_TYPES_MBSTATE_T_H
#define LLVM_LIBC_TYPES_MBSTATE_T_H

// TODO: Complete this once we implement functions that operate on this type.
#include "../llvm-libc-macros/stdint-macros.h"

// Conversion state object passed to wcrtomb.
// NOTE(review): the member types line up with internal::mbstate (a 32-bit
// code point followed by two 8-bit counters), and the wcrtomb entrypoint
// reinterpret_casts a pointer to this type to internal::mbstate * -- confirm
// the two definitions are kept in sync.
typedef struct {
uint32_t __field1;
uint8_t __field2;
uint8_t __field3;
} mbstate_t;

#endif // LLVM_LIBC_TYPES_MBSTATE_T_H
8 changes: 8 additions & 0 deletions libc/include/wchar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ functions:
- type: wchar_t *__restrict
- type: const wchar_t *__restrict
- type: size_t
- name: wcrtomb
standards:
- stdc
return_type: size_t
arguments:
- type: char *__restrict
- type: wchar_t
- type: mbstate_t *__restrict
- name: wcscpy
standards:
- stdc
Expand Down
16 changes: 16 additions & 0 deletions libc/src/__support/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,19 @@ add_object_library(
libc.src.__support.math_extras
.mbstate
)

# Internal (non-entrypoint) wcrtomb implementation. It is built on top of the
# character_converter and mbstate targets from this directory.
add_object_library(
wcrtomb
HDRS
wcrtomb.h
SRCS
wcrtomb.cpp
DEPENDS
libc.hdr.types.char32_t
libc.hdr.types.size_t
libc.hdr.types.wchar_t
libc.src.__support.error_or
libc.src.__support.common
.character_converter
.mbstate
)
6 changes: 3 additions & 3 deletions libc/src/__support/wchar/mbstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@ namespace internal {

// Internal conversion state. The public wcrtomb entrypoint reinterprets a
// caller-provided mbstate_t as this type, or uses a static instance of it when
// no state is supplied.
struct mbstate {
// Stores a partial code point (in UTF-32).
char32_t partial;
char32_t partial = 0;

/*
Progress towards a conversion.
For utf8 -> utf32, increases with each CharacterConverter::push(utf8_byte).
For utf32 -> utf8, increases with each CharacterConverter::pop_utf8().
*/
uint8_t bytes_processed;
uint8_t bytes_processed = 0;

// Total number of bytes that will be needed to represent this character.
uint8_t total_bytes;
uint8_t total_bytes = 0;
};

} // namespace internal
Expand Down
50 changes: 50 additions & 0 deletions libc/src/__support/wchar/wcrtomb.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===-- Implementation of wcrtomb -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/wchar/wcrtomb.h"
#include "src/__support/error_or.h"
#include "src/__support/wchar/character_converter.h"
#include "src/__support/wchar/mbstate.h"

#include "hdr/types/char32_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Converts the wide character `wc` into its multibyte (UTF-8) representation.
//
// Parameters:
//   s  - destination buffer; when nullptr the call behaves like
//        wcrtomb(buf, L'\0', ps) with an internal scratch buffer.
//   wc - wide character to convert (wchar_t is required to be 4 bytes wide).
//   ps - conversion state handed to the CharacterConverter.
//
// Returns the number of bytes written on success. If the converter rejects
// `wc`, or fails while producing an output byte, the converter's error code is
// returned as an Error.
ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
                        mbstate *__restrict ps) {
  static_assert(sizeof(wchar_t) == 4);

  CharacterConverter cr(ps);

  // When s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps).
  char buf[sizeof(wchar_t)];
  if (s == nullptr) {
    s = buf;
    wc = L'\0';
  }

  int status = cr.push(static_cast<char32_t>(wc));
  if (status != 0)
    return Error(status);

  size_t count = 0;
  while (!cr.isComplete()) {
    auto utf8 = cr.pop_utf8();
    // A pop is not expected to fail after a successful push, but never read an
    // empty result: bail out instead of writing garbage or spinning forever.
    if (!utf8.has_value())
      return Error(utf8.error());
    *s++ = static_cast<char>(utf8.value());
    ++count;
  }
  return count;
}

} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
26 changes: 26 additions & 0 deletions libc/src/__support/wchar/wcrtomb.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//===-- Implementation header for wcrtomb ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/error_or.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Internal wcrtomb: pushes `wc` into a CharacterConverter that operates on
// `ps` and writes the bytes it produces to `s`. Returns the number of bytes
// written, or an Error carrying the converter's status when `wc` is rejected.
// If `s` is nullptr, L'\0' is converted into an internal buffer instead.
ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps);

} // namespace internal
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
14 changes: 14 additions & 0 deletions libc/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,20 @@ add_entrypoint_object(
libc.src.__support.wctype_utils
)

# Public wcrtomb entrypoint. DEPENDS mirrors the headers included by
# wcrtomb.h / wcrtomb.cpp (size_t and common were previously missing).
add_entrypoint_object(
  wcrtomb
  SRCS
    wcrtomb.cpp
  HDRS
    wcrtomb.h
  DEPENDS
    libc.hdr.types.wchar_t
    libc.hdr.types.mbstate_t
    libc.hdr.types.size_t
    libc.src.__support.common
    libc.src.__support.libc_errno
    libc.src.__support.wchar.wcrtomb
    libc.src.__support.wchar.mbstate
)

add_entrypoint_object(
wmemset
SRCS
Expand Down
37 changes: 37 additions & 0 deletions libc/src/wchar/wcrtomb.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===-- Implementation of wcrtomb -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/wcrtomb.h"

#include "hdr/types/mbstate_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/wcrtomb.h"

namespace LIBC_NAMESPACE_DECL {

// Public wcrtomb entrypoint.
//
// Converts `wc` to its multibyte form in `s` using the conversion state `ps`.
// When `ps` is nullptr a function-local static state is used instead.
// Returns the number of bytes written; on failure sets errno to EILSEQ and
// returns (size_t)-1.
LLVM_LIBC_FUNCTION(size_t, wcrtomb,
                   (char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) {
  // The caller's mbstate_t is reinterpreted as the internal state below, so
  // its storage must be large enough to hold an internal::mbstate.
  static_assert(sizeof(mbstate_t) >= sizeof(internal::mbstate),
                "mbstate_t is too small to hold internal::mbstate");

  // Fallback state used when the caller does not supply one.
  static internal::mbstate internal_mbstate;

  internal::mbstate *state =
      ps == nullptr ? &internal_mbstate
                    : reinterpret_cast<internal::mbstate *>(ps);

  auto result = internal::wcrtomb(s, wc, state);

  if (!result.has_value()) {
    libc_errno = EILSEQ;
    return static_cast<size_t>(-1);
  }

  return result.value();
}

} // namespace LIBC_NAMESPACE_DECL
23 changes: 23 additions & 0 deletions libc/src/wchar/wcrtomb.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===-- Implementation header for wcrtomb -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
#define LLVM_LIBC_SRC_WCHAR_WCRTOMB_H

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Public wcrtomb entrypoint. Returns the number of bytes written to `s`; on
// failure sets errno to EILSEQ and returns (size_t)-1. A null `ps` selects a
// function-local static conversion state.
size_t wcrtomb(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
14 changes: 14 additions & 0 deletions libc/test/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,20 @@ add_libc_test(
libc.src.wchar.wctob
)

# Unit tests for the public wcrtomb entrypoint (wcrtomb_test.cpp).
add_libc_test(
wcrtomb_test
SUITE
libc_wchar_unittests
SRCS
wcrtomb_test.cpp
DEPENDS
libc.src.wchar.wcrtomb
libc.src.string.memset
libc.hdr.types.wchar_t
libc.hdr.types.mbstate_t
libc.src.__support.libc_errno
)

add_libc_test(
wmemset_test
SUITE
Expand Down
93 changes: 93 additions & 0 deletions libc/test/src/wchar/wcrtomb_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
//===-- Unittests for wcrtomb --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "hdr/types/mbstate_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/libc_errno.h"
#include "src/string/memset.h"
#include "src/wchar/wcrtomb.h"
#include "test/UnitTest/Test.h"

// An ASCII wide character must come out as exactly one unchanged byte.
TEST(LlvmLibcWCRToMBTest, OneByte) {
  char out[4];
  wchar_t input = L'U';

  mbstate_t mbs;
  LIBC_NAMESPACE::memset(&mbs, 0, sizeof(mbs));

  size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &mbs);

  ASSERT_EQ(written, static_cast<size_t>(1));
  ASSERT_EQ(out[0], 'U');
}

// U+00FF needs two UTF-8 bytes: 0xc3 0xbf.
TEST(LlvmLibcWCRToMBTest, TwoByte) {
  char out[4];
  wchar_t input = 0xff;

  mbstate_t mbs;
  LIBC_NAMESPACE::memset(&mbs, 0, sizeof(mbs));

  size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &mbs);

  ASSERT_EQ(written, static_cast<size_t>(2));
  ASSERT_EQ(out[0], static_cast<char>(0xc3));
  ASSERT_EQ(out[1], static_cast<char>(0xbf));
}

// U+AC15 needs three UTF-8 bytes: 0xea 0xb0 0x95.
TEST(LlvmLibcWCRToMBTest, ThreeByte) {
  char out[4];
  wchar_t input = 0xac15;

  mbstate_t mbs;
  LIBC_NAMESPACE::memset(&mbs, 0, sizeof(mbs));

  size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &mbs);

  ASSERT_EQ(written, static_cast<size_t>(3));
  ASSERT_EQ(out[0], static_cast<char>(0xea));
  ASSERT_EQ(out[1], static_cast<char>(0xb0));
  ASSERT_EQ(out[2], static_cast<char>(0x95));
}

// U+1F921 needs four UTF-8 bytes: 0xf0 0x9f 0xa4 0xa1.
TEST(LlvmLibcWCRToMBTest, FourByte) {
  char out[4];
  wchar_t input = 0x1f921;

  mbstate_t mbs;
  LIBC_NAMESPACE::memset(&mbs, 0, sizeof(mbs));

  size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &mbs);

  ASSERT_EQ(written, static_cast<size_t>(4));
  ASSERT_EQ(out[0], static_cast<char>(0xf0));
  ASSERT_EQ(out[1], static_cast<char>(0x9f));
  ASSERT_EQ(out[2], static_cast<char>(0xa4));
  ASSERT_EQ(out[3], static_cast<char>(0xa1));
}

// A null destination must behave like wcrtomb(buf, L'\0', ps): the wide
// character argument is ignored and a single NUL byte is produced.
TEST(LlvmLibcWCRToMBTest, NullString) {
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
  wchar_t wc = L'A';
  char mb[4];

  // should be equivalent to the call wcrtomb(buf, L'\0', state)
  size_t cnt1 = LIBC_NAMESPACE::wcrtomb(nullptr, wc, &state);
  size_t cnt2 = LIBC_NAMESPACE::wcrtomb(mb, L'\0', &state);

  ASSERT_EQ(cnt1, cnt2);
  // Pin the actual values as well, so the test cannot pass when both calls are
  // wrong in the same way.
  ASSERT_EQ(cnt2, static_cast<size_t>(1));
  ASSERT_EQ(mb[0], '\0');
}

// Passing no state object makes wcrtomb fall back to its own internal state.
TEST(LlvmLibcWCRToMBTest, NullState) {
  char out[4];
  wchar_t input = L'A';

  size_t written = LIBC_NAMESPACE::wcrtomb(out, input, nullptr);

  ASSERT_EQ(written, static_cast<size_t>(1));
}

// A value outside the valid code point range must be rejected with EILSEQ.
TEST(LlvmLibcWCRToMBTest, InvalidWchar) {
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
  wchar_t wc = 0x12ffff;
  char mb[4];

  // Clear errno first so a stale EILSEQ from earlier code cannot satisfy the
  // check below.
  libc_errno = 0;
  size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
  ASSERT_EQ(cnt, static_cast<size_t>(-1));
  ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
}
Loading