Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unicode character support in screen tab names #1642

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions Action.c
Original file line number Diff line number Diff line change
Expand Up @@ -408,13 +408,14 @@ Htop_Reaction Action_setScreenTab(State* st, int x) {
return 0;
}
const char* tab = settings->screens[i]->heading;
int len = strlen(tab);
if (x < s + len + 2) {
const char* ptr = tab;
int width = String_mbswidth(&ptr, SIZE_MAX, INT_MAX);
if (x < s + width + 2) {
settings->ssIndex = i;
setActiveScreen(settings, st, i);
return HTOP_UPDATE_PANELHDR | HTOP_REFRESH | HTOP_REDRAW_BAR;
}
s += len + 2 + SCREEN_TAB_COLUMN_GAP;
s += width + 2 + SCREEN_TAB_COLUMN_GAP;
}
return 0;
}
Expand Down
8 changes: 4 additions & 4 deletions ScreenManager.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,11 @@ static inline bool drawTab(const int* y, int* x, int l, const char* name, bool c
(*x)++;
if (*x >= l)
return false;
int nameLen = strlen(name);
int n = MINIMUM(l - *x, nameLen);
const char* ptr = name;
int nameWidth = String_mbswidth(&ptr, SIZE_MAX, l - *x);
attrset(CRT_colors[cur ? SCREENS_CUR_TEXT : SCREENS_OTH_TEXT]);
mvaddnstr(*y, *x, name, n);
*x += n;
mvaddnstr(*y, *x, name, (int)(ptr - name));
*x += nameWidth;
if (*x >= l)
return false;
attrset(CRT_colors[cur ? SCREENS_CUR_BORDER : SCREENS_OTH_BORDER]);
Expand Down
253 changes: 253 additions & 0 deletions XUtils.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ in the source distribution for its full text.
#include "XUtils.h"

#include <assert.h>
#include <ctype.h> // IWYU pragma: keep
#include <errno.h>
#include <fcntl.h>
#include <limits.h> // IWYU pragma: keep
#include <math.h>
#include <stdarg.h>
#include <stdint.h>
Expand Down Expand Up @@ -224,6 +226,257 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t
return i;
}

#ifdef HAVE_LIBNCURSESW
/*
 * EncodeWChar callback: converts the wide character "wc" to its multibyte
 * form and appends it to the output described by "ps".
 *
 * When ps->buf is NULL nothing is stored; only ps->pos is advanced, which
 * lets a caller measure the number of bytes an encoded string needs.
 * When ps->buf is set, the bytes are stored at ps->buf + ps->pos and the
 * buffer must have room for them; truncation is not supported and a too
 * small buffer terminates the program through fail().
 */
static void String_encodeWChar(WCharEncoderState* ps, wchar_t wc) {
   assert(!ps->buf || ps->pos < ps->size);

   // Always encode into a scratch buffer first. wcrtomb() may emit several
   // bytes, so writing directly into ps->buf would overrun it before the
   // remaining space could be checked.
   char tempBuf[MB_LEN_MAX];

   size_t len = wcrtomb(tempBuf, wc, &ps->mbState);
   assert(len > 0);
   if (len == (size_t)-1) {
      // The character cannot be represented in the current locale.
      assert(len != (size_t)-1);
      fail();
   }

   if (ps->buf) {
      // It is unnecessarily expensive to fix the output string if the caller
      // gives an incorrect buffer size, so treat it as a fatal error. The
      // "pos > size" test guards the subtraction against wrapping around.
      if (ps->pos > ps->size || len > ps->size - ps->pos) {
         assert(ps->pos <= ps->size && len <= ps->size - ps->pos);
         fail();
      }
      memcpy((char*)ps->buf + ps->pos, tempBuf, len);
   }

   ps->pos += len;
}
#else
/*
 * EncodeWChar callback for builds without wide-character curses: appends
 * the single byte "c" to the output described by "ps".
 *
 * When ps->buf is NULL nothing is stored and only ps->pos is advanced.
 * When ps->buf is set, a full buffer terminates the program through fail(),
 * mirroring the wide-character variant.
 */
static void String_encodeWChar(WCharEncoderState* ps, int c) {
   assert(!ps->buf || ps->pos < ps->size);

   char* buf = ps->buf;
   if (buf) {
      if (ps->pos >= ps->size) {
         fail();
      }
      buf[ps->pos] = (char)c;
   }

   ps->pos += 1;
}
#endif

/*
 * Reads at most "maxLen" bytes from "src", one character per loop
 * iteration, and hands each resulting character to "encodeWChar" together
 * with the encoder state "ps".
 *
 * Characters that cannot be decoded or that are not printable are replaced
 * by a replacement character; a run of consecutive replacements is
 * collapsed into a single one. The loop always finishes by passing a
 * terminating 0 character to "encodeWChar", either because one was read
 * from "src" or because "maxLen" bytes have been consumed.
 */
void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar) {
assert(src || maxLen == 0);

// Offset of the next unread byte in "src"
size_t pos = 0;
// True when the previously emitted character was a replacement character
bool wasReplaced = false;

#ifdef HAVE_LIBNCURSESW
// U+FFFD when CRT_utf8 is set, a plain question mark otherwise
const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?';
wchar_t ch;

// Decoder state that is only updated after a successful mbrtowc() call
mbstate_t decState;
memset(&decState, 0, sizeof(decState));
#else
const char replacementChar = '?';
char ch;
#endif

do {
size_t len = 0;
bool shouldReplace = false;
// Stays 0 (the terminator) when there is no input left to read
ch = 0;

if (pos < maxLen) {
// Read the next character from the byte sequence
#ifdef HAVE_LIBNCURSESW
// Decode with a scratch copy of the state so that a failed call does
// not disturb "decState".
mbstate_t newState;
memcpy(&newState, &decState, sizeof(newState));
len = mbrtowc(&ch, &src[pos], maxLen - pos, &newState);

assert(len != 0 || ch == 0);
switch (len) {
case (size_t)-2:
// Incomplete sequence at the end of the input: flag it as an
// illegal sequence and consume all remaining bytes.
errno = EILSEQ;
shouldReplace = true;
len = maxLen - pos;
break;

case (size_t)-1:
// Invalid sequence: skip a single byte and try again from the next one
shouldReplace = true;
len = 1;
break;

default:
// Successful decode: commit the updated decoder state
memcpy(&decState, &newState, sizeof(decState));
}
#else
len = 1;
ch = src[pos];
#endif
}

pos += len;

// Filter unprintable characters
if (!shouldReplace && ch != 0) {
#ifdef HAVE_LIBNCURSESW
shouldReplace = !iswprint(ch);
#else
shouldReplace = !isprint((unsigned char)ch);
#endif
}

if (shouldReplace) {
ch = replacementChar;
if (wasReplaced) {
// Collapse consecutive replacements. "continue" jumps to the loop
// condition, which holds because "ch" is the non-zero replacement.
continue;
}
}
wasReplaced = shouldReplace;

encodeWChar(ps, ch);
} while (ch != 0);
}

/*
 * Builds a printable copy of "str" (reading at most "maxLen" bytes) in a
 * buffer obtained from xMalloc() and returns it.
 *
 * The work is done in two passes over the input: the first pass runs the
 * encoder without an output buffer to learn how many bytes are needed, the
 * second pass writes into a buffer of exactly that size.
 */
char* String_makePrintable(const char* str, size_t maxLen) {
   // Pass 1: measure. With a NULL buffer the encoder only counts bytes.
   WCharEncoderState measure;
   memset(&measure, 0, sizeof(measure));
   EncodePrintableString(&measure, str, maxLen, String_encodeWChar);

   const size_t needed = measure.pos;
   // At least the terminating character is always emitted
   assert(needed > 0);

   // Pass 2: encode again, this time into the freshly allocated buffer.
   char* result = xMalloc(needed);

   WCharEncoderState writer;
   memset(&writer, 0, sizeof(writer));
   writer.buf = result;
   writer.size = needed;
   EncodePrintableString(&writer, str, maxLen, String_encodeWChar);

   // Both passes must agree on the encoded length
   assert(writer.pos == needed);

   return result;
}

/*
 * Decodes the next character of the string tracked by "ps".
 *
 * On success the character is stored in ps->ch, ps->str / ps->maxLen are
 * advanced past it and true is returned. When the string terminator is
 * decoded, ps->ch is 0, ps->str is reset to NULL and ps->maxLen to 0, so
 * every later call returns false.
 *
 * Returns false without decoding anything when:
 *  - there is no input left (ps->str is NULL or ps->maxLen is 0),
 *  - a previous call hit an invalid sequence (ps->ch is WEOF / EOF),
 *  - the input ends in an incomplete multibyte sequence (the remaining
 *    bytes are consumed), or
 *  - the next bytes form an invalid sequence (ps->ch is set to WEOF).
 */
bool String_decodeNextWChar(MBStringDecoderState* ps) {
   if (!ps->str || ps->maxLen == 0) {
      return false;
   }

   // If the previous call of this function encounters an invalid sequence,
   // do not continue (because the "mbState" object for mbrtowc() is
   // undefined). The caller is supposed to reset the state.
#ifdef HAVE_LIBNCURSESW
   bool isStateDefined = ps->ch != WEOF;
#else
   bool isStateDefined = ps->ch != EOF;
#endif
   if (!isStateDefined) {
      return false;
   }

#ifdef HAVE_LIBNCURSESW
   wchar_t wc;
   size_t len = mbrtowc(&wc, ps->str, ps->maxLen, &ps->mbState);
   switch (len) {
      case (size_t)-1:
         // Invalid sequence
         ps->ch = WEOF;
         return false;

      case (size_t)-2:
         // Incomplete sequence
         ps->str += ps->maxLen;
         ps->maxLen = 0;
         return false;

      case 0:
         // String terminator decoded; nothing more to read
         assert(wc == 0);

         ps->str = NULL;
         ps->maxLen = 0;
         ps->ch = wc;
         return true;

      default:
         ps->str += len;
         ps->maxLen -= len;
         ps->ch = wc;
   }
   return true;
#else
   // Convert through "unsigned char": where plain "char" is signed, a byte
   // such as 0xFF would otherwise become -1 and be mistaken for the EOF
   // sentinel that marks an undefined decoder state.
   ps->ch = (unsigned char)*ps->str;
   if (ps->ch == 0) {
      ps->str = NULL;
      ps->maxLen = 0;
   } else {
      ps->str++;
      ps->maxLen--;
   }
   return true;
#endif
}

#ifndef HAVE_STRNLEN
/*
 * Fallback for platforms without strnlen(): returns the number of bytes
 * before the first NUL in "str", looking at no more than "maxLen" bytes.
 */
static size_t strnlen(const char* str, size_t maxLen) {
   size_t count = 0;
   while (count < maxLen && str[count] != '\0') {
      count++;
   }
   return count;
}
#endif

/*
 * Measures how many terminal columns the string at "*str" occupies,
 * examining at most "maxLen" bytes and stopping before the total would
 * exceed "maxWidth" columns. A negative "maxWidth" means no column limit.
 *
 * On return "*str" has been advanced to the end of the part of the string
 * that was accepted, so the caller can compute its byte length by
 * subtracting the pointer value it passed in.
 *
 * Returns the accumulated width in columns.
 */
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
   assert(*str || maxLen == 0);

   if (maxWidth < 0)
      maxWidth = INT_MAX;

#ifdef HAVE_LIBNCURSESW
   MBStringDecoderState decoder;
   memset(&decoder, 0, sizeof(decoder));
   decoder.str = *str;
   decoder.maxLen = maxLen;

   int columns = 0;

   for (;;) {
      // Stop at the end of input, on a decoding failure, or at the terminator
      if (!String_decodeNextWChar(&decoder) || decoder.ch == 0)
         break;

      int charWidth = wcwidth((wchar_t)decoder.ch);
      if (charWidth < 0) {
         // wcwidth() could not give a width for this character
         assert(charWidth >= 0);
         break;
      }

      // Stop before a character that would not fit in the remaining columns
      int remaining = maxWidth - columns;
      if (charWidth > remaining)
         break;

      columns += charWidth;

      // A zero-column character extends the accepted substring only when the
      // working encoding is UTF-8. In Unicode, combining characters follow
      // their base character, but some legacy 8-bit encodings place them
      // before it, so there they are only accepted together with the
      // character that comes next.
      if (charWidth > 0 || CRT_utf8) {
         *str = decoder.str;
      }
   }

   assert(decoder.ch != WEOF);
   return columns;
#else
   // One byte per column: the width is the string length, capped by both limits
   size_t limit = MINIMUM((unsigned int)maxWidth, maxLen);
   size_t len = strnlen(*str, limit);
   *str += len;
   return (int)len;
#endif
}

int xAsprintf(char** strp, const char* fmt, ...) {
va_list vl;
va_start(vl, fmt);
Expand Down
37 changes: 37 additions & 0 deletions XUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,32 @@ in the source distribution for its full text.

#include "Compat.h"
#include "Macros.h"
#include "ProvideCurses.h"


/* Output state shared between EncodePrintableString() and its EncodeWChar callback. */
typedef struct WCharEncoderState_ {
// Number of bytes produced so far; the callback advances it for every character
size_t pos;
// Capacity of "buf" in bytes; only checked when "buf" is set
size_t size;
// Output buffer, or NULL to count bytes without storing them
void* buf;
// Conversion state handed to wcrtomb() by the wide-character callback
mbstate_t mbState;
} WCharEncoderState;

/* Input state used by String_decodeNextWChar() to walk a string one character at a time. */
typedef struct MBStringDecoderState_ {
// Next byte to decode; set to NULL once the string terminator has been decoded
const char* str;
// Number of bytes that may still be read from "str"
size_t maxLen;
#ifdef HAVE_LIBNCURSESW
// Most recently decoded character; WEOF marks an invalid sequence
wint_t ch;
// Conversion state handed to mbrtowc()
mbstate_t mbState;
#else
// Most recently decoded character; the decoder stops once this equals EOF
int ch;
#endif
} MBStringDecoderState;

// Callback type invoked by EncodePrintableString() once per output
// character, including the terminating 0 character. The character is a
// wchar_t with wide-character curses and an int otherwise.
#ifdef HAVE_LIBNCURSESW
typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, wchar_t wc);
#else
typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, int c);
#endif

ATTR_NORETURN
void fail(void);
Expand Down Expand Up @@ -102,6 +127,18 @@ static inline char* String_strchrnul(const char* s, int c) {
ATTR_NONNULL ATTR_ACCESS3_W(1, 3) ATTR_ACCESS3_R(2, 3)
size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t size);

/* Reads at most "maxLen" bytes from "src" and passes each character to
   "encodeWChar", substituting a replacement character for undecodable or
   unprintable input. A terminating 0 character is always passed last. */
ATTR_NONNULL_N(1, 4) ATTR_ACCESS2_W(1) ATTR_ACCESS3_R(2, 3)
void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar);

/* Returns a printable copy of "str" (at most "maxLen" bytes are read) in a
   buffer allocated with xMalloc(). */
ATTR_RETNONNULL ATTR_MALLOC ATTR_ACCESS3_R(1, 2)
char* String_makePrintable(const char* str, size_t maxLen);

/* Decodes the next character into ps->ch and advances ps->str. Returns
   false when no character could be decoded (end of input, or an invalid or
   incomplete sequence). */
ATTR_NONNULL
bool String_decodeNextWChar(MBStringDecoderState* ps);

/* Returns the width in terminal columns of the string at "*str", reading at
   most "maxLen" bytes and not exceeding "maxWidth" columns (negative means
   unlimited). "*str" is advanced past the part that was accepted. */
ATTR_NONNULL ATTR_ACCESS2_RW(1)
int String_mbswidth(const char** str, size_t maxLen, int maxWidth);

ATTR_FORMAT(printf, 2, 3) ATTR_NONNULL_N(1, 2)
int xAsprintf(char** strp, const char* fmt, ...);

Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ AC_CHECK_FUNCS([ \
sched_getscheduler \
sched_setscheduler \
strchrnul \
strnlen \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I like this, when we already have plenty of code, that requires this function without prior check …

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A quick grep -r strnlen shows no code in htop has been using strnlen before this PR.

])

if test "$my_htop_platform" = darwin; then
Expand Down
Loading