Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Documentation/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,8 @@ include::config/sequencer.txt[]

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the Git mailing list, Junio C Hamano wrote (reply to this):

"Johannes Schindelin via GitGitGadget" <[email protected]>
writes:

> diff --git a/Documentation/config/sideband.txt b/Documentation/config/sideband.txt
> new file mode 100644
> index 0000000000..3fb5045cd7
> --- /dev/null
> +++ b/Documentation/config/sideband.txt
> @@ -0,0 +1,5 @@
> +sideband.allowControlCharacters::
> +	By default, control characters that are delivered via the sideband
> +	are masked, to prevent potentially unwanted ANSI escape sequences
> +	from being sent to the terminal. Use this config setting to override
> +	this behavior.

Two thoughts.

 - Users may want to say "I trust this remote host" or "I trust this
   remote repository".  For that, something similar to what we do to
   `http.variable` to allow `http.<url>.variable` to take precedence
   over `http.variable` would be necessary.

 - It may no longer matter but a remote repository that may send
   messages as strings encoded in ISO/IEC 2022 would need to set
   this, merely to make the messages human-readable.  There may be
   other reasons the trusted repositories want to send "escape
   sequences".

It might even be a good idea to make the default setting of this
variable "allow", except for the initial connections to repositories
(i.e., "git clone $URL", and "git fetch/ls-remote $URL" with an
explicit $URL without using a nickname recorded in our .git/config),
as visiting a potentially malicious remote repository you are not
familiar with may not be uncommon, and users may deserve protection
over inconvenience.

But once the user establishes a working relationship with a remote
repository, would it be a lot more common to trust the contents
there than be on the lookout that the repository may spew bad
strings of bytes at your standard error stream, I have to wonder.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the Git mailing list, Johannes Schindelin wrote (reply to this):

Hi Junio,

On Thu, 18 Dec 2025, Junio C Hamano wrote:

> "Johannes Schindelin via GitGitGadget" <[email protected]>
> writes:
> 
> > diff --git a/Documentation/config/sideband.txt b/Documentation/config/sideband.txt
> > new file mode 100644
> > index 0000000000..3fb5045cd7
> > --- /dev/null
> > +++ b/Documentation/config/sideband.txt
> > @@ -0,0 +1,5 @@
> > +sideband.allowControlCharacters::
> > +	By default, control characters that are delivered via the sideband
> > +	are masked, to prevent potentially unwanted ANSI escape sequences
> > +	from being sent to the terminal. Use this config setting to override
> > +	this behavior.
> 
> Two thoughts.
> 
>  - Users may want to say "I trust this remote host" or "I trust this
>    remote repository".  For that, something similar to what we do to
>    `http.variable` to allow `http.<url>.variable` to take precedence
>    over `http.variable` would be necessary.

Good idea! What do you think about something like this?

-- snip --
diff --git a/http.c b/http.c
index d59e59f66b1..14b5a95586c 100644
--- a/http.c
+++ b/http.c
@@ -19,6 +19,7 @@
 #include "string-list.h"
 #include "object-file.h"
 #include "object-store-ll.h"
+#include "sideband.h"
 
 static struct trace_key trace_curl = TRACE_KEY_INIT(CURL);
 static int trace_curl_data = 1;
@@ -566,6 +567,9 @@ static int http_options(const char *var, const char *value,
 		return 0;
 	}
 
+	if (!strcmp("http.sanitizesideband", var))
+		return sideband_allow_control_characters_config(var, value);
+
 	/* Fall back on the default ones */
 	return git_default_config(var, value, ctx, data);
 }
diff --git a/sideband.c b/sideband.c
index 725e24db0db..178c1320cac 100644
--- a/sideband.c
+++ b/sideband.c
@@ -26,13 +26,14 @@ static struct keyword_entry keywords[] = {
 };
 
 static enum {
+	ALLOW_CONTROL_SEQUENCES_UNSET = -1,
 	ALLOW_NO_CONTROL_CHARACTERS = 0,
 	ALLOW_ANSI_COLOR_SEQUENCES = 1<<0,
 	ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1,
 	ALLOW_ANSI_ERASE = 1<<2,
 	ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES,
 	ALLOW_ALL_CONTROL_CHARACTERS = 1<<3,
-} allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES;
+} allow_control_characters = ALLOW_CONTROL_SEQUENCES_UNSET;
 
 static inline int skip_prefix_in_csv(const char *value, const char *prefix,
 				     const char **out)
@@ -44,8 +45,19 @@ static inline int skip_prefix_in_csv(const char *value, const char *prefix,
 	return 1;
 }
 
-static void parse_allow_control_characters(const char *value)
+int sideband_allow_control_characters_config(const char *var, const char *value)
 {
+	switch (git_parse_maybe_bool(value)) {
+	case 0:
+		allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS;
+		return 0;
+	case 1:
+		allow_control_characters = ALLOW_ALL_CONTROL_CHARACTERS;
+		return 0;
+	default:
+		break;
+	}
+
 	allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS;
 	while (*value) {
 		if (skip_prefix_in_csv(value, "default", &value))
@@ -61,9 +73,9 @@ static void parse_allow_control_characters(const char *value)
 		else if (skip_prefix_in_csv(value, "false", &value))
 			allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS;
 		else
-			warning(_("unrecognized value for `sideband."
-				  "allowControlCharacters`: '%s'"), value);
+			warning(_("unrecognized value for '%s': '%s'"), var, value);
 	}
+	return 0;
 }
 
 /* Returns a color setting (GIT_COLOR_NEVER, etc). */
@@ -79,20 +91,12 @@ static int use_sideband_colors(void)
 	if (use_sideband_colors_cached >= 0)
 		return use_sideband_colors_cached;
 
-	switch (git_config_get_maybe_bool("sideband.allowcontrolcharacters", &i)) {
-	case 0: /* Boolean value */
-		allow_control_characters = i ? ALLOW_ALL_CONTROL_CHARACTERS :
-			ALLOW_NO_CONTROL_CHARACTERS;
-		break;
-	case -1: /* non-Boolean value */
-		if (git_config_get_string_tmp("sideband.allowcontrolcharacters",
-					      &value))
-			; /* huh? `get_maybe_bool()` returned -1 */
-		else
-			parse_allow_control_characters(value);
-		break;
-	default:
-		break; /* not configured */
+	if (allow_control_characters == ALLOW_CONTROL_SEQUENCES_UNSET) {
+		if (!git_config_get_value("sideband.allowcontrolcharacters", &value))
+			sideband_allow_control_characters_config("sideband.allowcontrolcharacters", value);
+
+		if (allow_control_characters == ALLOW_CONTROL_SEQUENCES_UNSET)
+			allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES;
 	}
 
 	if (!git_config_get_string_tmp(key, &value))
diff --git a/sideband.h b/sideband.h
index 5a25331be55..e711ad0f4e0 100644
--- a/sideband.h
+++ b/sideband.h
@@ -30,4 +30,11 @@ int demultiplex_sideband(const char *me, int status,
 
 void send_sideband(int fd, int band, const char *data, ssize_t sz, int packet_max);
 
+/*
+ * Parse and set the sideband allow control characters configuration.
+ * The var parameter should be the key name (without section prefix).
+ * Returns 0 if the variable was recognized and handled, non-zero otherwise.
+ */
+int sideband_allow_control_characters_config(const char *var, const char *value);
+
 #endif
-- snap --

If this is the direction you're thinking, I'll polish it and integrate it
into v3.

>  - It may no longer matter but a remote repository that may send
>    messages as strings encoded in ISO/IEC 2022 would need to set
>    this, merely to make the messages human-readable.  There may be
>    other reasons the trusted repositories want to send "escape
>    sequences".

If the remote side has no way to determine whether the client side is
connected to a terminal or not (which we have already established in this
thread), it has even less chance to determine which character encoding is
in use...

> It might even be a good idea to make the default setting of this
> variable "allow", except for the initial connections to repositories
> (i.e., "git clone $URL", and "git fetch/ls-remote $URL" with an
> explicit $URL without using a nickname recorded in our .git/config),
> as visiting a potentially malicious remote repository you are not
> familiar with may not be uncommon, and users may deserve protection
> over inconvenience.
> 
> But once the user establishes a working relationship with a remote
> repository, would it be a lot more common to trust the contents
> there than be on the lookout that the repository may spew bad
> strings of bytes at your standard error stream, I have to wonder.

I am not so sure whether that would be desirable, for (at least :-) ) two
reasons:

- `git fetch` with an explicit URL is sometimes used outside clone
  scenarios, and in some clone-type scenarios, `git clone` cannot be used
  (e.g. to establish credentials or to determine the appropriate sparse
  checkout based on information from the tip revision).

  I know that it is a delicate balance to strike between convenience and
  security. Yet I also know that users prefer easy-to-explain mental
  models and this logic would be a bit hard to explain: Why disallow
  something while cloning or fetching with an explicit URL while allowing
  the very same thing in a subsequent fetch?

  tl;dr I expect users to be much more okay with the strategy to disallow
  all but very few ANSI sequences by default, with a message that tells
  them what to do if they want to enable more (or all) control sequences.

- I do not see how the user can inspect what the remote side does, even
  after an initial clone. Therefore users would not have any reasonable
  chance to gain any confidence that the remote side isn't doing anything
  malicious. To the contrary, remote servers could specifically "behave"
  during a clone, and launch the attack only during a fetch (indicated by
  "have" lines in the request).

  tl;dr remote servers don't get more trustworthy just by successfully
  serving clones.

Does that reasoning make sense to you?

Ciao,
Johannes

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the Git mailing list, Junio C Hamano wrote (reply to this):

Johannes Schindelin <[email protected]> writes:

> Good idea! What do you think about something like this?

It may be easier to hack up to piggyback on the http.*.variable
infrastructure, but I do not like the smell of it very much, because
the implementation ties it too tightly to the http transport; I
think this should live in one layer up (transport?).

> If this is the direction you're thinking, I'll polish it and integrate it
> into v3.

In other words, it would be more like sideband.allowEscapeSequences
that is overridden by sideband.<url>.allowEscapeSequences was what I
had in mind.  Or even transfer.allowEscapeSequencesInSideband that
is overridden by transfer.<url>.allowEscapeSequencesInSideband.

>>  - It may no longer matter but a remote repository that may send
>>    messages as strings encoded in ISO/IEC 2022 would need to set
>>    this, merely to make the messages human-readable.  There may be
>>    other reasons the trusted repositories want to send "escape
>>    sequences".
>
> If the remote side has no way to determine whether the client side is
> connected to a terminal or not (which we have already established in this
> thread), it has even less chance to determine which character encoding is
> in use...

Then I think you need to re-read brian's

  https://lore.kernel.org/git/[email protected]/

In any case, I do not think ISO/IEC 2022 matters as much as it used
to back when the reencode_string_iconv() was written (which was the
topic of another thread regarding the broken iconv on macOS wrt
2022).  But even if we limit ourselves to UTF-8, brian's point that
applications do assume certain characteristics on its clients and
implements unportable stuff.  A project targeting developers and/or
users from certain locale may use their own hooks that assumes the
clients understands strings in certain language in certain encoding.

And to serve these projects better, classes like "pass colors",
"pass cursor movements", might help than just "pass everything" vs
"deny everything", but we probably want to try to keep it as simple
as possible; trying to make it finer grained with extra complexity
would only make our efforts look like whack-a-mole X-<.

>> It might even be a good idea to make the default setting of this
>> variable "allow", except for the initial connections to repositories
>> (i.e., "git clone $URL", and "git fetch/ls-remote $URL" with an
>> explicit $URL without using a nickname recorded in our .git/config),
>> as visiting a potentially malicious remote repository you are not
>> familiar with may not be uncommon, and users may deserve protection
>> over inconvenience.
>> 
>> But once the user establishes a working relationship with a remote
>> repository, would it be a lot more common to trust the contents
>> there than be on the lookout that the repository may spew bad
>> strings of bytes at your standard error stream, I have to wonder.

>   tl;dr remote servers don't get more trustworthy just by successfully
>   serving clones.

The "successfully serving clone" has nothing to do with the reason
why I suggested to deny by default in "clone" and anything that gets
$URL not remote nickname.  I am roughly equating the fact that the
user cloned *and* *then* continues to interact with the project that
is served from that remote repository (hence using the remote
nickname) with the willingness by the user to trust that particular
remote repository.

include::config/showbranch.txt[]

include::config/sideband.txt[]

include::config/sparse.txt[]

include::config/splitindex.txt[]
Expand Down
24 changes: 24 additions & 0 deletions Documentation/config/sideband.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
sideband.allowControlCharacters::
By default, control characters that are delivered via the sideband
are masked, except ANSI color sequences. This prevents potentially
unwanted ANSI escape sequences from being sent to the terminal. Use
this config setting to override this behavior (the value can be
a comma-separated list of the following keywords):
+
--
default::
color::
Allow ANSI color sequences, line feeds and horizontal tabs,
but mask all other control characters. This is the default.
cursor::
Allow control sequences that move the cursor. This is
disabled by default.
erase::
Allow control sequences that erase characters. This is
disabled by default.
false::
Mask all control characters other than line feeds and
horizontal tabs.
true::
Allow all control characters to be sent to the terminal.
--
148 changes: 146 additions & 2 deletions sideband.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,47 @@ static struct keyword_entry keywords[] = {
{ "error", GIT_COLOR_BOLD_RED },
};

/*
 * Bit mask selecting which control characters arriving over the sideband
 * are passed through instead of being masked. The individual ANSI classes
 * can be combined; the initial value permits ANSI color sequences only.
 */
static enum {
	/* no optional class enabled */
	ALLOW_NO_CONTROL_CHARACTERS = 0,

	/* individually selectable classes of ANSI escape sequences */
	ALLOW_ANSI_COLOR_SEQUENCES = (1 << 0),
	ALLOW_ANSI_CURSOR_MOVEMENTS = (1 << 1),
	ALLOW_ANSI_ERASE = (1 << 2),

	/* pass everything through untouched */
	ALLOW_ALL_CONTROL_CHARACTERS = (1 << 3),

	/* what is in effect when nothing has been configured */
	ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES,
} allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES;

/*
 * Match one item of a comma-separated list.
 *
 * Succeeds (returns 1) when `value` starts with `prefix` and the prefix is
 * followed either by a comma or by the end of the string. On success `*out`
 * points just past the item and its separating comma, if there was one.
 * On failure 0 is returned and `*out` is left untouched.
 */
static inline int skip_prefix_in_csv(const char *value, const char *prefix,
				     const char **out)
{
	const char *rest;

	if (!skip_prefix(value, prefix, &rest))
		return 0;

	if (*rest == ',')
		rest++;		/* consume the separator as well */
	else if (*rest)
		return 0;	/* `prefix` is only the start of a longer word */

	*out = rest;
	return 1;
}

/*
 * Parse the non-Boolean form of `sideband.allowControlCharacters`: a
 * comma-separated list of keywords.
 *
 * The setting is reset first, so only what is listed ends up enabled.
 * "default", "color", "cursor" and "erase" add to the set accumulated so
 * far, while "true" and "false" replace it. Items that are not recognized
 * produce a warning and are skipped.
 */
static void parse_allow_control_characters(const char *value)
{
	allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS;
	while (*value) {
		if (skip_prefix_in_csv(value, "default", &value)) {
			allow_control_characters |= ALLOW_DEFAULT_ANSI_SEQUENCES;
		} else if (skip_prefix_in_csv(value, "color", &value)) {
			allow_control_characters |= ALLOW_ANSI_COLOR_SEQUENCES;
		} else if (skip_prefix_in_csv(value, "cursor", &value)) {
			allow_control_characters |= ALLOW_ANSI_CURSOR_MOVEMENTS;
		} else if (skip_prefix_in_csv(value, "erase", &value)) {
			allow_control_characters |= ALLOW_ANSI_ERASE;
		} else if (skip_prefix_in_csv(value, "true", &value)) {
			allow_control_characters = ALLOW_ALL_CONTROL_CHARACTERS;
		} else if (skip_prefix_in_csv(value, "false", &value)) {
			allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS;
		} else {
			warning(_("unrecognized value for `sideband."
				  "allowControlCharacters`: '%s'"), value);
			/*
			 * None of the attempts above advanced `value`. Step
			 * over the offending item and its separator; without
			 * this the loop would spin forever on the same text.
			 */
			while (*value && *value != ',')
				value++;
			if (*value)
				value++;
		}
	}
}

/* Returns a color setting (GIT_COLOR_NEVER, etc). */
static int use_sideband_colors(void)
{
Expand All @@ -38,6 +79,22 @@ static int use_sideband_colors(void)
if (use_sideband_colors_cached >= 0)
return use_sideband_colors_cached;

switch (git_config_get_maybe_bool("sideband.allowcontrolcharacters", &i)) {
case 0: /* Boolean value */
allow_control_characters = i ? ALLOW_ALL_CONTROL_CHARACTERS :
ALLOW_NO_CONTROL_CHARACTERS;
break;
case -1: /* non-Boolean value */
if (git_config_get_string_tmp("sideband.allowcontrolcharacters",
&value))
; /* huh? `get_maybe_bool()` returned -1 */
else
parse_allow_control_characters(value);
break;
default:
break; /* not configured */
}

if (!git_config_get_string_tmp(key, &value))
use_sideband_colors_cached = git_config_colorbool(key, value);
else if (!git_config_get_string_tmp("color.ui", &value))
Expand Down Expand Up @@ -65,6 +122,93 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref
list_config_item(list, prefix, keywords[i].keyword);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the Git mailing list, Phillip Wood wrote (reply to this):

Hi Dscho

Just a couple of small comments

On 14/01/2025 18:19, Johannes Schindelin via GitGitGadget wrote:
> From: Johannes Schindelin <[email protected]>
> > +static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n)
> +{
> +	strbuf_grow(dest, n);
> +	for (; n && *src; src++, n--) {
> +		if (!iscntrl(*src) || *src == '\t' || *src == '\n')

Isn't it a bug to pass '\n' to maybe_colorize_sideband() ?

> +			strbuf_addch(dest, *src);
> +		else {
> +			strbuf_addch(dest, '^');
> +			strbuf_addch(dest, 0x40 + *src);

This will escape DEL ('\x7f') as "^\xbf" which is invalid in utf-8 locales. Perhaps we could use "^?" for that instead.

> +test_expect_success 'disallow (color) control sequences in sideband' '
> +	write_script .git/color-me-surprised <<-\EOF &&
> +	printf "error: Have you \\033[31mread\\033[m this?\\n" >&2
> +	exec "$@"
> +	EOF
> +	test_config_global uploadPack.packObjectshook ./color-me-surprised &&
> +	test_commit need-at-least-one-commit &&
> +	git clone --no-local . throw-away 2>stderr &&
> +	test_decode_color <stderr >decoded &&
> +	test_grep ! RED decoded

I'd be happier if we used test_cmp() here so that we check that the sanitized version matches what we expect and the test does not pass if a typo in the script above stops it from writing the SGR code for red.

Best Wishes

Phillip

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the Git mailing list, Johannes Schindelin wrote (reply to this):

Hi Phillip,

On Wed, 15 Jan 2025, Phillip Wood wrote:

> Just a couple of small comments
> 
> On 14/01/2025 18:19, Johannes Schindelin via GitGitGadget wrote:
> > From: Johannes Schindelin <[email protected]>
> > 
> > +static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int
> > n)
> > +{
> > +	strbuf_grow(dest, n);
> > +	for (; n && *src; src++, n--) {
> > +		if (!iscntrl(*src) || *src == '\t' || *src == '\n')
> 
> Isn't it a bug to pass '\n' to maybe_colorize_sideband() ?

While a band 2 message is indeed split by newlines and fed to this
function line by line, which is the case for a long time already: since
ed1902ef5c6 (cope with multiple line breaks within sideband progress
messages, 2007-10-16), the same is not true for band 3 messages: They pass
the entire message in one go (and for multi-line payload, only the first
line is prefixed with `remote:`, which is arguably a bug, but not one that
is within this here patch series' scope).

See
https://gitlab.com/git-scm/git/-/blob/v2.52.0/sideband.c#L191 and
https://gitlab.com/git-scm/git/-/blob/v2.52.0/sideband.c#L176,
respectively.

So no, I don't think that we can currently consider it a bug to pass `\n`
as part of the `src` parameter to `maybe_colorize_sideband()`.

> > +			strbuf_addch(dest, *src);
> > +		else {
> > +			strbuf_addch(dest, '^');
> > +			strbuf_addch(dest, 0x40 + *src);
> 
> This will escape DEL ('\x7f') as "^\xbf" which is invalid in utf-8 locales.
> Perhaps we could use "^?" for that instead.

Good point! This seems to be the historical way to escape DEL, probably
because 0x3f ('?') is 0x7f + 0x40 truncated to 7 bits. I'll do this in the
next iteration:

-- snip --
diff --git a/sideband.c b/sideband.c
index f613d4d6cc3..684621579fd 100644
--- a/sideband.c
+++ b/sideband.c
@@ -175,7 +175,7 @@ static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n)
 			n -= i;
 		} else {
 			strbuf_addch(dest, '^');
-			strbuf_addch(dest, 0x40 + *src);
+			strbuf_addch(dest, *src == 0x7f ? '?' : 0x40 + *src);
 		}
 	}
 }
-- snap --

> 
> > +test_expect_success 'disallow (color) control sequences in sideband' '
> > +	write_script .git/color-me-surprised <<-\EOF &&
> > +	printf "error: Have you \\033[31mread\\033[m this?\\n" >&2
> > +	exec "$@"
> > +	EOF
> > +	test_config_global uploadPack.packObjectshook ./color-me-surprised &&
> > +	test_commit need-at-least-one-commit &&
> > +	git clone --no-local . throw-away 2>stderr &&
> > +	test_decode_color <stderr >decoded &&
> > +	test_grep ! RED decoded
> 
> I'd be happier if we used test_cmp() here so that we check that the sanitized
> version matches what we expect and the test does not pass if there a typo in
> the script above stops it from writing the SGR code for red.

I often debug test failures in Git's test suite and one of the most
annoying category of test failures is when test cases expect byte-wise
exact Git output that changed for totally legitimate reasons [*1*].

Even worse: In many of those instances, the _intent_ of the check is not
even clear from that `test_cmp` and has to be reconstructed, a boring,
tedious task with little benefit to show for the effort.

I much prefer tests like this one, where a precise `test_grep` states
exactly what it expects to be present, or missing. The intent of such a
command is much clearer than that of `test_cmp expect actual`.

So, much as I appreciate your suggestion, I would prefer to keep the code
as-is.

Ciao,
Johannes

Footnote *1*: This really is not hypothetical. I had to battle quite a bit
with unstable compression sizes that are part of a `test_cmp` comparison,
https://github.com/git-for-windows/git/pull/5926#issuecomment-3486556940
shows a bit of the problems but is very shy about providing the specific
number of days I spent on addressing this issue. In hindsight, I should
have spent at most two hours on converting that from a byte-wise
comparison to a qualitative comparison.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the Git mailing list, Andreas Schwab wrote (reply to this):

On Jan 14 2025, Johannes Schindelin via GitGitGadget wrote:

> diff --git a/sideband.c b/sideband.c
> index 02805573fab..c0b1cb044a3 100644
> --- a/sideband.c
> +++ b/sideband.c
> @@ -65,6 +65,19 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref
>  		list_config_item(list, prefix, keywords[i].keyword);
>  }
>  
> +static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n)
> +{
> +	strbuf_grow(dest, n);
> +	for (; n && *src; src++, n--) {
> +		if (!iscntrl(*src) || *src == '\t' || *src == '\n')

The argument of iscntrl needs to be converted to unsigned char.

-- 
Andreas Schwab, [email protected]
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the Git mailing list, Junio C Hamano wrote (reply to this):

Andreas Schwab <[email protected]> writes:

> On Jan 14 2025, Johannes Schindelin via GitGitGadget wrote:
>
>> diff --git a/sideband.c b/sideband.c
>> index 02805573fab..c0b1cb044a3 100644
>> --- a/sideband.c
>> +++ b/sideband.c
>> @@ -65,6 +65,19 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref
>>  		list_config_item(list, prefix, keywords[i].keyword);
>>  }
>>  
>> +static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n)
>> +{
>> +	strbuf_grow(dest, n);
>> +	for (; n && *src; src++, n--) {
>> +		if (!iscntrl(*src) || *src == '\t' || *src == '\n')
>
> The argument of iscntrl needs to be converted to unsigned char.

If this were system-provided one, you are absolutely correct.

But I think this comes from 

sane-ctype.h:15:#undef iscntrl
sane-ctype.h:40:#define iscntrl(x) (sane_istest(x,GIT_CNTRL))

and sane_istest() does the casting to uchar for us, so this may be
OK (even if it may be a bit misleading).

}

/*
 * Check whether `src` (of which at most `n` bytes may be inspected) starts
 * with an ANSI escape sequence that the current `allow_control_characters`
 * setting permits.
 *
 * If so, the complete sequence is appended to `dest` and the index of its
 * final byte is returned; the caller is expected to advance by that amount
 * on top of its regular one-byte step. Otherwise nothing is appended and 0
 * is returned.
 */
static int handle_ansi_sequence(struct strbuf *dest, const char *src, int n)
{
	int i;

	/*
	 * Valid ANSI color sequences are of the form
	 *
	 * ESC [ [<n> [; <n>]*] m
	 *
	 * These are part of the Select Graphic Rendition sequences which
	 * contain more than just color sequences, for more details see
	 * https://en.wikipedia.org/wiki/ANSI_escape_code#SGR.
	 *
	 * The cursor movement sequences are:
	 *
	 * ESC [ n A - Cursor up n lines (CUU)
	 * ESC [ n B - Cursor down n lines (CUD)
	 * ESC [ n C - Cursor forward n columns (CUF)
	 * ESC [ n D - Cursor back n columns (CUB)
	 * ESC [ n E - Cursor next line, beginning (CNL)
	 * ESC [ n F - Cursor previous line, beginning (CPL)
	 * ESC [ n G - Cursor to column n (CHA)
	 * ESC [ n ; m H - Cursor position (row n, col m) (CUP)
	 * ESC [ n ; m f - Same as H (HVP)
	 *
	 * The sequences to erase characters are:
	 *
	 * ESC [ 0 J - Clear from cursor to end of screen (ED)
	 * ESC [ 1 J - Clear from cursor to beginning of screen (ED)
	 * ESC [ 2 J - Clear entire screen (ED)
	 * ESC [ 3 J - Clear entire screen + scrollback (ED) - xterm extension
	 * ESC [ 0 K - Clear from cursor to end of line (EL)
	 * ESC [ 1 K - Clear from cursor to beginning of line (EL)
	 * ESC [ 2 K - Clear entire line (EL)
	 * ESC [ n M - Delete n lines (DL)
	 * ESC [ n P - Delete n characters (DCH)
	 * ESC [ n X - Erase n characters (ECH)
	 *
	 * For a comprehensive list of common ANSI Escape sequences, see
	 * https://www.xfree86.org/current/ctlseqs.html
	 */

	if (n < 3 || src[0] != '\x1b' || src[1] != '[')
		return 0;

	for (i = 2; i < n; i++) {
		char c = src[i];

		/*
		 * strchr() also "finds" the NUL that terminates the sets
		 * below, so a NUL byte must be ruled out explicitly or it
		 * would be accepted as the final byte of a sequence.
		 */
		if (c &&
		    (((allow_control_characters & ALLOW_ANSI_COLOR_SEQUENCES) &&
		      c == 'm') ||
		     ((allow_control_characters & ALLOW_ANSI_CURSOR_MOVEMENTS) &&
		      strchr("ABCDEFGHf", c)) ||
		     ((allow_control_characters & ALLOW_ANSI_ERASE) &&
		      strchr("JKMPX", c)))) {
			strbuf_add(dest, src, i + 1);
			return i;
		}

		/* only numeric parameters may precede the final byte */
		if (!isdigit(c) && c != ';')
			break;
	}

	return 0;
}

/*
 * Append up to `n` bytes of `src` to `dest`, masking control characters.
 *
 * When all control characters are allowed, the input is copied verbatim.
 * Otherwise copying stops at the first NUL byte; horizontal tabs, line
 * feeds and non-control bytes are copied as-is, ANSI sequences accepted by
 * handle_ansi_sequence() are passed through, and every other control
 * character is rendered in caret notation ("^G", "^[", "^?" for DEL).
 */
static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n)
{
	if (allow_control_characters & ALLOW_ALL_CONTROL_CHARACTERS) {
		strbuf_add(dest, src, n);
		return;
	}

	strbuf_grow(dest, n);
	while (n && *src) {
		int skipped;

		if (!iscntrl(*src) || *src == '\t' || *src == '\n') {
			strbuf_addch(dest, *src);
		} else if (allow_control_characters != ALLOW_NO_CONTROL_CHARACTERS &&
			   (skipped = handle_ansi_sequence(dest, src, n))) {
			/* the sequence was already appended; step over it */
			src += skipped;
			n -= skipped;
		} else {
			strbuf_addch(dest, '^');
			strbuf_addch(dest, *src == 0x7f ? '?' : 0x40 + *src);
		}

		src++;
		n--;
	}
}

/*
* Optionally highlight one keyword in remote output if it appears at the start
* of the line. This should be called for a single line only, which is
Expand All @@ -80,7 +224,7 @@ static void maybe_colorize_sideband(struct strbuf *dest, const char *src, int n)
int i;

if (!want_color_stderr(use_sideband_colors())) {
strbuf_add(dest, src, n);
strbuf_add_sanitized(dest, src, n);
return;
}

Expand Down Expand Up @@ -113,7 +257,7 @@ static void maybe_colorize_sideband(struct strbuf *dest, const char *src, int n)
}
}

strbuf_add(dest, src, n);
strbuf_add_sanitized(dest, src, n);
}


Expand Down
68 changes: 68 additions & 0 deletions t/t5409-colorize-remote-messages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,72 @@ test_expect_success 'fallback to color.ui' '
grep "<BOLD;RED>error<RESET>: error" decoded
'

# A pack-objects hook makes the remote side print an "error:" line that
# contains an SGR "red" sequence and a BEL character. The clone is then run
# three times, each with a different sideband.allowControlCharacters setting:
#  - unset: the color survives, the BEL is shown as "^G" and no raw BEL byte
#    reaches stderr;
#  - false: the color is masked as well, the BEL is still shown as "^G";
#  - bare key (i.e. true): the color survives and the raw BEL byte is
#    passed through.
test_expect_success 'disallow (color) control sequences in sideband' '
write_script .git/color-me-surprised <<-\EOF &&
printf "error: Have you \\033[31mread\\033[m this?\\a\\n" >&2
exec "$@"
EOF
test_config_global uploadPack.packObjectsHook ./color-me-surprised &&
test_commit need-at-least-one-commit &&

git clone --no-local . throw-away 2>stderr &&
test_decode_color <stderr >decoded &&
test_grep RED decoded &&
test_grep "\\^G" stderr &&
tr -dc "\\007" <stderr >actual &&
test_must_be_empty actual &&

rm -rf throw-away &&
git -c sideband.allowControlCharacters=false \
clone --no-local . throw-away 2>stderr &&
test_decode_color <stderr >decoded &&
test_grep ! RED decoded &&
test_grep "\\^G" stderr &&

rm -rf throw-away &&
git -c sideband.allowControlCharacters clone --no-local . throw-away 2>stderr &&
test_decode_color <stderr >decoded &&
test_grep RED decoded &&
tr -dc "\\007" <stderr >actual &&
test_file_not_empty actual
'

# Filter from stdin to stdout that replaces every ESC byte (octal 033) with
# the text "CSI ", so that escape sequences which were passed through
# unmasked can be matched with plain grep patterns such as "CSI \[K".
test_decode_csi() {
awk '{
while (match($0, /\033/) != 0) {
printf "%sCSI ", substr($0, 1, RSTART-1);
$0 = substr($0, RSTART + RLENGTH, length($0) - RSTART - RLENGTH + 1);
}
print
}'
}

# The remote side prints a line containing a color sequence, cursor
# movement (ESC [ G) and erase (ESC [ K) sequences, plus an "ESC ?25l"
# string. With the default setting the cursor and erase sequences must not
# reach the output unmasked and the stray ESC must show up as "^[". With
# erase,cursor,color configured, the color, erase and cursor sequences must
# all be passed through and must not appear in caret notation.
test_expect_success 'control sequences in sideband allowed by default' '
write_script .git/color-me-surprised <<-\EOF &&
printf "error: \\033[31mcolor\\033[m\\033[Goverwrite\\033[Gerase\\033[K\\033?25l\\n" >&2
exec "$@"
EOF
test_config_global uploadPack.packObjectsHook ./color-me-surprised &&
test_commit need-at-least-one-commit-at-least &&

rm -rf throw-away &&
git clone --no-local . throw-away 2>stderr &&
test_decode_color <stderr >color-decoded &&
test_decode_csi <color-decoded >decoded &&
test_grep ! "CSI \\[K" decoded &&
test_grep ! "CSI \\[G" decoded &&
test_grep "\\^\\[?25l" decoded &&

rm -rf throw-away &&
git -c sideband.allowControlCharacters=erase,cursor,color \
clone --no-local . throw-away 2>stderr &&
test_decode_color <stderr >color-decoded &&
test_decode_csi <color-decoded >decoded &&
test_grep "RED" decoded &&
test_grep "CSI \\[K" decoded &&
test_grep "CSI \\[G" decoded &&
test_grep ! "\\^\\[\\[K" decoded &&
test_grep ! "\\^\\[\\[G" decoded
'

test_done
Loading