Skip to content

Commit b575654

Browse files
authored
Decode quoted-printable UTF8 in email subjects (#25)
1 parent 3772c9e commit b575654

File tree

2 files changed

+68
-1
lines changed

2 files changed

+68
-1
lines changed

gitdiff/patch_header.go

+27-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"fmt"
77
"io"
8+
"mime/quotedprintable"
89
"net/mail"
910
"strconv"
1011
"strings"
@@ -457,5 +458,30 @@ func parseSubject(s string) (string, string) {
457458
break
458459
}
459460

460-
return s[:at], s[at:]
461+
return s[:at], decodeSubject(s[at:])
462+
}
463+
464+
// decodeSubject decodes a subject line made of RFC 2047 "Q" encoded-words in
// the UTF-8 charset (=?UTF-8?q?...?=). `git format-patch` produces this format
// when the commit title contains a non-ASCII character (e.g. an emoji).
//
// The charset and encoding tokens are matched case-insensitively, as RFC 2047
// requires, so "=?utf-8?Q?" is accepted as well. Subjects that do not start
// with such an encoded-word are returned unchanged, as is any subject whose
// payload cannot be decoded. Other charsets and the "B" (base64) encoding are
// not supported.
//
// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
func decodeSubject(encoded string) string {
	const (
		wordPrefix = "=?UTF-8?q?"
		wordSuffix = "?="
	)

	// startsWord reports whether s begins with the encoded-word prefix,
	// ignoring ASCII case.
	startsWord := func(s string) bool {
		return len(s) >= len(wordPrefix) && strings.EqualFold(s[:len(wordPrefix)], wordPrefix)
	}

	if !startsWord(encoded) {
		// not UTF-8 encoded
		return encoded
	}

	// If the subject is too long, `git format-patch` may produce a subject line across
	// multiple lines. When parsed, this can look like the following:
	// <UTF8-prefix><first-line> <UTF8-prefix><second-line>
	// Collect the payload of every adjacent encoded-word; whitespace that only
	// separates two encoded-words is not part of the text and is dropped.
	var payload strings.Builder
	rest := encoded
	for startsWord(rest) {
		rest = rest[len(wordPrefix):]

		end := strings.Index(rest, wordSuffix)
		if end < 0 {
			// Unterminated encoded-word: stay lenient and treat the
			// remainder as payload.
			payload.WriteString(rest)
			rest = ""
			break
		}
		payload.WriteString(rest[:end])
		rest = rest[end+len(wordSuffix):]

		if next := strings.TrimLeft(rest, " \t\r\n"); startsWord(next) {
			rest = next
		}
	}

	// In the "Q" encoding an underscore stands for a space (a literal
	// underscore is written as =5F). Rewrite it as =20 rather than a raw space
	// so the quoted-printable reader does not strip it as trailing whitespace.
	qp := strings.ReplaceAll(payload.String(), "_", "=20")

	decoded, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(qp)))
	if err != nil {
		// if err, abort decoding and return original subject
		return encoded
	}

	// Anything after the last encoded-word is plain text and is kept verbatim.
	return string(decoded) + rest
}

gitdiff/patch_header_test.go

+41
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ func TestParsePatchHeader(t *testing.T) {
138138
}
139139
expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60))
140140
expectedTitle := "A sample commit to test header parsing"
141+
expectedEmojiOneLineTitle := "🤖 Enabling auto-merging"
142+
expectedEmojiMultiLineTitle := "[IA64] Put ia64 config files on the Uwe Kleine-König diet"
141143
expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line."
142144
expectedBodyAppendix := "CC: Joe Smith <[email protected]>"
143145

@@ -267,6 +269,45 @@ Another body line.
267269
Body: expectedBody,
268270
},
269271
},
272+
"mailboxEmojiOneLine": {
273+
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
274+
From: Morton Haypenny <[email protected]>
275+
Date: Sat, 11 Apr 2020 15:21:23 -0700
276+
Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Enabling=20auto-merging?=
277+
278+
The medium format shows the body, which
279+
may wrap on to multiple lines.
280+
281+
Another body line.
282+
`,
283+
Header: PatchHeader{
284+
SHA: expectedSHA,
285+
Author: expectedIdentity,
286+
AuthorDate: expectedDate,
287+
Title: expectedEmojiOneLineTitle,
288+
Body: expectedBody,
289+
},
290+
},
291+
"mailboxEmojiMultiLine": {
292+
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
293+
From: Morton Haypenny <[email protected]>
294+
Date: Sat, 11 Apr 2020 15:21:23 -0700
295+
Subject: [PATCH] =?UTF-8?q?[IA64]=20Put=20ia64=20config=20files=20on=20the=20?=
296+
=?UTF-8?q?Uwe=20Kleine-K=C3=B6nig=20diet?=
297+
298+
The medium format shows the body, which
299+
may wrap on to multiple lines.
300+
301+
Another body line.
302+
`,
303+
Header: PatchHeader{
304+
SHA: expectedSHA,
305+
Author: expectedIdentity,
306+
AuthorDate: expectedDate,
307+
Title: expectedEmojiMultiLineTitle,
308+
Body: expectedBody,
309+
},
310+
},
270311
"mailboxAppendix": {
271312
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
272313
From: Morton Haypenny <[email protected]>

0 commit comments

Comments
 (0)