Skip to content

Commit 03daf96

Browse files
authored
Add option to control patch subject cleaning (#36)
When processing mail-formatted patches, the default cleanup removed all leading content in square brackets, but this pattern is often used to identify tickets or other information that should remain in the commit title. Git supports disabling this with the `-k` and `-b` flags, which we simulate with the new SubjectCleanMode options. Use WithSubjectCleanMode(SubjectCleanPatchOnly) to only remove bracketed strings that contain "PATCH", keeping others that are (probably) part of the actual commit message. Note that because of the mail parsing library, we cannot replicate the `-k` flag exactly and always clean leading and trailing whitespace.
1 parent dc43dbf commit 03daf96

File tree

3 files changed

+232
-93
lines changed

3 files changed

+232
-93
lines changed

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,7 @@ The parsing code has also had a modest amount of fuzz testing.
101101
context of each fragment must exactly match the source file; `git apply`
102102
implements a search algorithm that tries different lines and amounts of
103103
context, with further options to normalize or ignore whitespace changes.
104+
105+
7. When parsing mail-formatted patch headers, leading and trailing whitespace
106+
is always removed from `Subject` lines. There is no exact equivalent to `git
107+
mailinfo -k`.

gitdiff/patch_header.go

+81-47
Original file line numberDiff line numberDiff line change
@@ -165,34 +165,71 @@ func ParsePatchDate(s string) (time.Time, error) {
165165
return time.Time{}, fmt.Errorf("unknown date format: %s", s)
166166
}
167167

168-
// ParsePatchHeader parses a preamble string as returned by Parse into a
168+
// A PatchHeaderOption modifies the behavior of ParsePatchHeader.
169+
type PatchHeaderOption func(*patchHeaderOptions)
170+
171+
// SubjectCleanMode controls how ParsePatchHeader cleans subject lines when
172+
// parsing mail-formatted patches.
173+
type SubjectCleanMode int
174+
175+
const (
176+
// SubjectCleanWhitespace removes leading and trailing whitespace.
177+
SubjectCleanWhitespace SubjectCleanMode = iota
178+
179+
// SubjectCleanAll removes leading and trailing whitespace, leading "Re:",
180+
// "re:", and ":" strings, and leading strings enclosed by '[' and ']'.
181+
// This is the default behavior of git (see `git mailinfo`) and this
182+
// package.
183+
SubjectCleanAll
184+
185+
// SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes
186+
// leading strings enclosed by '[' and ']' if they contain "PATCH".
187+
SubjectCleanPatchOnly
188+
)
189+
190+
// WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By
191+
// default, uses SubjectCleanAll.
192+
func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption {
193+
return func(opts *patchHeaderOptions) {
194+
opts.subjectCleanMode = m
195+
}
196+
}
197+
198+
type patchHeaderOptions struct {
199+
subjectCleanMode SubjectCleanMode
200+
}
201+
202+
// ParsePatchHeader parses the preamble string returned by [Parse] into a
169203
// PatchHeader. Due to the variety of header formats, some fields of the parsed
170204
// PatchHeader may be unset after parsing.
171205
//
172206
// Supported formats are the short, medium, full, fuller, and email pretty
173-
// formats used by git diff, git log, and git show and the UNIX mailbox format
174-
// used by git format-patch.
207+
// formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox
208+
// format used by `git format-patch`.
175209
//
176-
// If ParsePatchHeader detects that it is handling an email, it will
177-
// remove extra content at the beginning of the title line, such as
178-
// `[PATCH]` or `Re:` in the same way that `git mailinfo` does.
179-
// SubjectPrefix will be set to the value of this removed string.
180-
// (`git mailinfo` is the core part of `git am` that pulls information
181-
// out of an individual mail.)
210+
// When parsing mail-formatted headers, ParsePatchHeader tries to remove
211+
// email-specific content from the title and body:
182212
//
183-
// Additionally, if ParsePatchHeader detects that it's handling an
184-
// email, it will remove a `---` line and put anything after it into
185-
// BodyAppendix.
213+
// - Based on the SubjectCleanMode, remove prefixes like reply markers and
214+
// "[PATCH]" strings from the subject, saving any removed content in the
215+
// SubjectPrefix field. Parsing always discards leading and trailing
216+
// whitespace from the subject line. The default mode is SubjectCleanAll.
186217
//
187-
// Those wishing the effect of a plain `git am` should use
188-
// `PatchHeader.Title + "\n" + PatchHeader.Body` (or
189-
// `PatchHeader.Message()`). Those wishing to retain the subject
190-
// prefix and appendix material should use `PatchHeader.SubjectPrefix
191-
// + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" +
192-
// PatchHeader.BodyAppendix`.
193-
func ParsePatchHeader(header string) (*PatchHeader, error) {
194-
header = strings.TrimSpace(header)
218+
// - If the body contains a "---" line (3 hyphens), remove that line and any
219+
// content after it from the body and save it in the BodyAppendix field.
220+
//
221+
// ParsePatchHeader tries to process content it does not understand without
222+
// returning errors, but will return errors if well-identified content like
223+
// dates or identities uses unknown or invalid formats.
224+
func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) {
225+
opts := patchHeaderOptions{
226+
subjectCleanMode: SubjectCleanAll, // match git defaults
227+
}
228+
for _, optFn := range options {
229+
optFn(&opts)
230+
}
195231

232+
header = strings.TrimSpace(header)
196233
if header == "" {
197234
return &PatchHeader{}, nil
198235
}
@@ -208,12 +245,12 @@ func ParsePatchHeader(header string) (*PatchHeader, error) {
208245

209246
switch {
210247
case strings.HasPrefix(firstLine, mailHeaderPrefix):
211-
return parseHeaderMail(firstLine, strings.NewReader(rest))
248+
return parseHeaderMail(firstLine, strings.NewReader(rest), opts)
212249

213250
case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix):
214251
// With a minimum header, the first line is part of the actual mail
215252
// content and needs to be parsed as part of the "rest"
216-
return parseHeaderMail("", strings.NewReader(header))
253+
return parseHeaderMail("", strings.NewReader(header), opts)
217254

218255
case strings.HasPrefix(firstLine, prettyHeaderPrefix):
219256
return parseHeaderPretty(firstLine, strings.NewReader(rest))
@@ -366,7 +403,7 @@ func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (st
366403
return body.String(), appendix.String()
367404
}
368405

369-
func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) {
406+
func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) {
370407
msg, err := mail.ReadMessage(r)
371408
if err != nil {
372409
return nil, err
@@ -403,7 +440,7 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) {
403440
}
404441

405442
subject := msg.Header.Get("Subject")
406-
h.SubjectPrefix, h.Title = parseSubject(subject)
443+
h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode)
407444

408445
s := bufio.NewScanner(msg.Body)
409446
h.Body, h.BodyAppendix = scanMessageBody(s, "", true)
@@ -414,23 +451,24 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) {
414451
return h, nil
415452
}
416453

417-
// Takes an email subject and returns the patch prefix and commit
418-
// title. i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH
419-
// v3 3/5] ` and `Implement foo`
420-
func parseSubject(s string) (string, string) {
421-
// This is meant to be compatible with
422-
// https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject().
423-
// If compatibility with `git am` drifts, go there to see if there
424-
// are any updates.
454+
func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) {
455+
switch mode {
456+
case SubjectCleanAll, SubjectCleanPatchOnly:
457+
case SubjectCleanWhitespace:
458+
return "", strings.TrimSpace(decodeSubject(s))
459+
default:
460+
panic(fmt.Sprintf("unknown clean mode: %d", mode))
461+
}
462+
463+
// Based on the algorithm from Git in mailinfo.c:cleanup_subject()
464+
// If compatibility with `git am` drifts, go there to see if there are any updates.
425465

426466
at := 0
427467
for at < len(s) {
428468
switch s[at] {
429469
case 'r', 'R':
430470
// Detect re:, Re:, rE: and RE:
431-
if at+2 < len(s) &&
432-
(s[at+1] == 'e' || s[at+1] == 'E') &&
433-
s[at+2] == ':' {
471+
if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' {
434472
at += 3
435473
continue
436474
}
@@ -441,25 +479,21 @@ func parseSubject(s string) (string, string) {
441479
continue
442480

443481
case '[':
444-
// Look for closing parenthesis
445-
j := at + 1
446-
for ; j < len(s); j++ {
447-
if s[j] == ']' {
448-
break
482+
if i := strings.IndexByte(s[at:], ']'); i > 0 {
483+
if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") {
484+
at += i + 1
485+
continue
449486
}
450487
}
451-
452-
if j < len(s) {
453-
at = j + 1
454-
continue
455-
}
456488
}
457489

458-
// Only loop if we actually removed something
490+
// Nothing was removed, end processing
459491
break
460492
}
461493

462-
return s[:at], decodeSubject(s[at:])
494+
prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace)
495+
subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace)
496+
return
463497
}
464498

465499
// Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result

0 commit comments

Comments
 (0)