Skip to content

Commit 46ed40e

Browse files
[linter-miner] linter: add lenstringsplit analyzer (#41090)
1 parent e33e3e5 commit 46ed40e

6 files changed

Lines changed: 324 additions & 0 deletions

File tree

cmd/linters/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"github.com/github/gh-aw/pkg/linters/httpnoctx"
3232
"github.com/github/gh-aw/pkg/linters/jsonmarshalignoredeerror"
3333
"github.com/github/gh-aw/pkg/linters/largefunc"
34+
"github.com/github/gh-aw/pkg/linters/lenstringsplit"
3435
"github.com/github/gh-aw/pkg/linters/lenstringzero"
3536
"github.com/github/gh-aw/pkg/linters/manualmutexunlock"
3637
"github.com/github/gh-aw/pkg/linters/osexitinlibrary"
@@ -81,6 +82,7 @@ func main() {
8182
strconvparseignorederror.Analyzer,
8283
jsonmarshalignoredeerror.Analyzer,
8384
lenstringzero.Analyzer,
85+
lenstringsplit.Analyzer,
8486
timeafterleak.Analyzer,
8587
timesleepnocontext.Analyzer,
8688
tolowerequalfold.Analyzer,
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# ADR-41090: Add `lenstringsplit` linter for `len(strings.Split(...))` expressions
2+
3+
**Date**: 2026-06-23
4+
**Status**: Draft
5+
6+
## Context
7+
8+
The codebase maintains a custom `go/analysis` linter suite (`pkg/linters/*`, registered in `cmd/linters/main.go`) to enforce house style and catch performance and correctness anti-patterns that off-the-shelf golangci-lint rules miss. The expression `len(strings.Split(s, sep))` allocates a full `[]string` slice only to discard it after taking the count; `strings.Count(s, sep)+1` produces the identical value with zero intermediate allocation. A code-pattern scan found four occurrences of this pattern in non-test files (`pkg/workflow/codex_logs.go`, `pkg/workflow/xml_comments.go`, `pkg/workflow/pip.go`, `pkg/parser/schema_errors.go`), establishing it as a recurring rather than one-off issue. A new rule must fit the established analyzer framework and produce zero false positives.
9+
10+
## Decision
11+
12+
We will add a new `go/analysis` analyzer package `pkg/linters/lenstringsplit` and register it in `cmd/linters/main.go`. The analyzer visits every `*ast.CallExpr` via the shared `inspect` pass, matches the builtin `len` applied to a single argument that is itself a call, and resolves that inner call's selector type against the standard-library `strings` package (via `astutil.IsPkgSelector`) to confirm it is `strings.Split`. Test files are skipped via `filecheck.IsTestFile`. The diagnostic is purely advisory (`pass.ReportRangef`) and names the exact replacement.
13+
14+
## Alternatives Considered
15+
16+
### Alternative 1: Rely on an off-the-shelf linter
17+
18+
No enabled golangci-lint rule flags `len(strings.Split(...))`. Adopting or re-enabling an external linter to cover this would pull in unrelated checks and is subject to upstream timelines outside our control. Rejected in favor of a focused local analyzer consistent with the existing `pkg/linters` suite (e.g. the sibling `lenstringzero` rule).
19+
20+
### Alternative 2: Name-based matching on `Split()` without package resolution
21+
22+
The analyzer could flag any selector call ending in `.Split()` without consulting type information. This is simpler but would misreport unrelated `Split()` methods on user types (e.g. `bytes.Split`, or a custom splitter). Rejected; resolving the selector against the `strings` package keeps false positives at zero, as exercised by the `strings.Fields` and pre-split-slice negative fixtures.
23+
24+
## Consequences
25+
26+
### Positive
27+
28+
- Catches a recurring, measurable allocation waste (four confirmed sites) that no currently enabled linter detects, extending the suite alongside the sibling `lenstringzero` analyzer.
29+
- Package-resolved matching yields zero false positives: the testdata fixtures confirm `strings.Fields`, plain `len(slice)`, and bare `strings.Split` (result actually used) are not flagged.
30+
- Follows the established analyzer pattern, so it is consistent with the rest of the suite and covered by `analysistest`-based tests.
31+
32+
### Negative
33+
34+
- Adds another analyzer to maintain and run, marginally increasing lint time and the registration surface in `cmd/linters/main.go`.
35+
- Only the literal `len(strings.Split(...))` shape is detected; an equivalent count taken through an intermediate variable (`parts := strings.Split(...); len(parts)`) is out of scope.
36+
37+
### Neutral
38+
39+
- The rule is purely diagnostic; it offers no automated fix/suggested edit, so the four existing call sites must be migrated manually if desired.
40+
- `strings.SplitN` and `strings.SplitAfter` are not matched — only `Split` — keeping the rule narrowly scoped to the exact wasteful idiom.
41+
42+
---
43+
44+
*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/28048403873) workflow. The PR author must review, complete, and finalize this document before the PR can merge.*
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// Package lenstringsplit implements a Go analysis linter that flags
2+
// len(strings.Split(s, sep)) expressions with a provably non-empty separator
3+
// that allocate a []string just to count substrings. strings.Count(s, sep)+1
4+
// achieves the same result for non-empty separators without the intermediate
5+
// allocation.
6+
package lenstringsplit
7+
8+
import (
9+
"fmt"
10+
"go/ast"
11+
"go/constant"
12+
"go/token"
13+
"go/types"
14+
15+
"golang.org/x/tools/go/analysis"
16+
"golang.org/x/tools/go/analysis/passes/inspect"
17+
18+
"github.com/github/gh-aw/pkg/linters/internal/astutil"
19+
"github.com/github/gh-aw/pkg/linters/internal/filecheck"
20+
"github.com/github/gh-aw/pkg/linters/internal/nolint"
21+
)
22+
23+
// Analyzer is the len-strings-split analysis pass.
24+
var Analyzer = &analysis.Analyzer{
25+
Name: "lenstringsplit",
26+
Doc: "reports len(strings.Split(s, sep)) expressions with a provably non-empty separator that allocate a []string just to count substrings; use strings.Count(s, sep)+1 instead",
27+
URL: "https://github.com/github/gh-aw/tree/main/pkg/linters/lenstringsplit",
28+
Requires: []*analysis.Analyzer{inspect.Analyzer},
29+
Run: run,
30+
}
31+
32+
func run(pass *analysis.Pass) (any, error) {
33+
insp, err := astutil.Inspector(pass)
34+
if err != nil {
35+
return nil, err
36+
}
37+
noLintLinesByFile := nolint.BuildLineIndex(pass, "lenstringsplit")
38+
39+
nodeFilter := []ast.Node{(*ast.CallExpr)(nil)}
40+
41+
insp.Preorder(nodeFilter, func(n ast.Node) {
42+
outer, ok := n.(*ast.CallExpr)
43+
if !ok {
44+
return
45+
}
46+
47+
if !isBuiltinLen(pass, outer) {
48+
return
49+
}
50+
51+
if len(outer.Args) != 1 {
52+
return
53+
}
54+
inner, ok := outer.Args[0].(*ast.CallExpr)
55+
if !ok {
56+
return
57+
}
58+
if !isStringsSplit(pass, inner) {
59+
return
60+
}
61+
if !hasProvablyNonEmptySeparator(pass, inner) {
62+
return
63+
}
64+
65+
pos := pass.Fset.PositionFor(outer.Pos(), false)
66+
if filecheck.IsTestFile(pos.Filename) {
67+
return
68+
}
69+
if nolint.HasDirective(pos, noLintLinesByFile) {
70+
return
71+
}
72+
73+
pass.Report(analysis.Diagnostic{
74+
Pos: outer.Pos(),
75+
End: outer.End(),
76+
Message: "len(strings.Split(...)) allocates a []string just to count substrings; use strings.Count(...)+1 instead",
77+
SuggestedFixes: buildCountFix(pass, outer, inner),
78+
})
79+
})
80+
81+
return nil, nil
82+
}
83+
84+
// isBuiltinLen reports whether call is an invocation of the builtin len function.
85+
func isBuiltinLen(pass *analysis.Pass, call *ast.CallExpr) bool {
86+
ident, ok := call.Fun.(*ast.Ident)
87+
if !ok || ident.Name != "len" {
88+
return false
89+
}
90+
obj := pass.TypesInfo.Uses[ident]
91+
return obj == nil || obj == types.Universe.Lookup("len")
92+
}
93+
94+
// isStringsSplit reports whether call is strings.Split from the standard
95+
// library "strings" package.
96+
func isStringsSplit(pass *analysis.Pass, call *ast.CallExpr) bool {
97+
sel, ok := call.Fun.(*ast.SelectorExpr)
98+
if !ok || sel.Sel.Name != "Split" {
99+
return false
100+
}
101+
return astutil.IsPkgSelector(pass, sel, "strings")
102+
}
103+
104+
func hasProvablyNonEmptySeparator(pass *analysis.Pass, call *ast.CallExpr) bool {
105+
if len(call.Args) != 2 {
106+
return false
107+
}
108+
if lit, ok := call.Args[1].(*ast.BasicLit); ok && lit.Kind == token.STRING {
109+
return lit.Value != `""`
110+
}
111+
tv, ok := pass.TypesInfo.Types[call.Args[1]]
112+
if !ok || tv.Value == nil || tv.Value.Kind() != constant.String {
113+
return false
114+
}
115+
return constant.StringVal(tv.Value) != ""
116+
}
117+
118+
func buildCountFix(pass *analysis.Pass, outer, inner *ast.CallExpr) []analysis.SuggestedFix {
119+
if len(inner.Args) != 2 {
120+
return nil
121+
}
122+
123+
sText := astutil.NodeText(pass.Fset, inner.Args[0])
124+
sepText := astutil.NodeText(pass.Fset, inner.Args[1])
125+
pkgText := splitPkgText(pass, inner)
126+
if sText == "" || sepText == "" || pkgText == "" {
127+
return nil
128+
}
129+
130+
return []analysis.SuggestedFix{{
131+
Message: "Replace with strings.Count(...)+1",
132+
TextEdits: []analysis.TextEdit{{
133+
Pos: outer.Pos(),
134+
End: outer.End(),
135+
NewText: fmt.Appendf(nil, "%s.Count(%s, %s)+1", pkgText, sText, sepText),
136+
}},
137+
}}
138+
}
139+
140+
func splitPkgText(pass *analysis.Pass, call *ast.CallExpr) string {
141+
sel, ok := call.Fun.(*ast.SelectorExpr)
142+
if !ok {
143+
return ""
144+
}
145+
return astutil.NodeText(pass.Fset, sel.X)
146+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//go:build !integration
2+
3+
package lenstringsplit_test
4+
5+
import (
6+
"testing"
7+
8+
"golang.org/x/tools/go/analysis/analysistest"
9+
10+
"github.com/github/gh-aw/pkg/linters/lenstringsplit"
11+
)
12+
13+
func TestLenStringSplit(t *testing.T) {
14+
testdata := analysistest.TestData()
15+
analysistest.RunWithSuggestedFixes(t, testdata, lenstringsplit.Analyzer, "lenstringsplit")
16+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package lenstringsplit
2+
3+
import "strings"
4+
5+
const comma = ","
6+
7+
// flagged: len(strings.Split(...)) with a non-empty separator inside a return statement.
8+
func countLines(content string) int {
9+
return len(strings.Split(content, "\n")) // want `len\(strings\.Split\(\.\.\.`
10+
}
11+
12+
// flagged: len(strings.Split(...)) assigned to a variable.
13+
func countFields(s string) int {
14+
n := len(strings.Split(s, comma)) // want `len\(strings\.Split\(\.\.\.`
15+
return n
16+
}
17+
18+
// not flagged: len() is not called on the split result.
19+
func splitAndUse(s string) []string {
20+
return strings.Split(s, "/")
21+
}
22+
23+
// not flagged: len applied to a pre-split slice, not a strings.Split call.
24+
func countFromSlice(parts []string) int {
25+
return len(parts)
26+
}
27+
28+
// not flagged: strings.Fields is a different function.
29+
func countWords(s string) int {
30+
return len(strings.Fields(s))
31+
}
32+
33+
// not flagged: empty separators are not equivalent to strings.Count(...)+1.
34+
func countRunes(s string) int {
35+
return len(strings.Split(s, ""))
36+
}
37+
38+
func customLenNotFlagged(s string) int {
39+
len := func(parts []string) int {
40+
return 0
41+
}
42+
return len(strings.Split(s, ","))
43+
}
44+
45+
// not flagged: a nolint directive suppresses the diagnostic.
46+
func suppressedCount(s string) int {
47+
return len(strings.Split(s, ",")) //nolint:lenstringsplit
48+
}
49+
50+
// not flagged: strings.SplitN has different semantics.
51+
func countSplitN(s string) int {
52+
return len(strings.SplitN(s, ",", 3))
53+
}
54+
55+
// not flagged: strings.SplitAfter is a distinct function.
56+
func countSplitAfter(s string) int {
57+
return len(strings.SplitAfter(s, ","))
58+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package lenstringsplit
2+
3+
import "strings"
4+
5+
const comma = ","
6+
7+
// flagged: len(strings.Split(...)) with a non-empty separator inside a return statement.
8+
func countLines(content string) int {
9+
return strings.Count(content, "\n")+1 // want `len\(strings\.Split\(\.\.\.`
10+
}
11+
12+
// flagged: len(strings.Split(...)) assigned to a variable.
13+
func countFields(s string) int {
14+
n := strings.Count(s, comma)+1 // want `len\(strings\.Split\(\.\.\.`
15+
return n
16+
}
17+
18+
// not flagged: len() is not called on the split result.
19+
func splitAndUse(s string) []string {
20+
return strings.Split(s, "/")
21+
}
22+
23+
// not flagged: len applied to a pre-split slice, not a strings.Split call.
24+
func countFromSlice(parts []string) int {
25+
return len(parts)
26+
}
27+
28+
// not flagged: strings.Fields is a different function.
29+
func countWords(s string) int {
30+
return len(strings.Fields(s))
31+
}
32+
33+
// not flagged: empty separators are not equivalent to strings.Count(...)+1.
34+
func countRunes(s string) int {
35+
return len(strings.Split(s, ""))
36+
}
37+
38+
func customLenNotFlagged(s string) int {
39+
len := func(parts []string) int {
40+
return 0
41+
}
42+
return len(strings.Split(s, ","))
43+
}
44+
45+
// not flagged: a nolint directive suppresses the diagnostic.
46+
func suppressedCount(s string) int {
47+
return len(strings.Split(s, ",")) //nolint:lenstringsplit
48+
}
49+
50+
// not flagged: strings.SplitN has different semantics.
51+
func countSplitN(s string) int {
52+
return len(strings.SplitN(s, ",", 3))
53+
}
54+
55+
// not flagged: strings.SplitAfter is a distinct function.
56+
func countSplitAfter(s string) int {
57+
return len(strings.SplitAfter(s, ","))
58+
}

0 commit comments

Comments
 (0)