From b52ef50a01614176d35be4b9da4cb5442d04da58 Mon Sep 17 00:00:00 2001 From: ajayk Date: Sun, 5 Jan 2025 00:15:12 -0800 Subject: [PATCH] regex is slow --- Makefile | 2 +- pkg/sbom/package.go | 26 ++++++++--------- pkg/sbom/package_test.go | 61 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 16 deletions(-) create mode 100644 pkg/sbom/package_test.go diff --git a/Makefile b/Makefile index eebfcc349..43571674e 100644 --- a/Makefile +++ b/Makefile @@ -117,7 +117,7 @@ GOLANGCI_LINT_BIN = $(GOLANGCI_LINT_DIR)/golangci-lint setup-golangci-lint: rm -f $(GOLANGCI_LINT_BIN) || : set -e ; - GOBIN=$(GOLANGCI_LINT_DIR) go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.59.0; + GOBIN=$(GOLANGCI_LINT_DIR) go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.63.4; .PHONY: fmt fmt: ## Format all go files diff --git a/pkg/sbom/package.go b/pkg/sbom/package.go index c6a7dad57..949b6ecad 100644 --- a/pkg/sbom/package.go +++ b/pkg/sbom/package.go @@ -20,7 +20,6 @@ package sbom import ( "context" "fmt" - "regexp" "sort" "strconv" "strings" @@ -174,10 +173,6 @@ func (p Package) getExternalRefs() []spdx.ExternalRef { return result } -// invalidIDCharsRe is a regular expression that matches characters not -// considered valid in SPDX identifiers. -var invalidIDCharsRe = regexp.MustCompile(`[^a-zA-Z0-9-.]+`) - // stringToIdentifier converts a string to a valid SPDX identifier by replacing // invalid characters. 
// stringToIdentifier rewrites in so that it only contains characters allowed
// in an SPDX identifier: ASCII letters, digits, '-' and '.'. ':' and '/' are
// replaced by '-'; every other rune is replaced by "C" followed by its decimal
// Unicode code point.
//
// Examples:
//
//	"foo:bar" -> "foo-bar"
//	"foo/bar" -> "foo-bar"
//	"foo bar" -> "fooC32bar"
//
// The input is walked rune by rune, so a byte that is not valid UTF-8 is seen
// as U+FFFD and encoded as "C65533". The mapping is not injective: "foo bar"
// and "fooC32bar" both yield "fooC32bar".
func stringToIdentifier(in string) string {
	var sb strings.Builder
	// len(in) is a lower bound on the output size; the builder grows further
	// only when invalid runes have to be expanded.
	sb.Grow(len(in))

	for _, r := range in {
		switch {
		case r == ':' || r == '/':
			sb.WriteByte('-')
		case isValidIDRune(r):
			// Every valid rune is ASCII, so it fits in a single byte.
			sb.WriteByte(byte(r))
		default:
			sb.WriteString(encodeInvalidRune(r))
		}
	}
	return sb.String()
}

// isValidIDRune reports whether r may appear unchanged in an identifier, i.e.
// whether it belongs to the character class [a-zA-Z0-9-.].
func isValidIDRune(r rune) bool {
	switch {
	case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
		return true
	default:
		return r == '-' || r == '.'
	}
}

// encodeInvalidRune returns the replacement text for a rune that is not
// allowed in an identifier: the letter "C" followed by the rune's code point
// in decimal (for example ' ' becomes "C32").
func encodeInvalidRune(r rune) string {
	return "C" + strconv.Itoa(int(r))
}

// Copyright 2024 Chainguard, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+package sbom + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_stringToIdentifier(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "basic_colon", + input: "foo:bar", + expected: "foo-bar", // Colons replaced with dashes. + }, + { + name: "basic_slash", + input: "foo/bar", + expected: "foo-bar", // Slashes replaced with dashes. + }, + { + name: "space_replacement", + input: "foo bar", + expected: "fooC32bar", // Spaces encoded as Unicode prefix. + }, + { + name: "mixed_colon_and_slash", + input: "foo:bar/baz", + expected: "foo-bar-baz", // Mixed colons and slashes replaced with dashes. + }, + { + name: "valid_characters_unchanged", + input: "example-valid.123", + expected: "example-valid.123", // Valid characters remain unchanged. + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + result := stringToIdentifier(test.input) + require.Equal(t, test.expected, result, "unexpected result for input %q", test.input) + }) + } +}