Skip to content

Commit

Permalink
regex is slow
Browse files Browse the repository at this point in the history
  • Loading branch information
ajayk committed Jan 5, 2025
1 parent 4c8ea86 commit b52ef50
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ GOLANGCI_LINT_BIN = $(GOLANGCI_LINT_DIR)/golangci-lint
# setup-golangci-lint removes any existing binary at $(GOLANGCI_LINT_BIN) and
# installs a pinned golangci-lint release into $(GOLANGCI_LINT_DIR) via
# `go install` with GOBIN overridden.
# NOTE(review): this excerpt is a rendered diff, so both the v1.59.0 and the
# v1.63.4 install lines appear; presumably only v1.63.4 remains in the actual
# Makefile -- confirm against the repository.
setup-golangci-lint:
rm -f $(GOLANGCI_LINT_BIN) || :
set -e ;
GOBIN=$(GOLANGCI_LINT_DIR) go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.59.0;
GOBIN=$(GOLANGCI_LINT_DIR) go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.63.4;

.PHONY: fmt
fmt: ## Format all go files
Expand Down
26 changes: 11 additions & 15 deletions pkg/sbom/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ package sbom
import (
"context"
"fmt"
"regexp"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -174,10 +173,6 @@ func (p Package) getExternalRefs() []spdx.ExternalRef {
return result
}

// invalidIDCharsRe is a regular expression that matches characters not
// considered valid in SPDX identifiers.
var invalidIDCharsRe = regexp.MustCompile(`[^a-zA-Z0-9-.]+`)

// stringToIdentifier converts a string to a valid SPDX identifier by replacing
// invalid characters. Colons and slashes are replaced by dashes, and all other
// invalid characters are replaced by their Unicode code point prefixed with
Expand All @@ -189,20 +184,21 @@ var invalidIDCharsRe = regexp.MustCompile(`[^a-zA-Z0-9-.]+`)
// "foo/bar" -> "foo-bar"
// "foo bar" -> "fooC32bar"
func stringToIdentifier(in string) string {
	// Build the result in a single pass over the runes of the input instead
	// of chaining ReplaceAll calls and a regular expression. The output is
	// never shorter than the input for ASCII text, so len(in) is a sensible
	// initial capacity.
	var sb strings.Builder
	sb.Grow(len(in))

	for _, r := range in {
		switch {
		case r == ':' || r == '/':
			// Colons and slashes are mapped to a dash.
			sb.WriteRune('-')
		case r == '-' || r == '.' || (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9'):
			// Letters, digits, dash and dot are copied through unchanged.
			sb.WriteRune(r)
		default:
			// Everything else is replaced by its encoded code point.
			sb.WriteString(encodeInvalidRune(r))
		}
	}

	return sb.String()
}

// encodeInvalidRune returns the replacement text for a rune that is not
// allowed in an identifier: the letter "C" followed by the rune's code point
// in decimal (for example, a space becomes "C32").
func encodeInvalidRune(r rune) string {
	return "C" + strconv.Itoa(int(r))
}
61 changes: 61 additions & 0 deletions pkg/sbom/package_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2024 Chainguard, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sbom

import (
"testing"

"github.com/stretchr/testify/require"
)

// Test_stringToIdentifier runs stringToIdentifier against a table of inputs
// and checks each result against the expected identifier.
func Test_stringToIdentifier(t *testing.T) {
	type testCase struct {
		name     string
		input    string
		expected string
	}

	cases := []testCase{
		// A colon becomes a dash.
		{name: "basic_colon", input: "foo:bar", expected: "foo-bar"},
		// A slash becomes a dash.
		{name: "basic_slash", input: "foo/bar", expected: "foo-bar"},
		// A space is encoded as "C" plus its decimal code point.
		{name: "space_replacement", input: "foo bar", expected: "fooC32bar"},
		// Colons and slashes in the same input both become dashes.
		{name: "mixed_colon_and_slash", input: "foo:bar/baz", expected: "foo-bar-baz"},
		// Letters, digits, dashes and dots pass through untouched.
		{name: "valid_characters_unchanged", input: "example-valid.123", expected: "example-valid.123"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := stringToIdentifier(tc.input)
			require.Equal(t, tc.expected, got, "unexpected result for input %q", tc.input)
		})
	}
}

0 comments on commit b52ef50

Please sign in to comment.