Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: run file hash algorithms in parallel #3636

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
3 changes: 3 additions & 0 deletions cmd/syft/internal/commands/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/anchore/clio"
"github.com/anchore/fangs"
"github.com/anchore/go-collections"
"github.com/anchore/go-sync"
"github.com/anchore/stereoscope"
"github.com/anchore/stereoscope/pkg/image"
"github.com/anchore/syft/cmd/syft/internal/options"
Expand Down Expand Up @@ -184,6 +185,8 @@ func runScan(ctx context.Context, id clio.Identification, opts *scanOptions, use
}
}

ctx = sync.SetContextExecutor(ctx, sync.NewExecutor(opts.Parallelism))

src, err := getSource(ctx, &opts.Catalog, userInput, sources...)

if err != nil {
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ require (
github.com/OneOfOne/xxhash v1.2.8
github.com/adrg/xdg v0.5.3
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
github.com/anchore/go-sync v0.0.0-20250207155614-8cc2f19d469f
github.com/hashicorp/hcl/v2 v2.23.0
github.com/magiconair/properties v1.8.9
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ github.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb h1:iDMnx6LIj
github.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb/go.mod h1:DmTY2Mfcv38hsHbG78xMiTDdxFtkHpgYNVDPsF2TgHk=
github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 h1:aM1rlcoLz8y5B2r4tTLMiVTrMtpfY0O8EScKJxaSaEc=
github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA=
github.com/anchore/go-sync v0.0.0-20250207155614-8cc2f19d469f h1:ZGehM2yWw5Ys8sfXCGdeYUIHCZxtFUzY5DPO/mDL/ss=
github.com/anchore/go-sync v0.0.0-20250207155614-8cc2f19d469f/go.mod h1:IUw+ZYPpxADtssCML2cyxaEV0RzfK6PajiBVQyGSyG4=
github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0vW0nnNKJfJieyH/TZ9UYAnTZs5/gHTdAe8=
github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ=
github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZVsCYMrIZBpFxwV26CbsuoEh5muXD5I1Ods=
Expand Down
6 changes: 4 additions & 2 deletions internal/file/digest.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
package file

import (
"context"
"crypto"
"fmt"
"hash"
"io"
"strings"

"github.com/anchore/go-sync"
"github.com/anchore/syft/syft/file"
)

Expand All @@ -21,7 +23,7 @@ func supportedHashAlgorithms() []crypto.Hash {
}
}

func NewDigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]file.Digest, error) {
func NewDigestsFromFile(_ context.Context, closer io.ReadCloser, hashes []crypto.Hash) ([]file.Digest, error) {
hashes = NormalizeHashes(hashes)
// create a set of hasher objects tied together with a single writer to feed content into
hashers := make([]hash.Hash, len(hashes))
Expand All @@ -31,7 +33,7 @@ func NewDigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]file.Dige
writers[idx] = hashers[idx]
}

size, err := io.Copy(io.MultiWriter(writers...), closer)
size, err := io.Copy(sync.ParallelWriter(sync.NewExecutor(len(writers)), writers...), closer)
if err != nil {
return nil, err
}
Expand Down
3 changes: 2 additions & 1 deletion internal/file/digest_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package file

import (
"context"
"crypto"
"os"
"testing"
Expand Down Expand Up @@ -81,7 +82,7 @@ func TestNewDigestsFromFile(t *testing.T) {
fh, err := os.Open(tt.fixture)
require.NoError(t, err)

got, err := NewDigestsFromFile(fh, tt.hashes)
got, err := NewDigestsFromFile(context.TODO(), fh, tt.hashes)
tt.wantErr(t, err)
if err != nil {
return
Expand Down
29 changes: 16 additions & 13 deletions syft/file/cataloger/filedigest/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/dustin/go-humanize"

"github.com/anchore/go-sync"
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/bus"
Expand All @@ -34,7 +35,6 @@ func NewCataloger(hashes []crypto.Hash) *Cataloger {
func (i *Cataloger) Catalog(ctx context.Context, resolver file.Resolver, coordinates ...file.Coordinates) (map[file.Coordinates][]file.Digest, error) {
results := make(map[file.Coordinates][]file.Digest)
var locations []file.Location
var errs error

if len(coordinates) == 0 {
locations = intCataloger.AllRegularFiles(ctx, resolver)
Expand All @@ -49,41 +49,44 @@ func (i *Cataloger) Catalog(ctx context.Context, resolver file.Resolver, coordin
}

prog := catalogingProgress(int64(len(locations)))
for _, location := range locations {
result, err := i.catalogLocation(resolver, location)

err := sync.Reduce(sync.ContextExecutor(ctx), sync.ToSeq(locations), func(location file.Location, digests []file.Digest) {
if len(digests) > 0 {
results[location.Coordinates] = digests
}
}, func(location file.Location) ([]file.Digest, error) {
result, err := i.catalogLocation(ctx, resolver, location)

if errors.Is(err, ErrUndigestableFile) {
continue
return nil, nil
}

prog.AtomicStage.Set(location.Path())

if internal.IsErrPathPermission(err) {
log.Debugf("file digests cataloger skipping %q: %+v", location.RealPath, err)
errs = unknown.Append(errs, location, err)
continue
return nil, unknown.New(location, err)
}

if err != nil {
prog.SetError(err)
errs = unknown.Append(errs, location, err)
continue
return nil, unknown.New(location, err)
}

prog.Increment()

results[location.Coordinates] = result
}
return result, nil
})

log.Debugf("file digests cataloger processed %d files", prog.Current())

prog.AtomicStage.Set(fmt.Sprintf("%s files", humanize.Comma(prog.Current())))
prog.SetCompleted()

return results, errs
return results, err
}

func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Location) ([]file.Digest, error) {
func (i *Cataloger) catalogLocation(ctx context.Context, resolver file.Resolver, location file.Location) ([]file.Digest, error) {
meta, err := resolver.FileMetadataByLocation(location)
if err != nil {
return nil, err
Expand All @@ -100,7 +103,7 @@ func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Locati
}
defer internal.CloseAndLogError(contentReader, location.AccessPath)

digests, err := intFile.NewDigestsFromFile(contentReader, i.hashes)
digests, err := intFile.NewDigestsFromFile(ctx, contentReader, i.hashes)
if err != nil {
return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
}
Expand Down
4 changes: 2 additions & 2 deletions syft/file/cataloger/filedigest/cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,13 @@ func TestDigestsCataloger(t *testing.T) {
name: "md5",
digests: []crypto.Hash{crypto.MD5},
files: []string{"test-fixtures/last/empty/empty", "test-fixtures/last/path.txt"},
expected: testDigests(t, "test-fixtures/last", []string{"empty/empty", "path.txt"}, crypto.MD5),
expected: testDigests(t, "test-fixtures/last", []string{"path.txt"}, crypto.MD5),
},
{
name: "md5-sha1-sha256",
digests: []crypto.Hash{crypto.MD5, crypto.SHA1, crypto.SHA256},
files: []string{"test-fixtures/last/empty/empty", "test-fixtures/last/path.txt"},
expected: testDigests(t, "test-fixtures/last", []string{"empty/empty", "path.txt"}, crypto.MD5, crypto.SHA1, crypto.SHA256),
expected: testDigests(t, "test-fixtures/last", []string{"path.txt"}, crypto.MD5, crypto.SHA1, crypto.SHA256),
},
}

Expand Down
6 changes: 3 additions & 3 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
}

// grab and assign digest for the entire archive
digests, err := getDigestsFromArchive(j.archivePath)
digests, err := getDigestsFromArchive(ctx, j.archivePath)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -475,15 +475,15 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren
return pkgs, nil
}

func getDigestsFromArchive(archivePath string) ([]file.Digest, error) {
func getDigestsFromArchive(ctx context.Context, archivePath string) ([]file.Digest, error) {
archiveCloser, err := os.Open(archivePath)
if err != nil {
return nil, fmt.Errorf("unable to open archive path (%s): %w", archivePath, err)
}
defer internal.CloseAndLogError(archiveCloser, archivePath)

// grab and assign digest for the entire archive
digests, err := intFile.NewDigestsFromFile(archiveCloser, javaArchiveHashes)
digests, err := intFile.NewDigestsFromFile(ctx, archiveCloser, javaArchiveHashes)
if err != nil {
log.Warnf("failed to create digest for file=%q: %+v", archivePath, err)
}
Expand Down
3 changes: 2 additions & 1 deletion syft/source/filesource/file_source.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package filesource

import (
"context"
"crypto"
"fmt"
"os"
Expand Down Expand Up @@ -68,7 +69,7 @@ func New(cfg Config) (source.Source, error) {

defer fh.Close()

digests, err = intFile.NewDigestsFromFile(fh, cfg.DigestAlgorithms)
digests, err = intFile.NewDigestsFromFile(context.TODO(), fh, cfg.DigestAlgorithms)
if err != nil {
return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err)
}
Expand Down
Loading