diff --git a/pkg/build/package.go b/pkg/build/package.go index beefe43f6..c46cb5f23 100644 --- a/pkg/build/package.go +++ b/pkg/build/package.go @@ -18,7 +18,6 @@ import ( "bytes" "context" "crypto/sha256" - "debug/elf" "encoding/hex" "encoding/json" "fmt" @@ -26,17 +25,17 @@ import ( "io/fs" "os" "path/filepath" - "regexp" "runtime" - "sort" "strings" "text/template" - "github.com/chainguard-dev/go-pkgconfig" + "golang.org/x/exp/slices" + "github.com/klauspost/compress/gzip" "github.com/klauspost/pgzip" "chainguard.dev/melange/pkg/config" + "chainguard.dev/melange/pkg/sca" "chainguard.dev/apko/pkg/log" "github.com/chainguard-dev/go-apk/pkg/tarball" @@ -54,8 +53,6 @@ import ( // concurrent builds on giant machines, and uses only 1 core on tiny machines. var pgzipThreads = min(runtime.GOMAXPROCS(0), 8) -var libDirs = []string{"lib", "usr/lib", "lib64", "usr/lib64"} - func min(l, r int) int { if l < r { return l @@ -310,409 +307,6 @@ func (pc *PackageBuild) SignatureName() string { return fmt.Sprintf(".SIGN.RSA.%s.pub", filepath.Base(pc.Build.SigningKey)) } -type DependencyGenerator func(*PackageBuild, *config.Dependencies) error - -func dedup(in []string) []string { - sort.Strings(in) - out := make([]string, 0, len(in)) - - var prev string - for _, cur := range in { - if cur == prev { - continue - } - out = append(out, cur) - prev = cur - } - - return out -} - -func allowedPrefix(path string, prefixes []string) bool { - for _, pfx := range prefixes { - if strings.HasPrefix(path, pfx) { - return true - } - } - - return false -} - -var cmdPrefixes = []string{"bin", "sbin", "usr/bin", "usr/sbin"} - -func generateCmdProviders(pc *PackageBuild, generated *config.Dependencies) error { - if pc.Options.NoCommands { - return nil - } - - pc.Logger.Printf("scanning for commands...") - - fsys := readlinkFS(pc.WorkspaceSubdir()) - if err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - - fi, err := d.Info() - if 
err != nil { - return err - } - - mode := fi.Mode() - if !mode.IsRegular() { - return nil - } - - if mode.Perm()&0555 == 0555 { - if allowedPrefix(path, cmdPrefixes) { - basename := filepath.Base(path) - generated.Provides = append(generated.Provides, fmt.Sprintf("cmd:%s=%s-r%d", basename, pc.Origin.Package.Version, pc.Origin.Package.Epoch)) - } - } - - return nil - }); err != nil { - return err - } - - return nil -} - -// findInterpreter looks for the PT_INTERP header and extracts the interpreter so that it -// may be used as a dependency. -func findInterpreter(bin *elf.File) (string, error) { - for _, prog := range bin.Progs { - if prog.Type != elf.PT_INTERP { - continue - } - - reader := prog.Open() - interpBuf, err := io.ReadAll(reader) - if err != nil { - return "", err - } - - interpBuf = bytes.Trim(interpBuf, "\x00") - return string(interpBuf), nil - } - - return "", nil -} - -// dereferenceCrossPackageSymlink attempts to dereference a symlink across multiple package -// directories. 
-func (pc *PackageBuild) dereferenceCrossPackageSymlink(path string) (string, error) { - targetPackageNames := []string{pc.PackageName, pc.Build.Configuration.Package.Name} - realPath, err := os.Readlink(filepath.Join(pc.WorkspaceSubdir(), path)) - if err != nil { - return "", err - } - - realPath = filepath.Base(realPath) - - for _, subPkg := range pc.Build.Configuration.Subpackages { - targetPackageNames = append(targetPackageNames, subPkg.Name) - } - - for _, pkgName := range targetPackageNames { - basePath := filepath.Join(pc.Build.WorkspaceDir, "melange-out", pkgName) - - for _, libDir := range libDirs { - testPath := filepath.Join(basePath, libDir, realPath) - - if _, err := os.Stat(testPath); err == nil { - return testPath, nil - } - } - } - - return "", nil -} - -func generateSharedObjectNameDeps(pc *PackageBuild, generated *config.Dependencies) error { - pc.Logger.Printf("scanning for shared object dependencies...") - - depends := map[string][]string{} - - fsys := readlinkFS(pc.WorkspaceSubdir()) - if err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - - fi, err := d.Info() - if err != nil { - return err - } - - mode := fi.Mode() - - // If it is a symlink, lets check and see if it is a library SONAME. 
- if mode.Type()&fs.ModeSymlink == fs.ModeSymlink { - if !strings.Contains(path, ".so") { - return nil - } - - realPath, err := pc.dereferenceCrossPackageSymlink(path) - if err != nil { - return nil - } - - if realPath != "" { - ef, err := elf.Open(realPath) - if err != nil { - return nil - } - defer ef.Close() - - sonames, err := ef.DynString(elf.DT_SONAME) - // most likely SONAME is not set on this object - if err != nil { - pc.Logger.Warnf("library %s lacks SONAME", path) - return nil - } - - for _, soname := range sonames { - generated.Runtime = append(generated.Runtime, fmt.Sprintf("so:%s", soname)) - } - } - - return nil - } - - // If it is not a regular file, we are finished processing it. - if !mode.IsRegular() { - return nil - } - - if mode.Perm()&0555 == 0555 { - basename := filepath.Base(path) - - // most likely a shell script instead of an ELF, so treat any - // error as non-fatal. - // TODO(kaniini): use DirFS for this - ef, err := elf.Open(filepath.Join(pc.WorkspaceSubdir(), path)) - if err != nil { - return nil - } - defer ef.Close() - - interp, err := findInterpreter(ef) - if err != nil { - return err - } - if interp != "" && !pc.Options.NoDepends { - pc.Logger.Printf("interpreter for %s => %s", basename, interp) - - // musl interpreter is a symlink back to itself, so we want to use the non-symlink name as - // the dependency. 
- interpName := fmt.Sprintf("so:%s", filepath.Base(interp)) - interpName = strings.ReplaceAll(interpName, "so:ld-musl", "so:libc.musl") - generated.Runtime = append(generated.Runtime, interpName) - } - - libs, err := ef.ImportedLibraries() - if err != nil { - pc.Logger.Warnf("WTF: ImportedLibraries() returned error: %v", err) - return nil - } - - if !pc.Options.NoDepends { - for _, lib := range libs { - if strings.Contains(lib, ".so.") { - generated.Runtime = append(generated.Runtime, fmt.Sprintf("so:%s", lib)) - depends[lib] = append(depends[lib], path) - } - } - } - - // An executable program should never have a SONAME, but apparently binaries built - // with some versions of jlink do. Thus, if an interpreter is set (meaning it is an - // executable program), we do not scan the object for SONAMEs. - // - // Ugh: libc.so.6 has an PT_INTERP set on itself to make the `/lib/libc.so.6 --about` - // functionality work. So we always generate provides entries for libc. - if !pc.Options.NoProvides && (interp == "" || strings.HasPrefix(basename, "libc")) { - sonames, err := ef.DynString(elf.DT_SONAME) - // most likely SONAME is not set on this object - if err != nil { - pc.Logger.Warnf("library %s lacks SONAME", path) - return nil - } - - for _, soname := range sonames { - parts := strings.Split(soname, ".so.") - - var libver string - if len(parts) > 1 { - libver = parts[1] - } else { - libver = "0" - } - - if allowedPrefix(path, libDirs) { - generated.Provides = append(generated.Provides, fmt.Sprintf("so:%s=%s", soname, libver)) - } else { - generated.Vendored = append(generated.Vendored, fmt.Sprintf("so:%s=%s", soname, libver)) - } - } - } - } - - return nil - }); err != nil { - return err - } - - if pc.Build.DependencyLog != "" { - pc.Logger.Printf("writing dependency log") - - logFile, err := os.Create(fmt.Sprintf("%s.%s", pc.Build.DependencyLog, pc.Arch)) - if err != nil { - pc.Logger.Warnf("Unable to open dependency log: %v", err) - } - defer logFile.Close() - - je 
:= json.NewEncoder(logFile) - if err := je.Encode(depends); err != nil { - return err - } - } - - return nil -} - -var pkgConfigVersionRegexp = regexp.MustCompile("-(alpha|beta|rc|pre)") - -// TODO(kaniini): Turn this feature on once enough of Wolfi is built with provider data. -var generateRuntimePkgConfigDeps = false - -// generatePkgConfigDeps generates a list of provided pkg-config package names and versions, -// as well as dependency relationships. -func generatePkgConfigDeps(pc *PackageBuild, generated *config.Dependencies) error { - pc.Logger.Printf("scanning for pkg-config data...") - - fsys := readlinkFS(pc.WorkspaceSubdir()) - if err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - - if !strings.HasSuffix(path, ".pc") { - return nil - } - - fi, err := d.Info() - if err != nil { - return err - } - - mode := fi.Mode() - - // Sigh. ncurses uses symlinks to alias .pc files to other .pc files. - // Skip the symlinks for now. - if mode.Type()&fs.ModeSymlink == fs.ModeSymlink { - return nil - } - - pkg, err := pkgconfig.Load(filepath.Join(pc.WorkspaceSubdir(), path)) - if err != nil { - pc.Logger.Warnf("Unable to load .pc file (%s) using pkgconfig: %v", path, err) - return nil - } - - pcName := filepath.Base(path) - pcName, _ = strings.CutSuffix(pcName, ".pc") - - apkVersion := pkgConfigVersionRegexp.ReplaceAllString(pkg.Version, "_$1") - if !pc.Options.NoProvides { - generated.Provides = append(generated.Provides, fmt.Sprintf("pc:%s=%s", pcName, apkVersion)) - } - - if generateRuntimePkgConfigDeps { - // TODO(kaniini): Capture version relationships here too. In practice, this does not matter - // so much though for us. 
- for _, dep := range pkg.Requires { - generated.Runtime = append(generated.Runtime, fmt.Sprintf("pc:%s", dep.Identifier)) - } - - for _, dep := range pkg.RequiresPrivate { - generated.Runtime = append(generated.Runtime, fmt.Sprintf("pc:%s", dep.Identifier)) - } - - for _, dep := range pkg.RequiresInternal { - generated.Runtime = append(generated.Runtime, fmt.Sprintf("pc:%s", dep.Identifier)) - } - } - - return nil - }); err != nil { - return err - } - - return nil -} - -// generatePythonDeps generates a python3~$VERSION dependency for packages which ship -// Python modules. -func generatePythonDeps(pc *PackageBuild, generated *config.Dependencies) error { - var pythonModuleVer string - pc.Logger.Printf("scanning for python modules...") - - fsys := readlinkFS(pc.WorkspaceSubdir()) - if err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - - // Python modules are installed in paths such as /usr/lib/pythonX.Y/site-packages/..., - // so if we find a directory named site-packages, and its parent is a pythonX.Y directory, - // then we have a Python module directory. - basename := filepath.Base(path) - if basename != "site-packages" { - return nil - } - - parent := filepath.Dir(path) - basename = filepath.Base(parent) - if !strings.HasPrefix(basename, "python") { - return nil - } - - // This probably shouldn't ever happen, but lets check to make sure. - if !d.IsDir() { - return nil - } - - // This takes the X.Y part of the pythonX.Y directory name as the version to pin against. - // If the X.Y part is not present, then pythonModuleVer will remain an empty string and - // no dependency will be generated. - pythonModuleVer = basename[6:] - return nil - }); err != nil { - return err - } - - // Nothing to do... - if pythonModuleVer == "" { - return nil - } - - // Do not add a Python dependency if one already exists. 
-	for _, dep := range pc.Dependencies.Runtime {
-		if strings.HasPrefix(dep, "python") {
-			pc.Logger.Warnf("%s: Python dependency %q already specified, consider removing it in favor of SCA-generated dependency", pc.PackageName, dep)
-			return nil
-		}
-	}
-
-	// We use the python3 name here instead of the python-3 name so that we can be
-	// compatible with Alpine and Adelie. Only Wolfi provides the python-3 name.
-	generated.Runtime = append(generated.Runtime, fmt.Sprintf("python3~%s", pythonModuleVer))
-
-	return nil
-}
-
 // removeSelfProvidedDeps removes dependencies which are provided by the package itself.
 func removeSelfProvidedDeps(runtimeDeps, providedDeps []string) []string {
 	providedDepsMap := map[string]bool{}
@@ -737,15 +331,28 @@ func removeSelfProvidedDeps(runtimeDeps, providedDeps []string) []string {
 
 func (pc *PackageBuild) GenerateDependencies() error {
 	generated := config.Dependencies{}
-	generators := []DependencyGenerator{
-		generateSharedObjectNameDeps,
-		generateCmdProviders,
-		generatePkgConfigDeps,
-		generatePythonDeps,
+
+	hdl := SCABuildInterface{
+		PackageBuild: pc,
 	}
 
-	for _, gen := range generators {
-		if err := gen(pc, &generated); err != nil {
+	if err := sca.Analyze(&hdl, &generated); err != nil {
+		return fmt.Errorf("analyzing package: %w", err)
+	}
+
+	if pc.Build.DependencyLog != "" {
+		pc.Logger.Printf("writing dependency log")
+
+		// Stop here with the real cause: encoding into the nil *os.File that
+		// os.Create returns on failure would only surface as "invalid argument".
+		logFile, err := os.Create(fmt.Sprintf("%s.%s", pc.Build.DependencyLog, pc.Arch))
+		if err != nil {
+			return fmt.Errorf("creating dependency log: %w", err)
+		}
+		defer logFile.Close()
+
+		je := json.NewEncoder(logFile)
+		if err := je.Encode(&generated); err != nil {
 			return err
 		}
 	}
@@ -757,10 +364,14 @@ func (pc *PackageBuild) GenerateDependencies() error {
 	unvendored := removeSelfProvidedDeps(generated.Runtime, generated.Vendored)
 
 	newruntime := append(pc.Dependencies.Runtime, unvendored...)
-	pc.Dependencies.Runtime = dedup(newruntime)
+	// slices.Compact only collapses adjacent duplicates, so sort first to keep
+	// the sort-then-deduplicate behaviour of the dedup helper it replaces.
+	slices.Sort(newruntime)
+	pc.Dependencies.Runtime = slices.Compact(newruntime)
 
 	newprovides := append(pc.Dependencies.Provides, generated.Provides...)
-	pc.Dependencies.Provides = dedup(newprovides)
+	slices.Sort(newprovides)
+	pc.Dependencies.Provides = slices.Compact(newprovides)
 
 	pc.Dependencies.Runtime = removeSelfProvidedDeps(pc.Dependencies.Runtime, pc.Dependencies.Provides)
 
diff --git a/pkg/build/pipeline.go b/pkg/build/pipeline.go
index ad538afde..708a2c6f2 100644
--- a/pkg/build/pipeline.go
+++ b/pkg/build/pipeline.go
@@ -23,6 +23,8 @@ import (
 	"strconv"
 	"strings"
 
+	"golang.org/x/exp/slices"
+
 	"go.opentelemetry.io/otel"
 	"gopkg.in/yaml.v3"
 
@@ -476,7 +478,9 @@ func (pctx *PipelineContext) ApplyNeeds(pb *PipelineBuild) error {
 		}
 	}
 
-	ic.Contents.Packages = dedup(ic.Contents.Packages)
+	// Sort first: slices.Compact only removes adjacent duplicates.
+	slices.Sort(ic.Contents.Packages)
+	ic.Contents.Packages = slices.Compact(ic.Contents.Packages)
 
 	for _, sp := range pctx.Pipeline.Pipeline {
 		spctx, err := NewPipelineContext(&sp, pb.Build.Logger)
diff --git a/pkg/build/sca_interface.go b/pkg/build/sca_interface.go
new file mode 100644
index 000000000..af819c3fb
--- /dev/null
+++ b/pkg/build/sca_interface.go
@@ -0,0 +1,86 @@
+// Copyright 2023 Chainguard, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package build
+
+import (
+	"fmt"
+	"path/filepath"
+
+	"chainguard.dev/apko/pkg/log"
+	"chainguard.dev/melange/pkg/config"
+	"chainguard.dev/melange/pkg/sca"
+)
+
+// SCABuildInterface provides an implementation of sca.SCAHandle which maps to
+// a package build object.
+//
+// Its methods name the receiver scabi rather than sca, so that the imported
+// sca package is never shadowed inside a method body.
+type SCABuildInterface struct {
+	// PackageBuild represents the underlying package build object.
+	PackageBuild *PackageBuild
+}
+
+// Compile-time check that *SCABuildInterface satisfies sca.SCAHandle.
+var _ sca.SCAHandle = (*SCABuildInterface)(nil)
+
+// PackageName returns the currently built package name.
+func (scabi *SCABuildInterface) PackageName() string {
+	return scabi.PackageBuild.PackageName
+}
+
+// RelativeNames returns all the package names relating to the package being
+// built: the origin package name first, followed by the name of every
+// subpackage in the build configuration.
+func (scabi *SCABuildInterface) RelativeNames() []string {
+	subpackages := scabi.PackageBuild.Build.Configuration.Subpackages
+
+	targets := make([]string, 0, len(subpackages)+1)
+	targets = append(targets, scabi.PackageBuild.Origin.Package.Name)
+
+	for _, target := range subpackages {
+		targets = append(targets, target.Name)
+	}
+
+	return targets
+}
+
+// Version returns the version of the package being built including epoch,
+// formatted as "<version>-r<epoch>".
+func (scabi *SCABuildInterface) Version() string {
+	pkg := scabi.PackageBuild.Origin.Package
+
+	return fmt.Sprintf("%s-r%d", pkg.Version, pkg.Epoch)
+}
+
+// FilesystemForRelative implements an abstract filesystem for any of the packages being
+// built. The filesystem is rooted at <workspace>/melange-out/<pkgName>.
+//
+// An error is returned when the filesystem produced by readlinkFS does not
+// satisfy sca.SCAFS.
+func (scabi *SCABuildInterface) FilesystemForRelative(pkgName string) (sca.SCAFS, error) {
+	pkgDir := filepath.Join(scabi.PackageBuild.Build.WorkspaceDir, "melange-out", pkgName)
+
+	scaFS, ok := readlinkFS(pkgDir).(sca.SCAFS)
+	if !ok {
+		return nil, fmt.Errorf("SCAFS not implemented")
+	}
+
+	return scaFS, nil
+}
+
+// Filesystem implements an abstract filesystem providing access to a package filesystem.
+// It is FilesystemForRelative applied to the package currently being built.
+func (scabi *SCABuildInterface) Filesystem() (sca.SCAFS, error) {
+	return scabi.FilesystemForRelative(scabi.PackageName())
+}
+
+// Logger returns a logger for use by the SCA engine.
+func (scabi *SCABuildInterface) Logger() log.Logger {
+	return scabi.PackageBuild.Logger
+}
+
+// Options returns the configured SCA engine options for the package being built.
+func (scabi *SCABuildInterface) Options() config.PackageOption {
+	return scabi.PackageBuild.Options
+}
+
+// BaseDependencies returns the base dependencies for the package being built.
+func (scabi *SCABuildInterface) BaseDependencies() config.Dependencies {
+	return scabi.PackageBuild.Dependencies
+}
diff --git a/pkg/sca/sca.go b/pkg/sca/sca.go
new file mode 100644
index 000000000..a191cb24f
--- /dev/null
+++ b/pkg/sca/sca.go
@@ -0,0 +1,525 @@
+// Copyright 2022 Chainguard, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sca
+
+import (
+	"bytes"
+	"debug/elf"
+	"fmt"
+	"io"
+	"io/fs"
+	"path/filepath"
+	"regexp"
+	"strings"
+
+	"chainguard.dev/apko/pkg/log"
+	apkofs "github.com/chainguard-dev/go-apk/pkg/fs"
+	"github.com/chainguard-dev/go-pkgconfig"
+
+	"chainguard.dev/melange/pkg/config"
+)
+
+// libDirs lists the package-relative directories which are treated as system
+// library directories: symlink targets are searched for in them, and shared
+// objects found below them are recorded as provided rather than vendored.
+var libDirs = []string{"lib", "usr/lib", "lib64", "usr/lib64"}
+
+// SCAFS represents the minimum required filesystem accessors which are needed by
+// the SCA engine.
+type SCAFS interface {
+	apkofs.ReadLinkFS
+	apkofs.XattrFS
+
+	// Stat returns file information for name. The engine uses it to probe
+	// whether a candidate path exists inside a package.
+	Stat(name string) (fs.FileInfo, error)
+}
+
+// SCAHandle represents all of the state necessary to analyze a package.
+type SCAHandle interface {
+	// PackageName returns the name of the current package being analyzed.
+	PackageName() string
+
+	// RelativeNames returns the name of other packages related to the current
+	// package being analyzed. Each returned name must be accepted by
+	// FilesystemForRelative.
+	RelativeNames() []string
+
+	// Version returns the version and epoch of the package being analyzed.
+	Version() string
+
+	// FilesystemForRelative returns a usable filesystem representing the package
+	// contents for a given package name.
+	FilesystemForRelative(pkgName string) (SCAFS, error)
+
+	// Filesystem returns a usable filesystem representing the current package.
+	// It is equivalent to FilesystemForRelative(PackageName()).
+	Filesystem() (SCAFS, error)
+
+	// Logger returns a log.Logger.
+	Logger() log.Logger
+
+	// Options returns a config.PackageOption struct.
+	Options() config.PackageOption
+
+	// BaseDependencies returns the underlying set of declared dependencies before
+	// the SCA engine runs.
+	BaseDependencies() config.Dependencies
+}
+
+// DependencyGenerator takes an SCAHandle and config.Dependencies pointer and returns
+// findings based on analysis.
+type DependencyGenerator func(SCAHandle, *config.Dependencies) error
+
+// allowedPrefix reports whether path is, or lies below, one of the directories
+// named in prefixes.
+//
+// Prefixes are slash-separated directory names without a trailing slash and are
+// matched on whole path components: "usr/lib" covers "usr/lib/libz.so.1" but
+// not "usr/libexec/helper". A bare string-prefix test would accept both.
+func allowedPrefix(path string, prefixes []string) bool {
+	for _, pfx := range prefixes {
+		if path == pfx || strings.HasPrefix(path, pfx+"/") {
+			return true
+		}
+	}
+
+	return false
+}
+
+// cmdPrefixes lists the package-relative directories whose executables are
+// advertised as commands.
+var cmdPrefixes = []string{"bin", "sbin", "usr/bin", "usr/sbin"}
+
+// generateCmdProviders appends a "cmd:<basename>=<version>" provider to
+// generated.Provides for every regular file below one of cmdPrefixes which is
+// readable and executable by user, group and others (all of the 0555 bits set).
+//
+// Nothing is scanned when the NoCommands option is set. Errors from obtaining
+// the package filesystem or from walking it are returned unchanged.
+func generateCmdProviders(hdl SCAHandle, generated *config.Dependencies) error {
+	if hdl.Options().NoCommands {
+		return nil
+	}
+
+	hdl.Logger().Printf("scanning for commands...")
+
+	fsys, err := hdl.Filesystem()
+	if err != nil {
+		return err
+	}
+
+	return fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+
+		fi, err := d.Info()
+		if err != nil {
+			return err
+		}
+
+		mode := fi.Mode()
+		if !mode.IsRegular() {
+			return nil
+		}
+
+		if mode.Perm()&0555 == 0555 && allowedPrefix(path, cmdPrefixes) {
+			provider := fmt.Sprintf("cmd:%s=%s", filepath.Base(path), hdl.Version())
+			generated.Provides = append(generated.Provides, provider)
+		}
+
+		return nil
+	})
+}
+
+// findInterpreter looks for the PT_INTERP header and extracts the interpreter so that it
+// may be used as a dependency.
+//
+// Only the first PT_INTERP segment is considered. Its contents are returned with
+// leading and trailing NUL bytes removed. An empty string and a nil error are
+// returned when the object has no PT_INTERP segment.
+func findInterpreter(bin *elf.File) (string, error) {
+	for _, prog := range bin.Progs {
+		if prog.Type != elf.PT_INTERP {
+			continue
+		}
+
+		interpBuf, err := io.ReadAll(prog.Open())
+		if err != nil {
+			return "", err
+		}
+
+		return string(bytes.Trim(interpBuf, "\x00")), nil
+	}
+
+	return "", nil
+}
+
+// dereferenceCrossPackageSymlink attempts to dereference a symlink across multiple package
+// directories.
+//
+// The link at path is read from the current package's filesystem and reduced to
+// its base name. Every package named by hdl.RelativeNames() is then searched, in
+// order, for a file of that name directly inside one of libDirs. The first hit is
+// returned as the owning package name and the path within that package. When
+// nothing matches, both strings are empty and the error is nil.
+func dereferenceCrossPackageSymlink(hdl SCAHandle, path string) (string, string, error) {
+	pkgFS, err := hdl.Filesystem()
+	if err != nil {
+		return "", "", err
+	}
+
+	linkTarget, err := pkgFS.Readlink(path)
+	if err != nil {
+		return "", "", err
+	}
+
+	targetName := filepath.Base(linkTarget)
+
+	for _, pkgName := range hdl.RelativeNames() {
+		baseFS, err := hdl.FilesystemForRelative(pkgName)
+		if err != nil {
+			return "", "", err
+		}
+
+		for _, libDir := range libDirs {
+			candidate := filepath.Join(libDir, targetName)
+
+			if _, err := baseFS.Stat(candidate); err == nil {
+				return pkgName, candidate, nil
+			}
+		}
+	}
+
+	return "", "", nil
+}
+
+// generateSharedObjectNameDeps walks the current package and records shared
+// object relationships in generated:
+//
+//   - a symlink whose path contains ".so" and which resolves to an ELF object in
+//     a related package adds "so:<SONAME>" runtime dependencies;
+//   - a regular file with all of the 0555 permission bits set which parses as
+//     ELF adds runtime dependencies on its interpreter and on its imported
+//     libraries (unless NoDepends is set), and "so:<SONAME>=<version>" entries
+//     to Provides, or to Vendored when it lives outside libDirs (unless
+//     NoProvides is set).
+//
+// Files which cannot be opened or parsed as ELF are skipped silently. Errors
+// from the walk itself and from reading the interpreter segment are returned.
+func generateSharedObjectNameDeps(hdl SCAHandle, generated *config.Dependencies) error {
+	hdl.Logger().Printf("scanning for shared object dependencies...")
+
+	fsys, err := hdl.Filesystem()
+	if err != nil {
+		return err
+	}
+
+	if err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+
+		fi, err := d.Info()
+		if err != nil {
+			return err
+		}
+
+		mode := fi.Mode()
+
+		// If it is a symlink, let's check and see if it is a library SONAME.
+		if mode.Type()&fs.ModeSymlink == fs.ModeSymlink {
+			if !strings.Contains(path, ".so") {
+				return nil
+			}
+
+			targetPkg, realPath, err := dereferenceCrossPackageSymlink(hdl, path)
+			if err != nil || realPath == "" {
+				// The link could not be resolved, or nothing with the target's
+				// name exists in the library directories of the related
+				// packages. Either way there is no package to open.
+				return nil
+			}
+
+			targetFS, err := hdl.FilesystemForRelative(targetPkg)
+			if err != nil {
+				return nil
+			}
+
+			rawFile, err := targetFS.Open(realPath)
+			if err != nil {
+				return nil
+			}
+			defer rawFile.Close()
+
+			// debug/elf needs random access to the file.
+			seekableFile, ok := rawFile.(io.ReaderAt)
+			if !ok {
+				return nil
+			}
+
+			ef, err := elf.NewFile(seekableFile)
+			if err != nil {
+				return nil
+			}
+			defer ef.Close()
+
+			sonames, err := ef.DynString(elf.DT_SONAME)
+			// most likely SONAME is not set on this object
+			if err != nil {
+				hdl.Logger().Warnf("library %s lacks SONAME", path)
+				return nil
+			}
+
+			for _, soname := range sonames {
+				generated.Runtime = append(generated.Runtime, fmt.Sprintf("so:%s", soname))
+			}
+
+			return nil
+		}
+
+		// Only regular files which are readable and executable by everyone are
+		// candidates for ELF inspection.
+		if !mode.IsRegular() || mode.Perm()&0555 != 0555 {
+			return nil
+		}
+
+		basename := filepath.Base(path)
+
+		// most likely a shell script instead of an ELF, so treat any
+		// error as non-fatal.
+		rawFile, err := fsys.Open(path)
+		if err != nil {
+			return nil
+		}
+		defer rawFile.Close()
+
+		seekableFile, ok := rawFile.(io.ReaderAt)
+		if !ok {
+			return nil
+		}
+
+		ef, err := elf.NewFile(seekableFile)
+		if err != nil {
+			return nil
+		}
+		defer ef.Close()
+
+		interp, err := findInterpreter(ef)
+		if err != nil {
+			return err
+		}
+		if interp != "" && !hdl.Options().NoDepends {
+			hdl.Logger().Printf("interpreter for %s => %s", basename, interp)
+
+			// musl interpreter is a symlink back to itself, so we want to use the non-symlink name as
+			// the dependency.
+			interpName := fmt.Sprintf("so:%s", filepath.Base(interp))
+			interpName = strings.ReplaceAll(interpName, "so:ld-musl", "so:libc.musl")
+			generated.Runtime = append(generated.Runtime, interpName)
+		}
+
+		libs, err := ef.ImportedLibraries()
+		if err != nil {
+			hdl.Logger().Warnf("WTF: ImportedLibraries() returned error: %v", err)
+			return nil
+		}
+
+		if !hdl.Options().NoDepends {
+			for _, lib := range libs {
+				// Only versioned library names are recorded.
+				if strings.Contains(lib, ".so.") {
+					generated.Runtime = append(generated.Runtime, fmt.Sprintf("so:%s", lib))
+				}
+			}
+		}
+
+		// An executable program should never have a SONAME, but apparently binaries built
+		// with some versions of jlink do. Thus, if an interpreter is set (meaning it is an
+		// executable program), we do not scan the object for SONAMEs.
+		//
+		// Ugh: libc.so.6 has a PT_INTERP set on itself to make the `/lib/libc.so.6 --about`
+		// functionality work. So we always generate provides entries for libc.
+		if !hdl.Options().NoProvides && (interp == "" || strings.HasPrefix(basename, "libc")) {
+			sonames, err := ef.DynString(elf.DT_SONAME)
+			// most likely SONAME is not set on this object
+			if err != nil {
+				hdl.Logger().Warnf("library %s lacks SONAME", path)
+				return nil
+			}
+
+			for _, soname := range sonames {
+				// The provided version is whatever follows the first ".so." in
+				// the SONAME, or "0" when the SONAME carries no version.
+				libver := "0"
+				if parts := strings.Split(soname, ".so."); len(parts) > 1 {
+					libver = parts[1]
+				}
+
+				entry := fmt.Sprintf("so:%s=%s", soname, libver)
+				if allowedPrefix(path, libDirs) {
+					generated.Provides = append(generated.Provides, entry)
+				} else {
+					generated.Vendored = append(generated.Vendored, entry)
+				}
+			}
+		}
+
+		return nil
+	}); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// pkgConfigVersionRegexp matches the "-alpha", "-beta", "-rc" and "-pre" markers
+// in a pkg-config version string.
+var pkgConfigVersionRegexp = regexp.MustCompile("-(alpha|beta|rc|pre)")
+
+// TODO(kaniini): Turn this feature on once enough of Wolfi is built with provider data.
+var generateRuntimePkgConfigDeps = false
+
+// generatePkgConfigDeps generates a list of provided pkg-config package names and versions,
+// as well as dependency relationships.
+//
+// Every non-symlink file whose path ends in ".pc" is parsed. Unless NoProvides
+// is set, it yields a "pc:<name>=<version>" provider, where any "-alpha",
+// "-beta", "-rc" or "-pre" marker in the version is rewritten to the
+// corresponding "_alpha" style suffix. Runtime "pc:" dependencies are only
+// emitted when generateRuntimePkgConfigDeps is enabled.
+//
+// A .pc file which cannot be opened, read or parsed is reported with a warning
+// and skipped; it never fails the scan.
+func generatePkgConfigDeps(hdl SCAHandle, generated *config.Dependencies) error {
+	hdl.Logger().Printf("scanning for pkg-config data...")
+
+	fsys, err := hdl.Filesystem()
+	if err != nil {
+		return err
+	}
+
+	if err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+
+		if !strings.HasSuffix(path, ".pc") {
+			return nil
+		}
+
+		fi, err := d.Info()
+		if err != nil {
+			return err
+		}
+
+		// Sigh. ncurses uses symlinks to alias .pc files to other .pc files.
+		// Skip the symlinks for now.
+		if fi.Mode().Type()&fs.ModeSymlink == fs.ModeSymlink {
+			return nil
+		}
+
+		// TODO(kaniini): Sigh. apkofs should have ReadFile by default.
+		dataFile, err := fsys.Open(path)
+		if err != nil {
+			hdl.Logger().Warnf("Unable to load .pc file (%s) using pkgconfig: %v", path, err)
+			return nil
+		}
+		defer dataFile.Close()
+
+		data, err := io.ReadAll(dataFile)
+		if err != nil {
+			hdl.Logger().Warnf("Unable to load .pc file (%s) using pkgconfig: %v", path, err)
+			return nil
+		}
+
+		// TODO(kaniini): Sigh. go-pkgconfig should support reading from any io.Reader.
+		pkg, err := pkgconfig.Parse(string(data))
+		if err != nil {
+			hdl.Logger().Warnf("Unable to load .pc file (%s) using pkgconfig: %v", path, err)
+			return nil
+		}
+
+		pcName, _ := strings.CutSuffix(filepath.Base(path), ".pc")
+
+		apkVersion := pkgConfigVersionRegexp.ReplaceAllString(pkg.Version, "_$1")
+		if !hdl.Options().NoProvides {
+			generated.Provides = append(generated.Provides, fmt.Sprintf("pc:%s=%s", pcName, apkVersion))
+		}
+
+		if generateRuntimePkgConfigDeps {
+			// TODO(kaniini): Capture version relationships here too. In practice, this does not matter
+			// so much though for us.
+			for _, dep := range pkg.Requires {
+				generated.Runtime = append(generated.Runtime, fmt.Sprintf("pc:%s", dep.Identifier))
+			}
+
+			for _, dep := range pkg.RequiresPrivate {
+				generated.Runtime = append(generated.Runtime, fmt.Sprintf("pc:%s", dep.Identifier))
+			}
+
+			for _, dep := range pkg.RequiresInternal {
+				generated.Runtime = append(generated.Runtime, fmt.Sprintf("pc:%s", dep.Identifier))
+			}
+		}
+
+		return nil
+	}); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// generatePythonDeps generates a python3~$VERSION dependency for packages which ship
+// Python modules.
+//
+// When several pythonX.Y/site-packages directories exist, the one visited last
+// by the walk determines the version. No dependency is added when the declared
+// runtime dependencies already contain an entry starting with "python".
+func generatePythonDeps(hdl SCAHandle, generated *config.Dependencies) error {
+	var pythonModuleVer string
+	hdl.Logger().Printf("scanning for python modules...")
+
+	fsys, err := hdl.Filesystem()
+	if err != nil {
+		return err
+	}
+
+	if err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// Python modules are installed in paths such as /usr/lib/pythonX.Y/site-packages/...,
+		// so if we find a directory named site-packages, and its parent is a pythonX.Y directory,
+		// then we have a Python module directory.
+		if filepath.Base(path) != "site-packages" {
+			return nil
+		}
+
+		parentName := filepath.Base(filepath.Dir(path))
+		if !strings.HasPrefix(parentName, "python") {
+			return nil
+		}
+
+		// This probably shouldn't ever happen, but let's check to make sure.
+		if !d.IsDir() {
+			return nil
+		}
+
+		// This takes the X.Y part of the pythonX.Y directory name as the version to pin against.
+		// If the X.Y part is not present, then pythonModuleVer will remain an empty string and
+		// no dependency will be generated.
+		pythonModuleVer = strings.TrimPrefix(parentName, "python")
+		return nil
+	}); err != nil {
+		return err
+	}
+
+	// Nothing to do...
+	if pythonModuleVer == "" {
+		return nil
+	}
+
+	// Do not add a Python dependency if one already exists.
+	for _, dep := range hdl.BaseDependencies().Runtime {
+		if strings.HasPrefix(dep, "python") {
+			hdl.Logger().Warnf("%s: Python dependency %q already specified, consider removing it in favor of SCA-generated dependency", hdl.PackageName(), dep)
+			return nil
+		}
+	}
+
+	// We use the python3 name here instead of the python-3 name so that we can be
+	// compatible with Alpine and Adelie. Only Wolfi provides the python-3 name.
+	generated.Runtime = append(generated.Runtime, fmt.Sprintf("python3~%s", pythonModuleVer))
+
+	return nil
+}
+
+// Analyze runs the SCA analyzers on a given SCA handle, modifying the generated dependencies
+// set as needed.
+//
+// The analyzers run in a fixed order: shared objects, commands, pkg-config,
+// Python. The first error stops the run and is returned unchanged; findings
+// appended to generated before that point are left in place.
+func Analyze(hdl SCAHandle, generated *config.Dependencies) error {
+	generators := []DependencyGenerator{
+		generateSharedObjectNameDeps,
+		generateCmdProviders,
+		generatePkgConfigDeps,
+		generatePythonDeps,
+	}
+
+	for _, gen := range generators {
+		if err := gen(hdl, generated); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}