Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Squashed all layers #3138

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions internal/task/scope_tasks.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package task

import (
"context"
"fmt"

"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)

// NewScopeTask returns the "squashed-with-all-layers-cleaner" task. When run, the task asks
// packagesToRemove which packages should be dropped and deletes them from the SBOM via the builder.
//
// The task needs read access to the SBOM, so the builder it is given must also implement
// sbomsync.Accessor; if it does not, the task returns an error instead of panicking.
func NewScopeTask() Task {
	fn := func(_ context.Context, _ file.Resolver, builder sbomsync.Builder) error {
		accessor, ok := builder.(sbomsync.Accessor)
		if !ok {
			return fmt.Errorf("cleaning squashed-with-all-layers packages: builder %T does not implement sbomsync.Accessor", builder)
		}

		// remove all packages that don't exist in the final state of the image
		builder.DeletePackages(packagesToRemove(accessor)...)
		return nil
	}

	return NewTask("squashed-with-all-layers-cleaner", fn)
}

// packagesToRemove walks every package in the SBOM and returns the IDs of those that should be deleted.
//
// A package is kept when, scanning its locations in order, the first decisive location is either:
//   - annotated with the squashed scope and is primary evidence (or the package is a binary package), or
//   - has no scope annotation, is primary evidence, and its name/version/path key has not already been
//     claimed by an earlier package.
//
// An unscoped primary-evidence location whose key was already claimed ends the scan without keeping
// the package. Every package that is not kept is returned for deletion.
func packagesToRemove(accessor sbomsync.Accessor) []artifact.ID {
	ids := make([]artifact.ID, 0)

	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		// keys of unscoped primary-evidence locations already claimed by a package
		claimed := make(map[string]bool)

		for p := range s.Artifacts.Packages.Enumerate() {
			keep := false

			for _, loc := range p.Locations.ToSlice() {
				scope := loc.LocationMetadata.Annotations[file.ScopeAnnotationKey]
				isPrimary := loc.LocationMetadata.Annotations[pkg.EvidenceAnnotationKey] == pkg.PrimaryEvidenceAnnotation

				if scope == file.SquashedScopeAnnotation && (isPrimary || p.Type == pkg.BinaryPkg) {
					keep = true
					break
				}

				if scope == "" && isPrimary {
					key := getKey(p, loc)
					if !claimed[key] {
						claimed[key] = true
						keep = true
					}
					// whether or not the key was new, this location settles the outcome
					break
				}
			}

			if !keep {
				ids = append(ids, p.ID())
			}
		}
	})

	return ids
}

// getKey builds the de-duplication key for a package at a location from the package name and
// version plus the location's real and access paths.
//
// Fields are joined with a NUL byte rather than "-": names, versions and paths commonly contain
// dashes, so a dash separator lets different field combinations produce the same key.
// The package parameter is named p so that it does not shadow the imported pkg package.
func getKey(p pkg.Package, loc file.Location) string {
	return fmt.Sprintf("%s\x00%s\x00%s\x00%s", p.Name, p.Version, loc.RealPath, loc.AccessPath)
}
17 changes: 17 additions & 0 deletions syft/create_sbom_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ func (c *CreateSBOMConfig) makeTaskGroups(src source.Description) ([][]task.Task

// generate package and file tasks based on the configuration
environmentTasks := c.environmentTasks()
scopeTasks := c.scopeTasks()
relationshipsTasks := c.relationshipTasks(src)
unknownTasks := c.unknownsTasks()
fileTasks := c.fileTasks()
Expand All @@ -195,6 +196,11 @@ func (c *CreateSBOMConfig) makeTaskGroups(src source.Description) ([][]task.Task
taskGroups = append(taskGroups, append(pkgTasks, fileTasks...))
}

// all scope work must be done after all nodes (files and packages) have been cataloged and before the relationship
if len(scopeTasks) > 0 {
taskGroups = append(taskGroups, scopeTasks)
}

// all relationship work must be done after all nodes (files and packages) have been cataloged
if len(relationshipsTasks) > 0 {
taskGroups = append(taskGroups, relationshipsTasks)
Expand Down Expand Up @@ -328,6 +334,17 @@ func (c *CreateSBOMConfig) userPackageTasks(cfg task.CatalogingFactoryConfig) ([
return persistentPackageTasks, selectablePackageTasks, nil
}

// scopeTasks returns the tasks that post-process scope information. Only the
// squash-with-all-layers search scope has such a task; any other scope yields no tasks.
func (c *CreateSBOMConfig) scopeTasks() []task.Task {
	if c.Search.Scope != source.SquashWithAllLayersScope {
		return nil
	}

	scopeTask := task.NewScopeTask()
	if scopeTask == nil {
		return nil
	}

	return []task.Task{scopeTask}
}

// relationshipTasks returns the set of tasks that should be run to generate additional relationships as well as
// prune existing relationships.
func (c *CreateSBOMConfig) relationshipTasks(src source.Description) []task.Task {
Expand Down
7 changes: 7 additions & 0 deletions syft/file/scope.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package file

// Annotation key and values used to record which resolver view a file location came from.
const (
// ScopeAnnotationKey is the location annotation key under which the scope value is stored.
ScopeAnnotationKey = "scope"
// SquashedScopeAnnotation is the scope value for locations found in the squashed view of an image.
SquashedScopeAnnotation = "squashed"
// AllLayersScopeAnnotation is the scope value for locations found in the all-layers view of an image.
AllLayersScopeAnnotation = "all-layers"
)
149 changes: 149 additions & 0 deletions syft/internal/fileresolver/container_image_squash_all_layers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
package fileresolver

import (
"context"
"fmt"
"io"

"github.com/anchore/stereoscope/pkg/image"
"github.com/anchore/syft/syft/file"
)

// compile-time check that ContainerImageSquashAllLayers satisfies file.Resolver
var _ file.Resolver = (*ContainerImageSquashAllLayers)(nil)

// ContainerImageSquashAllLayers implements path and content access for the Squashed all layers source option for container image data sources.
// It wraps a squashed resolver and an all-layers resolver: search methods (by path, glob, MIME type)
// merge the results of both, while all other methods delegate to the squashed resolver only.
type ContainerImageSquashAllLayers struct {
// squashed answers every query; in merged searches its results are listed first
squashed *ContainerImageSquash
// allLayers contributes additional locations to the merged search results only
allLayers *ContainerImageAllLayers
}

// NewFromContainerImageSquashAllLayers returns a resolver for the given image that pairs a squashed
// resolver with an all-layers resolver. If either underlying resolver cannot be created, its error
// is returned unchanged and no resolver is produced.
func NewFromContainerImageSquashAllLayers(img *image.Image) (*ContainerImageSquashAllLayers, error) {
	squashedResolver, err := NewFromContainerImageSquash(img)
	if err != nil {
		return nil, err
	}

	allLayersResolver, err := NewFromContainerImageAllLayers(img)
	if err != nil {
		return nil, err
	}

	resolver := &ContainerImageSquashAllLayers{
		squashed:  squashedResolver,
		allLayers: allLayersResolver,
	}
	return resolver, nil
}

// HasPath indicates if the given path exists in the underlying source.
// Only the squashed resolver is consulted; the all-layers resolver is not queried here.
func (i *ContainerImageSquashAllLayers) HasPath(path string) bool {
return i.squashed.HasPath(path)
}

// FilesByPath returns the file.Locations matching the given paths, combining the results of the
// squashed resolver and the all-layers resolver. Each returned location is annotated with the scope
// it came from and duplicates are dropped (see mergeLocations). An error from either resolver
// aborts the lookup and is returned unchanged.
func (i *ContainerImageSquashAllLayers) FilesByPath(paths ...string) ([]file.Location, error) {
	fromSquashed, err := i.squashed.FilesByPath(paths...)
	if err != nil {
		return nil, err
	}

	fromAllLayers, err := i.allLayers.FilesByPath(paths...)
	if err != nil {
		return nil, err
	}

	return i.mergeLocations(fromSquashed, fromAllLayers), nil
}

// FilesByGlob returns the file.Locations matching the given glob patterns, combining the results of
// the squashed resolver and the all-layers resolver. Each returned location is annotated with the
// scope it came from and duplicates are dropped (see mergeLocations). An error from either resolver
// aborts the lookup and is returned unchanged.
func (i *ContainerImageSquashAllLayers) FilesByGlob(patterns ...string) ([]file.Location, error) {
	fromSquashed, err := i.squashed.FilesByGlob(patterns...)
	if err != nil {
		return nil, err
	}

	fromAllLayers, err := i.allLayers.FilesByGlob(patterns...)
	if err != nil {
		return nil, err
	}

	return i.mergeLocations(fromSquashed, fromAllLayers), nil
}

// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
// This is helpful when attempting to find a file that is in the same layer or lower as another file.
// The lookup is delegated entirely to the squashed resolver; the result carries no scope annotation
// added by this type.
func (i *ContainerImageSquashAllLayers) RelativeFileByPath(location file.Location, path string) *file.Location {
return i.squashed.RelativeFileByPath(location, path)
}

// FileContentsByLocation fetches file contents for a single file reference, regardless of the source layer.
// If the path does not exist an error is returned.
// The read is delegated to the squashed resolver.
// NOTE(review): locations returned by the merged search methods may originate from the all-layers
// resolver — confirm the squashed resolver can open those as well.
func (i *ContainerImageSquashAllLayers) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
return i.squashed.FileContentsByLocation(location)
}

// FilesByMIMEType returns the file.Locations whose MIME type matches any of the given types,
// combining the results of the squashed resolver and the all-layers resolver. Each returned location
// is annotated with the scope it came from and duplicates are dropped (see mergeLocations). An error
// from either resolver aborts the lookup and is returned unchanged.
func (i *ContainerImageSquashAllLayers) FilesByMIMEType(types ...string) ([]file.Location, error) {
	fromSquashed, err := i.squashed.FilesByMIMEType(types...)
	if err != nil {
		return nil, err
	}

	fromAllLayers, err := i.allLayers.FilesByMIMEType(types...)
	if err != nil {
		return nil, err
	}

	return i.mergeLocations(fromSquashed, fromAllLayers), nil
}

// AllLocations returns the channel of locations produced by the squashed resolver for the given
// context. Locations known only to the all-layers resolver are not included, and no scope
// annotation is added here.
func (i *ContainerImageSquashAllLayers) AllLocations(ctx context.Context) <-chan file.Location {
return i.squashed.AllLocations(ctx)
}

// FileMetadataByLocation returns the metadata for the given location as reported by the squashed resolver.
// NOTE(review): locations returned by the merged search methods may originate from the all-layers
// resolver — confirm the squashed resolver can resolve metadata for those as well.
func (i *ContainerImageSquashAllLayers) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
return i.squashed.FileMetadataByLocation(location)
}

// mergeLocations combines locations from the squashed and all-layers resolvers into one list.
//
// Squashed locations come first and are annotated with file.SquashedScopeAnnotation; all-layers
// locations follow and are annotated with file.AllLayersScopeAnnotation. Locations are de-duplicated
// by getKey, keeping the first occurrence, so a location present in both inputs is reported once
// with the squashed annotation. The result is nil when both inputs are empty.
//
// The input locations are never modified: each returned location gets its own copy of the
// annotations. This also makes it safe to pass locations whose annotation map is nil.
func (i *ContainerImageSquashAllLayers) mergeLocations(squashedLocations, allLayersLocations []file.Location) []file.Location {
	var merged []file.Location
	seen := make(map[string]struct{}, len(squashedLocations)+len(allLayersLocations))

	// add appends every not-yet-seen location from locations, tagged with the given scope.
	add := func(locations []file.Location, scope string) {
		for _, l := range locations {
			key := getKey(l)
			if _, dup := seen[key]; dup {
				continue
			}
			seen[key] = struct{}{}

			// copy instead of writing into l.Annotations: the map is shared with the caller's
			// location, and assigning into a nil map would panic
			annotations := make(map[string]string, len(l.Annotations)+1)
			for k, v := range l.Annotations {
				annotations[k] = v
			}
			annotations[file.ScopeAnnotationKey] = scope

			merged = append(merged, file.Location{
				LocationData: l.LocationData,
				LocationMetadata: file.LocationMetadata{
					Annotations: annotations,
				},
			})
		}
	}

	// order matters: squashed entries win over all-layers duplicates
	add(squashedLocations, file.SquashedScopeAnnotation)
	add(allLayersLocations, file.AllLayersScopeAnnotation)

	return merged
}

// getKey builds the de-duplication key for a location from its real path, access path and
// filesystem ID.
//
// Fields are joined with a NUL byte rather than "-": paths may contain dashes, so a dash separator
// lets two different locations produce the same key and one of them would be dropped.
func getKey(l file.Location) string {
	return fmt.Sprintf("%s\x00%s\x00%s", l.RealPath, l.AccessPath, l.FileSystemID)
}
Loading
Loading