Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 39 additions & 3 deletions internal/setup/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package setup
import (
"errors"
"fmt"
"maps"
"os"
"path/filepath"
"slices"
Expand Down Expand Up @@ -273,14 +274,46 @@ func (r *Release) validate() error {
}

// Check for glob and generate conflicts.
//
// This is the most time-consuming part of every command because checking
// each pair of paths uses an n^2 algorithm; specifically due to glob
// handling. We can speed up the process by exploiting the fact that
// conflicts are not allowed which means that in valid releases globs will
// appear at only the specific parts of the paths. There will always be a
// non-trivial prefix of the path that doesn't contain globs.
//
// We will speed up the search by only looking for conflicts in paths that
// share this same prefix. By collecting all paths and doing binary search
// we can find the start of the prefix. This is much simpler than other
// data structures like tries or radix trees which have similar algorithmic
// complexities.
allPaths := slices.Collect(maps.Keys(paths))
slices.Sort(allPaths)
for oldPath, oldSlices := range paths {
for _, old := range oldSlices {
oldInfo := old.Contents[oldPath]
if oldInfo.Kind != GeneratePath && oldInfo.Kind != GlobPath {
break
}
for newPath, newSlices := range paths {
if oldPath == newPath {

prefixLen := strings.IndexAny(oldPath, "*?")
if prefixLen == -1 {
return fmt.Errorf("internal error: invalid path: generate or glob path does not contain ' ?' or '*': %q", oldPath)
}
searchKey := oldPath[:prefixLen]
// startIndex is the position of the prefix or the position where
// the prefix would have been found.
startIndex, _ := slices.BinarySearch(allPaths, searchKey)
for i := startIndex; i < len(allPaths); i++ {
newPath := allPaths[i]
if !strings.HasPrefix(newPath, searchKey) {
// Iterate until the prefix no longer matches, which means
// all other paths will fail the comparison below.
break
}
newSlices := paths[newPath]
// We know prefixes match, we can skip that part of the string.
if oldPath[len(searchKey)-1:] == newPath[len(searchKey)-1:] {
// Identical paths have been filtered earlier.
continue
}
Expand All @@ -291,13 +324,16 @@ func (r *Release) validate() error {
continue
}
}
if strdist.GlobPath(newPath, oldPath) {
// We know prefixes match, we can skip that part of the string.
if strdist.GlobPath(newPath[len(searchKey)-1:], oldPath[len(searchKey)-1:]) {
if (old.Package > new.Package) || (old.Package == new.Package && old.Name > new.Name) ||
(old.Package == new.Package && old.Name == new.Name && oldPath > newPath) {
old, new = new, old
oldPath, newPath = newPath, oldPath
}
return fmt.Errorf("slices %s and %s conflict on %s and %s", old, new, oldPath, newPath)
} else {
break
}
}
}
Expand Down
99 changes: 99 additions & 0 deletions internal/setup/setup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,86 @@ var setupTests = []setupTest{{
`,
},
relerror: "slices mypkg1_myslice1 and mypkg2_myslice1 conflict on /path1",
}, {
summary: "When multiple prefixes match all are tested",
input: map[string]string{
"slices/mydir/mypkg1.yaml": `
package: mypkg1
slices:
myslice1:
contents:
/path/*/foo:
`,
"slices/mydir/mypkg2.yaml": `
package: mypkg2
slices:
myslice1:
contents:
/path/bar/foo1:
/path/bar/f*:
`,
},
relerror: "slices mypkg1_myslice1 and mypkg2_myslice1 conflict on /path/\\*/foo and /path/bar/f\\*",
}, {
summary: "Later matching paths are tested after several non-matches",
input: map[string]string{
"slices/mydir/mypkg1.yaml": `
package: mypkg1
slices:
myslice1:
contents:
/path/*/foo:
`,
"slices/mydir/mypkg2.yaml": `
package: mypkg2
slices:
myslice1:
contents:
/path/bar/f:
/path/bar/fa:
/path/bar/fb:
/path/bar/f*:
`,
},
relerror: "slices mypkg1_myslice1 and mypkg2_myslice1 conflict on /path/\\*/foo and /path/bar/f\\*",
}, {
summary: "Conflicting globs with wildcard in the middle of the segment",
input: map[string]string{
"slices/mydir/mypkg1.yaml": `
package: mypkg1
slices:
myslice:
contents:
/path/ab*cd/file:
`,
"slices/mydir/mypkg2.yaml": `
package: mypkg2
slices:
myslice:
contents:
/path/ab12cd/file:
`,
},
relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/ab\*cd/file and /path/ab12cd/file`,
}, {
summary: "Conflicting globs when one side uses double-star",
input: map[string]string{
"slices/mydir/mypkg1.yaml": `
package: mypkg1
slices:
myslice:
contents:
/path/**/file:
`,
"slices/mydir/mypkg2.yaml": `
package: mypkg2
slices:
myslice:
contents:
/path/*/file:
`,
},
relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/\*\*/file and /path/\*/file`,
}, {
summary: "Directories must be suffixed with /",
input: map[string]string{
Expand Down Expand Up @@ -1943,6 +2023,25 @@ var setupTests = []setupTest{{
`,
},
relerror: `slices mypkg_myslice1 and mypkg_myslice2 conflict on /path/subdir/\*\* and /path/\*\*`,
}, {
summary: "Generate paths conflict with glob paths sharing the prefix",
input: map[string]string{
"slices/mydir/mypkg1.yaml": `
package: mypkg1
slices:
myslice:
contents:
/path/subdir/**: {generate: manifest}
`,
"slices/mydir/mypkg2.yaml": `
package: mypkg2
slices:
myslice:
contents:
/path/subdir/f*:
`,
},
relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/subdir/\*\* and /path/subdir/f\*`,
}, {
summary: `No other options in "generate" paths`,
input: map[string]string{
Expand Down
135 changes: 82 additions & 53 deletions internal/strdist/strdist.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ func Distance(a, b string, f CostFunc, cut int64) int64 {
}
lst := make([]CostInt, len(b)+1)
bl := 0
for bi, br := range b {
bl++
for _, br := range b {
cost := f(-1, br)
if cost.InsertB == Inhibit || lst[bi] == Inhibit {
lst[bi+1] = Inhibit
if cost.InsertB == Inhibit || lst[bl] == Inhibit {
lst[bl+1] = Inhibit
} else {
lst[bi+1] = lst[bi] + cost.InsertB
lst[bl+1] = lst[bl] + cost.InsertB
}
bl++
}
lst = lst[:bl+1]
// Not required, but caching means preventing the fast path
Expand Down Expand Up @@ -87,8 +87,7 @@ func Distance(a, b string, f CostFunc, cut int64) int64 {
if debug {
debugf("... %v", lst)
}
_ = stop
if cut != 0 && stop {
if cut != 0 && len(b) > 0 && stop {
break
}
}
Expand All @@ -105,66 +104,96 @@ func Distance(a, b string, f CostFunc, cut int64) int64 {
// * - Any zero or more characters, except for /
// ** - Any zero or more characters, including /
func GlobPath(a, b string) bool {
if !wildcardPrefixMatch(a, b) {
// Fast path.
return false
// Computing the actual distance is slow as its complexity is
// O(len(a) * len(b)). If the paths contain globs there is no way around
// it, but we can be clever about creating segments from the paths and only
// calling the distance function when necessary. If there is no glob the
// complexity becomes O(len(a) + len(b)).
//
// The algorithm works by separating the path into segments delimited by
// '/'. We compare the segments in order for a and b, we have three cases:
// 1) No segment uses globs, comparison is memcmp of both strings.
// 2) One of the strings uses a single "*" or "?". We call the distance
// function only with the segment which reduces the algorithmic
// complexity by reducing the length.
// 3) One of the strings uses "**". We need to call distance on the
// the rest of both strings and we can no longer rely on segments.
//
// Crucially, this optimization works because:
// * There are few paths in the releases that use "**", because it results
// in conflict easily. In fact, it is usually used to extract a whole
// directory, meaning the prefix segements should be unique and we can
// avoid computing "**" completely.
distance := func(a, b string) bool {
a = strings.ReplaceAll(a, "**", "⁑")
b = strings.ReplaceAll(b, "**", "⁑")
return Distance(a, b, globCost, 1) == 0
}
if !wildcardSuffixMatch(a, b) {
// Fast path.
return false

// Returns the index where the segment ends (next '/' or the end of the
// string) and whether the string has a "*" or "?". This function will only
// traverse the string once.
segmentEnd := func(s string) (end int, hasGlob bool) {
end = strings.IndexAny(s, "*?/")
if end == -1 {
end = len(s) - 1
} else if s[end] == '*' || s[end] == '?' {
slash := strings.IndexRune(s[end:], '/')
if slash != -1 {
end = end + slash
} else {
end = len(s) - 1
}
hasGlob = true
}
return end, hasGlob
}

for len(a) > 0 && len(b) > 0 {
endA, globA := segmentEnd(a)
endB, globB := segmentEnd(b)

segmentA := a[:endA+1]
segmentB := b[:endB+1]
if strings.Contains(segmentA, "**") || strings.Contains(segmentB, "**") {
// We need to match the rest of the string with the slow path, no
// other way around it.
return distance(a, b)
} else if globA || globB {
if !distance(segmentA, segmentB) {
return false
}
} else {
if segmentA != segmentB {
return false
}
}

a = a[endA+1:]
b = b[endB+1:]
}

a = strings.ReplaceAll(a, "**", "⁑")
b = strings.ReplaceAll(b, "**", "⁑")
return Distance(a, b, globCost, 1) == 0
// One string is empty, this call is linear.
return distance(a, b)
}

func globCost(ar, br rune) Cost {
if ar == '⁑' || br == '⁑' {
return Cost{SwapAB: 0, DeleteA: 0, InsertB: 0}
}
if ar == '/' || br == '/' {
return Cost{SwapAB: Inhibit, DeleteA: Inhibit, InsertB: Inhibit}
}
if ar == '*' || br == '*' {
if ar == '*' && br == '/' {
return Cost{SwapAB: Inhibit, DeleteA: 0, InsertB: Inhibit}
} else if ar == '/' && br == '*' {
return Cost{SwapAB: Inhibit, DeleteA: Inhibit, InsertB: 0}
}
return Cost{SwapAB: 0, DeleteA: 0, InsertB: 0}
}
if ar == '/' || br == '/' {
return Cost{SwapAB: Inhibit, DeleteA: Inhibit, InsertB: Inhibit}
}
if ar == '?' || br == '?' {
return Cost{SwapAB: 0, DeleteA: 1, InsertB: 1}
}
return Cost{SwapAB: 1, DeleteA: 1, InsertB: 1}
}

// wildcardPrefixMatch compares whether the prefixes of a and b are equal up
// to the shortest one. The prefix is defined as the longest substring that
// starts at index 0 and does not contain a wildcard.
func wildcardPrefixMatch(a, b string) bool {
ai := strings.IndexAny(a, "*?")
bi := strings.IndexAny(b, "*?")
if ai == -1 {
ai = len(a)
}
if bi == -1 {
bi = len(b)
}
mini := min(ai, bi)
return a[:mini] == b[:mini]
}

// wildcardSuffixMatch compares whether the suffixes of a and b are equal up
// to the shortest one. The suffix is defined as the longest substring that ends
// at the string length and does not contain a wildcard.
func wildcardSuffixMatch(a, b string) bool {
ai := strings.LastIndexAny(a, "*?")
la := 0
if ai != -1 {
la = len(a) - ai - 1
}
lb := 0
bi := strings.LastIndexAny(b, "*?")
if bi != -1 {
lb = len(b) - bi - 1
}
minl := min(la, lb)
return a[len(a)-minl:] == b[len(b)-minl:]
}
6 changes: 6 additions & 0 deletions internal/strdist/strdist_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ var distanceTests = []distanceTest{
{f: strdist.GlobCost, r: 1, a: "a**f/hij", b: "abc/def/hik"},
{f: strdist.GlobCost, r: 2, a: "a**fg", b: "abc/def/hik"},
{f: strdist.GlobCost, r: 0, a: "a**f/hij/klm", b: "abc/d**m"},
{f: strdist.GlobCost, r: 1, a: "**a", b: ""},
{f: strdist.GlobCost, r: 0, a: "/*foo/", b: "/foo/"},
{f: strdist.GlobCost, r: 1, a: "*f", b: ""},
{f: strdist.GlobCost, r: 1, a: "", b: "*f"},
{f: strdist.GlobCost, r: 0, a: "?**", b: "f/foo"},
{f: strdist.GlobCost, r: 0, a: "/aa/a", b: "/a?**"},
}

func (s *S) TestDistance(c *C) {
Expand Down
Loading