Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: experiment with custom git archive command #424

Draft
wants to merge 29 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a926a1a
wip
keegancsmith Sep 16, 2022
e7e65e6
fix test
keegancsmith Sep 16, 2022
ccf69b3
same output as git archive for "tar t"
keegancsmith Sep 16, 2022
950b26a
capture state in archiveWriter struct for better readability
keegancsmith Sep 16, 2022
89c6970
set mode
keegancsmith Sep 16, 2022
db2d110
do not do dotgit detection since it breaks bare repos
keegancsmith Sep 17, 2022
91ffa5b
cpu_profile flag
keegancsmith Sep 19, 2022
ee1cc02
introduce manual stack for better profile output
keegancsmith Sep 19, 2022
04081cc
try out keepdescriptors
keegancsmith Sep 19, 2022
6500eb1
add memprofile
keegancsmith Sep 20, 2022
753864c
optionally buffer output if GIT_SG_BUFFER is set
keegancsmith Sep 20, 2022
cebf65b
add experimental GIT_SG_FILTER which just filters git archive
keegancsmith Sep 20, 2022
83325cf
getting started on git-cat-file integration
keegancsmith Sep 21, 2022
15902a2
add contents method for git-cat-file
keegancsmith Sep 21, 2022
0027f41
handle missing refs
keegancsmith Sep 21, 2022
c95ac4f
factor out common logic in cat-file
keegancsmith Sep 21, 2022
5daae67
add hash native API to catfile
keegancsmith Sep 21, 2022
801d2a4
make archive writer based on tree entries instead of object.TreeEntry
keegancsmith Sep 21, 2022
f7a1f9c
move archive code into own file
keegancsmith Sep 21, 2022
a4cdae0
wip interface to allow swapping out backend for archive writer
keegancsmith Sep 21, 2022
ae81031
implement TreeEntries for cat-file
keegancsmith Sep 22, 2022
d707154
test all modes
keegancsmith Sep 22, 2022
d88c93f
wip lstree
keegancsmith Sep 22, 2022
d29b6ba
refactor catfile to separate out gitCatFileBatchReader
keegancsmith Sep 22, 2022
01adb4e
ls-tree just writing an archive of names
keegancsmith Sep 22, 2022
d45e927
ls-tree implemented
keegancsmith Sep 23, 2022
5040bd7
skip TestDo on CI if missing .git
keegancsmith Sep 23, 2022
b07c069
check for .git in all tests
keegancsmith Sep 23, 2022
615d1d9
implement archiver via git-lfs/gitobj
keegancsmith Oct 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add experimental GIT_SG_FILTER which just filters git archive
This is significantly faster than using go-git.
  • Loading branch information
keegancsmith committed Oct 10, 2022

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit cebf65b038d7697a371767cf995eb0b3f0d10d11
75 changes: 75 additions & 0 deletions cmd/git-sg/filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package main

import (
"archive/tar"
"io"
"os/exec"

"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing/object"
)

// archiveFilter runs `git archive --format=tar` for tree and re-emits the
// resulting tar stream on w, applying the filters in opts to each entry:
//
//   - entries for which opts.Ignore(name) reports true are dropped entirely;
//   - entries for which opts.SkipContent(hdr) returns a non-empty reason are
//     written as a header only (size 0) carrying a PAX record "SG.skip" set
//     to that reason;
//   - every other entry is copied through with its content.
//
// repo is not used by this implementation; git is executed without an
// explicit working directory, so it inherits the one of the calling process.
//
// The returned error is the first failure encountered while reading the
// archive, writing to w, or waiting for git to exit. On any failure the git
// process is killed and reaped before returning.
func archiveFilter(w io.Writer, repo *git.Repository, tree *object.Tree, opts *archiveOpts) (err error) {
	// 32*1024 is the same size used by io.Copy
	buf := make([]byte, 32*1024)

	cmd := exec.Command("git", "archive", "--worktree-attributes", "--format=tar", tree.Hash.String(), "--")
	r, err := cmd.StdoutPipe()
	if err != nil {
		return err
	}
	defer r.Close()

	tr := tar.NewReader(r)
	tw := tar.NewWriter(w)

	if err := cmd.Start(); err != nil {
		return err
	}

	// done is flipped only once we reach the normal cmd.Wait below. Any
	// earlier return leaves it false, and the deferred cleanup tears the
	// child process down.
	done := false
	defer func() {
		if done {
			return
		}
		killErr := cmd.Process.Kill()
		// Reap the killed process so it does not linger as a zombie. The
		// error from Wait is expected here (the process was signalled) and
		// is deliberately ignored in favour of the original failure.
		_ = cmd.Wait()
		if err == nil {
			err = killErr
		}
	}()

	for {
		hdr, nextErr := tr.Next()
		if nextErr == io.EOF {
			break
		}
		if nextErr != nil {
			return nextErr
		}

		if opts.Ignore(hdr.Name) {
			// tr.Next discards any unread content of the current entry.
			continue
		}

		if reason := opts.SkipContent(hdr); reason != "" {
			// Emit a content-less entry that records why it was skipped.
			hdr.Size = 0
			hdr.PAXRecords = map[string]string{"SG.skip": reason}
			hdr.Format = tar.FormatPAX
			if err := tw.WriteHeader(hdr); err != nil {
				return err
			}
			continue
		}

		if err := tw.WriteHeader(hdr); err != nil {
			return err
		}
		if _, err := io.CopyBuffer(tw, tr, buf); err != nil {
			return err
		}
	}

	if err := tw.Close(); err != nil {
		return err
	}

	// The tar reader stops at the end-of-archive marker, but git may still
	// write trailing padding. Read the pipe to EOF before Wait, as os/exec
	// requires all reads from StdoutPipe to complete before Wait is called.
	if _, err := io.CopyBuffer(io.Discard, r, buf); err != nil {
		return err
	}

	done = true
	return cmd.Wait()
}
29 changes: 18 additions & 11 deletions cmd/git-sg/main.go
Original file line number Diff line number Diff line change
@@ -43,15 +43,31 @@ func do(w io.Writer) error {
return err
}

return archiveWrite(w, r, root, &archiveOpts{
// Gating this right now because I get inconsistent performance on my
// macbook. Want to test on linux and larger repos.
if os.Getenv("GIT_SG_BUFFER") != "" {
log.Println("buffering output")
bw := bufio.NewWriter(w)
defer bw.Flush()
w = bw
}

opts := &archiveOpts{
Ignore: getIgnoreFilter(r, root),
SkipContent: func(hdr *tar.Header) string {
if hdr.Size > 2<<20 {
return "large file"
}
return ""
},
})
}

if os.Getenv("GIT_SG_FILTER") != "" {
log.Println("filtering git archive output")
return archiveFilter(w, r, root, opts)
}

return archiveWrite(w, r, root, opts)
}

type archiveOpts struct {
@@ -64,15 +80,6 @@ type archiveOpts struct {
}

func archiveWrite(w io.Writer, repo *git.Repository, tree *object.Tree, opts *archiveOpts) error {
// Gating this right now because I get inconsistent performance on my
// macbook. Want to test on linux and larger repos.
if os.Getenv("GIT_SG_BUFFER") != "" {
log.Println("buffering output")
bw := bufio.NewWriter(w)
defer bw.Flush()
w = bw
}

a := &archiveWriter{
w: tar.NewWriter(w),
repo: repo,