Skip to content

Commit

Permalink
filter-repo: handle LFS orphan tracking with partial history rewrites
Browse files Browse the repository at this point in the history
When we do a partial history rewrite, it is more challenging to figure
out which LFS objects used to be tracked in history or which LFS objects
are now tracked in history after the rewrite, because we are not
processing the full history.  In such cases, walk the full history
separately at the beginning and end of the run to get the full LFS
object usage information.

Signed-off-by: Elijah Newren <[email protected]>
  • Loading branch information
newren committed Nov 21, 2024
1 parent c7d0f15 commit 3f9b92a
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 5 deletions.
45 changes: 42 additions & 3 deletions git-filter-repo
Original file line number Diff line number Diff line change
Expand Up @@ -2955,12 +2955,13 @@ class LFSObjectTracker:
self.id_to_object_map = {}
self.objects = set()

def __init__(self, file_info, check_sources):
def __init__(self, file_info, check_sources, check_targets):
    """Create a tracker with empty source/target LFS bookkeeping.

    Args:
      file_info: helper stored as ``self.file_info``; other methods of
        this class use it to look up object sizes and contents.
      check_sources: flag stored as ``self.check_sources``.
        NOTE(review): presumably gates inspection of input objects the
        same way ``check_targets`` gates output objects -- confirm
        against the input-side checking method.
      check_targets: flag stored as ``self.check_targets``; when false,
        ``check_output_object`` returns without inspecting anything.
    """
    # Caller-supplied configuration.
    self.file_info = file_info
    self.check_sources = check_sources
    self.check_targets = check_targets

    # Independent, initially empty bookkeeping for the pre-rewrite
    # (source) and post-rewrite (target) sides of the history.
    self.source_objects, self.target_objects = (
        LFSObjectTracker.LFSObjs(),
        LFSObjectTracker.LFSObjs(),
    )
    self.hash_to_object_map = {}

def _get_lfs_values(self, contents):
values = {}
Expand Down Expand Up @@ -3006,6 +3007,8 @@ class LFSObjectTracker:
mymap.objects.add(lfs_object_id)

def check_output_object(self, obj):
if not self.check_targets:
return
if type(obj) == Blob:
self.check_blob_data(obj.data, obj.id, False)
elif type(obj) == Commit:
Expand All @@ -3014,6 +3017,30 @@ class LFSObjectTracker:
continue
self.check_file_change_data(change.blob_id, False)

def find_all_lfs_objects_in_repo(self, repo, source):
    """Record the LFS object id of every LFS pointer reachable in *repo*.

    Runs ``git rev-list --objects --all`` in *repo*.  Every listed object
    that carries a path and is smaller than 1024 bytes has its contents
    parsed by ``_get_lfs_values``; when that yields an ``oid`` entry, the
    oid is added to ``source_objects.objects`` (if *source* is true) or
    to ``target_objects.objects`` (otherwise).

    Args:
      repo: directory git is run in (passed to Popen as ``cwd``).
      source: true to populate ``source_objects``, false to populate
        ``target_objects``.  When false, ``self.file_info`` is first
        replaced by a fresh ``FileInfoValueHelper`` for *repo*, and that
        helper is finalized before this method returns.
    """
    if not source:
        self.file_info = FileInfoValueHelper(None, None, repo)

    # The destination set does not change while walking; look it up once.
    tracked = (self.source_objects if source else self.target_objects).objects

    try:
        # stderr is discarded rather than piped: nothing ever read the
        # pipe, and an undrained pipe can block the child once it fills.
        p = subproc.Popen(["git", "rev-list", "--objects", "--all"],
                          stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
                          cwd=repo)
        try:
            # Stream the listing instead of materializing it with
            # readlines(); --all on a large repo can emit many lines.
            for line in p.stdout:
                # Each line is "<oid>" or "<oid> <path>".  Split only on
                # the first run of whitespace so that paths containing
                # spaces are not mistaken for path-less entries.
                fields = line.split(None, 1)
                if len(fields) < 2:
                    # No path on this line (e.g. a commit); such objects
                    # cannot be LFS pointer files.
                    continue
                git_oid = fields[0]

                # Anything this large is not treated as an LFS pointer.
                size = self.file_info.get_size_by_identifier(git_oid)
                if size >= 1024:
                    continue
                contents = self.file_info.get_contents_by_identifier(git_oid)
                lfs_object_id = self._get_lfs_values(contents).get(b'oid')
                if lfs_object_id:
                    tracked.add(lfs_object_id)
        finally:
            # Release the pipe and reap the child even on error.
            p.stdout.close()
            p.wait()
    finally:
        # The helper created above is owned by this call; always shut it
        # down, including when the walk fails part-way through.
        if not source:
            self.file_info.finalize()

class InputFileBackup:
def __init__(self, input_file, output_file):
self.input_file = input_file
Expand Down Expand Up @@ -3307,16 +3334,22 @@ class RepoFilter(object):
return

# Set up the object tracker
check_sources = not self._already_ran
check_sources = not self._already_ran and not self._args.partial
check_targets = not self._args.partial
self._lfs_object_tracker = LFSObjectTracker(self._file_info_value,
check_sources)
check_sources,
check_targets)
self._parser._lfs_object_tracker = self._lfs_object_tracker # kinda gross

# Get initial objects
if self._already_ran:
with open(lfs_objects_file, 'br') as f:
for line in f:
self._lfs_object_tracker.source_objects.objects.add(line.strip())
elif self._args.partial:
source = True
self._lfs_object_tracker.find_all_lfs_objects_in_repo(source_working_dir,
source)

@staticmethod
def loose_objects_are_replace_refs(git_dir, refs, num_loose_objects):
Expand Down Expand Up @@ -4682,6 +4715,12 @@ class RepoFilter(object):
print("NOTE: LFS object orphaning not checked (LFS not in use)")
return

if self._args.partial:
target_working_dir = self._args.target or b'.'
source = False
self._lfs_object_tracker.find_all_lfs_objects_in_repo(target_working_dir,
source)

with open(os.path.join(metadata_dir, b'original_lfs_objects'), 'bw') as f:
for obj in sorted(self._lfs_object_tracker.source_objects.objects):
f.write(obj+b"\n")
Expand Down
4 changes: 2 additions & 2 deletions t/t9393-filter-repo-rerun.sh
Original file line number Diff line number Diff line change
Expand Up @@ -760,7 +760,7 @@ test_expect_success 'lfs: orphaning across multiple runs with blob callback' '
)
'

test_expect_failure 'lfs: partial history rewrite affecting orphaning' '
test_expect_success 'lfs: partial history rewrite affecting orphaning' '
test_create_repo lfs_partial_history &&
(
cd lfs_partial_history &&
Expand Down Expand Up @@ -792,7 +792,7 @@ test_expect_failure 'lfs: partial history rewrite affecting orphaning' '
)
'

test_expect_failure 'lfs: full rewrite then partial' '
test_expect_success 'lfs: full rewrite then partial' '
test_create_repo lfs_full_then_partial &&
(
cd lfs_full_then_partial &&
Expand Down

0 comments on commit 3f9b92a

Please sign in to comment.