diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index 0d42c75f9e0..0adab07d0e6 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -47,7 +47,7 @@ pub fn blame_file( options, )?; let statistics = outcome.statistics; - write_blame_entries(out, outcome)?; + show_blame_entries(out, outcome, file)?; if let Some(err) = err { writeln!(err, "{statistics:#?}")?; @@ -55,7 +55,11 @@ pub fn blame_file( Ok(()) } -fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcome) -> Result<(), std::io::Error> { +fn show_blame_entries( + mut out: impl std::io::Write, + outcome: gix::blame::Outcome, + source_file_name: gix::bstr::BString, +) -> Result<(), std::io::Error> { for (entry, lines_in_hunk) in outcome.entries_with_lines() { for ((actual_lno, source_lno), line) in entry .range_in_blamed_file() @@ -64,11 +68,15 @@ fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcom { write!( out, - "{short_id} {line_no} {src_line_no} {line}", - line_no = actual_lno + 1, - src_line_no = source_lno + 1, + "{short_id} {line_no} ", short_id = entry.commit_id.to_hex_with_len(8), + line_no = actual_lno + 1, )?; + + let source_file_name = entry.source_file_name.as_ref().unwrap_or(&source_file_name); + write!(out, "{source_file_name} ")?; + + write!(out, "{src_line_no} {line}", src_line_no = source_lno + 1)?; } } diff --git a/gix-blame/src/error.rs b/gix-blame/src/error.rs index 979cc8cd6ef..d90a05127cc 100644 --- a/gix-blame/src/error.rs +++ b/gix-blame/src/error.rs @@ -27,6 +27,8 @@ pub enum Error { Traverse(#[source] Box), #[error(transparent)] DiffTree(#[from] gix_diff::tree::Error), + #[error(transparent)] + DiffTreeWithRewrites(#[from] gix_diff::tree_with_rewrites::Error), #[error("Invalid line range was given, line range is expected to be a 1-based inclusive range in the format ','")] InvalidLineRange, #[error("Failure to decode commit during traversal")] diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 03429b61b12..298d8e38be2 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -102,6 +102,7 @@ pub fn file( hunks_to_blame.push(UnblamedHunk { range_in_blamed_file: range.clone(), suspects: [(suspect, range)].into(), + source_file_name: None, }); } @@ -120,12 +121,19 @@ pub fn file( break; } - let is_still_suspect = hunks_to_blame.iter().any(|hunk| hunk.has_suspect(&suspect)); - if !is_still_suspect { + let first_hunk_for_suspect = hunks_to_blame.iter().find(|hunk| hunk.has_suspect(&suspect)); + let Some(first_hunk_for_suspect) = first_hunk_for_suspect else { // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with // the next one. continue 'outer; - } + }; + + // We know `first_hunk_for_suspect` can’t be `None` here because we check `is_some()` + // above. + let current_file_path = first_hunk_for_suspect + .source_file_name + .clone() + .unwrap_or_else(|| file_path.to_owned()); let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?; let commit_time = commit.commit_time()?; @@ -165,7 +173,7 @@ pub fn file( entry = find_path_entry_in_commit( &odb, &suspect, - file_path, + current_file_path.as_ref(), cache.as_ref(), &mut buf, &mut buf2, @@ -216,7 +224,7 @@ pub fn file( if let Some(parent_entry_id) = find_path_entry_in_commit( &odb, parent_id, - file_path, + current_file_path.as_ref(), cache.as_ref(), &mut buf, &mut buf2, @@ -239,15 +247,17 @@ pub fn file( queue.insert(parent_commit_time, parent_id); let changes_for_file_path = tree_diff_at_file_path( &odb, - file_path, + current_file_path.as_ref(), suspect, parent_id, cache.as_ref(), &mut stats, &mut diff_state, + resource_cache, &mut buf, &mut buf2, &mut buf3, + options.rewrites, )?; let Some(modification) = changes_for_file_path else { if more_than_one_parent { @@ -263,7 +273,7 @@ pub fn file( }; match modification { - gix_diff::tree::recorder::Change::Addition { .. } => { + TreeDiffChange::Addition => { if more_than_one_parent { // Do nothing under the assumption that this always (or almost always) // implies that the file comes from a different parent, compared to which @@ -272,20 +282,44 @@ pub fn file( break 'outer; } } - gix_diff::tree::recorder::Change::Deletion { .. } => { + TreeDiffChange::Deletion => { unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted") } - gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { + TreeDiffChange::Modification { previous_id, id } => { let changes = blob_changes( &odb, resource_cache, - oid, - previous_oid, + id, + previous_id, + file_path, + file_path, + options.diff_algorithm, + &mut stats, + )?; + hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id); + } + TreeDiffChange::Rewrite { + source_location, + source_id, + id, + } => { + let changes = blob_changes( + &odb, + resource_cache, + id, + source_id, file_path, + source_location.as_ref(), options.diff_algorithm, &mut stats, )?; hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id); + + for hunk in hunks_to_blame.iter_mut() { + if hunk.has_suspect(&parent_id) { + hunk.source_file_name = Some(source_location.clone()); + } + } } } } @@ -382,6 +416,7 @@ fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32) .expect("BUG: hunks are never zero-sized"), commit_id: previous_entry.commit_id, + source_file_name: previous_entry.source_file_name.clone(), }; acc.pop(); @@ -399,6 +434,59 @@ fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { }) } +/// The union of [`gix_diff::tree::recorder::Change`] and [`gix_diff::tree_with_rewrites::Change`], +/// keeping only the blame-relevant information. +enum TreeDiffChange { + Addition, + Deletion, + Modification { + previous_id: ObjectId, + id: ObjectId, + }, + Rewrite { + source_location: BString, + source_id: ObjectId, + id: ObjectId, + }, +} + +impl From for TreeDiffChange { + fn from(value: gix_diff::tree::recorder::Change) -> Self { + use gix_diff::tree::recorder::Change; + + match value { + Change::Addition { .. } => Self::Addition, + Change::Deletion { .. } => Self::Deletion, + Change::Modification { previous_oid, oid, .. } => Self::Modification { + previous_id: previous_oid, + id: oid, + }, + } + } +} + +impl From for TreeDiffChange { + fn from(value: gix_diff::tree_with_rewrites::Change) -> Self { + use gix_diff::tree_with_rewrites::Change; + + match value { + Change::Addition { .. } => Self::Addition, + Change::Deletion { .. } => Self::Deletion, + Change::Modification { previous_id, id, .. } => Self::Modification { previous_id, id }, + Change::Rewrite { + source_location, + source_id, + id, + .. + } => Self::Rewrite { + source_location, + source_id, + id, + }, + } + } +} + #[allow(clippy::too_many_arguments)] fn tree_diff_at_file_path( odb: impl gix_object::Find + gix_object::FindHeader, @@ -408,10 +496,12 @@ fn tree_diff_at_file_path( cache: Option<&gix_commitgraph::Graph>, stats: &mut Statistics, state: &mut gix_diff::tree::State, + resource_cache: &mut gix_diff::blob::Platform, commit_buf: &mut Vec, lhs_tree_buf: &mut Vec, rhs_tree_buf: &mut Vec, -) -> Result, Error> { + rewrites: Option, +) -> Result, Error> { let parent_tree_id = find_commit(cache, &odb, &parent_id, commit_buf)?.tree_id()?; let parent_tree_iter = odb.find_tree_iter(&parent_tree_id, lhs_tree_buf)?; @@ -422,6 +512,44 @@ fn tree_diff_at_file_path( let tree_iter = odb.find_tree_iter(&tree_id, rhs_tree_buf)?; stats.trees_decoded += 1; + let result = tree_diff_without_rewrites_at_file_path(&odb, file_path, stats, state, parent_tree_iter, tree_iter)?; + + // Here, we follow git’s behaviour. We return when we’ve found a `Modification`. We try a + // second time with rename tracking when the change is either an `Addition` or a `Deletion` + // because those can turn out to have been a `Rewrite`. + // TODO(perf): renames are usually rare enough to not care about the work duplication done here. + // But in theory, a rename tracker could be used by us, on demand, and we could stuff the + // changes in there and have it find renames, without repeating the diff. + if matches!(result, Some(TreeDiffChange::Modification { .. })) { + return Ok(result); + } + let Some(rewrites) = rewrites else { + return Ok(result); + }; + + let result = tree_diff_with_rewrites_at_file_path( + &odb, + file_path, + stats, + state, + resource_cache, + parent_tree_iter, + tree_iter, + rewrites, + )?; + + Ok(result) +} + +#[allow(clippy::too_many_arguments)] +fn tree_diff_without_rewrites_at_file_path( + odb: impl gix_object::Find + gix_object::FindHeader, + file_path: &BStr, + stats: &mut Statistics, + state: &mut gix_diff::tree::State, + parent_tree_iter: gix_object::TreeRefIter<'_>, + tree_iter: gix_object::TreeRefIter<'_>, +) -> Result, Error> { struct FindChangeToPath { inner: gix_diff::tree::Recorder, interesting_path: BString, @@ -509,17 +637,62 @@ fn tree_diff_at_file_path( stats.trees_diffed += 1; match result { - Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change), + Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change.map(Into::into)), Err(error) => Err(Error::DiffTree(error)), } } +#[allow(clippy::too_many_arguments)] +fn tree_diff_with_rewrites_at_file_path( + odb: impl gix_object::Find + gix_object::FindHeader, + file_path: &BStr, + stats: &mut Statistics, + state: &mut gix_diff::tree::State, + resource_cache: &mut gix_diff::blob::Platform, + parent_tree_iter: gix_object::TreeRefIter<'_>, + tree_iter: gix_object::TreeRefIter<'_>, + rewrites: gix_diff::Rewrites, +) -> Result, Error> { + let mut change: Option = None; + + let options: gix_diff::tree_with_rewrites::Options = gix_diff::tree_with_rewrites::Options { + location: Some(gix_diff::tree::recorder::Location::Path), + rewrites: Some(rewrites), + }; + let result = gix_diff::tree_with_rewrites( + parent_tree_iter, + tree_iter, + resource_cache, + state, + &odb, + |change_ref| -> Result<_, std::convert::Infallible> { + if change_ref.location() == file_path { + change = Some(change_ref.into_owned()); + Ok(gix_diff::tree_with_rewrites::Action::Cancel) + } else { + Ok(gix_diff::tree_with_rewrites::Action::Continue) + } + }, + options, + ); + stats.trees_diffed_with_rewrites += 1; + + match result { + Ok(_) | Err(gix_diff::tree_with_rewrites::Error::Diff(gix_diff::tree::Error::Cancelled)) => { + Ok(change.map(Into::into)) + } + Err(error) => Err(Error::DiffTreeWithRewrites(error)), + } +} + +#[allow(clippy::too_many_arguments)] fn blob_changes( odb: impl gix_object::Find + gix_object::FindHeader, resource_cache: &mut gix_diff::blob::Platform, oid: ObjectId, previous_oid: ObjectId, file_path: &BStr, + previous_file_path: &BStr, diff_algorithm: gix_diff::blob::Algorithm, stats: &mut Statistics, ) -> Result, Error> { @@ -579,7 +752,7 @@ fn blob_changes( resource_cache.set_resource( previous_oid, gix_object::tree::EntryKind::Blob, - file_path, + previous_file_path, gix_diff::blob::ResourceKind::OldOrSource, &odb, )?; diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index 935fb09eed8..85b3f3a8f31 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -393,11 +393,13 @@ impl UnblamedHunk { range_in_blamed_file: self.range_in_blamed_file.start ..(self.range_in_blamed_file.start + split_at_from_start), suspects: new_suspects_before.collect(), + source_file_name: self.source_file_name.clone(), }; let new_hunk_after = Self { range_in_blamed_file: (self.range_in_blamed_file.start + split_at_from_start) ..(self.range_in_blamed_file.end), suspects: new_suspects_after.collect(), + source_file_name: self.source_file_name, }; Either::Right((new_hunk_before, new_hunk_after)) @@ -445,6 +447,7 @@ impl BlameEntry { start_in_source_file: range_in_source_file.start, len: force_non_zero(range_in_source_file.len() as u32), commit_id, + source_file_name: unblamed_hunk.source_file_name.clone(), }) } } diff --git a/gix-blame/src/file/tests.rs b/gix-blame/src/file/tests.rs index 9185ca50633..408a7799949 100644 --- a/gix-blame/src/file/tests.rs +++ b/gix-blame/src/file/tests.rs @@ -14,6 +14,7 @@ fn new_unblamed_hunk(range_in_blamed_file: Range, suspect: ObjectId, offset UnblamedHunk { range_in_blamed_file, suspects: [(suspect, range_in_destination)].into(), + source_file_name: None, } } @@ -74,7 +75,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 3..5, - suspects: [(suspect, 3..5)].into() + suspects: [(suspect, 3..5)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -82,7 +84,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 0..3, - suspects: [(suspect, 0..3)].into() + suspects: [(suspect, 0..3)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(3)); @@ -108,7 +111,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 3..5, - suspects: [(suspect, 3..5)].into() + suspects: [(suspect, 3..5)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -117,11 +121,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 0..2, - suspects: [(parent, 0..2)].into() + suspects: [(parent, 0..2)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 2..3, - suspects: [(suspect, 2..3)].into() + suspects: [(suspect, 2..3)].into(), + source_file_name: None, } ] ); @@ -148,7 +154,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 13..15, - suspects: [(suspect, 13..15)].into() + suspects: [(suspect, 13..15)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -157,11 +164,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 10..12, - suspects: [(parent, 5..7)].into() + suspects: [(parent, 5..7)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 12..13, - suspects: [(suspect, 12..13)].into() + suspects: [(suspect, 12..13)].into(), + source_file_name: None, } ] ); @@ -189,7 +198,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 15..17, - suspects: [(suspect, 10..12)].into() + suspects: [(suspect, 10..12)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -198,11 +208,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 12..14, - suspects: [(parent, 7..9)].into() + suspects: [(parent, 7..9)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 14..15, - suspects: [(suspect, 9..10)].into() + suspects: [(suspect, 9..10)].into(), + source_file_name: None, } ] ); @@ -229,7 +241,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 3..5, - suspects: [(suspect, 3..5)].into() + suspects: [(suspect, 3..5)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -237,7 +250,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 0..3, - suspects: [(suspect, 0..3)].into() + suspects: [(suspect, 0..3)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(2)); @@ -264,7 +278,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 4..5, - suspects: [(suspect, 3..4)].into() + suspects: [(suspect, 3..4)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -272,7 +287,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 1..4, - suspects: [(suspect, 0..3)].into() + suspects: [(suspect, 0..3)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(2)); @@ -299,7 +315,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 6..7, - suspects: [(suspect, 5..6)].into() + suspects: [(suspect, 5..6)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -308,11 +325,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 3..4, - suspects: [(parent, 0..1)].into() + suspects: [(parent, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..6, - suspects: [(suspect, 3..5)].into() + suspects: [(suspect, 3..5)].into(), + source_file_name: None, } ] ); @@ -342,7 +361,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 23..24, - suspects: [(suspect, 25..26)].into() + suspects: [(suspect, 25..26)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(1)); @@ -371,7 +391,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 23..24, - suspects: [(suspect, 21..22)].into() + suspects: [(suspect, 21..22)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(1)); @@ -401,11 +422,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 71..107, - suspects: [(parent, 70..106)].into() + suspects: [(parent, 70..106)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 107..109, - suspects: [(suspect, 106..108)].into() + suspects: [(suspect, 106..108)].into(), + source_file_name: None, } ] ); @@ -436,11 +459,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 149..155, - suspects: [(parent, 137..143)].into() + suspects: [(parent, 137..143)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 155..156, - suspects: [(suspect, 143..144)].into() + suspects: [(suspect, 143..144)].into(), + source_file_name: None, } ] ); @@ -470,7 +495,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 3..6, - suspects: [(parent, 5..8)].into() + suspects: [(parent, 5..8)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Deleted(3)); @@ -497,7 +523,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 9..11, - suspects: [(suspect, 6..8)].into() + suspects: [(suspect, 6..8)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -526,7 +553,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 4..15, - suspects: [(suspect, 5..16)].into() + suspects: [(suspect, 5..16)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -555,7 +583,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 23..25, - suspects: [(suspect, 25..27)].into() + suspects: [(suspect, 25..27)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -586,7 +615,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 15..16, - suspects: [(parent, 16..17)].into() + suspects: [(parent, 16..17)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(1)); @@ -613,7 +643,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 23..25, - suspects: [(suspect, 22..24)].into() + suspects: [(suspect, 22..24)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -644,7 +675,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 2..5, - suspects: [(suspect, 5..8)].into() + suspects: [(suspect, 5..8)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(3)); @@ -671,7 +703,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 14..19, - suspects: [(suspect, 15..20)].into() + suspects: [(suspect, 15..20)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -679,7 +712,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 12..14, - suspects: [(parent, 10..12)].into() + suspects: [(parent, 10..12)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(1)); @@ -708,7 +742,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 110..114, - suspects: [(parent, 106..110)].into() + suspects: [(parent, 106..110)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(3)); @@ -734,7 +769,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 0..5, - suspects: [(suspect, 0..5)].into() + suspects: [(suspect, 0..5)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -764,7 +800,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 0..5, - suspects: [(parent, 0..5)].into() + suspects: [(parent, 0..5)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(0)); @@ -785,6 +822,7 @@ mod process_change { Some(UnblamedHunk { range_in_blamed_file: 22..30, suspects: [(suspect, 21..29)].into(), + source_file_name: None, }), Some(Change::Unchanged(21..23)), ); @@ -793,7 +831,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 22..30, - suspects: [(suspect, 21..29)].into() + suspects: [(suspect, 21..29)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -823,7 +862,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 0..5, - suspects: [(parent, 0..5)].into() + suspects: [(parent, 0..5)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(0)); @@ -849,7 +889,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 2..16, - suspects: [(suspect, 2..16)].into() + suspects: [(suspect, 2..16)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -877,7 +918,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 14..16, - suspects: [(suspect, 14..16)].into() + suspects: [(suspect, 14..16)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -885,7 +927,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 2..14, - suspects: [(parent, 2..14)].into() + suspects: [(parent, 2..14)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Deleted(4)); @@ -989,6 +1032,7 @@ mod process_changes { [UnblamedHunk { range_in_blamed_file: 0..4, suspects: [(suspect, 0..4)].into(), + source_file_name: None, },] ); } @@ -1007,10 +1051,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..4, suspects: [(suspect, 0..4)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..6, suspects: [(parent, 0..2)].into(), + source_file_name: None, }, ] ); @@ -1034,14 +1080,17 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..2, suspects: [(parent, 0..2)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 2..4, suspects: [(suspect, 2..4)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..6, suspects: [(parent, 2..4)].into(), + source_file_name: None, }, ] ); @@ -1065,14 +1114,17 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..1, suspects: [(suspect, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 1..4, suspects: [(suspect, 1..4)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..6, suspects: [(parent, 0..2)].into(), + source_file_name: None, } ] ); @@ -1092,10 +1144,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..1, suspects: [(suspect, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 1..6, suspects: [(parent, 0..5)].into(), + source_file_name: None, } ] ); @@ -1115,10 +1169,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 2..3, suspects: [(suspect, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 3..6, suspects: [(parent, 0..3)].into(), + source_file_name: None, } ] ); @@ -1138,10 +1194,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..4, suspects: [(suspect, 0..4)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..6, suspects: [(parent, 3..5)].into(), + source_file_name: None, } ] ); @@ -1160,6 +1218,7 @@ mod process_changes { [UnblamedHunk { range_in_blamed_file: 4..6, suspects: [(parent, 0..2)].into(), + source_file_name: None, }] ); } @@ -1178,10 +1237,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 1..2, suspects: [(suspect, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 2..3, suspects: [(parent, 2..3)].into(), + source_file_name: None, } ] ); @@ -1205,14 +1266,17 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..2, suspects: [(suspect, 0..2)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 2..3, suspects: [(parent, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 3..4, suspects: [(suspect, 3..4)].into(), + source_file_name: None, }, ] ); @@ -1226,10 +1290,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..30, suspects: [(suspect, 0..30)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 31..37, suspects: [(suspect, 31..37)].into(), + source_file_name: None, }, ]; let changes = vec![ @@ -1244,19 +1310,23 @@ mod process_changes { [ UnblamedHunk { range_in_blamed_file: 0..16, - suspects: [(parent, 0..16)].into() + suspects: [(parent, 0..16)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 16..17, - suspects: [(suspect, 16..17)].into() + suspects: [(suspect, 16..17)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 17..30, - suspects: [(parent, 16..29)].into() + suspects: [(parent, 16..29)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 31..37, - suspects: [(parent, 30..36)].into() + suspects: [(parent, 30..36)].into(), + source_file_name: None, } ] ); @@ -1270,14 +1340,17 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 1..3, suspects: [(suspect, 1..3)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 5..7, suspects: [(suspect, 5..7)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 8..10, suspects: [(suspect, 8..10)].into(), + source_file_name: None, }, ]; let changes = vec![ @@ -1293,22 +1366,27 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 1..3, suspects: [(parent, 1..3)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 5..6, suspects: [(parent, 5..6)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 6..7, suspects: [(suspect, 6..7)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 8..9, suspects: [(suspect, 8..9)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 9..10, suspects: [(parent, 6..7)].into(), + source_file_name: None, }, ] ); @@ -1330,11 +1408,13 @@ mod process_changes { [ UnblamedHunk { range_in_blamed_file: 0..4, - suspects: [(suspect, 0..4)].into() + suspects: [(suspect, 0..4)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..7, - suspects: [(parent, 3..6)].into() + suspects: [(parent, 3..6)].into(), + source_file_name: None, } ] ); @@ -1356,19 +1436,23 @@ mod process_changes { [ UnblamedHunk { range_in_blamed_file: 13..14, - suspects: [(suspect, 13..14)].into() + suspects: [(suspect, 13..14)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 14..16, suspects: [(parent, 10..12)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 10..14, suspects: [(suspect, 10..14)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 14..17, suspects: [(parent, 10..13)].into(), + source_file_name: None, }, ] ); diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index 84b107b6f8d..5672eaf679d 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -147,6 +147,8 @@ pub struct Options { pub range: BlameRanges, /// Don't consider commits before the given date. pub since: Option, + /// Determine if rename tracking should be performed, and how. + pub rewrites: Option, } /// The outcome of [`file()`](crate::file()). @@ -172,6 +174,10 @@ pub struct Statistics { /// are likely partial as they are cancelled as soon as a change to the blamed file is /// detected. pub trees_diffed: usize, + /// The amount of tree-diffs to see if the file was moved (or rewritten, in git terminology). + /// These diffs are likely partial as they are cancelled as soon as a change to the blamed file + /// is detected. + pub trees_diffed_with_rewrites: usize, /// The amount of blobs there were compared to each other to learn what changed between commits. /// Note that in order to diff a blob, one needs to load both versions from the database. pub blobs_diffed: usize, @@ -275,11 +281,19 @@ pub struct BlameEntry { pub len: NonZeroU32, /// The commit that introduced the section into the *Source File*. pub commit_id: ObjectId, + /// The *Source File*'s name, in case it differs from *Blamed File*'s name. + /// This happens when the file was renamed. + pub source_file_name: Option, } impl BlameEntry { /// Create a new instance. - pub fn new(range_in_blamed_file: Range, range_in_source_file: Range, commit_id: ObjectId) -> Self { + pub fn new( + range_in_blamed_file: Range, + range_in_source_file: Range, + commit_id: ObjectId, + source_file_name: Option, + ) -> Self { debug_assert!( range_in_blamed_file.end > range_in_blamed_file.start, "{range_in_blamed_file:?}" @@ -295,6 +309,7 @@ impl BlameEntry { start_in_source_file: range_in_source_file.start, len: NonZeroU32::new(range_in_blamed_file.len() as u32).expect("BUG: hunks are never empty"), commit_id, + source_file_name, } } } @@ -331,6 +346,8 @@ pub struct UnblamedHunk { /// equal to `range_in_blamed_file`. Since `suspects` rarely contains more than 1 item, it can /// efficiently be stored as a `SmallVec`. pub suspects: SmallVec<[(ObjectId, Range); 1]>, + /// The *Source File*'s name, in case it differs from *Blamed File*'s name. + pub source_file_name: Option, } impl UnblamedHunk { diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index a4535b0eaf1..7b7d1e39e1d 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::{collections::BTreeMap, path::PathBuf}; use gix_blame::BlameRanges; use gix_hash::ObjectId; @@ -6,10 +6,11 @@ use gix_object::bstr; struct Baseline<'a> { lines: bstr::Lines<'a>, + filenames: BTreeMap, } mod baseline { - use std::path::Path; + use std::{collections::BTreeMap, path::Path}; use gix_blame::BlameEntry; use gix_hash::ObjectId; @@ -40,10 +41,30 @@ mod baseline { } impl Baseline<'_> { - pub fn collect(baseline_path: impl AsRef) -> std::io::Result> { + pub fn collect( + baseline_path: impl AsRef, + source_file_name: gix_object::bstr::BString, + ) -> std::io::Result> { let content = std::fs::read(baseline_path)?; + let baseline = Baseline { + lines: content.lines(), + filenames: BTreeMap::default(), + }; - Ok(Baseline { lines: content.lines() }.collect()) + Ok(baseline + .map(|entry| { + let source_file_name = if entry.source_file_name.as_ref() == Some(&source_file_name) { + None + } else { + entry.source_file_name + }; + + BlameEntry { + source_file_name, + ..entry + } + }) + .collect()) } } @@ -54,6 +75,7 @@ mod baseline { let mut ranges = None; let mut commit_id = gix_hash::Kind::Sha1.null(); let mut skip_lines: u32 = 0; + let mut source_file_name: Option = None; for line in self.lines.by_ref() { if line.starts_with(b"\t") { @@ -94,6 +116,12 @@ mod baseline { (line_number_in_final_file - 1)..(line_number_in_final_file + number_of_lines_in_group - 1); assert!(ranges.is_none(), "should not overwrite existing ranges"); ranges = Some((blame_range, source_range)); + } else if fields[0] == "filename" { + // We need to store `source_file_name` as it is not repeated for subsequent + // hunks that have the same `commit_id`. + source_file_name = Some(fields[1].into()); + + self.filenames.insert(commit_id, fields[1].into()); } else if !is_known_header_field(&fields[0]) && ObjectId::from_hex(fields[0].as_bytes()).is_err() { panic!("unexpected line: '{:?}'", line.as_bstr()); } @@ -103,7 +131,12 @@ mod baseline { // No new lines were parsed, so we assume the iterator is finished. return None; }; - Some(BlameEntry::new(range_in_blamed_file, range_in_source_file, commit_id)) + Some(BlameEntry::new( + range_in_blamed_file, + range_in_source_file, + commit_id, + source_file_name.or_else(|| self.filenames.get(&commit_id).cloned()), + )) } } } @@ -186,16 +219,19 @@ macro_rules! mktest { suspect, } = Fixture::new()?; + let source_file_name: gix_object::bstr::BString = format!("{}.txt", $case).into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - format!("{}.txt", $case).as_str().into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::default(), since: None, + rewrites: Some(gix_diff::Rewrites::default()), }, )? .entries; @@ -203,10 +239,10 @@ macro_rules! mktest { assert_eq!(lines_blamed.len(), $number_of_lines); let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case)))?; + let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case)), source_file_name)?; assert_eq!(baseline.len(), $number_of_lines); - assert_eq!(lines_blamed, baseline); + pretty_assertions::assert_eq!(lines_blamed, baseline); Ok(()) } }; @@ -231,6 +267,15 @@ mktest!(coalesce_adjacent_hunks, "coalesce-adjacent-hunks", 1); mktest!(sub_directory, "sub-directory/sub-directory", 3); +mktest!(after_rename, "after-rename", 1); +mktest!(after_second_rename, "after-second-rename", 1); +mktest!(after_rewrite, "after-rewrite", 3); +mktest!( + after_move_to_sub_directory, + "sub-directory/after-move-to-sub-directory", + 1 +); + mktest!(resolved_conflict, "resolved-conflict", 2); mktest!(file_in_one_chain_of_ancestors, "file-in-one-chain-of-ancestors", 1); mktest!( @@ -259,16 +304,19 @@ fn diff_disparity() { suspect, } = Fixture::new().unwrap(); + let source_file_name: gix_object::bstr::BString = format!("{case}.txt").into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - format!("{case}.txt").as_str().into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::default(), since: None, + rewrites: Some(gix_diff::Rewrites::default()), }, ) .unwrap() @@ -277,9 +325,9 @@ fn diff_disparity() { assert_eq!(lines_blamed.len(), 5); let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join(format!("{case}.baseline"))).unwrap(); + let baseline = Baseline::collect(git_dir.join(format!("{case}.baseline")), source_file_name).unwrap(); - assert_eq!(lines_blamed, baseline, "{case}"); + pretty_assertions::assert_eq!(lines_blamed, baseline, "{case}"); } } @@ -291,16 +339,19 @@ fn since() { suspect, } = Fixture::new().unwrap(); + let source_file_name: gix_object::bstr::BString = "simple.txt".into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - "simple.txt".into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::default(), since: Some(gix_date::parse("2025-01-31", None).unwrap()), + rewrites: Some(gix_diff::Rewrites::default()), }, ) .unwrap() @@ -309,9 +360,9 @@ fn since() { assert_eq!(lines_blamed.len(), 1); let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join("simple-since.baseline")).unwrap(); + let baseline = Baseline::collect(git_dir.join("simple-since.baseline"), source_file_name).unwrap(); - assert_eq!(lines_blamed, baseline); + pretty_assertions::assert_eq!(lines_blamed, baseline); } mod blame_ranges { @@ -326,16 +377,19 @@ mod blame_ranges { suspect, } = Fixture::new().unwrap(); + let source_file_name: gix_object::bstr::BString = "simple.txt".into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - "simple.txt".into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::from_range(1..=2), since: None, + rewrites: Some(gix_diff::Rewrites::default()), }, ) .unwrap() @@ -344,9 +398,9 @@ mod blame_ranges { assert_eq!(lines_blamed.len(), 2); let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join("simple-lines-1-2.baseline")).unwrap(); + let baseline = Baseline::collect(git_dir.join("simple-lines-1-2.baseline"), source_file_name).unwrap(); - assert_eq!(lines_blamed, baseline); + pretty_assertions::assert_eq!(lines_blamed, baseline); } #[test] @@ -362,16 +416,19 @@ mod blame_ranges { ranges.add_range(1..=1); // Duplicate range, should be ignored ranges.add_range(4..=4); // Line 4 + let source_file_name: gix_object::bstr::BString = "simple.txt".into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - "simple.txt".into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: ranges, since: None, + rewrites: None, }, ) .unwrap() @@ -380,9 +437,13 @@ mod blame_ranges { assert_eq!(lines_blamed.len(), 3); // Should have 3 lines total (2 from first range + 1 from second range) let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join("simple-lines-multiple-1-2-and-4.baseline")).unwrap(); + let baseline = Baseline::collect( + git_dir.join("simple-lines-multiple-1-2-and-4.baseline"), + source_file_name, + ) + .unwrap(); - assert_eq!(lines_blamed, baseline); + pretty_assertions::assert_eq!(lines_blamed, baseline); } #[test] @@ -395,16 +456,19 @@ mod blame_ranges { let ranges = BlameRanges::from_ranges(vec![1..=2, 1..=1, 4..=4]); + let source_file_name: gix_object::bstr::BString = "simple.txt".into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - "simple.txt".into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: ranges, since: None, + rewrites: None, }, ) .unwrap() @@ -413,9 +477,13 @@ mod blame_ranges { assert_eq!(lines_blamed.len(), 3); // Should have 3 lines total (2 from first range + 1 from second range) let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join("simple-lines-multiple-1-2-and-4.baseline")).unwrap(); + let baseline = Baseline::collect( + git_dir.join("simple-lines-multiple-1-2-and-4.baseline"), + source_file_name, + ) + .unwrap(); - assert_eq!(lines_blamed, baseline); + pretty_assertions::assert_eq!(lines_blamed, baseline); } } diff --git a/gix-blame/tests/fixtures/make_blame_repo.sh b/gix-blame/tests/fixtures/make_blame_repo.sh index bbe764e1aa6..5846f9326db 100755 --- a/gix-blame/tests/fixtures/make_blame_repo.sh +++ b/gix-blame/tests/fixtures/make_blame_repo.sh @@ -30,6 +30,16 @@ git add added-lines-around.txt git add coalesce-adjacent-hunks.txt git commit -q -m c1.3 +echo "line 1 in renamed file" >> before-rename.txt +echo "line 1 in file renamed twice" >> before-first-rename.txt +echo -e "line 1 in renamed and rewritten file\nline 2\nline 3\nline 4\nline 5\nline 6" >> before-rewrite.txt +echo -e "line 1 in file moved to sub-directory" > before-move-to-sub-directory.txt +git add before-rename.txt +git add before-first-rename.txt +git add before-rewrite.txt +git add before-move-to-sub-directory.txt +git commit -q -m c1.4 + echo "line 2" >> simple.txt git add simple.txt git commit -q -m c2 @@ -56,7 +66,9 @@ git commit -q -m c2.4 mkdir sub-directory echo -e "line 1\nline 2" > sub-directory/sub-directory.txt +mv before-move-to-sub-directory.txt sub-directory/after-move-to-sub-directory.txt git add sub-directory/sub-directory.txt +git add before-move-to-sub-directory.txt sub-directory/after-move-to-sub-directory.txt git commit -q -m c2.5 echo "line 3" >> simple.txt @@ -85,6 +97,15 @@ echo -e "line 1\nline 2 changed" > same-line-changed-twice.txt git add same-line-changed-twice.txt git commit -q -m c3.4 +mv before-rename.txt after-rename.txt +mv before-first-rename.txt before-second-rename.txt +rm before-rewrite.txt +echo -e "line 1 in renamed and rewritten file\nline 2 changed\nline 3 changed\nline 4\nline 5\nline 6" >> after-rewrite.txt +git add before-rename.txt after-rename.txt +git add before-first-rename.txt before-second-rename.txt +git add before-rewrite.txt after-rewrite.txt +git commit -q -m c3.5 + echo "line 4" >> simple.txt git add simple.txt git commit -q -m c4 @@ -137,6 +158,10 @@ cp empty-lines-histogram.txt empty-lines-myers.txt git add empty-lines-histogram.txt empty-lines-myers.txt git commit -q -m c5.4 +mv before-second-rename.txt after-second-rename.txt +git add before-second-rename.txt after-second-rename.txt +git commit -q -m c5.5 + # The commit history created by the commits above this line is linear, it only # contains commits that have exactly one parent. # Below this line, there’s also commits that have more than one parent. @@ -253,6 +278,11 @@ git blame --porcelain coalesce-adjacent-hunks.txt > .git/coalesce-adjacent-hunks mkdir .git/sub-directory git blame --porcelain sub-directory/sub-directory.txt > .git/sub-directory/sub-directory.baseline +git blame --porcelain after-rename.txt > .git/after-rename.baseline +git blame --porcelain after-second-rename.txt > .git/after-second-rename.baseline +git blame --porcelain after-rewrite.txt > .git/after-rewrite.baseline +git blame --porcelain sub-directory/after-move-to-sub-directory.txt > .git/sub-directory/after-move-to-sub-directory.baseline + git blame --porcelain resolved-conflict.txt > .git/resolved-conflict.baseline git blame --porcelain file-in-one-chain-of-ancestors.txt > .git/file-in-one-chain-of-ancestors.baseline git blame --porcelain different-file-in-another-chain-of-ancestors.txt > .git/different-file-in-another-chain-of-ancestors.baseline diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index fe111b065cf..136f977a60a 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -1590,6 +1590,7 @@ pub fn main() -> Result<()> { diff_algorithm, range: gix::blame::BlameRanges::from_ranges(ranges), since, + rewrites: Some(gix::diff::Rewrites::default()), }, out, statistics.then_some(err),