Skip to content

Commit 1a91bd3

Browse files
committed
fix(package): detect dirty workspace manifest
This addresses one of the corner cases of VCS dirtiness in #14967.

> Workspace inheritance — like changing `workspace.package.edition`
> to `2021` would actually make member Cargo.toml dirty,
> but the current dirty status check doesn't capture that.

The solution here is

* Retrieve workspace manifest from Git index.
* Use the ws manifest from index to normalize package manifest.
* Compare the difference between normalized tomls from Git index and from Git working directory.

The implementation here is a bit ugly, as it exposes some internal functions to `pub(crate)`.

The current implementation also has performance issues. When the workspace contains lots of members and has a dirty workspace manifest:

* It adds one extra manifest parsing and normalization for checking each member.
  * Parsing part can be cached for the entire workspace. However normalization cannot be skipped.
* It adds two TOML serializations for checking each member.
  * If we derive `Eq` for manifest types, we might be able to skip serializations and instead just compare them.
1 parent 37fe6a7 commit 1a91bd3

File tree

4 files changed

+185
-23
lines changed

4 files changed

+185
-23
lines changed

src/cargo/ops/cargo_package/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ fn prepare_archive(
387387
let src_files = src.list_files(pkg)?;
388388

389389
// Check (git) repository state, getting the current commit hash.
390-
let vcs_info = vcs::check_repo_state(pkg, &src_files, gctx, &opts)?;
390+
let vcs_info = vcs::check_repo_state(pkg, &src_files, ws, &opts)?;
391391

392392
build_ar_list(ws, pkg, src_files, vcs_info)
393393
}

src/cargo/ops/cargo_package/vcs.rs

Lines changed: 154 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,13 @@ use std::path::PathBuf;
66

77
use anyhow::Context as _;
88
use cargo_util::paths;
9+
use cargo_util_schemas::manifest::TomlManifest;
910
use serde::Serialize;
1011
use tracing::debug;
1112

1213
use crate::core::Package;
14+
use crate::core::Workspace;
15+
use crate::core::WorkspaceRootConfig;
1316
use crate::sources::PathEntry;
1417
use crate::CargoResult;
1518
use crate::GlobalContext;
@@ -44,9 +47,10 @@ pub struct GitVcsInfo {
4447
pub fn check_repo_state(
4548
p: &Package,
4649
src_files: &[PathEntry],
47-
gctx: &GlobalContext,
50+
ws: &Workspace<'_>,
4851
opts: &PackageOpts<'_>,
4952
) -> CargoResult<Option<VcsInfo>> {
53+
let gctx = ws.gctx();
5054
let Ok(repo) = git2::Repository::discover(p.root()) else {
5155
gctx.shell().verbose(|shell| {
5256
shell.warn(format!("no (git) VCS found for `{}`", p.root().display()))
@@ -105,7 +109,7 @@ pub fn check_repo_state(
105109
.and_then(|p| p.to_str())
106110
.unwrap_or("")
107111
.replace("\\", "/");
108-
let Some(git) = git(p, gctx, src_files, &repo, &opts)? else {
112+
let Some(git) = git(p, ws, src_files, &repo, &opts)? else {
109113
// If the git repo lacks essential fields like `sha1`, and since this field exists from the beginning,
110114
// then don't generate the corresponding file in order to maintain consistency with past behavior.
111115
return Ok(None);
@@ -163,11 +167,12 @@ fn warn_symlink_checked_out_as_plain_text_file(
163167
/// The real git status check starts from here.
164168
fn git(
165169
pkg: &Package,
166-
gctx: &GlobalContext,
170+
ws: &Workspace<'_>,
167171
src_files: &[PathEntry],
168172
repo: &git2::Repository,
169173
opts: &PackageOpts<'_>,
170174
) -> CargoResult<Option<GitVcsInfo>> {
175+
let gctx = ws.gctx();
171176
// This is a collection of any dirty or untracked files. This covers:
172177
// - new/modified/deleted/renamed/type change (index or worktree)
173178
// - untracked files (which are "new" worktree files)
@@ -189,7 +194,7 @@ fn git(
189194
.iter()
190195
.filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
191196
.map(|p| p.as_ref())
192-
.chain(dirty_files_outside_pkg_root(pkg, repo, src_files)?.iter())
197+
.chain(dirty_files_outside_pkg_root(ws, pkg, repo, src_files)?.iter())
193198
.map(|path| {
194199
pathdiff::diff_paths(path, cwd)
195200
.as_ref()
@@ -233,6 +238,7 @@ fn git(
233238
/// current package root, but still under the git workdir, affecting the
234239
/// final packaged `.crate` file.
235240
fn dirty_files_outside_pkg_root(
241+
ws: &Workspace<'_>,
236242
pkg: &Package,
237243
repo: &git2::Repository,
238244
src_files: &[PathEntry],
@@ -247,7 +253,7 @@ fn dirty_files_outside_pkg_root(
247253
.map(|path| paths::normalize_path(&pkg_root.join(path)))
248254
.collect();
249255

250-
let mut dirty_symlinks = HashSet::new();
256+
let mut dirty_files = HashSet::new();
251257
for rel_path in src_files
252258
.iter()
253259
.filter(|p| p.is_symlink_or_under_symlink())
@@ -259,10 +265,151 @@ fn dirty_files_outside_pkg_root(
259265
.filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
260266
{
261267
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
262-
dirty_symlinks.insert(workdir.join(rel_path));
268+
dirty_files.insert(workdir.join(rel_path));
263269
}
264270
}
265-
Ok(dirty_symlinks)
271+
272+
if let Some(dirty_ws_manifest) = dirty_workspace_manifest(ws, pkg, repo)? {
273+
dirty_files.insert(dirty_ws_manifest);
274+
}
275+
Ok(dirty_files)
276+
}
277+
278+
fn dirty_workspace_manifest(
279+
ws: &Workspace<'_>,
280+
pkg: &Package,
281+
repo: &git2::Repository,
282+
) -> CargoResult<Option<PathBuf>> {
283+
let workdir = repo.workdir().unwrap();
284+
let ws_manifest_path = ws.root_manifest();
285+
if pkg.manifest_path() == ws_manifest_path {
286+
// The workspace manifest is also the primary package manifest.
287+
// Normal file status check should have covered it.
288+
return Ok(None);
289+
}
290+
if paths::strip_prefix_canonical(ws_manifest_path, pkg.root()).is_ok() {
291+
// Inside package root. Don't bother checking git status.
292+
return Ok(None);
293+
}
294+
let Ok(rel_path) = paths::strip_prefix_canonical(ws_manifest_path, workdir) else {
295+
// Completely outside this git workdir.
296+
return Ok(None);
297+
};
298+
299+
// Outside package root but under git workdir.
300+
if repo.status_file(&rel_path)? == git2::Status::CURRENT {
301+
return Ok(None);
302+
}
303+
304+
let from_index = ws_manifest_and_root_config_from_index(ws, repo, &rel_path);
305+
// If there is no workable workspace manifest in Git index,
306+
// create default inheritable fields.
307+
// With it, we can detect whether any member manifest has inherited fields,
308+
// and then the workspace manifest should be considered dirty.
309+
let inheritable = if let Some(fields) = from_index
310+
.as_ref()
311+
.map(|(_, root_config)| root_config.inheritable())
312+
{
313+
fields
314+
} else {
315+
&Default::default()
316+
};
317+
318+
let empty = Vec::new();
319+
let cargo_features = crate::core::Features::new(
320+
from_index
321+
.as_ref()
322+
.and_then(|(manifest, _)| manifest.cargo_features.as_ref())
323+
.unwrap_or(&empty),
324+
ws.gctx(),
325+
&mut Default::default(),
326+
pkg.package_id().source_id().is_path(),
327+
)
328+
.unwrap_or_default();
329+
330+
let dirty_path = || Ok(Some(workdir.join(&rel_path)));
331+
let dirty = |msg| {
332+
debug!(
333+
"{msg} for `{}` of repo at `{}`",
334+
rel_path.display(),
335+
workdir.display(),
336+
);
337+
dirty_path()
338+
};
339+
340+
let Ok(normalized_toml) = crate::util::toml::normalize_toml(
341+
pkg.manifest().original_toml(),
342+
&cargo_features,
343+
&|| Ok(inheritable),
344+
pkg.manifest_path(),
345+
ws.gctx(),
346+
&mut Default::default(),
347+
&mut Default::default(),
348+
) else {
349+
return dirty("failed to normalize pkg manifest from index");
350+
};
351+
352+
let Ok(from_index) = toml::to_string_pretty(&normalized_toml) else {
353+
return dirty("failed to serialize pkg manifest from index");
354+
};
355+
356+
let Ok(from_working_dir) = toml::to_string_pretty(pkg.manifest().normalized_toml()) else {
357+
return dirty("failed to serialize pkg manifest from working directory");
358+
};
359+
360+
if from_index != from_working_dir {
361+
tracing::trace!("--- from index ---\n{from_index}");
362+
tracing::trace!("--- from working dir ---\n{from_working_dir}");
363+
return dirty("normalized manifests from index and in working directory mismatched");
364+
}
365+
366+
Ok(None)
367+
}
368+
369+
/// Gets workspace manifest and workspace root config from Git index.
370+
///
371+
/// This returns an `Option` because workspace manifest might be broken or not
372+
/// exist at all.
373+
fn ws_manifest_and_root_config_from_index(
374+
ws: &Workspace<'_>,
375+
repo: &git2::Repository,
376+
ws_manifest_rel_path: &Path,
377+
) -> Option<(TomlManifest, WorkspaceRootConfig)> {
378+
let workdir = repo.workdir().unwrap();
379+
let dirty = |msg| {
380+
debug!(
381+
"{msg} for `{}` of repo at `{}`",
382+
ws_manifest_rel_path.display(),
383+
workdir.display(),
384+
);
385+
None
386+
};
387+
let Ok(index) = repo.index() else {
388+
debug!("no index for repo at `{}`", workdir.display());
389+
return None;
390+
};
391+
let Some(entry) = index.get_path(ws_manifest_rel_path, 0) else {
392+
return dirty("workspace manifest not found");
393+
};
394+
let Ok(blob) = repo.find_blob(entry.id) else {
395+
return dirty("failed to find manifest blob");
396+
};
397+
let Ok(contents) = String::from_utf8(blob.content().to_vec()) else {
398+
return dirty("failed parse as UTF-8 encoding");
399+
};
400+
let Ok(document) = crate::util::toml::parse_document(&contents) else {
401+
return dirty("failed to parse file");
402+
};
403+
let Ok(ws_manifest_from_index) = crate::util::toml::deserialize_toml(&document) else {
404+
return dirty("failed to deserialize doc");
405+
};
406+
let Some(toml_workspace) = ws_manifest_from_index.workspace.as_ref() else {
407+
return dirty("not a workspace manifest");
408+
};
409+
410+
let ws_root_config =
411+
crate::util::toml::to_workspace_root_config(toml_workspace, ws.root_manifest());
412+
Some((ws_manifest_from_index, ws_root_config))
266413
}
267414

268415
/// Helper to collect dirty statuses for a single repo.

src/cargo/util/toml/mod.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,14 @@ pub fn read_manifest(
8484
.borrow_mut()
8585
.insert(package_root.to_owned(), ws_root_config.clone());
8686
}
87+
let inherit_cell = LazyCell::new();
88+
let inherit = || {
89+
inherit_cell.try_borrow_with(|| load_inheritable_fields(gctx, path, &workspace_config))
90+
};
8791
let normalized_toml = normalize_toml(
8892
&original_toml,
8993
&features,
90-
&workspace_config,
94+
&inherit,
9195
path,
9296
gctx,
9397
&mut warnings,
@@ -158,12 +162,14 @@ fn read_toml_string(path: &Path, gctx: &GlobalContext) -> CargoResult<String> {
158162
}
159163

160164
#[tracing::instrument(skip_all)]
161-
fn parse_document(contents: &str) -> Result<toml_edit::ImDocument<String>, toml_edit::de::Error> {
165+
pub(crate) fn parse_document(
166+
contents: &str,
167+
) -> Result<toml_edit::ImDocument<String>, toml_edit::de::Error> {
162168
toml_edit::ImDocument::parse(contents.to_owned()).map_err(Into::into)
163169
}
164170

165171
#[tracing::instrument(skip_all)]
166-
fn deserialize_toml(
172+
pub(crate) fn deserialize_toml(
167173
document: &toml_edit::ImDocument<String>,
168174
) -> Result<manifest::TomlManifest, toml_edit::de::Error> {
169175
let mut unused = BTreeSet::new();
@@ -242,7 +248,7 @@ fn to_workspace_config(
242248
Ok(workspace_config)
243249
}
244250

245-
fn to_workspace_root_config(
251+
pub(crate) fn to_workspace_root_config(
246252
normalized_toml: &manifest::TomlWorkspace,
247253
manifest_file: &Path,
248254
) -> WorkspaceRootConfig {
@@ -266,22 +272,16 @@ fn to_workspace_root_config(
266272

267273
/// See [`Manifest::normalized_toml`] for more details
268274
#[tracing::instrument(skip_all)]
269-
fn normalize_toml(
275+
pub(crate) fn normalize_toml<'a>(
270276
original_toml: &manifest::TomlManifest,
271277
features: &Features,
272-
workspace_config: &WorkspaceConfig,
278+
inherit: &dyn Fn() -> CargoResult<&'a InheritableFields>,
273279
manifest_file: &Path,
274280
gctx: &GlobalContext,
275281
warnings: &mut Vec<String>,
276282
errors: &mut Vec<String>,
277283
) -> CargoResult<manifest::TomlManifest> {
278284
let package_root = manifest_file.parent().unwrap();
279-
280-
let inherit_cell: LazyCell<InheritableFields> = LazyCell::new();
281-
let inherit = || {
282-
inherit_cell
283-
.try_borrow_with(|| load_inheritable_fields(gctx, manifest_file, &workspace_config))
284-
};
285285
let workspace_root = || inherit().map(|fields| fields.ws_root().as_path());
286286

287287
let mut normalized_toml = manifest::TomlManifest {

tests/testsuite/package.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,10 +1493,13 @@ version = "1"
14931493
);
14941494

14951495
p.cargo("package --workspace --no-verify --no-metadata")
1496+
.with_status(101)
14961497
.with_stderr_data(str![[r#"
1497-
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
1498-
[UPDATING] `dummy-registry` index
1499-
[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
1498+
[ERROR] 1 files in the working directory contain changes that were not yet committed into git:
1499+
1500+
Cargo.toml
1501+
1502+
to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag
15001503
15011504
"#]])
15021505
.run();
@@ -1571,6 +1574,18 @@ fn dirty_and_broken_workspace_manifest_with_inherited_fields() {
15711574
);
15721575

15731576
p.cargo("package --workspace --no-verify --no-metadata")
1577+
.with_status(101)
1578+
.with_stderr_data(str![[r#"
1579+
[ERROR] 1 files in the working directory contain changes that were not yet committed into git:
1580+
1581+
Cargo.toml
1582+
1583+
to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag
1584+
1585+
"#]])
1586+
.run();
1587+
1588+
p.cargo("package --workspace --no-verify --no-metadata --allow-dirty")
15741589
.with_stderr_data(str![[r#"
15751590
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
15761591
[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)

0 commit comments

Comments
 (0)