Skip to content

Commit

Permalink
fix(updating): do not recreate deleted paths on update (#1719)
Browse files Browse the repository at this point in the history
Previously, paths that matched a pattern in `skip_if_exists` and that were deleted in the generated project were recreated during each update. This was expected: since the file no longer existed, it was considered new. However, it might be surprising to some, so the docs are updated and a test now makes that an officially supported use case.

The remaining files shouldn't be recreated, even if the template changed. A user who deletes a file is effectively expressing their will to ignore that file from now on. To recover it, they can just recopy the template.

BREAKING CHANGE: If you delete a file in your subproject, it will not be recreated if it changes in the template and you update the subproject. Recopy if you need it back.
  • Loading branch information
lkubb committed Aug 17, 2024
1 parent 0315674 commit 5ac93ee
Show file tree
Hide file tree
Showing 6 changed files with 279 additions and 10 deletions.
44 changes: 42 additions & 2 deletions copier/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,15 @@
)
from .subproject import Subproject
from .template import Task, Template
from .tools import OS, Style, cast_to_bool, normalize_git_path, printf, readlink
from .tools import (
OS,
Style,
cast_to_bool,
escape_git_path,
normalize_git_path,
printf,
readlink,
)
from .types import (
MISSING,
AnyByStrDict,
Expand Down Expand Up @@ -933,6 +941,36 @@ def _apply_update(self) -> None: # noqa: C901
self._execute_tasks(
self.template.migration_tasks("before", self.subproject.template) # type: ignore[arg-type]
)
with local.cwd(old_copy):
self._git_initialize_repo()
git("remote", "add", "real_dst", "file://" + str(subproject_top))
git("fetch", "--depth=1", "real_dst", "HEAD")
# Save a list of files that were intentionally removed in the generated
# project to avoid recreating them during the update.
# Files listed in `skip_if_exists` should only be skipped if they exist.
# They should even be recreated if deleted intentionally.
files_removed = git(
"diff-tree",
"-r",
"--diff-filter=D",
"--name-only",
"HEAD...FETCH_HEAD",
).splitlines()
exclude_plus_removed = list(
set(self.exclude).union(
map(
escape_git_path,
map(
normalize_git_path,
(
path
for path in files_removed
if not self.match_skip(path)
),
),
)
)
)
# Create a copy of the real destination after applying migrations
# but before performing any further update for extracting the diff
# between the temporary destination of the old template and the
Expand All @@ -954,6 +992,8 @@ def _apply_update(self) -> None: # noqa: C901
# Do a normal update in final destination
with replace(
self,
# Don't regenerate intentionally deleted paths
exclude=exclude_plus_removed,
# Files can change due to the historical diff, and those
# changes are not detected in this process, so it's better to
# say nothing than lie.
Expand All @@ -970,6 +1010,7 @@ def _apply_update(self) -> None: # noqa: C901
defaults=True,
quiet=True,
src_path=self.subproject.template.url, # type: ignore[union-attr]
exclude=exclude_plus_removed,
) as new_worker:
new_worker.run_copy()
with local.cwd(new_copy):
Expand All @@ -978,7 +1019,6 @@ def _apply_update(self) -> None: # noqa: C901
# real destination with some special handling of newly added files
# in both the project and the template.
with local.cwd(old_copy):
self._git_initialize_repo()
git("remote", "add", "dst_copy", "file://" + str(dst_copy))
git("fetch", "--depth=1", "dst_copy", "HEAD:dst_copy")
git("remote", "add", "new_copy", "file://" + str(new_copy))
Expand Down
34 changes: 28 additions & 6 deletions copier/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import colorama
from packaging.version import Version
from pathspec.patterns.gitwildmatch import GitWildMatchPattern
from pydantic import StrictBool

colorama.just_fix_windows_console()
Expand Down Expand Up @@ -184,17 +185,14 @@ def readlink(link: Path) -> Path:
return Path(os.readlink(link))


_re_octal = re.compile(r"\\([0-9]{3})\\([0-9]{3})")


def _re_octal_replace(match: re.Match[str]) -> str:
return bytes([int(match.group(1), 8), int(match.group(2), 8)]).decode("utf8")
_re_whitespace = re.compile(r"^\s+|\s+$")


def normalize_git_path(path: str) -> str:
r"""Convert weird characters returned by Git to normal UTF-8 path strings.
A filename like âñ will be reported by Git as "\\303\\242\\303\\261" (octal notation).
Similarly, a filename like "<tab>foo\b<lf>ar" will be reported as "\tfoo\\b\nar".
This can be disabled with `git config core.quotepath off`.
Args:
Expand All @@ -208,5 +206,29 @@ def normalize_git_path(path: str) -> str:
path = path[1:-1]
# Repair double-quotes
path = path.replace('\\"', '"')
# Unescape escape characters
path = path.encode("latin-1", "backslashreplace").decode("unicode-escape")
# Convert octal to utf8
return _re_octal.sub(_re_octal_replace, path)
return path.encode("latin-1", "backslashreplace").decode("utf-8")


def escape_git_path(path: str) -> str:
    """Turn a plain path into a literal gitwildmatch pattern.

    The input is expected to be a fully unescaped path. If it comes from the
    output of a Git command, unescape it first, for example with
    ``normalize_git_path``.

    Args:
        path: The Git path to escape.

    Returns:
        str: The escaped Git path.
    """

    def _escape_whitespace_run(match: "re.Match[str]") -> str:
        # Put a backslash in front of every character of the matched run.
        return "".join(f"\\{whitespace}" for whitespace in match.group())

    # GitWildMatchPattern.escape leaves backslashes and trailing whitespace
    # alone, so backslashes are doubled before calling it and the whitespace
    # runs matched by ``_re_whitespace`` are escaped afterwards.
    doubled_backslashes = path.replace("\\", "\\\\")
    escaped = GitWildMatchPattern.escape(doubled_backslashes)
    return _re_whitespace.sub(_escape_whitespace_run, escaped)
5 changes: 3 additions & 2 deletions docs/configuring.md
Original file line number Diff line number Diff line change
Expand Up @@ -1312,8 +1312,9 @@ configuring `secret: true` in the [advanced prompt format][advanced-prompt-forma
- CLI flags: `-s`, `--skip`
- Default value: `[]`

[Patterns][patterns-syntax] for files/folders that must be skipped if they already
exist.
[Patterns][patterns-syntax] for files/folders that must always be present, but that
must be skipped if they already exist. If they do not exist in a project during an
`update` operation, they will be recreated.

!!! example

Expand Down
10 changes: 10 additions & 0 deletions docs/updating.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,16 @@ As you can see here, `copier` does several things:
- Finally, it re-applies the previously obtained diff and then runs the
post-migrations.

### Handling of deleted paths

Template-based files/directories that were deleted in the generated project are
automatically excluded from updates. If you want to recover such a file later on, you
can run `copier recopy` and recommit it to your repository. Subsequent updates for the
path will then be respected again.

An exception to this behavior applies to paths that are matched by `skip_if_exists`.
Their presence is always ensured, even during an `update` operation.

### Recover from a broken update

Usually Copier will replay the last project generation without problems. However,
Expand Down
8 changes: 8 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ def test_temporary_directory_with_git_repo_deletion() -> None:
('quo\\"tes', 'quo"tes'),
('"surrounded"', "surrounded"),
("m4\\303\\2424\\303\\2614a", "m4â4ñ4a"),
("tab\\t", "tab\t"),
("lf\\n", "lf\n"),
("crlf\\r\\n", "crlf\r\n"),
("back\\\\slash", "back\\slash"),
(
"\\a\\b\\f\\n\\t\\vcontrol\\a\\b\\f\\n\\t\\vcharacters\\a\\b\\f\\n\\t\\v",
"\a\b\f\n\t\vcontrol\a\b\f\n\t\vcharacters\a\b\f\n\t\v",
),
],
)
def test_normalizing_git_paths(path: str, normalized: str) -> None:
Expand Down
188 changes: 188 additions & 0 deletions tests/test_updatediff.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,194 @@ def test_skip_update(tmp_path_factory: pytest.TempPathFactory) -> None:
assert not (dst / "skip_me.rej").exists()


@pytest.mark.parametrize(
    "file_name",
    [
        "skip_normal_file",
        pytest.param(
            "skip_unicode_âñ",
            marks=pytest.mark.xfail(
                platform.system() in {"Darwin", "Windows"},
                reason="OS without proper UTF-8 filesystem.",
            ),
        ),
        "skip file with whitespace",
        " skip_leading_whitespace",
        "skip_trailing_whitespace ",
        " skip_multi_whitespace ",
        pytest.param(
            "\tskip_other_whitespace\t\\t",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "\a\f\n\t\vskip_control\a\f\n\t\vcharacters\v\t\n\f\a",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "skip_back\\slash",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "!skip_special",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
    ],
)
def test_skip_update_deleted(
    file_name: str, tmp_path_factory: pytest.TempPathFactory
) -> None:
    """
    Check that a path matched by ``skip_if_exists`` comes back on update
    when it is missing from the project beforehand.
    """
    src = tmp_path_factory.mktemp("src")
    dst = tmp_path_factory.mktemp("dst")

    # Every parametrized file name contains "skip", so it matches '*skip*'.
    template_tree = {
        "copier.yaml": "_skip_if_exists: ['*skip*']",
        "{{ _copier_conf.answers_file }}.jinja": "{{ _copier_answers|to_yaml }}",
        file_name: "1",
        "another_file": "foobar",
    }
    with local.cwd(src):
        build_file_tree(template_tree)
        for git_args in (("init",), ("add", "."), ("commit", "-m1"), ("tag", "1.0.0")):
            git(*git_args)

    run_copy(str(src), dst, defaults=True, overwrite=True)
    skipped_path = dst / file_name
    recorded_answers = yaml.safe_load((dst / ".copier-answers.yml").read_text())
    assert skipped_path.read_text() == "1"
    assert recorded_answers["_commit"] == "1.0.0"

    # Remove the file before the first project commit, so the committed
    # project does not contain it.
    skipped_path.unlink()
    with local.cwd(dst):
        for git_args in (("init",), ("add", "."), ("commit", "-m1")):
            git(*git_args)

    run_update(dst, overwrite=True)
    assert skipped_path.exists()
    assert skipped_path.read_text() == "1"


@pytest.mark.parametrize(
    "file_name",
    [
        "normal_file",
        pytest.param(
            "unicode_âñ",
            marks=pytest.mark.xfail(
                platform.system() in {"Darwin", "Windows"},
                reason="OS without proper UTF-8 filesystem.",
            ),
        ),
        "file with whitespace",
        " leading_whitespace",
        "trailing_whitespace ",
        " multi_whitespace ",
        pytest.param(
            "\tother_whitespace\t\\t",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            # The shape of this name works around two limitations that would
            # otherwise break the test:
            # * Jinja rendering turns \r in path segment names into \n, so
            #   the rendered file would end up with a different name.
            # * The pathspec lib only handles an escaped space at the end of
            #   a pattern, not other kinds of escaped whitespace. Trailing
            #   control characters that .strip() would remove leave the
            #   pattern ending in the backslash meant to escape them.
            "\a\f\n\t\vcontrol\a\f\n\t\vcharacters\v\t\n\f\a",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "back\\slash",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "!special",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "dont_wildmatch*",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
    ],
)
def test_update_deleted_path(
    file_name: str, tmp_path_factory: pytest.TempPathFactory
) -> None:
    """
    Check that a path deleted in the project stays deleted after an update,
    even when the template changed that path in the meantime.
    """
    src = tmp_path_factory.mktemp("src")
    dst = tmp_path_factory.mktemp("dst")

    initial_tree = {
        "{{ _copier_conf.answers_file }}.jinja": "{{ _copier_answers|to_yaml }}",
        file_name: "foo",
        "another_file": "foobar",
        "dont_wildmatch": "bar",
    }
    with local.cwd(src):
        build_file_tree(initial_tree)
        for git_args in (("init",), ("add", "."), ("commit", "-m1"), ("tag", "1.0.0")):
            git(*git_args)

    run_copy(str(src), dst, defaults=True, overwrite=True)
    deleted_path = dst / file_name
    kept_path = dst / "dont_wildmatch"
    recorded_answers = yaml.safe_load((dst / ".copier-answers.yml").read_text())
    assert kept_path.read_text() == "bar"
    assert deleted_path.read_text() == "foo"
    assert recorded_answers["_commit"] == "1.0.0"

    # Delete the file before the first project commit.
    deleted_path.unlink()
    with local.cwd(dst):
        for git_args in (("init",), ("add", "."), ("commit", "-m1")):
            git(*git_args)

    # Release a second template version that changes both the deleted file
    # and the file that is still present in the project.
    with local.cwd(src):
        build_file_tree({file_name: "bar", "dont_wildmatch": "baz"})
        git("commit", "-am2")
        git("tag", "2.0.0")

    run_update(dst, overwrite=True)
    # The surviving file must still be updated; for the "dont_wildmatch*"
    # case this shows the deleted name was not applied as a wildcard.
    assert kept_path.exists()
    assert kept_path.read_text() == "baz"
    assert not deleted_path.exists()


@pytest.mark.parametrize(
"answers_file", [None, ".copier-answers.yml", ".custom.copier-answers.yaml"]
)
Expand Down

0 comments on commit 5ac93ee

Please sign in to comment.