Skip to content

Commit 3d0e061

Browse files
committed
fix: handle paths with non-utf-8 bytes
Signed-off-by: Adrian Braemer <[email protected]>
1 parent 30869cb commit 3d0e061

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

src/commoncode/resource.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -389,7 +389,7 @@ def _get_resource_cache_location(self, path, create_dirs=False):
389389
path = clean_path(path)
390390

391391
# for the cached file name, we use an md5 of the path to avoid things being too long
392-
resid = str(md5(path.encode("utf-8")).hexdigest())
392+
resid = str(md5(path.encode("utf-8", "surrogateescape")).hexdigest())
393393
cache_sub_dir, cache_file_name = resid[-2:], resid
394394

395395
parent = join(self.cache_dir, cache_sub_dir)

tests/test_resource.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -694,6 +694,11 @@ def test_codebase_cache_default(self):
694694
codebase.save_resource(child)
695695
child_2 = codebase.get_resource(path=child.path)
696696
assert child_2 == child
697+
698+
def test_codebase_cache_handles_non_utf8_path(self):
699+
test_codebase = self.get_test_loc("resource/cache2")
700+
codebase = Codebase(test_codebase)
701+
codebase._get_resource_cache_location('resource/cache2/\udce9', create_dirs=True)
697702

698703
def test_codebase_cache_all_in_memory(self):
699704
test_codebase = self.get_test_loc("resource/cache2")

0 commit comments

Comments
 (0)