Skip to content

Commit 1dd162b

Browse files
Merge pull request #91 from abraemer/fix-path-encoding-for-hash
fix: handle paths with non-utf-8 bytes
2 parents 30869cb + 47fb4dd commit 1dd162b

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

src/commoncode/resource.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -389,7 +389,7 @@ def _get_resource_cache_location(self, path, create_dirs=False):
389389
path = clean_path(path)
390390

391391
# for the cached file name, we use an md5 of the path to avoid things being too long
392-
resid = str(md5(path.encode("utf-8")).hexdigest())
392+
resid = str(md5(path.encode("utf-8", "surrogateescape")).hexdigest())
393393
cache_sub_dir, cache_file_name = resid[-2:], resid
394394

395395
parent = join(self.cache_dir, cache_sub_dir)

tests/test_resource.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -695,6 +695,11 @@ def test_codebase_cache_default(self):
695695
child_2 = codebase.get_resource(path=child.path)
696696
assert child_2 == child
697697

698+
def test_codebase_cache_handles_non_utf8_path(self):
699+
test_codebase = self.get_test_loc("resource/cache2")
700+
codebase = Codebase(test_codebase)
701+
codebase._get_resource_cache_location("resource/cache2/\udce9", create_dirs=True)
702+
698703
def test_codebase_cache_all_in_memory(self):
699704
test_codebase = self.get_test_loc("resource/cache2")
700705
codebase = Codebase(test_codebase, max_in_memory=0)

0 commit comments

Comments
 (0)