Skip to content

Experimental per-file cache locking #23854

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 50 additions & 33 deletions tools/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,19 @@

logger = logging.getLogger('cache')

LOCK_SUBDIR = 'locks'

acquired_count = 0
acquired_count = {}
cachedir = None
cachelock = None
cachelock_name = None
cache_file_locks = {}
global_cachelock = 'cache'


def is_writable(path):
return os.access(path, os.W_OK)


def acquire_cache_lock(reason):
def acquire_cache_lock(reason, cachefile):
global acquired_count
if config.FROZEN_CACHE:
# Raise an exception here rather than exit_with_error since in practice this
Expand All @@ -37,39 +38,42 @@ def acquire_cache_lock(reason):
if not is_writable(cachedir):
utils.exit_with_error(f'cache directory "{cachedir}" is not writable while accessing cache for: {reason} (see https://emscripten.org/docs/tools_reference/emcc.html for info on setting the cache directory)')

if acquired_count == 0:
logger.debug(f'PID {os.getpid()} acquiring multiprocess file lock to Emscripten cache at {cachedir}')
assert 'EM_CACHE_IS_LOCKED' not in os.environ, f'attempt to lock the cache while a parent process is holding the lock ({reason})'
setup_file(cachefile)
# TODO: is aqcuired_count even necessary? filelock.py seems to have similar logic inside.
if acquired_count[cachefile] == 0:
logger.debug(f'PID {os.getpid()} acquiring multiprocess file lock {cache_file_locks[cachefile].lock_file} for {cachefile} ({reason})')
#assert 'EM_CACHE_IS_LOCKED' not in os.environ, f'attempt to lock the cache while a parent process is holding the lock ({reason})'
try:
cachelock.acquire(60)
cache_file_locks[cachefile].acquire(60)
except filelock.Timeout:
logger.warning(f'Accessing the Emscripten cache at "{cachedir}" (for "{reason}") is taking a long time, another process should be writing to it. If there are none and you suspect this process has deadlocked, try deleting the lock file "{cachelock_name}" and try again. If this occurs deterministically, consider filing a bug.')
cachelock.acquire()
logger.warning(f'Accessing the Emscripten cache at "{cache_file_locks[cachefile].lock_file}" (for "{reason}") is taking a long time, another process should be writing to it. If there are none and you suspect this process has deadlocked, try deleting the lock file and try again. If this occurs deterministically, consider filing a bug.')
cache_file_locks[cachefile].acquire()

os.environ['EM_CACHE_IS_LOCKED'] = '1'
logger.debug('done')
acquired_count += 1
#os.environ['EM_CACHE_IS_LOCKED'] = '1'
logger.debug(f'PID {os.getpid()} done')
acquired_count[cachefile] += 1


def release_cache_lock():
def release_cache_lock(cachefile):
global acquired_count
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is no longer needed I think

acquired_count -= 1
assert acquired_count >= 0, "Called release more times than acquire"
if acquired_count == 0:
assert os.environ['EM_CACHE_IS_LOCKED'] == '1'
del os.environ['EM_CACHE_IS_LOCKED']
cachelock.release()
logger.debug(f'PID {os.getpid()} released multiprocess file lock to Emscripten cache at {cachedir}')
acquired_count[cachefile] -= 1
assert acquired_count[cachefile] >= 0, "Called release more times than acquire"
if acquired_count[cachefile] == 0:
# XXX this global var isn't useful anymore. just delete it?
#assert os.environ['EM_CACHE_IS_LOCKED'] == '1'
#del os.environ['EM_CACHE_IS_LOCKED']
cache_file_locks[cachefile].release()
logger.debug(f'PID {os.getpid()} released multiprocess file lock to Emscripten cache at {cache_file_locks[cachefile].lock_file} for {cachefile}')


@contextlib.contextmanager
def lock(reason):
def lock(reason, cachefile=global_cachelock):
"""A context manager that performs actions in the given directory."""
acquire_cache_lock(reason)
acquire_cache_lock(reason, cachefile)
try:
yield
finally:
release_cache_lock()
release_cache_lock(cachefile)


def ensure():
Expand All @@ -83,9 +87,11 @@ def ensure():

def erase():
ensure_setup()
with lock('erase'):
# Delete everything except the lockfile itself
utils.delete_contents(cachedir, exclude=[os.path.basename(cachelock_name)])
with lock('erase', global_cachelock):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the default so not needed?

# Delete everything except the lockfiles directory itself
utils.delete_contents(cachedir, exclude=[LOCK_SUBDIR])
assert os.path.exists(Path(cachedir, LOCK_SUBDIR))
assert os.path.exists(cache_file_locks[global_cachelock].lock_file)


def get_path(name):
Expand Down Expand Up @@ -138,8 +144,8 @@ def erase_lib(name):


def erase_file(shortname):
with lock('erase: ' + shortname):
name = Path(cachedir, shortname)
name = Path(cachedir, shortname)
with lock('erase: ' + shortname, shortname):
if name.exists():
logger.info(f'deleting cached file: {name}')
utils.delete_file(name)
Expand All @@ -165,7 +171,7 @@ def get(shortname, creator, what=None, force=False, quiet=False, deferred=False)
# should never happen
raise Exception(f'FROZEN_CACHE is set, but cache file is missing: "{shortname}" (in cache root path "{cachedir}")')

with lock(shortname):
with lock(shortname, shortname):
if cachename.exists() and not force:
return str(cachename)
if what is None:
Expand All @@ -185,16 +191,27 @@ def get(shortname, creator, what=None, force=False, quiet=False, deferred=False)
return str(cachename)


def setup_file(cache_file):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe put this up top before its first use?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we call this setup_lock or something that suggests its function?

global cachedir, cache_file_locks, acquired_count
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None of these looks like they get assigned in this function so I think you can delete this line.

if cache_file not in cache_file_locks:
file_path = Path(cache_file)
assert not file_path.is_absolute()
key_name = '_'.join(file_path.parts) + '.lock'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we just have the lock live alongside the file? e.g. key_name = cache_file + '.lock'

filename = Path(cachedir, LOCK_SUBDIR, key_name)
cache_file_locks[cache_file] = filelock.FileLock(filename)
acquired_count[cache_file] = 0


def setup():
global cachedir, cachelock, cachelock_name
global cachedir, global_cachelock
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

global_cachelock no needed here I think

# figure out the root directory for all caching
cachedir = Path(config.CACHE).resolve()

# since the lock itself lives inside the cache directory we need to ensure it
# exists.
ensure()
cachelock_name = Path(cachedir, 'cache.lock')
cachelock = filelock.FileLock(cachelock_name)
utils.safe_ensure_dirs(Path(cachedir, 'locks'))
setup_file(global_cachelock)


def ensure_setup():
Expand Down
4 changes: 3 additions & 1 deletion tools/system_libs.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,9 @@ def customize_build_cmd(self, cmd, _filename):
def do_build(self, out_filename, generate_only=False):
"""Builds the library and returns the path to the file."""
assert out_filename == self.get_path(absolute=True)
build_dir = os.path.join(cache.get_path('build'), self.get_base_name())
# Make separate build directories for each lib dir (e.g. wasm64, PIC, LTO)
lib_subdir = cache.get_lib_dir(False).relative_to(cache.get_sysroot(False) + '/lib')
build_dir = os.path.join(cache.get_path('build'), lib_subdir, self.get_base_name())
if USE_NINJA:
self.generate_ninja(build_dir, out_filename)
if not generate_only:
Expand Down