99headers, for the libc implementation in JS).
1010"""
1111
12+ import glob
13+ import hashlib
1214import json
1315import logging
1416import os
2022
2123from tools import (
2224 building ,
25+ cache ,
2326 config ,
2427 diagnostics ,
2528 extract_metadata ,
29+ filelock ,
2630 js_manipulation ,
2731 shared ,
2832 utils ,
@@ -173,6 +177,32 @@ def inject_code_hooks(name):
173177 return code
174178
175179
@ToolchainProfiler.profile()
def generate_js_compiler_input_hash(symbols_only=False):
  """Return a SHA-1 hex digest identifying the inputs to the JS compiler.

  Two invocations produce the same digest when the settings and the contents
  of every input file are identical.  The hashed inputs are:

  - the external settings dict, serialized as JSON with sorted keys;
  - `src/lib/lib*.js` and `src/*.mjs` (the latter so that local modifications
    to the JS compiler itself change the digest);
  - the files listed in `settings.JS_LIBRARIES`;
  - unless `symbols_only` is set, the files listed in `settings.PRE_JS_FILES`
    and `settings.POST_JS_FILES`.
  """
  # PRE_JS_FILES/POST_JS_FILES are left out of the serialized settings: they
  # are not relevant to library deps (they don't affect the library symbol
  # list) and they can contain full paths to temporary files.
  ignored_settings = {'PRE_JS_FILES', 'POST_JS_FILES'}
  settings_json = json.dumps(settings.external_dict(skip_keys=ignored_settings), sort_keys=True, indent=2)

  input_files = glob.glob(utils.path_from_root('src/lib') + '/lib*.js')
  input_files.extend(glob.glob(utils.path_from_root('src/*.mjs')))
  input_files.extend(settings.JS_LIBRARIES)
  if not symbols_only:
    input_files.extend(settings.PRE_JS_FILES)
    input_files.extend(settings.POST_JS_FILES)

  # Sort the paths so the digest does not depend on glob ordering.
  pieces = [settings_json] + [utils.read_file(path) for path in sorted(input_files)]
  return hashlib.sha1('\n'.join(pieces).encode('utf-8')).hexdigest()
204+
205+
176206@ToolchainProfiler .profile ()
177207def compile_javascript (symbols_only = False ):
178208 stderr_file = os .environ .get ('EMCC_STDERR_FILE' )
@@ -189,15 +219,8 @@ def compile_javascript(symbols_only=False):
189219 args = ['-' ]
190220 if symbols_only :
191221 args += ['--symbols-only' ]
192- out = shared .run_js_tool (path_from_root ('tools/compiler.mjs' ),
193- args , input = settings_json , stdout = subprocess .PIPE , stderr = stderr_file )
194- if symbols_only :
195- glue = None
196- forwarded_data = out
197- else :
198- assert '//FORWARDED_DATA:' in out , 'Did not receive forwarded data in pre output - process failed?'
199- glue , forwarded_data = out .split ('//FORWARDED_DATA:' )
200- return glue , forwarded_data
222+ return shared .run_js_tool (path_from_root ('tools/compiler.mjs' ),
223+ args , input = settings_json , stdout = subprocess .PIPE , stderr = stderr_file )
201224
202225
203226def set_memory (static_bump ):
@@ -275,6 +298,59 @@ def trim_asm_const_body(body):
275298 return body
276299
277300
def get_cached_file(filetype, filename, generator, cache_limit):
  """Return the content of a cached file, generating it on a cache miss.

  This implements a file cache which lives inside the main emscripten cache
  directory but uses a per-file lock (`<file>.lock`) rather than a cache-wide
  lock.

  After the lookup the cache is pruned, oldest modification time first, if it
  holds more than `cache_limit` entries (lock files are not counted).

  Args:
    filetype: Name passed to `cache.get_path` to obtain the cache directory;
      `<filetype>.lock` is used as the lock that serializes pruning.
    filename: Name of the entry inside that directory.
    generator: Zero-argument callable whose return value is written to the
      cache file (via `utils.write_file`) on a miss.
    cache_limit: Maximum number of entries to keep.  Zero or a negative value
      means no entries are kept once pruning runs.

  Returns:
    The content read from the cache file on a hit, or the value returned by
    `generator` on a miss.
  """
  root = cache.get_path(filetype)
  utils.safe_ensure_dirs(root)

  cache_file = os.path.join(root, filename)

  with filelock.FileLock(cache_file + '.lock'):
    if os.path.exists(cache_file):
      # Cache hit, read the file
      file_content = utils.read_file(cache_file)
    else:
      # Cache miss, generate the content and write the file
      file_content = generator()
      utils.write_file(cache_file, file_content)

  def list_entries():
    # Full paths of all cache entries, ignoring the lock files.
    return [os.path.join(root, f) for f in os.listdir(root) if not f.endswith('.lock')]

  if len(list_entries()) > cache_limit:
    with filelock.FileLock(cache.get_path(f'{filetype}.lock')):
      # Re-list under the prune lock; another process may have pruned already.
      entries = sorted(list_entries(), key=os.path.getmtime)
      # Compute the number of files to drop explicitly.  Slicing with
      # `[:-cache_limit]` would select nothing when cache_limit is 0.
      excess = max(len(entries) - max(cache_limit, 0), 0)
      # Delete all but the newest `cache_limit` files
      for path in entries[:excess]:
        with filelock.FileLock(path + '.lock'):
          utils.delete_file(path)

  return file_content
337+
338+
@ToolchainProfiler.profile()
def compile_javascript_cached():
  """Return the output of `compile_javascript`, served from the file cache
  when possible.

  The cache is bypassed (and `compile_javascript` is called directly) when
  `DEBUG`, `settings.BOOTSTRAPPING_STRUCT_INFO` or `config.FROZEN_CACHE` is
  set.  Otherwise the result is stored in the `js_output` cache under a name
  derived from `generate_js_compiler_input_hash()`.
  """
  # Struct info generation is performed while the cache is locked, so the
  # cache must not be used for that step.
  bypass_cache = DEBUG or settings.BOOTSTRAPPING_STRUCT_INFO or config.FROZEN_CACHE
  if bypass_cache:
    return compile_javascript()

  cache_key = generate_js_compiler_input_hash()

  # `cache_limit` caps the overall size of the cache.  The pruning code gets
  # test coverage since a full test run of `other` or `core` generates ~1000
  # unique outputs.
  return get_cached_file('js_output', f'{cache_key}.js', compile_javascript, cache_limit=500)
352+
353+
278354def emscript (in_wasm , out_wasm , outfile_js , js_syms , finalize = True , base_metadata = None ):
279355 # Overview:
280356 # * Run wasm-emscripten-finalize to extract metadata and modify the binary
@@ -358,7 +434,9 @@ def emscript(in_wasm, out_wasm, outfile_js, js_syms, finalize=True, base_metadat
358434 if metadata .invoke_funcs :
359435 settings .DEFAULT_LIBRARY_FUNCS_TO_INCLUDE += ['$getWasmTableEntry' ]
360436
361- glue , forwarded_data = compile_javascript ()
437+ out = compile_javascript_cached ()
438+ assert '//FORWARDED_DATA:' in out , 'Did not receive forwarded data in pre output - process failed?'
439+ glue , forwarded_data = out .split ('//FORWARDED_DATA:' , 1 )
362440
363441 forwarded_json = json .loads (forwarded_data )
364442
0 commit comments