Skip to content

Commit ec97c4c

Browse files
committed
update: Implement chunksize calculation
Code by Théo Lebrun
1 parent 178226a commit ec97c4c

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

elixir/update.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from concurrent.futures import ProcessPoolExecutor, wait
2-
from multiprocessing import Manager
2+
from multiprocessing import Manager, cpu_count
33
import logging
44
from threading import Lock
55

@@ -327,12 +327,14 @@ def split_into_chunks(list, chunk_size):
327327
return [list[i:i+chunk_size] for i in range(0, len(list), chunk_size)]
328328

329329
# Update a single version
330-
def update_version(db, tag, pool, manager, chunk_size, dts_comp_support):
330+
def update_version(db, tag, pool, manager, dts_comp_support):
331331
state = build_partial_state(db, tag)
332332

333333
# Collect blobs to process and split list of blobs into chunks
334334
idxes = [(idx, hash, filename) for (idx, (hash, filename)) in state.idx_to_hash_and_filename.items()]
335-
chunks = split_into_chunks(idxes, chunk_size)
335+
chunksize = int(len(idxes) / cpu_count())
336+
chunksize = min(max(1, chunksize), 400)
337+
chunks = split_into_chunks(idxes, chunksize)
336338

337339
def after_all_defs_done():
338340
# NOTE: defs database cannot be written to from now on. This is very important - process pool is used,
@@ -425,7 +427,7 @@ def after_all_comps_done():
425427

426428
if not db.vers.exists(tag):
427429
print("updating tag", tag)
428-
update_version(db, tag, pool, manager, 1000, dts_comp_support)
430+
update_version(db, tag, pool, manager, dts_comp_support)
429431
db.close()
430432
db = None
431433

0 commit comments

Comments
 (0)