Skip to content

Commit 04724ee

Browse files
JinZhou5042btovar
andauthored
vine: reserve a factor of disk when allocating resources (#4035)
* init * tune param * lint * lint * lint * condition change: value > 1 || value <= 0 * reserve disk not only for proportional * lint * only modify disk factor when in range (0,1) --------- Co-authored-by: Benjamin Tovar <[email protected]>
1 parent 0509ea8 commit 04724ee

File tree

3 files changed

+22
-7
lines changed

3 files changed

+22
-7
lines changed

taskvine/src/manager/vine_manager.c

+10-1
Original file line numberDiff line numberDiff line change
@@ -2771,6 +2771,11 @@ struct rmsummary *vine_manager_choose_resources_for_task(struct vine_manager *q,
27712771
limits->disk = available_disk;
27722772
}
27732773

2774+
/* For disk, scale the estimated disk allocation by a factor in (0, 1) (0.75 by default) to intentionally
2775+
* reserve some space for data movement between the sandbox and cache, and allow extra room for potential cache growth.
2776+
* This applies to tasks except function calls. */
2777+
limits->disk *= q->disk_proportion_available_to_task;
2778+
27742779
/* never go below specified min resources. */
27752780
rmsummary_merge_max(limits, min);
27762781

@@ -4006,6 +4011,7 @@ struct vine_manager *vine_ssl_create(int port, const char *key, const char *cert
40064011
q->max_task_resources_requested = rmsummary_create(-1);
40074012

40084013
q->sandbox_grow_factor = 2.0;
4014+
q->disk_proportion_available_to_task = 0.75;
40094015

40104016
q->stats = calloc(1, sizeof(struct vine_stats));
40114017
q->stats_measure = calloc(1, sizeof(struct vine_stats));
@@ -5772,7 +5778,10 @@ int vine_tune(struct vine_manager *q, const char *name, double value)
57725778

57735779
} else if (!strcmp(name, "max-library-retries")) {
57745780
q->max_library_retries = MIN(1, value);
5775-
5781+
} else if (!strcmp(name, "disk-proportion-available-to-task")) {
5782+
if (value < 1 && value > 0) {
5783+
q->disk_proportion_available_to_task = value;
5784+
}
57765785
} else {
57775786
debug(D_NOTICE | D_VINE, "Warning: tuning parameter \"%s\" not recognized\n", name);
57785787
return -1;

taskvine/src/manager/vine_manager.h

+1
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ struct vine_manager {
229229
int watch_library_logfiles; /* If true, watch the output files produced by each of the library processes running on the remote workers, take them back the current logging directory */
230230

231231
double sandbox_grow_factor; /* When task disk sandboxes are exhausted, increase the allocation using their measured valued times this factor */
232+
double disk_proportion_available_to_task; /* intentionally reduces disk allocation for tasks to reserve some space for cache growth. */
232233

233234
/*todo: confirm datatype. int or int64*/
234235
int max_task_stdout_storage; /* Maximum size of standard output from task. (If larger, send to a separate file.) */

taskvine/test/vine_allocations.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -65,27 +65,32 @@ def check_task(category, category_mode, max, min, expected):
6565
with worker:
6666
q.tune("force-proportional-resources", 0)
6767

68+
# note that the disk is multiplied by a factor to reserve space for cache growth unless the user specifies the disk explicitly
69+
disk_proportion_available_to_task = 0.75
70+
q.tune("disk-proportion-available-to-task", disk_proportion_available_to_task) # the default factor is 0.75
71+
6872
r = {"cores": 1, "memory": 2, "disk": 3, "gpus": 4}
6973
check_task("all_specified", "fixed", max=r, min={}, expected=r)
7074

7175
check_task("all_specified_no_gpu", "fixed", max={"cores": 1, "memory": 2, "disk": 3}, min={}, expected={"cores": 1, "memory": 2, "disk": 3, "gpus": 0})
7276

7377
check_task("all_specified_no_cores", "fixed", max={"gpus": 4, "memory": 2, "disk": 3}, min={}, expected={"cores": 0, "memory": 2, "disk": 3, "gpus": 4})
7478

75-
check_task("all_zero", "fixed", max={"cores": 0, "memory": 0, "disk": 0, "gpus": 0}, min={}, expected={"cores": worker_cores, "memory": worker_memory, "disk": worker_disk, "gpus": 0})
79+
check_task("all_zero", "fixed", max={"cores": 0, "memory": 0, "disk": 0, "gpus": 0}, min={}, expected={"cores": worker_cores, "memory": worker_memory, "disk": worker_disk * disk_proportion_available_to_task, "gpus": 0})
7680

7781
q.tune("force-proportional-resources", 1)
78-
check_task("only_memory", "fixed", max={"memory": worker_memory / 2}, min={}, expected={"cores": worker_cores / 2, "memory": worker_memory / 2, "disk": worker_disk / 2, "gpus": 0})
7982

80-
check_task("only_memory_w_minimum", "fixed", max={"memory": worker_memory / 2}, min={"cores": 3, "gpus": 2}, expected={"cores": 4, "memory": worker_memory, "disk": worker_disk, "gpus": 2})
83+
check_task("only_memory", "fixed", max={"memory": worker_memory / 2}, min={}, expected={"cores": worker_cores / 2, "memory": worker_memory / 2, "disk": worker_disk / 2 * disk_proportion_available_to_task, "gpus": 0})
84+
85+
check_task("only_memory_w_minimum", "fixed", max={"memory": worker_memory / 2}, min={"cores": 3, "gpus": 2}, expected={"cores": 4, "memory": worker_memory, "disk": worker_disk * disk_proportion_available_to_task, "gpus": 2})
8186

82-
check_task("only_cores", "fixed", max={"cores": worker_cores}, min={}, expected={"cores": worker_cores, "memory": worker_memory, "disk": worker_disk, "gpus": 0})
87+
check_task("only_cores", "fixed", max={"cores": worker_cores}, min={}, expected={"cores": worker_cores, "memory": worker_memory, "disk": worker_disk * disk_proportion_available_to_task, "gpus": 0})
8388

84-
check_task("auto_whole_worker", "min_waste", max={}, min={}, expected={"cores": worker_cores, "memory": worker_memory, "disk": worker_disk, "gpus": 0})
89+
check_task("auto_whole_worker", "min_waste", max={}, min={}, expected={"cores": worker_cores, "memory": worker_memory, "disk": worker_disk * disk_proportion_available_to_task, "gpus": 0})
8590

8691
p = 1 / worker_cores
8792
r = {"cores": 1}
88-
e = {"cores": 1, "memory": math.floor(worker_memory * p), "disk": math.floor(worker_disk * p), "gpus": 0}
93+
e = {"cores": 1, "memory": math.floor(worker_memory * p), "disk": math.floor(worker_disk * p) * disk_proportion_available_to_task, "gpus": 0}
8994
check_task("only_cores_proportional", "fixed", max=r, min={}, expected=e)
9095

9196
p = 2 / worker_cores

0 commit comments

Comments
 (0)