# run_2_workers.sh
# (executable file · 42 lines, 36 loc · 1.13 KB)
# ---------------------------------------------------------------------------
# Environment for the Dask worker process.
#
# Per the Dask configuration documentation, DASK_-prefixed environment
# variables are translated into config keys, with a double underscore marking
# a nesting level (e.g. DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT ->
# distributed.comm.timeouts.connect).
# ---------------------------------------------------------------------------

# Connection / TCP timeouts and retry back-off bounds.
export DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="100s"
export DASK_DISTRIBUTED__COMM__TIMEOUTS__TCP="600s"
export DASK_DISTRIBUTED__COMM__RETRY__DELAY__MIN="1s"
export DASK_DISTRIBUTED__COMM__RETRY__DELAY__MAX="60s"

export DASK_DISTRIBUTED__SCHEDULER__WORK_STEALING=True

# NOTE(review): the mixed-case "Terminate" suffix is kept byte-for-byte.
# Dask is documented to lowercase variable names when collecting config, so
# this presumably maps to distributed.worker.memory.terminate — confirm
# before normalizing the spelling.
export DASK_DISTRIBUTED__WORKER__MEMORY__Terminate="False"

# NOTE(review): this value is exported, but the dask-cuda-worker command in
# this script passes a literal "4GB" to --device-memory-limit — confirm which
# of the two is intended.
export DEVICE_MEMORY_LIMIT="25GB"

# Total system RAM in MiB with an "M" suffix (e.g. "64321M").
# The query is separated from `export` so that a failure is not masked, runs
# under LC_ALL=C so the "Mem:" row label is not localized, and is validated so
# that a bogus unit-only value ("M") is never exported.
total_mem_mb=$(LC_ALL=C free -m 2>/dev/null | awk '/^Mem:/{print $2}')
if [[ "${total_mem_mb}" =~ ^[0-9]+$ ]]; then
  export MAX_SYSTEM_MEMORY="${total_mem_mb}M"
else
  printf 'warning: could not determine total system memory via free(1); MAX_SYSTEM_MEMORY left unchanged\n' >&2
fi
# Defaults used when the corresponding flag is not supplied.
ARG_INTERFACE=wlo1
ARG_HOSTNAME=ucx://10.0.0.23:8786

#######################################
# Parse command-line flags into ARG_INTERFACE / ARG_HOSTNAME.
# Globals:
#   ARG_INTERFACE (written) - value of -i
#   ARG_HOSTNAME  (written) - value of -h
# Arguments:
#   The script's arguments ("$@"):
#     -i IFACE   network interface name
#     -h ADDR    scheduler address (note: -h is the host flag, not "help")
# Outputs:
#   A diagnostic and usage line on stderr for an unknown flag or a flag
#   that is missing its argument.
# Returns:
#   0 on success; exits the shell with status 2 on a bad flag, so the
#   worker is never started against the defaults because of a typo.
#######################################
parse_args() {
  # Keep getopts state local so the function can be called more than once.
  local opt OPTIND=1 OPTARG
  # Leading ':' selects silent error reporting; the ':' and '?' arms below
  # produce the diagnostics instead.
  while getopts ':i:h:' opt; do
    case "${opt}" in
      h)
        ARG_HOSTNAME=${OPTARG}
        ;;
      i)
        ARG_INTERFACE=${OPTARG}
        ;;
      :)
        printf '%s: option -%s requires an argument\n' "${0##*/}" "${OPTARG}" >&2
        printf 'Usage: %s [-i interface] [-h scheduler_address]\n' "${0##*/}" >&2
        exit 2
        ;;
      \?)
        printf '%s: unknown option -%s\n' "${0##*/}" "${OPTARG}" >&2
        printf 'Usage: %s [-i interface] [-h scheduler_address]\n' "${0##*/}" >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
# ---------------------------------------------------------------------------
# Dask-cuda-worker
#
# Starts one dask-cuda-worker that connects to the scheduler at ARG_HOSTNAME
# and binds to the network interface ARG_INTERFACE (both set earlier in this
# script from defaults or the -h / -i flags).
# ---------------------------------------------------------------------------

# Fail early with a clear message instead of handing an empty word to the
# worker CLI (an empty interface would make --interface consume the next flag).
: "${ARG_HOSTNAME:?scheduler address is empty (pass -h ADDR)}"
: "${ARG_INTERFACE:?network interface is empty (pass -i IFACE)}"

# UCX settings exported for the worker process: the transport list, the
# sockaddr transport priority, the network device to use (the same interface
# the worker binds to), and the memory-type cache switched off.
export UCX_TLS=tcp,sockcm,cuda_copy,cuda_ipc
export UCX_SOCKADDR_TLS_PRIORITY=sockcm
export UCX_NET_DEVICES="${ARG_INTERFACE}"
export UCX_MEMTYPE_CACHE=n

# The variables on the continuation lines below are set for this one command
# only (they are not exported to the rest of the script). CUDA_VISIBLE_DEVICES=0
# limits the worker to GPU 0; the DASK_UCX__* toggles enable CUDA copy and TCP
# and disable NVLink, InfiniBand, RDMA-CM and endpoint reuse.
#
# NOTE(review): --device-memory-limit is a literal "4GB" here, while
# DEVICE_MEMORY_LIMIT="25GB" is exported earlier in this script — confirm which is
# intended before unifying them.
UCXPY_NON_BLOCKING_MODE=True \
  CUDA_VISIBLE_DEVICES=0 \
  DASK_UCX__CUDA_COPY=True \
  DASK_UCX__TCP=True \
  DASK_UCX__NVLINK=False \
  DASK_UCX__INFINIBAND=False \
  DASK_UCX__RDMACM=False \
  DASK_UCX__REUSE_ENDPOINTS=False \
  dask-cuda-worker "${ARG_HOSTNAME}" \
    --interface "${ARG_INTERFACE}" \
    --enable-tcp-over-ucx --device-memory-limit "4GB" --nthreads=8