forked from microsoft/pai
-
Notifications
You must be signed in to change notification settings - Fork 1
/
tensorflow.distributed-cifar10.json
29 lines (27 loc) · 1.56 KB
/
tensorflow.distributed-cifar10.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
{
  "jobName": "tensorflow-distributed-cifar10",
  "image": "openpai/pai.example.tensorflow",
  "dataDir": "$PAI_DEFAULT_FS_URI/examples/tensorflow/distributed-cifar-10/data",
  "outputDir": "$PAI_DEFAULT_FS_URI/examples/tensorflow/distributed-cifar-10/output",
  "codeDir": "$PAI_DEFAULT_FS_URI/examples/tensorflow/distributed-cifar-10/code",
  "taskRoles": [
    {
      "name": "ps_server",
      "taskNumber": 2,
      "cpuNumber": 2,
      "memoryMB": 8192,
      "gpuNumber": 0,
      "command": "pip --quiet install scipy absl-py && python code/benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --local_parameter_device=cpu --batch_size=32 --model=resnet20 --variable_update=parameter_server --data_dir=$PAI_DATA_DIR --data_name=cifar10 --train_dir=$PAI_OUTPUT_DIR --ps_hosts=$PAI_TASK_ROLE_ps_server_HOST_LIST --worker_hosts=$PAI_TASK_ROLE_worker_HOST_LIST --job_name=ps --task_index=$PAI_CURRENT_TASK_ROLE_CURRENT_TASK_INDEX"
    },
    {
      "name": "worker",
      "taskNumber": 2,
      "cpuNumber": 2,
      "memoryMB": 16384,
      "gpuNumber": 4,
      "command": "pip --quiet install scipy absl-py && python code/benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --local_parameter_device=cpu --batch_size=32 --model=resnet20 --variable_update=parameter_server --data_dir=$PAI_DATA_DIR --data_name=cifar10 --train_dir=$PAI_OUTPUT_DIR --ps_hosts=$PAI_TASK_ROLE_ps_server_HOST_LIST --worker_hosts=$PAI_TASK_ROLE_worker_HOST_LIST --job_name=worker --task_index=$PAI_CURRENT_TASK_ROLE_CURRENT_TASK_INDEX",
      "minSucceededTaskCount": 2
    }
  ],
  "retryCount": 0
}