|
93 | 93 | "model": "dlrm",
|
94 | 94 | "max-batchsize": 2048,
|
95 | 95 | },
|
96 |
| - |
97 |
| - |
| 96 | + "tf_dlrm-kaggle-tensorflow": { |
| 97 | + "dataset": "kaggle", |
| 98 | + "inputs": "continuous and categorical features", |
| 99 | + "outputs": "probability", |
| 100 | + "backend": "tensorflow", |
| 101 | + "model": "tf_dlrm", |
| 102 | + "max-batchsize": 128, |
| 103 | + }, |
| 104 | + "tf_dlrm-terabyte-tensorflow": { |
| 105 | + "dataset": "terabyte", |
| 106 | + "inputs": "continuous and categorical features", |
| 107 | + "outputs": "probability", |
| 108 | + "backend": "tensorflow", |
| 109 | + "model": "tf_dlrm", |
| 110 | + "max-batchsize": 2048, |
| 111 | + }, |
98 | 112 | }
|
99 | 113 |
|
100 | 114 | SCENARIO_MAP = {
|
@@ -253,6 +267,39 @@ def get_backend(backend, dataset, max_ind_range, data_sub_sample_rate, use_gpu):
|
253 | 267 | else:
|
254 | 268 | raise ValueError("only kaggle|terabyte dataset options are supported")
|
255 | 269 |
|
| 270 | + elif backend == "tensorflow": |
| 271 | + from backend_tf import BackendTF |
| 272 | + # NOTE: pass model parameters here, the following options are available |
| 273 | + if dataset == "kaggle": |
| 274 | + # 1. Criteo Kaggle Display Advertisement Challenge Dataset (see ./bench/dlrm_s_criteo_kaggle.sh) |
| 275 | + backend = BackendTF( |
| 276 | + dim_embed=16, |
| 277 | + vocab_sizes=np.array([1460,583,10131227,2202608,305,24,12517,633,3,93145,5683,8351593,3194,27,14992,5461306,10,5652,2173,4,7046547,18,15,286181,105,142572]), |
| 278 | + mlp_bottom=np.array([13,512,256,64,16]), |
| 279 | + mlp_top=np.array([367,512,256,1]), |
| 280 | + ) |
| 281 | + elif dataset == "terabyte": |
| 282 | + if max_ind_range == 10000000: |
| 283 | + # 2. Criteo Terabyte (see ./bench/dlrm_s_criteo_terabyte.sh [--sub-sample=0.875] --max-ind-range=10000000) |
| 284 | + backend = BackendTF( |
| 285 | + dim_embed=64, |
| 286 | + vocab_sizes=np.array([9980333,36084,17217,7378,20134,3,7112,1442,61, 9758201,1333352,313829,10,2208,11156,122,4,970,14, 9994222, 7267859, 9946608,415421,12420,101, 36]), |
| 287 | + mlp_bottom=np.array([13,512,256,64]), |
| 288 | + mlp_top=np.array([415,512,512,256,1]), |
| 289 | + ) |
| 290 | + elif max_ind_range == 40000000: |
| 291 | + # 3. Criteo Terabyte MLPerf training (see ./bench/run_and_time.sh --max-ind-range=40000000) |
| 292 | + backend = BackendTF( |
| 293 | + dim_embed=128, |
| 294 | + vocab_sizes=np.array([39884406,39043,17289,7420,20263,3,7120,1543,63,38532951,2953546,403346,10,2208,11938,155,4,976,14,39979771,25641295,39664984,585935,12972,108,36]), |
| 295 | + mlp_bottom=np.array([13,512,256,128]), |
| 296 | + mlp_top=np.array([479,1024,1024,512,256,1]), |
| 297 | + ) |
| 298 | + else: |
| 299 | + raise ValueError("only --max-in-range 10M or 40M is supported") |
| 300 | + else: |
| 301 | + raise ValueError("only kaggle|terabyte dataset options are supported") |
| 302 | + |
256 | 303 | else:
|
257 | 304 | raise ValueError("unknown backend: " + backend)
|
258 | 305 | return backend
|
|
0 commit comments