@@ -841,7 +841,7 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
841841 parsec_gpu_task_t * gpu_task )
842842{
843843 parsec_task_t * this_task = gpu_task -> ec ;
844- parsec_gpu_data_copy_t * temp_loc [MAX_PARAM_COUNT ], * gpu_elem , * lru_gpu_elem ;
844+ parsec_gpu_data_copy_t * temp_loc [PARSEC_MAX_DEVICE_FLOWS ], * gpu_elem , * lru_gpu_elem ;
845845 parsec_data_t * master , * oldmaster ;
846846 const parsec_flow_t * flow ;
847847 int i , j , data_avail_epoch = 0 , copy_readers_update = 0 ;
@@ -1163,7 +1163,7 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
11631163 */
11641164int
11651165parsec_default_gpu_stage_in (parsec_gpu_task_t * gtask ,
1166- uint32_t flow_mask ,
1166+ parsec_flow_mask_t flow_mask ,
11671167 parsec_gpu_exec_stream_t * gpu_stream )
11681168{
11691169 int ret ;
@@ -1176,7 +1176,7 @@ parsec_default_gpu_stage_in(parsec_gpu_task_t *gtask,
11761176 parsec_device_transfer_direction_t dir ;
11771177
11781178 for (int i = 0 ; i < task -> task_class -> nb_flows ; i ++ ) {
1179- if ( !(flow_mask & ( 1U << i ) ) ) continue ;
1179+ if ( !PARSEC_CHECK_FLOW_MASK (flow_mask , i ) ) continue ;
11801180 source = gtask -> sources [i ];
11811181 dest = task -> data [i ].data_out ;
11821182 src_dev = (parsec_device_gpu_module_t * )parsec_mca_device_get (source -> device_index );
@@ -1213,7 +1213,7 @@ parsec_default_gpu_stage_in(parsec_gpu_task_t *gtask,
12131213 */
12141214int
12151215parsec_default_gpu_stage_out (parsec_gpu_task_t * gtask ,
1216- uint32_t flow_mask ,
1216+ parsec_flow_mask_t flow_mask ,
12171217 parsec_gpu_exec_stream_t * gpu_stream )
12181218{
12191219 int ret ;
@@ -1225,7 +1225,7 @@ parsec_default_gpu_stage_out(parsec_gpu_task_t *gtask,
12251225 parsec_device_transfer_direction_t dir ;
12261226 int i ;
12271227 for (i = 0 ; i < task -> task_class -> nb_flows ; i ++ ){
1228- if (flow_mask & ( 1U << i ) ){
1228+ if ( PARSEC_CHECK_FLOW_MASK ( flow_mask , i ) ){
12291229 source = task -> data [i ].data_out ;
12301230 dest = source -> original -> device_copies [0 ];
12311231 dst_dev = (parsec_device_gpu_module_t * )parsec_mca_device_get (dest -> device_index );
@@ -1497,7 +1497,7 @@ parsec_device_data_stage_in( parsec_device_gpu_module_t* gpu_device,
14971497#endif
14981498 gpu_task -> sources [flow -> flow_index ] = candidate ; /* save the candidate for release on transfer completion */
14991499 /* Push data into the GPU from the source device */
1500- int rc = gpu_task -> stage_in ? gpu_task -> stage_in (gpu_task , ( 1U << flow -> flow_index ), gpu_stream ): PARSEC_SUCCESS ;
1500+ int rc = gpu_task -> stage_in ? gpu_task -> stage_in (gpu_task , PARSEC_FLOW_MASK ( flow -> flow_index ), gpu_stream ): PARSEC_SUCCESS ;
15011501 if (PARSEC_SUCCESS != rc ) {
15021502 parsec_warning ( "GPU[%d:%s]: gpu_task->stage_in to device rc=%d @%s:%d\n"
15031503 "\t<<%p on device %d:%s>> -> <<%p on device %d:%s>> [%zu, %s]" ,
@@ -2117,7 +2117,7 @@ parsec_device_kernel_pop( parsec_device_gpu_module_t *gpu_device,
21172117 /* If the gpu copy is not owned by parsec, we don't manage it at all */
21182118 if ( 0 == (gpu_copy -> flags & PARSEC_DATA_FLAG_PARSEC_OWNED ) ) continue ;
21192119 original = gpu_copy -> original ;
2120- rc = gpu_task -> stage_out ? gpu_task -> stage_out (gpu_task , ( 1U << i ), gpu_stream ): PARSEC_SUCCESS ;
2120+ rc = gpu_task -> stage_out ? gpu_task -> stage_out (gpu_task , PARSEC_FLOW_MASK ( i ), gpu_stream ): PARSEC_SUCCESS ;
21212121 if (PARSEC_SUCCESS != rc ) {
21222122 parsec_warning ( "GPU[%d:%s]: gpu_task->stage_out from device rc=%d @%s:%d\n"
21232123 "\tdata %s <<%p>> -> <<%p>>\n" ,
@@ -2206,7 +2206,7 @@ parsec_device_kernel_pop( parsec_device_gpu_module_t *gpu_device,
22062206 assert ( ((parsec_list_item_t * )gpu_copy )-> list_prev == (parsec_list_item_t * )gpu_copy );
22072207
22082208 assert ( PARSEC_DATA_COHERENCY_OWNED == gpu_copy -> coherency_state );
2209- if ( gpu_task -> pushout & ( 1 << i ) ) {
2209+ if ( PARSEC_CHECK_FLOW_MASK ( gpu_task -> pushout , i ) ) {
22102210 /* TODO: make sure no readers are working on the CPU version */
22112211 original = gpu_copy -> original ;
22122212 PARSEC_DEBUG_VERBOSE (10 , parsec_gpu_output_stream ,
@@ -2238,7 +2238,7 @@ parsec_device_kernel_pop( parsec_device_gpu_module_t *gpu_device,
22382238 }
22392239#endif
22402240 /* Move the data back into main memory */
2241- rc = gpu_task -> stage_out ? gpu_task -> stage_out (gpu_task , ( 1U << flow -> flow_index ), gpu_stream ): PARSEC_SUCCESS ;
2241+ rc = gpu_task -> stage_out ? gpu_task -> stage_out (gpu_task , PARSEC_FLOW_MASK ( flow -> flow_index ), gpu_stream ): PARSEC_SUCCESS ;
22422242 if (PARSEC_SUCCESS != rc ) {
22432243 parsec_warning ( "GPU[%d:%s]: gpu_task->stage_out from device rc=%d @%s:%d\n"
22442244 "\tdata %s <<%p>> -> <<%p>>\n" ,
@@ -2342,7 +2342,7 @@ parsec_device_kernel_epilog( parsec_device_gpu_module_t *gpu_device,
23422342
23432343 assert ( 0 <= gpu_copy -> readers );
23442344
2345- if ( gpu_task -> pushout & ( 1 << i ) ) {
2345+ if ( PARSEC_CHECK_FLOW_MASK ( gpu_task -> pushout , i ) ) {
23462346 PARSEC_DEBUG_VERBOSE (20 , parsec_gpu_output_stream ,
23472347 "GPU copy %p [ref_count %d] moved to the read LRU in %s" ,
23482348 gpu_copy , gpu_copy -> super .super .obj_reference_count , __func__ );
0 commit comments