@@ -300,7 +300,7 @@ void parsec_device_dump_exec_stream(parsec_gpu_exec_stream_t* exec_stream)
300300 int i ;
301301
302302 parsec_debug_verbose (0 , parsec_gpu_output_stream ,
303- "Dev: GPU stream %d {%p} [events = %d, start = %d, end = %d, executed = %d]" ,
303+ "Dev: GPU stream %s {%p} [events = %d, start = %d, end = %d, executed = %d]" ,
304304 exec_stream -> name , exec_stream , exec_stream -> max_events , exec_stream -> start , exec_stream -> end ,
305305 exec_stream -> executed );
306306 for ( i = 0 ; i < exec_stream -> max_events ; i ++ ) {
@@ -321,12 +321,12 @@ void parsec_device_dump_gpu_state(parsec_device_gpu_module_t* gpu_device)
321321 data_in_dev += gpu_device -> super .data_in_from_device [i ];
322322 }
323323
324- parsec_output (parsec_gpu_output_stream , "\n\n" );
325- parsec_output ( parsec_gpu_output_stream , "Device %d :%d (%p) epoch\n", gpu_device -> super . device_index ,
326- gpu_device -> super . device_index , gpu_device , gpu_device -> data_avail_epoch );
327- parsec_output ( parsec_gpu_output_stream , "\tpeer mask %x executed tasks with %llu streams %d \n" ,
328- gpu_device -> peer_access_mask , ( unsigned long long ) gpu_device -> super .executed_tasks , gpu_device -> num_exec_streams );
329- parsec_output ( parsec_gpu_output_stream , "\tstats transferred [in: %llu from host %llu from other device out: %llu] required [in: %llu out: %llu]\n" ,
324+ parsec_output (parsec_gpu_output_stream ,
325+ "\n\nDevice %s :%d (%p) epoch %zu \n"
326+ "\tpeer mask %x executed tasks %llu streams %d\n"
327+ "\tstats transferred [in: %llu from host %llu from other device out: %llu] required [in: %llu out: %llu] \n" ,
328+ gpu_device -> super . name , gpu_device -> super .device_index , gpu_device , gpu_device -> data_avail_epoch ,
329+ gpu_device -> peer_access_mask , ( unsigned long long ) gpu_device -> super . executed_tasks , gpu_device -> num_exec_streams ,
330330 (unsigned long long )data_in_host , (unsigned long long )data_in_dev ,
331331 (unsigned long long )gpu_device -> super .data_out_to_host ,
332332 (unsigned long long )gpu_device -> super .required_data_in , (unsigned long long )gpu_device -> super .required_data_out );
@@ -1030,7 +1030,7 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
10301030 for ( j = 0 ; j <= i ; j ++ ) {
10311031 /* This flow could be a control flow */
10321032 if ( NULL == temp_loc [j ] ) continue ;
1033- this_task -> data [j ].data_out = gpu_elem ; /* reset the data out */
1033+ this_task -> data [j ].data_out = NULL ; /* reset the data out */
10341034 /* This flow could be non-parsec-owned, in which case we can't reclaim it */
10351035 if ( 0 == (temp_loc [j ]-> flags & PARSEC_DATA_FLAG_PARSEC_OWNED ) ) continue ;
10361036 PARSEC_DEBUG_VERBOSE (20 , parsec_gpu_output_stream ,
@@ -1044,6 +1044,9 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
10441044 PARSEC_DATA_COPY_RELEASE (gpu_elem );
10451045#endif
10461046 parsec_atomic_unlock (& master -> lock );
1047+ if ( data_avail_epoch ) { /* update the memory epoch */
1048+ gpu_device -> data_avail_epoch ++ ;
1049+ }
10471050 return PARSEC_HOOK_RETURN_AGAIN ;
10481051 }
10491052
@@ -1382,7 +1385,7 @@ parsec_device_data_stage_in( parsec_device_gpu_module_t* gpu_device,
13821385 "GPU[%d:%s]: Prefetch task %p is staging in" ,
13831386 gpu_device -> super .device_index , gpu_device -> super .name , gpu_task );
13841387 }
1385- if ( NULL == gpu_elem ) {
1388+ if ( gpu_elem == candidate ) { /* data already located in the right place */
13861389 if ( candidate -> device_index == gpu_device -> super .device_index ) {
13871390 /* the candidate is already located on the GPU, no transfer should be necessary but let's do the bookkeeping */
13881391 if ( (PARSEC_FLOW_ACCESS_WRITE & type ) && (gpu_task -> task_type != PARSEC_GPU_TASK_TYPE_PREFETCH ) ) {
@@ -2105,7 +2108,7 @@ parsec_device_kernel_push( parsec_device_gpu_module_t *gpu_device,
21052108 gpu_task -> last_data_check_epoch = gpu_device -> data_avail_epoch ;
21062109 return ret ;
21072110 }
2108-
2111+ gpu_task -> last_status = 0 ; /* mark the task as clean */
21092112 for ( i = 0 ; i < this_task -> task_class -> nb_flows ; i ++ ) {
21102113
21112114 flow = gpu_task -> flow [i ];
@@ -2133,11 +2136,10 @@ parsec_device_kernel_push( parsec_device_gpu_module_t *gpu_device,
21332136 return ret ;
21342137 }
21352138 }
2136-
21372139 PARSEC_DEBUG_VERBOSE (10 , parsec_gpu_output_stream ,
21382140 "GPU[%d:%s]: Push task %s DONE" ,
21392141 gpu_device -> super .device_index , gpu_device -> super .name ,
2140- parsec_task_snprintf (tmp , MAX_TASK_STRLEN , this_task ) );
2142+ parsec_task_snprintf (tmp , MAX_TASK_STRLEN , this_task ));
21412143 gpu_task -> complete_stage = parsec_device_callback_complete_push ;
21422144#if defined(PARSEC_PROF_TRACE )
21432145 gpu_task -> prof_key_end = -1 ; /* We do not log that event as the completion of this task */
0 commit comments