@@ -954,9 +954,9 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
954954
955955 /* Skip CTL flows only */
956956 if (PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & flow -> flow_flags )) {
957- gpu_task -> flow_nb_elts [i ] = 0 ; /* assume there is nothing to transfer to the GPU */
957+ gpu_task -> flow_nb_elts [i ] = 0 ; /* assume there is nothing to transfer to the GPU */
958958 continue ;
959- }
959+ }
960960
961961 PARSEC_DEBUG_VERBOSE (20 , parsec_gpu_output_stream ,
962962 "GPU[%d:%s]:%s: Investigating flow %s:%d" ,
@@ -971,7 +971,7 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
971971 gpu_device -> super .device_index , gpu_device -> super .name , task_name ,
972972 flow -> name , i , gpu_elem ,
973973 this_task -> data [i ].data_in -> data_transfer_status == PARSEC_DATA_STATUS_UNDER_TRANSFER ? " [in transfer]" : "" );
974- this_task -> data [i ].data_out = this_task -> data [i ].data_in ;
974+ this_task -> data [i ].data_out = this_task -> data [i ].data_in ;
975975 continue ;
976976 }
977977 master = this_task -> data [i ].data_in -> original ;
@@ -2466,7 +2466,10 @@ parsec_device_kernel_epilog( parsec_device_gpu_module_t *gpu_device,
24662466 gpu_copy -> coherency_state = PARSEC_DATA_COHERENCY_SHARED ;
24672467 assert (PARSEC_DATA_STATUS_UNDER_TRANSFER == cpu_copy -> data_transfer_status );
24682468 cpu_copy -> data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER ;
2469-
2469+ if ( 0 == (parsec_mpi_allow_gpu_memory_communications & PARSEC_RUNTIME_SEND_FROM_GPU_MEMORY ) ) {
2470+ /* Report the CPU copy as the output of the task. */
2471+ this_task -> data [i ].data_out = cpu_copy ;
2472+ }
24702473 PARSEC_DEBUG_VERBOSE (20 , parsec_gpu_output_stream ,
24712474 "GPU copy %p [ref_count %d] moved to the read LRU in %s" ,
24722475 gpu_copy , gpu_copy -> super .super .obj_reference_count , __func__ );
0 commit comments