Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Optimize use of shared_ptr on kernel enqueue fast path #17569

Merged
merged 9 commits into from
Mar 26, 2025
11 changes: 6 additions & 5 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ ProgramManager &ProgramManager::getInstance() {
}

static ur_program_handle_t
createBinaryProgram(const ContextImplPtr Context,
createBinaryProgram(const ContextImplPtr &Context,
const std::vector<device> &Devices,
const uint8_t **Binaries, size_t *Lengths,
const std::vector<ur_program_metadata_t> &Metadata) {
Expand Down Expand Up @@ -104,7 +104,7 @@ createBinaryProgram(const ContextImplPtr Context,
return Program;
}

static ur_program_handle_t createSpirvProgram(const ContextImplPtr Context,
static ur_program_handle_t createSpirvProgram(const ContextImplPtr &Context,
const unsigned char *Data,
size_t DataLen) {
ur_program_handle_t Program = nullptr;
Expand Down Expand Up @@ -369,7 +369,8 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts,

appendCompileOptionsForGRFSizeProperties(CompileOpts, Img, isEsimdImage);

const auto PlatformImpl = detail::getSyclObjImpl(Devs[0].get_platform());
platform Platform = Devs[0].get_platform();
const auto &PlatformImpl = detail::getSyclObjImpl(Platform);

// Add optimization flags.
auto str = getUint32PropAsOptStr(Img, "optLevel");
Expand Down Expand Up @@ -945,7 +946,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram(
}

std::vector<ur_device_handle_t> URDevices;
for (auto Dev : Devs)
for (auto &Dev : Devs)
URDevices.push_back(getSyclObjImpl(Dev).get()->getHandleRef());

ProgramPtr BuiltProgram =
Expand Down Expand Up @@ -1700,7 +1701,7 @@ static inline bool isDeviceImageCompressed(sycl_device_binary Bin) {
}

ProgramManager::ProgramPtr ProgramManager::build(
ProgramPtr Program, const ContextImplPtr Context,
ProgramPtr Program, const ContextImplPtr &Context,
const std::string &CompileOptions, const std::string &LinkOptions,
std::vector<ur_device_handle_t> &Devices, uint32_t DeviceLibReqMask,
const std::vector<ur_program_handle_t> &ExtraProgramsToLink,
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ class ProgramManager {

using ProgramPtr = std::unique_ptr<std::remove_pointer_t<ur_program_handle_t>,
decltype(&::urProgramRelease)>;
ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context,
ProgramPtr build(ProgramPtr Program, const ContextImplPtr &Context,
const std::string &CompileOptions,
const std::string &LinkOptions,
std::vector<ur_device_handle_t> &Devices,
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ static event prepareSYCLEventAssociatedWithQueue(
static event createDiscardedEvent() {
EventImplPtr EventImpl =
std::make_shared<event_impl>(event_impl::HES_Discarded);
return createSyclObjFromImpl<event>(EventImpl);
return createSyclObjFromImpl<event>(std::move(EventImpl));
}

const std::vector<event> &
Expand Down Expand Up @@ -386,7 +386,7 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,

addEvent(Event);

auto EventImpl = detail::getSyclObjImpl(Event);
const auto &EventImpl = detail::getSyclObjImpl(Event);
for (auto &Stream : Streams) {
// We don't want stream flushing to be a blocking operation, which is why we submit
// a host task to print stream buffer. It will fire up as soon as the kernel
Expand Down