forked from intel/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprogram_manager.cpp
3758 lines (3333 loc) · 152 KB
/
program_manager.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//==------ program_manager.cpp --- SYCL program manager---------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <detail/compiler.hpp>
#include <detail/config.hpp>
#include <detail/context_impl.hpp>
#include <detail/device_image_impl.hpp>
#include <detail/device_impl.hpp>
#include <detail/event_impl.hpp>
#include <detail/global_handler.hpp>
#include <detail/persistent_device_code_cache.hpp>
#include <detail/platform_impl.hpp>
#include <detail/program_manager/program_manager.hpp>
#include <detail/queue_impl.hpp>
#include <detail/spec_constant_impl.hpp>
#include <detail/split_string.hpp>
#include <detail/ur_info_code.hpp>
#include <sycl/aspects.hpp>
#include <sycl/backend_types.hpp>
#include <sycl/context.hpp>
#include <sycl/detail/common.hpp>
#include <sycl/detail/kernel_properties.hpp>
#include <sycl/detail/os_util.hpp>
#include <sycl/detail/type_traits.hpp>
#include <sycl/detail/util.hpp>
#include <sycl/device.hpp>
#include <sycl/exception.hpp>
#include <sycl/ext/oneapi/matrix/query-types.hpp>
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <variant>
namespace sycl {
inline namespace _V1 {
namespace detail {
using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
static constexpr int DbgProgMgr = 0;
static constexpr char UseSpvEnv[]("SYCL_USE_KERNEL_SPV");
/// This function enables ITT annotations in SPIR-V module by setting
/// a specialization constant if INTEL_LIBITTNOTIFY64 env variable is set.
static void enableITTAnnotationsIfNeeded(const ur_program_handle_t &Prog,
const AdapterPtr &Adapter) {
if (SYCLConfig<INTEL_ENABLE_OFFLOAD_ANNOTATIONS>::get() != nullptr) {
constexpr char SpecValue = 1;
ur_specialization_constant_info_t SpecConstInfo = {
ITTSpecConstId, sizeof(char), &SpecValue};
Adapter->call<UrApiKind::urProgramSetSpecializationConstants>(
Prog, 1, &SpecConstInfo);
}
}
ProgramManager &ProgramManager::getInstance() {
return GlobalHandler::instance().getProgramManager();
}
static ur_program_handle_t
createBinaryProgram(const ContextImplPtr &Context,
const std::vector<device> &Devices,
const uint8_t **Binaries, size_t *Lengths,
const std::vector<ur_program_metadata_t> &Metadata) {
const AdapterPtr &Adapter = Context->getAdapter();
ur_program_handle_t Program;
std::vector<ur_device_handle_t> DeviceHandles;
std::transform(
Devices.begin(), Devices.end(), std::back_inserter(DeviceHandles),
[](const device &Dev) { return getSyclObjImpl(Dev)->getHandleRef(); });
ur_result_t BinaryStatus = UR_RESULT_SUCCESS;
ur_program_properties_t Properties = {};
Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES;
Properties.pNext = nullptr;
Properties.count = Metadata.size();
Properties.pMetadatas = Metadata.data();
assert(Devices.size() > 0 && "No devices provided for program creation");
Adapter->call<UrApiKind::urProgramCreateWithBinary>(
Context->getHandleRef(), DeviceHandles.size(), DeviceHandles.data(),
Lengths, Binaries, &Properties, &Program);
if (BinaryStatus != UR_RESULT_SUCCESS) {
throw detail::set_ur_error(
exception(make_error_code(errc::runtime),
"Creating program with binary failed."),
BinaryStatus);
}
return Program;
}
static ur_program_handle_t createSpirvProgram(const ContextImplPtr &Context,
const unsigned char *Data,
size_t DataLen) {
ur_program_handle_t Program = nullptr;
const AdapterPtr &Adapter = Context->getAdapter();
Adapter->call<UrApiKind::urProgramCreateWithIL>(Context->getHandleRef(), Data,
DataLen, nullptr, &Program);
return Program;
}
// TODO replace this with a new UR API function
static bool isDeviceBinaryTypeSupported(const context &C,
ur::DeviceBinaryType Format) {
// All formats except SYCL_DEVICE_BINARY_TYPE_SPIRV are supported.
if (Format != SYCL_DEVICE_BINARY_TYPE_SPIRV)
return true;
const backend ContextBackend = detail::getSyclObjImpl(C)->getBackend();
// The CUDA backend cannot use SPIR-V
if (ContextBackend == backend::ext_oneapi_cuda)
return false;
std::vector<device> Devices = C.get_devices();
// Program type is SPIR-V, so we need a device compiler to do JIT.
for (const device &D : Devices) {
if (!D.get_info<info::device::is_compiler_available>())
return false;
}
// OpenCL 2.1 and greater require clCreateProgramWithIL
if (ContextBackend == backend::opencl) {
std::string ver = C.get_platform().get_info<info::platform::version>();
if (ver.find("OpenCL 1.0") == std::string::npos &&
ver.find("OpenCL 1.1") == std::string::npos &&
ver.find("OpenCL 1.2") == std::string::npos &&
ver.find("OpenCL 2.0") == std::string::npos)
return true;
}
for (const device &D : Devices) {
// We need cl_khr_il_program extension to be present
// and we can call clCreateProgramWithILKHR using the extension
std::vector<std::string> Extensions =
D.get_info<info::device::extensions>();
if (Extensions.end() ==
std::find(Extensions.begin(), Extensions.end(), "cl_khr_il_program"))
return false;
}
return true;
}
// getFormatStr is used for debug-printing, so it may be unused.
[[maybe_unused]] static const char *getFormatStr(ur::DeviceBinaryType Format) {
switch (Format) {
case SYCL_DEVICE_BINARY_TYPE_NONE:
return "none";
case SYCL_DEVICE_BINARY_TYPE_NATIVE:
return "native";
case SYCL_DEVICE_BINARY_TYPE_SPIRV:
return "SPIR-V";
case SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE:
return "LLVM IR";
case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE:
return "compressed none";
}
assert(false && "Unknown device image format");
return "unknown";
}
[[maybe_unused]] auto VecToString = [](auto &Vec) -> std::string {
std::ostringstream Out;
Out << "{";
for (auto Elem : Vec)
Out << Elem << " ";
Out << "}";
return Out.str();
};
ur_program_handle_t
ProgramManager::createURProgram(const RTDeviceBinaryImage &Img,
const context &Context,
const std::vector<device> &Devices) {
if constexpr (DbgProgMgr > 0) {
std::vector<ur_device_handle_t> URDevices;
std::transform(
Devices.begin(), Devices.end(), std::back_inserter(URDevices),
[](const device &Dev) { return getSyclObjImpl(Dev)->getHandleRef(); });
std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", "
<< getSyclObjImpl(Context).get() << ", " << VecToString(URDevices)
<< ")\n";
}
const sycl_device_binary_struct &RawImg = Img.getRawData();
// perform minimal sanity checks on the device image and the descriptor
if (RawImg.BinaryEnd < RawImg.BinaryStart) {
throw exception(make_error_code(errc::runtime),
"Malformed device program image descriptor");
}
if (RawImg.BinaryEnd == RawImg.BinaryStart) {
throw exception(make_error_code(errc::runtime),
"Invalid device program image: size is zero");
}
size_t ImgSize = Img.getSize();
// TODO if the binary image is a part of the fat binary, the clang
// driver should have set proper format option to the
// clang-offload-wrapper. The fix depends on AOT compilation
// implementation, so will be implemented together with it.
// Img->Format can't be updated as it is inside of the in-memory
// OS module binary.
ur::DeviceBinaryType Format = Img.getFormat();
if (Format == SYCL_DEVICE_BINARY_TYPE_NONE)
Format = ur::getBinaryImageFormat(RawImg.BinaryStart, ImgSize);
// sycl::detail::pi::PiDeviceBinaryType Format = Img->Format;
// assert(Format != SYCL_DEVICE_BINARY_TYPE_NONE && "Image format not set");
if (!isDeviceBinaryTypeSupported(Context, Format))
throw sycl::exception(
sycl::errc::feature_not_supported,
"SPIR-V online compilation is not supported in this context");
// Get program metadata from properties
const auto &ProgMetadata = Img.getProgramMetadataUR();
// Load the image
const ContextImplPtr Ctx = getSyclObjImpl(Context);
std::vector<const uint8_t *> Binaries(
Devices.size(), const_cast<uint8_t *>(RawImg.BinaryStart));
std::vector<size_t> Lengths(Devices.size(), ImgSize);
ur_program_handle_t Res =
Format == SYCL_DEVICE_BINARY_TYPE_SPIRV
? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize)
: createBinaryProgram(Ctx, Devices, Binaries.data(), Lengths.data(),
ProgMetadata);
{
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
// associate the UR program with the image it was created for
NativePrograms.insert({Res, {Ctx, &Img}});
}
Ctx->addDeviceGlobalInitializer(Res, Devices, &Img);
if constexpr (DbgProgMgr > 1)
std::cerr << "created program: " << Res
<< "; image format: " << getFormatStr(Format) << "\n";
return Res;
}
static void appendLinkOptionsFromImage(std::string &LinkOpts,
const RTDeviceBinaryImage &Img) {
static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
// Update only if link options are not overwritten by environment variable
if (!LinkOptsEnv) {
const char *TemporaryStr = Img.getLinkOptions();
if (TemporaryStr != nullptr) {
if (!LinkOpts.empty())
LinkOpts += " ";
LinkOpts += std::string(TemporaryStr);
}
}
}
static bool getUint32PropAsBool(const RTDeviceBinaryImage &Img,
const char *PropName) {
sycl_device_binary_property Prop = Img.getProperty(PropName);
return Prop && (DeviceBinaryProperty(Prop).asUint32() != 0);
}
static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img,
const char *PropName) {
sycl_device_binary_property Prop = Img.getProperty(PropName);
std::stringstream ss;
if (!Prop)
return "";
int optLevel = DeviceBinaryProperty(Prop).asUint32();
if (optLevel < 0 || optLevel > 3)
return "";
ss << "-O" << optLevel;
std::string temp = ss.str();
return temp;
}
static void
appendCompileOptionsForGRFSizeProperties(std::string &CompileOpts,
const RTDeviceBinaryImage &Img,
bool IsEsimdImage) {
// TODO: sycl-register-alloc-mode is deprecated and should be removed in the
// next ABI break.
sycl_device_binary_property RegAllocModeProp =
Img.getProperty("sycl-register-alloc-mode");
sycl_device_binary_property GRFSizeProp = Img.getProperty("sycl-grf-size");
if (!RegAllocModeProp && !GRFSizeProp)
return;
// The mutual exclusivity of these properties should have been checked in
// sycl-post-link.
assert(!RegAllocModeProp || !GRFSizeProp);
bool Is256GRF = false;
bool IsAutoGRF = false;
if (RegAllocModeProp) {
uint32_t RegAllocModePropVal =
DeviceBinaryProperty(RegAllocModeProp).asUint32();
Is256GRF = RegAllocModePropVal ==
static_cast<uint32_t>(register_alloc_mode_enum::large);
IsAutoGRF = RegAllocModePropVal ==
static_cast<uint32_t>(register_alloc_mode_enum::automatic);
} else {
assert(GRFSizeProp);
uint32_t GRFSizePropVal = DeviceBinaryProperty(GRFSizeProp).asUint32();
Is256GRF = GRFSizePropVal == 256;
IsAutoGRF = GRFSizePropVal == 0;
}
if (Is256GRF) {
if (!CompileOpts.empty())
CompileOpts += " ";
// This option works for both LO AND OCL backends.
CompileOpts += IsEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file";
}
if (IsAutoGRF) {
if (!CompileOpts.empty())
CompileOpts += " ";
// This option works for both LO AND OCL backends.
CompileOpts += "-ze-intel-enable-auto-large-GRF-mode";
}
}
static void appendCompileOptionsFromImage(std::string &CompileOpts,
const RTDeviceBinaryImage &Img,
const std::vector<device> &Devs,
const AdapterPtr &) {
// Build options are overridden if environment variables are present.
// Environment variables are not changed during program lifecycle so it
// is reasonable to use static here to read them only once.
static const char *CompileOptsEnv =
SYCLConfig<SYCL_PROGRAM_COMPILE_OPTIONS>::get();
// Update only if compile options are not overwritten by environment
// variable
if (!CompileOptsEnv) {
if (!CompileOpts.empty())
CompileOpts += " ";
const char *TemporaryStr = Img.getCompileOptions();
if (TemporaryStr != nullptr)
CompileOpts += std::string(TemporaryStr);
}
bool isEsimdImage = getUint32PropAsBool(Img, "isEsimdImage");
// The -vc-codegen option is always preserved for ESIMD kernels, regardless
// of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable.
if (isEsimdImage) {
if (!CompileOpts.empty())
CompileOpts += " ";
CompileOpts += "-vc-codegen";
// Allow warning and performance hints from vc/finalizer if the RT warning
// level is at least 1.
if (detail::SYCLConfig<detail::SYCL_RT_WARNING_LEVEL>::get() == 0)
CompileOpts += " -disable-finalizer-msg";
}
appendCompileOptionsForGRFSizeProperties(CompileOpts, Img, isEsimdImage);
platform Platform = Devs[0].get_platform();
const auto &PlatformImpl = detail::getSyclObjImpl(Platform);
// Add optimization flags.
auto str = getUint32PropAsOptStr(Img, "optLevel");
const char *optLevelStr = str.c_str();
// TODO: Passing these options to vector compiler causes build failure in
// backend. Will pass the flags once backend compilation issue is resolved.
// Update only if compile options are not overwritten by environment
// variable.
if (!isEsimdImage && !CompileOptsEnv && optLevelStr != nullptr &&
optLevelStr[0] != '\0') {
// Making sure all devices have the same platform.
assert(!Devs.empty() &&
std::all_of(Devs.begin(), Devs.end(), [&](const device &Dev) {
return Dev.get_platform() == Devs[0].get_platform();
}));
const char *backend_option = nullptr;
// Empty string is returned in backend_option when no appropriate backend
// option is available for a given frontend option.
PlatformImpl->getBackendOption(optLevelStr, &backend_option);
if (backend_option && backend_option[0] != '\0') {
if (!CompileOpts.empty())
CompileOpts += " ";
CompileOpts += std::string(backend_option);
}
}
bool IsIntelGPU =
(PlatformImpl->getBackend() == backend::ext_oneapi_level_zero ||
PlatformImpl->getBackend() == backend::opencl) &&
std::all_of(Devs.begin(), Devs.end(), [](const device &Dev) {
return Dev.is_gpu() &&
Dev.get_info<info::device::vendor_id>() == 0x8086;
});
if (!CompileOptsEnv) {
static const char *TargetCompileFast = "-ftarget-compile-fast";
if (auto Pos = CompileOpts.find(TargetCompileFast);
Pos != std::string::npos) {
const char *BackendOption = nullptr;
if (IsIntelGPU)
PlatformImpl->getBackendOption(TargetCompileFast, &BackendOption);
auto OptLen = strlen(TargetCompileFast);
if (IsIntelGPU && BackendOption && BackendOption[0] != '\0')
CompileOpts.replace(Pos, OptLen, BackendOption);
else
CompileOpts.erase(Pos, OptLen);
}
static const std::string TargetRegisterAllocMode =
"-ftarget-register-alloc-mode=";
auto OptPos = CompileOpts.find(TargetRegisterAllocMode);
while (OptPos != std::string::npos) {
auto EndOfOpt = CompileOpts.find(" ", OptPos);
// Extract everything after the equals until the end of the option
auto OptValue = CompileOpts.substr(
OptPos + TargetRegisterAllocMode.size(),
EndOfOpt - OptPos - TargetRegisterAllocMode.size());
auto ColonPos = OptValue.find(":");
auto Device = OptValue.substr(0, ColonPos);
std::string BackendStrToAdd;
bool IsPVC =
std::all_of(Devs.begin(), Devs.end(), [&](const device &Dev) {
return IsIntelGPU &&
(Dev.get_info<ext::intel::info::device::device_id>() &
0xFF00) == 0x0B00;
});
// Currently 'pvc' is the only supported device.
if (Device == "pvc" && IsPVC)
BackendStrToAdd = " " + OptValue.substr(ColonPos + 1) + " ";
// Extract everything before this option
std::string NewCompileOpts =
CompileOpts.substr(0, OptPos) + BackendStrToAdd;
// Extract everything after this option and add it to the above.
if (EndOfOpt != std::string::npos)
NewCompileOpts += CompileOpts.substr(EndOfOpt);
CompileOpts = NewCompileOpts;
OptPos = CompileOpts.find(TargetRegisterAllocMode);
}
constexpr std::string_view ReplaceOpts[] = {"-foffload-fp32-prec-div",
"-foffload-fp32-prec-sqrt"};
for (const std::string_view Opt : ReplaceOpts) {
if (auto Pos = CompileOpts.find(Opt); Pos != std::string::npos) {
const char *BackendOption = nullptr;
PlatformImpl->getBackendOption(std::string(Opt).c_str(),
&BackendOption);
CompileOpts.replace(Pos, Opt.length(), BackendOption);
}
}
}
}
static void
appendCompileEnvironmentVariablesThatAppend(std::string &CompileOpts) {
static const char *AppendCompileOptsEnv =
SYCLConfig<SYCL_PROGRAM_APPEND_COMPILE_OPTIONS>::get();
if (AppendCompileOptsEnv) {
if (!CompileOpts.empty())
CompileOpts += " ";
CompileOpts += AppendCompileOptsEnv;
}
}
static void appendLinkEnvironmentVariablesThatAppend(std::string &LinkOpts) {
static const char *AppendLinkOptsEnv =
SYCLConfig<SYCL_PROGRAM_APPEND_LINK_OPTIONS>::get();
if (AppendLinkOptsEnv) {
if (!LinkOpts.empty())
LinkOpts += " ";
LinkOpts += AppendLinkOptsEnv;
}
}
static void applyOptionsFromImage(std::string &CompileOpts,
std::string &LinkOpts,
const RTDeviceBinaryImage &Img,
const std::vector<device> &Devices,
const AdapterPtr &Adapter) {
appendCompileOptionsFromImage(CompileOpts, Img, Devices, Adapter);
appendLinkOptionsFromImage(LinkOpts, Img);
}
static void applyCompileOptionsFromEnvironment(std::string &CompileOpts) {
// Environment variables are not changed during program lifecycle so it
// is reasonable to use static here to read them only once.
static const char *CompileOptsEnv =
SYCLConfig<SYCL_PROGRAM_COMPILE_OPTIONS>::get();
if (CompileOptsEnv) {
CompileOpts = CompileOptsEnv;
}
}
static void applyLinkOptionsFromEnvironment(std::string &LinkOpts) {
// Environment variables are not changed during program lifecycle so it
// is reasonable to use static here to read them only once.
static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
if (LinkOptsEnv) {
LinkOpts = LinkOptsEnv;
}
}
static void applyOptionsFromEnvironment(std::string &CompileOpts,
std::string &LinkOpts) {
// Build options are overridden if environment variables are present.
applyCompileOptionsFromEnvironment(CompileOpts);
applyLinkOptionsFromEnvironment(LinkOpts);
}
std::pair<ur_program_handle_t, bool> ProgramManager::getOrCreateURProgram(
const RTDeviceBinaryImage &MainImg,
const std::vector<const RTDeviceBinaryImage *> &AllImages,
const context &Context, const std::vector<device> &Devices,
const std::string &CompileAndLinkOptions, SerializedObj SpecConsts) {
ur_program_handle_t NativePrg;
// Get binaries for each device (1:1 correpsondence with input Devices).
auto Binaries = PersistentDeviceCodeCache::getItemFromDisc(
Devices, AllImages, SpecConsts, CompileAndLinkOptions);
if (!Binaries.empty()) {
std::vector<const uint8_t *> BinPtrs;
std::vector<size_t> Lengths;
for (auto &Bin : Binaries) {
Lengths.push_back(Bin.size());
BinPtrs.push_back(reinterpret_cast<const uint8_t *>(Bin.data()));
}
// Get program metadata from properties
std::vector<ur_program_metadata_t> ProgMetadataVector;
for (const RTDeviceBinaryImage *Img : AllImages) {
auto &ImgProgMetadata = Img->getProgramMetadataUR();
ProgMetadataVector.insert(ProgMetadataVector.end(),
ImgProgMetadata.begin(), ImgProgMetadata.end());
}
NativePrg =
createBinaryProgram(getSyclObjImpl(Context), Devices, BinPtrs.data(),
Lengths.data(), ProgMetadataVector);
} else {
NativePrg = createURProgram(MainImg, Context, Devices);
}
return {NativePrg, Binaries.size()};
}
/// Emits information about built programs if the appropriate contitions are
/// met, namely when SYCL_RT_WARNING_LEVEL is greater than or equal to 2.
static void emitBuiltProgramInfo(const ur_program_handle_t &Prog,
const ContextImplPtr &Context) {
if (SYCLConfig<SYCL_RT_WARNING_LEVEL>::get() >= 2) {
std::string ProgramBuildLog =
ProgramManager::getProgramBuildLog(Prog, Context);
std::clog << ProgramBuildLog << std::endl;
}
}
static const char *getUrDeviceTarget(const char *URDeviceTarget) {
if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_UNKNOWN) == 0)
return UR_DEVICE_BINARY_TARGET_UNKNOWN;
else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV32) == 0)
return UR_DEVICE_BINARY_TARGET_SPIRV32;
else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0)
return UR_DEVICE_BINARY_TARGET_SPIRV64;
else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) ==
0)
return UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64;
else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0)
return UR_DEVICE_BINARY_TARGET_SPIRV64_GEN;
else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) ==
0)
return UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA;
else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0)
return UR_DEVICE_BINARY_TARGET_NVPTX64;
else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0)
return UR_DEVICE_BINARY_TARGET_AMDGCN;
else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == 0)
return "native_cpu"; // todo: define UR_DEVICE_BINARY_TARGET_NATIVE_CPU;
return UR_DEVICE_BINARY_TARGET_UNKNOWN;
}
static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage,
const device &Dev) {
const std::shared_ptr<detail::device_impl> &DeviceImpl =
detail::getSyclObjImpl(Dev);
auto &Adapter = DeviceImpl->getAdapter();
const ur_device_handle_t &URDeviceHandle = DeviceImpl->getHandleRef();
// Call urDeviceSelectBinary with only one image to check if an image is
// compatible with implementation. The function returns invalid index if no
// device images are compatible.
uint32_t SuitableImageID = std::numeric_limits<uint32_t>::max();
sycl_device_binary DevBin =
const_cast<sycl_device_binary>(&BinImage->getRawData());
ur_device_binary_t UrBinary{};
UrBinary.pDeviceTargetSpec = getUrDeviceTarget(DevBin->DeviceTargetSpec);
ur_result_t Error = Adapter->call_nocheck<UrApiKind::urDeviceSelectBinary>(
URDeviceHandle, &UrBinary,
/*num bin images = */ (uint32_t)1, &SuitableImageID);
if (Error != UR_RESULT_SUCCESS && Error != UR_RESULT_ERROR_INVALID_BINARY)
throw detail::set_ur_error(exception(make_error_code(errc::runtime),
"Invalid binary image or device"),
Error);
return (0 == SuitableImageID);
}
// Quick check to see whether BinImage is a compiler-generated device image.
static bool isSpecialDeviceImage(RTDeviceBinaryImage *BinImage) {
// SYCL devicelib image.
if (BinImage->getDeviceLibMetadata().isAvailable())
return true;
return false;
}
static bool isSpecialDeviceImageShouldBeUsed(RTDeviceBinaryImage *BinImage,
const device &Dev) {
// Decide whether a devicelib image should be used.
if (BinImage->getDeviceLibMetadata().isAvailable()) {
const RTDeviceBinaryImage::PropertyRange &DeviceLibMetaProp =
BinImage->getDeviceLibMetadata();
uint32_t DeviceLibMeta =
DeviceBinaryProperty(*(DeviceLibMetaProp.begin())).asUint32();
// Currently, only bfloat conversion devicelib are supported, so the prop
// DeviceLibMeta are only used to represent fallback or native version.
// For bfloat16 conversion devicelib, we have fallback and native version.
// The native should be used on platform which supports native bfloat16
// conversion capability and fallback version should be used on all other
// platforms. The native bfloat16 capability can be queried via extension.
// TODO: re-design the encode of the devicelib metadata if we must support
// more devicelib images in this way.
enum { DEVICELIB_FALLBACK = 0, DEVICELIB_NATIVE };
const std::shared_ptr<detail::device_impl> &DeviceImpl =
detail::getSyclObjImpl(Dev);
std::string NativeBF16ExtName = "cl_intel_bfloat16_conversions";
bool NativeBF16Supported = (DeviceImpl->has_extension(NativeBF16ExtName));
return NativeBF16Supported == (DeviceLibMeta == DEVICELIB_NATIVE);
}
return false;
}
static bool checkLinkingSupport(const device &Dev,
const RTDeviceBinaryImage &Img) {
const char *Target = Img.getRawData().DeviceTargetSpec;
// TODO replace with extension checks once implemented in UR.
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) {
return true;
}
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) {
return Dev.is_gpu() && Dev.get_backend() == backend::opencl;
}
return false;
}
std::set<RTDeviceBinaryImage *>
ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img,
const device &Dev) {
// TODO collecting dependencies for virtual functions and imported symbols
// should be combined since one can lead to new unresolved dependencies for
// the other.
std::set<RTDeviceBinaryImage *> DeviceImagesToLink =
collectDependentDeviceImagesForVirtualFunctions(Img, Dev);
std::set<RTDeviceBinaryImage *> ImageDeps =
collectDeviceImageDepsForImportedSymbols(Img, Dev);
DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end());
return DeviceImagesToLink;
}
std::set<RTDeviceBinaryImage *>
ProgramManager::collectDeviceImageDepsForImportedSymbols(
const RTDeviceBinaryImage &MainImg, const device &Dev) {
std::set<RTDeviceBinaryImage *> DeviceImagesToLink;
std::set<std::string> HandledSymbols;
std::queue<std::string> WorkList;
for (const sycl_device_binary_property &ISProp :
MainImg.getImportedSymbols()) {
WorkList.push(ISProp->Name);
HandledSymbols.insert(ISProp->Name);
}
ur::DeviceBinaryType Format = MainImg.getFormat();
if (!WorkList.empty() && !checkLinkingSupport(Dev, MainImg))
throw exception(make_error_code(errc::feature_not_supported),
"Cannot resolve external symbols, linking is unsupported "
"for the backend");
while (!WorkList.empty()) {
std::string Symbol = WorkList.front();
WorkList.pop();
auto Range = m_ExportedSymbolImages.equal_range(Symbol);
bool Found = false;
for (auto It = Range.first; It != Range.second; ++It) {
RTDeviceBinaryImage *Img = It->second;
if (Img->getFormat() != Format ||
!doesDevSupportDeviceRequirements(Dev, *Img) ||
!compatibleWithDevice(Img, Dev))
continue;
if (isSpecialDeviceImage(Img) &&
!isSpecialDeviceImageShouldBeUsed(Img, Dev))
continue;
DeviceImagesToLink.insert(Img);
Found = true;
for (const sycl_device_binary_property &ISProp :
Img->getImportedSymbols()) {
if (HandledSymbols.insert(ISProp->Name).second)
WorkList.push(ISProp->Name);
}
break;
}
if (!Found)
throw sycl::exception(make_error_code(errc::build),
"No device image found for external symbol " +
Symbol);
}
DeviceImagesToLink.erase(const_cast<RTDeviceBinaryImage *>(&MainImg));
return DeviceImagesToLink;
}
std::set<RTDeviceBinaryImage *>
ProgramManager::collectDependentDeviceImagesForVirtualFunctions(
const RTDeviceBinaryImage &Img, const device &Dev) {
// If virtual functions are used in a program, then we need to link several
// device images together to make sure that vtable pointers stored in
// objects are valid between different kernels (which could be in different
// device images).
std::set<RTDeviceBinaryImage *> DeviceImagesToLink;
// KernelA may use some set-a, which is also used by KernelB that in turn
// uses set-b, meaning that this search should be recursive. The set below
// is used to stop that recursion, i.e. to avoid looking at sets we have
// already seen.
std::set<std::string> HandledSets;
std::queue<std::string> WorkList;
for (const sycl_device_binary_property &VFProp : Img.getVirtualFunctions()) {
std::string StrValue = DeviceBinaryProperty(VFProp).asCString();
// Device image passed to this function is expected to contain SYCL kernels
// and therefore it may only use virtual function sets, but cannot provide
// them. We expect to see just a single property here
assert(std::string(VFProp->Name) == "uses-virtual-functions-set" &&
"Unexpected virtual function property");
for (const auto &SetName : detail::split_string(StrValue, ',')) {
WorkList.push(SetName);
HandledSets.insert(SetName);
}
}
while (!WorkList.empty()) {
std::string SetName = WorkList.front();
WorkList.pop();
// There could be more than one device image that uses the same set
// of virtual functions, or provides virtual funtions from the same
// set.
for (RTDeviceBinaryImage *BinImage : m_VFSet2BinImage[SetName]) {
// Here we can encounter both uses-virtual-functions-set and
// virtual-functions-set properties, but their handling is the same: we
// just grab all sets they reference and add them for consideration if
// we haven't done so already.
for (const sycl_device_binary_property &VFProp :
BinImage->getVirtualFunctions()) {
std::string StrValue = DeviceBinaryProperty(VFProp).asCString();
for (const auto &SetName : detail::split_string(StrValue, ',')) {
if (HandledSets.insert(SetName).second)
WorkList.push(SetName);
}
}
// TODO: Complete this part about handling of incompatible device images.
// If device image uses the same virtual function set, then we only
// link it if it is compatible.
// However, if device image provides virtual function set and it is
// incompatible, then we should link its "dummy" version to avoid link
// errors about unresolved external symbols.
if (doesDevSupportDeviceRequirements(Dev, *BinImage))
DeviceImagesToLink.insert(BinImage);
}
}
// We may have inserted the original image into the list as well, because it
// is also a part of m_VFSet2BinImage map. No need to to return it to avoid
// passing it twice to link call later.
DeviceImagesToLink.erase(const_cast<RTDeviceBinaryImage *>(&Img));
return DeviceImagesToLink;
}
static void
setSpecializationConstants(const std::shared_ptr<device_image_impl> &InputImpl,
ur_program_handle_t Prog,
const AdapterPtr &Adapter) {
std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
const std::map<std::string, std::vector<device_image_impl::SpecConstDescT>>
&SpecConstData = InputImpl->get_spec_const_data_ref();
const SerializedObj &SpecConsts = InputImpl->get_spec_const_blob_ref();
// Set all specialization IDs from descriptors in the input device image.
for (const auto &[SpecConstNames, SpecConstDescs] : SpecConstData) {
std::ignore = SpecConstNames;
for (const device_image_impl::SpecConstDescT &SpecIDDesc : SpecConstDescs) {
if (SpecIDDesc.IsSet) {
ur_specialization_constant_info_t SpecConstInfo = {
SpecIDDesc.ID, SpecIDDesc.Size,
SpecConsts.data() + SpecIDDesc.BlobOffset};
Adapter->call<UrApiKind::urProgramSetSpecializationConstants>(
Prog, 1, &SpecConstInfo);
}
}
}
}
static inline void
CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) {
#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE
if (auto CompImg = dynamic_cast<CompressedRTDeviceBinaryImage *>(Img))
if (CompImg->IsCompressed())
CompImg->Decompress();
#endif
}
// When caching is enabled, the returned UrProgram will already have
// its ref count incremented.
ur_program_handle_t ProgramManager::getBuiltURProgram(
const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl,
const std::string &KernelName, const NDRDescT &NDRDesc) {
// Check if we can optimize program builds for sub-devices by using a program
// built for the root device
DeviceImplPtr RootDevImpl = DeviceImpl;
while (!RootDevImpl->isRootDevice()) {
auto ParentDev = detail::getSyclObjImpl(
RootDevImpl->get_info<info::device::parent_device>());
// Sharing is allowed within a single context only
if (!ContextImpl->hasDevice(ParentDev))
break;
RootDevImpl = std::move(ParentDev);
}
ur_bool_t MustBuildOnSubdevice = true;
ContextImpl->getAdapter()->call<UrApiKind::urDeviceGetInfo>(
RootDevImpl->getHandleRef(), UR_DEVICE_INFO_BUILD_ON_SUBDEVICE,
sizeof(ur_bool_t), &MustBuildOnSubdevice, nullptr);
auto Context = createSyclObjFromImpl<context>(ContextImpl);
auto Device = createSyclObjFromImpl<device>(
MustBuildOnSubdevice == true ? DeviceImpl : RootDevImpl);
const RTDeviceBinaryImage &Img = getDeviceImage(KernelName, Context, Device);
// Check that device supports all aspects used by the kernel
if (auto exception = checkDevSupportDeviceRequirements(Device, Img, NDRDesc))
throw *exception;
std::set<RTDeviceBinaryImage *> DeviceImagesToLink =
collectDeviceImageDeps(Img, {Device});
// Decompress all DeviceImagesToLink
for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink)
CheckAndDecompressImage(BinImg);
std::vector<const RTDeviceBinaryImage *> AllImages;
AllImages.reserve(DeviceImagesToLink.size() + 1);
AllImages.push_back(&Img);
std::copy(DeviceImagesToLink.begin(), DeviceImagesToLink.end(),
std::back_inserter(AllImages));
return getBuiltURProgram(std::move(AllImages), Context, {std::move(Device)});
}
ur_program_handle_t ProgramManager::getBuiltURProgram(
const BinImgWithDeps &ImgWithDeps, const context &Context,
const std::vector<device> &Devs, const DevImgPlainWithDeps *DevImgWithDeps,
const SerializedObj &SpecConsts) {
std::string CompileOpts;
std::string LinkOpts;
applyOptionsFromEnvironment(CompileOpts, LinkOpts);
auto BuildF = [this, &ImgWithDeps, &DevImgWithDeps, &Context, &Devs,
&CompileOpts, &LinkOpts, &SpecConsts] {
const ContextImplPtr &ContextImpl = getSyclObjImpl(Context);
const AdapterPtr &Adapter = ContextImpl->getAdapter();
const RTDeviceBinaryImage &MainImg = *ImgWithDeps.getMain();
applyOptionsFromImage(CompileOpts, LinkOpts, MainImg, Devs, Adapter);
// Should always come last!
appendCompileEnvironmentVariablesThatAppend(CompileOpts);
appendLinkEnvironmentVariablesThatAppend(LinkOpts);
auto [NativePrg, DeviceCodeWasInCache] =
getOrCreateURProgram(MainImg, ImgWithDeps.getAll(), Context, Devs,
CompileOpts + LinkOpts, SpecConsts);
if (!DeviceCodeWasInCache && MainImg.supportsSpecConstants()) {
enableITTAnnotationsIfNeeded(NativePrg, Adapter);
if (DevImgWithDeps)
setSpecializationConstants(getSyclObjImpl(DevImgWithDeps->getMain()),
NativePrg, Adapter);
}
UrFuncInfo<UrApiKind::urProgramRelease> programReleaseInfo;
auto programRelease =
programReleaseInfo.getFuncPtrFromModule(ur::getURLoaderLibrary());
ProgramPtr ProgramManaged(NativePrg, programRelease);
// Link a fallback implementation of device libraries if they are not
// supported by a device compiler.
// Pre-compiled programs (after AOT compilation or read from persitent
// cache) are supposed to be already linked.
// If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
// no fallback device library will be linked.
uint32_t DeviceLibReqMask = 0;
bool UseDeviceLibs = !DeviceCodeWasInCache &&
MainImg.getFormat() == SYCL_DEVICE_BINARY_TYPE_SPIRV &&
!SYCLConfig<SYCL_DEVICELIB_NO_FALLBACK>::get();
if (UseDeviceLibs)
DeviceLibReqMask = getDeviceLibReqMask(MainImg);
std::vector<ur_program_handle_t> ProgramsToLink;
// If we had a program in cache, then it should have been the fully linked
// program already.
if (!DeviceCodeWasInCache) {
assert(!DevImgWithDeps ||
DevImgWithDeps->getAll().size() == ImgWithDeps.getAll().size());
// Oth image is the main one and has been handled, skip it.
for (std::size_t I = 1; I < ImgWithDeps.getAll().size(); ++I) {
const RTDeviceBinaryImage *BinImg = ImgWithDeps.getAll()[I];
if (UseDeviceLibs)
DeviceLibReqMask |= getDeviceLibReqMask(*BinImg);
ur_program_handle_t NativePrg = createURProgram(*BinImg, Context, Devs);
if (BinImg->supportsSpecConstants()) {
enableITTAnnotationsIfNeeded(NativePrg, Adapter);
if (DevImgWithDeps)
setSpecializationConstants(
getSyclObjImpl(DevImgWithDeps->getAll()[I]), NativePrg,
Adapter);
}
ProgramsToLink.push_back(NativePrg);
}
}
std::vector<ur_device_handle_t> URDevices;
for (auto &Dev : Devs)
URDevices.push_back(getSyclObjImpl(Dev).get()->getHandleRef());
ProgramPtr BuiltProgram =
build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
URDevices, DeviceLibReqMask, ProgramsToLink,
/*CreatedFromBinary*/ MainImg.getFormat() !=
SYCL_DEVICE_BINARY_TYPE_SPIRV);
// Those extra programs won't be used anymore, just the final linked result
for (ur_program_handle_t Prg : ProgramsToLink)
Adapter->call<UrApiKind::urProgramRelease>(Prg);
emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
{
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
// NativePrograms map does not intend to keep reference to program handle,
// so keys in the map can be invalid (reference count went to zero and the
// underlying program disposed of). Protecting from incorrect values by
// removal of map entries with same handle (obviously invalid entries).
std::ignore = NativePrograms.erase(BuiltProgram.get());
for (const RTDeviceBinaryImage *Img : ImgWithDeps) {
NativePrograms.insert({BuiltProgram.get(), {ContextImpl, Img}});
}
}
ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &MainImg);
// Save program to persistent cache if it is not there
if (!DeviceCodeWasInCache) {
PersistentDeviceCodeCache::putItemToDisc(
Devs, ImgWithDeps.getAll(), SpecConsts, CompileOpts + LinkOpts,
BuiltProgram.get());
}
return BuiltProgram.release();
};
if (!SYCLConfig<SYCL_CACHE_IN_MEM>::get())
return BuildF();
uint32_t ImgId = ImgWithDeps.getMain()->getImageID();
std::set<ur_device_handle_t> URDevicesSet;
std::transform(Devs.begin(), Devs.end(),
std::inserter(URDevicesSet, URDevicesSet.begin()),
[](const device &Dev) {
return getSyclObjImpl(Dev).get()->getHandleRef();
});
auto CacheKey =
std::make_pair(std::make_pair(SpecConsts, ImgId), URDevicesSet);
const ContextImplPtr &ContextImpl = getSyclObjImpl(Context);