diff --git a/rocclr/compiler/lib/utils/OPTIONS.def b/rocclr/compiler/lib/utils/OPTIONS.def index b8b877386..a8a906365 100644 --- a/rocclr/compiler/lib/utils/OPTIONS.def +++ b/rocclr/compiler/lib/utils/OPTIONS.def @@ -407,6 +407,14 @@ NOPTION(OT_BOOL, \ true, 0, 0, NULL, \ "To enable the generation of 32-bit gpu isa code (default)") +// -fper-pointer-uav, -fno-per-pointer-uav +OPTION(OT_BOOL, \ + OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F, \ + "per-pointer-uav", NULL, \ + PerPointerUAV, \ + false, 0, 0, NULL, \ + "Specify that UAVs per pointer should be used(HD5XXX and HD6XXX series GPU\'s only).") + // -fbin-source -fno-bin-source OPTION(OT_BOOL, \ OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F|OVIS_SUPPORT, \ @@ -415,6 +423,14 @@ OPTION(OT_BOOL, \ false, 0, 0, NULL, \ "Allow OpenCL binary to [not] have SOURCE") +// -fonly-bin-source +OPTION(OT_BOOL, \ + OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F|OVIS_SUPPORT, \ + "only-bin-source", NULL, \ + OnlyBinSOURCE, \ + false, 0, 0, NULL, \ + "Allow OpenCL binary to [not] have SOURCE") + // -fbin-llvmir -fno-bin-llvmir OPTION(OT_BOOL, \ OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F|OVIS_SUPPORT, \ @@ -423,6 +439,14 @@ OPTION(OT_BOOL, \ true, 0, 0, NULL, \ "Allow OpenCL binary to [not] have LLVMIR") +// -fbin-spir -fno-bin-spir +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_INTERNAL|OVA_DISALLOWED|OFA_PREFIX_F|OVIS_SUPPORT, \ + "bin-spir", NULL, \ + BinSPIR, \ + true, 0, 0, NULL, \ + "Allow OpenCL binary to [not] have SPIR") + // -fbin-cg, -fno-bin-cg OPTION(OT_BOOL, \ OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F|OVIS_SUPPORT, \ @@ -431,6 +455,22 @@ OPTION(OT_BOOL, \ true, 0, 0, NULL, \ "Allow OpenCL binary to [not] have output from code generator") +// -fbin-amdil -fno-bin-amdil +OPTION(OT_BOOL, \ + OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F|OVIS_SUPPORT, \ + "bin-amdil", NULL, \ + BinAMDIL, \ + false, 0, 0, NULL, \ + "Allow OpenCL binary to [not] have AMDIL") + +// -fbin-hsail -fno-bin-hsail +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_INTERNAL|OVA_DISALLOWED|OFA_PREFIX_F, 
\ + "bin-hsail", NULL, \ + BinHSAIL, \ + false, 0, 0, NULL, \ + "Allow OpenCL binary to [not] have HSAIL") + // -fbin-exe -fno-bin-exe OPTION(OT_BOOL, \ OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F|OVIS_SUPPORT, \ @@ -439,6 +479,14 @@ OPTION(OT_BOOL, \ true, 0, 0, NULL, \ "Allow OpenCL binary to [not] have Executable") +// -fbin-bif30 +OPTION(OT_BOOL, \ + OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F, \ + "bin-bif30", NULL, \ + BinBIF30, \ + false, 0, 0, NULL, \ + "Allow OpenCL binary to be BIF3.0 format") + // -fbin-as -fno-bin-as OPTION(OT_BOOL, \ OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ @@ -447,6 +495,14 @@ OPTION(OT_BOOL, \ false, 0, 0, NULL, \ "Allow OpenCL binary to [not] have X86 assembly text") +// -fbin-disasm -fno-bin-disasm +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "bin-disasm", NULL, \ + BinDISASM, \ + false, 0, 0, NULL, \ + "Allow OpenCL binary to [not] have disassembly text") + // -fbin-encrypt -fno-bin-encrypt OPTION(OT_BOOL, \ OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F, \ @@ -455,6 +511,22 @@ OPTION(OT_BOOL, \ false, 0, 0, NULL, \ "Generate an encrypted OpenCL binary (not by default)") +// -fbin-gpu64 -fno-bin-gpu64 (default) +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_INTERNAL|OVA_DISALLOWED|OFA_PREFIX_F, \ + "bin-gpu64", NULL, \ + EnableGpuElf64, \ + false, 0, 0, NULL, \ + "Generate 64-bit ELF binary for GPU (default: 32-bit)") + +// -fdebug-linker -fno-debug-linker +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_INTERNAL|OVA_DISALLOWED|OFA_PREFIX_F, \ + "debug-linker", NULL, \ + EnableDebugLinker, \ + false, 0, 0, NULL, \ + "Enable debug output for linker") + // -fc99-inline -fno-c99-inline (default) OPTION(OT_BOOL, \ OA_RUNTIME|OVIS_INTERNAL|OVA_DISALLOWED|OFA_PREFIX_F, \ @@ -503,6 +575,63 @@ OPTION(OT_UCHAR, \ '3', 0, 's', NULL, \ "Set optimization level to (0|1|2|3|4|5(-Os equivalent)|g)") +// -srt/--sr-threshold= : Scalar Replacement threshold +OPTION(OT_UINT32, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + 
"srt", "sr-threshold", \ + SRThreshold, \ + 128, 0, 0x3FFFFFFF, NULL, \ + "Set scalar replacement threshold to <n>") + +// -apt/--ap-threshold= : Argument Promotion threshold +OPTION(OT_UINT32, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "apt", "ap-threshold", \ + APThreshold, \ + 1024, 0, 0x3FFFFFFF, NULL, \ + "Set argument promotion threshold to <n>") + +// -unroll-count= : loop unroll count +// default is 0, meaning auto-unrolling +OPTION(OT_UINT32, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "unroll-count", NULL, \ + LUCount, \ + 0, 0, 0x3FFFFFFF, NULL, \ + "Set loop unroll count to <n> for all loops") + +// -unroll-threshold= : loop unroll threshold +// default is 150. +OPTION(OT_UINT32, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "unroll-threshold", NULL, \ + LUThreshold, \ + 150, 0, 0x3FFFFFFF, NULL, \ + "Set loop unroll threshold to <n> for all loops") + +// -unroll-allow-partial : allow partial unroll +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \ + "unroll-allow-partial", NULL, \ + LUAllowPartial, \ + true, 0, 0, NULL, \ + "Allow unrolling loops partially") + +// -licm=0|1 (default is 1) +FLAG(OT_BOOL, OVIS_SUPPORT, "licm", OptLICM, 1, "Enable/disable LLVM Optimization LICM") + +// -mem2reg=0|1 (default is 1) +FLAG(OT_BOOL, OVIS_SUPPORT, "mem2reg", OptMem2reg, 1, "Enable/disable mem2reg (for -O0 only)") + +// -aa=0|1 (default 1) +FLAG(OT_BOOL, OVIS_SUPPORT, "aa", OptAA, 1, "Enable/disable Module AA pass") + +// -ebb=0|1 (default 1) +FLAG(OT_BOOL, OVIS_SUPPORT, "ebb", CGEBB, 1, "Enable/disable Codegen EBB") + +// -bfo=0|1 (default 1) +FLAG(OT_BOOL, OVIS_SUPPORT, "bfo", CGBFO, 1, "Enable/disable Codegen BitField Optim") + // -mimage-support -mno-image-support (default yes) OPTION(OT_BOOL, \ OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_M, \ @@ -511,12 +640,66 @@ OPTION(OT_BOOL, \ true, 0, 0, NULL, \ "Define __IMAGE_SUPPORT__ indicating device support for images") +OPTION(OT_BOOL, \ + 
OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_M, \ + "fast-fma", NULL, \ + FastFMA, \ + false, 0, 0, NULL, \ + "Define FP_FAST_FMA indicating fma function is faster than multiply and add for double") + +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_M, \ + "fast-fmaf", NULL, \ + FastFMAF, \ + false, 0, 0, NULL, \ + "Define FP_FAST_FMAF indicating fma function is faster than multiply and add for float") + +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_M, \ + "small-global-objects", NULL, \ + SmallGlobalObjects, \ + false, 0, 0, NULL, \ + "Assume no global allocation is > 4G and does not span the 4G boundary") + +// -memcombine-max-vec-gen= (default 16) +FLAG(OT_UINT32, OVIS_SUPPORT, "memcombine-max-vec-gen", OptMemCombineMaxVecGen, 16, \ + "Maximum width (#bytes) of vector loads/stores generated by memory\n" + HINDENT "access combining. Set it to 1 to disable memory access combining.") + +// -SRAE-threshold= (default 1024) +FLAG(OT_UINT32, OVIS_SUPPORT, "SRAE-threshold", OptSRAEThreshold, 1024, \ + "Maximum size of the local array element aggregates that\n" + HINDENT " will be scalar replaced.") + +// -liveness=0|1 (default 0) +FLAG(OT_BOOL, OVIS_SUPPORT, "liveness", OptLiveness, 0, "Enable/disable Liveness Analysis") + +// -prt-opt-liveness=0|1 (default 0) +FLAG(OT_BOOL, OVIS_SUPPORT, "prt-opt-liveness", OptPrintLiveness, 0, \ + "Print liveness information at the end of Opt.") + // -wgs= (default NULL : use default work group size 256,1,1) FLAG(OT_CSTRING, OVIS_SUPPORT, "wgs", WorkGrpSize, 0, "Work group size (ie 256,1,1).") // -buildlog=<0|stdout|stderr|> : output build log into the given output. 
FLAG(OT_CSTRING, OVIS_SUPPORT, "buildlog", BuildLog, 0, "Redirect log of clBuildProgram.") +// -fdiv2fmul=0|1 (default 1) +FLAG(OT_BOOL, OVIS_SUPPORT, "fdiv2fmul", EnableFDiv2FMul, 1, \ + "Enable/disable float f/c ==> f * (1.0f/c) for GPU (default : on)") + +// -stack-alignment= +FLAG(OT_UINT32, OVIS_SUPPORT, "stack-alignment", CPUStackAlignment, 64, \ + "Override CPU stack alignment (64 bytes by default).") + +// -slc=0|1 or --simplifylibcall (default 1) +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "slc", "simplifylibcall", \ + OptSimplifyLibCall, \ + 1, 0, 0, NULL, \ + "Enable/disable optimization to simplify lib calls (on by default)") + // -finline -fno-inline OPTION(OT_BOOL, \ OA_RUNTIME|OVIS_INTERNAL|OVA_DISALLOWED|OFA_PREFIX_F, \ @@ -525,6 +708,220 @@ OPTION(OT_BOOL, \ true, 0, 0, NULL, \ "Disabling (-fno-inline) GPU inlining for testing") +// -fsc-keep-calls -fno-sc-keep-calls +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-keep-calls", NULL, \ + SCKeepCalls, \ + false, 0, 0, NULL, \ + "SC does not inline function calls in AMDIL") + +// -fsc-selective-inline -fno-sc-selective-inline +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-selective-inline", NULL, \ + SCSelectiveInline, \ + true, 0, 0, NULL, \ + "SC inlines functions that are called only once and keep other functions") + +// -fsc-use-mubuf -fno-sc-use-mubuf +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-use-mubuf", NULL, \ + SCUseMUBuf, \ + false, 0, 0, NULL, \ + "SC uses MUBUF whenever possible") + + +// -fsc-disable-loop-unroll +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-disable-loop-unroll", NULL, \ + SCDisableLoopUnroll, \ + false, 0, 0, NULL, \ + "SC does not unroll loops") + +// -fsc-disable-merge-memory +OPTION(OT_BOOL, \ + 
OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-disable-merge-memory", NULL, \ + SCDisableMergeMemory, \ + false, 0, 0, NULL, \ + "SC does not merge memory loads and stores") + +// -fsc-bias-schedule-to-minimize-regs +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-bias-schedule-to-minimize-regs", NULL, \ + SCBiasScheduleToMinimizeRegs, \ + false, 0, 0, NULL, \ + "Scheduler heuristic bias: force minimize register strategy") + +// -fsc-bias-schedule-to-minimize-insts +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-bias-schedule-to-minimize-insts", NULL, \ + SCBiasScheduleToMinimizeInsts, \ + false, 0, 0, NULL, \ + "Scheduler heuristic bias: force minimize instructions strategy") + +// -fsc-min-reg-schedule +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-min-reg-schedule", NULL, \ + SCMinRegSchedule, \ + false, 0, 0, NULL, \ + "Scheduler: schedule to minimize register usage") + +// -fsc-schedule-no-reorder +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-schedule-no-reorder", NULL, \ + SCScheduleNoReorder, \ + false, 0, 0, NULL, \ + "Scheduler: turn off instruction reordering") + +// -fsc-use-buffer-for-hsa-global +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-use-buffer-for-hsa-global", NULL, \ + SCUseBufferForHsaGlobal, \ + false, 0, 0, NULL, \ + "HSA: use buffer instructions instead of flat for global memory") + +// -fsc-live-sched -fno-sc-live-sched +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-live-sched", NULL, \ + SCLiveSched, \ + false, 0, 0, NULL, \ + "SC turn on/off liveness based instruction scheduling (default off)") + +// -fsc-post-ra-sched (default false) +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "sc-post-ra-sched", 
NULL, \ + SCPostRASched, \ + false, 0, 0, NULL, \ + "Run instruction scheduling after register allocation (default off)") + +// -scras=int or --sc-si-opt-reg-alloc-strategy (default 4) +// 4 let OCL compiler choose SC reg alloc strategy by heuristic +// keep this updated with SCShaderSi.h +OPTION(OT_UINT32, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "scras", "sc-si-opt-reg-alloc-strategy", \ + SCSIOptRegAllocStrategy, \ + 4, 0, 4, NULL, \ + "Set SI+ shader compiler register allocation strategy 0-SC default, " + "1-balanced, 2-minimize GPRs, 3-minimize moves, 4-heuristic(default 4).") + +// -fuser-no-inline -fno-user-no-inline +OPTION(OT_BOOL, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "user-no-inline", NULL, \ + AddUserNoInline, \ + true, 0, 0, NULL, \ + "Adding (-fuser-no-inline) noinline attribute to user functions") + +// -flib-no-inline -fno-lib-no-inline +OPTION(OT_BOOL, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "lib-no-inline", NULL, \ + AddLibNoInline, \ + true, 0, 0, NULL, \ + "Adding (-flib-no-inline) noinline attribute to opencl library functions") + +// -scopt=int or --sc-opt-level (default -1) +// -1 let shader compiler choose optimization level +OPTION(OT_INT32, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "scopt", "sc-opt-level", \ + SCOptLvl, \ + -1, -1, 4, NULL, \ + "Set AMDIL shader compiler optimization level -1,0,1,2,3,4 (default -1 " + "auto).") + +// -ilcth=int or --inline-cost-threshold (default 14000) +OPTION(OT_UINT32, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "ilcth", "inline-cost-threshold", \ + InlineCostThreshold, \ + 14000, 0, 0xFFFFFFFF, NULL, \ + "Set cost threshold for inliner (default 14000).") + +// -ilsth=int or --inline-size-threshold (default 50) +OPTION(OT_UINT32, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "ilsth", "inline-size-threshold", \ + 
InlineSizeThreshold, \ + 50, 0, 0xFFFFFFFF, NULL, \ + "Set size threshold for inliner (default 50).") + +// -ilkth=int or --inline-kernel-size-threshold (default 100000) +OPTION(OT_UINT32, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "ilkth", "inline-kernel-size-threshold", \ + InlineKernelSizeThreshold, \ + 100000, 0, 0xFFFFFFFF, NULL, \ + "Set kernel size threshold for inliner (default 100000).") + +// -wokth=int or --waves-opt-kernel-threshold (default 0xFFFFFFFF) +OPTION(OT_UINT32, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "wokth", "waves-opt-kernel-threshold", \ + WavesOptKernelThreshold, \ + 0xFFFFFFFF, 0, 0xFFFFFFFF, NULL, \ + "Enable waves optimization when kernel size is greater than this threshold.") + +// -fdef-res-id -fno-def-res-id +OPTION(OT_BOOL, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "def-res-id", NULL, \ + DefaultResourceId, \ + false, 0, 0, NULL, \ + "Use default resource id when AMDIL contains non-kernel functions.") + +// -fstack-uav -fno-stack-uav +OPTION(OT_BOOL, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "stack-uav", NULL, \ + UseStackUAV, \ + false, 0, 0, NULL, \ + "Use stack uav instead of private uav for stack variables.") + +// -fmacro-call -fno-macro-call +OPTION(OT_BOOL, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "macro-call", NULL, \ + UseMacroForCall, \ + true, 0, 0, NULL, \ + "Use outline macro for function call in AMDIL.") + +// -fdebug-call -fno-debug-call +OPTION(OT_BOOL, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "debug-call", NULL, \ + DebugCall, \ + false, 0, 0, NULL, \ + "Allow function call for debug options in AMDIL.") + +// -fmulti-level-call -fno-multi-level-call +OPTION(OT_BOOL, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "multi-level-call", NULL, \ + AllowMultiLevelCall, \ + true, 0, 0, NULL, \ + "Allow 
multi-level function call in AMDIL.") + +// -use-debugil +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \ + "use-debugil", NULL, \ + UseDebugIL, \ + false, 0, 0, NULL, \ + "Enable recompilation from DebugIL.") + // -kernel= OPTION(OT_CSTRING, \ OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ @@ -533,6 +930,14 @@ OPTION(OT_CSTRING, \ 0, 0, 0, NULL, \ "Specify the kernel to compile for.") +// -just-kernel= +OPTION(OT_CSTRING, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "just-kernel", NULL, \ + JustKernel, \ + 0, 0, 0, NULL, \ + "Recompilation for <kernel> only (for -use-debugil)") + // -fenable-dump/-fno-enable-dump OPTION(OT_BOOL, \ OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ @@ -585,7 +990,74 @@ NOPTION(OT_CSTRING, \ 0, 0, 0, NULL, \ "Store the binary into file with prefix 'prefix'") -// -fuse-jit, -fno-use-jit +// -cl[=], --load-cl-dll[=] +// The name of the compiler library DLL that is to be opened. By default it is 'amdcl' +OPTION(OT_CSTRING, \ + OA_LINK_LIB|OVIS_SUPPORT|OVA_OPTIONAL|OA_SEPARATOR_EQUAL|OFA_NORMAL, \ + "cl", "--load-cl-dll", \ + UseCL, \ + 0, 0, 0, NULL, \ + "Specify the CL library(without extension) to load and run compiler library from.") + + +// -fe[=], --load-fe-dll[=] +// The name of the frontend DLL that is to be opened. By default it is 'amdfe' +OPTION(OT_CSTRING, \ + OA_LINK_LIB|OVIS_SUPPORT|OVA_OPTIONAL|OA_SEPARATOR_EQUAL|OFA_NORMAL, \ + "fe", "--load-fe-dll", \ + UseFE, \ + 0, 0, 0, NULL, \ + "Specify the FE library(without extension) to load and run frontend from.") + +// -use-opt[=], --load-opt-dll[=] +// The name of the opt DLL that is to be opened. 
By default it is 'amdopt' +OPTION(OT_CSTRING, \ + OA_LINK_EXE|OVIS_SUPPORT|OVA_OPTIONAL|OA_SEPARATOR_EQUAL|OFA_NORMAL, \ + "use-opt", "--load-opt-dll", \ + UseLLVM, \ + 0, 0, 0, NULL, \ + "Specify the OPT library(without extension) to load and run optimizations from.") + + +// -use-link[=], --load-link-dll[=] +// The name of the link DLL that is to be opened. By default it is 'amdlink' +OPTION(OT_CSTRING, \ + OA_LINK_EXE|OVIS_SUPPORT|OVA_OPTIONAL|OA_SEPARATOR_EQUAL|OFA_NORMAL, \ + "use-link", "--load-link-dll", \ + UseLINK, \ + 0, 0, 0, NULL, \ + "Specify the link library(without extension) to load and run linking from.") + +// -cg[=], --load-cg-dll[=] +// The name of the code generator DLL that is to be opened. By default it is 'amdcg' +OPTION(OT_CSTRING, \ + OA_LINK_LIB|OVIS_SUPPORT|OVA_OPTIONAL|OA_SEPARATOR_EQUAL|OFA_NORMAL, \ + "cg", "--load-cg-dll", \ + UseCG, \ + 0, 0, 0, NULL, \ + "Specify the CG library(without extension) to load and run code generator from.") + + +// -be[=], --load-be-dll[=] +// The name of the backend DLL that is to be opened. By default it is 'amdbe' +OPTION(OT_CSTRING, \ + OA_LINK_LIB|OVIS_SUPPORT|OVA_OPTIONAL|OA_SEPARATOR_EQUAL|OFA_NORMAL, \ + "be", "--load-be-dll", \ + UseBE, \ + 0, 0, 0, NULL, \ + "Specify the BE library(without extension) to load and run backend from.") + + +// -sc[=], --load-sc-dll[=] +// The name of the sc DLL that is to be opened. 
By default it is 'amdsc' +OPTION(OT_CSTRING, \ + OA_CLC|OVIS_SUPPORT|OVA_OPTIONAL|OA_SEPARATOR_EQUAL|OFA_NORMAL, \ + "-sc", "--load-sc-dll", \ + UseSC, \ + 0, 0, 0, NULL, \ + "Specify the SC library(without extension) to load and run SC from.") + +// -fuse-jit, -fno-use-jit OPTION(OT_BOOL, \ OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F, \ "use-jit", NULL, \ @@ -593,7 +1065,7 @@ OPTION(OT_BOOL, \ true, 0, 0, NULL, \ "Use JIT for CPU target, disabled if debugging is enabled") -// -fforce-jit, -fno-force-jit +// -fforce-jit, -fno-force-jit OPTION(OT_BOOL, \ OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F, \ "force-jit", NULL, \ @@ -609,6 +1081,71 @@ OPTION(OT_BOOL, \ false, 0, 0, NULL, \ "Enable timing for Kernel build.") +// -kernel-cache +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \ + "kernel-cache", NULL, \ + EnableKernelCaching, \ + true, 0, 0, NULL, \ + "Enable kernel caching functionality.") + +// -kernel-cache-wipe +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \ + "kernel-cache-wipe", NULL, \ + WipeKernelCache, \ + false, 0, 0, NULL, \ + "Wipe out kernel cache storage.") + +// -kernel-cache-enforce-miss +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \ + "kernel-cache-enforce-miss", NULL, \ + EnforceKernelCacheMiss, \ + false, 0, 0, NULL, \ + "Enforce kernel cache miss (actual compilation).") + +// -print-compile-phases +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \ + "print-compile-phases", NULL, \ + PrintCompilePhases, \ + false, 0, 0, NULL, \ + "Print compile phases info.") + +// -fforce-llvm, -fno-force-llvm +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "force-llvm", NULL, \ + ForceLLVM, \ + false, 0, 0, NULL, \ + "Forces LLVM recompilation from binaries") + +// -fdisable-avx, -fno-disable-avx +OPTION(OT_BOOL, \ + OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F, \ + "disable-avx", NULL, \ + DisableAVX, \ + false, 0, 0, NULL, \ + "Disable AVX code generation.") + +// -fmad-enable, 
-fno-mad-enable (when -cl-mad-enable is turned on, +// this option is no longer needed.) +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ + "mad-enable", NULL, \ + EnableMAD, \ + false, 0, 0, NULL, \ + "Enable mad for a*b+c.") + +// -ffma-enable, -fno-fma-enable +OPTION(OT_BOOL, \ + OA_RUNTIME|OVA_DISALLOWED|OFA_PREFIX_F, \ + "fma-enable", NULL, \ + EnableFMA, \ + false, 0, 0, NULL, \ + "Enable fma for a*b+c.") + // -fuse-native=[all|,,…] OPTION(OT_CSTRING, \ OA_RUNTIME|OVA_OPTIONAL|OA_SEPARATOR_EQUAL, \ @@ -617,6 +1154,119 @@ OPTION(OT_CSTRING, \ 0, 0, 0, NULL, \ "Replace math function calls with that native version.") +// -verify-hwspir +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \ + "verify-hwspir", NULL, \ + verifyHWSpir, \ + false, 0, 0, NULL, \ + "Enable the heavy weight spir verification pass.") + +// -verify-lwspir +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \ + "verify-lwspir", NULL, \ + verifyLWSpir, \ + false, 0, 0, NULL, \ + "Enable the light weight spir verification pass.") + +// -sc-dev-format +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_INTERNAL|OVA_DISALLOWED, \ + "sc-dev-format", NULL, \ + SCDevFormat, \ + false, 0, 0, NULL, \ + "Emit the IL in a format compatible with SC\'s dev.exe.") + +// -faa-for-barrier -fno-aa-for-barrier +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_INTERNAL|OVA_DISALLOWED|OFA_PREFIX_F, \ + "aa-for-barrier", NULL, \ + AAForBarrier, \ + true, 0, 0, NULL, \ + "Use AMDAliasAnalysis for correct barrier behavior. 
If disabled, remove noalias to ensure correct barrier behavior.") + +// -sc-xnack-iommu +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_NORMAL, \ + "sc-xnack-iommu", NULL, \ + SCXnackIommu, \ + false, 0, 0, NULL, \ + "Enable SC XNACK workaround if IOMMUv2 is being used") + +// -limit-vector-registers= (default 0 = hardware specific) +FLAG(OT_UINT32, OVIS_SUPPORT|OVIS_INTERNAL|OVA_REQUIRED, \ + "limit-vector-registers", OptLimitVecRegisters, 0, \ + "Maximum number of vector registers that are available to the compiler.") + +// -limit-scalar-registers= (default 0 = hardware specific, GCN and later only) +FLAG(OT_UINT32, OVIS_SUPPORT|OVIS_INTERNAL|OVA_REQUIRED, \ + "limit-scalar-registers", OptLimitScalarRegisters, 0, \ + "Maximum number of scalar registers that are available to the compiler.") + +// -set-vector-registers= (default 0 = no change) +FLAG(OT_UINT32, OVIS_SUPPORT|OVIS_INTERNAL|OVA_REQUIRED, \ + "set-vector-registers", OptSetVecRegisters, 0, \ + "Set the number of vector registers used in ISA (no effect if less than actual value).") + +// -set-scalar-registers= (default 0 = no change) +FLAG(OT_UINT32, OVIS_SUPPORT|OVIS_INTERNAL|OVA_REQUIRED, \ + "set-scalar-registers", OptSetScalarRegisters, 0, \ + "Set the number of scalar registers used in ISA (no effect if less than actual value).") + +// -set-lds= (default 0 = no change) +FLAG(OT_UINT32, OVIS_SUPPORT|OVIS_INTERNAL|OVA_REQUIRED, \ + "set-lds", OptSetLDS, 0, \ + "Set LDS usage in bytes in ISA (no effect if less than actual value).") + +// -lower-atomics=0|1 (default is 1) +FLAG(OT_BOOL, OVIS_SUPPORT, "lower-atomics", LowerAtomics, 1,\ + "Enable/disable pass lowering OCL atomics to LLVM intrinsics (only for x86/x64)") + +// -lower-pipe-builtins=0|1 (default is 1) +FLAG(OT_BOOL, OVIS_SUPPORT, "lower-pipe-builtins", LowerPipeBuiltins, 1,\ + "Enable/disable pass lowering OCL pipe builtin functions to internal library functions ") + +// -fe-gen-spirv +OPTION(OT_BOOL, \ + 
OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \ + "fe-gen-spirv", NULL, \ + FEGenSPIRV, \ + false, 0, 0, NULL, \ + "Let frontend generate SPIR-V.") + +// -round-trip-spirv +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \ + "round-trip-spirv", NULL, \ + RoundTripSPIRV, \ + false, 0, 0, NULL, \ + "Do round-trip translation of SPIR-V in pre-linking for testing purpose.") + +// -force-wave-size-32 +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_NORMAL, \ + "force-wave-size-32", NULL, \ + SCForceWaveSize32, \ + false, 0, 0, NULL, \ + "Force wave size 32 for compute shader compilation") + +// -force-wgp-mode +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_NORMAL, \ + "force-wgp-mode", NULL, \ + SCForceWgpMode, \ + false, 0, 0, NULL, \ + "Force wgp mode for compute shader compilation") + +// -xnack +OPTION(OT_BOOL, \ + OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_NORMAL, \ + "xnack", NULL, \ + SCXnack, \ + false, 0, 0, NULL, \ + "Enable the xnack feature for Finalizer/SC") + // -code-object-version= : code object version OPTION(OT_UINT32, \ OA_RUNTIME|OVA_OPTIONAL|OA_SEPARATOR_EQUAL, \