50 #include "llvm/InitializePasses.h" 51 #include <llvm/IR/BasicBlock.h> 52 #include <llvm/IR/Constants.h> 53 #include <llvm/IR/Function.h> 54 #include <llvm/IR/Instructions.h> 55 #include <llvm/IR/Intrinsics.h> 56 #include <llvm/IR/Module.h> 57 #include <llvm/Pass.h> 59 #include <llvm/Transforms/Instrumentation.h> 61 #include "llvm/IR/LegacyPassManager.h" 63 #include <llvm/PassRegistry.h> 65 #include <llvm/IR/DebugInfo.h> 66 #include <llvm/IR/IRPrintingPasses.h> 67 #include <llvm/IR/PatternMatch.h> 68 #include <llvm/IR/Verifier.h> 70 #include <llvm/Analysis/ConstantFolding.h> 72 #include <llvm/Analysis/TargetLibraryInfo.h> 73 #if ISPC_LLVM_VERSION >= ISPC_LLVM_7_0 74 #include "llvm/Transforms/InstCombine/InstCombine.h" 75 #include "llvm/Transforms/Utils.h" 77 #include <llvm/ADT/SmallSet.h> 78 #include <llvm/ADT/Triple.h> 79 #include <llvm/Target/TargetOptions.h> 80 #include <llvm/Transforms/IPO.h> 81 #include <llvm/Transforms/Scalar.h> 82 #include <llvm/Transforms/Utils/BasicBlockUtils.h> 84 #include <llvm/Analysis/TargetTransformInfo.h> 85 #include <llvm/IR/DataLayout.h> 87 #include "llvm/Analysis/TypeBasedAliasAnalysis.h" 88 #include "llvm/Transforms/IPO/FunctionAttrs.h" 89 #include "llvm/Transforms/Scalar/GVN.h" 90 #include <llvm/Analysis/BasicAliasAnalysis.h> 91 #include <llvm/Analysis/Passes.h> 92 #include <llvm/BinaryFormat/Dwarf.h> 93 #include <llvm/Support/raw_ostream.h> 94 #include <llvm/Target/TargetMachine.h> 95 #if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 96 #include "llvm/IR/IntrinsicsX86.h" 99 #include <llvm/IR/IntrinsicInst.h> 100 #ifdef ISPC_HOST_IS_LINUX 102 #elif defined(ISPC_HOST_IS_WINDOWS) 105 #define alloca _alloca 107 #endif // ISPC_HOST_IS_WINDOWS 115 #ifndef ISPC_NO_DUMPS 116 #include <llvm/Support/FileSystem.h> 117 #include <llvm/Support/Regex.h> 131 #ifndef ISPC_NO_DUMPS 140 #ifndef ISPC_NO_DUMPS 141 #define DEBUG_START_PASS(NAME) \ 142 if (g->debugPrint && \ 143 (getenv("FUNC") == NULL || (getenv("FUNC") != NULL && 
!strncmp(bb.getParent()->getName().str().c_str(), \ 144 getenv("FUNC"), strlen(getenv("FUNC")))))) { \ 145 fprintf(stderr, "Start of " NAME "\n"); \ 146 fprintf(stderr, "---------------\n"); \ 148 fprintf(stderr, "---------------\n\n"); \ 151 #define DEBUG_END_PASS(NAME) \ 152 if (g->debugPrint && \ 153 (getenv("FUNC") == NULL || (getenv("FUNC") != NULL && !strncmp(bb.getParent()->getName().str().c_str(), \ 154 getenv("FUNC"), strlen(getenv("FUNC")))))) { \ 155 fprintf(stderr, "End of " NAME " %s\n", modifiedAny ? "** CHANGES **" : ""); \ 156 fprintf(stderr, "---------------\n"); \ 158 fprintf(stderr, "---------------\n\n"); \ 161 #define DEBUG_START_PASS(NAME) 162 #define DEBUG_END_PASS(NAME) 177 static void lCopyMetadata(llvm::Value *vto,
const llvm::Instruction *from) {
// lCopyMetadata (continuation): copies every metadata entry attached to
// 'from' onto 'vto', when 'vto' is itself an llvm::Instruction.
// NOTE(review): the guard on 'to' being non-null (original lines 179-181)
// is missing from this extract — confirm against the full file.
178 llvm::Instruction *to = llvm::dyn_cast<llvm::Instruction>(vto);
// Collect all (kind-id, node) metadata pairs present on the source
// instruction into a small stack-allocated vector.
182 llvm::SmallVector<std::pair<unsigned int, llvm::MDNode *>, 8> metadata;
184 from->getAllMetadata(metadata);
// Re-attach each pair, kind by kind, onto the destination instruction.
185 for (
unsigned int i = 0; i < metadata.size(); ++i)
186 to->setMetadata(metadata[i].first, metadata[i].second);
// Fragment of the helper that reconstructs an ispc SourcePos from custom
// string-keyed metadata nodes attached to an instruction.  Each of the five
// nodes below carries a single operand: an MDString for "filename" and a
// ConstantInt for each of the line/column entries.
211 llvm::MDNode *filename = inst->getMetadata(
"filename");
212 llvm::MDNode *first_line = inst->getMetadata(
"first_line");
213 llvm::MDNode *first_column = inst->getMetadata(
"first_column");
214 llvm::MDNode *last_line = inst->getMetadata(
"last_line");
215 llvm::MDNode *last_column = inst->getMetadata(
"last_column");
// If any of the five pieces is absent, the instruction carries no usable
// position info.  (The early-return body — original lines 218-221 — is
// missing from this extract.)
217 if (!filename || !first_line || !first_column || !last_line || !last_column)
// Unwrap the operands: the filename as a string, the four numbers via
// mdconst::extract, which bridges Metadata back to ConstantInt.
222 llvm::MDString *str = llvm::dyn_cast<llvm::MDString>(filename->getOperand(0));
224 llvm::ConstantInt *first_lnum =
226 llvm::mdconst::extract<llvm::ConstantInt>(first_line->getOperand(0));
229 llvm::ConstantInt *first_colnum =
231 llvm::mdconst::extract<llvm::ConstantInt>(first_column->getOperand(0));
234 llvm::ConstantInt *last_lnum =
236 llvm::mdconst::extract<llvm::ConstantInt>(last_line->getOperand(0));
239 llvm::ConstantInt *last_colnum = llvm::mdconst::extract<llvm::ConstantInt>(last_column->getOperand(0));
// Write the decoded (file, first-line/column, last-line/column) tuple into
// the caller-provided SourcePos out-parameter.
242 *pos =
SourcePos(str->getString().data(), (int)first_lnum->getZExtValue(), (int)first_colnum->getZExtValue(),
243 (int)last_lnum->getZExtValue(), (int)last_colnum->getZExtValue());
247 static llvm::Instruction *
lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
const char *name,
248 llvm::Instruction *insertBefore = NULL) {
249 llvm::Value *args[2] = {arg0, arg1};
250 llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[2]);
251 return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
254 static llvm::Instruction *
lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2,
255 const char *name, llvm::Instruction *insertBefore = NULL) {
256 llvm::Value *args[3] = {arg0, arg1, arg2};
257 llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[3]);
258 return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
261 static llvm::Instruction *
lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2,
262 llvm::Value *arg3,
const char *name, llvm::Instruction *insertBefore = NULL) {
263 llvm::Value *args[4] = {arg0, arg1, arg2, arg3};
264 llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[4]);
265 return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
268 static llvm::Instruction *
lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2,
269 llvm::Value *arg3, llvm::Value *arg4,
const char *name,
270 llvm::Instruction *insertBefore = NULL) {
271 llvm::Value *args[5] = {arg0, arg1, arg2, arg3, arg4};
272 llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[5]);
273 return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
276 static llvm::Instruction *
lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2,
277 llvm::Value *arg3, llvm::Value *arg4, llvm::Value *arg5,
const char *name,
278 llvm::Instruction *insertBefore = NULL) {
279 llvm::Value *args[6] = {arg0, arg1, arg2, arg3, arg4, arg5};
280 llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[6]);
281 return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
284 static llvm::Instruction *
lGEPInst(llvm::Value *ptr, llvm::Value *offset,
const char *name,
285 llvm::Instruction *insertBefore) {
286 llvm::Value *index[1] = {offset};
287 llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
289 return llvm::GetElementPtrInst::Create(
PTYPE(ptr), ptr, arrayRef, name, insertBefore);
// Fragment of the helper that packs a vector of constant lane values into a
// 64-bit lane mask (hence the <= 64 element limit).  Interior lines
// (298-299, 302-303, 305, 308-315) are missing from this extract.
297 Assert(elements.size() <= 64);
300 for (
unsigned int i = 0; i < elements.size(); ++i) {
301 llvm::APInt intMaskValue;
// Lane constants may be floats: reinterpret the float's bit pattern as an
// integer via APFloat::bitcastToAPInt.
304 llvm::ConstantFP *cf = llvm::dyn_cast<llvm::ConstantFP>(elements[i]);
306 llvm::APFloat apf = cf->getValueAPF();
307 intMaskValue = apf.bitcastToAPInt();
// Otherwise the lane must be an integer constant; take its raw value.
310 llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[i]);
312 intMaskValue = ci->getValue();
// A set sign bit (countLeadingOnes() > 0) marks the lane as "on" —
// presumably setting bit i of the result mask in the elided body; confirm
// against the full file.
316 if (intMaskValue.countLeadingOnes() > 0)
// lGetMask: try to decode a compile-time-constant mask value 'factor' into a
// per-lane bitmask written to *mask.  Returns true on success.  Many interior
// lines (330, 334-337, 343-346, 348-355, 357-364) are missing from this
// extract; the visible structure is a dyn_cast cascade over constant kinds.
328 static bool lGetMask(llvm::Value *factor, uint64_t *mask) {
// Case 1: ConstantDataVector — collect each element as a Constant.
329 llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
331 llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
332 for (
int i = 0; i < (int)cdv->getNumElements(); ++i)
333 elements.push_back(cdv->getElementAsConstant(i));
// Case 2: ConstantVector — gather operands, but give up on any lane that is
// a ConstantExpr (not a plain constant).
338 llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
340 llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
341 for (
int i = 0; i < (int)cv->getNumOperands(); ++i) {
342 llvm::Constant *c = llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
345 if (llvm::isa<llvm::ConstantExpr>(cv->getOperand(i)))
347 elements.push_back(c);
// Case 3: all-zero aggregate — an all-off mask.
351 }
else if (llvm::isa<llvm::ConstantAggregateZero>(factor)) {
// Case 4: ConstantExpr — try to constant-fold it and recurse/retry.
// NOTE(review): TargetData/ConstantFoldConstantExpression are legacy LLVM
// APIs (pre-3.x naming) — this branch looks dead/outdated; verify.
356 llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(factor);
359 const llvm::TargetData *td = targetMachine->getTargetData();
360 llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td);
// Fallthrough: anything still constant should have been handled above.
365 Assert(!llvm::isa<llvm::Constant>(factor));
388 for (
int i = 0; i < vecWidth; ++i) {
389 if ((bits & (1ull << i)) == 0)
405 void add(llvm::Pass *P,
int stage);
406 bool run(llvm::Module &M) {
return PM.run(M); }
407 llvm::legacy::PassManager &
getPM() {
return PM; }
410 llvm::legacy::PassManager
PM;
424 #ifndef ISPC_NO_DUMPS 431 snprintf(buf,
sizeof(buf),
"\n\n*****LLVM IR after phase %d: %s*****\n\n",
number,
432 P->getPassName().data());
441 void Optimize(llvm::Module *module,
int optLevel) {
442 #ifndef ISPC_NO_DUMPS 444 printf(
"*** Code going into optimization ***\n");
449 optPM.
add(llvm::createVerifierPass(), 0);
451 optPM.
add(
new llvm::TargetLibraryInfoWrapperPass(llvm::Triple(module->getTargetTriple())));
455 optPM.
getPM().add(createTargetTransformInfoWrapperPass(targetMachine->getTargetIRAnalysis()));
457 optPM.
add(llvm::createIndVarSimplifyPass());
472 optPM.
add(llvm::createFunctionInliningPass());
474 optPM.
add(llvm::createCFGSimplificationPass());
475 optPM.
add(llvm::createGlobalDCEPass());
477 llvm::PassRegistry *registry = llvm::PassRegistry::getPassRegistry();
478 llvm::initializeCore(*registry);
479 llvm::initializeScalarOpts(*registry);
480 llvm::initializeIPO(*registry);
481 llvm::initializeAnalysis(*registry);
482 llvm::initializeTransformUtils(*registry);
483 llvm::initializeInstCombine(*registry);
484 llvm::initializeInstrumentation(*registry);
485 llvm::initializeTarget(*registry);
487 optPM.
add(llvm::createGlobalDCEPass(), 185);
498 optPM.
add(llvm::createTypeBasedAAWrapperPass(), 190);
499 optPM.
add(llvm::createBasicAAWrapperPass());
500 optPM.
add(llvm::createCFGSimplificationPass());
502 optPM.
add(llvm::createSROAPass());
504 optPM.
add(llvm::createEarlyCSEPass());
505 optPM.
add(llvm::createLowerExpectIntrinsicPass());
509 optPM.
add(llvm::createReassociatePass(), 200);
510 optPM.
add(llvm::createConstantPropagationPass());
511 optPM.
add(llvm::createDeadInstEliminationPass());
512 optPM.
add(llvm::createCFGSimplificationPass());
514 optPM.
add(llvm::createPromoteMemoryToRegisterPass());
515 optPM.
add(llvm::createAggressiveDCEPass());
518 optPM.
add(llvm::createInstructionCombiningPass(), 210);
525 optPM.
add(llvm::createDeadInstEliminationPass(), 220);
528 optPM.
add(llvm::createSROAPass());
529 optPM.
add(llvm::createInstructionCombiningPass());
530 optPM.
add(llvm::createCFGSimplificationPass());
531 optPM.
add(llvm::createPromoteMemoryToRegisterPass());
532 optPM.
add(llvm::createGlobalOptimizerPass());
533 optPM.
add(llvm::createReassociatePass());
534 optPM.
add(llvm::createIPConstantPropagationPass());
538 optPM.
add(llvm::createDeadArgEliminationPass(), 230);
539 optPM.
add(llvm::createInstructionCombiningPass());
540 optPM.
add(llvm::createCFGSimplificationPass());
541 optPM.
add(llvm::createPruneEHPass());
542 optPM.
add(llvm::createPostOrderFunctionAttrsLegacyPass());
543 optPM.
add(llvm::createReversePostOrderFunctionAttrsPass());
545 optPM.
add(llvm::createFunctionInliningPass());
546 optPM.
add(llvm::createConstantPropagationPass());
547 optPM.
add(llvm::createDeadInstEliminationPass());
548 optPM.
add(llvm::createCFGSimplificationPass());
550 optPM.
add(llvm::createArgumentPromotionPass());
552 optPM.
add(llvm::createAggressiveDCEPass());
553 optPM.
add(llvm::createInstructionCombiningPass(), 241);
554 optPM.
add(llvm::createJumpThreadingPass());
555 optPM.
add(llvm::createCFGSimplificationPass());
557 optPM.
add(llvm::createSROAPass());
559 optPM.
add(llvm::createInstructionCombiningPass());
560 optPM.
add(llvm::createTailCallEliminationPass());
568 optPM.
add(llvm::createInstructionCombiningPass(), 255);
574 optPM.
add(llvm::createEarlyCSEPass(), 260);
579 optPM.
add(llvm::createFunctionInliningPass(), 265);
580 optPM.
add(llvm::createConstantPropagationPass());
585 optPM.
add(llvm::createInstructionCombiningPass(), 270);
589 optPM.
add(llvm::createIPSCCPPass(), 275);
590 optPM.
add(llvm::createDeadArgEliminationPass());
591 optPM.
add(llvm::createAggressiveDCEPass());
592 optPM.
add(llvm::createInstructionCombiningPass());
593 optPM.
add(llvm::createCFGSimplificationPass());
601 optPM.
add(llvm::createFunctionInliningPass());
602 optPM.
add(llvm::createArgumentPromotionPass());
604 optPM.
add(llvm::createSROAPass());
606 optPM.
add(llvm::createInstructionCombiningPass());
608 optPM.
add(llvm::createCFGSimplificationPass());
609 optPM.
add(llvm::createReassociatePass());
610 optPM.
add(llvm::createLoopRotatePass());
611 optPM.
add(llvm::createLICMPass());
612 optPM.
add(llvm::createLoopUnswitchPass(
false));
613 optPM.
add(llvm::createInstructionCombiningPass());
615 optPM.
add(llvm::createIndVarSimplifyPass());
616 optPM.
add(llvm::createLoopIdiomPass());
617 optPM.
add(llvm::createLoopDeletionPass());
619 optPM.
add(llvm::createLoopUnrollPass(), 300);
621 optPM.
add(llvm::createGVNPass(), 301);
627 optPM.
add(llvm::createMemCpyOptPass());
628 optPM.
add(llvm::createSCCPPass());
629 optPM.
add(llvm::createInstructionCombiningPass());
631 optPM.
add(llvm::createJumpThreadingPass());
632 optPM.
add(llvm::createCorrelatedValuePropagationPass());
633 optPM.
add(llvm::createDeadStoreEliminationPass());
634 optPM.
add(llvm::createAggressiveDCEPass());
635 optPM.
add(llvm::createCFGSimplificationPass());
636 optPM.
add(llvm::createInstructionCombiningPass());
639 optPM.
add(llvm::createFunctionInliningPass());
640 optPM.
add(llvm::createAggressiveDCEPass());
641 optPM.
add(llvm::createStripDeadPrototypesPass());
643 optPM.
add(llvm::createGlobalDCEPass());
644 optPM.
add(llvm::createConstantMergePass());
655 #ifndef ISPC_NO_DUMPS 657 printf(
"\n*****\nFINAL OUTPUT\n*****\n");
680 llvm::StringRef
getPassName()
const {
return "Intrinsics Cleanup Optimization"; }
682 bool runOnBasicBlock(llvm::BasicBlock &BB);
684 bool runOnFunction(llvm::Function &F);
691 llvm::Function *
function;
700 : function(f), allOnMask(ao), op0(o0), op1(o1), opFactor(of) {}
702 llvm::Function *
function;
717 bool matchesMaskInstruction(llvm::Function *
function);
733 if (llvm::isa<llvm::UndefValue>(value))
736 llvm::BitCastInst *bci = llvm::dyn_cast<llvm::BitCastInst>(value);
738 return lIsUndef(bci->getOperand(0));
752 if (llvm::Function *ssei8Movmsk =
753 m->
module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse2_pmovmskb_128))) {
754 maskInstructions.push_back(ssei8Movmsk);
756 if (llvm::Function *sseFloatMovmsk =
757 m->
module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse_movmsk_ps))) {
758 maskInstructions.push_back(sseFloatMovmsk);
760 if (llvm::Function *__movmsk =
m->
module->getFunction(
"__movmsk")) {
761 maskInstructions.push_back(__movmsk);
763 if (llvm::Function *avxFloatMovmsk =
764 m->
module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_movmsk_ps_256))) {
765 maskInstructions.push_back(avxFloatMovmsk);
770 m->
module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse41_blendvps)), 0xf, 0, 1, 2));
772 m->
module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_blendv_ps_256)), 0xff, 0, 1, 2));
774 llvm::Function *avxMaskedLoad32 =
775 m->
module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_maskload_ps_256));
776 llvm::Function *avxMaskedLoad64 =
777 m->
module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_maskload_pd_256));
778 llvm::Function *avxMaskedStore32 =
779 m->
module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_maskstore_ps_256));
780 llvm::Function *avxMaskedStore64 =
781 m->
module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_maskstore_pd_256));
783 bool modifiedAny =
false;
785 for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
786 llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
787 if (callInst == NULL || callInst->getCalledFunction() == NULL)
790 BlendInstruction *blend = matchingBlendInstruction(callInst->getCalledFunction());
792 llvm::Value *v[2] = {callInst->getArgOperand(blend->
op0), callInst->getArgOperand(blend->
op1)};
793 llvm::Value *factor = callInst->getArgOperand(blend->
opFactor);
797 llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, v[0]);
809 llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, v[1]);
814 llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, v[0]);
820 if (
lGetMask(factor, &mask) ==
true) {
821 llvm::Value *value = NULL;
830 llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, value);
835 }
else if (matchesMaskInstruction(callInst->getCalledFunction())) {
836 llvm::Value *factor = callInst->getArgOperand(0);
838 if (
lGetMask(factor, &mask) ==
true) {
843 llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, value);
847 }
else if (callInst->getCalledFunction() == avxMaskedLoad32 ||
848 callInst->getCalledFunction() == avxMaskedLoad64) {
849 llvm::Value *factor = callInst->getArgOperand(1);
851 if (
lGetMask(factor, &mask) ==
true) {
854 llvm::Type *returnType = callInst->getType();
855 Assert(llvm::isa<llvm::VectorType>(returnType));
856 llvm::Value *undefValue = llvm::UndefValue::get(returnType);
857 llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, undefValue);
860 }
else if (mask == 0xff) {
862 llvm::Type *returnType = callInst->getType();
863 Assert(llvm::isa<llvm::VectorType>(returnType));
865 const char *name =
LLVMGetName(callInst->getArgOperand(0),
"_cast");
866 llvm::Value *castPtr =
new llvm::BitCastInst(callInst->getArgOperand(0),
867 llvm::PointerType::get(returnType, 0), name, callInst);
873 align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
874 name =
LLVMGetName(callInst->getArgOperand(0),
"_load");
875 llvm::Instruction *loadInst =
876 #if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 877 new llvm::LoadInst(castPtr, name,
false , align, (llvm::Instruction *)NULL);
879 new llvm::LoadInst(castPtr, name,
false , llvm::MaybeAlign(align),
880 (llvm::Instruction *)NULL);
883 llvm::ReplaceInstWithInst(callInst, loadInst);
888 }
else if (callInst->getCalledFunction() == avxMaskedStore32 ||
889 callInst->getCalledFunction() == avxMaskedStore64) {
891 llvm::Value *factor = callInst->getArgOperand(1);
893 if (
lGetMask(factor, &mask) ==
true) {
896 callInst->eraseFromParent();
899 }
else if (mask == 0xff) {
901 llvm::Value *rvalue = callInst->getArgOperand(2);
902 llvm::Type *storeType = rvalue->getType();
903 const char *name =
LLVMGetName(callInst->getArgOperand(0),
"_ptrcast");
904 llvm::Value *castPtr =
new llvm::BitCastInst(callInst->getArgOperand(0),
905 llvm::PointerType::get(storeType, 0), name, callInst);
908 llvm::StoreInst *storeInst =
new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
913 align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
914 #if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 915 storeInst->setAlignment(align);
917 storeInst->setAlignment(llvm::MaybeAlign(align));
920 llvm::ReplaceInstWithInst(callInst, storeInst);
936 bool modifiedAny =
false;
937 for (llvm::BasicBlock &BB : F) {
938 modifiedAny |= runOnBasicBlock(BB);
944 for (
unsigned int i = 0; i < maskInstructions.size(); ++i) {
945 if (maskInstructions[i].
function != NULL &&
function == maskInstructions[i].
function) {
953 for (
unsigned int i = 0; i < blendInstructions.size(); ++i) {
954 if (blendInstructions[i].
function != NULL &&
function == blendInstructions[i].
function) {
955 return &blendInstructions[i];
976 llvm::StringRef
getPassName()
const {
return "Vector Select Optimization"; }
977 bool runOnBasicBlock(llvm::BasicBlock &BB);
978 bool runOnFunction(llvm::Function &F);
983 static bool simplifySelect(llvm::SelectInst *selectInst, llvm::BasicBlock::iterator iter);
984 static llvm::Value *simplifyBoolVec(llvm::Value *value);
985 static bool simplifyCall(llvm::CallInst *callInst, llvm::BasicBlock::iterator iter);
991 llvm::TruncInst *trunc = llvm::dyn_cast<llvm::TruncInst>(value);
994 llvm::SExtInst *sext = llvm::dyn_cast<llvm::SExtInst>(value);
996 return sext->getOperand(0);
998 llvm::ZExtInst *zext = llvm::dyn_cast<llvm::ZExtInst>(value);
1000 return zext->getOperand(0);
1029 if (selectInst->getType()->isVectorTy() ==
false)
1031 Assert(selectInst->getOperand(1) != NULL);
1032 Assert(selectInst->getOperand(2) != NULL);
1033 llvm::Value *factor = selectInst->getOperand(0);
1037 llvm::Value *value = NULL;
1038 if (maskStatus ==
ALL_ON)
1040 value = selectInst->getOperand(1);
1041 else if (maskStatus ==
ALL_OFF)
1043 value = selectInst->getOperand(2);
1044 if (value != NULL) {
1045 llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, value);
1054 if ((factor = simplifyBoolVec(factor)) != NULL) {
1055 llvm::Instruction *newSelect = llvm::SelectInst::Create(factor, selectInst->getOperand(1),
1056 selectInst->getOperand(2), selectInst->getName());
1057 llvm::ReplaceInstWithInst(selectInst, newSelect);
1065 llvm::Function *calledFunc = callInst->getCalledFunction();
1069 if (calledFunc == NULL || calledFunc !=
m->
module->getFunction(
"__movmsk"))
1073 if (
lGetMask(callInst->getArgOperand(0), &mask) ==
true) {
1074 llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter,
LLVMInt64(mask));
1083 bool modifiedAny =
false;
1086 for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
1087 llvm::SelectInst *selectInst = llvm::dyn_cast<llvm::SelectInst>(&*iter);
1088 if (selectInst && simplifySelect(selectInst, iter)) {
1092 llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
1093 if (callInst && simplifyCall(callInst, iter)) {
1106 bool modifiedAny =
false;
1107 for (llvm::BasicBlock &BB : F) {
1108 modifiedAny |= runOnBasicBlock(BB);
1135 bool runOnBasicBlock(llvm::BasicBlock &BB);
1137 bool runOnFunction(llvm::Function &F);
1154 }
else if (llvm::isa<llvm::PointerType>(v->getType())) {
1156 }
else if (llvm::isa<llvm::PtrToIntInst>(v)) {
1163 else if (llvm::isa<llvm::LoadInst>(v)) {
1167 else if (llvm::CastInst *ci = llvm::dyn_cast<llvm::CastInst>(v)) {
1175 llvm::ConstantExpr *uce = llvm::dyn_cast<llvm::ConstantExpr>(v);
1176 if (uce != NULL && uce->getOpcode() == llvm::Instruction::PtrToInt)
1188 static llvm::Value *
lGetBasePointer(llvm::Value *v, llvm::Instruction *insertBefore,
bool broadcastDetected) {
1189 if (llvm::isa<llvm::InsertElementInst>(v) || llvm::isa<llvm::ShuffleVectorInst>(v)) {
1196 if (element != NULL) {
1205 if (llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v)) {
1207 }
else if (llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v)) {
1212 else if (llvm::CastInst *ci = llvm::dyn_cast<llvm::CastInst>(v)) {
1213 llvm::Value *t =
lGetBasePointer(ci->getOperand(0), insertBefore, broadcastDetected);
1217 return llvm::CastInst::Create(ci->getOpcode(), t, ci->getType()->getScalarType(),
LLVMGetName(t,
"_cast"),
1229 llvm::ConstantExpr *op = llvm::dyn_cast<llvm::ConstantExpr>(op0);
1230 if (op == NULL || op->getOpcode() != llvm::Instruction::PtrToInt)
1234 llvm::ConstantInt *opDelta = llvm::dyn_cast<llvm::ConstantInt>(op1);
1235 if (opDelta == NULL)
1244 llvm::InsertValueInst *iv = llvm::dyn_cast<llvm::InsertValueInst>(v);
1248 Assert(iv->hasIndices() && iv->getNumIndices() == 1);
1249 if (iv->getIndices()[0] == index)
1250 return iv->getInsertedValueOperand();
1261 #ifndef ISPC_NO_DUMPS 1263 fprintf(stderr,
"lGetBasePtrAndOffsets\n");
1268 bool broadcastDetected =
false;
1270 llvm::ShuffleVectorInst *shuffle = llvm::dyn_cast<llvm::ShuffleVectorInst>(ptrs);
1271 if (shuffle != NULL) {
1272 llvm::Value *indices = shuffle->getOperand(2);
1273 llvm::Value *vec = shuffle->getOperand(1);
1274 if (
lIsUndef(vec) && llvm::isa<llvm::ConstantAggregateZero>(indices)) {
1275 broadcastDetected =
true;
1278 llvm::Value *base =
lGetBasePointer(ptrs, insertBefore, broadcastDetected);
1287 if (broadcastDetected) {
1288 llvm::Value *op = shuffle->getOperand(0);
1289 llvm::BinaryOperator *bop_var = llvm::dyn_cast<llvm::BinaryOperator>(op);
1290 if (bop_var != NULL && ((bop_var->getOpcode() == llvm::Instruction::Add) ||
IsOrEquivalentToAdd(bop_var))) {
1293 llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(bop_var->getOperand(1));
1295 llvm::Value *zeroMask =
1296 #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0 1297 llvm::ConstantVector::getSplat(cv->getType()->getVectorNumElements(),
1299 llvm::ConstantVector::getSplat({cv->getType()->getVectorNumElements(),
false},
1301 llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*
g->
ctx)));
1303 llvm::Value *shuffle_offset =
new llvm::ShuffleVectorInst(cv, llvm::UndefValue::get(cv->getType()),
1304 zeroMask,
"shuffle", bop_var);
1305 *offsets = llvm::BinaryOperator::Create(llvm::Instruction::Add, *offsets, shuffle_offset,
1306 "new_offsets", insertBefore);
1313 llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(ptrs);
1314 if (bop != NULL && ((bop->getOpcode() == llvm::Instruction::Add) ||
IsOrEquivalentToAdd(bop))) {
1318 *offsets = llvm::BinaryOperator::Create(llvm::Instruction::Add, *offsets, bop->getOperand(1),
"new_offsets",
1322 *offsets = llvm::BinaryOperator::Create(llvm::Instruction::Add, *offsets, bop->getOperand(0),
"new_offsets",
1327 llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(ptrs);
1331 llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
1332 for (
int i = 0; i < (int)cv->getNumOperands(); ++i) {
1333 llvm::Constant *c = llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
1336 elements.push_back(c);
1340 for (
unsigned int i = 0; i < elements.size(); ++i) {
1343 llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(elements[i]);
1348 llvm::Value *elementBase = NULL;
1349 if (ce->getOpcode() == llvm::Instruction::PtrToInt) {
1358 if (elementBase == NULL)
1365 if (elementBase == NULL)
1368 Assert(delta[i] != NULL);
1372 else if (base != elementBase)
1379 llvm::ArrayRef<llvm::Constant *> deltas(&delta[0], &delta[elements.size()]);
1380 *offsets = llvm::ConstantVector::get(deltas);
1384 llvm::ExtractValueInst *ev = llvm::dyn_cast<llvm::ExtractValueInst>(ptrs);
1386 Assert(ev->getNumIndices() == 1);
1387 int index = ev->getIndices()[0];
1409 llvm::Instruction *insertBefore) {
1410 if (llvm::isa<llvm::ConstantVector>(vec) || llvm::isa<llvm::ConstantDataVector>(vec) ||
1411 llvm::isa<llvm::ConstantAggregateZero>(vec)) {
1413 *variableOffset = NULL;
1417 llvm::CastInst *cast = llvm::dyn_cast<llvm::CastInst>(vec);
1420 llvm::Value *co, *vo;
1425 *constOffset = NULL;
1428 llvm::CastInst::Create(cast->getOpcode(), co, cast->getType(),
LLVMGetName(co,
"_cast"), insertBefore);
1430 *variableOffset = NULL;
1433 llvm::CastInst::Create(cast->getOpcode(), vo, cast->getType(),
LLVMGetName(vo,
"_cast"), insertBefore);
1437 llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(vec);
1439 llvm::Value *op0 = bop->getOperand(0);
1440 llvm::Value *op1 = bop->getOperand(1);
1441 llvm::Value *c0, *v0, *c1, *v1;
1447 if (c0 == NULL || llvm::isa<llvm::ConstantAggregateZero>(c0))
1449 else if (c1 == NULL || llvm::isa<llvm::ConstantAggregateZero>(c1))
1452 *constOffset = llvm::BinaryOperator::Create(llvm::Instruction::Add, c0, c1,
LLVMGetName(
"add", c0, c1),
1455 if (v0 == NULL || llvm::isa<llvm::ConstantAggregateZero>(v0))
1456 *variableOffset = v1;
1457 else if (v1 == NULL || llvm::isa<llvm::ConstantAggregateZero>(v1))
1458 *variableOffset = v0;
1460 *variableOffset = llvm::BinaryOperator::Create(llvm::Instruction::Add, v0, v1,
1463 }
else if (bop->getOpcode() == llvm::Instruction::Shl) {
1470 if ((v1 != NULL) || (c0 == NULL) || (c1 == NULL)) {
1471 *constOffset = NULL;
1472 *variableOffset = vec;
1473 }
else if (v0 == NULL) {
1475 *variableOffset = NULL;
1477 *constOffset = llvm::BinaryOperator::Create(llvm::Instruction::Shl, c0, c1,
LLVMGetName(
"shl", c0, c1),
1479 *variableOffset = llvm::BinaryOperator::Create(llvm::Instruction::Shl, v0, c1,
1483 }
else if (bop->getOpcode() == llvm::Instruction::Mul) {
1491 if (c0 != NULL && c1 != NULL)
1492 *constOffset = llvm::BinaryOperator::Create(llvm::Instruction::Mul, c0, c1,
LLVMGetName(
"mul", c0, c1),
1495 *constOffset = NULL;
1497 llvm::Value *va = NULL, *vb = NULL, *vc = NULL;
1498 if (v0 != NULL && c1 != NULL)
1499 va = llvm::BinaryOperator::Create(llvm::Instruction::Mul, v0, c1,
LLVMGetName(
"mul", v0, c1),
1501 if (c0 != NULL && v1 != NULL)
1502 vb = llvm::BinaryOperator::Create(llvm::Instruction::Mul, c0, v1,
LLVMGetName(
"mul", c0, v1),
1504 if (v0 != NULL && v1 != NULL)
1505 vc = llvm::BinaryOperator::Create(llvm::Instruction::Mul, v0, v1,
LLVMGetName(
"mul", v0, v1),
1508 llvm::Value *vab = NULL;
1509 if (va != NULL && vb != NULL)
1510 vab = llvm::BinaryOperator::Create(llvm::Instruction::Add, va, vb,
LLVMGetName(
"add", va, vb),
1512 else if (va != NULL)
1517 if (vab != NULL && vc != NULL)
1518 *variableOffset = llvm::BinaryOperator::Create(llvm::Instruction::Add, vab, vc,
1520 else if (vab != NULL)
1521 *variableOffset = vab;
1523 *variableOffset = vc;
1530 *constOffset = NULL;
1531 *variableOffset = vec;
1538 llvm::ConstantDataVector *cvec = llvm::dyn_cast<llvm::ConstantDataVector>(v);
1542 llvm::Constant *splatConst = cvec->getSplatValue();
1543 if (splatConst == NULL)
1546 llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(splatConst);
1550 int64_t splatVal = ci->getSExtValue();
1551 *splat = (int)splatVal;
1555 static llvm::Value *
lExtract248Scale(llvm::Value *splatOperand,
int splatValue, llvm::Value *otherOperand,
1556 llvm::Value **result) {
1557 if (splatValue == 2 || splatValue == 4 || splatValue == 8) {
1558 *result = otherOperand;
1564 for (
int scale = 8; scale >= 2; scale /= 2) {
1565 llvm::Instruction *insertBefore = llvm::dyn_cast<llvm::Instruction>(*result);
1566 Assert(insertBefore != NULL);
1568 if ((splatValue % scale) == 0) {
1573 llvm::Value *splatDiv =
1574 llvm::BinaryOperator::Create(llvm::Instruction::SDiv, splatOperand, splatScaleVec,
"div", insertBefore);
1575 *result = llvm::BinaryOperator::Create(llvm::Instruction::Mul, splatDiv, otherOperand,
"mul", insertBefore);
1598 llvm::CastInst *cast = llvm::dyn_cast<llvm::CastInst>(*vec);
1600 llvm::Value *castOp = cast->getOperand(0);
1608 *vec = llvm::CastInst::Create(cast->getOpcode(), castOp, cast->getType(),
"offset_cast", cast);
1613 llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(*vec);
1617 llvm::Value *op0 = bop->getOperand(0), *op1 = bop->getOperand(1);
1619 if (llvm::isa<llvm::ConstantAggregateZero>(op0)) {
1622 }
else if (llvm::isa<llvm::ConstantAggregateZero>(op1)) {
1629 *vec = llvm::BinaryOperator::Create(llvm::Instruction::Add, op0, op1,
"new_add", bop);
1634 }
else if (bop->getOpcode() == llvm::Instruction::Mul) {
1648 static llvm::Value *
1649 lExtractUniforms(llvm::Value **vec, llvm::Instruction *insertBefore) {
1650 fprintf(stderr,
" lextract: ");
1652 fprintf(stderr,
"\n");
1654 if (llvm::isa<llvm::ConstantVector>(*vec) ||
1655 llvm::isa<llvm::ConstantDataVector>(*vec) ||
1656 llvm::isa<llvm::ConstantAggregateZero>(*vec))
1659 llvm::SExtInst *sext = llvm::dyn_cast<llvm::SExtInst>(*vec);
1661 llvm::Value *sextOp = sext->getOperand(0);
1663 llvm::Value *unif = lExtractUniforms(&sextOp, insertBefore);
1669 *vec =
new llvm::SExtInst(sextOp, sext->getType(),
"offset_sext", sext);
1677 llvm::ExtractElementInst::Create(*vec,
LLVMInt32(0),
1678 "first_uniform", insertBefore);
1683 llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(*vec);
1687 llvm::Value *op0 = bop->getOperand(0), *op1 = bop->getOperand(1);
1688 if (bop->getOpcode() == llvm::Instruction::Add) {
1689 llvm::Value *s0 = lExtractUniforms(&op0, insertBefore);
1690 llvm::Value *s1 = lExtractUniforms(&op1, insertBefore);
1691 if (s0 == NULL && s1 == NULL)
1696 else if (op1 == NULL)
1699 *vec = llvm::BinaryOperator::Create(llvm::Instruction::Add,
1700 op0, op1,
"new_add", insertBefore);
1704 else if (s1 == NULL)
1707 return llvm::BinaryOperator::Create(llvm::Instruction::Add, s0, s1,
1708 "add_unif", insertBefore);
1711 else if (bop->getOpcode() == llvm::Instruction::Mul) {
1714 if (lIs248Splat(op0, &splat)) {
1718 else if (lIs248Splat(op1, &splat)) {
1732 lExtractUniformsFromOffset(llvm::Value **basePtr, llvm::Value **offsetVector,
1733 llvm::Value *offsetScale,
1734 llvm::Instruction *insertBefore) {
1738 (*offsetVector)->dump();
1740 offsetScale->dump();
1744 llvm::Value *uniformDelta = lExtractUniforms(offsetVector, insertBefore);
1745 if (uniformDelta == NULL)
1748 *basePtr =
lGEPInst(*basePtr, arrayRef,
"new_base", insertBefore);
1752 Assert(*offsetVector != NULL);
1762 for (
int i = 0; i < nElts; ++i)
1763 if ((int32_t)elts[i] != elts[i])
1773 llvm::Instruction *insertBefore) {
1774 llvm::Value *variableOffset = *variableOffsetPtr;
1775 llvm::Value *constOffset = *constOffsetPtr;
1778 llvm::SExtInst *sext = llvm::dyn_cast<llvm::SExtInst>(variableOffset);
1781 variableOffset = sext->getOperand(0);
1787 LLVMGetName(variableOffset,
"_trunc"), insertBefore);
1796 LLVMGetName(constOffset,
"_trunc"), insertBefore);
1807 LLVMGetName(constOffset,
"_trunc"), insertBefore);
1811 *variableOffsetPtr = variableOffset;
1812 *constOffsetPtr = constOffset;
1824 if (llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v)) {
1829 }
else if (llvm::SExtInst *sext = llvm::dyn_cast<llvm::SExtInst>(v)) {
1839 llvm::Value *offset = *offsetPtr;
1844 llvm::SExtInst *sext = llvm::dyn_cast<llvm::SExtInst>(offset);
1847 *offsetPtr = sext->getOperand(0);
// NOTE(review): this chunk is an elided extraction of the original file.
// The leading integers fused onto lines (e.g. "1865") are original source
// line numbers; gaps in that numbering show interior lines are missing.
// Code is left byte-identical; comments only.
//
// GSInfo: one entry of the gather/scatter/prefetch lowering table.
// Maps a "__pseudo_*" function name to its base+offsets variants (64-bit
// and 32-bit offset forms), each resolved by name in the current module.
// A NULL lookup result is tolerated here; callers appear to Assert on the
// fields they require (see the numGSFuncs loop below) -- TODO confirm.
1865 GSInfo(
const char *pgFuncName,
const char *pgboFuncName,
const char *pgbo32FuncName,
bool ig,
bool ip)
1866 : isGather(ig), isPrefetch(ip) {
// Resolve the generic pseudo gather/scatter/prefetch function.
1867 func =
m->
module->getFunction(pgFuncName);
// Resolve the base + offsets variant.
1868 baseOffsetsFunc =
m->
module->getFunction(pgboFuncName);
// Resolve the base + 32-bit offsets variant.
1869 baseOffsets32Func =
m->
module->getFunction(pgbo32FuncName);
// Resolved function pointers (NULL if not present in the module).
1871 llvm::Function *func;
1872 llvm::Function *baseOffsetsFunc, *baseOffsets32Func;
// Distinguishes gather vs. scatter vs. prefetch handling in the rewrite.
1873 const bool isGather;
1874 const bool isPrefetch;
1877 GSInfo gsFuncs[] = {
1879 "__pseudo_gather32_i8",
1880 g->
target->
hasGather() ?
"__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
1881 g->
target->
hasGather() ?
"__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
1883 GSInfo(
"__pseudo_gather32_i16",
1885 :
"__pseudo_gather_factored_base_offsets32_i16",
1887 :
"__pseudo_gather_factored_base_offsets32_i16",
1889 GSInfo(
"__pseudo_gather32_i32",
1891 :
"__pseudo_gather_factored_base_offsets32_i32",
1893 :
"__pseudo_gather_factored_base_offsets32_i32",
1895 GSInfo(
"__pseudo_gather32_float",
1897 :
"__pseudo_gather_factored_base_offsets32_float",
1899 :
"__pseudo_gather_factored_base_offsets32_float",
1901 GSInfo(
"__pseudo_gather32_i64",
1903 :
"__pseudo_gather_factored_base_offsets32_i64",
1905 :
"__pseudo_gather_factored_base_offsets32_i64",
1907 GSInfo(
"__pseudo_gather32_double",
1909 :
"__pseudo_gather_factored_base_offsets32_double",
1911 :
"__pseudo_gather_factored_base_offsets32_double",
1914 GSInfo(
"__pseudo_scatter32_i8",
1916 :
"__pseudo_scatter_factored_base_offsets32_i8",
1918 :
"__pseudo_scatter_factored_base_offsets32_i8",
1920 GSInfo(
"__pseudo_scatter32_i16",
1922 :
"__pseudo_scatter_factored_base_offsets32_i16",
1924 :
"__pseudo_scatter_factored_base_offsets32_i16",
1926 GSInfo(
"__pseudo_scatter32_i32",
1928 :
"__pseudo_scatter_factored_base_offsets32_i32",
1930 :
"__pseudo_scatter_factored_base_offsets32_i32",
1932 GSInfo(
"__pseudo_scatter32_float",
1934 :
"__pseudo_scatter_factored_base_offsets32_float",
1936 :
"__pseudo_scatter_factored_base_offsets32_float",
1938 GSInfo(
"__pseudo_scatter32_i64",
1940 :
"__pseudo_scatter_factored_base_offsets32_i64",
1942 :
"__pseudo_scatter_factored_base_offsets32_i64",
1944 GSInfo(
"__pseudo_scatter32_double",
1946 :
"__pseudo_scatter_factored_base_offsets32_double",
1948 :
"__pseudo_scatter_factored_base_offsets32_double",
1952 "__pseudo_gather64_i8",
1953 g->
target->
hasGather() ?
"__pseudo_gather_base_offsets64_i8" :
"__pseudo_gather_factored_base_offsets64_i8",
1954 g->
target->
hasGather() ?
"__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
1956 GSInfo(
"__pseudo_gather64_i16",
1958 :
"__pseudo_gather_factored_base_offsets64_i16",
1960 :
"__pseudo_gather_factored_base_offsets32_i16",
1962 GSInfo(
"__pseudo_gather64_i32",
1964 :
"__pseudo_gather_factored_base_offsets64_i32",
1966 :
"__pseudo_gather_factored_base_offsets32_i32",
1968 GSInfo(
"__pseudo_gather64_float",
1970 :
"__pseudo_gather_factored_base_offsets64_float",
1972 :
"__pseudo_gather_factored_base_offsets32_float",
1974 GSInfo(
"__pseudo_gather64_i64",
1976 :
"__pseudo_gather_factored_base_offsets64_i64",
1978 :
"__pseudo_gather_factored_base_offsets32_i64",
1980 GSInfo(
"__pseudo_gather64_double",
1982 :
"__pseudo_gather_factored_base_offsets64_double",
1984 :
"__pseudo_gather_factored_base_offsets32_double",
1987 GSInfo(
"__pseudo_scatter64_i8",
1989 :
"__pseudo_scatter_factored_base_offsets64_i8",
1991 :
"__pseudo_scatter_factored_base_offsets32_i8",
1993 GSInfo(
"__pseudo_scatter64_i16",
1995 :
"__pseudo_scatter_factored_base_offsets64_i16",
1997 :
"__pseudo_scatter_factored_base_offsets32_i16",
1999 GSInfo(
"__pseudo_scatter64_i32",
2001 :
"__pseudo_scatter_factored_base_offsets64_i32",
2003 :
"__pseudo_scatter_factored_base_offsets32_i32",
2005 GSInfo(
"__pseudo_scatter64_float",
2007 :
"__pseudo_scatter_factored_base_offsets64_float",
2009 :
"__pseudo_scatter_factored_base_offsets32_float",
2011 GSInfo(
"__pseudo_scatter64_i64",
2013 :
"__pseudo_scatter_factored_base_offsets64_i64",
2015 :
"__pseudo_scatter_factored_base_offsets32_i64",
2017 GSInfo(
"__pseudo_scatter64_double",
2019 :
"__pseudo_scatter_factored_base_offsets64_double",
2021 :
"__pseudo_scatter_factored_base_offsets32_double",
2023 GSInfo(
"__pseudo_prefetch_read_varying_1",
2024 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_1_native" :
"__prefetch_read_varying_1",
2025 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_1_native" :
"__prefetch_read_varying_1",
2028 GSInfo(
"__pseudo_prefetch_read_varying_2",
2029 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_2_native" :
"__prefetch_read_varying_2",
2030 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_2_native" :
"__prefetch_read_varying_2",
2033 GSInfo(
"__pseudo_prefetch_read_varying_3",
2034 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_3_native" :
"__prefetch_read_varying_3",
2035 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_3_native" :
"__prefetch_read_varying_3",
2038 GSInfo(
"__pseudo_prefetch_read_varying_nt",
2039 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_nt_native" :
"__prefetch_read_varying_nt",
2040 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_nt_native" :
"__prefetch_read_varying_nt",
2044 int numGSFuncs =
sizeof(gsFuncs) /
sizeof(gsFuncs[0]);
2045 for (
int i = 0; i < numGSFuncs; ++i)
2046 Assert(gsFuncs[i].func != NULL && gsFuncs[i].baseOffsetsFunc != NULL && gsFuncs[i].baseOffsets32Func != NULL);
2048 GSInfo *info = NULL;
2049 for (
int i = 0; i < numGSFuncs; ++i)
2050 if (gsFuncs[i].func != NULL && callInst->getCalledFunction() == gsFuncs[i].func) {
2060 llvm::Value *ptrs = callInst->getArgOperand(0);
2061 llvm::Value *offsetVector = NULL;
2064 if (basePtr == NULL || offsetVector == NULL ||
2076 llvm::Function *gatherScatterFunc = info->baseOffsetsFunc;
2079 (info->isGather ==
false && info->isPrefetch ==
false &&
g->
target->
hasScatter()) ||
2091 gatherScatterFunc = info->baseOffsets32Func;
2094 if (info->isGather || info->isPrefetch) {
2095 llvm::Value *mask = callInst->getArgOperand(1);
2102 llvm::Instruction *newCall =
lCallInst(gatherScatterFunc, basePtr, offsetScale, offsetVector, mask,
2103 callInst->getName().str().c_str(), NULL);
2105 llvm::ReplaceInstWithInst(callInst, newCall);
2107 llvm::Value *storeValue = callInst->getArgOperand(1);
2108 llvm::Value *mask = callInst->getArgOperand(2);
2113 llvm::Instruction *newCall =
2114 lCallInst(gatherScatterFunc, basePtr, offsetScale, offsetVector, storeValue, mask,
"", NULL);
2116 llvm::ReplaceInstWithInst(callInst, newCall);
2124 llvm::Value *constOffset = NULL;
2125 llvm::Value *variableOffset = NULL;
2127 if (constOffset == NULL)
2129 if (variableOffset == NULL)
2142 gatherScatterFunc = info->baseOffsets32Func;
2145 if (info->isGather || info->isPrefetch) {
2146 llvm::Value *mask = callInst->getArgOperand(1);
2153 llvm::Instruction *newCall =
lCallInst(gatherScatterFunc, basePtr, variableOffset, offsetScale, constOffset,
2154 mask, callInst->getName().str().c_str(), NULL);
2156 llvm::ReplaceInstWithInst(callInst, newCall);
2158 llvm::Value *storeValue = callInst->getArgOperand(1);
2159 llvm::Value *mask = callInst->getArgOperand(2);
2164 llvm::Instruction *newCall =
lCallInst(gatherScatterFunc, basePtr, variableOffset, offsetScale, constOffset,
2165 storeValue, mask,
"", NULL);
2167 llvm::ReplaceInstWithInst(callInst, newCall);
// NOTE(review): elided extraction -- fused line numbers ("2181") and gaps
// in the numbering indicate missing interior lines. Comments only.
//
// GSBOInfo: like GSInfo but for functions that already take base+offsets
// form; carries only the 64-bit- and 32-bit-offset variants (no generic
// pseudo function), resolved by name in the current module.
2181 GSBOInfo(
const char *pgboFuncName,
const char *pgbo32FuncName,
bool ig,
bool ip)
2182 : isGather(ig), isPrefetch(ip) {
// Resolve base + offsets variant.
2183 baseOffsetsFunc =
m->
module->getFunction(pgboFuncName);
// Resolve base + 32-bit offsets variant.
2184 baseOffsets32Func =
m->
module->getFunction(pgbo32FuncName);
// Resolved function pointers; asserted non-NULL by the caller's loop below.
2186 llvm::Function *baseOffsetsFunc, *baseOffsets32Func;
2187 const bool isGather;
2188 const bool isPrefetch;
2191 GSBOInfo gsFuncs[] = {
2193 g->
target->
hasGather() ?
"__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
2194 g->
target->
hasGather() ?
"__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
2197 :
"__pseudo_gather_factored_base_offsets32_i16",
2199 :
"__pseudo_gather_factored_base_offsets32_i16",
2202 :
"__pseudo_gather_factored_base_offsets32_i32",
2204 :
"__pseudo_gather_factored_base_offsets32_i32",
2207 :
"__pseudo_gather_factored_base_offsets32_float",
2209 :
"__pseudo_gather_factored_base_offsets32_float",
2212 :
"__pseudo_gather_factored_base_offsets32_i64",
2214 :
"__pseudo_gather_factored_base_offsets32_i64",
2217 :
"__pseudo_gather_factored_base_offsets32_double",
2219 :
"__pseudo_gather_factored_base_offsets32_double",
2223 :
"__pseudo_scatter_factored_base_offsets32_i8",
2225 :
"__pseudo_scatter_factored_base_offsets32_i8",
2228 :
"__pseudo_scatter_factored_base_offsets32_i16",
2230 :
"__pseudo_scatter_factored_base_offsets32_i16",
2233 :
"__pseudo_scatter_factored_base_offsets32_i32",
2235 :
"__pseudo_scatter_factored_base_offsets32_i32",
2238 :
"__pseudo_scatter_factored_base_offsets32_float",
2240 :
"__pseudo_scatter_factored_base_offsets32_float",
2243 :
"__pseudo_scatter_factored_base_offsets32_i64",
2245 :
"__pseudo_scatter_factored_base_offsets32_i64",
2248 :
"__pseudo_scatter_factored_base_offsets32_double",
2250 :
"__pseudo_scatter_factored_base_offsets32_double",
2253 GSBOInfo(
g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_1_native" :
"__prefetch_read_varying_1",
2254 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_1_native" :
"__prefetch_read_varying_1",
2257 GSBOInfo(
g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_2_native" :
"__prefetch_read_varying_2",
2258 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_2_native" :
"__prefetch_read_varying_2",
2261 GSBOInfo(
g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_3_native" :
"__prefetch_read_varying_3",
2262 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_3_native" :
"__prefetch_read_varying_3",
2266 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_nt_native" :
"__prefetch_read_varying_nt",
2267 g->
target->
hasVecPrefetch() ?
"__pseudo_prefetch_read_varying_nt_native" :
"__prefetch_read_varying_nt",
2271 int numGSFuncs =
sizeof(gsFuncs) /
sizeof(gsFuncs[0]);
2272 for (
int i = 0; i < numGSFuncs; ++i)
2273 Assert(gsFuncs[i].baseOffsetsFunc != NULL && gsFuncs[i].baseOffsets32Func != NULL);
2275 llvm::Function *calledFunc = callInst->getCalledFunction();
2276 Assert(calledFunc != NULL);
2280 GSBOInfo *info = NULL;
2281 for (
int i = 0; i < numGSFuncs; ++i)
2282 if (calledFunc == gsFuncs[i].baseOffsetsFunc || calledFunc == gsFuncs[i].baseOffsets32Func) {
2290 llvm::Value *origVariableOffset = callInst->getArgOperand(1);
2294 if (llvm::isa<llvm::ConstantAggregateZero>(origVariableOffset))
2298 llvm::Value *constOffset = NULL;
2299 llvm::Value *variableOffset = NULL;
2303 if (constOffset == NULL)
2307 if (variableOffset == NULL)
2308 variableOffset =
LLVMIntAsType(0, origVariableOffset->getType());
2312 llvm::ConstantInt *varScale = llvm::dyn_cast<llvm::ConstantInt>(callInst->getArgOperand(2));
2313 Assert(varScale != NULL);
2315 llvm::Value *scaleSmear;
2322 llvm::BinaryOperator::Create(llvm::Instruction::Mul, constOffset, scaleSmear, constOffset->getName(), callInst);
2325 constOffset = llvm::BinaryOperator::Create(llvm::Instruction::Add, constOffset, callInst->getArgOperand(3),
2326 callInst->getArgOperand(3)->getName(), callInst);
2330 callInst->setArgOperand(1, variableOffset);
2331 callInst->setArgOperand(3, constOffset);
2338 return lGEPInst(base, firstOffset,
"ptr", insertBefore);
2342 llvm::ConstantInt *offsetScaleInt = llvm::dyn_cast<llvm::ConstantInt>(offsetScale);
2343 Assert(offsetScaleInt != NULL);
2344 uint64_t scaleValue = offsetScaleInt->getZExtValue();
2346 std::vector<llvm::Constant *> scales;
2349 scales.push_back(
LLVMInt64(scaleValue));
2352 scales.push_back(
LLVMInt32((int32_t)scaleValue));
2355 return llvm::ConstantVector::get(scales);
// NOTE(review): elided extraction -- fused line numbers and numbering gaps
// (e.g. 2375 -> 2377, 2379 -> 2383) show missing lines (struct body is
// incomplete here). Comments only.
//
// GatherImpInfo: table entry used when lowering a pseudo gather (whose
// mask turns out to be known) into a masked load: pairs the pseudo
// gather-base-offsets function with the corresponding __masked_load_*
// function, plus the scalar element type and alignment.
2374 struct GatherImpInfo {
2375 GatherImpInfo(
const char *pName,
const char *lmName, llvm::Type *st,
int a)
// Resolve both functions by name in the current module.
2377 pseudoFunc =
m->
module->getFunction(pName);
2378 loadMaskedFunc =
m->
module->getFunction(lmName);
// Both must exist for this lowering to be applicable.
2379 Assert(pseudoFunc != NULL && loadMaskedFunc != NULL);
2383 llvm::Function *pseudoFunc;
2384 llvm::Function *loadMaskedFunc;
// Scalar element type of the gathered values.
2385 llvm::Type *scalarType;
// True when the pseudo function is the "factored" base-offsets form
// (separate varying offset, scale, and constant offset operands) --
// presumably set from the hasGather() name choice; TODO confirm.
2387 const bool isFactored;
2390 GatherImpInfo gInfo[] = {
2392 :
"__pseudo_gather_factored_base_offsets32_i8",
2394 GatherImpInfo(
g->
target->
hasGather() ?
"__pseudo_gather_base_offsets32_i16" 2395 :
"__pseudo_gather_factored_base_offsets32_i16",
2397 GatherImpInfo(
g->
target->
hasGather() ?
"__pseudo_gather_base_offsets32_i32" 2398 :
"__pseudo_gather_factored_base_offsets32_i32",
2400 GatherImpInfo(
g->
target->
hasGather() ?
"__pseudo_gather_base_offsets32_float" 2401 :
"__pseudo_gather_factored_base_offsets32_float",
2403 GatherImpInfo(
g->
target->
hasGather() ?
"__pseudo_gather_base_offsets32_i64" 2404 :
"__pseudo_gather_factored_base_offsets32_i64",
2406 GatherImpInfo(
g->
target->
hasGather() ?
"__pseudo_gather_base_offsets32_double" 2407 :
"__pseudo_gather_factored_base_offsets32_double",
2410 :
"__pseudo_gather_factored_base_offsets64_i8",
2412 GatherImpInfo(
g->
target->
hasGather() ?
"__pseudo_gather_base_offsets64_i16" 2413 :
"__pseudo_gather_factored_base_offsets64_i16",
2415 GatherImpInfo(
g->
target->
hasGather() ?
"__pseudo_gather_base_offsets64_i32" 2416 :
"__pseudo_gather_factored_base_offsets64_i32",
2418 GatherImpInfo(
g->
target->
hasGather() ?
"__pseudo_gather_base_offsets64_float" 2419 :
"__pseudo_gather_factored_base_offsets64_float",
2421 GatherImpInfo(
g->
target->
hasGather() ?
"__pseudo_gather_base_offsets64_i64" 2422 :
"__pseudo_gather_factored_base_offsets64_i64",
2424 GatherImpInfo(
g->
target->
hasGather() ?
"__pseudo_gather_base_offsets64_double" 2425 :
"__pseudo_gather_factored_base_offsets64_double",
// NOTE(review): elided extraction -- fused line numbers and numbering gaps
// (e.g. 2430 -> 2432, 2435 -> 2437) show missing lines. Comments only.
//
// ScatterImpInfo: mirror of GatherImpInfo for scatters: pairs the pseudo
// scatter-base-offsets function with the corresponding __pseudo_masked_store_*
// / masked-store function, plus the vector-pointer type and alignment.
2429 struct ScatterImpInfo {
2430 ScatterImpInfo(
const char *pName,
const char *msName, llvm::Type *vpt,
int a)
// Resolve both functions by name in the current module.
2432 pseudoFunc =
m->
module->getFunction(pName);
2433 maskedStoreFunc =
m->
module->getFunction(msName);
// Both must exist for this lowering to be applicable.
2435 Assert(pseudoFunc != NULL && maskedStoreFunc != NULL);
2437 llvm::Function *pseudoFunc;
2438 llvm::Function *maskedStoreFunc;
// Pointer-to-vector type used to bitcast the target address before the
// masked store (see the "ptrcast" BitCastInst later in this chunk).
2439 llvm::Type *vecPtrType;
// True for the "factored" base-offsets form -- TODO confirm (set from the
// hasScatter() name choice, elided here).
2441 const bool isFactored;
2444 ScatterImpInfo sInfo[] = {
2446 :
"__pseudo_scatter_factored_base_offsets32_i8",
2449 :
"__pseudo_scatter_factored_base_offsets32_i16",
2452 :
"__pseudo_scatter_factored_base_offsets32_i32",
2454 ScatterImpInfo(
g->
target->
hasScatter() ?
"__pseudo_scatter_base_offsets32_float" 2455 :
"__pseudo_scatter_factored_base_offsets32_float",
2458 :
"__pseudo_scatter_factored_base_offsets32_i64",
2460 ScatterImpInfo(
g->
target->
hasScatter() ?
"__pseudo_scatter_base_offsets32_double" 2461 :
"__pseudo_scatter_factored_base_offsets32_double",
2464 :
"__pseudo_scatter_factored_base_offsets64_i8",
2467 :
"__pseudo_scatter_factored_base_offsets64_i16",
2470 :
"__pseudo_scatter_factored_base_offsets64_i32",
2472 ScatterImpInfo(
g->
target->
hasScatter() ?
"__pseudo_scatter_base_offsets64_float" 2473 :
"__pseudo_scatter_factored_base_offsets64_float",
2476 :
"__pseudo_scatter_factored_base_offsets64_i64",
2478 ScatterImpInfo(
g->
target->
hasScatter() ?
"__pseudo_scatter_base_offsets64_double" 2479 :
"__pseudo_scatter_factored_base_offsets64_double",
2483 llvm::Function *calledFunc = callInst->getCalledFunction();
2485 GatherImpInfo *gatherInfo = NULL;
2486 ScatterImpInfo *scatterInfo = NULL;
2487 for (
unsigned int i = 0; i <
sizeof(gInfo) /
sizeof(gInfo[0]); ++i) {
2488 if (gInfo[i].pseudoFunc != NULL && calledFunc == gInfo[i].pseudoFunc) {
2489 gatherInfo = &gInfo[i];
2493 for (
unsigned int i = 0; i <
sizeof(sInfo) /
sizeof(sInfo[0]); ++i) {
2494 if (sInfo[i].pseudoFunc != NULL && calledFunc == sInfo[i].pseudoFunc) {
2495 scatterInfo = &sInfo[i];
2499 if (gatherInfo == NULL && scatterInfo == NULL)
2505 llvm::Value *base = callInst->getArgOperand(0);
2506 llvm::Value *fullOffsets = NULL;
2507 llvm::Value *storeValue = NULL;
2508 llvm::Value *mask = NULL;
2510 if ((gatherInfo != NULL && gatherInfo->isFactored) || (scatterInfo != NULL && scatterInfo->isFactored)) {
2511 llvm::Value *varyingOffsets = callInst->getArgOperand(1);
2512 llvm::Value *offsetScale = callInst->getArgOperand(2);
2513 llvm::Value *constOffsets = callInst->getArgOperand(3);
2515 storeValue = callInst->getArgOperand(4);
2516 mask = callInst->getArgOperand((gatherInfo != NULL) ? 4 : 5);
2519 llvm::Constant *offsetScaleVec =
lGetOffsetScaleVec(offsetScale, varyingOffsets->getType());
2521 llvm::Value *scaledVarying = llvm::BinaryOperator::Create(llvm::Instruction::Mul, offsetScaleVec,
2522 varyingOffsets,
"scaled_varying", callInst);
2523 fullOffsets = llvm::BinaryOperator::Create(llvm::Instruction::Add, scaledVarying, constOffsets,
2524 "varying+const_offsets", callInst);
2527 storeValue = callInst->getArgOperand(3);
2528 mask = callInst->getArgOperand((gatherInfo != NULL) ? 3 : 4);
2530 llvm::Value *offsetScale = callInst->getArgOperand(1);
2531 llvm::Value *offsets = callInst->getArgOperand(2);
2535 llvm::BinaryOperator::Create(llvm::Instruction::Mul, offsetScaleVec, offsets,
"scaled_offsets", callInst);
2538 Debug(
SourcePos(),
"GSToLoadStore: %s.", fullOffsets->getName().str().c_str());
2547 if (gatherInfo != NULL) {
2550 Debug(pos,
"Transformed gather to scalar load and broadcast!");
2553 new llvm::BitCastInst(ptr, llvm::PointerType::get(gatherInfo->scalarType, 0), ptr->getName(), callInst);
2554 llvm::Value *scalarValue =
new llvm::LoadInst(ptr, callInst->getName(), callInst);
2560 llvm::Value *undef1Value = llvm::UndefValue::get(callInst->getType());
2561 llvm::Value *undef2Value = llvm::UndefValue::get(callInst->getType());
2562 llvm::Value *insertVec =
2563 llvm::InsertElementInst::Create(undef1Value, scalarValue,
LLVMInt32(0), callInst->getName(), callInst);
2564 llvm::Value *zeroMask =
2565 #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0 2566 llvm::ConstantVector::getSplat(callInst->getType()->getVectorNumElements(),
2568 llvm::ConstantVector::getSplat({callInst->getType()->getVectorNumElements(),
false},
2570 llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*
g->
ctx)));
2571 llvm::Value *shufValue =
new llvm::ShuffleVectorInst(insertVec, undef2Value, zeroMask, callInst->getName());
2574 llvm::ReplaceInstWithInst(callInst, llvm::dyn_cast<llvm::Instruction>(shufValue));
2581 Warning(pos,
"Undefined behavior: all program instances are " 2582 "writing to the same location!");
2594 int step = gatherInfo ? gatherInfo->align : scatterInfo->align;
2604 if (gatherInfo != NULL) {
2605 Debug(pos,
"Transformed gather to unaligned vector load!");
2606 llvm::Instruction *newCall =
2609 llvm::ReplaceInstWithInst(callInst, newCall);
2612 Debug(pos,
"Transformed scatter to unaligned vector store!");
2613 ptr =
new llvm::BitCastInst(ptr, scatterInfo->vecPtrType,
"ptrcast", callInst);
2614 llvm::Instruction *newCall =
lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue, mask,
"");
2616 llvm::ReplaceInstWithInst(callInst, newCall);
// NOTE(review): elided extraction -- fused line numbers and gaps
// (2636 -> 2639) show missing lines (struct header/footer not visible).
// Comments only.
//
// MSInfo: masked-store lowering table entry: function resolved by name,
// with its natural alignment 'a' (used when the mask is known all-on and
// the call is replaced by a plain StoreInst -- see below in this chunk).
2635 MSInfo(
const char *name,
const int a) : align(a) {
2636 func =
m->
module->getFunction(name);
2639 llvm::Function *func;
2643 MSInfo msInfo[] = {MSInfo(
"__pseudo_masked_store_i8", 1), MSInfo(
"__pseudo_masked_store_i16", 2),
2644 MSInfo(
"__pseudo_masked_store_i32", 4), MSInfo(
"__pseudo_masked_store_float", 4),
2645 MSInfo(
"__pseudo_masked_store_i64", 8), MSInfo(
"__pseudo_masked_store_double", 8),
2646 MSInfo(
"__masked_store_blend_i8", 1), MSInfo(
"__masked_store_blend_i16", 2),
2647 MSInfo(
"__masked_store_blend_i32", 4), MSInfo(
"__masked_store_blend_float", 4),
2648 MSInfo(
"__masked_store_blend_i64", 8), MSInfo(
"__masked_store_blend_double", 8),
2649 MSInfo(
"__masked_store_i8", 1), MSInfo(
"__masked_store_i16", 2),
2650 MSInfo(
"__masked_store_i32", 4), MSInfo(
"__masked_store_float", 4),
2651 MSInfo(
"__masked_store_i64", 8), MSInfo(
"__masked_store_double", 8)};
2653 llvm::Function *called = callInst->getCalledFunction();
2655 int nMSFuncs =
sizeof(msInfo) /
sizeof(msInfo[0]);
2656 MSInfo *info = NULL;
2657 for (
int i = 0; i < nMSFuncs; ++i) {
2658 if (msInfo[i].func != NULL && called == msInfo[i].func) {
2667 llvm::Value *lvalue = callInst->getArgOperand(0);
2668 llvm::Value *rvalue = callInst->getArgOperand(1);
2669 llvm::Value *mask = callInst->getArgOperand(2);
2676 callInst->eraseFromParent();
2678 }
else if (maskStatus ==
ALL_ON) {
2680 llvm::Type *rvalueType = rvalue->getType();
2681 llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0);
2683 lvalue =
new llvm::BitCastInst(lvalue, ptrType,
"lvalue_to_ptr_type", callInst);
2685 llvm::Instruction *store =
2686 new llvm::StoreInst(rvalue, lvalue,
false ,
2694 llvm::ReplaceInstWithInst(callInst, store);
// NOTE(review): elided extraction -- fused line numbers and gaps
// (2704 -> 2707) show missing lines (struct header/footer not visible).
// Comments only.
//
// MLInfo: masked-load lowering table entry: __masked_load_* function
// resolved by name, with its natural alignment 'a' (used when the mask is
// known all-on and the call is replaced by a plain LoadInst -- see below).
2703 MLInfo(
const char *name,
const int a) : align(a) {
2704 func =
m->
module->getFunction(name);
2707 llvm::Function *func;
2711 MLInfo mlInfo[] = {MLInfo(
"__masked_load_i8", 1), MLInfo(
"__masked_load_i16", 2),
2712 MLInfo(
"__masked_load_i32", 4), MLInfo(
"__masked_load_float", 4),
2713 MLInfo(
"__masked_load_i64", 8), MLInfo(
"__masked_load_double", 8)};
2715 llvm::Function *called = callInst->getCalledFunction();
2717 int nFuncs =
sizeof(mlInfo) /
sizeof(mlInfo[0]);
2718 MLInfo *info = NULL;
2719 for (
int i = 0; i < nFuncs; ++i) {
2720 if (mlInfo[i].func != NULL && called == mlInfo[i].func) {
2729 llvm::Value *ptr = callInst->getArgOperand(0);
2730 llvm::Value *mask = callInst->getArgOperand(1);
2735 llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, llvm::UndefValue::get(callInst->getType()));
2737 }
else if (maskStatus ==
ALL_ON) {
2739 llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0);
2740 ptr =
new llvm::BitCastInst(ptr, ptrType,
"ptr_cast_for_load", callInst);
2741 llvm::Instruction *load =
new llvm::LoadInst(
2742 ptr, callInst->getName(),
false ,
2743 #if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 2747 (llvm::Instruction *)NULL);
2750 llvm::ReplaceInstWithInst(callInst, load);
2759 bool modifiedAny =
false;
2762 for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
2763 llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
2767 if (callInst == NULL || callInst->getCalledFunction() == NULL)
2799 bool modifiedAny =
false;
2800 for (llvm::BasicBlock &BB : F) {
2801 modifiedAny |= runOnBasicBlock(BB);
2836 bool runOnBasicBlock(llvm::BasicBlock &BB);
2837 bool runOnFunction(llvm::Function &F);
2849 load = element0 = element1 = NULL;
2875 std::set<int64_t>::iterator *newIter,
int vectorWidth) {
2878 int64_t start = *iter;
2896 int64_t lastAccepted = start;
2898 while (iter != end) {
2901 int64_t delta = *iter - lastAccepted;
2906 int64_t span = *iter - start + 1;
2908 if (span == vectorWidth) {
2915 }
else if (span > vectorWidth) {
2946 const int pageSize = 4096;
2947 if (vectorWidth != 2 && (lastAccepted - start) > (vectorWidth / 2) && (*iter - lastAccepted) < pageSize) {
2955 lastAccepted = *iter;
2968 static void lSelectLoads(
const std::vector<int64_t> &loadOffsets, std::vector<CoalescedLoadOp> *loads) {
2970 std::set<int64_t> allOffsets;
2971 for (
unsigned int i = 0; i < loadOffsets.size(); ++i)
2972 allOffsets.insert(loadOffsets[i]);
2974 std::set<int64_t>::iterator iter = allOffsets.begin();
2975 while (iter != allOffsets.end()) {
2983 iter = allOffsets.begin();
2984 while (iter != allOffsets.end()) {
2987 int vectorWidths[] = {8, 4, 2};
2988 int nVectorWidths =
sizeof(vectorWidths) /
sizeof(vectorWidths[0]);
2989 bool gotOne =
false;
2990 for (
int i = 0; i < nVectorWidths; ++i) {
2994 std::set<int64_t>::iterator newIter;
3007 if (gotOne ==
false) {
3019 const std::vector<CoalescedLoadOp> &loadOps) {
3025 char otherPositions[512];
3026 otherPositions[0] =
'\0';
3027 if (coalesceGroup.size() > 1) {
3028 const char *plural = (coalesceGroup.size() > 2) ?
"s" :
"";
3030 snprintf(otherBuf,
sizeof(otherBuf),
"(other%s at line%s ", plural, plural);
3031 strncat(otherPositions, otherBuf,
sizeof(otherPositions) - strlen(otherPositions) - 1);
3033 for (
int i = 1; i < (int)coalesceGroup.size(); ++i) {
3038 snprintf(buf,
sizeof(buf),
"%d", p.
first_line);
3039 strncat(otherPositions, buf,
sizeof(otherPositions) - strlen(otherPositions) - 1);
3040 if (i < (
int)coalesceGroup.size() - 1)
3041 strncat(otherPositions,
", ",
sizeof(otherPositions) - strlen(otherPositions) - 1);
3044 strncat(otherPositions,
") ",
sizeof(otherPositions) - strlen(otherPositions) - 1);
3048 std::map<int, int> loadOpsCount;
3049 for (
int i = 0; i < (int)loadOps.size(); ++i)
3050 ++loadOpsCount[loadOps[i].count];
3053 char loadOpsInfo[512];
3054 loadOpsInfo[0] =
'\0';
3055 std::map<int, int>::const_iterator iter = loadOpsCount.begin();
3056 while (iter != loadOpsCount.end()) {
3058 snprintf(buf,
sizeof(buf),
"%d x %d-wide", iter->second, iter->first);
3059 if ((strlen(loadOpsInfo) + strlen(buf)) >= 512) {
3062 strncat(loadOpsInfo, buf,
sizeof(loadOpsInfo) - strlen(loadOpsInfo) - 1);
3064 if (iter != loadOpsCount.end())
3065 strncat(loadOpsInfo,
", ",
sizeof(loadOpsInfo) - strlen(loadOpsInfo) - 1);
3069 if (coalesceGroup.size() == 1)
3070 PerformanceWarning(pos,
"Coalesced gather into %d load%s (%s).", (
int)loadOps.size(),
3071 (loadOps.size() > 1) ?
"s" :
"", loadOpsInfo);
3074 "Coalesced %d gathers starting here %sinto %d " 3076 (
int)coalesceGroup.size(), otherPositions, (int)loadOps.size(),
3077 (loadOps.size() > 1) ?
"s" :
"", loadOpsInfo);
3087 llvm::Value *
lGEPAndLoad(llvm::Value *basePtr, int64_t offset,
int align, llvm::Instruction *insertBefore,
3089 llvm::Value *ptr =
lGEPInst(basePtr,
LLVMInt64(offset),
"new_base", insertBefore);
3090 ptr =
new llvm::BitCastInst(ptr, llvm::PointerType::get(type, 0),
"ptr_cast", insertBefore);
3091 #if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 3092 return new llvm::LoadInst(ptr,
"gather_load",
false , align, insertBefore);
3094 return new llvm::LoadInst(ptr,
"gather_load",
false , llvm::MaybeAlign(align), insertBefore);
3102 static void lEmitLoads(llvm::Value *basePtr, std::vector<CoalescedLoadOp> &loadOps,
int elementSize,
3103 llvm::Instruction *insertBefore) {
3104 Debug(
SourcePos(),
"Coalesce doing %d loads.", (
int)loadOps.size());
3105 for (
int i = 0; i < (int)loadOps.size(); ++i) {
3110 int64_t start = loadOps[i].start * elementSize;
3113 switch (loadOps[i].count) {
3123 loadOps[i].element0 =
3126 llvm::Value *shift = llvm::BinaryOperator::Create(llvm::Instruction::LShr, loadOps[i].load,
LLVMInt64(32),
3127 "load64_shift", insertBefore);
3128 loadOps[i].element1 =
new llvm::TruncInst(shift,
LLVMTypes::Int32Type,
"load64_elt1", insertBefore);
3137 loadOps[i].load =
lGEPAndLoad(basePtr, start, align, insertBefore, vt);
3146 loadOps[i].load =
lGEPAndLoad(basePtr, start, align, insertBefore, vt);
3150 FATAL(
"Unexpected load count in lEmitLoads()");
3160 static std::vector<CoalescedLoadOp>
lSplit8WideLoads(
const std::vector<CoalescedLoadOp> &loadOps,
3161 llvm::Instruction *insertBefore) {
3162 std::vector<CoalescedLoadOp> ret;
3163 for (
unsigned int i = 0; i < loadOps.size(); ++i) {
3164 if (loadOps[i].count == 8) {
3168 int32_t shuf[2][4] = {{0, 1, 2, 3}, {4, 5, 6, 7}};
3171 ret.back().load =
LLVMShuffleVectors(loadOps[i].load, loadOps[i].load, shuf[0], 4, insertBefore);
3174 ret.back().load =
LLVMShuffleVectors(loadOps[i].load, loadOps[i].load, shuf[1], 4, insertBefore);
3176 ret.push_back(loadOps[i]);
3186 llvm::Instruction *insertBefore) {
3187 for (
int elt = 0; elt < 4; ++elt) {
3188 if (offsets[elt] >= load.
start && offsets[elt] < load.
start + load.
count) {
3190 "Load 1 @ %" PRId64 " matches for element #%d " 3192 load.
start, elt, offsets[elt]);
3195 Assert(
set[elt] ==
false);
3196 result = llvm::InsertElementInst::Create(result, load.
load,
LLVMInt32(elt),
"insert_load", insertBefore);
3207 llvm::Instruction *insertBefore) {
3215 if ((elt & 1) == 0 && offsets[elt] + 1 == offsets[elt + 1] && offsets[elt] == load.
start) {
3217 "Load 2 @ %" PRId64 " matches for elements #%d,%d " 3219 load.
start, elt, elt + 1, offsets[elt], offsets[elt + 1]);
3220 Assert(
set[elt] ==
false && ((elt < 3) &&
set[elt + 1] ==
false));
3224 result =
new llvm::BitCastInst(result, vec2x64Type,
"to2x64", insertBefore);
3228 result = llvm::InsertElementInst::Create(result, load.
load,
LLVMInt32(elt / 2),
"insert64", insertBefore);
3232 result =
new llvm::BitCastInst(result, vec4x32Type,
"to4x32", insertBefore);
3236 set[elt + 1] =
true;
3241 }
else if (offsets[elt] >= load.
start && offsets[elt] < load.
start + load.
count) {
3243 "Load 2 @ %" PRId64 " matches for element #%d " 3245 load.
start, elt, offsets[elt]);
3248 Assert(
set[elt] ==
false);
3250 result = llvm::InsertElementInst::Create(result, toInsert,
LLVMInt32(elt),
"insert_load", insertBefore);
3266 llvm::Instruction *insertBefore) {
3271 int32_t shuf[4] = {4, 5, 6, 7};
3273 for (
int elt = 0; elt < 4; ++elt) {
3274 if (offsets[elt] >= load.
start && offsets[elt] < load.
start + load.
count) {
3276 "Load 4 @ %" PRId64 " matches for element #%d " 3278 load.
start, elt, offsets[elt]);
3284 Assert(
set[elt] ==
false);
3285 shuf[elt] = int32_t(offsets[elt] - load.
start);
3292 if (shuf[0] != 4 || shuf[1] != 5 || shuf[2] != 6 || shuf[3] != 7)
3304 static llvm::Value *
lAssemble4Vector(
const std::vector<CoalescedLoadOp> &loadOps,
const int64_t offsets[4],
3305 llvm::Instruction *insertBefore) {
3307 llvm::Value *result = llvm::UndefValue::get(returnType);
3310 offsets[1], offsets[2], offsets[3]);
3314 bool set[4] = {
false,
false,
false,
false};
3318 for (
int load = 0; load < (int)loadOps.size(); ++load) {
3323 result =
lApplyLoad1(result, li, offsets,
set, insertBefore);
3326 result =
lApplyLoad2(result, li, offsets,
set, insertBefore);
3329 result =
lApplyLoad4(result, li, offsets,
set, insertBefore);
3332 FATAL(
"Unexpected load count in lAssemble4Vector()");
3336 Debug(
SourcePos(),
"Done with search for loads [%" PRId64
" %" PRId64
" %" PRId64
" %" PRId64
"].", offsets[0],
3337 offsets[1], offsets[2], offsets[3]);
3339 for (
int i = 0; i < 4; ++i)
3347 static llvm::Value *lApplyLoad4s(llvm::Value *result,
const std::vector<CoalescedLoadOp> &loadOps,
3348 const int64_t offsets[4],
bool set[4], llvm::Instruction *insertBefore) {
3349 int32_t firstMatchElements[4] = {-1, -1, -1, -1};
3352 Assert(llvm::isa<llvm::UndefValue>(result));
3354 for (
int load = 0; load < (int)loadOps.size(); ++load) {
3356 if (loadop.
count != 4)
3359 int32_t matchElements[4] = {-1, -1, -1, -1};
3360 bool anyMatched =
false;
3361 for (
int elt = 0; elt < 4; ++elt) {
3362 if (offsets[elt] >= loadop.
start && offsets[elt] < loadop.
start + loadop.
count) {
3364 "Load 4 @ %" PRId64 " matches for element #%d " 3366 loadop.
start, elt, offsets[elt]);
3368 Assert(
set[elt] ==
false);
3369 matchElements[elt] = offsets[elt] - loadop.
start;
3375 if (llvm::isa<llvm::UndefValue>(result)) {
3376 if (firstMatch == NULL) {
3377 firstMatch = &loadop;
3378 for (
int i = 0; i < 4; ++i)
3379 firstMatchElements[i] = matchElements[i];
3381 int32_t shuffle[4] = {-1, -1, -1, -1};
3382 for (
int i = 0; i < 4; ++i) {
3383 if (firstMatchElements[i] != -1)
3384 shuffle[i] = firstMatchElements[i];
3386 shuffle[i] = 4 + matchElements[i];
3392 int32_t shuffle[4] = {-1, -1, -1, -1};
3393 for (
int i = 0; i < 4; ++i) {
3394 if (matchElements[i] != -1)
3395 shuffle[i] = 4 + matchElements[i];
3404 if (firstMatch != NULL && llvm::isa<llvm::UndefValue>(result))
3410 static llvm::Value *lApplyLoad12s(llvm::Value *result,
const std::vector<CoalescedLoadOp> &loadOps,
3411 const int64_t offsets[4],
bool set[4], llvm::Instruction *insertBefore) {
3414 for (
int load = 0; load < (int)loadOps.size(); ++load) {
3418 if (loadop.
count == 1)
3419 result =
lApplyLoad1(result, loadop, offsets,
set, insertBefore);
3420 else if (loadop.
count == 2)
3421 result =
lApplyLoad2(result, loadop, offsets,
set, insertBefore);
3432 static llvm::Value *
lAssemble4Vector(
const std::vector<CoalescedLoadOp> &loadOps,
const int64_t offsets[4],
3433 llvm::Instruction *insertBefore) {
3435 llvm::Value *result = llvm::UndefValue::get(returnType);
3438 offsets[1], offsets[2], offsets[3]);
3442 bool set[4] = {
false,
false,
false,
false};
3444 result = lApplyLoad4s(result, loadOps, offsets,
set, insertBefore);
3445 result = lApplyLoad12s(result, loadOps, offsets,
set, insertBefore);
3447 Debug(
SourcePos(),
"Done with search for loads [%" PRId64
" %" PRId64
" %" PRId64
" %" PRId64
"].", offsets[0],
3448 offsets[1], offsets[2], offsets[3]);
3450 for (
int i = 0; i < 4; ++i)
3462 const std::vector<int64_t> &constOffsets, std::vector<llvm::Value *> &results,
3463 llvm::Instruction *insertBefore) {
3467 Assert((constOffsets.size() % 4) == 0);
3468 std::vector<llvm::Value *> vec4s;
3469 for (
int i = 0; i < (int)constOffsets.size(); i += 4)
3470 vec4s.push_back(
lAssemble4Vector(loadOps, &constOffsets[i], insertBefore));
3475 for (
int i = 0; i < numGathers; ++i) {
3476 llvm::Value *result = NULL;
3485 llvm::Value *v1 =
LLVMConcatVectors(vec4s[4 * i], vec4s[4 * i + 1], insertBefore);
3486 llvm::Value *v2 =
LLVMConcatVectors(vec4s[4 * i + 2], vec4s[4 * i + 3], insertBefore);
3491 FATAL(
"Unhandled vector width in lAssembleResultVectors()");
3494 results.push_back(result);
3504 static llvm::Value *
lComputeBasePtr(llvm::CallInst *gatherInst, llvm::Instruction *insertBefore) {
3505 llvm::Value *basePtr = gatherInst->getArgOperand(0);
3506 llvm::Value *variableOffsets = gatherInst->getArgOperand(1);
3507 llvm::Value *offsetScale = gatherInst->getArgOperand(2);
3513 Assert(variable != NULL);
3515 offsetScale =
new llvm::ZExtInst(offsetScale,
LLVMTypes::Int64Type,
"scale_to64", insertBefore);
3516 llvm::Value *offset =
3517 llvm::BinaryOperator::Create(llvm::Instruction::Mul, variable, offsetScale,
"offset", insertBefore);
3519 return lGEPInst(basePtr, offset,
"new_base", insertBefore);
3530 std::vector<int64_t> *constOffsets) {
3532 *constOffsets = std::vector<int64_t>(coalesceGroup.size() * width, 0);
3534 int64_t *endPtr = &((*constOffsets)[0]);
3535 for (
int i = 0; i < (int)coalesceGroup.size(); ++i, endPtr += width) {
3536 llvm::Value *offsets = coalesceGroup[i]->getArgOperand(3);
3539 Assert(ok && nElts == width);
3542 for (
int i = 0; i < (int)constOffsets->size(); ++i)
3543 (*constOffsets)[i] /= elementSize;
3557 llvm::Instruction *insertBefore = coalesceGroup[0];
3560 llvm::Value *basePtr =
lComputeBasePtr(coalesceGroup[0], insertBefore);
3562 int elementSize = 0;
3570 FATAL(
"Unexpected gather type in lCoalesceGathers");
3576 std::vector<int64_t> constOffsets;
3581 std::vector<CoalescedLoadOp> loadOps;
3587 lEmitLoads(basePtr, loadOps, elementSize, insertBefore);
3597 std::vector<llvm::Value *> results;
3602 Assert(results.size() == coalesceGroup.size());
3603 for (
int i = 0; i < (int)results.size(); ++i) {
3604 llvm::Instruction *ir = llvm::dyn_cast<llvm::Instruction>(results[i]);
3607 llvm::Type *origType = coalesceGroup[i]->getType();
3608 if (origType != ir->getType())
3609 ir =
new llvm::BitCastInst(ir, origType, ir->getName(), coalesceGroup[i]);
3617 ir->removeFromParent();
3619 llvm::ReplaceInstWithInst(coalesceGroup[i], ir);
3631 if (llvm::isa<llvm::StoreInst>(inst) || llvm::isa<llvm::AtomicRMWInst>(inst) ||
3632 llvm::isa<llvm::AtomicCmpXchgInst>(inst))
3640 llvm::CallInst *ci = llvm::dyn_cast<llvm::CallInst>(inst);
3642 llvm::Function *calledFunc = ci->getCalledFunction();
3643 if (calledFunc == NULL)
3646 if (calledFunc->onlyReadsMemory() || calledFunc->doesNotAccessMemory())
3657 llvm::Function *gatherFuncs[] = {
3658 m->
module->getFunction(
"__pseudo_gather_factored_base_offsets32_i32"),
3659 m->
module->getFunction(
"__pseudo_gather_factored_base_offsets32_float"),
3660 m->
module->getFunction(
"__pseudo_gather_factored_base_offsets64_i32"),
3661 m->
module->getFunction(
"__pseudo_gather_factored_base_offsets64_float"),
3663 int nGatherFuncs =
sizeof(gatherFuncs) /
sizeof(gatherFuncs[0]);
3665 bool modifiedAny =
false;
3668 for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
3671 llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
3672 if (callInst == NULL)
3675 llvm::Function *calledFunc = callInst->getCalledFunction();
3676 if (calledFunc == NULL)
3680 for (i = 0; i < nGatherFuncs; ++i)
3681 if (gatherFuncs[i] != NULL && calledFunc == gatherFuncs[i])
3683 if (i == nGatherFuncs)
3689 Debug(pos,
"Checking for coalescable gathers starting here...");
3691 llvm::Value *base = callInst->getArgOperand(0);
3692 llvm::Value *variableOffsets = callInst->getArgOperand(1);
3693 llvm::Value *offsetScale = callInst->getArgOperand(2);
3694 llvm::Value *mask = callInst->getArgOperand(4);
3716 std::vector<llvm::CallInst *> coalesceGroup;
3717 coalesceGroup.push_back(callInst);
3723 llvm::BasicBlock::iterator fwdIter = iter;
3725 for (; fwdIter != bb.end(); ++fwdIter) {
3732 llvm::CallInst *fwdCall = llvm::dyn_cast<llvm::CallInst>(&*fwdIter);
3733 if (fwdCall == NULL || fwdCall->getCalledFunction() != calledFunc)
3741 #ifndef ISPC_NO_DUMPS 3743 if (base != fwdCall->getArgOperand(0)) {
3744 Debug(fwdPos,
"base pointers mismatch");
3748 if (variableOffsets != fwdCall->getArgOperand(1)) {
3749 Debug(fwdPos,
"varying offsets mismatch");
3753 if (offsetScale != fwdCall->getArgOperand(2)) {
3754 Debug(fwdPos,
"offset scales mismatch");
3758 if (mask != fwdCall->getArgOperand(4)) {
3759 Debug(fwdPos,
"masks mismatch");
3766 if (base == fwdCall->getArgOperand(0) && variableOffsets == fwdCall->getArgOperand(1) &&
3767 offsetScale == fwdCall->getArgOperand(2) && mask == fwdCall->getArgOperand(4)) {
3768 Debug(fwdPos,
"This gather can be coalesced.");
3769 coalesceGroup.push_back(fwdCall);
3771 if (coalesceGroup.size() == 4)
3778 Debug(fwdPos,
"This gather doesn't match the initial one.");
3781 Debug(pos,
"Done with checking for matching gathers");
3798 bool modifiedAny =
false;
3799 for (llvm::BasicBlock &BB : F) {
3800 modifiedAny |= runOnBasicBlock(BB);
3819 llvm::StringRef
getPassName()
const {
return "Replace Pseudo Memory Ops"; }
3820 bool runOnBasicBlock(llvm::BasicBlock &BB);
3821 bool runOnFunction(llvm::Function &F);
3834 llvm::BitCastInst *bc = llvm::dyn_cast<llvm::BitCastInst>(lvalue);
3838 llvm::AllocaInst *ai = llvm::dyn_cast<llvm::AllocaInst>(lvalue);
3840 llvm::Type *type = ai->getType();
3841 llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(type);
3843 type = pt->getElementType();
3844 llvm::ArrayType *at;
3845 while ((at = llvm::dyn_cast<llvm::ArrayType>(type))) {
3846 type = at->getElementType();
3848 llvm::VectorType *vt = llvm::dyn_cast<llvm::VectorType>(type);
3851 llvm::GetElementPtrInst *gep = llvm::dyn_cast<llvm::GetElementPtrInst>(lvalue);
3862 LMSInfo(
const char *pname,
const char *bname,
const char *msname) {
3863 pseudoFunc =
m->
module->getFunction(pname);
3864 blendFunc =
m->
module->getFunction(bname);
3865 maskedStoreFunc =
m->
module->getFunction(msname);
3866 Assert(pseudoFunc != NULL && blendFunc != NULL && maskedStoreFunc != NULL);
3868 llvm::Function *pseudoFunc;
3869 llvm::Function *blendFunc;
3870 llvm::Function *maskedStoreFunc;
3873 LMSInfo msInfo[] = {
3874 LMSInfo(
"__pseudo_masked_store_i8",
"__masked_store_blend_i8",
"__masked_store_i8"),
3875 LMSInfo(
"__pseudo_masked_store_i16",
"__masked_store_blend_i16",
"__masked_store_i16"),
3876 LMSInfo(
"__pseudo_masked_store_i32",
"__masked_store_blend_i32",
"__masked_store_i32"),
3877 LMSInfo(
"__pseudo_masked_store_float",
"__masked_store_blend_float",
"__masked_store_float"),
3878 LMSInfo(
"__pseudo_masked_store_i64",
"__masked_store_blend_i64",
"__masked_store_i64"),
3879 LMSInfo(
"__pseudo_masked_store_double",
"__masked_store_blend_double",
"__masked_store_double")};
3881 LMSInfo *info = NULL;
3882 for (
unsigned int i = 0; i <
sizeof(msInfo) /
sizeof(msInfo[0]); ++i) {
3883 if (msInfo[i].pseudoFunc != NULL && callInst->getCalledFunction() == msInfo[i].pseudoFunc) {
3891 llvm::Value *lvalue = callInst->getArgOperand(0);
3892 llvm::Value *rvalue = callInst->getArgOperand(1);
3893 llvm::Value *mask = callInst->getArgOperand(2);
3903 llvm::Function *fms = doBlend ? info->blendFunc : info->maskedStoreFunc;
3904 llvm::Instruction *inst =
lCallInst(fms, lvalue, rvalue, mask,
"", callInst);
3907 callInst->eraseFromParent();
3912 struct LowerGSInfo {
3913 LowerGSInfo(
const char *pName,
const char *aName,
bool ig,
bool ip) : isGather(ig), isPrefetch(ip) {
3914 pseudoFunc =
m->
module->getFunction(pName);
3915 actualFunc =
m->
module->getFunction(aName);
3917 llvm::Function *pseudoFunc;
3918 llvm::Function *actualFunc;
3919 const bool isGather;
3920 const bool isPrefetch;
3923 LowerGSInfo lgsInfo[] = {
3924 LowerGSInfo(
"__pseudo_gather32_i8",
"__gather32_i8",
true,
false),
3925 LowerGSInfo(
"__pseudo_gather32_i16",
"__gather32_i16",
true,
false),
3926 LowerGSInfo(
"__pseudo_gather32_i32",
"__gather32_i32",
true,
false),
3927 LowerGSInfo(
"__pseudo_gather32_float",
"__gather32_float",
true,
false),
3928 LowerGSInfo(
"__pseudo_gather32_i64",
"__gather32_i64",
true,
false),
3929 LowerGSInfo(
"__pseudo_gather32_double",
"__gather32_double",
true,
false),
3931 LowerGSInfo(
"__pseudo_gather64_i8",
"__gather64_i8",
true,
false),
3932 LowerGSInfo(
"__pseudo_gather64_i16",
"__gather64_i16",
true,
false),
3933 LowerGSInfo(
"__pseudo_gather64_i32",
"__gather64_i32",
true,
false),
3934 LowerGSInfo(
"__pseudo_gather64_float",
"__gather64_float",
true,
false),
3935 LowerGSInfo(
"__pseudo_gather64_i64",
"__gather64_i64",
true,
false),
3936 LowerGSInfo(
"__pseudo_gather64_double",
"__gather64_double",
true,
false),
3938 LowerGSInfo(
"__pseudo_gather_factored_base_offsets32_i8",
"__gather_factored_base_offsets32_i8",
true,
false),
3939 LowerGSInfo(
"__pseudo_gather_factored_base_offsets32_i16",
"__gather_factored_base_offsets32_i16",
true,
false),
3940 LowerGSInfo(
"__pseudo_gather_factored_base_offsets32_i32",
"__gather_factored_base_offsets32_i32",
true,
false),
3941 LowerGSInfo(
"__pseudo_gather_factored_base_offsets32_float",
"__gather_factored_base_offsets32_float",
true,
3943 LowerGSInfo(
"__pseudo_gather_factored_base_offsets32_i64",
"__gather_factored_base_offsets32_i64",
true,
false),
3944 LowerGSInfo(
"__pseudo_gather_factored_base_offsets32_double",
"__gather_factored_base_offsets32_double",
true,
3947 LowerGSInfo(
"__pseudo_gather_factored_base_offsets64_i8",
"__gather_factored_base_offsets64_i8",
true,
false),
3948 LowerGSInfo(
"__pseudo_gather_factored_base_offsets64_i16",
"__gather_factored_base_offsets64_i16",
true,
false),
3949 LowerGSInfo(
"__pseudo_gather_factored_base_offsets64_i32",
"__gather_factored_base_offsets64_i32",
true,
false),
3950 LowerGSInfo(
"__pseudo_gather_factored_base_offsets64_float",
"__gather_factored_base_offsets64_float",
true,
3952 LowerGSInfo(
"__pseudo_gather_factored_base_offsets64_i64",
"__gather_factored_base_offsets64_i64",
true,
false),
3953 LowerGSInfo(
"__pseudo_gather_factored_base_offsets64_double",
"__gather_factored_base_offsets64_double",
true,
3956 LowerGSInfo(
"__pseudo_gather_base_offsets32_i8",
"__gather_base_offsets32_i8",
true,
false),
3957 LowerGSInfo(
"__pseudo_gather_base_offsets32_i16",
"__gather_base_offsets32_i16",
true,
false),
3958 LowerGSInfo(
"__pseudo_gather_base_offsets32_i32",
"__gather_base_offsets32_i32",
true,
false),
3959 LowerGSInfo(
"__pseudo_gather_base_offsets32_float",
"__gather_base_offsets32_float",
true,
false),
3960 LowerGSInfo(
"__pseudo_gather_base_offsets32_i64",
"__gather_base_offsets32_i64",
true,
false),
3961 LowerGSInfo(
"__pseudo_gather_base_offsets32_double",
"__gather_base_offsets32_double",
true,
false),
3963 LowerGSInfo(
"__pseudo_gather_base_offsets64_i8",
"__gather_base_offsets64_i8",
true,
false),
3964 LowerGSInfo(
"__pseudo_gather_base_offsets64_i16",
"__gather_base_offsets64_i16",
true,
false),
3965 LowerGSInfo(
"__pseudo_gather_base_offsets64_i32",
"__gather_base_offsets64_i32",
true,
false),
3966 LowerGSInfo(
"__pseudo_gather_base_offsets64_float",
"__gather_base_offsets64_float",
true,
false),
3967 LowerGSInfo(
"__pseudo_gather_base_offsets64_i64",
"__gather_base_offsets64_i64",
true,
false),
3968 LowerGSInfo(
"__pseudo_gather_base_offsets64_double",
"__gather_base_offsets64_double",
true,
false),
3970 LowerGSInfo(
"__pseudo_scatter32_i8",
"__scatter32_i8",
false,
false),
3971 LowerGSInfo(
"__pseudo_scatter32_i16",
"__scatter32_i16",
false,
false),
3972 LowerGSInfo(
"__pseudo_scatter32_i32",
"__scatter32_i32",
false,
false),
3973 LowerGSInfo(
"__pseudo_scatter32_float",
"__scatter32_float",
false,
false),
3974 LowerGSInfo(
"__pseudo_scatter32_i64",
"__scatter32_i64",
false,
false),
3975 LowerGSInfo(
"__pseudo_scatter32_double",
"__scatter32_double",
false,
false),
3977 LowerGSInfo(
"__pseudo_scatter64_i8",
"__scatter64_i8",
false,
false),
3978 LowerGSInfo(
"__pseudo_scatter64_i16",
"__scatter64_i16",
false,
false),
3979 LowerGSInfo(
"__pseudo_scatter64_i32",
"__scatter64_i32",
false,
false),
3980 LowerGSInfo(
"__pseudo_scatter64_float",
"__scatter64_float",
false,
false),
3981 LowerGSInfo(
"__pseudo_scatter64_i64",
"__scatter64_i64",
false,
false),
3982 LowerGSInfo(
"__pseudo_scatter64_double",
"__scatter64_double",
false,
false),
3984 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets32_i8",
"__scatter_factored_base_offsets32_i8",
false,
3986 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets32_i16",
"__scatter_factored_base_offsets32_i16",
false,
3988 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets32_i32",
"__scatter_factored_base_offsets32_i32",
false,
3990 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets32_float",
"__scatter_factored_base_offsets32_float",
false,
3992 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets32_i64",
"__scatter_factored_base_offsets32_i64",
false,
3994 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets32_double",
"__scatter_factored_base_offsets32_double",
3997 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets64_i8",
"__scatter_factored_base_offsets64_i8",
false,
3999 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets64_i16",
"__scatter_factored_base_offsets64_i16",
false,
4001 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets64_i32",
"__scatter_factored_base_offsets64_i32",
false,
4003 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets64_float",
"__scatter_factored_base_offsets64_float",
false,
4005 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets64_i64",
"__scatter_factored_base_offsets64_i64",
false,
4007 LowerGSInfo(
"__pseudo_scatter_factored_base_offsets64_double",
"__scatter_factored_base_offsets64_double",
4010 LowerGSInfo(
"__pseudo_scatter_base_offsets32_i8",
"__scatter_base_offsets32_i8",
false,
false),
4011 LowerGSInfo(
"__pseudo_scatter_base_offsets32_i16",
"__scatter_base_offsets32_i16",
false,
false),
4012 LowerGSInfo(
"__pseudo_scatter_base_offsets32_i32",
"__scatter_base_offsets32_i32",
false,
false),
4013 LowerGSInfo(
"__pseudo_scatter_base_offsets32_float",
"__scatter_base_offsets32_float",
false,
false),
4014 LowerGSInfo(
"__pseudo_scatter_base_offsets32_i64",
"__scatter_base_offsets32_i64",
false,
false),
4015 LowerGSInfo(
"__pseudo_scatter_base_offsets32_double",
"__scatter_base_offsets32_double",
false,
false),
4017 LowerGSInfo(
"__pseudo_scatter_base_offsets64_i8",
"__scatter_base_offsets64_i8",
false,
false),
4018 LowerGSInfo(
"__pseudo_scatter_base_offsets64_i16",
"__scatter_base_offsets64_i16",
false,
false),
4019 LowerGSInfo(
"__pseudo_scatter_base_offsets64_i32",
"__scatter_base_offsets64_i32",
false,
false),
4020 LowerGSInfo(
"__pseudo_scatter_base_offsets64_float",
"__scatter_base_offsets64_float",
false,
false),
4021 LowerGSInfo(
"__pseudo_scatter_base_offsets64_i64",
"__scatter_base_offsets64_i64",
false,
false),
4022 LowerGSInfo(
"__pseudo_scatter_base_offsets64_double",
"__scatter_base_offsets64_double",
false,
false),
4024 LowerGSInfo(
"__pseudo_prefetch_read_varying_1",
"__prefetch_read_varying_1",
false,
true),
4025 LowerGSInfo(
"__pseudo_prefetch_read_varying_1_native",
"__prefetch_read_varying_1_native",
false,
true),
4027 LowerGSInfo(
"__pseudo_prefetch_read_varying_2",
"__prefetch_read_varying_2",
false,
true),
4028 LowerGSInfo(
"__pseudo_prefetch_read_varying_2_native",
"__prefetch_read_varying_2_native",
false,
true),
4030 LowerGSInfo(
"__pseudo_prefetch_read_varying_3",
"__prefetch_read_varying_3",
false,
true),
4031 LowerGSInfo(
"__pseudo_prefetch_read_varying_3_native",
"__prefetch_read_varying_3_native",
false,
true),
4033 LowerGSInfo(
"__pseudo_prefetch_read_varying_nt",
"__prefetch_read_varying_nt",
false,
true),
4034 LowerGSInfo(
"__pseudo_prefetch_read_varying_nt_native",
"__prefetch_read_varying_nt_native",
false,
true),
4037 llvm::Function *calledFunc = callInst->getCalledFunction();
4039 LowerGSInfo *info = NULL;
4040 for (
unsigned int i = 0; i <
sizeof(lgsInfo) /
sizeof(lgsInfo[0]); ++i) {
4041 if (lgsInfo[i].pseudoFunc != NULL && calledFunc == lgsInfo[i].pseudoFunc) {
4049 Assert(info->actualFunc != NULL);
4056 callInst->setCalledFunction(info->actualFunc);
4060 else if (!info->isPrefetch)
4069 bool modifiedAny =
false;
4072 for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
4073 llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
4074 if (callInst == NULL || callInst->getCalledFunction() == NULL)
4093 bool modifiedAny =
false;
4094 for (llvm::BasicBlock &BB : F) {
4095 modifiedAny |= runOnBasicBlock(BB);
4123 llvm::StringRef
getPassName()
const {
return "Resolve \"is compile time constant\""; }
4124 bool runOnBasicBlock(llvm::BasicBlock &BB);
4125 bool runOnFunction(llvm::Function &F);
4135 llvm::Function *funcs[] = {
m->
module->getFunction(
"__is_compile_time_constant_mask"),
4136 m->
module->getFunction(
"__is_compile_time_constant_uniform_int32"),
4137 m->
module->getFunction(
"__is_compile_time_constant_varying_int32")};
4139 bool modifiedAny =
false;
4141 for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
4144 llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
4145 if (callInst == NULL)
4149 int nFuncs =
sizeof(funcs) /
sizeof(funcs[0]);
4150 for (j = 0; j < nFuncs; ++j) {
4151 if (funcs[j] != NULL && callInst->getCalledFunction() == funcs[j])
4162 llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i,
LLVMFalse);
4169 llvm::Value *operand = callInst->getArgOperand(0);
4170 if (llvm::isa<llvm::Constant>(operand)) {
4171 llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i,
LLVMTrue);
4183 llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i,
LLVMFalse);
4196 bool modifiedAny =
false;
4197 for (llvm::BasicBlock &BB : F) {
4198 modifiedAny |= runOnBasicBlock(BB);
4212 #ifndef ISPC_NO_DUMPS 4216 DebugPass(
char *output) : ModulePass(ID) { snprintf(str_output,
sizeof(str_output),
"%s", output); }
4219 bool runOnModule(llvm::Module &
m);
4222 char str_output[100];
4228 fprintf(stderr,
"%s", str_output);
4243 #ifndef ISPC_NO_DUMPS 4250 bool runOnModule(llvm::Module &
m);
4251 bool doInitialization(llvm::Module &m);
4254 void run(llvm::Module &m,
bool init);
4265 llvm::Regex r(
"[^[:alnum:]]");
4274 snprintf(fname,
sizeof(fname),
"%s_%d_%s.ll", init ?
"init" :
"ir", pnum,
sanitize(std::string(pname)).c_str());
4275 llvm::raw_fd_ostream OS(fname, EC, llvm::sys::fs::F_None);
4276 Assert(!EC &&
"IR dump file creation failed!");
4277 module.print(OS, 0);
4314 llvm::StringRef
getPassName()
const {
return "Make internal funcs \"static\""; }
4315 bool runOnModule(llvm::Module &
m);
4321 const char *names[] = {
4328 "__avg_down_uint16",
4330 "__fast_masked_vload",
4331 "__gather_factored_base_offsets32_i8",
4332 "__gather_factored_base_offsets32_i16",
4333 "__gather_factored_base_offsets32_i32",
4334 "__gather_factored_base_offsets32_i64",
4335 "__gather_factored_base_offsets32_float",
4336 "__gather_factored_base_offsets32_double",
4337 "__gather_factored_base_offsets64_i8",
4338 "__gather_factored_base_offsets64_i16",
4339 "__gather_factored_base_offsets64_i32",
4340 "__gather_factored_base_offsets64_i64",
4341 "__gather_factored_base_offsets64_float",
4342 "__gather_factored_base_offsets64_double",
4343 "__gather_base_offsets32_i8",
4344 "__gather_base_offsets32_i16",
4345 "__gather_base_offsets32_i32",
4346 "__gather_base_offsets32_i64",
4347 "__gather_base_offsets32_float",
4348 "__gather_base_offsets32_double",
4349 "__gather_base_offsets64_i8",
4350 "__gather_base_offsets64_i16",
4351 "__gather_base_offsets64_i32",
4352 "__gather_base_offsets64_i64",
4353 "__gather_base_offsets64_float",
4354 "__gather_base_offsets64_double",
4360 "__gather32_double",
4366 "__gather64_double",
4367 "__gather_elt32_i8",
4368 "__gather_elt32_i16",
4369 "__gather_elt32_i32",
4370 "__gather_elt32_i64",
4371 "__gather_elt32_float",
4372 "__gather_elt32_double",
4373 "__gather_elt64_i8",
4374 "__gather_elt64_i16",
4375 "__gather_elt64_i32",
4376 "__gather_elt64_i64",
4377 "__gather_elt64_float",
4378 "__gather_elt64_double",
4380 "__masked_load_i16",
4381 "__masked_load_i32",
4382 "__masked_load_i64",
4383 "__masked_load_float",
4384 "__masked_load_double",
4385 "__masked_store_i8",
4386 "__masked_store_i16",
4387 "__masked_store_i32",
4388 "__masked_store_i64",
4389 "__masked_store_float",
4390 "__masked_store_double",
4391 "__masked_store_blend_i8",
4392 "__masked_store_blend_i16",
4393 "__masked_store_blend_i32",
4394 "__masked_store_blend_i64",
4395 "__masked_store_blend_float",
4396 "__masked_store_blend_double",
4397 "__scatter_factored_base_offsets32_i8",
4398 "__scatter_factored_base_offsets32_i16",
4399 "__scatter_factored_base_offsets32_i32",
4400 "__scatter_factored_base_offsets32_i64",
4401 "__scatter_factored_base_offsets32_float",
4402 "__scatter_factored_base_offsets32_double",
4403 "__scatter_factored_base_offsets64_i8",
4404 "__scatter_factored_base_offsets64_i16",
4405 "__scatter_factored_base_offsets64_i32",
4406 "__scatter_factored_base_offsets64_i64",
4407 "__scatter_factored_base_offsets64_float",
4408 "__scatter_factored_base_offsets64_double",
4409 "__scatter_base_offsets32_i8",
4410 "__scatter_base_offsets32_i16",
4411 "__scatter_base_offsets32_i32",
4412 "__scatter_base_offsets32_i64",
4413 "__scatter_base_offsets32_float",
4414 "__scatter_base_offsets32_double",
4415 "__scatter_base_offsets64_i8",
4416 "__scatter_base_offsets64_i16",
4417 "__scatter_base_offsets64_i32",
4418 "__scatter_base_offsets64_i64",
4419 "__scatter_base_offsets64_float",
4420 "__scatter_base_offsets64_double",
4421 "__scatter_elt32_i8",
4422 "__scatter_elt32_i16",
4423 "__scatter_elt32_i32",
4424 "__scatter_elt32_i64",
4425 "__scatter_elt32_float",
4426 "__scatter_elt32_double",
4427 "__scatter_elt64_i8",
4428 "__scatter_elt64_i16",
4429 "__scatter_elt64_i32",
4430 "__scatter_elt64_i64",
4431 "__scatter_elt64_float",
4432 "__scatter_elt64_double",
4437 "__scatter32_float",
4438 "__scatter32_double",
4443 "__scatter64_float",
4444 "__scatter64_double",
4445 "__prefetch_read_varying_1",
4446 "__prefetch_read_varying_2",
4447 "__prefetch_read_varying_3",
4448 "__prefetch_read_varying_nt",
4449 "__keep_funcs_live",
4452 bool modifiedAny =
false;
4453 int count =
sizeof(names) /
sizeof(names[0]);
4454 for (
int i = 0; i < count; ++i) {
4455 llvm::Function *f =
m->
module->getFunction(names[i]);
4456 if (f != NULL && f->empty() ==
false) {
4457 f->setLinkage(llvm::GlobalValue::InternalLinkage);
4474 llvm::StringRef
getPassName()
const {
return "Peephole Optimizations"; }
4475 bool runOnBasicBlock(llvm::BasicBlock &BB);
4476 bool runOnFunction(llvm::Function &F);
4492 : Op(OpMatch), fromType(f), toType(t) {}
4494 template <
typename OpTy>
bool match(OpTy *V) {
4495 if (llvm::Operator *O = llvm::dyn_cast<llvm::Operator>(V))
4496 return (O->getOpcode() == Opcode && Op.match(O->getOperand(0)) && O->getType() == toType &&
4497 O->getOperand(0)->getType() == fromType);
4537 template <
typename OpTy>
bool match(OpTy *V) {
4538 llvm::BinaryOperator *bop;
4539 llvm::ConstantDataVector *cdv;
4540 if ((bop = llvm::dyn_cast<llvm::BinaryOperator>(V)) &&
4541 (cdv = llvm::dyn_cast<llvm::ConstantDataVector>(bop->getOperand(1))) && cdv->getSplatValue() != NULL) {
4542 const llvm::APInt &apInt = cdv->getUniqueInteger();
4544 switch (bop->getOpcode()) {
4545 case llvm::Instruction::UDiv:
4547 return (apInt.isIntN(2) && Op.match(bop->getOperand(0)));
4548 case llvm::Instruction::LShr:
4550 return (apInt.isIntN(1) && Op.match(bop->getOperand(0)));
4566 template <
typename OpTy>
bool match(OpTy *V) {
4567 llvm::BinaryOperator *bop;
4568 llvm::ConstantDataVector *cdv;
4569 if ((bop = llvm::dyn_cast<llvm::BinaryOperator>(V)) &&
4570 (cdv = llvm::dyn_cast<llvm::ConstantDataVector>(bop->getOperand(1))) && cdv->getSplatValue() != NULL) {
4571 const llvm::APInt &apInt = cdv->getUniqueInteger();
4573 switch (bop->getOpcode()) {
4574 case llvm::Instruction::SDiv:
4576 return (apInt.isIntN(2) && Op.match(bop->getOperand(0)));
4577 case llvm::Instruction::AShr:
4579 return (apInt.isIntN(1) && Op.match(bop->getOperand(0)));
4593 llvm::Function::iterator bbiter = func->begin();
4594 for (; bbiter != func->end(); ++bbiter) {
4595 for (llvm::BasicBlock::iterator institer = bbiter->begin(); institer != bbiter->end(); ++institer) {
4596 if (llvm::isa<llvm::IntrinsicInst>(institer))
4604 llvm::Function *func =
m->
module->getFunction(name);
4622 llvm::Value *opa, *opb;
4623 const llvm::APInt *delta;
4628 if (delta->isIntN(1) ==
false)
4638 llvm::Value *opa, *opb;
4647 llvm::Value *opa, *opb;
4648 const llvm::APInt *delta;
4654 if (delta->isIntN(1) ==
false)
4664 llvm::Value *opa, *opb;
4673 llvm::Value *opa, *opb;
4674 const llvm::APInt *delta;
4679 if (delta->isIntN(1) ==
false)
4689 llvm::Value *opa, *opb;
4698 llvm::Value *opa, *opb;
4699 const llvm::APInt *delta;
4705 if (delta->isIntN(1) ==
false)
4715 llvm::Value *opa, *opb;
4725 bool modifiedAny =
false;
4727 for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
4728 llvm::Instruction *inst = &*iter;
4745 if (builtinCall != NULL) {
4746 llvm::ReplaceInstWithInst(inst, builtinCall);
4759 bool modifiedAny =
false;
4760 for (llvm::BasicBlock &BB : F) {
4772 llvm::ConstantInt *intOffset = llvm::dyn_cast<llvm::ConstantInt>(offset);
4773 Assert(intOffset && (intOffset->getBitWidth() == 32 || intOffset->getBitWidth() == 64));
4774 return intOffset->getSExtValue();
4785 llvm::StringRef
getPassName()
const {
return "Resolve \"replace extract insert chains\""; }
4802 bool modifiedAny =
false;
4804 llvm::Function *shifts[6];
4805 shifts[0] =
m->
module->getFunction(
"shift___vytuni");
4806 shifts[1] =
m->
module->getFunction(
"shift___vysuni");
4807 shifts[2] =
m->
module->getFunction(
"shift___vyiuni");
4808 shifts[3] =
m->
module->getFunction(
"shift___vyIuni");
4809 shifts[4] =
m->
module->getFunction(
"shift___vyfuni");
4810 shifts[5] =
m->
module->getFunction(
"shift___vyduni");
4812 for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
4813 llvm::Instruction *inst = &*iter;
4815 if (llvm::CallInst *ci = llvm::dyn_cast<llvm::CallInst>(inst)) {
4816 llvm::Function *func = ci->getCalledFunction();
4817 for (
int i = 0; i < 6; i++) {
4818 if (shifts[i] && (shifts[i] == func)) {
4820 llvm::Value *shiftedVec = ci->getArgOperand(0);
4821 llvm::Value *shiftAmt = ci->getArgOperand(1);
4822 if (llvm::isa<llvm::Constant>(shiftAmt)) {
4824 int *shuffleVals =
new int[vectorWidth];
4826 for (
int i = 0; i < vectorWidth; i++) {
4827 int s = i + shiftInt;
4828 s = (s < 0) ? vectorWidth : s;
4829 s = (s >= vectorWidth) ? vectorWidth : s;
4833 llvm::Value *zeroVec = llvm::ConstantAggregateZero::get(shiftedVec->getType());
4834 llvm::Value *shuffle =
4835 new llvm::ShuffleVectorInst(shiftedVec, zeroVec, shuffleIdxs,
"vecShift", ci);
4836 ci->replaceAllUsesWith(shuffle);
4838 delete[] shuffleVals;
4854 bool modifiedAny =
false;
4855 for (llvm::BasicBlock &BB : F) {
4894 llvm::StringRef
getPassName()
const {
return "Resolve \"replace extract insert chains\""; }
4898 llvm::Instruction *fixSelect(llvm::SelectInst *sel, llvm::SExtInst *sext);
4905 llvm::VectorType *orig_type = llvm::dyn_cast<llvm::VectorType>(sel->getType());
4907 llvm::VectorType *int_type = llvm::VectorType::getInteger(orig_type);
4910 llvm::Instruction *result = 0, *optional_to_delete = 0;
4913 if (orig_type->getElementType()->isIntegerTy()) {
4915 result = llvm::BinaryOperator::CreateAnd(sext, sel->getTrueValue(),
"and_mask", sel);
4917 llvm::BitCastInst *bc = llvm::dyn_cast<llvm::BitCastInst>(sel->getTrueValue());
4919 if (bc && bc->hasOneUse() && bc->getSrcTy()->isIntOrIntVectorTy() && bc->getSrcTy()->isVectorTy() &&
4920 llvm::isa<llvm::Instruction>(bc->getOperand(0)) &&
4921 llvm::dyn_cast<llvm::Instruction>(bc->getOperand(0))->getParent() == sel->getParent()) {
4924 llvm::BinaryOperator *and_inst = llvm::BinaryOperator::CreateAnd(sext, bc->getOperand(0),
"and_mask", sel);
4926 result =
new llvm::BitCastInst(and_inst, sel->getType(),
"bitcast_mask_out", sel);
4928 optional_to_delete = bc;
4932 llvm::BitCastInst *bc_in =
new llvm::BitCastInst(sel->getTrueValue(), int_type,
"bitcast_mask_in", sel);
4934 llvm::BinaryOperator *and_inst = llvm::BinaryOperator::CreateAnd(sext, bc_in,
"and_mask", sel);
4936 result =
new llvm::BitCastInst(and_inst, sel->getType(),
"bitcast_mask_out", sel);
4941 sel->replaceAllUsesWith(result);
4942 sel->eraseFromParent();
4943 if (optional_to_delete) {
4944 optional_to_delete->eraseFromParent();
4951 bool modifiedAny =
false;
static llvm::Pass * CreateFixBooleanSelectPass()
llvm::Constant * LLVMIntAsType(int64_t val, llvm::Type *type)
static void lExtractConstOffsets(const std::vector< llvm::CallInst *> &coalesceGroup, int elementSize, std::vector< int64_t > *constOffsets)
void run(llvm::Module &m, bool init)
static llvm::Type * FloatType
static llvm::Type * Int32VectorPointerType
llvm::legacy::PassManager & getPM()
static llvm::Instruction * lMatchAvgDownInt16(llvm::Value *inst)
DebugPassFile(int number, llvm::StringRef name)
SDiv2_match(const Op_t &OpMatch)
llvm::Instruction * fixSelect(llvm::SelectInst *sel, llvm::SExtInst *sext)
llvm::StringRef getPassName() const
static bool lIsSafeToBlend(llvm::Value *lvalue)
static bool lCoalesceGathers(const std::vector< llvm::CallInst *> &coalesceGroup)
Declaration of the FunctionEmitContext class
llvm::StringRef getPassName() const
bool hasVecPrefetch() const
static llvm::Type * DoubleType
bool runOnFunction(llvm::Function &F)
static llvm::Value * lExtractFromInserts(llvm::Value *v, unsigned int index)
static llvm::Instruction * lMatchAvgDownUInt8(llvm::Value *inst)
bool disableBlendedMaskedStores
static llvm::Value * lExtractOffsetVector248Scale(llvm::Value **vec)
static bool simplifyCall(llvm::CallInst *callInst, llvm::BasicBlock::iterator iter)
llvm::Constant * LLVMInt64Vector(int64_t ival)
void Optimize(llvm::Module *module, int optLevel)
static llvm::Pass * CreateImproveMemoryOpsPass()
bool runOnModule(llvm::Module &m)
llvm::StringRef getPassName() const
static llvm::Instruction * lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, const char *name, llvm::Instruction *insertBefore=NULL)
static bool lVectorLoadIsEfficient(std::set< int64_t >::iterator iter, std::set< int64_t >::iterator end, std::set< int64_t >::iterator *newIter, int vectorWidth)
bool LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts)
int getNativeVectorAlignment() const
const char * LLVMGetName(llvm::Value *v, const char *s)
static void lSelectLoads(const std::vector< int64_t > &loadOffsets, std::vector< CoalescedLoadOp > *loads)
static llvm::Constant * lGetConstantAddExprBaseOffset(llvm::Constant *op0, llvm::Constant *op1, llvm::Constant **delta)
static llvm::Value * lApplyLoad1(llvm::Value *result, const CoalescedLoadOp &load, const int64_t offsets[4], bool set[4], llvm::Instruction *insertBefore)
#define ISPC_LLVM_VERSION
llvm::Value * lGEPAndLoad(llvm::Value *basePtr, int64_t offset, int align, llvm::Instruction *insertBefore, llvm::Type *type)
#define DEBUG_START_PASS(NAME)
static llvm::Value * lAssemble4Vector(const std::vector< CoalescedLoadOp > &loadOps, const int64_t offsets[4], llvm::Instruction *insertBefore)
static llvm::VectorType * Int32VectorType
Declarations related to optimization passes.
static llvm::Pass * CreateReplaceStdlibShiftPass()
std::vector< MaskInstruction > maskInstructions
static void lCoalescePerfInfo(const std::vector< llvm::CallInst *> &coalesceGroup, const std::vector< CoalescedLoadOp > &loadOps)
static llvm::Type * FloatVectorPointerType
bool runOnModule(llvm::Module &m)
BlendInstruction * matchingBlendInstruction(llvm::Function *function)
static bool lVectorIs32BitInts(llvm::Value *v)
static bool lGetSourcePosFromMetadata(const llvm::Instruction *inst, SourcePos *pos)
IsCompileTimeConstantPass(bool last=false)
static bool lGetMask(llvm::Value *factor, uint64_t *mask)
static llvm::Type * Int16VectorPointerType
bool runOnBasicBlock(llvm::BasicBlock &BB)
ReplacePseudoMemoryOpsPass()
bool runOnFunction(llvm::Function &F)
static bool lIsIntegerSplat(llvm::Value *v, int *splat)
MakeInternalFuncsStaticPass(bool last=false)
static llvm::Pass * CreateReplacePseudoMemoryOpsPass()
UDiv2_match< V > m_UDiv2(const V &v)
static bool lHasIntrinsicInDefinition(llvm::Function *func)
static llvm::Type * Int16Type
static llvm::Type * DoubleVectorPointerType
bool run(llvm::Module &M)
static bool lInstructionMayWriteToMemory(llvm::Instruction *inst)
bool runOnBasicBlock(llvm::BasicBlock &BB)
static llvm::Pass * CreateIntrinsicsOptPass()
static llvm::Instruction * lMatchAvgDownUInt16(llvm::Value *inst)
static llvm::Pass * CreatePeepholePass()
llvm::StringRef getPassName() const
static bool lGSBaseOffsetsGetMoreConst(llvm::CallInst *callInst)
llvm::StringRef getPassName() const
static llvm::VectorType * Int1VectorType
llvm::legacy::PassManager PM
static llvm::Instruction * lGEPInst(llvm::Value *ptr, llvm::Value *offset, const char *name, llvm::Instruction *insertBefore)
static bool lIsUndef(llvm::Value *value)
static void lCopyMetadata(llvm::Value *vto, const llvm::Instruction *from)
header file with declarations for symbol and symbol table classes.
llvm::StringRef getPassName() const
static llvm::Pass * CreateDebugPassFile(int number, llvm::StringRef name)
std::set< int > debug_stages
bool disableMaskAllOnOptimizations
bool matchesMaskInstruction(llvm::Function *function)
static llvm::Type * Int8VectorPointerType
bool runOnFunction(llvm::Function &F)
void PerformanceWarning(SourcePos p, const char *fmt,...)
bool disableGatherScatterOptimizations
static llvm::VectorType * Int8VectorType
llvm::Constant * LLVMInt32Vector(int32_t ival)
std::string sanitize(std::string in)
static uint64_t lConstElementsToMask(const llvm::SmallVector< llvm::Constant *, ISPC_MAX_NVEC > &elements)
static llvm::Pass * CreateInstructionSimplifyPass()
static llvm::Pass * CreateDebugPass(char *output)
llvm::Constant * LLVMTrue
static llvm::Value * simplifyBoolVec(llvm::Value *value)
static llvm::Pass * CreateIsCompileTimeConstantPass(bool isLastTry)
const llvm::Type * toType
static llvm::Instruction * lMatchAvgUpInt8(llvm::Value *inst)
static llvm::VectorType * FloatVectorType
llvm::StringRef getPassName() const
static bool lReplacePseudoGS(llvm::CallInst *callInst)
static bool lReplacePseudoMaskedStore(llvm::CallInst *callInst)
static llvm::Type * Int64Type
static llvm::Type * Int8Type
bool IsOrEquivalentToAdd(llvm::Value *op)
static llvm::VectorType * Int64VectorType
Header file with declarations for various LLVM utility stuff.
static bool simplifySelect(llvm::SelectInst *selectInst, llvm::BasicBlock::iterator iter)
static llvm::Value * lGetBasePointer(llvm::Value *v, llvm::Instruction *insertBefore, bool broadcastDetected)
UDiv2_match(const Op_t &OpMatch)
static llvm::Value * lComputeBasePtr(llvm::CallInst *gatherInst, llvm::Instruction *insertBefore)
bool runOnFunction(llvm::Function &F)
bool runOnModule(llvm::Module &m)
BlendInstruction(llvm::Function *f, uint64_t ao, int o0, int o1, int of)
bool runOnFunction(llvm::Function &F)
CastClassTypes_match(const Op_t &OpMatch, const llvm::Type *f, const llvm::Type *t)
CastClassTypes_match< OpTy, llvm::Instruction::Trunc > m_Trunc16To8(const OpTy &Op)
llvm::StringRef getPassName() const
CastClassTypes_match< OpTy, llvm::Instruction::ZExt > m_ZExt16To32(const OpTy &Op)
SDiv2_match< V > m_SDiv2(const V &v)
llvm::StringRef getPassName() const
bool runOnFunction(llvm::Function &F)
Representation of a range of positions in a source file.
void getAnalysisUsage(llvm::AnalysisUsage &AU) const
llvm::Value * LLVMConcatVectors(llvm::Value *v1, llvm::Value *v2, llvm::Instruction *insertBefore)
llvm::ConstantInt * LLVMInt32(int32_t ival)
static std::vector< CoalescedLoadOp > lSplit8WideLoads(const std::vector< CoalescedLoadOp > &loadOps, llvm::Instruction *insertBefore)
CastClassTypes_match< OpTy, llvm::Instruction::SExt > m_SExt8To16(const OpTy &Op)
static llvm::Pass * CreateGatherCoalescePass()
bool disableHandlePseudoMemoryOps
bool force32BitAddressing
static llvm::Pass * CreateMakeInternalFuncsStaticPass()
bool LLVMVectorIsLinear(llvm::Value *v, int stride)
static llvm::Instruction * lMatchAvgDownInt8(llvm::Value *inst)
static llvm::Instruction * lMatchAvgUpUInt16(llvm::Value *inst)
bool runOnFunction(llvm::Function &F)
static llvm::PointerType * VoidPointerType
llvm::StringRef getPassName() const
int getVectorWidth() const
bool runOnFunction(llvm::Function &F)
bool doInitialization(llvm::Module &m)
void LLVMDumpValue(llvm::Value *v)
bool runOnBasicBlock(llvm::BasicBlock &BB)
static llvm::Type * Int64VectorPointerType
static llvm::Type * Int32Type
static llvm::Instruction * lMatchAvgUpUInt8(llvm::Value *inst)
bool runOnBasicBlock(llvm::BasicBlock &BB)
static bool lOffsets32BitSafe(llvm::Value **variableOffsetPtr, llvm::Value **constOffsetPtr, llvm::Instruction *insertBefore)
llvm::StringRef getPassName() const
#define DEBUG_END_PASS(NAME)
bool runOnBasicBlock(llvm::BasicBlock &BB)
void add(llvm::Pass *P, int stage)
llvm::Constant * LLVMFalse
CastClassTypes_match< OpTy, llvm::Instruction::SExt > m_SExt16To32(const OpTy &Op)
llvm::Value * LLVMExtractFirstVectorElement(llvm::Value *v)
CastClassTypes_match< OpTy, llvm::Instruction::Trunc > m_Trunc32To16(const OpTy &Op)
static bool lGSToGSBaseOffsets(llvm::CallInst *callInst)
void Debug(SourcePos p, const char *fmt,...)
bool LLVMVectorValuesAllEqual(llvm::Value *v, llvm::Value **splat)
bool runOnFunction(llvm::Function &F)
CoalescedLoadOp(int64_t s, int c)
bool runOnBasicBlock(llvm::BasicBlock &BB)
static void lEmitLoads(llvm::Value *basePtr, std::vector< CoalescedLoadOp > &loadOps, int elementSize, llvm::Instruction *insertBefore)
static llvm::VectorType * DoubleVectorType
InstructionSimplifyPass()
static void lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, llvm::Value **variableOffset, llvm::Instruction *insertBefore)
CastClassTypes_match< OpTy, llvm::Instruction::ZExt > m_ZExt8To16(const OpTy &Op)
bool runOnBasicBlock(llvm::BasicBlock &BB)
std::set< int > off_stages
static llvm::Value * lCheckForActualPointer(llvm::Value *v)
llvm::ConstantInt * LLVMInt64(int64_t ival)
bool runOnBasicBlock(llvm::BasicBlock &BB)
static llvm::VectorType * Int16VectorType
static llvm::Constant * lGetOffsetScaleVec(llvm::Value *offsetScale, llvm::Type *vecType)
static llvm::Value * lExtract248Scale(llvm::Value *splatOperand, int splatValue, llvm::Value *otherOperand, llvm::Value **result)
static llvm::Value * lApplyLoad2(llvm::Value *result, const CoalescedLoadOp &load, const int64_t offsets[4], bool set[4], llvm::Instruction *insertBefore)
static void lAssembleResultVectors(const std::vector< CoalescedLoadOp > &loadOps, const std::vector< int64_t > &constOffsets, std::vector< llvm::Value *> &results, llvm::Instruction *insertBefore)
static llvm::Value * lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets, llvm::Instruction *insertBefore)
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
static bool lImproveMaskedLoad(llvm::CallInst *callInst, llvm::BasicBlock::iterator iter)
static bool lIs32BitSafeHelper(llvm::Value *v)
static bool lGSToLoadStore(llvm::CallInst *callInst)
llvm::StringRef getPassName() const
static llvm::Instruction * lGetBinaryIntrinsic(const char *name, llvm::Value *opa, llvm::Value *opb)
static llvm::Instruction * lMatchAvgUpInt16(llvm::Value *inst)
MaskInstruction(llvm::Function *f)
void Warning(SourcePos p, const char *fmt,...)
static MaskStatus lGetMaskStatus(llvm::Value *mask, int vecWidth=-1)
llvm::Value * LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2, int32_t shuf[], int shufSize, llvm::Instruction *insertBefore)
static llvm::Value * lApplyLoad4(llvm::Value *result, const CoalescedLoadOp &load, const int64_t offsets[4], bool set[4], llvm::Instruction *insertBefore)
llvm::TargetMachine * GetTargetMachine() const
std::vector< BlendInstruction > blendInstructions
static llvm::Value * lComputeCommonPointer(llvm::Value *base, llvm::Value *offsets, llvm::Instruction *insertBefore)
bool disableGatherScatterFlattening
llvm::Value * LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth, bool compare, bool undef, bool searchFirstUndef)
static bool lImproveMaskedStore(llvm::CallInst *callInst)
static int64_t lGetIntValue(llvm::Value *offset)