Intel® Implicit SPMD Program Compiler (Intel® ISPC)
1.13.0
|
Implementations of various ispc optimization passes that operate on the LLVM IR. More...
#include "opt.h"
#include "ctx.h"
#include "llvmutil.h"
#include "module.h"
#include "sym.h"
#include "util.h"
#include <map>
#include <set>
#include <stdio.h>
#include "llvm/InitializePasses.h"
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/IR/Module.h>
#include <llvm/Pass.h>
#include <llvm/Transforms/Instrumentation.h>
#include "llvm/IR/LegacyPassManager.h"
#include <llvm/PassRegistry.h>
#include <llvm/IR/DebugInfo.h>
#include <llvm/IR/IRPrintingPasses.h>
#include <llvm/IR/PatternMatch.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Analysis/ConstantFolding.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/ADT/SmallSet.h>
#include <llvm/ADT/Triple.h>
#include <llvm/Target/TargetOptions.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/IR/DataLayout.h>
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include <llvm/Analysis/BasicAliasAnalysis.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/BinaryFormat/Dwarf.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/IR/IntrinsicInst.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/Regex.h>
Go to the source code of this file.
Classes | |
class | DebugPassManager |
class | IntrinsicsOpt |
struct | IntrinsicsOpt::MaskInstruction |
struct | IntrinsicsOpt::BlendInstruction |
class | InstructionSimplifyPass |
class | ImproveMemoryOpsPass |
class | GatherCoalescePass |
struct | CoalescedLoadOp |
class | ReplacePseudoMemoryOpsPass |
class | IsCompileTimeConstantPass |
class | DebugPass |
class | DebugPassFile |
class | MakeInternalFuncsStaticPass |
class | PeepholePass |
struct | CastClassTypes_match< Op_t, Opcode > |
struct | UDiv2_match< Op_t > |
struct | SDiv2_match< Op_t > |
class | ReplaceStdlibShiftPass |
class | FixBooleanSelectPass |
Macros | |
#define | PRId64 "lld" |
#define | PRIu64 "llu" |
#define | DEBUG_START_PASS(NAME) |
#define | DEBUG_END_PASS(NAME) |
Enumerations | |
enum | MaskStatus { ALL_ON, ALL_OFF, MIXED, UNKNOWN } |
Functions | |
static llvm::Pass * | CreateIntrinsicsOptPass () |
static llvm::Pass * | CreateInstructionSimplifyPass () |
static llvm::Pass * | CreatePeepholePass () |
static llvm::Pass * | CreateImproveMemoryOpsPass () |
static llvm::Pass * | CreateGatherCoalescePass () |
static llvm::Pass * | CreateReplacePseudoMemoryOpsPass () |
static llvm::Pass * | CreateIsCompileTimeConstantPass (bool isLastTry) |
static llvm::Pass * | CreateMakeInternalFuncsStaticPass () |
static llvm::Pass * | CreateDebugPass (char *output) |
static llvm::Pass * | CreateDebugPassFile (int number, llvm::StringRef name) |
static llvm::Pass * | CreateReplaceStdlibShiftPass () |
static llvm::Pass * | CreateFixBooleanSelectPass () |
static void | lCopyMetadata (llvm::Value *vto, const llvm::Instruction *from) |
static bool | lGetSourcePosFromMetadata (const llvm::Instruction *inst, SourcePos *pos) |
static llvm::Instruction * | lCallInst (llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, const char *name, llvm::Instruction *insertBefore=NULL) |
static llvm::Instruction * | lCallInst (llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, const char *name, llvm::Instruction *insertBefore=NULL) |
static llvm::Instruction * | lCallInst (llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, llvm::Value *arg3, const char *name, llvm::Instruction *insertBefore=NULL) |
static llvm::Instruction * | lCallInst (llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, llvm::Value *arg3, llvm::Value *arg4, const char *name, llvm::Instruction *insertBefore=NULL) |
static llvm::Instruction * | lCallInst (llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, llvm::Value *arg3, llvm::Value *arg4, llvm::Value *arg5, const char *name, llvm::Instruction *insertBefore=NULL) |
static llvm::Instruction * | lGEPInst (llvm::Value *ptr, llvm::Value *offset, const char *name, llvm::Instruction *insertBefore) |
static uint64_t | lConstElementsToMask (const llvm::SmallVector< llvm::Constant *, ISPC_MAX_NVEC > &elements) |
static bool | lGetMask (llvm::Value *factor, uint64_t *mask) |
static MaskStatus | lGetMaskStatus (llvm::Value *mask, int vecWidth=-1) |
void | Optimize (llvm::Module *module, int optLevel) |
static bool | lIsUndef (llvm::Value *value) |
static llvm::Value * | lCheckForActualPointer (llvm::Value *v) |
static llvm::Value * | lGetBasePointer (llvm::Value *v, llvm::Instruction *insertBefore, bool broadcastDetected) |
static llvm::Constant * | lGetConstantAddExprBaseOffset (llvm::Constant *op0, llvm::Constant *op1, llvm::Constant **delta) |
static llvm::Value * | lExtractFromInserts (llvm::Value *v, unsigned int index) |
static llvm::Value * | lGetBasePtrAndOffsets (llvm::Value *ptrs, llvm::Value **offsets, llvm::Instruction *insertBefore) |
static void | lExtractConstantOffset (llvm::Value *vec, llvm::Value **constOffset, llvm::Value **variableOffset, llvm::Instruction *insertBefore) |
static bool | lIsIntegerSplat (llvm::Value *v, int *splat) |
static llvm::Value * | lExtract248Scale (llvm::Value *splatOperand, int splatValue, llvm::Value *otherOperand, llvm::Value **result) |
static llvm::Value * | lExtractOffsetVector248Scale (llvm::Value **vec) |
static bool | lVectorIs32BitInts (llvm::Value *v) |
static bool | lOffsets32BitSafe (llvm::Value **variableOffsetPtr, llvm::Value **constOffsetPtr, llvm::Instruction *insertBefore) |
static bool | lIs32BitSafeHelper (llvm::Value *v) |
static bool | lOffsets32BitSafe (llvm::Value **offsetPtr, llvm::Instruction *insertBefore) |
static bool | lGSToGSBaseOffsets (llvm::CallInst *callInst) |
static bool | lGSBaseOffsetsGetMoreConst (llvm::CallInst *callInst) |
static llvm::Value * | lComputeCommonPointer (llvm::Value *base, llvm::Value *offsets, llvm::Instruction *insertBefore) |
static llvm::Constant * | lGetOffsetScaleVec (llvm::Value *offsetScale, llvm::Type *vecType) |
static bool | lGSToLoadStore (llvm::CallInst *callInst) |
static bool | lImproveMaskedStore (llvm::CallInst *callInst) |
static bool | lImproveMaskedLoad (llvm::CallInst *callInst, llvm::BasicBlock::iterator iter) |
static bool | lVectorLoadIsEfficient (std::set< int64_t >::iterator iter, std::set< int64_t >::iterator end, std::set< int64_t >::iterator *newIter, int vectorWidth) |
static void | lSelectLoads (const std::vector< int64_t > &loadOffsets, std::vector< CoalescedLoadOp > *loads) |
static void | lCoalescePerfInfo (const std::vector< llvm::CallInst *> &coalesceGroup, const std::vector< CoalescedLoadOp > &loadOps) |
llvm::Value * | lGEPAndLoad (llvm::Value *basePtr, int64_t offset, int align, llvm::Instruction *insertBefore, llvm::Type *type) |
static void | lEmitLoads (llvm::Value *basePtr, std::vector< CoalescedLoadOp > &loadOps, int elementSize, llvm::Instruction *insertBefore) |
static std::vector< CoalescedLoadOp > | lSplit8WideLoads (const std::vector< CoalescedLoadOp > &loadOps, llvm::Instruction *insertBefore) |
static llvm::Value * | lApplyLoad1 (llvm::Value *result, const CoalescedLoadOp &load, const int64_t offsets[4], bool set[4], llvm::Instruction *insertBefore) |
static llvm::Value * | lApplyLoad2 (llvm::Value *result, const CoalescedLoadOp &load, const int64_t offsets[4], bool set[4], llvm::Instruction *insertBefore) |
static llvm::Value * | lApplyLoad4 (llvm::Value *result, const CoalescedLoadOp &load, const int64_t offsets[4], bool set[4], llvm::Instruction *insertBefore) |
static llvm::Value * | lAssemble4Vector (const std::vector< CoalescedLoadOp > &loadOps, const int64_t offsets[4], llvm::Instruction *insertBefore) |
static void | lAssembleResultVectors (const std::vector< CoalescedLoadOp > &loadOps, const std::vector< int64_t > &constOffsets, std::vector< llvm::Value *> &results, llvm::Instruction *insertBefore) |
static llvm::Value * | lComputeBasePtr (llvm::CallInst *gatherInst, llvm::Instruction *insertBefore) |
static void | lExtractConstOffsets (const std::vector< llvm::CallInst *> &coalesceGroup, int elementSize, std::vector< int64_t > *constOffsets) |
static bool | lCoalesceGathers (const std::vector< llvm::CallInst *> &coalesceGroup) |
static bool | lInstructionMayWriteToMemory (llvm::Instruction *inst) |
static bool | lIsSafeToBlend (llvm::Value *lvalue) |
static bool | lReplacePseudoMaskedStore (llvm::CallInst *callInst) |
static bool | lReplacePseudoGS (llvm::CallInst *callInst) |
std::string | sanitize (std::string in) |
template<typename OpTy > | |
CastClassTypes_match< OpTy, llvm::Instruction::SExt > | m_SExt8To16 (const OpTy &Op) |
template<typename OpTy > | |
CastClassTypes_match< OpTy, llvm::Instruction::ZExt > | m_ZExt8To16 (const OpTy &Op) |
template<typename OpTy > | |
CastClassTypes_match< OpTy, llvm::Instruction::Trunc > | m_Trunc16To8 (const OpTy &Op) |
template<typename OpTy > | |
CastClassTypes_match< OpTy, llvm::Instruction::SExt > | m_SExt16To32 (const OpTy &Op) |
template<typename OpTy > | |
CastClassTypes_match< OpTy, llvm::Instruction::ZExt > | m_ZExt16To32 (const OpTy &Op) |
template<typename OpTy > | |
CastClassTypes_match< OpTy, llvm::Instruction::Trunc > | m_Trunc32To16 (const OpTy &Op) |
template<typename V > | |
UDiv2_match< V > | m_UDiv2 (const V &v) |
template<typename V > | |
SDiv2_match< V > | m_SDiv2 (const V &v) |
static bool | lHasIntrinsicInDefinition (llvm::Function *func) |
static llvm::Instruction * | lGetBinaryIntrinsic (const char *name, llvm::Value *opa, llvm::Value *opb) |
static llvm::Instruction * | lMatchAvgUpUInt8 (llvm::Value *inst) |
static llvm::Instruction * | lMatchAvgDownUInt8 (llvm::Value *inst) |
static llvm::Instruction * | lMatchAvgUpUInt16 (llvm::Value *inst) |
static llvm::Instruction * | lMatchAvgDownUInt16 (llvm::Value *inst) |
static llvm::Instruction * | lMatchAvgUpInt8 (llvm::Value *inst) |
static llvm::Instruction * | lMatchAvgDownInt8 (llvm::Value *inst) |
static llvm::Instruction * | lMatchAvgUpInt16 (llvm::Value *inst) |
static llvm::Instruction * | lMatchAvgDownInt16 (llvm::Value *inst) |
static int64_t | lGetIntValue (llvm::Value *offset) |
Implementations of various ispc optimization passes that operate on the LLVM IR.
Definition in file opt.cpp.
#define DEBUG_END_PASS | ( | NAME | ) |
Definition at line 151 of file opt.cpp.
Referenced by IntrinsicsOpt::runOnBasicBlock(), InstructionSimplifyPass::runOnBasicBlock(), ImproveMemoryOpsPass::runOnBasicBlock(), GatherCoalescePass::runOnBasicBlock(), ReplacePseudoMemoryOpsPass::runOnBasicBlock(), IsCompileTimeConstantPass::runOnBasicBlock(), PeepholePass::runOnBasicBlock(), and ReplaceStdlibShiftPass::runOnBasicBlock().
#define DEBUG_START_PASS | ( | NAME | ) |
Definition at line 141 of file opt.cpp.
Referenced by IntrinsicsOpt::runOnBasicBlock(), InstructionSimplifyPass::runOnBasicBlock(), ImproveMemoryOpsPass::runOnBasicBlock(), GatherCoalescePass::runOnBasicBlock(), ReplacePseudoMemoryOpsPass::runOnBasicBlock(), IsCompileTimeConstantPass::runOnBasicBlock(), PeepholePass::runOnBasicBlock(), and ReplaceStdlibShiftPass::runOnBasicBlock().
#define PRId64 "lld" |
Definition at line 110 of file opt.cpp.
Referenced by lApplyLoad1(), lApplyLoad2(), lApplyLoad4(), lAssemble4Vector(), lEmitLoads(), lSelectLoads(), and ConstExpr::Print().
#define PRIu64 "llu" |
Definition at line 113 of file opt.cpp.
Referenced by ConstExpr::Print().
enum MaskStatus |
|
static |
Definition at line 4234 of file opt.cpp.
Referenced by DebugPassManager::add().
|
static |
Definition at line 4290 of file opt.cpp.
Referenced by DebugPassManager::add().
|
static |
Definition at line 4956 of file opt.cpp.
Referenced by Optimize().
|
static |
Definition at line 3805 of file opt.cpp.
Referenced by Optimize().
|
static |
Definition at line 2806 of file opt.cpp.
Referenced by Optimize().
|
static |
Definition at line 1113 of file opt.cpp.
Referenced by Optimize().
|
static |
Definition at line 961 of file opt.cpp.
Referenced by Optimize().
|
static |
Definition at line 4203 of file opt.cpp.
Referenced by Optimize().
|
static |
Definition at line 4465 of file opt.cpp.
Referenced by Optimize().
|
static |
Definition at line 4766 of file opt.cpp.
References PeepholePass::PeepholePass().
Referenced by Optimize().
|
static |
Definition at line 4100 of file opt.cpp.
Referenced by Optimize().
|
static |
Definition at line 4861 of file opt.cpp.
Referenced by Optimize().
|
static |
Given a 1-wide load of a 32-bit value, merge its value into the result vector for any and all elements for which it applies.
Definition at line 3185 of file opt.cpp.
References Assert, CoalescedLoadOp::count, Debug(), LLVMInt32(), CoalescedLoadOp::load, PRId64, and CoalescedLoadOp::start.
Referenced by lAssemble4Vector().
|
static |
Similarly, incorporate the values from a 2-wide load into any vector elements that they apply to.
Definition at line 3206 of file opt.cpp.
References Assert, CoalescedLoadOp::count, Debug(), CoalescedLoadOp::element0, CoalescedLoadOp::element1, LLVMTypes::Int32Type, LLVMTypes::Int64Type, LLVMInt32(), CoalescedLoadOp::load, PRId64, and CoalescedLoadOp::start.
Referenced by lAssemble4Vector().
|
static |
And handle a 4-wide load
Definition at line 3265 of file opt.cpp.
References Assert, CoalescedLoadOp::count, Debug(), LLVMShuffleVectors(), CoalescedLoadOp::load, PRId64, and CoalescedLoadOp::start.
Referenced by lAssemble4Vector().
|
static |
We need to fill in the values for a 4-wide result vector. This function looks at all of the generated loads and extracts the appropriate elements from the appropriate loads to assemble the result. Here the offsets[] parameter gives the 4 offsets from the base pointer for the four elements of the result.
Definition at line 3304 of file opt.cpp.
References Assert, CoalescedLoadOp::count, Debug(), FATAL, LLVMTypes::Int32Type, lApplyLoad1(), lApplyLoad2(), lApplyLoad4(), LLVMShuffleVectors(), CoalescedLoadOp::load, PRId64, and CoalescedLoadOp::start.
Referenced by lAssembleResultVectors().
|
static |
Given the set of loads that we've done and the set of result values to be computed, this function computes the final llvm::Value *s for each result vector.
Definition at line 3461 of file opt.cpp.
References Assert, FATAL, g, Target::getVectorWidth(), lAssemble4Vector(), LLVMConcatVectors(), and Globals::target.
Referenced by lCoalesceGathers().
|
static |
Definition at line 247 of file opt.cpp.
Referenced by lGetBinaryIntrinsic(), lGSToGSBaseOffsets(), lGSToLoadStore(), and lReplacePseudoMaskedStore().
|
static |
|
static |
|
static |
|
static |
|
static |
Check to make sure that this value is actually a pointer in the end. We need to make sure that given an expression like vec(offset) + ptr2int(ptr), lGetBasePointer() doesn't return vec(offset) for the base pointer such that we then treat ptr2int(ptr) as an offset. This ends up being important so that we don't generate LLVM GEP instructions like "gep inttoptr 8, i64 %ptr", which in turn can lead to incorrect code since LLVM's pointer aliasing analysis assumes that operands after the first one to a GEP aren't pointers.
Definition at line 1151 of file opt.cpp.
Referenced by lGetBasePointer().
|
static |
Actually do the coalescing. We have a set of gathers all accessing addresses of the form:
(ptr + {1,2,4,8} * varyingOffset) + constOffset, a.k.a. basePtr + constOffset
where varyingOffset actually has the same value across all of the SIMD lanes and where the part in parenthesis has the same value for all of the gathers in the group.
Definition at line 3556 of file opt.cpp.
References Assert, LLVMTypes::DoubleVectorType, FATAL, LLVMTypes::FloatVectorType, LLVMTypes::Int32VectorType, LLVMTypes::Int64VectorType, lAssembleResultVectors(), lCoalescePerfInfo(), lComputeBasePtr(), lEmitLoads(), lExtractConstOffsets(), lSelectLoads(), and lSplit8WideLoads().
Referenced by GatherCoalescePass::runOnBasicBlock().
|
static |
Print a performance message with the details of the result of coalescing over a group of gathers.
Definition at line 3018 of file opt.cpp.
References SourcePos::first_line, g, Opt::level, lGetSourcePosFromMetadata(), Globals::opt, and PerformanceWarning().
Referenced by lCoalesceGathers().
|
static |
Given a call to a gather function, extract the base pointer, the 2/4/8 scale, and the first varying offsets value, and use them to compute the scalar base pointer that is shared by all of the gathers in the group. (Thus, this base pointer plus the constant offsets term for each gather gives the set of addresses to use for each gather.)
Definition at line 3504 of file opt.cpp.
References Assert, LLVMTypes::Int64Type, lGEPInst(), and LLVMExtractFirstVectorElement().
Referenced by lCoalesceGathers().
|
static |
Definition at line 2336 of file opt.cpp.
References lGEPInst(), and LLVMExtractFirstVectorElement().
Referenced by lGSToLoadStore().
|
static |
Given a vector of constant values (int, float, or bool) representing an execution mask, convert it to a bitvector where the 0th bit corresponds to the first vector value and so forth.
Definition at line 296 of file opt.cpp.
References Assert.
Referenced by lGetMask().
|
static |
This utility routine copies the metadata (if any) attached to the 'from' instruction in the IR to the 'to' instruction.
For flexibility, this function takes an llvm::Value rather than an llvm::Instruction for the 'to' parameter; at some places in the code below, we sometimes use a llvm::Value to start out storing a value and then later store instructions. If a llvm::Value is passed to this, the routine just returns without doing anything; if it is in fact an llvm::Instruction, then the metadata can be copied to it.
Definition at line 177 of file opt.cpp.
Referenced by lGSToGSBaseOffsets(), lGSToLoadStore(), lImproveMaskedLoad(), lImproveMaskedStore(), lReplacePseudoMaskedStore(), and IntrinsicsOpt::runOnBasicBlock().
|
static |
Definition at line 3102 of file opt.cpp.
References Debug(), FATAL, Opt::forceAlignedMemory, g, Target::getNativeVectorAlignment(), LLVMTypes::Int32Type, LLVMTypes::Int64Type, lGEPAndLoad(), LLVMInt64(), Globals::opt, PRId64, and Globals::target.
Referenced by lCoalesceGathers().
|
static |
Definition at line 1555 of file opt.cpp.
References Assert, LLVMTypes::Int32VectorType, LLVMInt32(), LLVMInt32Vector(), and LLVMInt64Vector().
Referenced by lExtractOffsetVector248Scale().
|
static |
Given a vector expression in vec, separate it into a compile-time constant component and a variable component, returning the two parts in *constOffset and *variableOffset. (It should be the case that the sum of these two is exactly equal to the original vector.)
This routine only handles some (important) patterns; in some cases it will fail and return components that are actually compile-time constants in *variableOffset.
Finally, if there aren't any constant (or, respectively, variable) components, the corresponding return value may be set to NULL.
Definition at line 1408 of file opt.cpp.
References IsOrEquivalentToAdd(), and LLVMGetName().
Referenced by lGSBaseOffsetsGetMoreConst(), and lGSToGSBaseOffsets().
|
static |
Extract the constant offsets (from the common base pointer) from each of the gathers in a set to be coalesced. These come in as byte offsets, but we'll transform them into offsets in terms of the size of the base scalar type being gathered. (e.g. for an i32 gather, we might have offsets like <0,4,16,20>, which would be transformed to <0,1,4,5> here.)
Definition at line 3529 of file opt.cpp.
References Assert, g, Target::getVectorWidth(), LLVMExtractVectorInts(), and Globals::target.
Referenced by lCoalesceGathers().
|
static |
|
static |
Given a vector of integer offsets to a base pointer being used for a gather or a scatter, see if its root operation is a multiply by a vector of some value by all 2s/4s/8s. If not, return NULL.
If it is, return an i32 value of 2, 4, or 8 from the function and modify *vec so that it points to the operand that is being multiplied by 2/4/8.
We go through all this trouble so that we can pass the i32 scale factor to the {gather,scatter}_base_offsets function as a separate scale factor for the offsets. This in turn is used in a way so that the LLVM x86 code generator matches it to apply x86's free scale by 2x, 4x, or 8x to one of two registers being added together for an addressing calculation.
Definition at line 1597 of file opt.cpp.
References Assert, IsOrEquivalentToAdd(), lExtract248Scale(), lGEPInst(), lIsIntegerSplat(), LLVMInt32(), and LLVMVectorValuesAllEqual().
Referenced by lGSToGSBaseOffsets().
llvm::Value* lGEPAndLoad | ( | llvm::Value * | basePtr, |
int64_t | offset, | ||
int | align, | ||
llvm::Instruction * | insertBefore, | ||
llvm::Type * | type | ||
) |
Utility routine that computes an offset from a base pointer and then returns the result of a load of the given type from the resulting location:
return *((type *)(basePtr + offset))
Definition at line 3087 of file opt.cpp.
References lGEPInst(), and LLVMInt64().
Referenced by lEmitLoads().
|
static |
Definition at line 284 of file opt.cpp.
References PTYPE.
Referenced by lComputeBasePtr(), lComputeCommonPointer(), lExtractOffsetVector248Scale(), and lGEPAndLoad().
|
static |
Given a llvm::Value representing a varying pointer, this function checks to see if all of the elements of the vector have the same value (i.e. there's a common base pointer). If broadcast has been already detected it checks that the first element of the vector is not undef. If one of the conditions is true, it returns the common pointer value; otherwise it returns NULL.
Definition at line 1188 of file opt.cpp.
References g, Target::getVectorWidth(), lCheckForActualPointer(), LLVMFlattenInsertChain(), LLVMGetName(), and Globals::target.
Referenced by lGetBasePtrAndOffsets().
|
static |
Given a varying pointer in ptrs, this function checks to see if it can be determined to be indexing from a common uniform base pointer. If so, the function returns the base pointer llvm::Value and initializes offsets with an int vector of the per-lane offsets
Definition at line 1260 of file opt.cpp.
References Assert, Globals::ctx, Globals::debugPrint, g, Target::is32Bit(), IsOrEquivalentToAdd(), ISPC_MAX_NVEC, lExtractFromInserts(), lGetBasePointer(), lGetConstantAddExprBaseOffset(), lIsUndef(), LLVMDumpValue(), LLVMInt32(), LLVMInt32Vector(), LLVMInt64(), LLVMInt64Vector(), and Globals::target.
Referenced by lGSToGSBaseOffsets().
|
static |
Definition at line 4603 of file opt.cpp.
References Assert, lCallInst(), lHasIntrinsicInDefinition(), m, and Module::module.
Referenced by lMatchAvgDownInt16(), lMatchAvgDownInt8(), lMatchAvgDownUInt16(), lMatchAvgDownUInt8(), lMatchAvgUpInt16(), lMatchAvgUpInt8(), lMatchAvgUpUInt16(), and lMatchAvgUpUInt8().
|
static |
Given the two operands to a constant add expression, see if we have the form "base pointer + offset", where op0 is the base pointer and op1 is the offset; if so return the base and the offset.
Definition at line 1228 of file opt.cpp.
Referenced by lGetBasePtrAndOffsets().
|
static |
Given an llvm::Value known to be an integer, return its value as an int64_t.
Definition at line 4771 of file opt.cpp.
References Assert.
Referenced by ReplaceStdlibShiftPass::runOnBasicBlock().
|
static |
Given an llvm::Value representing a vector mask, see if the value is a constant. If so, return true and set *mask to be the integer mask found by taking the high bits of the mask values in turn and concatenating them into a single integer. In other words, given the 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, we have 0b1001 = 9.
Definition at line 328 of file opt.cpp.
References Assert, g, Target::GetTargetMachine(), lConstElementsToMask(), and Globals::target.
Referenced by lGetMaskStatus(), IntrinsicsOpt::runOnBasicBlock(), and InstructionSimplifyPass::simplifyCall().
|
static |
Determines if the given mask value is all on, all off, mixed, or unknown at compile time.
Definition at line 376 of file opt.cpp.
References ALL_OFF, ALL_ON, Assert, g, Target::getVectorWidth(), lGetMask(), MIXED, Globals::target, and UNKNOWN.
Referenced by lImproveMaskedLoad(), lImproveMaskedStore(), GatherCoalescePass::runOnBasicBlock(), and InstructionSimplifyPass::simplifySelect().
|
static |
Definition at line 2341 of file opt.cpp.
References Assert, g, Target::getVectorWidth(), LLVMTypes::Int32VectorType, LLVMTypes::Int64VectorType, LLVMInt32(), LLVMInt64(), and Globals::target.
Referenced by lGSToLoadStore().
|
static |
We have a protocol with the front-end LLVM IR code generation process that allows us to encode the source file position that corresponds with instructions. (For example, this allows us to issue performance warnings related to things like scatter and gather after optimization has been performed, so that we aren't warning about scatters and gathers that have been improved to stores and loads by optimization passes.) Note that this is slightly redundant with the source file position encoding generated for debugging symbols, though we don't always generate debugging information but we do always generate this position data.
This function finds the SourcePos that the metadata in the instruction (if present) corresponds to. See the implementation of FunctionEmitContext::addGSMetadata(), which encodes the source position during code generation.
inst | Instruction to try to find the source position of |
pos | Output variable in which to store the position |
Definition at line 210 of file opt.cpp.
References Assert.
Referenced by lCoalescePerfInfo(), lGSToLoadStore(), lReplacePseudoGS(), and GatherCoalescePass::runOnBasicBlock().
|
static |
Try to improve the decomposition between compile-time constant and compile-time unknown offsets in calls to the __pseudo_*_base_offsets* functions. After other optimizations have run, we will sometimes be able to pull more terms out of the unknown part and add them into the compile-time-known part.
Definition at line 2179 of file opt.cpp.
References Assert, g, Target::hasGather(), Target::hasScatter(), Target::hasVecPrefetch(), LLVMTypes::Int64VectorType, lExtractConstantOffset(), LLVMInt32Vector(), LLVMInt64Vector(), LLVMIntAsType(), m, Module::module, and Globals::target.
Referenced by ImproveMemoryOpsPass::runOnBasicBlock().
|
static |
Definition at line 1863 of file opt.cpp.
References Assert, Opt::force32BitAddressing, g, Target::hasGather(), Target::hasScatter(), Target::hasVecPrefetch(), lCallInst(), lCopyMetadata(), lExtractConstantOffset(), lExtractOffsetVector248Scale(), lGetBasePtrAndOffsets(), LLVMGetName(), LLVMIntAsType(), lOffsets32BitSafe(), m, Module::module, Globals::opt, Globals::target, and LLVMTypes::VoidPointerType.
Referenced by ImproveMemoryOpsPass::runOnBasicBlock().
|
static |
After earlier optimization passes have run, we are sometimes able to determine that gathers/scatters are actually accessing memory in a more regular fashion and then change the operation to something simpler and more efficient. For example, if all of the lanes in a gather are reading from the same location, we can instead do a scalar load and broadcast. This pass examines gathers and scatters and tries to simplify them if at all possible.
Definition at line 2373 of file opt.cpp.
References Assert, Globals::ctx, Debug(), LLVMTypes::DoubleType, LLVMTypes::DoubleVectorPointerType, LLVMTypes::FloatType, LLVMTypes::FloatVectorPointerType, g, Target::getVectorWidth(), Target::hasGather(), Target::hasScatter(), LLVMTypes::Int16Type, LLVMTypes::Int16VectorPointerType, LLVMTypes::Int32Type, LLVMTypes::Int32VectorPointerType, LLVMTypes::Int64Type, LLVMTypes::Int64VectorPointerType, LLVMTypes::Int8Type, LLVMTypes::Int8VectorPointerType, lCallInst(), lComputeCommonPointer(), lCopyMetadata(), lGetOffsetScaleVec(), lGetSourcePosFromMetadata(), LLVMGetName(), LLVMInt32(), LLVMVectorIsLinear(), LLVMVectorValuesAllEqual(), m, Module::module, Globals::target, and Warning().
Referenced by ImproveMemoryOpsPass::runOnBasicBlock().
|
static |
Definition at line 4592 of file opt.cpp.
Referenced by lGetBinaryIntrinsic().
|
static |
Definition at line 2701 of file opt.cpp.
References ALL_OFF, ALL_ON, Assert, Opt::forceAlignedMemory, g, Target::getNativeVectorAlignment(), lCopyMetadata(), lGetMaskStatus(), m, Module::module, Globals::opt, and Globals::target.
Referenced by ImproveMemoryOpsPass::runOnBasicBlock().
|
static |
Masked stores are generally more complex than regular stores; for example, they require multiple instructions to simulate under SSE. This optimization detects cases where masked stores can be replaced with regular stores or removed entirely, for the cases of an 'all on' mask and an 'all off' mask, respectively.
Definition at line 2633 of file opt.cpp.
References ALL_OFF, ALL_ON, Assert, Opt::forceAlignedMemory, g, Target::getNativeVectorAlignment(), ISPC_LLVM_9_0, ISPC_LLVM_VERSION, lCopyMetadata(), lGetMaskStatus(), m, Module::module, Globals::opt, and Globals::target.
Referenced by ImproveMemoryOpsPass::runOnBasicBlock().
|
static |
Given an instruction, returns true if the instruction may write to memory. This is a conservative test in that it may return true for some instructions that don't actually end up writing to memory, but should never return false for an instruction that does write to memory.
Definition at line 3630 of file opt.cpp.
Referenced by GatherCoalescePass::runOnBasicBlock().
|
static |
Check to see if the offset value is composed of a string of Adds, SExts, and Constant Vectors that are 32-bit safe. Recursively explores the operands of Add instructions (as they might themselves be adds that eventually terminate in constant vectors or a SExt.)
Definition at line 1822 of file opt.cpp.
References LLVMTypes::Int32VectorType, IsOrEquivalentToAdd(), and lVectorIs32BitInts().
Referenced by lOffsets32BitSafe().
|
static |
Definition at line 1537 of file opt.cpp.
Referenced by lExtractOffsetVector248Scale().
|
static |
This routine attempts to determine if the given pointer in lvalue is pointing to stack-allocated memory. It's conservative in that it should never return true for non-stack allocated memory, but may return false for memory that actually is stack allocated. The basic strategy is to traverse through the operands and see if the pointer originally comes from an AllocaInst.
Definition at line 3833 of file opt.cpp.
References Assert, g, Target::getVectorWidth(), and Globals::target.
Referenced by lReplacePseudoMaskedStore().
|
static |
Given an llvm::Value, return true if we can determine that it's an undefined value. This only makes a weak attempt at chasing this down, only detecting flat-out undef values, and bitcasts of undef values.
Definition at line 732 of file opt.cpp.
Referenced by lGetBasePtrAndOffsets(), and IntrinsicsOpt::runOnBasicBlock().
|
static |
Definition at line 4713 of file opt.cpp.
References lGetBinaryIntrinsic(), m_SDiv2(), m_SExt16To32(), and m_Trunc32To16().
Referenced by PeepholePass::runOnBasicBlock().
|
static |
Definition at line 4687 of file opt.cpp.
References lGetBinaryIntrinsic(), m_SDiv2(), m_SExt8To16(), and m_Trunc16To8().
Referenced by PeepholePass::runOnBasicBlock().
|
static |
Definition at line 4662 of file opt.cpp.
References lGetBinaryIntrinsic(), m_Trunc32To16(), m_UDiv2(), and m_ZExt16To32().
Referenced by PeepholePass::runOnBasicBlock().
|
static |
Definition at line 4636 of file opt.cpp.
References lGetBinaryIntrinsic(), m_Trunc16To8(), m_UDiv2(), and m_ZExt8To16().
Referenced by PeepholePass::runOnBasicBlock().
|
static |
Definition at line 4696 of file opt.cpp.
References lGetBinaryIntrinsic(), m_SDiv2(), m_SExt16To32(), and m_Trunc32To16().
Referenced by PeepholePass::runOnBasicBlock().
|
static |
Definition at line 4671 of file opt.cpp.
References lGetBinaryIntrinsic(), m_SDiv2(), m_SExt8To16(), and m_Trunc16To8().
Referenced by PeepholePass::runOnBasicBlock().
|
static |
Definition at line 4645 of file opt.cpp.
References lGetBinaryIntrinsic(), m_Trunc32To16(), m_UDiv2(), and m_ZExt16To32().
Referenced by PeepholePass::runOnBasicBlock().
|
static |
Definition at line 4620 of file opt.cpp.
References lGetBinaryIntrinsic(), m_Trunc16To8(), m_UDiv2(), and m_ZExt8To16().
Referenced by PeepholePass::runOnBasicBlock().
|
static |
Check to see if the two offset vectors can safely be represented with 32-bit values. If so, return true and update the pointed-to llvm::Value *s to be the 32-bit equivalents.
Definition at line 1772 of file opt.cpp.
References LLVMTypes::Int32VectorType, LLVMGetName(), and lVectorIs32BitInts().
Referenced by lGSToGSBaseOffsets().
|
static |
Check to see if the single offset vector can safely be represented with 32-bit values. If so, return true and update the pointed-to llvm::Value * to be the 32-bit equivalent.
Definition at line 1838 of file opt.cpp.
References LLVMTypes::Int32VectorType, lIs32BitSafeHelper(), and LLVMGetName().
|
static |
Definition at line 3911 of file opt.cpp.
References Assert, g, Target::getVectorWidth(), Opt::level, lGetSourcePosFromMetadata(), m, Module::module, Globals::opt, PerformanceWarning(), and Globals::target.
Referenced by ReplacePseudoMemoryOpsPass::runOnBasicBlock().
|
static |
Definition at line 3860 of file opt.cpp.
References Assert, Opt::disableBlendedMaskedStores, g, lCallInst(), lCopyMetadata(), lIsSafeToBlend(), m, Module::module, and Globals::opt.
Referenced by ReplacePseudoMemoryOpsPass::runOnBasicBlock().
|
static |
Given a set of offsets from a common base pointer that we need to get loaded into memory, determine a reasonable set of load operations that gets all of the corresponding values in memory (ideally, including as many as possible wider vector loads rather than scalar loads). Return a CoalescedLoadOp for each one in the *loads array.
Definition at line 2968 of file opt.cpp.
References Debug(), lVectorLoadIsEfficient(), and PRId64.
Referenced by lCoalesceGathers().
|
static |
Convert any loads of 8-wide vectors into two 4-wide vectors (logically). This allows the assembly code below to always operate on 4-wide vectors, which leads to better code. Returns a new vector of load operations.
Definition at line 3160 of file opt.cpp.
References LLVMShuffleVectors().
Referenced by lCoalesceGathers().
|
static |
Definition at line 1756 of file opt.cpp.
References ISPC_MAX_NVEC, and LLVMExtractVectorInts().
Referenced by lIs32BitSafeHelper(), and lOffsets32BitSafe().
|
static |
This function determines whether it makes sense (and is safe) to generate a vector load of width vectorWidth, starting at *iter. It returns true if so, setting *newIter to point to the next element in the set that isn't taken care of by the generated load. If a vector load of the given width doesn't make sense, then false is returned.
Definition at line 2874 of file opt.cpp.
Referenced by lSelectLoads().
|
inline |
Definition at line 4588 of file opt.cpp.
Referenced by lMatchAvgDownInt16(), lMatchAvgDownInt8(), lMatchAvgUpInt16(), and lMatchAvgUpInt8().
|
inline |
Definition at line 4517 of file opt.cpp.
References LLVMTypes::Int16VectorType, and LLVMTypes::Int32VectorType.
Referenced by lMatchAvgDownInt16(), and lMatchAvgUpInt16().
|
inline |
Definition at line 4502 of file opt.cpp.
References LLVMTypes::Int16VectorType, and LLVMTypes::Int8VectorType.
Referenced by lMatchAvgDownInt8(), and lMatchAvgUpInt8().
|
inline |
Definition at line 4512 of file opt.cpp.
References LLVMTypes::Int16VectorType, and LLVMTypes::Int8VectorType.
Referenced by lMatchAvgDownInt8(), lMatchAvgDownUInt8(), lMatchAvgUpInt8(), and lMatchAvgUpUInt8().
|
inline |
Definition at line 4527 of file opt.cpp.
References LLVMTypes::Int16VectorType, and LLVMTypes::Int32VectorType.
Referenced by lMatchAvgDownInt16(), lMatchAvgDownUInt16(), lMatchAvgUpInt16(), and lMatchAvgUpUInt16().
|
inline |
Definition at line 4559 of file opt.cpp.
Referenced by lMatchAvgDownUInt16(), lMatchAvgDownUInt8(), lMatchAvgUpUInt16(), and lMatchAvgUpUInt8().
|
inline |
Definition at line 4522 of file opt.cpp.
References LLVMTypes::Int16VectorType, and LLVMTypes::Int32VectorType.
Referenced by lMatchAvgDownUInt16(), and lMatchAvgUpUInt16().
|
inline |
Definition at line 4507 of file opt.cpp.
References LLVMTypes::Int16VectorType, and LLVMTypes::Int8VectorType.
Referenced by lMatchAvgDownUInt8(), and lMatchAvgUpUInt8().
void Optimize(llvm::Module *module, int optLevel)
Optimize the functions in the given module, applying the specified level of optimization. An optLevel of zero corresponds to essentially no optimization — just enough to generate correct code — while level one corresponds to full optimization.
Definition at line 441 of file opt.cpp.
References DebugPassManager::add(), CreateFixBooleanSelectPass(), CreateGatherCoalescePass(), CreateImproveMemoryOpsPass(), CreateInstructionSimplifyPass(), CreateIntrinsicsOptPass(), CreateIsCompileTimeConstantPass(), CreateMakeInternalFuncsStaticPass(), CreatePeepholePass(), CreateReplacePseudoMemoryOpsPass(), CreateReplaceStdlibShiftPass(), Globals::debugPrint, Opt::disableCoalescing, Opt::disableGatherScatterOptimizations, Opt::disableHandlePseudoMemoryOps, Opt::disableMaskAllOnOptimizations, g, Target::GENERIC, Target::getISA(), DebugPassManager::getPM(), Target::GetTargetMachine(), Target::getVectorWidth(), LAST_OPT_NUMBER, Globals::opt, DebugPassManager::run(), Globals::target, and Opt::unrollLoops.
std::string sanitize(std::string in)
Strips all non-alphanumeric characters from the given string.
Definition at line 4264 of file opt.cpp.
Referenced by DebugPassFile::run().