Intel SPMD Program Compiler  1.9.1
Classes | Macros | Enumerations | Functions
opt.cpp File Reference

Implementations of various ispc optimization passes that operate on the LLVM IR. More...

#include "opt.h"
#include "ctx.h"
#include "sym.h"
#include "module.h"
#include "util.h"
#include "llvmutil.h"
#include <stdio.h>
#include <map>
#include <set>
#include <llvm/Pass.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Constants.h>
#include "llvm/PassManager.h"
#include <llvm/PassRegistry.h>
#include <llvm/Analysis/Verifier.h>
#include <llvm/Assembly/PrintModulePass.h>
#include <llvm/Support/PatternMatch.h>
#include <llvm/DebugInfo.h>
#include <llvm/Analysis/ConstantFolding.h>
#include <llvm/Target/TargetLibraryInfo.h>
#include <llvm/ADT/Triple.h>
#include <llvm/ADT/SmallSet.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Target/TargetOptions.h>
#include <llvm/IR/DataLayout.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/Dwarf.h>
Include dependency graph for opt.cpp:

Go to the source code of this file.

Classes

class  DebugPassManager
 
class  IntrinsicsOpt
 
struct  IntrinsicsOpt::MaskInstruction
 
struct  IntrinsicsOpt::BlendInstruction
 
class  InstructionSimplifyPass
 
class  ImproveMemoryOpsPass
 
class  GatherCoalescePass
 
struct  CoalescedLoadOp
 
class  ReplacePseudoMemoryOpsPass
 
class  IsCompileTimeConstantPass
 
class  DebugPass
 
class  MakeInternalFuncsStaticPass
 
class  PeepholePass
 
class  ReplaceStdlibShiftPass
 
class  FixBooleanSelectPass
 

Macros

#define PRId64   "lld"
 
#define PRIu64   "llu"
 
#define DEBUG_START_PASS(NAME)
 
#define DEBUG_END_PASS(NAME)
 

Enumerations

enum  MaskStatus { ALL_ON, ALL_OFF, MIXED, UNKNOWN }
 

Functions

static llvm::Pass * CreateIntrinsicsOptPass ()
 
static llvm::Pass * CreateInstructionSimplifyPass ()
 
static llvm::Pass * CreatePeepholePass ()
 
static llvm::Pass * CreateImproveMemoryOpsPass ()
 
static llvm::Pass * CreateGatherCoalescePass ()
 
static llvm::Pass * CreateReplacePseudoMemoryOpsPass ()
 
static llvm::Pass * CreateIsCompileTimeConstantPass (bool isLastTry)
 
static llvm::Pass * CreateMakeInternalFuncsStaticPass ()
 
static llvm::Pass * CreateDebugPass (char *output)
 
static llvm::Pass * CreateReplaceStdlibShiftPass ()
 
static llvm::Pass * CreateFixBooleanSelectPass ()
 
static void lCopyMetadata (llvm::Value *vto, const llvm::Instruction *from)
 
static bool lGetSourcePosFromMetadata (const llvm::Instruction *inst, SourcePos *pos)
 
static llvm::Instruction * lCallInst (llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, const char *name, llvm::Instruction *insertBefore=NULL)
 
static llvm::Instruction * lCallInst (llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, const char *name, llvm::Instruction *insertBefore=NULL)
 
static llvm::Instruction * lCallInst (llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, llvm::Value *arg3, const char *name, llvm::Instruction *insertBefore=NULL)
 
static llvm::Instruction * lCallInst (llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, llvm::Value *arg3, llvm::Value *arg4, const char *name, llvm::Instruction *insertBefore=NULL)
 
static llvm::Instruction * lCallInst (llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, llvm::Value *arg3, llvm::Value *arg4, llvm::Value *arg5, const char *name, llvm::Instruction *insertBefore=NULL)
 
static llvm::Instruction * lGEPInst (llvm::Value *ptr, llvm::Value *offset, const char *name, llvm::Instruction *insertBefore)
 
static uint64_t lConstElementsToMask (const llvm::SmallVector< llvm::Constant *, ISPC_MAX_NVEC > &elements)
 
static bool lGetMask (llvm::Value *factor, uint64_t *mask)
 
static MaskStatus lGetMaskStatus (llvm::Value *mask, int vecWidth=-1)
 
void Optimize (llvm::Module *module, int optLevel)
 
static bool lIsUndef (llvm::Value *value)
 
static llvm::Value * lCheckForActualPointer (llvm::Value *v)
 
static llvm::Value * lGetBasePointer (llvm::Value *v, llvm::Instruction *insertBefore)
 
static llvm::Constant * lGetConstantAddExprBaseOffset (llvm::Constant *op0, llvm::Constant *op1, llvm::Constant **delta)
 
static llvm::Value * lExtractFromInserts (llvm::Value *v, unsigned int index)
 
static llvm::Value * lGetBasePtrAndOffsets (llvm::Value *ptrs, llvm::Value **offsets, llvm::Instruction *insertBefore)
 
static void lExtractConstantOffset (llvm::Value *vec, llvm::Value **constOffset, llvm::Value **variableOffset, llvm::Instruction *insertBefore)
 
static bool lIsIntegerSplat (llvm::Value *v, int *splat)
 
static llvm::Value * lExtract248Scale (llvm::Value *splatOperand, int splatValue, llvm::Value *otherOperand, llvm::Value **result)
 
static llvm::Value * lExtractOffsetVector248Scale (llvm::Value **vec)
 
static bool lVectorIs32BitInts (llvm::Value *v)
 
static bool lOffsets32BitSafe (llvm::Value **variableOffsetPtr, llvm::Value **constOffsetPtr, llvm::Instruction *insertBefore)
 
static bool lIs32BitSafeHelper (llvm::Value *v)
 
static bool lOffsets32BitSafe (llvm::Value **offsetPtr, llvm::Instruction *insertBefore)
 
static bool lGSToGSBaseOffsets (llvm::CallInst *callInst)
 
static bool lGSBaseOffsetsGetMoreConst (llvm::CallInst *callInst)
 
static llvm::Value * lComputeCommonPointer (llvm::Value *base, llvm::Value *offsets, llvm::Instruction *insertBefore)
 
static llvm::Constant * lGetOffsetScaleVec (llvm::Value *offsetScale, llvm::Type *vecType)
 
static bool lGSToLoadStore (llvm::CallInst *callInst)
 
static bool lImproveMaskedStore (llvm::CallInst *callInst)
 
static bool lImproveMaskedLoad (llvm::CallInst *callInst, llvm::BasicBlock::iterator iter)
 
static bool lVectorLoadIsEfficient (std::set< int64_t >::iterator iter, std::set< int64_t >::iterator end, std::set< int64_t >::iterator *newIter, int vectorWidth)
 
static void lSelectLoads (const std::vector< int64_t > &loadOffsets, std::vector< CoalescedLoadOp > *loads)
 
static void lCoalescePerfInfo (const std::vector< llvm::CallInst * > &coalesceGroup, const std::vector< CoalescedLoadOp > &loadOps)
 
llvm::Value * lGEPAndLoad (llvm::Value *basePtr, int64_t offset, int align, llvm::Instruction *insertBefore, llvm::Type *type)
 
static void lEmitLoads (llvm::Value *basePtr, std::vector< CoalescedLoadOp > &loadOps, int elementSize, llvm::Instruction *insertBefore)
 
static std::vector< CoalescedLoadOp > lSplit8WideLoads (const std::vector< CoalescedLoadOp > &loadOps, llvm::Instruction *insertBefore)
 
static llvm::Value * lApplyLoad1 (llvm::Value *result, const CoalescedLoadOp &load, const int64_t offsets[4], bool set[4], llvm::Instruction *insertBefore)
 
static llvm::Value * lApplyLoad2 (llvm::Value *result, const CoalescedLoadOp &load, const int64_t offsets[4], bool set[4], llvm::Instruction *insertBefore)
 
static llvm::Value * lApplyLoad4 (llvm::Value *result, const CoalescedLoadOp &load, const int64_t offsets[4], bool set[4], llvm::Instruction *insertBefore)
 
static llvm::Value * lAssemble4Vector (const std::vector< CoalescedLoadOp > &loadOps, const int64_t offsets[4], llvm::Instruction *insertBefore)
 
static void lAssembleResultVectors (const std::vector< CoalescedLoadOp > &loadOps, const std::vector< int64_t > &constOffsets, std::vector< llvm::Value * > &results, llvm::Instruction *insertBefore)
 
static llvm::Value * lComputeBasePtr (llvm::CallInst *gatherInst, llvm::Instruction *insertBefore)
 
static void lExtractConstOffsets (const std::vector< llvm::CallInst * > &coalesceGroup, int elementSize, std::vector< int64_t > *constOffsets)
 
static bool lCoalesceGathers (const std::vector< llvm::CallInst * > &coalesceGroup)
 
static bool lInstructionMayWriteToMemory (llvm::Instruction *inst)
 
static bool lIsSafeToBlend (llvm::Value *lvalue)
 
static bool lReplacePseudoMaskedStore (llvm::CallInst *callInst)
 
static bool lReplacePseudoGS (llvm::CallInst *callInst)
 
static int64_t lGetIntValue (llvm::Value *offset)
 

Detailed Description

Implementations of various ispc optimization passes that operate on the LLVM IR.

Definition in file opt.cpp.

Macro Definition Documentation

#define DEBUG_END_PASS (   NAME)
Value:
if (g->debugPrint && \
(getenv("FUNC") == NULL || \
!strncmp(bb.getParent()->getName().str().c_str(), getenv("FUNC"), \
strlen(getenv("FUNC"))))) { \
fprintf(stderr, "End of " NAME " %s\n", modifiedAny ? "** CHANGES **" : ""); \
fprintf(stderr, "---------------\n"); \
bb.dump(); \
fprintf(stderr, "---------------\n\n"); \
} else /* eat semicolon */
Globals * g
Definition: ispc.cpp:88
bool debugPrint
Definition: ispc.h:560

Definition at line 172 of file opt.cpp.

Referenced by IntrinsicsOpt::runOnBasicBlock(), InstructionSimplifyPass::runOnBasicBlock(), ImproveMemoryOpsPass::runOnBasicBlock(), GatherCoalescePass::runOnBasicBlock(), ReplacePseudoMemoryOpsPass::runOnBasicBlock(), IsCompileTimeConstantPass::runOnBasicBlock(), PeepholePass::runOnBasicBlock(), and ReplaceStdlibShiftPass::runOnBasicBlock().

#define DEBUG_START_PASS (   NAME)
Value:
if (g->debugPrint && \
(getenv("FUNC") == NULL || \
!strncmp(bb.getParent()->getName().str().c_str(), getenv("FUNC"), \
strlen(getenv("FUNC"))))) { \
fprintf(stderr, "Start of " NAME "\n"); \
fprintf(stderr, "---------------\n"); \
bb.dump(); \
fprintf(stderr, "---------------\n\n"); \
} else /* eat semicolon */
Globals * g
Definition: ispc.cpp:88
bool debugPrint
Definition: ispc.h:560

Definition at line 161 of file opt.cpp.

Referenced by IntrinsicsOpt::runOnBasicBlock(), InstructionSimplifyPass::runOnBasicBlock(), ImproveMemoryOpsPass::runOnBasicBlock(), GatherCoalescePass::runOnBasicBlock(), ReplacePseudoMemoryOpsPass::runOnBasicBlock(), IsCompileTimeConstantPass::runOnBasicBlock(), PeepholePass::runOnBasicBlock(), and ReplaceStdlibShiftPass::runOnBasicBlock().

#define PRId64   "lld"
#define PRIu64   "llu"

Definition at line 138 of file opt.cpp.

Referenced by ConstExpr::Print().

Enumeration Type Documentation

enum MaskStatus
Enumerator
ALL_ON 
ALL_OFF 
MIXED 
UNKNOWN 

Definition at line 439 of file opt.cpp.

Function Documentation

static llvm::Pass * CreateDebugPass ( char *  output)
static

Definition at line 4821 of file opt.cpp.

Referenced by DebugPassManager::add().

static llvm::Pass * CreateFixBooleanSelectPass ( )
static

Definition at line 5602 of file opt.cpp.

Referenced by Optimize().

static llvm::Pass * CreateGatherCoalescePass ( )
static

Definition at line 4322 of file opt.cpp.

Referenced by Optimize().

static llvm::Pass * CreateImproveMemoryOpsPass ( )
static

Definition at line 3249 of file opt.cpp.

Referenced by Optimize().

static llvm::Pass * CreateInstructionSimplifyPass ( )
static

Definition at line 1406 of file opt.cpp.

Referenced by Optimize().

static llvm::Pass * CreateIntrinsicsOptPass ( )
static

Definition at line 1241 of file opt.cpp.

Referenced by Optimize().

static llvm::Pass * CreateIsCompileTimeConstantPass ( bool  isLastTry)
static

Definition at line 4785 of file opt.cpp.

Referenced by Optimize().

static llvm::Pass * CreateMakeInternalFuncsStaticPass ( )
static

Definition at line 4944 of file opt.cpp.

Referenced by Optimize().

static llvm::Pass * CreatePeepholePass ( )
static

Definition at line 5333 of file opt.cpp.

Referenced by Optimize().

static llvm::Pass * CreateReplacePseudoMemoryOpsPass ( )
static

Definition at line 4680 of file opt.cpp.

Referenced by Optimize().

static llvm::Pass * CreateReplaceStdlibShiftPass ( )
static

Definition at line 5419 of file opt.cpp.

Referenced by Optimize().

static llvm::Value* lApplyLoad1 ( llvm::Value *  result,
const CoalescedLoadOp load,
const int64_t  offsets[4],
bool  set[4],
llvm::Instruction *  insertBefore 
)
static

Given a 1-wide load of a 32-bit value, merge its value into the result vector for any and all elements for which it applies.

Definition at line 3661 of file opt.cpp.

References Assert, CoalescedLoadOp::count, Debug(), LLVMInt32(), CoalescedLoadOp::load, PRId64, and CoalescedLoadOp::start.

Referenced by lAssemble4Vector().

static llvm::Value* lApplyLoad2 ( llvm::Value *  result,
const CoalescedLoadOp load,
const int64_t  offsets[4],
bool  set[4],
llvm::Instruction *  insertBefore 
)
static

Similarly, incorporate the values from a 2-wide load into any vector elements that they apply to.

Definition at line 3686 of file opt.cpp.

References Assert, CoalescedLoadOp::count, Debug(), CoalescedLoadOp::element0, CoalescedLoadOp::element1, LLVMTypes::Int32Type, LLVMTypes::Int64Type, LLVMInt32(), CoalescedLoadOp::load, PRId64, and CoalescedLoadOp::start.

Referenced by lAssemble4Vector().

static llvm::Value* lApplyLoad4 ( llvm::Value *  result,
const CoalescedLoadOp load,
const int64_t  offsets[4],
bool  set[4],
llvm::Instruction *  insertBefore 
)
static

And handle a 4-wide load

Definition at line 3753 of file opt.cpp.

References Assert, CoalescedLoadOp::count, Debug(), LLVMShuffleVectors(), CoalescedLoadOp::load, PRId64, and CoalescedLoadOp::start.

Referenced by lAssemble4Vector().

static llvm::Value* lAssemble4Vector ( const std::vector< CoalescedLoadOp > &  loadOps,
const int64_t  offsets[4],
llvm::Instruction *  insertBefore 
)
static

We need to fill in the values for a 4-wide result vector. This function looks at all of the generated loads and extracts the appropriate elements from the appropriate loads to assemble the result. Here the offsets[] parameter gives the 4 offsets from the base pointer for the four elements of the result.

Definition at line 3794 of file opt.cpp.

References Assert, CoalescedLoadOp::count, Debug(), FATAL, LLVMTypes::Int32Type, lApplyLoad1(), lApplyLoad2(), lApplyLoad4(), and PRId64.

Referenced by lAssembleResultVectors().

static void lAssembleResultVectors ( const std::vector< CoalescedLoadOp > &  loadOps,
const std::vector< int64_t > &  constOffsets,
std::vector< llvm::Value * > &  results,
llvm::Instruction *  insertBefore 
)
static

Given the set of loads that we've done and the set of result values to be computed, this function computes the final llvm::Value *s for each result vector.

Definition at line 3966 of file opt.cpp.

References Assert, FATAL, g, Target::getVectorWidth(), lAssemble4Vector(), LLVMConcatVectors(), and Globals::target.

Referenced by lCoalesceGathers().

static llvm::Instruction* lCallInst ( llvm::Function *  func,
llvm::Value *  arg0,
llvm::Value *  arg1,
const char *  name,
llvm::Instruction *  insertBefore = NULL 
)
static

Definition at line 288 of file opt.cpp.

Referenced by lGSToGSBaseOffsets(), lGSToLoadStore(), and lReplacePseudoMaskedStore().

static llvm::Instruction* lCallInst ( llvm::Function *  func,
llvm::Value *  arg0,
llvm::Value *  arg1,
llvm::Value *  arg2,
const char *  name,
llvm::Instruction *  insertBefore = NULL 
)
static

Definition at line 297 of file opt.cpp.

static llvm::Instruction* lCallInst ( llvm::Function *  func,
llvm::Value *  arg0,
llvm::Value *  arg1,
llvm::Value *  arg2,
llvm::Value *  arg3,
const char *  name,
llvm::Instruction *  insertBefore = NULL 
)
static

Definition at line 307 of file opt.cpp.

static llvm::Instruction* lCallInst ( llvm::Function *  func,
llvm::Value *  arg0,
llvm::Value *  arg1,
llvm::Value *  arg2,
llvm::Value *  arg3,
llvm::Value *  arg4,
const char *  name,
llvm::Instruction *  insertBefore = NULL 
)
static

Definition at line 316 of file opt.cpp.

static llvm::Instruction* lCallInst ( llvm::Function *  func,
llvm::Value *  arg0,
llvm::Value *  arg1,
llvm::Value *  arg2,
llvm::Value *  arg3,
llvm::Value *  arg4,
llvm::Value *  arg5,
const char *  name,
llvm::Instruction *  insertBefore = NULL 
)
static

Definition at line 326 of file opt.cpp.

static llvm::Value* lCheckForActualPointer ( llvm::Value *  v)
static

Check to make sure that this value is actually a pointer in the end. We need to make sure that given an expression like vec(offset) + ptr2int(ptr), lGetBasePointer() doesn't return vec(offset) for the base pointer such that we then treat ptr2int(ptr) as an offset. This ends up being important so that we don't generate LLVM GEP instructions like "gep inttoptr 8, i64 %ptr", which in turn can lead to incorrect code since LLVM's pointer aliasing analysis assumes that operands after the first one to a GEP aren't pointers.

Definition at line 1447 of file opt.cpp.

Referenced by lGetBasePointer().

static bool lCoalesceGathers ( const std::vector< llvm::CallInst * > &  coalesceGroup)
static

Actually do the coalescing. We have a set of gathers all accessing addresses of the form:

(ptr + {1,2,4,8} * varyingOffset) + constOffset, a.k.a. basePtr + constOffset

where varyingOffset actually has the same value across all of the SIMD lanes and where the part in parenthesis has the same value for all of the gathers in the group.

Definition at line 4072 of file opt.cpp.

References Assert, LLVMTypes::DoubleVectorType, FATAL, LLVMTypes::FloatVectorType, LLVMTypes::Int32VectorType, LLVMTypes::Int64VectorType, lAssembleResultVectors(), lCoalescePerfInfo(), lComputeBasePtr(), lEmitLoads(), lExtractConstOffsets(), lSelectLoads(), and lSplit8WideLoads().

Referenced by GatherCoalescePass::runOnBasicBlock().

static void lCoalescePerfInfo ( const std::vector< llvm::CallInst * > &  coalesceGroup,
const std::vector< CoalescedLoadOp > &  loadOps 
)
static

Print a performance message with the details of the result of coalescing over a group of gathers.

Definition at line 3477 of file opt.cpp.

References SourcePos::first_line, lGetSourcePosFromMetadata(), and PerformanceWarning().

Referenced by lCoalesceGathers().

static llvm::Value* lComputeBasePtr ( llvm::CallInst *  gatherInst,
llvm::Instruction *  insertBefore 
)
static

Given a call to a gather function, extract the base pointer, the 2/4/8 scale, and the first varying offsets value to use them to compute that scalar base pointer that is shared by all of the gathers in the group. (Thus, this base pointer plus the constant offsets term for each gather gives the set of addresses to use for each gather.)

Definition at line 4015 of file opt.cpp.

References LLVMTypes::Int64Type, lGEPInst(), and LLVMExtractFirstVectorElement().

Referenced by lCoalesceGathers().

static llvm::Value* lComputeCommonPointer ( llvm::Value *  base,
llvm::Value *  offsets,
llvm::Instruction *  insertBefore 
)
static

Definition at line 2738 of file opt.cpp.

References lGEPInst(), and LLVMExtractFirstVectorElement().

Referenced by lGSToLoadStore().

static uint64_t lConstElementsToMask ( const llvm::SmallVector< llvm::Constant *, ISPC_MAX_NVEC > &  elements)
static

Given a vector of constant values (int, float, or bool) representing an execution mask, convert it to a bitvector where the 0th bit corresponds to the first vector value and so forth.

Definition at line 356 of file opt.cpp.

References Assert.

Referenced by lGetMask().

static void lCopyMetadata ( llvm::Value *  vto,
const llvm::Instruction *  from 
)
static

This utility routine copies the metadata (if any) attached to the 'from' instruction in the IR to the 'to' instruction.

For flexibility, this function takes an llvm::Value rather than an llvm::Instruction for the 'to' parameter; at some places in the code below, we sometimes use a llvm::Value to start out storing a value and then later store instructions. If a llvm::Value is passed to this, the routine just returns without doing anything; if it is in fact an LLVM::Instruction, then the metadata can be copied to it.

Definition at line 199 of file opt.cpp.

Referenced by lGSToGSBaseOffsets(), lGSToLoadStore(), lImproveMaskedLoad(), lImproveMaskedStore(), lReplacePseudoMaskedStore(), and IntrinsicsOpt::runOnBasicBlock().

static void lEmitLoads ( llvm::Value *  basePtr,
std::vector< CoalescedLoadOp > &  loadOps,
int  elementSize,
llvm::Instruction *  insertBefore 
)
static
static llvm::Value* lExtract248Scale ( llvm::Value *  splatOperand,
int  splatValue,
llvm::Value *  otherOperand,
llvm::Value **  result 
)
static
static void lExtractConstantOffset ( llvm::Value *  vec,
llvm::Value **  constOffset,
llvm::Value **  variableOffset,
llvm::Instruction *  insertBefore 
)
static

Given a vector expression in vec, separate it into a compile-time constant component and a variable component, returning the two parts in constOffset and *variableOffset. (It should be the case that the sum of these two is exactly equal to the original vector.)

This routine only handles some (important) patterns; in some cases it will fail and return components that are actually compile-time constants in *variableOffset.

Finally, if there aren't any constant (or, respectively, variable) components, the corresponding return value may be set to NULL.

Definition at line 1699 of file opt.cpp.

References LLVMGetName().

Referenced by lGSBaseOffsetsGetMoreConst(), and lGSToGSBaseOffsets().

static void lExtractConstOffsets ( const std::vector< llvm::CallInst * > &  coalesceGroup,
int  elementSize,
std::vector< int64_t > *  constOffsets 
)
static

Extract the constant offsets (from the common base pointer) from each of the gathers in a set to be coalesced. These come in as byte offsets, but we'll transform them into offsets in terms of the size of the base scalar type being gathered. (e.g. for an i32 gather, we might have offsets like <0,4,16,20>, which would be transformed to <0,1,4,5> here.)

Definition at line 4043 of file opt.cpp.

References Assert, g, Target::getVectorWidth(), LLVMExtractVectorInts(), and Globals::target.

Referenced by lCoalesceGathers().

static llvm::Value* lExtractFromInserts ( llvm::Value *  v,
unsigned int  index 
)
static

Definition at line 1548 of file opt.cpp.

References Assert.

Referenced by lGetBasePtrAndOffsets().

static llvm::Value* lExtractOffsetVector248Scale ( llvm::Value **  vec)
static

Given a vector of integer offsets to a base pointer being used for a gather or a scatter, see if its root operation is a multiply by a vector of some value by all 2s/4s/8s. If not, return NULL.

If it is return an i32 value of 2, 4, 8 from the function and modify vec so that it points to the operand that is being multiplied by 2/4/8.

We go through all this trouble so that we can pass the i32 scale factor to the {gather,scatter}_base_offsets function as a separate scale factor for the offsets. This in turn is used in a way so that the LLVM x86 code generator matches it to apply x86's free scale by 2x, 4x, or 8x to one of two registers being added together for an addressing calculation.

Definition at line 1924 of file opt.cpp.

References lExtract248Scale(), lIsIntegerSplat(), and LLVMInt32().

Referenced by lGSToGSBaseOffsets().

llvm::Value* lGEPAndLoad ( llvm::Value *  basePtr,
int64_t  offset,
int  align,
llvm::Instruction *  insertBefore,
llvm::Type *  type 
)

Utility routine that computes an offset from a base pointer and then returns the result of a load of the given type from the resulting location:

return *((type *)(basePtr + offset))

Definition at line 3543 of file opt.cpp.

References lGEPInst(), and LLVMInt64().

Referenced by lEmitLoads().

static llvm::Instruction* lGEPInst ( llvm::Value *  ptr,
llvm::Value *  offset,
const char *  name,
llvm::Instruction *  insertBefore 
)
static

Definition at line 337 of file opt.cpp.

References PTYPE.

Referenced by lComputeBasePtr(), lComputeCommonPointer(), and lGEPAndLoad().

static llvm::Value* lGetBasePointer ( llvm::Value *  v,
llvm::Instruction *  insertBefore 
)
static

Given a llvm::Value representing a varying pointer, this function checks to see if all of the elements of the vector have the same value (i.e. there's a common base pointer). If so, it returns the common pointer value; otherwise it returns NULL.

Definition at line 1483 of file opt.cpp.

References g, Target::getVectorWidth(), lCheckForActualPointer(), LLVMFlattenInsertChain(), LLVMGetName(), and Globals::target.

Referenced by lGetBasePtrAndOffsets().

static llvm::Value* lGetBasePtrAndOffsets ( llvm::Value *  ptrs,
llvm::Value **  offsets,
llvm::Instruction *  insertBefore 
)
static

Given a varying pointer in ptrs, this function checks to see if it can be determined to be indexing from a common uniform base pointer. If so, the function returns the base pointer llvm::Value and initializes offsets with an int vector of the per-lane offsets

Definition at line 1567 of file opt.cpp.

References Assert, Globals::debugPrint, g, Target::is32Bit(), ISPC_MAX_NVEC, lExtractFromInserts(), lGetBasePointer(), lGetConstantAddExprBaseOffset(), LLVMDumpValue(), LLVMInt32(), LLVMInt32Vector(), LLVMInt64(), LLVMInt64Vector(), and Globals::target.

Referenced by lGSToGSBaseOffsets().

static llvm::Constant* lGetConstantAddExprBaseOffset ( llvm::Constant *  op0,
llvm::Constant *  op1,
llvm::Constant **  delta 
)
static

Given the two operands to a constant add expression, see if we have the form "base pointer + offset", where op0 is the base pointer and op1 is the offset; if so return the base and the offset.

Definition at line 1530 of file opt.cpp.

Referenced by lGetBasePtrAndOffsets().

static int64_t lGetIntValue ( llvm::Value *  offset)
static

Given an llvm::Value known to be an integer, return its value as an int64_t.

Definition at line 5341 of file opt.cpp.

References Assert.

Referenced by ReplaceStdlibShiftPass::runOnBasicBlock().

static bool lGetMask ( llvm::Value *  factor,
uint64_t *  mask 
)
static

Given an llvm::Value representing a vector mask, see if the value is a constant. If so, return true and set *bits to be the integer mask found by taking the high bits of the mask values in turn and concatenating them into a single integer. In other words, given the 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, we have 0b1001 = 9.

Definition at line 392 of file opt.cpp.

References Assert, g, Target::GetTargetMachine(), lConstElementsToMask(), and Globals::target.

Referenced by lGetMaskStatus(), IntrinsicsOpt::runOnBasicBlock(), and InstructionSimplifyPass::simplifyCall().

static MaskStatus lGetMaskStatus ( llvm::Value *  mask,
int  vecWidth = -1 
)
static

Determines if the given mask value is all on, all off, mixed, or unknown at compile time.

Definition at line 445 of file opt.cpp.

References ALL_OFF, ALL_ON, Assert, g, Target::getVectorWidth(), lGetMask(), MIXED, Globals::target, and UNKNOWN.

Referenced by lImproveMaskedLoad(), lImproveMaskedStore(), GatherCoalescePass::runOnBasicBlock(), and InstructionSimplifyPass::simplifySelect().

static llvm::Constant* lGetOffsetScaleVec ( llvm::Value *  offsetScale,
llvm::Type *  vecType 
)
static
static bool lGetSourcePosFromMetadata ( const llvm::Instruction *  inst,
SourcePos pos 
)
static

We have a protocol with the front-end LLVM IR code generation process that allows us to encode the source file position that corresponds with instructions. (For example, this allows us to issue performance warnings related to things like scatter and gather after optimization has been performed, so that we aren't warning about scatters and gathers that have been improved to stores and loads by optimization passes.) Note that this is slightly redundant with the source file position encoding generated for debugging symbols, though we don't always generate debugging information but we do always generate this position data.

This function finds the SourcePos that the metadata in the instruction (if present) corresponds to. See the implementation of FunctionEmitContext::addGSMetadata(), which encodes the source position during code generation.

Parameters
inst — Instruction to try to find the source position of
pos — Output variable in which to store the position
Returns
True if source file position metadata was present and *pos has been set. False otherwise.

Definition at line 234 of file opt.cpp.

References Assert.

Referenced by lCoalescePerfInfo(), lGSToLoadStore(), lReplacePseudoGS(), and GatherCoalescePass::runOnBasicBlock().

static bool lGSBaseOffsetsGetMoreConst ( llvm::CallInst *  callInst)
static

Try to improve the decomposition between compile-time constant and compile-time unknown offsets in calls to the __pseudo_*_base_offsets* functions. Other other optimizations have run, we will sometimes be able to pull more terms out of the unknown part and add them into the compile-time-known part.

Definition at line 2565 of file opt.cpp.

References Assert, g, Target::hasGather(), Target::hasScatter(), Target::hasVecPrefetch(), LLVMTypes::Int64VectorType, lExtractConstantOffset(), LLVMInt32Vector(), LLVMInt64Vector(), LLVMIntAsType(), m, Module::module, and Globals::target.

Referenced by ImproveMemoryOpsPass::runOnBasicBlock().

static bool lGSToGSBaseOffsets ( llvm::CallInst *  callInst)
static
static bool lGSToLoadStore ( llvm::CallInst *  callInst)
static

After earlier optimization passes have run, we are sometimes able to determine that gathers/scatters are actually accessing memory in a more regular fashion and then change the operation to something simpler and more efficient. For example, if all of the lanes in a gather are reading from the same location, we can instead do a scalar load and broadcast. This pass examines gathers and scatters and tries to simplify them if at all possible.

Todo:
Currently, this only looks for all program instances going to the same location and all going to a linear sequence of locations in memory. There are a number of other cases that might make sense to look for, including things that could be handled with a vector load + shuffle or things that could be handled with hybrids of e.g. 2 4-wide vector loads with AVX, etc.

Definition at line 2781 of file opt.cpp.

References Assert, Globals::ctx, Debug(), LLVMTypes::DoubleType, LLVMTypes::DoubleVectorPointerType, LLVMTypes::FloatType, LLVMTypes::FloatVectorPointerType, g, Target::getVectorWidth(), Target::hasGather(), Target::hasScatter(), LLVMTypes::Int16Type, LLVMTypes::Int16VectorPointerType, LLVMTypes::Int32Type, LLVMTypes::Int32VectorPointerType, LLVMTypes::Int64Type, LLVMTypes::Int64VectorPointerType, LLVMTypes::Int8Type, LLVMTypes::Int8VectorPointerType, lCallInst(), lComputeCommonPointer(), lCopyMetadata(), lGetOffsetScaleVec(), lGetSourcePosFromMetadata(), LLVMGetName(), LLVMInt32(), LLVMVectorIsLinear(), LLVMVectorValuesAllEqual(), m, Module::module, Globals::target, and Warning().

Referenced by ImproveMemoryOpsPass::runOnBasicBlock().

static bool lImproveMaskedLoad ( llvm::CallInst *  callInst,
llvm::BasicBlock::iterator  iter 
)
static
static bool lImproveMaskedStore ( llvm::CallInst *  callInst)
static

Masked stores are generally more complex than regular stores; for example, they require multiple instructions to simulate under SSE. This optimization detects cases where masked stores can be replaced with regular stores or removed entirely, for the cases of an 'all on' mask and an 'all off' mask, respectively.

Definition at line 3061 of file opt.cpp.

References ALL_OFF, ALL_ON, Assert, Opt::forceAlignedMemory, g, Target::getNativeVectorAlignment(), lCopyMetadata(), lGetMaskStatus(), m, Module::module, Globals::opt, and Globals::target.

Referenced by ImproveMemoryOpsPass::runOnBasicBlock().

static bool lInstructionMayWriteToMemory ( llvm::Instruction *  inst)
static

Given an instruction, returns true if the instruction may write to memory. This is a conservative test in that it may return true for some instructions that don't actually end up writing to memory, but should never return false for an instruction that does write to memory.

Definition at line 4149 of file opt.cpp.

Referenced by GatherCoalescePass::runOnBasicBlock().

static bool lIs32BitSafeHelper ( llvm::Value *  v)
static

Check to see if the offset value is composed of a string of Adds, SExts, and Constant Vectors that are 32-bit safe. Recursively explores the operands of Add instructions (as they might themselves be adds that eventually terminate in constant vectors or a SExt.)

Definition at line 2168 of file opt.cpp.

References LLVMTypes::Int32VectorType, and lVectorIs32BitInts().

Referenced by lOffsets32BitSafe().

static bool lIsIntegerSplat ( llvm::Value *  v,
int *  splat 
)
static

Definition at line 1852 of file opt.cpp.

Referenced by lExtractOffsetVector248Scale().

static bool lIsSafeToBlend ( llvm::Value *  lvalue)
static

This routine attempts to determine if the given pointer in lvalue is pointing to stack-allocated memory. It's conservative in that it should never return true for non-stack allocated memory, but may return false for memory that actually is stack allocated. The basic strategy is to traverse through the operands and see if the pointer originally comes from an AllocaInst.

Definition at line 4354 of file opt.cpp.

References g, Target::getVectorWidth(), and Globals::target.

Referenced by lReplacePseudoMaskedStore().

static bool lIsUndef ( llvm::Value *  value)
static

Given an llvm::Value, return true if we can determine that it's an undefined value. This only makes a weak attempt at chasing this down, only detecting flat-out undef values, and bitcasts of undef values.

Todo:
Is it worth working harder to find more of these? It starts to get tricky, since having an undef operand doesn't necessarily mean that the result will be undefined. (And for that matter, is there an LLVM call that will do this for us?)

Definition at line 997 of file opt.cpp.

Referenced by IntrinsicsOpt::runOnBasicBlock().

static bool lOffsets32BitSafe ( llvm::Value **  variableOffsetPtr,
llvm::Value **  constOffsetPtr,
llvm::Instruction *  insertBefore 
)
static

Check to see if the two offset vectors can safely be represented with 32-bit values. If so, return true and update the pointed-to llvm::Value *s to be the 32-bit equivalents.

Definition at line 2110 of file opt.cpp.

References LLVMTypes::Int32VectorType, LLVMGetName(), and lVectorIs32BitInts().

Referenced by lGSToGSBaseOffsets().

static bool lOffsets32BitSafe ( llvm::Value **  offsetPtr,
llvm::Instruction *  insertBefore 
)
static

Check to see if the single offset vector can safely be represented with 32-bit values. If so, return true and update the pointed-to llvm::Value * to be the 32-bit equivalent.

Definition at line 2187 of file opt.cpp.

References LLVMTypes::Int32VectorType, lIs32BitSafeHelper(), and LLVMGetName().

static bool lReplacePseudoGS ( llvm::CallInst *  callInst)
static
static bool lReplacePseudoMaskedStore ( llvm::CallInst *  callInst)
static
static void lSelectLoads ( const std::vector< int64_t > &  loadOffsets,
std::vector< CoalescedLoadOp > *  loads 
)
static

Given a set of offsets from a common base pointer that we need to get loaded into memory, determine a reasonable set of load operations that gets all of the corresponding values in memory (ideally, including as many as possible wider vector loads rather than scalar loads). Return a CoalescedLoadOp for each one in the *loads array.

Definition at line 3423 of file opt.cpp.

References Debug(), lVectorLoadIsEfficient(), and PRId64.

Referenced by lCoalesceGathers().

static std::vector<CoalescedLoadOp> lSplit8WideLoads ( const std::vector< CoalescedLoadOp > &  loadOps,
llvm::Instruction *  insertBefore 
)
static

Convert any loads of 8-wide vectors into two 4-wide vectors (logically). This allows the assembly code below to always operate on 4-wide vectors, which leads to better code. Returns a new vector of load operations.

Definition at line 3631 of file opt.cpp.

References LLVMShuffleVectors().

Referenced by lCoalesceGathers().

static bool lVectorIs32BitInts ( llvm::Value *  v)
static

Definition at line 2092 of file opt.cpp.

References ISPC_MAX_NVEC, and LLVMExtractVectorInts().

Referenced by lIs32BitSafeHelper(), and lOffsets32BitSafe().

static bool lVectorLoadIsEfficient ( std::set< int64_t >::iterator  iter,
std::set< int64_t >::iterator  end,
std::set< int64_t >::iterator *  newIter,
int  vectorWidth 
)
static

This function determines whether it makes sense (and is safe) to generate a vector load of width vectorWidth, starting at *iter. It returns true if so, setting *newIter to point to the next element in the set that isn't taken care of by the generated load. If a vector load of the given width doesn't make sense, then false is returned.

Definition at line 3322 of file opt.cpp.

Referenced by lSelectLoads().

void Optimize ( llvm::Module *  module,
int  optLevel 
)