|
Intel SPMD Program Compiler
1.3.0
|
00001 /* 00002 Copyright (c) 2010-2012, Intel Corporation 00003 All rights reserved. 00004 00005 Redistribution and use in source and binary forms, with or without 00006 modification, are permitted provided that the following conditions are 00007 met: 00008 00009 * Redistributions of source code must retain the above copyright 00010 notice, this list of conditions and the following disclaimer. 00011 00012 * Redistributions in binary form must reproduce the above copyright 00013 notice, this list of conditions and the following disclaimer in the 00014 documentation and/or other materials provided with the distribution. 00015 00016 * Neither the name of Intel Corporation nor the names of its 00017 contributors may be used to endorse or promote products derived from 00018 this software without specific prior written permission. 00019 00020 00021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 00022 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 00023 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 00024 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 00025 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00026 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00027 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 */ 00033 00034 /** @file ctx.cpp 00035 @brief Implementation of the FunctionEmitContext class 00036 */ 00037 00038 #include "ctx.h" 00039 #include "util.h" 00040 #include "func.h" 00041 #include "llvmutil.h" 00042 #include "type.h" 00043 #include "stmt.h" 00044 #include "expr.h" 00045 #include "module.h" 00046 #include "sym.h" 00047 #include <map> 00048 #include <llvm/DerivedTypes.h> 00049 #include <llvm/Instructions.h> 00050 #include <llvm/Support/Dwarf.h> 00051 #include <llvm/Metadata.h> 00052 #include <llvm/Module.h> 00053 00054 /** This is a small utility structure that records information related to one 00055 level of nested control flow. It's mostly used in correctly restoring 00056 the mask and other state as we exit control flow nesting levels. 00057 */ 00058 struct CFInfo { 00059 /** Returns a new instance of the structure that represents entering an 00060 'if' statement */ 00061 static CFInfo *GetIf(bool isUniform, llvm::Value *savedMask); 00062 00063 /** Returns a new instance of the structure that represents entering a 00064 loop. */ 00065 static CFInfo *GetLoop(bool isUniform, llvm::BasicBlock *breakTarget, 00066 llvm::BasicBlock *continueTarget, 00067 llvm::Value *savedBreakLanesPtr, 00068 llvm::Value *savedContinueLanesPtr, 00069 llvm::Value *savedMask, llvm::Value *savedLoopMask); 00070 00071 static CFInfo *GetForeach(FunctionEmitContext::ForeachType ft, 00072 llvm::BasicBlock *breakTarget, 00073 llvm::BasicBlock *continueTarget, 00074 llvm::Value *savedBreakLanesPtr, 00075 llvm::Value *savedContinueLanesPtr, 00076 llvm::Value *savedMask, llvm::Value *savedLoopMask); 00077 00078 static CFInfo *GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget, 00079 llvm::BasicBlock *continueTarget, 00080 llvm::Value *savedBreakLanesPtr, 00081 llvm::Value *savedContinueLanesPtr, 00082 llvm::Value *savedMask, llvm::Value *savedLoopMask, 00083 llvm::Value *switchExpr, 00084 llvm::BasicBlock *bbDefault, 00085 const std::vector<std::pair<int, llvm::BasicBlock *> > *bbCases, 00086 const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbNext, 00087 bool scUniform); 00088 00089 bool IsIf() { return type == If; } 00090 bool IsLoop() { return type == Loop; } 00091 bool IsForeach() { return (type == ForeachRegular || 00092 type == ForeachActive || 00093 type == ForeachUnique); } 00094 bool IsSwitch() { return type == Switch; } 00095 bool IsVarying() { return !isUniform; } 00096 bool IsUniform() { return isUniform; } 00097 00098 enum CFType { If, Loop, ForeachRegular, ForeachActive, ForeachUnique, 00099 Switch }; 00100 CFType type; 00101 bool isUniform; 00102 llvm::BasicBlock *savedBreakTarget, *savedContinueTarget; 00103 llvm::Value *savedBreakLanesPtr, *savedContinueLanesPtr; 00104 llvm::Value *savedMask, *savedLoopMask; 00105 llvm::Value *savedSwitchExpr; 00106 llvm::BasicBlock *savedDefaultBlock; 00107 const std::vector<std::pair<int, llvm::BasicBlock *> > *savedCaseBlocks; 00108 const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNextBlocks; 00109 bool savedSwitchConditionWasUniform; 00110 00111 private: 00112 CFInfo(CFType t, bool uniformIf, llvm::Value *sm) { 00113 Assert(t == If); 00114 type = t; 00115 isUniform = uniformIf; 00116 savedBreakTarget = savedContinueTarget = NULL; 00117 savedBreakLanesPtr = savedContinueLanesPtr = NULL; 00118 savedMask = savedLoopMask = sm; 00119 savedSwitchExpr = NULL; 00120 savedDefaultBlock = NULL; 00121 savedCaseBlocks = NULL; 00122 savedNextBlocks = NULL; 00123 } 00124 CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct, 00125 llvm::Value *sb, llvm::Value *sc, llvm::Value *sm, 00126 llvm::Value *lm, llvm::Value *sse = NULL, llvm::BasicBlock *bbd = NULL, 00127 const std::vector<std::pair<int, llvm::BasicBlock *> > *bbc = NULL, 00128 const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbn = NULL, 00129 bool scu = false) { 00130 Assert(t == Loop || t == Switch); 00131 type = t; 00132 isUniform = iu; 00133 savedBreakTarget = bt; 00134 savedContinueTarget = ct; 00135 savedBreakLanesPtr = sb; 00136 savedContinueLanesPtr = sc; 00137 savedMask = sm; 00138 savedLoopMask = lm; 00139 savedSwitchExpr = sse; 00140 savedDefaultBlock = bbd; 00141 savedCaseBlocks = bbc; 00142 savedNextBlocks = bbn; 00143 savedSwitchConditionWasUniform = scu; 00144 } 00145 CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct, 00146 llvm::Value *sb, llvm::Value *sc, llvm::Value *sm, 00147 llvm::Value *lm) { 00148 Assert(t == ForeachRegular || t == ForeachActive || t == ForeachUnique); 00149 type = t; 00150 isUniform = false; 00151 savedBreakTarget = bt; 00152 savedContinueTarget = ct; 00153 savedBreakLanesPtr = sb; 00154 savedContinueLanesPtr = sc; 00155 savedMask = sm; 00156 savedLoopMask = lm; 00157 savedSwitchExpr = NULL; 00158 savedDefaultBlock = NULL; 00159 savedCaseBlocks = NULL; 00160 savedNextBlocks = NULL; 00161 } 00162 }; 00163 00164 00165 CFInfo * 00166 CFInfo::GetIf(bool isUniform, llvm::Value *savedMask) { 00167 return new CFInfo(If, isUniform, savedMask); 00168 } 00169 00170 00171 CFInfo * 00172 CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget, 00173 llvm::BasicBlock *continueTarget, 00174 llvm::Value *savedBreakLanesPtr, 00175 llvm::Value *savedContinueLanesPtr, 00176 llvm::Value *savedMask, llvm::Value *savedLoopMask) { 00177 return new CFInfo(Loop, isUniform, breakTarget, continueTarget, 00178 savedBreakLanesPtr, savedContinueLanesPtr, 00179 savedMask, savedLoopMask); 00180 } 00181 00182 00183 CFInfo * 00184 CFInfo::GetForeach(FunctionEmitContext::ForeachType ft, 00185 llvm::BasicBlock *breakTarget, 00186 llvm::BasicBlock *continueTarget, 00187 llvm::Value *savedBreakLanesPtr, 00188 llvm::Value *savedContinueLanesPtr, 00189 llvm::Value *savedMask, llvm::Value *savedForeachMask) { 00190 CFType cfType; 00191 switch (ft) { 00192 case FunctionEmitContext::FOREACH_REGULAR: 00193 cfType = ForeachRegular; 00194 break; 00195 case FunctionEmitContext::FOREACH_ACTIVE: 00196 cfType = ForeachActive; 00197 break; 00198 case FunctionEmitContext::FOREACH_UNIQUE: 00199 cfType = ForeachUnique; 00200 break; 00201 default: 00202 FATAL("Unhandled foreach type"); 00203 return NULL; 00204 } 00205 00206 return new CFInfo(cfType, breakTarget, continueTarget, 00207 savedBreakLanesPtr, savedContinueLanesPtr, 00208 savedMask, savedForeachMask); 00209 } 00210 00211 00212 CFInfo * 00213 CFInfo::GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget, 00214 llvm::BasicBlock *continueTarget, 00215 llvm::Value *savedBreakLanesPtr, 00216 llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask, 00217 llvm::Value *savedLoopMask, llvm::Value *savedSwitchExpr, 00218 llvm::BasicBlock *savedDefaultBlock, 00219 const std::vector<std::pair<int, llvm::BasicBlock *> > *savedCases, 00220 const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNext, 00221 bool savedSwitchConditionUniform) { 00222 return new CFInfo(Switch, isUniform, breakTarget, continueTarget, 00223 savedBreakLanesPtr, savedContinueLanesPtr, 00224 savedMask, savedLoopMask, savedSwitchExpr, savedDefaultBlock, 00225 savedCases, savedNext, savedSwitchConditionUniform); 00226 } 00227 00228 /////////////////////////////////////////////////////////////////////////// 00229 00230 FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, 00231 llvm::Function *lf, 00232 SourcePos firstStmtPos) { 00233 function = func; 00234 llvmFunction = lf; 00235 00236 /* Create a new basic block to store all of the allocas */ 00237 allocaBlock = llvm::BasicBlock::Create(*g->ctx, "allocas", llvmFunction, 0); 00238 bblock = llvm::BasicBlock::Create(*g->ctx, "entry", llvmFunction, 0); 00239 /* But jump from it immediately into the real entry block */ 00240 llvm::BranchInst::Create(bblock, allocaBlock); 00241 00242 funcStartPos = funSym->pos; 00243 00244 internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory"); 00245 StoreInst(LLVMMaskAllOn, internalMaskPointer); 00246 00247 functionMaskValue = LLVMMaskAllOn; 00248 00249 fullMaskPointer = AllocaInst(LLVMTypes::MaskType, "full_mask_memory"); 00250 StoreInst(LLVMMaskAllOn, fullMaskPointer); 00251 00252 loopMask = NULL; 00253 breakLanesPtr = continueLanesPtr = NULL; 00254 breakTarget = continueTarget = NULL; 00255 00256 switchExpr = NULL; 00257 caseBlocks = NULL; 00258 defaultBlock = NULL; 00259 nextBlocks = NULL; 00260 00261 returnedLanesPtr = AllocaInst(LLVMTypes::MaskType, "returned_lanes_memory"); 00262 StoreInst(LLVMMaskAllOff, returnedLanesPtr); 00263 00264 launchedTasks = false; 00265 launchGroupHandlePtr = AllocaInst(LLVMTypes::VoidPointerType, "launch_group_handle"); 00266 StoreInst(llvm::Constant::getNullValue(LLVMTypes::VoidPointerType), 00267 launchGroupHandlePtr); 00268 00269 disableGSWarningCount = 0; 00270 00271 const Type *returnType = function->GetReturnType(); 00272 if (!returnType || Type::Equal(returnType, AtomicType::Void)) 00273 returnValuePtr = NULL; 00274 else { 00275 llvm::Type *ftype = returnType->LLVMType(g->ctx); 00276 returnValuePtr = AllocaInst(ftype, "return_value_memory"); 00277 } 00278 00279 if (g->opt.disableMaskAllOnOptimizations) { 00280 // This is really disgusting. We want to be able to fool the 00281 // compiler to not be able to reason that the mask is all on, but 00282 // we don't want to pay too much of a price at the start of each 00283 // function to do so. 00284 // 00285 // Therefore: first, we declare a module-static __all_on_mask 00286 // variable that will hold an "all on" mask value. At the start of 00287 // each function, we'll load its value and call SetInternalMaskAnd 00288 // with the result to set the current internal execution mask. 00289 // (This is a no-op at runtime.) 00290 // 00291 // Then, to fool the optimizer that maybe the value of 00292 // __all_on_mask can't be guaranteed to be "all on", we emit a 00293 // dummy function that sets __all_on_mask be "all off". (That 00294 // function is never actually called.) 00295 llvm::Value *globalAllOnMaskPtr = 00296 m->module->getNamedGlobal("__all_on_mask"); 00297 if (globalAllOnMaskPtr == NULL) { 00298 globalAllOnMaskPtr = 00299 new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false, 00300 llvm::GlobalValue::InternalLinkage, 00301 LLVMMaskAllOn, "__all_on_mask"); 00302 00303 char buf[256]; 00304 sprintf(buf, "__off_all_on_mask_%s", g->target.GetISAString()); 00305 llvm::Constant *offFunc = 00306 m->module->getOrInsertFunction(buf, LLVMTypes::VoidType, 00307 NULL); 00308 AssertPos(currentPos, llvm::isa<llvm::Function>(offFunc)); 00309 llvm::BasicBlock *offBB = 00310 llvm::BasicBlock::Create(*g->ctx, "entry", 00311 (llvm::Function *)offFunc, 0); 00312 new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB); 00313 llvm::ReturnInst::Create(*g->ctx, offBB); 00314 } 00315 00316 llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, "all_on_mask"); 00317 SetInternalMaskAnd(LLVMMaskAllOn, allOnMask); 00318 } 00319 00320 if (m->diBuilder) { 00321 currentPos = funSym->pos; 00322 00323 /* If debugging is enabled, tell the debug information emission 00324 code about this new function */ 00325 diFile = funcStartPos.GetDIFile(); 00326 AssertPos(currentPos, diFile.Verify()); 00327 00328 llvm::DIScope scope = llvm::DIScope(m->diBuilder->getCU()); 00329 AssertPos(currentPos, scope.Verify()); 00330 00331 const FunctionType *functionType = function->GetType(); 00332 llvm::DIType diSubprogramType; 00333 if (functionType == NULL) 00334 AssertPos(currentPos, m->errorCount > 0); 00335 else { 00336 diSubprogramType = functionType->GetDIType(scope); 00337 AssertPos(currentPos, diSubprogramType.Verify()); 00338 } 00339 00340 std::string mangledName = llvmFunction->getName(); 00341 if (mangledName == funSym->name) 00342 mangledName = ""; 00343 00344 bool isStatic = (funSym->storageClass == SC_STATIC); 00345 bool isOptimized = (g->opt.level > 0); 00346 int firstLine = funcStartPos.first_line; 00347 int flags = (llvm::DIDescriptor::FlagPrototyped); 00348 00349 diSubprogram = 00350 m->diBuilder->createFunction(diFile /* scope */, funSym->name, 00351 mangledName, diFile, 00352 firstLine, diSubprogramType, 00353 isStatic, true, /* is defn */ 00354 #ifndef LLVM_3_0 00355 firstLine, 00356 #endif // !LLVM_3_0 00357 flags, 00358 isOptimized, llvmFunction); 00359 AssertPos(currentPos, diSubprogram.Verify()); 00360 00361 /* And start a scope representing the initial function scope */ 00362 StartScope(); 00363 } 00364 } 00365 00366 00367 FunctionEmitContext::~FunctionEmitContext() { 00368 AssertPos(currentPos, controlFlowInfo.size() == 0); 00369 AssertPos(currentPos, debugScopes.size() == (m->diBuilder ? 1 : 0)); 00370 } 00371 00372 00373 const Function * 00374 FunctionEmitContext::GetFunction() const { 00375 return function; 00376 } 00377 00378 00379 llvm::BasicBlock * 00380 FunctionEmitContext::GetCurrentBasicBlock() { 00381 return bblock; 00382 } 00383 00384 00385 void 00386 FunctionEmitContext::SetCurrentBasicBlock(llvm::BasicBlock *bb) { 00387 bblock = bb; 00388 } 00389 00390 00391 llvm::Value * 00392 FunctionEmitContext::GetFunctionMask() { 00393 return functionMaskValue; 00394 } 00395 00396 00397 llvm::Value * 00398 FunctionEmitContext::GetInternalMask() { 00399 return LoadInst(internalMaskPointer, "load_mask"); 00400 } 00401 00402 00403 llvm::Value * 00404 FunctionEmitContext::GetFullMask() { 00405 return BinaryOperator(llvm::Instruction::And, GetInternalMask(), 00406 functionMaskValue, "internal_mask&function_mask"); 00407 } 00408 00409 00410 llvm::Value * 00411 FunctionEmitContext::GetFullMaskPointer() { 00412 return fullMaskPointer; 00413 } 00414 00415 00416 void 00417 FunctionEmitContext::SetFunctionMask(llvm::Value *value) { 00418 functionMaskValue = value; 00419 if (bblock != NULL) 00420 StoreInst(GetFullMask(), fullMaskPointer); 00421 } 00422 00423 00424 void 00425 FunctionEmitContext::SetLoopMask(llvm::Value *value) { 00426 loopMask = value; 00427 } 00428 00429 00430 void 00431 FunctionEmitContext::SetInternalMask(llvm::Value *value) { 00432 StoreInst(value, internalMaskPointer); 00433 // kludge so that __mask returns the right value in ispc code. 00434 StoreInst(GetFullMask(), fullMaskPointer); 00435 } 00436 00437 00438 void 00439 FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) { 00440 llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, 00441 test, "oldMask&test"); 00442 SetInternalMask(mask); 00443 } 00444 00445 00446 void 00447 FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test) { 00448 llvm::Value *notTest = BinaryOperator(llvm::Instruction::Xor, test, LLVMMaskAllOn, 00449 "~test"); 00450 llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, notTest, 00451 "oldMask&~test"); 00452 SetInternalMask(mask); 00453 } 00454 00455 00456 void 00457 FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) { 00458 AssertPos(currentPos, bblock != NULL); 00459 llvm::Value *any = Any(GetFullMask()); 00460 BranchInst(btrue, bfalse, any); 00461 // It's illegal to add any additional instructions to the basic block 00462 // now that it's terminated, so set bblock to NULL to be safe 00463 bblock = NULL; 00464 } 00465 00466 00467 void 00468 FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) { 00469 AssertPos(currentPos, bblock != NULL); 00470 llvm::Value *all = All(GetFullMask()); 00471 BranchInst(btrue, bfalse, all); 00472 // It's illegal to add any additional instructions to the basic block 00473 // now that it's terminated, so set bblock to NULL to be safe 00474 bblock = NULL; 00475 } 00476 00477 00478 void 00479 FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) { 00480 AssertPos(currentPos, bblock != NULL); 00481 // switch sense of true/false bblocks 00482 BranchIfMaskAny(bfalse, btrue); 00483 // It's illegal to add any additional instructions to the basic block 00484 // now that it's terminated, so set bblock to NULL to be safe 00485 bblock = NULL; 00486 } 00487 00488 00489 void 00490 FunctionEmitContext::StartUniformIf() { 00491 controlFlowInfo.push_back(CFInfo::GetIf(true, GetInternalMask())); 00492 } 00493 00494 00495 void 00496 FunctionEmitContext::StartVaryingIf(llvm::Value *oldMask) { 00497 controlFlowInfo.push_back(CFInfo::GetIf(false, oldMask)); 00498 } 00499 00500 00501 void 00502 FunctionEmitContext::EndIf() { 00503 CFInfo *ci = popCFState(); 00504 // Make sure we match up with a Start{Uniform,Varying}If(). 00505 AssertPos(currentPos, ci->IsIf()); 00506 00507 // 'uniform' ifs don't change the mask so we only need to restore the 00508 // mask going into the if for 'varying' if statements 00509 if (ci->IsUniform() || bblock == NULL) 00510 return; 00511 00512 // We can't just restore the mask as it was going into the 'if' 00513 // statement. First we have to take into account any program 00514 // instances that have executed 'return' statements; the restored 00515 // mask must be off for those lanes. 00516 restoreMaskGivenReturns(ci->savedMask); 00517 00518 // If the 'if' statement is inside a loop with a 'varying' 00519 // condition, we also need to account for any break or continue 00520 // statements that executed inside the 'if' statmeent; we also must 00521 // leave the lane masks for the program instances that ran those 00522 // off after we restore the mask after the 'if'. The code below 00523 // ends up being optimized out in the case that there were no break 00524 // or continue statements (and breakLanesPtr and continueLanesPtr 00525 // have their initial 'all off' values), so we don't need to check 00526 // for that here. 00527 // 00528 // There are three general cases to deal with here: 00529 // - Loops: both break and continue are allowed, and thus the corresponding 00530 // lane mask pointers are non-NULL 00531 // - Foreach: only continueLanesPtr may be non-NULL 00532 // - Switch: only breakLanesPtr may be non-NULL 00533 if (continueLanesPtr != NULL || breakLanesPtr != NULL) { 00534 // We want to compute: 00535 // newMask = (oldMask & ~(breakLanes | continueLanes)), 00536 // treading breakLanes or continueLanes as "all off" if the 00537 // corresponding pointer is NULL. 00538 llvm::Value *bcLanes = NULL; 00539 00540 if (continueLanesPtr != NULL) 00541 bcLanes = LoadInst(continueLanesPtr, "continue_lanes"); 00542 else 00543 bcLanes = LLVMMaskAllOff; 00544 00545 if (breakLanesPtr != NULL) { 00546 llvm::Value *breakLanes = LoadInst(breakLanesPtr, "break_lanes"); 00547 bcLanes = BinaryOperator(llvm::Instruction::Or, bcLanes, 00548 breakLanes, "|break_lanes"); 00549 } 00550 00551 llvm::Value *notBreakOrContinue = 00552 NotOperator(bcLanes, "!(break|continue)_lanes"); 00553 llvm::Value *oldMask = GetInternalMask(); 00554 llvm::Value *newMask = 00555 BinaryOperator(llvm::Instruction::And, oldMask, 00556 notBreakOrContinue, "new_mask"); 00557 SetInternalMask(newMask); 00558 } 00559 } 00560 00561 00562 void 00563 FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct, 00564 bool uniformCF) { 00565 // Store the current values of various loop-related state so that we 00566 // can restore it when we exit this loop. 00567 llvm::Value *oldMask = GetInternalMask(); 00568 controlFlowInfo.push_back(CFInfo::GetLoop(uniformCF, breakTarget, 00569 continueTarget, breakLanesPtr, 00570 continueLanesPtr, oldMask, loopMask)); 00571 if (uniformCF) 00572 // If the loop has a uniform condition, we don't need to track 00573 // which lanes 'break' or 'continue'; all of the running ones go 00574 // together, so we just jump 00575 breakLanesPtr = continueLanesPtr = NULL; 00576 else { 00577 // For loops with varying conditions, allocate space to store masks 00578 // that record which lanes have done these 00579 continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "continue_lanes_memory"); 00580 StoreInst(LLVMMaskAllOff, continueLanesPtr); 00581 breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory"); 00582 StoreInst(LLVMMaskAllOff, breakLanesPtr); 00583 } 00584 00585 breakTarget = bt; 00586 continueTarget = ct; 00587 loopMask = NULL; // this better be set by the loop! 00588 } 00589 00590 00591 void 00592 FunctionEmitContext::EndLoop() { 00593 CFInfo *ci = popCFState(); 00594 AssertPos(currentPos, ci->IsLoop()); 00595 00596 if (!ci->IsUniform()) 00597 // If the loop had a 'uniform' test, then it didn't make any 00598 // changes to the mask so there's nothing to restore. If it had a 00599 // varying test, we need to restore the mask to what it was going 00600 // into the loop, but still leaving off any lanes that executed a 00601 // 'return' statement. 00602 restoreMaskGivenReturns(ci->savedMask); 00603 } 00604 00605 00606 void 00607 FunctionEmitContext::StartForeach(ForeachType ft) { 00608 // Issue an error if we're in a nested foreach... 00609 if (ft == FOREACH_REGULAR) { 00610 for (int i = 0; i < (int)controlFlowInfo.size(); ++i) { 00611 if (controlFlowInfo[i]->type == CFInfo::ForeachRegular) { 00612 Error(currentPos, "Nested \"foreach\" statements are currently " 00613 "illegal."); 00614 break; 00615 // Don't return here, however, and in turn allow the caller to 00616 // do the rest of its codegen and then call EndForeach() 00617 // normally--the idea being that this gives a chance to find 00618 // any other errors inside the body of the foreach loop... 00619 } 00620 } 00621 } 00622 00623 // Store the current values of various loop-related state so that we 00624 // can restore it when we exit this loop. 00625 llvm::Value *oldMask = GetInternalMask(); 00626 controlFlowInfo.push_back(CFInfo::GetForeach(ft, breakTarget, continueTarget, 00627 breakLanesPtr, continueLanesPtr, 00628 oldMask, loopMask)); 00629 breakLanesPtr = NULL; 00630 breakTarget = NULL; 00631 00632 continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "foreach_continue_lanes"); 00633 StoreInst(LLVMMaskAllOff, continueLanesPtr); 00634 continueTarget = NULL; // should be set by SetContinueTarget() 00635 00636 loopMask = NULL; 00637 } 00638 00639 00640 void 00641 FunctionEmitContext::EndForeach() { 00642 CFInfo *ci = popCFState(); 00643 AssertPos(currentPos, ci->IsForeach()); 00644 } 00645 00646 00647 void 00648 FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) { 00649 if (!bblock) 00650 return; 00651 00652 // Restore the mask to the given old mask, but leave off any lanes that 00653 // executed a return statement. 00654 // newMask = (oldMask & ~returnedLanes) 00655 llvm::Value *returnedLanes = LoadInst(returnedLanesPtr, 00656 "returned_lanes"); 00657 llvm::Value *notReturned = NotOperator(returnedLanes, "~returned_lanes"); 00658 llvm::Value *newMask = BinaryOperator(llvm::Instruction::And, 00659 oldMask, notReturned, "new_mask"); 00660 SetInternalMask(newMask); 00661 } 00662 00663 00664 /** Returns "true" if the first enclosing non-if control flow expression is 00665 a "switch" statement. 00666 */ 00667 bool 00668 FunctionEmitContext::inSwitchStatement() const { 00669 // Go backwards through controlFlowInfo, since we add new nested scopes 00670 // to the back. 00671 int i = controlFlowInfo.size() - 1; 00672 while (i >= 0 && controlFlowInfo[i]->IsIf()) 00673 --i; 00674 // Got to the first non-if (or end of CF info) 00675 if (i == -1) 00676 return false; 00677 return controlFlowInfo[i]->IsSwitch(); 00678 } 00679 00680 00681 void 00682 FunctionEmitContext::Break(bool doCoherenceCheck) { 00683 if (breakTarget == NULL) { 00684 Error(currentPos, "\"break\" statement is illegal outside of " 00685 "for/while/do loops and \"switch\" statements."); 00686 return; 00687 } 00688 AssertPos(currentPos, controlFlowInfo.size() > 0); 00689 00690 if (bblock == NULL) 00691 return; 00692 00693 if (inSwitchStatement() == true && 00694 switchConditionWasUniform == true && 00695 ifsInCFAllUniform(CFInfo::Switch)) { 00696 // We know that all program instances are executing the break, so 00697 // just jump to the block immediately after the switch. 00698 AssertPos(currentPos, breakTarget != NULL); 00699 BranchInst(breakTarget); 00700 bblock = NULL; 00701 return; 00702 } 00703 00704 // If all of the enclosing 'if' tests in the loop have uniform control 00705 // flow or if we can tell that the mask is all on, then we can just 00706 // jump to the break location. 00707 if (inSwitchStatement() == false && ifsInCFAllUniform(CFInfo::Loop)) { 00708 BranchInst(breakTarget); 00709 if (ifsInCFAllUniform(CFInfo::Loop) && doCoherenceCheck) 00710 Warning(currentPos, "Coherent break statement not necessary in " 00711 "fully uniform control flow."); 00712 // Set bblock to NULL since the jump has terminated the basic block 00713 bblock = NULL; 00714 } 00715 else { 00716 // Varying switch, uniform switch where the 'break' is under 00717 // varying control flow, or a loop with varying 'if's above the 00718 // break. In these cases, we need to update the mask of the lanes 00719 // that have executed a 'break' statement: 00720 // breakLanes = breakLanes | mask 00721 AssertPos(currentPos, breakLanesPtr != NULL); 00722 llvm::Value *mask = GetInternalMask(); 00723 llvm::Value *breakMask = LoadInst(breakLanesPtr, 00724 "break_mask"); 00725 llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, 00726 mask, breakMask, "mask|break_mask"); 00727 StoreInst(newMask, breakLanesPtr); 00728 00729 // Set the current mask to be all off, just in case there are any 00730 // statements in the same scope after the 'break'. Most of time 00731 // this will be optimized away since we'll likely end the scope of 00732 // an 'if' statement and restore the mask then. 00733 SetInternalMask(LLVMMaskAllOff); 00734 00735 if (doCoherenceCheck) { 00736 if (continueTarget != NULL) 00737 // If the user has indicated that this is a 'coherent' 00738 // break statement, then check to see if the mask is all 00739 // off. If so, we have to conservatively jump to the 00740 // continueTarget, not the breakTarget, since part of the 00741 // reason the mask is all off may be due to 'continue' 00742 // statements that executed in the current loop iteration. 00743 jumpIfAllLoopLanesAreDone(continueTarget); 00744 else if (breakTarget != NULL) 00745 // Similarly handle these for switch statements, where we 00746 // only have a break target. 00747 jumpIfAllLoopLanesAreDone(breakTarget); 00748 } 00749 } 00750 } 00751 00752 00753 static bool 00754 lEnclosingLoopIsForeachActive(const std::vector<CFInfo *> &controlFlowInfo) { 00755 for (int i = (int)controlFlowInfo.size() - 1; i >= 0; --i) { 00756 if (controlFlowInfo[i]->type == CFInfo::ForeachActive) 00757 return true; 00758 } 00759 return false; 00760 } 00761 00762 00763 void 00764 FunctionEmitContext::Continue(bool doCoherenceCheck) { 00765 if (!continueTarget) { 00766 Error(currentPos, "\"continue\" statement illegal outside of " 00767 "for/while/do/foreach loops."); 00768 return; 00769 } 00770 AssertPos(currentPos, controlFlowInfo.size() > 0); 00771 00772 if (ifsInCFAllUniform(CFInfo::Loop) || 00773 lEnclosingLoopIsForeachActive(controlFlowInfo)) { 00774 // Similarly to 'break' statements, we can immediately jump to the 00775 // continue target if we're only in 'uniform' control flow within 00776 // loop or if we can tell that the mask is all on. Here, we can 00777 // also jump if the enclosing loop is a 'foreach_active' loop, in 00778 // which case we know that only a single program instance is 00779 // executing. 00780 AddInstrumentationPoint("continue: uniform CF, jumped"); 00781 if (doCoherenceCheck) 00782 Warning(currentPos, "Coherent continue statement not necessary in " 00783 "fully uniform control flow."); 00784 BranchInst(continueTarget); 00785 bblock = NULL; 00786 } 00787 else { 00788 // Otherwise update the stored value of which lanes have 'continue'd. 00789 // continueLanes = continueLanes | mask 00790 AssertPos(currentPos, continueLanesPtr); 00791 llvm::Value *mask = GetInternalMask(); 00792 llvm::Value *continueMask = 00793 LoadInst(continueLanesPtr, "continue_mask"); 00794 llvm::Value *newMask = 00795 BinaryOperator(llvm::Instruction::Or, mask, continueMask, 00796 "mask|continueMask"); 00797 StoreInst(newMask, continueLanesPtr); 00798 00799 // And set the current mask to be all off in case there are any 00800 // statements in the same scope after the 'continue' 00801 SetInternalMask(LLVMMaskAllOff); 00802 00803 if (doCoherenceCheck) 00804 // If this is a 'coherent continue' statement, then emit the 00805 // code to see if all of the lanes are now off due to 00806 // breaks/continues and jump to the continue target if so. 00807 jumpIfAllLoopLanesAreDone(continueTarget); 00808 } 00809 } 00810 00811 00812 /** This function checks to see if all of the 'if' statements (if any) 00813 between the current scope and the first enclosing loop/switch of given 00814 control flow type have 'uniform' tests. 00815 */ 00816 bool 00817 FunctionEmitContext::ifsInCFAllUniform(int type) const { 00818 AssertPos(currentPos, controlFlowInfo.size() > 0); 00819 // Go backwards through controlFlowInfo, since we add new nested scopes 00820 // to the back. Stop once we come to the first enclosing control flow 00821 // structure of the desired type. 00822 int i = controlFlowInfo.size() - 1; 00823 while (i >= 0 && controlFlowInfo[i]->type != type) { 00824 if (controlFlowInfo[i]->isUniform == false) 00825 // Found a scope due to an 'if' statement with a varying test 00826 return false; 00827 --i; 00828 } 00829 AssertPos(currentPos, i >= 0); // else we didn't find the expected control flow type! 00830 return true; 00831 } 00832 00833 00834 void 00835 FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) { 00836 llvm::Value *allDone = NULL; 00837 AssertPos(currentPos, continueLanesPtr != NULL); 00838 if (breakLanesPtr == NULL) { 00839 // In a foreach loop, break and return are illegal, and 00840 // breakLanesPtr is NULL. In this case, the mask is guaranteed to 00841 // be all on at the start of each iteration, so we only need to 00842 // check if all lanes have continued.. 00843 llvm::Value *continued = LoadInst(continueLanesPtr, 00844 "continue_lanes"); 00845 allDone = All(continued); 00846 } 00847 else { 00848 // Check to see if (returned lanes | continued lanes | break lanes) is 00849 // equal to the value of mask at the start of the loop iteration. If 00850 // so, everyone is done and we can jump to the given target 00851 llvm::Value *returned = LoadInst(returnedLanesPtr, 00852 "returned_lanes"); 00853 llvm::Value *continued = LoadInst(continueLanesPtr, 00854 "continue_lanes"); 00855 llvm::Value *breaked = LoadInst(breakLanesPtr, "break_lanes"); 00856 llvm::Value *returnedOrContinued = BinaryOperator(llvm::Instruction::Or, 00857 returned, continued, 00858 "returned|continued"); 00859 llvm::Value *returnedOrContinuedOrBreaked = 00860 BinaryOperator(llvm::Instruction::Or, returnedOrContinued, 00861 breaked, "returned|continued"); 00862 00863 // Do we match the mask at loop entry? 00864 allDone = MasksAllEqual(returnedOrContinuedOrBreaked, loopMask); 00865 } 00866 00867 llvm::BasicBlock *bAll = CreateBasicBlock("all_continued_or_breaked"); 00868 llvm::BasicBlock *bNotAll = CreateBasicBlock("not_all_continued_or_breaked"); 00869 BranchInst(bAll, bNotAll, allDone); 00870 00871 // If so, have an extra basic block along the way to add 00872 // instrumentation, if the user asked for it. 00873 bblock = bAll; 00874 AddInstrumentationPoint("break/continue: all dynamically went"); 00875 BranchInst(target); 00876 00877 // And set the current basic block to a new one for future instructions 00878 // for the path where we weren't able to jump 00879 bblock = bNotAll; 00880 AddInstrumentationPoint("break/continue: not all went"); 00881 } 00882 00883 00884 void 00885 FunctionEmitContext::RestoreContinuedLanes() { 00886 if (continueLanesPtr == NULL) 00887 return; 00888 00889 // mask = mask & continueFlags 00890 llvm::Value *mask = GetInternalMask(); 00891 llvm::Value *continueMask = LoadInst(continueLanesPtr, 00892 "continue_mask"); 00893 llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or, 00894 mask, continueMask, "mask|continue_mask"); 00895 SetInternalMask(orMask); 00896 00897 // continueLanes = 0 00898 StoreInst(LLVMMaskAllOff, continueLanesPtr); 00899 } 00900 00901 00902 void 00903 FunctionEmitContext::StartSwitch(bool cfIsUniform, llvm::BasicBlock *bbBreak) { 00904 llvm::Value *oldMask = GetInternalMask(); 00905 controlFlowInfo.push_back(CFInfo::GetSwitch(cfIsUniform, breakTarget, 00906 continueTarget, breakLanesPtr, 00907 continueLanesPtr, oldMask, 00908 loopMask, switchExpr, defaultBlock, 00909 caseBlocks, nextBlocks, 00910 switchConditionWasUniform)); 00911 00912 breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory"); 00913 StoreInst(LLVMMaskAllOff, breakLanesPtr); 00914 breakTarget = bbBreak; 00915 00916 continueLanesPtr = NULL; 00917 continueTarget = NULL; 00918 loopMask = NULL; 00919 00920 // These will be set by the SwitchInst() method 00921 switchExpr = NULL; 00922 defaultBlock = NULL; 00923 caseBlocks = NULL; 00924 nextBlocks = NULL; 00925 } 00926 00927 00928 void 00929 FunctionEmitContext::EndSwitch() { 00930 AssertPos(currentPos, bblock != NULL); 00931 00932 CFInfo *ci = popCFState(); 00933 if (ci->IsVarying() && bblock != NULL) 00934 restoreMaskGivenReturns(ci->savedMask); 00935 } 00936 00937 00938 /** Emit code to check for an "all off" mask before the code for a 00939 case or default label in a "switch" statement. 00940 */ 00941 void 00942 FunctionEmitContext::addSwitchMaskCheck(llvm::Value *mask) { 00943 llvm::Value *allOff = None(mask); 00944 llvm::BasicBlock *bbSome = CreateBasicBlock("case_default_on"); 00945 00946 // Find the basic block for the case or default label immediately after 00947 // the current one in the switch statement--that's where we want to 00948 // jump if the mask is all off at this label. 00949 AssertPos(currentPos, nextBlocks->find(bblock) != nextBlocks->end()); 00950 llvm::BasicBlock *bbNext = nextBlocks->find(bblock)->second; 00951 00952 // Jump to the next one of the mask is all off; otherwise jump to the 00953 // newly created block that will hold the actual code for this label. 00954 BranchInst(bbNext, bbSome, allOff); 00955 SetCurrentBasicBlock(bbSome); 00956 } 00957 00958 00959 /** Returns the execution mask at entry to the first enclosing "switch" 00960 statement. */ 00961 llvm::Value * 00962 FunctionEmitContext::getMaskAtSwitchEntry() { 00963 AssertPos(currentPos, controlFlowInfo.size() > 0); 00964 int i = controlFlowInfo.size() - 1; 00965 while (i >= 0 && controlFlowInfo[i]->type != CFInfo::Switch) 00966 --i; 00967 AssertPos(currentPos, i != -1); 00968 return controlFlowInfo[i]->savedMask; 00969 } 00970 00971 00972 void 00973 FunctionEmitContext::EmitDefaultLabel(bool checkMask, SourcePos pos) { 00974 if (inSwitchStatement() == false) { 00975 Error(pos, "\"default\" label illegal outside of \"switch\" " 00976 "statement."); 00977 return; 00978 } 00979 00980 // If there's a default label in the switch, a basic block for it 00981 // should have been provided in the previous call to SwitchInst(). 00982 AssertPos(currentPos, defaultBlock != NULL); 00983 00984 if (bblock != NULL) 00985 // The previous case in the switch fell through, or we're in a 00986 // varying switch; terminate the current block with a jump to the 00987 // block for the code for the default label. 00988 BranchInst(defaultBlock); 00989 SetCurrentBasicBlock(defaultBlock); 00990 00991 if (switchConditionWasUniform) 00992 // Nothing more to do for this case; return back to the caller, 00993 // which will then emit the code for the default case. 00994 return; 00995 00996 // For a varying switch, we need to update the execution mask. 00997 // 00998 // First, compute the mask that corresponds to which program instances 00999 // should execute the "default" code; this corresponds to the set of 01000 // program instances that don't match any of the case statements. 01001 // Therefore, we generate code that compares the value of the switch 01002 // expression to the value associated with each of the "case" 01003 // statements such that the surviving lanes didn't match any of them. 01004 llvm::Value *matchesDefault = getMaskAtSwitchEntry(); 01005 for (int i = 0; i < (int)caseBlocks->size(); ++i) { 01006 int value = (*caseBlocks)[i].first; 01007 llvm::Value *valueVec = (switchExpr->getType() == LLVMTypes::Int32VectorType) ? 01008 LLVMInt32Vector(value) : LLVMInt64Vector(value); 01009 // TODO: for AVX2 at least, the following generates better code 01010 // than doing ICMP_NE and skipping the NotOperator() below; file a 01011 // LLVM bug? 01012 llvm::Value *matchesCaseValue = 01013 CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr, 01014 valueVec, "cmp_case_value"); 01015 matchesCaseValue = I1VecToBoolVec(matchesCaseValue); 01016 01017 llvm::Value *notMatchesCaseValue = NotOperator(matchesCaseValue); 01018 matchesDefault = BinaryOperator(llvm::Instruction::And, matchesDefault, 01019 notMatchesCaseValue, "default&~case_match"); 01020 } 01021 01022 // The mask may have some lanes on, which corresponds to the previous 01023 // label falling through; compute the updated mask by ANDing with the 01024 // current mask. 01025 llvm::Value *oldMask = GetInternalMask(); 01026 llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask, 01027 matchesDefault, "old_mask|matches_default"); 01028 SetInternalMask(newMask); 01029 01030 if (checkMask) 01031 addSwitchMaskCheck(newMask); 01032 } 01033 01034 01035 void 01036 FunctionEmitContext::EmitCaseLabel(int value, bool checkMask, SourcePos pos) { 01037 if (inSwitchStatement() == false) { 01038 Error(pos, "\"case\" label illegal outside of \"switch\" statement."); 01039 return; 01040 } 01041 01042 // Find the basic block for this case statement. 01043 llvm::BasicBlock *bbCase = NULL; 01044 AssertPos(currentPos, caseBlocks != NULL); 01045 for (int i = 0; i < (int)caseBlocks->size(); ++i) 01046 if ((*caseBlocks)[i].first == value) { 01047 bbCase = (*caseBlocks)[i].second; 01048 break; 01049 } 01050 AssertPos(currentPos, bbCase != NULL); 01051 01052 if (bblock != NULL) 01053 // fall through from the previous case 01054 BranchInst(bbCase); 01055 SetCurrentBasicBlock(bbCase); 01056 01057 if (switchConditionWasUniform) 01058 return; 01059 01060 // update the mask: first, get a mask that indicates which program 01061 // instances have a value for the switch expression that matches this 01062 // case statement. 01063 llvm::Value *valueVec = (switchExpr->getType() == LLVMTypes::Int32VectorType) ? 01064 LLVMInt32Vector(value) : LLVMInt64Vector(value); 01065 llvm::Value *matchesCaseValue = 01066 CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr, 01067 valueVec, "cmp_case_value"); 01068 matchesCaseValue = I1VecToBoolVec(matchesCaseValue); 01069 01070 // If a lane was off going into the switch, we don't care if has a 01071 // value in the switch expression that happens to match this case. 01072 llvm::Value *entryMask = getMaskAtSwitchEntry(); 01073 matchesCaseValue = BinaryOperator(llvm::Instruction::And, entryMask, 01074 matchesCaseValue, "entry_mask&case_match"); 01075 01076 // Take the surviving lanes and turn on the mask for them. 01077 llvm::Value *oldMask = GetInternalMask(); 01078 llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask, 01079 matchesCaseValue, "mask|case_match"); 01080 SetInternalMask(newMask); 01081 01082 if (checkMask) 01083 addSwitchMaskCheck(newMask); 01084 } 01085 01086 01087 void 01088 FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault, 01089 const std::vector<std::pair<int, llvm::BasicBlock *> > &bbCases, 01090 const std::map<llvm::BasicBlock *, llvm::BasicBlock *> &bbNext) { 01091 // The calling code should have called StartSwitch() before calling 01092 // SwitchInst(). 01093 AssertPos(currentPos, controlFlowInfo.size() && 01094 controlFlowInfo.back()->IsSwitch()); 01095 01096 switchExpr = expr; 01097 defaultBlock = bbDefault; 01098 caseBlocks = new std::vector<std::pair<int, llvm::BasicBlock *> >(bbCases); 01099 nextBlocks = new std::map<llvm::BasicBlock *, llvm::BasicBlock *>(bbNext); 01100 switchConditionWasUniform = 01101 (llvm::isa<llvm::VectorType>(expr->getType()) == false); 01102 01103 if (switchConditionWasUniform == true) { 01104 // For a uniform switch condition, just wire things up to the LLVM 01105 // switch instruction. 01106 llvm::SwitchInst *s = llvm::SwitchInst::Create(expr, bbDefault, 01107 bbCases.size(), bblock); 01108 for (int i = 0; i < (int)bbCases.size(); ++i) { 01109 if (expr->getType() == LLVMTypes::Int32Type) 01110 s->addCase(LLVMInt32(bbCases[i].first), bbCases[i].second); 01111 else { 01112 AssertPos(currentPos, expr->getType() == LLVMTypes::Int64Type); 01113 s->addCase(LLVMInt64(bbCases[i].first), bbCases[i].second); 01114 } 01115 } 01116 01117 AddDebugPos(s); 01118 // switch is a terminator 01119 bblock = NULL; 01120 } 01121 else { 01122 // For a varying switch, we first turn off all lanes of the mask 01123 SetInternalMask(LLVMMaskAllOff); 01124 01125 if (nextBlocks->size() > 0) { 01126 // If there are any labels inside the switch, jump to the first 01127 // one; any code before the first label won't be executed by 01128 // anyone. 01129 std::map<llvm::BasicBlock *, llvm::BasicBlock *>::const_iterator iter; 01130 iter = nextBlocks->find(NULL); 01131 AssertPos(currentPos, iter != nextBlocks->end()); 01132 llvm::BasicBlock *bbFirst = iter->second; 01133 BranchInst(bbFirst); 01134 bblock = NULL; 01135 } 01136 } 01137 } 01138 01139 01140 int 01141 FunctionEmitContext::VaryingCFDepth() const { 01142 int sum = 0; 01143 for (unsigned int i = 0; i < controlFlowInfo.size(); ++i) 01144 if (controlFlowInfo[i]->IsVarying()) 01145 ++sum; 01146 return sum; 01147 } 01148 01149 01150 bool 01151 FunctionEmitContext::InForeachLoop() const { 01152 for (unsigned int i = 0; i < controlFlowInfo.size(); ++i) 01153 if (controlFlowInfo[i]->IsForeach()) 01154 return true; 01155 return false; 01156 } 01157 01158 01159 void 01160 FunctionEmitContext::DisableGatherScatterWarnings() { 01161 ++disableGSWarningCount; 01162 } 01163 01164 01165 void 01166 FunctionEmitContext::EnableGatherScatterWarnings() { 01167 --disableGSWarningCount; 01168 } 01169 01170 01171 01172 bool 01173 FunctionEmitContext::initLabelBBlocks(ASTNode *node, void *data) { 01174 LabeledStmt *ls = dynamic_cast<LabeledStmt *>(node); 01175 if (ls == NULL) 01176 return true; 01177 01178 FunctionEmitContext *ctx = (FunctionEmitContext *)data; 01179 01180 if (ctx->labelMap.find(ls->name) != ctx->labelMap.end()) 01181 Error(ls->pos, "Multiple labels named \"%s\" in function.", 01182 ls->name.c_str()); 01183 else { 01184 llvm::BasicBlock *bb = ctx->CreateBasicBlock(ls->name.c_str()); 01185 ctx->labelMap[ls->name] = bb; 01186 } 01187 return true; 01188 } 01189 01190 01191 void 01192 FunctionEmitContext::InitializeLabelMap(Stmt *code) { 01193 labelMap.erase(labelMap.begin(), labelMap.end()); 01194 WalkAST(code, initLabelBBlocks, NULL, this); 01195 } 01196 01197 01198 llvm::BasicBlock * 01199 FunctionEmitContext::GetLabeledBasicBlock(const std::string &label) { 01200 if (labelMap.find(label) != labelMap.end()) 01201 return labelMap[label]; 01202 else 01203 return NULL; 01204 } 01205 01206 std::vector<std::string> 01207 FunctionEmitContext::GetLabels() { 01208 // Initialize vector to the right size 01209 std::vector<std::string> labels(labelMap.size()); 01210 01211 // Iterate through labelMap and grab only the keys 01212 std::map<std::string, llvm::BasicBlock*>::iterator iter; 01213 for (iter=labelMap.begin(); iter != labelMap.end(); iter++) 01214 labels.push_back(iter->first); 01215 01216 return labels; 01217 } 01218 01219 01220 void 01221 FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { 01222 const Type *returnType = function->GetReturnType(); 01223 if (Type::Equal(returnType, AtomicType::Void)) { 01224 if (expr != NULL) 01225 Error(expr->pos, "Can't return non-void type \"%s\" from void function.", 01226 expr->GetType()->GetString().c_str()); 01227 } 01228 else { 01229 if (expr == NULL) { 01230 Error(funcStartPos, "Must provide return value for return " 01231 "statement for non-void function."); 01232 return; 01233 } 01234 01235 expr = TypeConvertExpr(expr, returnType, "return statement"); 01236 if (expr != NULL) { 01237 llvm::Value *retVal = expr->GetValue(this); 01238 if (retVal != NULL) { 01239 if (returnType->IsUniformType() || 01240 CastType<ReferenceType>(returnType) != NULL) 01241 StoreInst(retVal, returnValuePtr); 01242 else { 01243 // Use a masked store to store the value of the expression 01244 // in the return value memory; this preserves the return 01245 // values from other lanes that may have executed return 01246 // statements previously. 01247 StoreInst(retVal, returnValuePtr, GetInternalMask(), 01248 returnType, PointerType::GetUniform(returnType)); 01249 } 01250 } 01251 } 01252 } 01253 01254 if (VaryingCFDepth() == 0) { 01255 // If there is only uniform control flow between us and the 01256 // function entry, then it's guaranteed that all lanes are running, 01257 // so we can just emit a true return instruction 01258 AddInstrumentationPoint("return: uniform control flow"); 01259 ReturnInst(); 01260 } 01261 else { 01262 // Otherwise we update the returnedLanes value by ANDing it with 01263 // the current lane mask. 01264 llvm::Value *oldReturnedLanes = 01265 LoadInst(returnedLanesPtr, "old_returned_lanes"); 01266 llvm::Value *newReturnedLanes = 01267 BinaryOperator(llvm::Instruction::Or, oldReturnedLanes, 01268 GetInternalMask(), "old_mask|returned_lanes"); 01269 01270 // For 'coherent' return statements, emit code to check if all 01271 // lanes have returned 01272 if (doCoherenceCheck) { 01273 // if newReturnedLanes == functionMaskValue, get out of here! 01274 llvm::Value *cmp = MasksAllEqual(functionMaskValue, 01275 newReturnedLanes); 01276 llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return"); 01277 llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return"); 01278 BranchInst(bDoReturn, bNoReturn, cmp); 01279 01280 bblock = bDoReturn; 01281 AddInstrumentationPoint("return: all lanes have returned"); 01282 ReturnInst(); 01283 01284 bblock = bNoReturn; 01285 } 01286 // Otherwise update returnedLanesPtr and turn off all of the lanes 01287 // in the current mask so that any subsequent statements in the 01288 // same scope after the return have no effect 01289 StoreInst(newReturnedLanes, returnedLanesPtr); 01290 AddInstrumentationPoint("return: some but not all lanes have returned"); 01291 SetInternalMask(LLVMMaskAllOff); 01292 } 01293 } 01294 01295 01296 llvm::Value * 01297 FunctionEmitContext::Any(llvm::Value *mask) { 01298 llvm::Value *mmval = LaneMask(mask); 01299 return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, mmval, 01300 LLVMInt64(0), LLVMGetName(mask, "_any")); 01301 } 01302 01303 01304 llvm::Value * 01305 FunctionEmitContext::All(llvm::Value *mask) { 01306 llvm::Value *mmval = LaneMask(mask); 01307 llvm::Value *allOnMaskValue = (g->target.vectorWidth == 64) ? 01308 LLVMInt64(~0ull) : 01309 LLVMInt64((1ull << g->target.vectorWidth) - 1); 01310 01311 return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mmval, 01312 allOnMaskValue, LLVMGetName(mask, "_all")); 01313 } 01314 01315 01316 llvm::Value * 01317 FunctionEmitContext::None(llvm::Value *mask) { 01318 llvm::Value *mmval = LaneMask(mask); 01319 return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mmval, 01320 LLVMInt64(0), LLVMGetName(mask, "_none")); 01321 } 01322 01323 01324 llvm::Value * 01325 FunctionEmitContext::LaneMask(llvm::Value *v) { 01326 // Call the target-dependent movmsk function to turn the vector mask 01327 // into an i64 value 01328 std::vector<Symbol *> mm; 01329 m->symbolTable->LookupFunction("__movmsk", &mm); 01330 if (g->target.maskBitCount == 1) 01331 AssertPos(currentPos, mm.size() == 1); 01332 else 01333 // There should be one with signed int signature, one unsigned int. 01334 AssertPos(currentPos, mm.size() == 2); 01335 // We can actually call either one, since both are i32s as far as 01336 // LLVM's type system is concerned... 01337 llvm::Function *fmm = mm[0]->function; 01338 return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk")); 01339 } 01340 01341 01342 llvm::Value * 01343 FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) { 01344 #if 0 01345 // Compare the two masks to get a vector of i1s 01346 llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, 01347 v1, v2, "v1==v2"); 01348 // Turn that into a bool vector type (often i32s) 01349 cmp = I1VecToBoolVec(cmp); 01350 // And see if it's all on 01351 return All(cmp); 01352 #else 01353 llvm::Value *mm1 = LaneMask(v1); 01354 llvm::Value *mm2 = LaneMask(v2); 01355 return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2, 01356 LLVMGetName("equal", v1, v2)); 01357 #endif 01358 } 01359 01360 01361 llvm::Value * 01362 FunctionEmitContext::GetStringPtr(const std::string &str) { 01363 #ifdef LLVM_3_0 01364 llvm::Constant *lstr = llvm::ConstantArray::get(*g->ctx, str); 01365 #else 01366 llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str); 01367 #endif 01368 llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage; 01369 llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(), 01370 true /*isConst*/, 01371 linkage, lstr, "__str"); 01372 return new llvm::BitCastInst(lstrPtr, LLVMTypes::VoidPointerType, 01373 "str_void_ptr", bblock); 01374 } 01375 01376 01377 llvm::BasicBlock * 01378 FunctionEmitContext::CreateBasicBlock(const char *name) { 01379 return llvm::BasicBlock::Create(*g->ctx, name, llvmFunction); 01380 } 01381 01382 01383 llvm::Value * 01384 FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) { 01385 if (b == NULL) { 01386 AssertPos(currentPos, m->errorCount > 0); 01387 return NULL; 01388 } 01389 01390 if (g->target.maskBitCount == 1) 01391 return b; 01392 01393 llvm::ArrayType *at = 01394 llvm::dyn_cast<llvm::ArrayType>(b->getType()); 01395 if (at) { 01396 // If we're given an array of vectors of i1s, then do the 01397 // conversion for each of the elements 01398 llvm::Type *boolArrayType = 01399 llvm::ArrayType::get(LLVMTypes::BoolVectorType, at->getNumElements()); 01400 llvm::Value *ret = llvm::UndefValue::get(boolArrayType); 01401 01402 for (unsigned int i = 0; i < at->getNumElements(); ++i) { 01403 llvm::Value *elt = ExtractInst(b, i); 01404 llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType, 01405 LLVMGetName(elt, "_to_boolvec32")); 01406 ret = InsertInst(ret, sext, i); 01407 } 01408 return ret; 01409 } 01410 else 01411 return SExtInst(b, LLVMTypes::BoolVectorType, "val_to_boolvec32"); 01412 } 01413 01414 01415 static llvm::Value * 01416 lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) { 01417 #ifdef LLVM_3_0 01418 llvm::Constant *sConstant = llvm::ConstantArray::get(*g->ctx, s); 01419 #else 01420 llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s); 01421 #endif 01422 llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(), 01423 true /* const */, 01424 llvm::GlobalValue::InternalLinkage, 01425 sConstant, s); 01426 llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) }; 01427 llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]); 01428 return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock); 01429 } 01430 01431 01432 void 01433 FunctionEmitContext::AddInstrumentationPoint(const char *note) { 01434 AssertPos(currentPos, note != NULL); 01435 if (!g->emitInstrumentation) 01436 return; 01437 01438 std::vector<llvm::Value *> args; 01439 // arg 1: filename as string 01440 args.push_back(lGetStringAsValue(bblock, currentPos.name)); 01441 // arg 2: provided note 01442 args.push_back(lGetStringAsValue(bblock, note)); 01443 // arg 3: line number 01444 args.push_back(LLVMInt32(currentPos.first_line)); 01445 // arg 4: current mask, movmsk'ed down to an int64 01446 args.push_back(LaneMask(GetFullMask())); 01447 01448 llvm::Function *finst = m->module->getFunction("ISPCInstrument"); 01449 CallInst(finst, NULL, args, ""); 01450 } 01451 01452 01453 void 01454 FunctionEmitContext::SetDebugPos(SourcePos pos) { 01455 currentPos = pos; 01456 } 01457 01458 01459 SourcePos 01460 FunctionEmitContext::GetDebugPos() const { 01461 return currentPos; 01462 } 01463 01464 01465 void 01466 FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos, 01467 llvm::DIScope *scope) { 01468 llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(value); 01469 if (inst != NULL && m->diBuilder) { 01470 SourcePos p = pos ? *pos : currentPos; 01471 if (p.first_line != 0) 01472 // If first_line == 0, then we're in the middle of setting up 01473 // the standard library or the like; don't add debug positions 01474 // for those functions 01475 inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column, 01476 scope ? *scope : GetDIScope())); 01477 } 01478 } 01479 01480 01481 void 01482 FunctionEmitContext::StartScope() { 01483 if (m->diBuilder != NULL) { 01484 llvm::DIScope parentScope; 01485 if (debugScopes.size() > 0) 01486 parentScope = debugScopes.back(); 01487 else 01488 parentScope = diSubprogram; 01489 01490 llvm::DILexicalBlock lexicalBlock = 01491 m->diBuilder->createLexicalBlock(parentScope, diFile, 01492 currentPos.first_line, 01493 currentPos.first_column); 01494 AssertPos(currentPos, lexicalBlock.Verify()); 01495 debugScopes.push_back(lexicalBlock); 01496 } 01497 } 01498 01499 01500 void 01501 FunctionEmitContext::EndScope() { 01502 if (m->diBuilder != NULL) { 01503 AssertPos(currentPos, debugScopes.size() > 0); 01504 debugScopes.pop_back(); 01505 } 01506 } 01507 01508 01509 llvm::DIScope 01510 FunctionEmitContext::GetDIScope() const { 01511 AssertPos(currentPos, debugScopes.size() > 0); 01512 return debugScopes.back(); 01513 } 01514 01515 01516 void 01517 FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) { 01518 if (m->diBuilder == NULL) 01519 return; 01520 01521 llvm::DIScope scope = GetDIScope(); 01522 llvm::DIType diType = sym->type->GetDIType(scope); 01523 AssertPos(currentPos, diType.Verify()); 01524 llvm::DIVariable var = 01525 m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_auto_variable, 01526 scope, 01527 sym->name, 01528 sym->pos.GetDIFile(), 01529 sym->pos.first_line, 01530 diType, 01531 true /* preserve through opts */); 01532 AssertPos(currentPos, var.Verify()); 01533 llvm::Instruction *declareInst = 01534 m->diBuilder->insertDeclare(sym->storagePtr, var, bblock); 01535 AddDebugPos(declareInst, &sym->pos, &scope); 01536 } 01537 01538 01539 void 01540 FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) { 01541 if (m->diBuilder == NULL) 01542 return; 01543 01544 llvm::DIScope scope = diSubprogram; 01545 llvm::DIType diType = sym->type->GetDIType(scope); 01546 AssertPos(currentPos, diType.Verify()); 01547 int flags = 0; 01548 01549 llvm::DIVariable var = 01550 m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_arg_variable, 01551 scope, 01552 sym->name, 01553 sym->pos.GetDIFile(), 01554 sym->pos.first_line, 01555 diType, 01556 true /* preserve through opts */, 01557 flags, 01558 argNum+1); 01559 AssertPos(currentPos, var.Verify()); 01560 llvm::Instruction *declareInst = 01561 m->diBuilder->insertDeclare(sym->storagePtr, var, bblock); 01562 AddDebugPos(declareInst, &sym->pos, &scope); 01563 } 01564 01565 01566 /** If the given type is an array of vector types, then it's the 01567 representation of an ispc VectorType with varying elements. If it is 01568 one of these, return the array size (i.e. the VectorType's size). 01569 Otherwise return zero. 01570 */ 01571 static int 01572 lArrayVectorWidth(llvm::Type *t) { 01573 llvm::ArrayType *arrayType = 01574 llvm::dyn_cast<llvm::ArrayType>(t); 01575 if (arrayType == NULL) 01576 return 0; 01577 01578 // We shouldn't be seeing arrays of anything but vectors being passed 01579 // to things like FunctionEmitContext::BinaryOperator() as operands. 01580 llvm::VectorType *vectorElementType = 01581 llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType()); 01582 Assert((vectorElementType != NULL && 01583 (int)vectorElementType->getNumElements() == g->target.vectorWidth)); 01584 01585 return (int)arrayType->getNumElements(); 01586 } 01587 01588 01589 llvm::Value * 01590 FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst, 01591 llvm::Value *v0, llvm::Value *v1, 01592 const char *name) { 01593 if (v0 == NULL || v1 == NULL) { 01594 AssertPos(currentPos, m->errorCount > 0); 01595 return NULL; 01596 } 01597 01598 AssertPos(currentPos, v0->getType() == v1->getType()); 01599 llvm::Type *type = v0->getType(); 01600 int arraySize = lArrayVectorWidth(type); 01601 if (arraySize == 0) { 01602 llvm::Instruction *bop = 01603 llvm::BinaryOperator::Create(inst, v0, v1, name ? name : "", bblock); 01604 AddDebugPos(bop); 01605 return bop; 01606 } 01607 else { 01608 // If this is an ispc VectorType, apply the binary operator to each 01609 // of the elements of the array (which in turn should be either 01610 // scalar types or llvm::VectorTypes.) 01611 llvm::Value *ret = llvm::UndefValue::get(type); 01612 for (int i = 0; i < arraySize; ++i) { 01613 llvm::Value *a = ExtractInst(v0, i); 01614 llvm::Value *b = ExtractInst(v1, i); 01615 llvm::Value *op = BinaryOperator(inst, a, b); 01616 ret = InsertInst(ret, op, i); 01617 } 01618 return ret; 01619 } 01620 } 01621 01622 01623 llvm::Value * 01624 FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) { 01625 if (v == NULL) { 01626 AssertPos(currentPos, m->errorCount > 0); 01627 return NULL; 01628 } 01629 01630 // Similarly to BinaryOperator, do the operation on all the elements of 01631 // the array if we're given an array type; otherwise just do the 01632 // regular llvm operation. 01633 llvm::Type *type = v->getType(); 01634 int arraySize = lArrayVectorWidth(type); 01635 if (arraySize == 0) { 01636 llvm::Instruction *binst = 01637 llvm::BinaryOperator::CreateNot(v, name ? name : "not", bblock); 01638 AddDebugPos(binst); 01639 return binst; 01640 } 01641 else { 01642 llvm::Value *ret = llvm::UndefValue::get(type); 01643 for (int i = 0; i < arraySize; ++i) { 01644 llvm::Value *a = ExtractInst(v, i); 01645 llvm::Value *op = 01646 llvm::BinaryOperator::CreateNot(a, name ? name : "not", bblock); 01647 AddDebugPos(op); 01648 ret = InsertInst(ret, op, i); 01649 } 01650 return ret; 01651 } 01652 } 01653 01654 01655 // Given the llvm Type that represents an ispc VectorType, return an 01656 // equally-shaped type with boolean elements. (This is the type that will 01657 // be returned from CmpInst with ispc VectorTypes). 01658 static llvm::Type * 01659 lGetMatchingBoolVectorType(llvm::Type *type) { 01660 llvm::ArrayType *arrayType = 01661 llvm::dyn_cast<llvm::ArrayType>(type); 01662 Assert(arrayType != NULL); 01663 01664 llvm::VectorType *vectorElementType = 01665 llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType()); 01666 Assert(vectorElementType != NULL); 01667 Assert((int)vectorElementType->getNumElements() == g->target.vectorWidth); 01668 01669 llvm::Type *base = 01670 llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth); 01671 return llvm::ArrayType::get(base, arrayType->getNumElements()); 01672 } 01673 01674 01675 llvm::Value * 01676 FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, 01677 llvm::CmpInst::Predicate pred, 01678 llvm::Value *v0, llvm::Value *v1, 01679 const char *name) { 01680 if (v0 == NULL || v1 == NULL) { 01681 AssertPos(currentPos, m->errorCount > 0); 01682 return NULL; 01683 } 01684 01685 AssertPos(currentPos, v0->getType() == v1->getType()); 01686 llvm::Type *type = v0->getType(); 01687 int arraySize = lArrayVectorWidth(type); 01688 if (arraySize == 0) { 01689 llvm::Instruction *ci = 01690 llvm::CmpInst::Create(inst, pred, v0, v1, name ? name : "cmp", 01691 bblock); 01692 AddDebugPos(ci); 01693 return ci; 01694 } 01695 else { 01696 llvm::Type *boolType = lGetMatchingBoolVectorType(type); 01697 llvm::Value *ret = llvm::UndefValue::get(boolType); 01698 for (int i = 0; i < arraySize; ++i) { 01699 llvm::Value *a = ExtractInst(v0, i); 01700 llvm::Value *b = ExtractInst(v1, i); 01701 llvm::Value *op = CmpInst(inst, pred, a, b, name); 01702 ret = InsertInst(ret, op, i); 01703 } 01704 return ret; 01705 } 01706 } 01707 01708 01709 llvm::Value * 01710 FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) { 01711 if (value == NULL) { 01712 AssertPos(currentPos, m->errorCount > 0); 01713 return NULL; 01714 } 01715 01716 llvm::Value *ret = NULL; 01717 llvm::Type *eltType = value->getType(); 01718 01719 llvm::PointerType *pt = 01720 llvm::dyn_cast<llvm::PointerType>(eltType); 01721 if (pt != NULL) { 01722 // Varying pointers are represented as vectors of i32/i64s 01723 ret = llvm::UndefValue::get(LLVMTypes::VoidPointerVectorType); 01724 value = PtrToIntInst(value); 01725 } 01726 else 01727 // All other varying types are represented as vectors of the 01728 // underlying type. 01729 ret = llvm::UndefValue::get(llvm::VectorType::get(eltType, 01730 g->target.vectorWidth)); 01731 01732 for (int i = 0; i < g->target.vectorWidth; ++i) { 01733 llvm::Twine n = llvm::Twine("smear.") + llvm::Twine(name ? name : "") + 01734 llvm::Twine(i); 01735 ret = InsertInst(ret, value, i, n.str().c_str()); 01736 } 01737 01738 return ret; 01739 } 01740 01741 01742 llvm::Value * 01743 FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, 01744 const char *name) { 01745 if (value == NULL) { 01746 AssertPos(currentPos, m->errorCount > 0); 01747 return NULL; 01748 } 01749 01750 if (name == NULL) 01751 name = LLVMGetName(value, "_bitcast"); 01752 01753 llvm::Instruction *inst = new llvm::BitCastInst(value, type, name, bblock); 01754 AddDebugPos(inst); 01755 return inst; 01756 } 01757 01758 01759 llvm::Value * 01760 FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) { 01761 if (value == NULL) { 01762 AssertPos(currentPos, m->errorCount > 0); 01763 return NULL; 01764 } 01765 01766 if (llvm::isa<llvm::VectorType>(value->getType())) 01767 // no-op for varying pointers; they're already vectors of ints 01768 return value; 01769 01770 if (name == NULL) 01771 name = LLVMGetName(value, "_ptr2int"); 01772 llvm::Type *type = LLVMTypes::PointerIntType; 01773 llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock); 01774 AddDebugPos(inst); 01775 return inst; 01776 } 01777 01778 01779 llvm::Value * 01780 FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, 01781 const char *name) { 01782 if (value == NULL) { 01783 AssertPos(currentPos, m->errorCount > 0); 01784 return NULL; 01785 } 01786 01787 if (name == NULL) 01788 name = LLVMGetName(value, "_ptr2int"); 01789 01790 llvm::Type *fromType = value->getType(); 01791 if (llvm::isa<llvm::VectorType>(fromType)) { 01792 // varying pointer 01793 if (fromType == toType) 01794 // already the right type--done 01795 return value; 01796 else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits()) 01797 return TruncInst(value, toType, name); 01798 else { 01799 AssertPos(currentPos, fromType->getScalarSizeInBits() < 01800 toType->getScalarSizeInBits()); 01801 return ZExtInst(value, toType, name); 01802 } 01803 } 01804 01805 llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock); 01806 AddDebugPos(inst); 01807 return inst; 01808 } 01809 01810 01811 llvm::Value * 01812 FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType, 01813 const char *name) { 01814 if (value == NULL) { 01815 AssertPos(currentPos, m->errorCount > 0); 01816 return NULL; 01817 } 01818 01819 if (name == NULL) 01820 name = LLVMGetName(value, "_int2ptr"); 01821 01822 llvm::Type *fromType = value->getType(); 01823 if (llvm::isa<llvm::VectorType>(fromType)) { 01824 // varying pointer 01825 if (fromType == toType) 01826 // done 01827 return value; 01828 else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits()) 01829 return TruncInst(value, toType, name); 01830 else { 01831 AssertPos(currentPos, fromType->getScalarSizeInBits() < 01832 toType->getScalarSizeInBits()); 01833 return ZExtInst(value, toType, name); 01834 } 01835 } 01836 01837 llvm::Instruction *inst = new llvm::IntToPtrInst(value, toType, name, 01838 bblock); 01839 AddDebugPos(inst); 01840 return inst; 01841 } 01842 01843 01844 llvm::Instruction * 01845 FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type, 01846 const char *name) { 01847 if (value == NULL) { 01848 AssertPos(currentPos, m->errorCount > 0); 01849 return NULL; 01850 } 01851 01852 if (name == NULL) 01853 name = LLVMGetName(value, "_trunc"); 01854 01855 // TODO: we should probably handle the array case as in 01856 // e.g. BitCastInst(), but we don't currently need that functionality 01857 llvm::Instruction *inst = new llvm::TruncInst(value, type, name, bblock); 01858 AddDebugPos(inst); 01859 return inst; 01860 } 01861 01862 01863 llvm::Instruction * 01864 FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value, 01865 llvm::Type *type, const char *name) { 01866 if (value == NULL) { 01867 AssertPos(currentPos, m->errorCount > 0); 01868 return NULL; 01869 } 01870 01871 if (name == NULL) 01872 name = LLVMGetName(value, "_cast"); 01873 01874 // TODO: we should probably handle the array case as in 01875 // e.g. BitCastInst(), but we don't currently need that functionality 01876 llvm::Instruction *inst = llvm::CastInst::Create(op, value, type, name, 01877 bblock); 01878 AddDebugPos(inst); 01879 return inst; 01880 } 01881 01882 01883 llvm::Instruction * 01884 FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type, 01885 const char *name) { 01886 if (value == NULL) { 01887 AssertPos(currentPos, m->errorCount > 0); 01888 return NULL; 01889 } 01890 01891 if (name == NULL) 01892 name = LLVMGetName(value, "_cast"); 01893 01894 // TODO: we should probably handle the array case as in 01895 // e.g. BitCastInst(), but we don't currently need that functionality 01896 llvm::Instruction *inst = llvm::CastInst::CreateFPCast(value, type, name, bblock); 01897 AddDebugPos(inst); 01898 return inst; 01899 } 01900 01901 01902 llvm::Instruction * 01903 FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type, 01904 const char *name) { 01905 if (value == NULL) { 01906 AssertPos(currentPos, m->errorCount > 0); 01907 return NULL; 01908 } 01909 01910 if (name == NULL) 01911 name = LLVMGetName(value, "_sext"); 01912 01913 // TODO: we should probably handle the array case as in 01914 // e.g. BitCastInst(), but we don't currently need that functionality 01915 llvm::Instruction *inst = new llvm::SExtInst(value, type, name, bblock); 01916 AddDebugPos(inst); 01917 return inst; 01918 } 01919 01920 01921 llvm::Instruction * 01922 FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type, 01923 const char *name) { 01924 if (value == NULL) { 01925 AssertPos(currentPos, m->errorCount > 0); 01926 return NULL; 01927 } 01928 01929 if (name == NULL) 01930 name = LLVMGetName(value, "_zext"); 01931 01932 // TODO: we should probably handle the array case as in 01933 // e.g. BitCastInst(), but we don't currently need that functionality 01934 llvm::Instruction *inst = new llvm::ZExtInst(value, type, name, bblock); 01935 AddDebugPos(inst); 01936 return inst; 01937 } 01938 01939 01940 /** Utility routine used by the GetElementPtrInst() methods; given a 01941 pointer to some type (either uniform or varying) and an index (also 01942 either uniform or varying), this returns the new pointer (varying if 01943 appropriate) given by offsetting the base pointer by the index times 01944 the size of the object that the pointer points to. 01945 */ 01946 llvm::Value * 01947 FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index, 01948 const Type *ptrType) { 01949 // Find the scale factor for the index (i.e. the size of the object 01950 // that the pointer(s) point(s) to. 01951 const Type *scaleType = ptrType->GetBaseType(); 01952 llvm::Value *scale = g->target.SizeOf(scaleType->LLVMType(g->ctx), bblock); 01953 01954 bool indexIsVarying = 01955 llvm::isa<llvm::VectorType>(index->getType()); 01956 llvm::Value *offset = NULL; 01957 if (indexIsVarying == false) { 01958 // Truncate or sign extend the index as appropriate to a 32 or 01959 // 64-bit type. 01960 if ((g->target.is32Bit || g->opt.force32BitAddressing) && 01961 index->getType() == LLVMTypes::Int64Type) 01962 index = TruncInst(index, LLVMTypes::Int32Type); 01963 else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) && 01964 index->getType() == LLVMTypes::Int32Type) 01965 index = SExtInst(index, LLVMTypes::Int64Type); 01966 01967 // do a scalar multiply to get the offset as index * scale and then 01968 // smear the result out to be a vector; this is more efficient than 01969 // first promoting both the scale and the index to vectors and then 01970 // multiplying. 01971 offset = BinaryOperator(llvm::Instruction::Mul, scale, index); 01972 offset = SmearUniform(offset); 01973 } 01974 else { 01975 // Similarly, truncate or sign extend the index to be a 32 or 64 01976 // bit vector type 01977 if ((g->target.is32Bit || g->opt.force32BitAddressing) && 01978 index->getType() == LLVMTypes::Int64VectorType) 01979 index = TruncInst(index, LLVMTypes::Int32VectorType); 01980 else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) && 01981 index->getType() == LLVMTypes::Int32VectorType) 01982 index = SExtInst(index, LLVMTypes::Int64VectorType); 01983 01984 scale = SmearUniform(scale); 01985 01986 // offset = index * scale 01987 offset = BinaryOperator(llvm::Instruction::Mul, scale, index, 01988 LLVMGetName("mul", scale, index)); 01989 } 01990 01991 // For 64-bit targets, if we've been doing our offset calculations in 01992 // 32 bits, we still have to convert to a 64-bit value before we 01993 // actually add the offset to the pointer. 01994 if (g->target.is32Bit == false && g->opt.force32BitAddressing == true) 01995 offset = SExtInst(offset, LLVMTypes::Int64VectorType, 01996 LLVMGetName(offset, "_to_64")); 01997 01998 // Smear out the pointer to be varying; either the base pointer or the 01999 // index must be varying for this method to be called. 02000 bool baseIsUniform = 02001 (llvm::isa<llvm::PointerType>(basePtr->getType())); 02002 AssertPos(currentPos, baseIsUniform == false || indexIsVarying == true); 02003 llvm::Value *varyingPtr = baseIsUniform ? SmearUniform(basePtr) : basePtr; 02004 02005 // newPtr = ptr + offset 02006 return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset, 02007 LLVMGetName(basePtr, "_offset")); 02008 } 02009 02010 02011 void 02012 FunctionEmitContext::MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1) { 02013 llvm::Type *type0 = (*v0)->getType(); 02014 llvm::Type *type1 = (*v1)->getType(); 02015 02016 // First, promote to a vector type if one of the two values is a vector 02017 // type 02018 if (llvm::isa<llvm::VectorType>(type0) && 02019 !llvm::isa<llvm::VectorType>(type1)) { 02020 *v1 = SmearUniform(*v1, "smear_v1"); 02021 type1 = (*v1)->getType(); 02022 } 02023 if (!llvm::isa<llvm::VectorType>(type0) && 02024 llvm::isa<llvm::VectorType>(type1)) { 02025 *v0 = SmearUniform(*v0, "smear_v0"); 02026 type0 = (*v0)->getType(); 02027 } 02028 02029 // And then update to match bit widths 02030 if (type0 == LLVMTypes::Int32VectorType && 02031 type1 == LLVMTypes::Int64VectorType) 02032 *v0 = SExtInst(*v0, LLVMTypes::Int64VectorType); 02033 else if (type1 == LLVMTypes::Int32VectorType && 02034 type0 == LLVMTypes::Int64VectorType) 02035 *v1 = SExtInst(*v1, LLVMTypes::Int64VectorType); 02036 } 02037 02038 02039 /** Given an integer index in indexValue that's indexing into an array of 02040 soa<> structures with given soaWidth, compute the two sub-indices we 02041 need to do the actual indexing calculation: 02042 02043 subIndices[0] = (indexValue >> log(soaWidth)) 02044 subIndices[1] = (indexValue & (soaWidth-1)) 02045 */ 02046 static llvm::Value * 02047 lComputeSliceIndex(FunctionEmitContext *ctx, int soaWidth, 02048 llvm::Value *indexValue, llvm::Value *ptrSliceOffset, 02049 llvm::Value **newSliceOffset) { 02050 // Compute the log2 of the soaWidth. 02051 Assert(soaWidth > 0); 02052 int logWidth = 0, sw = soaWidth; 02053 while (sw > 1) { 02054 ++logWidth; 02055 sw >>= 1; 02056 } 02057 Assert((1 << logWidth) == soaWidth); 02058 02059 ctx->MatchIntegerTypes(&indexValue, &ptrSliceOffset); 02060 02061 llvm::Type *indexType = indexValue->getType(); 02062 llvm::Value *shift = LLVMIntAsType(logWidth, indexType); 02063 llvm::Value *mask = LLVMIntAsType(soaWidth-1, indexType); 02064 02065 llvm::Value *indexSum = 02066 ctx->BinaryOperator(llvm::Instruction::Add, indexValue, ptrSliceOffset, 02067 "index_sum"); 02068 02069 // minor index = (index & (soaWidth - 1)) 02070 *newSliceOffset = ctx->BinaryOperator(llvm::Instruction::And, indexSum, 02071 mask, "slice_index_minor"); 02072 // slice offsets are always 32 bits... 02073 if ((*newSliceOffset)->getType() == LLVMTypes::Int64Type) 02074 *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32Type); 02075 else if ((*newSliceOffset)->getType() == LLVMTypes::Int64VectorType) 02076 *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32VectorType); 02077 02078 // major index = (index >> logWidth) 02079 return ctx->BinaryOperator(llvm::Instruction::AShr, indexSum, 02080 shift, "slice_index_major"); 02081 } 02082 02083 02084 llvm::Value * 02085 FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) { 02086 // Create a small struct where the first element is the type of the 02087 // given pointer and the second element is the type of the offset 02088 // value. 02089 std::vector<llvm::Type *> eltTypes; 02090 eltTypes.push_back(ptr->getType()); 02091 eltTypes.push_back(offset->getType()); 02092 llvm::StructType *st = 02093 llvm::StructType::get(*g->ctx, eltTypes); 02094 02095 llvm::Value *ret = llvm::UndefValue::get(st); 02096 ret = InsertInst(ret, ptr, 0, LLVMGetName(ret, "_slice_ptr")); 02097 ret = InsertInst(ret, offset, 1, LLVMGetName(ret, "_slice_offset")); 02098 return ret; 02099 } 02100 02101 02102 llvm::Value * 02103 FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, 02104 const Type *ptrRefType, const char *name) { 02105 if (basePtr == NULL || index == NULL) { 02106 AssertPos(currentPos, m->errorCount > 0); 02107 return NULL; 02108 } 02109 02110 // Regularize to a standard pointer type for basePtr's type 02111 const PointerType *ptrType; 02112 if (CastType<ReferenceType>(ptrRefType) != NULL) 02113 ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); 02114 else { 02115 ptrType = CastType<PointerType>(ptrRefType); 02116 AssertPos(currentPos, ptrType != NULL); 02117 } 02118 02119 if (ptrType->IsSlice()) { 02120 AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType())); 02121 02122 llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1); 02123 if (ptrType->IsFrozenSlice() == false) { 02124 // For slice pointers that aren't frozen, we compute a new 02125 // index based on the given index plus the offset in the slice 02126 // pointer. This gives us an updated integer slice index for 02127 // the resulting slice pointer and then an index to index into 02128 // the soa<> structs with. 02129 llvm::Value *newSliceOffset; 02130 int soaWidth = ptrType->GetBaseType()->GetSOAWidth(); 02131 index = lComputeSliceIndex(this, soaWidth, index, 02132 ptrSliceOffset, &newSliceOffset); 02133 ptrSliceOffset = newSliceOffset; 02134 } 02135 02136 // Handle the indexing into the soa<> structs with the major 02137 // component of the index through a recursive call 02138 llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index, 02139 ptrType->GetAsNonSlice(), name); 02140 02141 // And mash the results together for the return value 02142 return MakeSlicePointer(p, ptrSliceOffset); 02143 } 02144 02145 // Double-check consistency between the given pointer type and its LLVM 02146 // type. 02147 if (ptrType->IsUniformType()) 02148 AssertPos(currentPos, llvm::isa<llvm::PointerType>(basePtr->getType())); 02149 else if (ptrType->IsVaryingType()) 02150 AssertPos(currentPos, llvm::isa<llvm::VectorType>(basePtr->getType())); 02151 02152 bool indexIsVaryingType = 02153 llvm::isa<llvm::VectorType>(index->getType()); 02154 02155 if (indexIsVaryingType == false && ptrType->IsUniformType() == true) { 02156 // The easy case: both the base pointer and the indices are 02157 // uniform, so just emit the regular LLVM GEP instruction 02158 llvm::Value *ind[1] = { index }; 02159 llvm::ArrayRef<llvm::Value *> arrayRef(&ind[0], &ind[1]); 02160 llvm::Instruction *inst = 02161 llvm::GetElementPtrInst::Create(basePtr, arrayRef, 02162 name ? name : "gep", bblock); 02163 AddDebugPos(inst); 02164 return inst; 02165 } 02166 else 02167 return applyVaryingGEP(basePtr, index, ptrType); 02168 } 02169 02170 02171 llvm::Value * 02172 FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0, 02173 llvm::Value *index1, const Type *ptrRefType, 02174 const char *name) { 02175 if (basePtr == NULL || index0 == NULL || index1 == NULL) { 02176 AssertPos(currentPos, m->errorCount > 0); 02177 return NULL; 02178 } 02179 02180 // Regaularize the pointer type for basePtr 02181 const PointerType *ptrType = NULL; 02182 if (CastType<ReferenceType>(ptrRefType) != NULL) 02183 ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); 02184 else { 02185 ptrType = CastType<PointerType>(ptrRefType); 02186 AssertPos(currentPos, ptrType != NULL); 02187 } 02188 02189 if (ptrType->IsSlice()) { 02190 // Similar to the 1D GEP implementation above, for non-frozen slice 02191 // pointers we do the two-step indexing calculation and then pass 02192 // the new major index on to a recursive GEP call. 02193 AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType())); 02194 llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1); 02195 if (ptrType->IsFrozenSlice() == false) { 02196 llvm::Value *newSliceOffset; 02197 int soaWidth = ptrType->GetBaseType()->GetSOAWidth(); 02198 index1 = lComputeSliceIndex(this, soaWidth, index1, 02199 ptrSliceOffset, &newSliceOffset); 02200 ptrSliceOffset = newSliceOffset; 02201 } 02202 02203 llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index0, 02204 index1, ptrType->GetAsNonSlice(), 02205 name); 02206 return MakeSlicePointer(p, ptrSliceOffset); 02207 } 02208 02209 bool index0IsVaryingType = 02210 llvm::isa<llvm::VectorType>(index0->getType()); 02211 bool index1IsVaryingType = 02212 llvm::isa<llvm::VectorType>(index1->getType()); 02213 02214 if (index0IsVaryingType == false && index1IsVaryingType == false && 02215 ptrType->IsUniformType() == true) { 02216 // The easy case: both the base pointer and the indices are 02217 // uniform, so just emit the regular LLVM GEP instruction 02218 llvm::Value *indices[2] = { index0, index1 }; 02219 llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]); 02220 llvm::Instruction *inst = 02221 llvm::GetElementPtrInst::Create(basePtr, arrayRef, 02222 name ? name : "gep", bblock); 02223 AddDebugPos(inst); 02224 return inst; 02225 } 02226 else { 02227 // Handle the first dimension with index0 02228 llvm::Value *ptr0 = GetElementPtrInst(basePtr, index0, ptrType); 02229 02230 // Now index into the second dimension with index1. First figure 02231 // out the type of ptr0. 02232 const Type *baseType = ptrType->GetBaseType(); 02233 const SequentialType *st = CastType<SequentialType>(baseType); 02234 AssertPos(currentPos, st != NULL); 02235 02236 bool ptr0IsUniform = 02237 llvm::isa<llvm::PointerType>(ptr0->getType()); 02238 const Type *ptr0BaseType = st->GetElementType(); 02239 const Type *ptr0Type = ptr0IsUniform ? 02240 PointerType::GetUniform(ptr0BaseType) : 02241 PointerType::GetVarying(ptr0BaseType); 02242 02243 return applyVaryingGEP(ptr0, index1, ptr0Type); 02244 } 02245 } 02246 02247 02248 llvm::Value * 02249 FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, 02250 const Type *ptrRefType, const char *name, 02251 const PointerType **resultPtrType) { 02252 if (resultPtrType != NULL) 02253 AssertPos(currentPos, ptrRefType != NULL); 02254 02255 llvm::PointerType *llvmPtrType = 02256 llvm::dyn_cast<llvm::PointerType>(fullBasePtr->getType()); 02257 if (llvmPtrType != NULL) { 02258 llvm::StructType *llvmStructType = 02259 llvm::dyn_cast<llvm::StructType>(llvmPtrType->getElementType()); 02260 if (llvmStructType != NULL && llvmStructType->isSized() == false) { 02261 AssertPos(currentPos, m->errorCount > 0); 02262 return NULL; 02263 } 02264 } 02265 02266 // (Unfortunately) it's not required to pass a non-NULL ptrRefType, but 02267 // if we have one, regularize into a pointer type. 02268 const PointerType *ptrType = NULL; 02269 if (ptrRefType != NULL) { 02270 // Normalize references to uniform pointers 02271 if (CastType<ReferenceType>(ptrRefType) != NULL) 02272 ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); 02273 else 02274 ptrType = CastType<PointerType>(ptrRefType); 02275 AssertPos(currentPos, ptrType != NULL); 02276 } 02277 02278 // Similarly, we have to see if the pointer type is a struct to see if 02279 // we have a slice pointer instead of looking at ptrType; this is also 02280 // unfortunate... 02281 llvm::Value *basePtr = fullBasePtr; 02282 bool baseIsSlicePtr = 02283 llvm::isa<llvm::StructType>(fullBasePtr->getType()); 02284 const PointerType *rpt; 02285 if (baseIsSlicePtr) { 02286 AssertPos(currentPos, ptrType != NULL); 02287 // Update basePtr to just be the part that actually points to the 02288 // start of an soa<> struct for now; the element offset computation 02289 // doesn't change the slice offset, so we'll incorporate that into 02290 // the final value right before this method returns. 02291 basePtr = ExtractInst(fullBasePtr, 0); 02292 if (resultPtrType == NULL) 02293 resultPtrType = &rpt; 02294 } 02295 02296 // Return the pointer type of the result of this call, for callers that 02297 // want it. 02298 if (resultPtrType != NULL) { 02299 AssertPos(currentPos, ptrType != NULL); 02300 const CollectionType *ct = 02301 CastType<CollectionType>(ptrType->GetBaseType()); 02302 AssertPos(currentPos, ct != NULL); 02303 *resultPtrType = new PointerType(ct->GetElementType(elementNum), 02304 ptrType->GetVariability(), 02305 ptrType->IsConstType(), 02306 ptrType->IsSlice()); 02307 } 02308 02309 llvm::Value *resultPtr = NULL; 02310 if (ptrType == NULL || ptrType->IsUniformType()) { 02311 // If the pointer is uniform, we can use the regular LLVM GEP. 02312 llvm::Value *offsets[2] = { LLVMInt32(0), LLVMInt32(elementNum) }; 02313 llvm::ArrayRef<llvm::Value *> arrayRef(&offsets[0], &offsets[2]); 02314 resultPtr = 02315 llvm::GetElementPtrInst::Create(basePtr, arrayRef, 02316 name ? name : "struct_offset", bblock); 02317 } 02318 else { 02319 // Otherwise do the math to find the offset and add it to the given 02320 // varying pointers 02321 const StructType *st = CastType<StructType>(ptrType->GetBaseType()); 02322 llvm::Value *offset = NULL; 02323 if (st != NULL) 02324 // If the pointer is to a structure, Target::StructOffset() gives 02325 // us the offset in bytes to the given element of the structure 02326 offset = g->target.StructOffset(st->LLVMType(g->ctx), elementNum, 02327 bblock); 02328 else { 02329 // Otherwise we should have a vector or array here and the offset 02330 // is given by the element number times the size of the element 02331 // type of the vector. 02332 const SequentialType *st = 02333 CastType<SequentialType>(ptrType->GetBaseType()); 02334 AssertPos(currentPos, st != NULL); 02335 llvm::Value *size = 02336 g->target.SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock); 02337 llvm::Value *scale = (g->target.is32Bit || g->opt.force32BitAddressing) ? 02338 LLVMInt32(elementNum) : LLVMInt64(elementNum); 02339 offset = BinaryOperator(llvm::Instruction::Mul, size, scale); 02340 } 02341 02342 offset = SmearUniform(offset, "offset_smear"); 02343 02344 if (g->target.is32Bit == false && g->opt.force32BitAddressing == true) 02345 // If we're doing 32 bit addressing with a 64 bit target, although 02346 // we did the math above in 32 bit, we need to go to 64 bit before 02347 // we add the offset to the varying pointers. 02348 offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64"); 02349 02350 resultPtr = BinaryOperator(llvm::Instruction::Add, basePtr, offset, 02351 "struct_ptr_offset"); 02352 } 02353 02354 // Finally, if had a slice pointer going in, mash back together with 02355 // the original (unchanged) slice offset. 02356 if (baseIsSlicePtr) 02357 return MakeSlicePointer(resultPtr, ExtractInst(fullBasePtr, 1)); 02358 else 02359 return resultPtr; 02360 } 02361 02362 02363 llvm::Value * 02364 FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) { 02365 if (ptr == NULL) { 02366 AssertPos(currentPos, m->errorCount > 0); 02367 return NULL; 02368 } 02369 02370 llvm::PointerType *pt = 02371 llvm::dyn_cast<llvm::PointerType>(ptr->getType()); 02372 AssertPos(currentPos, pt != NULL); 02373 02374 if (name == NULL) 02375 name = LLVMGetName(ptr, "_load"); 02376 02377 // FIXME: it's not clear to me that we generate unaligned vector loads 02378 // of varying stuff out of the front-end any more. (Only by the 02379 // optimization passes that lower gathers to vector loads, I think..) 02380 // So remove this?? 02381 int align = 0; 02382 if (llvm::isa<llvm::VectorType>(pt->getElementType())) 02383 align = 1; 02384 llvm::Instruction *inst = new llvm::LoadInst(ptr, name, 02385 false /* not volatile */, 02386 align, bblock); 02387 AddDebugPos(inst); 02388 return inst; 02389 } 02390 02391 02392 /** Given a slice pointer to soa'd data that is a basic type (atomic, 02393 pointer, or enum type), use the slice offset to compute pointer(s) to 02394 the appropriate individual data element(s). 02395 */ 02396 static llvm::Value * 02397 lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr, 02398 const PointerType **ptrType) { 02399 Assert(CastType<PointerType>(*ptrType) != NULL); 02400 02401 llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, LLVMGetName(ptr, "_ptr")); 02402 llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, LLVMGetName(ptr, "_offset")); 02403 02404 // slicePtr should be a pointer to an soa-width wide array of the 02405 // final atomic/enum/pointer type 02406 const Type *unifBaseType = (*ptrType)->GetBaseType()->GetAsUniformType(); 02407 Assert(Type::IsBasicType(unifBaseType)); 02408 02409 // The final pointer type is a uniform or varying pointer to the 02410 // underlying uniform type, depending on whether the given pointer is 02411 // uniform or varying. 02412 *ptrType = (*ptrType)->IsUniformType() ? 02413 PointerType::GetUniform(unifBaseType) : 02414 PointerType::GetVarying(unifBaseType); 02415 02416 // For uniform pointers, bitcast to a pointer to the uniform element 02417 // type, so that the GEP below does the desired indexing 02418 if ((*ptrType)->IsUniformType()) 02419 slicePtr = ctx->BitCastInst(slicePtr, (*ptrType)->LLVMType(g->ctx)); 02420 02421 // And finally index based on the slice offset 02422 return ctx->GetElementPtrInst(slicePtr, sliceOffset, *ptrType, 02423 LLVMGetName(slicePtr, "_final_gep")); 02424 } 02425 02426 02427 /** Utility routine that loads from a uniform pointer to soa<> data, 02428 returning a regular uniform (non-SOA result). 02429 */ 02430 llvm::Value * 02431 FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask, 02432 const PointerType *ptrType, 02433 const char *name) { 02434 const Type *unifType = ptrType->GetBaseType()->GetAsUniformType(); 02435 02436 const CollectionType *ct = CastType<CollectionType>(ptrType->GetBaseType()); 02437 if (ct != NULL) { 02438 // If we have a struct/array, we need to decompose it into 02439 // individual element loads to fill in the result structure since 02440 // the SOA slice of values we need isn't contiguous in memory... 02441 llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx); 02442 llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType); 02443 02444 for (int i = 0; i < ct->GetElementCount(); ++i) { 02445 const PointerType *eltPtrType; 02446 llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType, 02447 "elt_offset", &eltPtrType); 02448 llvm::Value *eltValue = LoadInst(eltPtr, mask, eltPtrType, name); 02449 retValue = InsertInst(retValue, eltValue, i, "set_value"); 02450 } 02451 02452 return retValue; 02453 } 02454 else { 02455 // Otherwise we've made our way to a slice pointer to a basic type; 02456 // we need to apply the slice offset into this terminal SOA array 02457 // and then perform the final load 02458 ptr = lFinalSliceOffset(this, ptr, &ptrType); 02459 return LoadInst(ptr, mask, ptrType, name); 02460 } 02461 } 02462 02463 02464 llvm::Value * 02465 FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, 02466 const Type *ptrRefType, const char *name) { 02467 if (ptr == NULL) { 02468 AssertPos(currentPos, m->errorCount > 0); 02469 return NULL; 02470 } 02471 02472 AssertPos(currentPos, ptrRefType != NULL && mask != NULL); 02473 02474 if (name == NULL) 02475 name = LLVMGetName(ptr, "_load"); 02476 02477 const PointerType *ptrType; 02478 if (CastType<ReferenceType>(ptrRefType) != NULL) 02479 ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); 02480 else { 02481 ptrType = CastType<PointerType>(ptrRefType); 02482 AssertPos(currentPos, ptrType != NULL); 02483 } 02484 02485 if (ptrType->IsUniformType()) { 02486 if (ptrType->IsSlice()) { 02487 return loadUniformFromSOA(ptr, mask, ptrType, name); 02488 } 02489 else { 02490 // FIXME: same issue as above load inst regarding alignment... 02491 // 02492 // If the ptr is a straight up regular pointer, then just issue 02493 // a regular load. First figure out the alignment; in general we 02494 // can just assume the natural alignment (0 here), but for varying 02495 // atomic types, we need to make sure that the compiler emits 02496 // unaligned vector loads, so we specify a reduced alignment here. 02497 int align = 0; 02498 const AtomicType *atomicType = 02499 CastType<AtomicType>(ptrType->GetBaseType()); 02500 if (atomicType != NULL && atomicType->IsVaryingType()) 02501 // We actually just want to align to the vector element 02502 // alignment, but can't easily get that here, so just tell LLVM 02503 // it's totally unaligned. (This shouldn't make any difference 02504 // vs the proper alignment in practice.) 02505 align = 1; 02506 llvm::Instruction *inst = new llvm::LoadInst(ptr, name, 02507 false /* not volatile */, 02508 align, bblock); 02509 AddDebugPos(inst); 02510 return inst; 02511 } 02512 } 02513 else { 02514 // Otherwise we should have a varying ptr and it's time for a 02515 // gather. 02516 return gather(ptr, ptrType, GetFullMask(), name); 02517 } 02518 } 02519 02520 02521 llvm::Value * 02522 FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType, 02523 llvm::Value *mask, const char *name) { 02524 // We should have a varying pointer if we get here... 02525 AssertPos(currentPos, ptrType->IsVaryingType()); 02526 02527 const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType(); 02528 llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx); 02529 02530 const CollectionType *collectionType = 02531 CastType<CollectionType>(ptrType->GetBaseType()); 02532 if (collectionType != NULL) { 02533 // For collections, recursively gather element wise to find the 02534 // result. 02535 llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType); 02536 02537 for (int i = 0; i < collectionType->GetElementCount(); ++i) { 02538 const PointerType *eltPtrType; 02539 llvm::Value *eltPtr = 02540 AddElementOffset(ptr, i, ptrType, "gather_elt_ptr", &eltPtrType); 02541 02542 eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType); 02543 02544 // This in turn will be another gather 02545 llvm::Value *eltValues = LoadInst(eltPtr, mask, eltPtrType, name); 02546 02547 retValue = InsertInst(retValue, eltValues, i, "set_value"); 02548 } 02549 return retValue; 02550 } 02551 else if (ptrType->IsSlice()) { 02552 // If we have a slice pointer, we need to add the final slice 02553 // offset here right before issuing the actual gather 02554 // 02555 // FIXME: would it be better to do the corresponding same thing for 02556 // all of the varying offsets stuff here (and in scatter)? 02557 ptr = lFinalSliceOffset(this, ptr, &ptrType); 02558 } 02559 02560 // Otherwise we should just have a basic scalar or pointer type and we 02561 // can go and do the actual gather 02562 AddInstrumentationPoint("gather"); 02563 02564 // Figure out which gather function to call based on the size of 02565 // the elements. 02566 const PointerType *pt = CastType<PointerType>(returnType); 02567 const char *funcName = NULL; 02568 if (pt != NULL) 02569 funcName = g->target.is32Bit ? "__pseudo_gather32_i32" : 02570 "__pseudo_gather64_i64"; 02571 else if (llvmReturnType == LLVMTypes::DoubleVectorType) 02572 funcName = g->target.is32Bit ? "__pseudo_gather32_double" : 02573 "__pseudo_gather64_double"; 02574 else if (llvmReturnType == LLVMTypes::Int64VectorType) 02575 funcName = g->target.is32Bit ? "__pseudo_gather32_i64" : 02576 "__pseudo_gather64_i64"; 02577 else if (llvmReturnType == LLVMTypes::FloatVectorType) 02578 funcName = g->target.is32Bit ? "__pseudo_gather32_float" : 02579 "__pseudo_gather64_float"; 02580 else if (llvmReturnType == LLVMTypes::Int32VectorType) 02581 funcName = g->target.is32Bit ? "__pseudo_gather32_i32" : 02582 "__pseudo_gather64_i32"; 02583 else if (llvmReturnType == LLVMTypes::Int16VectorType) 02584 funcName = g->target.is32Bit ? "__pseudo_gather32_i16" : 02585 "__pseudo_gather64_i16"; 02586 else { 02587 AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType); 02588 funcName = g->target.is32Bit ? "__pseudo_gather32_i8" : 02589 "__pseudo_gather64_i8"; 02590 } 02591 02592 llvm::Function *gatherFunc = m->module->getFunction(funcName); 02593 AssertPos(currentPos, gatherFunc != NULL); 02594 02595 llvm::Value *gatherCall = CallInst(gatherFunc, NULL, ptr, mask, name); 02596 02597 // Add metadata about the source file location so that the 02598 // optimization passes can print useful performance warnings if we 02599 // can't optimize out this gather 02600 if (disableGSWarningCount == 0) 02601 addGSMetadata(gatherCall, currentPos); 02602 02603 return gatherCall; 02604 } 02605 02606 02607 /** Add metadata to the given instruction to encode the current source file 02608 position. This data is used in the lGetSourcePosFromMetadata() 02609 function in opt.cpp. 02610 */ 02611 void 02612 FunctionEmitContext::addGSMetadata(llvm::Value *v, SourcePos pos) { 02613 llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(v); 02614 if (inst == NULL) 02615 return; 02616 02617 llvm::Value *str = llvm::MDString::get(*g->ctx, pos.name); 02618 llvm::MDNode *md = llvm::MDNode::get(*g->ctx, str); 02619 inst->setMetadata("filename", md); 02620 02621 llvm::Value *first_line = LLVMInt32(pos.first_line); 02622 md = llvm::MDNode::get(*g->ctx, first_line); 02623 inst->setMetadata("first_line", md); 02624 02625 llvm::Value *first_column = LLVMInt32(pos.first_column); 02626 md = llvm::MDNode::get(*g->ctx, first_column); 02627 inst->setMetadata("first_column", md); 02628 02629 llvm::Value *last_line = LLVMInt32(pos.last_line); 02630 md = llvm::MDNode::get(*g->ctx, last_line); 02631 inst->setMetadata("last_line", md); 02632 02633 llvm::Value *last_column = LLVMInt32(pos.last_column); 02634 md = llvm::MDNode::get(*g->ctx, last_column); 02635 inst->setMetadata("last_column", md); 02636 } 02637 02638 02639 llvm::Value * 02640 FunctionEmitContext::AllocaInst(llvm::Type *llvmType, 02641 const char *name, int align, 02642 bool atEntryBlock) { 02643 if (llvmType == NULL) { 02644 AssertPos(currentPos, m->errorCount > 0); 02645 return NULL; 02646 } 02647 02648 llvm::AllocaInst *inst = NULL; 02649 if (atEntryBlock) { 02650 // We usually insert it right before the jump instruction at the 02651 // end of allocaBlock 02652 llvm::Instruction *retInst = allocaBlock->getTerminator(); 02653 AssertPos(currentPos, retInst); 02654 inst = new llvm::AllocaInst(llvmType, name ? name : "", retInst); 02655 } 02656 else 02657 // Unless the caller overrode the default and wants it in the 02658 // current basic block 02659 inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock); 02660 02661 // If no alignment was specified but we have an array of a uniform 02662 // type, then align it to 4 * the native vector width; it's not 02663 // unlikely that this array will be loaded into varying variables with 02664 // what will be aligned accesses if the uniform -> varying load is done 02665 // in regular chunks. 02666 llvm::ArrayType *arrayType = 02667 llvm::dyn_cast<llvm::ArrayType>(llvmType); 02668 if (align == 0 && arrayType != NULL && 02669 !llvm::isa<llvm::VectorType>(arrayType->getElementType())) 02670 align = 4 * g->target.nativeVectorWidth; 02671 02672 if (align != 0) 02673 inst->setAlignment(align); 02674 // Don't add debugging info to alloca instructions 02675 return inst; 02676 } 02677 02678 02679 /** Code to store the given varying value to the given location, only 02680 storing the elements that correspond to active program instances as 02681 given by the provided storeMask value. Note that the lvalue is only a 02682 single pointer, not a varying lvalue of one pointer per program 02683 instance (that case is handled by scatters). 02684 */ 02685 void 02686 FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, 02687 const Type *ptrType, llvm::Value *mask) { 02688 if (value == NULL || ptr == NULL) { 02689 AssertPos(currentPos, m->errorCount > 0); 02690 return; 02691 } 02692 02693 AssertPos(currentPos, CastType<PointerType>(ptrType) != NULL); 02694 AssertPos(currentPos, ptrType->IsUniformType()); 02695 02696 const Type *valueType = ptrType->GetBaseType(); 02697 const CollectionType *collectionType = CastType<CollectionType>(valueType); 02698 if (collectionType != NULL) { 02699 // Assigning a structure / array / vector. Handle each element 02700 // individually with what turns into a recursive call to 02701 // makedStore() 02702 for (int i = 0; i < collectionType->GetElementCount(); ++i) { 02703 const Type *eltType = collectionType->GetElementType(i); 02704 if (eltType == NULL) { 02705 Assert(m->errorCount > 0); 02706 continue; 02707 } 02708 llvm::Value *eltValue = ExtractInst(value, i, "value_member"); 02709 llvm::Value *eltPtr = 02710 AddElementOffset(ptr, i, ptrType, "struct_ptr_ptr"); 02711 const Type *eltPtrType = PointerType::GetUniform(eltType); 02712 StoreInst(eltValue, eltPtr, mask, eltType, eltPtrType); 02713 } 02714 return; 02715 } 02716 02717 // We must have a regular atomic, enumerator, or pointer type at this 02718 // point. 02719 AssertPos(currentPos, Type::IsBasicType(valueType)); 02720 valueType = valueType->GetAsNonConstType(); 02721 02722 // Figure out if we need a 8, 16, 32 or 64-bit masked store. 02723 llvm::Function *maskedStoreFunc = NULL; 02724 02725 const PointerType *pt = CastType<PointerType>(valueType); 02726 if (pt != NULL) { 02727 if (pt->IsSlice()) { 02728 // Masked store of (varying) slice pointer. 02729 AssertPos(currentPos, pt->IsVaryingType()); 02730 02731 // First, extract the pointer from the slice struct and masked 02732 // store that. 02733 llvm::Value *v0 = ExtractInst(value, 0); 02734 llvm::Value *p0 = AddElementOffset(ptr, 0, ptrType); 02735 maskedStore(v0, p0, PointerType::GetUniform(pt->GetAsNonSlice()), 02736 mask); 02737 02738 // And then do same for the integer offset 02739 llvm::Value *v1 = ExtractInst(value, 1); 02740 llvm::Value *p1 = AddElementOffset(ptr, 1, ptrType); 02741 const Type *offsetType = AtomicType::VaryingInt32; 02742 maskedStore(v1, p1, PointerType::GetUniform(offsetType), mask); 02743 02744 return; 02745 } 02746 02747 if (g->target.is32Bit) 02748 maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32"); 02749 else 02750 maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64"); 02751 } 02752 else if (Type::Equal(valueType, AtomicType::VaryingBool) && 02753 g->target.maskBitCount == 1) { 02754 llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask, 02755 LLVMMaskAllOn, "~mask"); 02756 llvm::Value *old = LoadInst(ptr); 02757 llvm::Value *maskedOld = BinaryOperator(llvm::Instruction::And, old, 02758 notMask, "old&~mask"); 02759 llvm::Value *maskedNew = BinaryOperator(llvm::Instruction::And, value, 02760 mask, "new&mask"); 02761 llvm::Value *final = BinaryOperator(llvm::Instruction::Or, maskedOld, 02762 maskedNew, "old_new_result"); 02763 StoreInst(final, ptr); 02764 return; 02765 } 02766 else if (Type::Equal(valueType, AtomicType::VaryingDouble)) { 02767 maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_double"); 02768 } 02769 else if (Type::Equal(valueType, AtomicType::VaryingInt64) || 02770 Type::Equal(valueType, AtomicType::VaryingUInt64)) { 02771 maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64"); 02772 } 02773 else if (Type::Equal(valueType, AtomicType::VaryingFloat)) { 02774 maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_float"); 02775 } 02776 else if (Type::Equal(valueType, AtomicType::VaryingBool) || 02777 Type::Equal(valueType, AtomicType::VaryingInt32) || 02778 Type::Equal(valueType, AtomicType::VaryingUInt32) || 02779 CastType<EnumType>(valueType) != NULL) { 02780 maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32"); 02781 } 02782 else if (Type::Equal(valueType, AtomicType::VaryingInt16) || 02783 Type::Equal(valueType, AtomicType::VaryingUInt16)) { 02784 maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i16"); 02785 } 02786 else if (Type::Equal(valueType, AtomicType::VaryingInt8) || 02787 Type::Equal(valueType, AtomicType::VaryingUInt8)) { 02788 maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i8"); 02789 } 02790 AssertPos(currentPos, maskedStoreFunc != NULL); 02791 02792 std::vector<llvm::Value *> args; 02793 args.push_back(ptr); 02794 args.push_back(value); 02795 args.push_back(mask); 02796 CallInst(maskedStoreFunc, NULL, args); 02797 } 02798 02799 02800 02801 /** Scatter the given varying value to the locations given by the varying 02802 lvalue (which should be an array of pointers with size equal to the 02803 target's vector width. We want to store each rvalue element at the 02804 corresponding pointer's location, *if* the mask for the corresponding 02805 program instance are on. If they're off, don't do anything. 02806 */ 02807 void 02808 FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, 02809 const Type *valueType, const Type *origPt, 02810 llvm::Value *mask) { 02811 const PointerType *ptrType = CastType<PointerType>(origPt); 02812 AssertPos(currentPos, ptrType != NULL); 02813 AssertPos(currentPos, ptrType->IsVaryingType()); 02814 02815 const CollectionType *srcCollectionType = 02816 CastType<CollectionType>(valueType); 02817 if (srcCollectionType != NULL) { 02818 // We're scattering a collection type--we need to keep track of the 02819 // source type (the type of the data values to be stored) and the 02820 // destination type (the type of objects in memory that will be 02821 // stored into) separately. This is necessary so that we can get 02822 // all of the addressing calculations right if we're scattering 02823 // from a varying struct to an array of uniform instances of the 02824 // same struct type, versus scattering into an array of varying 02825 // instances of the struct type, etc. 02826 const CollectionType *dstCollectionType = 02827 CastType<CollectionType>(ptrType->GetBaseType()); 02828 AssertPos(currentPos, dstCollectionType != NULL); 02829 02830 // Scatter the collection elements individually 02831 for (int i = 0; i < srcCollectionType->GetElementCount(); ++i) { 02832 // First, get the values for the current element out of the 02833 // source. 02834 llvm::Value *eltValue = ExtractInst(value, i); 02835 const Type *srcEltType = srcCollectionType->GetElementType(i); 02836 02837 // We may be scattering a uniform atomic element; in this case 02838 // we'll smear it out to be varying before making the recursive 02839 // scatter() call below. 02840 if (srcEltType->IsUniformType() && Type::IsBasicType(srcEltType)) { 02841 eltValue = SmearUniform(eltValue, "to_varying"); 02842 srcEltType = srcEltType->GetAsVaryingType(); 02843 } 02844 02845 // Get the (varying) pointer to the i'th element of the target 02846 // collection 02847 llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType); 02848 02849 // The destination element type may be uniform (e.g. if we're 02850 // scattering to an array of uniform structs). Thus, we need 02851 // to be careful about passing the correct type to 02852 // addVaryingOffsetsIfNeeded() here. 02853 const Type *dstEltType = dstCollectionType->GetElementType(i); 02854 const PointerType *dstEltPtrType = PointerType::GetVarying(dstEltType); 02855 if (ptrType->IsSlice()) 02856 dstEltPtrType = dstEltPtrType->GetAsSlice(); 02857 02858 eltPtr = addVaryingOffsetsIfNeeded(eltPtr, dstEltPtrType); 02859 02860 // And recursively scatter() until we hit a basic type, at 02861 // which point the actual memory operations can be performed... 02862 scatter(eltValue, eltPtr, srcEltType, dstEltPtrType, mask); 02863 } 02864 return; 02865 } 02866 else if (ptrType->IsSlice()) { 02867 // As with gather, we need to add the final slice offset finally 02868 // once we get to a terminal SOA array of basic types.. 02869 ptr = lFinalSliceOffset(this, ptr, &ptrType); 02870 } 02871 02872 const PointerType *pt = CastType<PointerType>(valueType); 02873 02874 // And everything should be a pointer or atomic from here on out... 02875 AssertPos(currentPos, pt != NULL || CastType<AtomicType>(valueType) != NULL); 02876 02877 llvm::Type *type = value->getType(); 02878 const char *funcName = NULL; 02879 if (pt != NULL) { 02880 funcName = g->target.is32Bit ? "__pseudo_scatter32_i32" : 02881 "__pseudo_scatter64_i64"; 02882 } 02883 else if (type == LLVMTypes::DoubleVectorType) { 02884 funcName = g->target.is32Bit ? "__pseudo_scatter32_double" : 02885 "__pseudo_scatter64_double"; 02886 } 02887 else if (type == LLVMTypes::Int64VectorType) { 02888 funcName = g->target.is32Bit ? "__pseudo_scatter32_i64" : 02889 "__pseudo_scatter64_i64"; 02890 } 02891 else if (type == LLVMTypes::FloatVectorType) { 02892 funcName = g->target.is32Bit ? "__pseudo_scatter32_float" : 02893 "__pseudo_scatter64_float"; 02894 } 02895 else if (type == LLVMTypes::Int32VectorType) { 02896 funcName = g->target.is32Bit ? "__pseudo_scatter32_i32" : 02897 "__pseudo_scatter64_i32"; 02898 } 02899 else if (type == LLVMTypes::Int16VectorType) { 02900 funcName = g->target.is32Bit ? "__pseudo_scatter32_i16" : 02901 "__pseudo_scatter64_i16"; 02902 } 02903 else if (type == LLVMTypes::Int8VectorType) { 02904 funcName = g->target.is32Bit ? "__pseudo_scatter32_i8" : 02905 "__pseudo_scatter64_i8"; 02906 } 02907 02908 llvm::Function *scatterFunc = m->module->getFunction(funcName); 02909 AssertPos(currentPos, scatterFunc != NULL); 02910 02911 AddInstrumentationPoint("scatter"); 02912 02913 std::vector<llvm::Value *> args; 02914 args.push_back(ptr); 02915 args.push_back(value); 02916 args.push_back(mask); 02917 llvm::Value *inst = CallInst(scatterFunc, NULL, args); 02918 02919 if (disableGSWarningCount == 0) 02920 addGSMetadata(inst, currentPos); 02921 } 02922 02923 02924 void 02925 FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) { 02926 if (value == NULL || ptr == NULL) { 02927 // may happen due to error elsewhere 02928 AssertPos(currentPos, m->errorCount > 0); 02929 return; 02930 } 02931 02932 llvm::Instruction *inst; 02933 if (llvm::isa<llvm::VectorType>(value->getType())) 02934 // FIXME: same for load--do we still need/want this?? 02935 // Specify an unaligned store, since we don't know that the ptr 02936 // will in fact be aligned to a vector width here. (Actually 02937 // should be aligned to the alignment of the vector elment type...) 02938 inst = new llvm::StoreInst(value, ptr, false /* not volatile */, 02939 1, bblock); 02940 else 02941 inst = new llvm::StoreInst(value, ptr, bblock); 02942 02943 AddDebugPos(inst); 02944 } 02945 02946 02947 void 02948 FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr, 02949 llvm::Value *mask, const Type *valueType, 02950 const Type *ptrRefType) { 02951 if (value == NULL || ptr == NULL) { 02952 // may happen due to error elsewhere 02953 AssertPos(currentPos, m->errorCount > 0); 02954 return; 02955 } 02956 02957 const PointerType *ptrType; 02958 if (CastType<ReferenceType>(ptrRefType) != NULL) 02959 ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); 02960 else { 02961 ptrType = CastType<PointerType>(ptrRefType); 02962 AssertPos(currentPos, ptrType != NULL); 02963 } 02964 02965 // Figure out what kind of store we're doing here 02966 if (ptrType->IsUniformType()) { 02967 if (ptrType->IsSlice()) 02968 // storing a uniform value to a single slice of a SOA type 02969 storeUniformToSOA(value, ptr, mask, valueType, ptrType); 02970 else if (ptrType->GetBaseType()->IsUniformType()) 02971 // the easy case 02972 StoreInst(value, ptr); 02973 else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations) 02974 // Otherwise it is a masked store unless we can determine that the 02975 // mask is all on... (Unclear if this check is actually useful.) 02976 StoreInst(value, ptr); 02977 else 02978 maskedStore(value, ptr, ptrType, mask); 02979 } 02980 else { 02981 AssertPos(currentPos, ptrType->IsVaryingType()); 02982 // We have a varying ptr (an array of pointers), so it's time to 02983 // scatter 02984 scatter(value, ptr, valueType, ptrType, GetFullMask()); 02985 } 02986 } 02987 02988 02989 /** Store a uniform type to SOA-laid-out memory. 02990 */ 02991 void 02992 FunctionEmitContext::storeUniformToSOA(llvm::Value *value, llvm::Value *ptr, 02993 llvm::Value *mask, const Type *valueType, 02994 const PointerType *ptrType) { 02995 AssertPos(currentPos, Type::EqualIgnoringConst(ptrType->GetBaseType()->GetAsUniformType(), 02996 valueType)); 02997 02998 const CollectionType *ct = CastType<CollectionType>(valueType); 02999 if (ct != NULL) { 03000 // Handle collections element wise... 03001 for (int i = 0; i < ct->GetElementCount(); ++i) { 03002 llvm::Value *eltValue = ExtractInst(value, i); 03003 const Type *eltType = ct->GetElementType(i); 03004 const PointerType *dstEltPtrType; 03005 llvm::Value *dstEltPtr = 03006 AddElementOffset(ptr, i, ptrType, "slice_offset", 03007 &dstEltPtrType); 03008 StoreInst(eltValue, dstEltPtr, mask, eltType, dstEltPtrType); 03009 } 03010 } 03011 else { 03012 // We're finally at a leaf SOA array; apply the slice offset and 03013 // then we can do a final regular store 03014 AssertPos(currentPos, Type::IsBasicType(valueType)); 03015 ptr = lFinalSliceOffset(this, ptr, &ptrType); 03016 StoreInst(value, ptr); 03017 } 03018 } 03019 03020 03021 void 03022 FunctionEmitContext::MemcpyInst(llvm::Value *dest, llvm::Value *src, 03023 llvm::Value *count, llvm::Value *align) { 03024 dest = BitCastInst(dest, LLVMTypes::VoidPointerType); 03025 src = BitCastInst(src, LLVMTypes::VoidPointerType); 03026 if (count->getType() != LLVMTypes::Int64Type) { 03027 AssertPos(currentPos, count->getType() == LLVMTypes::Int32Type); 03028 count = ZExtInst(count, LLVMTypes::Int64Type, "count_to_64"); 03029 } 03030 if (align == NULL) 03031 align = LLVMInt32(1); 03032 03033 llvm::Constant *mcFunc = 03034 m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", 03035 LLVMTypes::VoidType, LLVMTypes::VoidPointerType, 03036 LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, 03037 LLVMTypes::Int32Type, LLVMTypes::BoolType, NULL); 03038 AssertPos(currentPos, mcFunc != NULL); 03039 AssertPos(currentPos, llvm::isa<llvm::Function>(mcFunc)); 03040 03041 std::vector<llvm::Value *> args; 03042 args.push_back(dest); 03043 args.push_back(src); 03044 args.push_back(count); 03045 args.push_back(align); 03046 args.push_back(LLVMFalse); /* not volatile */ 03047 CallInst(mcFunc, NULL, args, ""); 03048 } 03049 03050 03051 void 03052 FunctionEmitContext::BranchInst(llvm::BasicBlock *dest) { 03053 llvm::Instruction *b = llvm::BranchInst::Create(dest, bblock); 03054 AddDebugPos(b); 03055 } 03056 03057 03058 void 03059 FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock, 03060 llvm::BasicBlock *falseBlock, 03061 llvm::Value *test) { 03062 if (test == NULL) { 03063 AssertPos(currentPos, m->errorCount > 0); 03064 return; 03065 } 03066 03067 llvm::Instruction *b = 03068 llvm::BranchInst::Create(trueBlock, falseBlock, test, bblock); 03069 AddDebugPos(b); 03070 } 03071 03072 03073 llvm::Value * 03074 FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) { 03075 if (v == NULL) { 03076 AssertPos(currentPos, m->errorCount > 0); 03077 return NULL; 03078 } 03079 03080 if (name == NULL) { 03081 char buf[32]; 03082 sprintf(buf, "_extract_%d", elt); 03083 name = LLVMGetName(v, buf); 03084 } 03085 03086 llvm::Instruction *ei = NULL; 03087 if (llvm::isa<llvm::VectorType>(v->getType())) 03088 ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), name, bblock); 03089 else 03090 ei = llvm::ExtractValueInst::Create(v, elt, name, bblock); 03091 AddDebugPos(ei); 03092 return ei; 03093 } 03094 03095 03096 llvm::Value * 03097 FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, 03098 const char *name) { 03099 if (v == NULL || eltVal == NULL) { 03100 AssertPos(currentPos, m->errorCount > 0); 03101 return NULL; 03102 } 03103 03104 if (name == NULL) { 03105 char buf[32]; 03106 sprintf(buf, "_insert_%d", elt); 03107 name = LLVMGetName(v, buf); 03108 } 03109 03110 llvm::Instruction *ii = NULL; 03111 if (llvm::isa<llvm::VectorType>(v->getType())) 03112 ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt), 03113 name, bblock); 03114 else 03115 ii = llvm::InsertValueInst::Create(v, eltVal, elt, name, bblock); 03116 AddDebugPos(ii); 03117 return ii; 03118 } 03119 03120 03121 llvm::PHINode * 03122 FunctionEmitContext::PhiNode(llvm::Type *type, int count, 03123 const char *name) { 03124 llvm::PHINode *pn = llvm::PHINode::Create(type, count, 03125 name ? name : "phi", bblock); 03126 AddDebugPos(pn); 03127 return pn; 03128 } 03129 03130 03131 llvm::Instruction * 03132 FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0, 03133 llvm::Value *val1, const char *name) { 03134 if (test == NULL || val0 == NULL || val1 == NULL) { 03135 AssertPos(currentPos, m->errorCount > 0); 03136 return NULL; 03137 } 03138 03139 if (name == NULL) 03140 name = LLVMGetName(test, "_select"); 03141 03142 llvm::Instruction *inst = llvm::SelectInst::Create(test, val0, val1, name, 03143 bblock); 03144 AddDebugPos(inst); 03145 return inst; 03146 } 03147 03148 03149 /** Given a value representing a function to be called or possibly-varying 03150 pointer to a function to be called, figure out how many arguments the 03151 function has. */ 03152 static unsigned int 03153 lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) { 03154 llvm::FunctionType *ft = 03155 llvm::dyn_cast<llvm::FunctionType>(callee->getType()); 03156 03157 if (ft == NULL) { 03158 llvm::PointerType *pt = 03159 llvm::dyn_cast<llvm::PointerType>(callee->getType()); 03160 if (pt == NULL) { 03161 // varying--in this case, it must be the version of the 03162 // function that takes a mask 03163 return funcType->GetNumParameters() + 1; 03164 } 03165 ft = llvm::dyn_cast<llvm::FunctionType>(pt->getElementType()); 03166 } 03167 03168 Assert(ft != NULL); 03169 return ft->getNumParams(); 03170 } 03171 03172 03173 llvm::Value * 03174 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, 03175 const std::vector<llvm::Value *> &args, 03176 const char *name) { 03177 if (func == NULL) { 03178 AssertPos(currentPos, m->errorCount > 0); 03179 return NULL; 03180 } 03181 03182 std::vector<llvm::Value *> argVals = args; 03183 // Most of the time, the mask is passed as the last argument. this 03184 // isn't the case for things like intrinsics, builtins, and extern "C" 03185 // functions from the application. Add the mask if it's needed. 03186 unsigned int calleeArgCount = lCalleeArgCount(func, funcType); 03187 AssertPos(currentPos, argVals.size() + 1 == calleeArgCount || 03188 argVals.size() == calleeArgCount); 03189 if (argVals.size() + 1 == calleeArgCount) 03190 argVals.push_back(GetFullMask()); 03191 03192 if (llvm::isa<llvm::VectorType>(func->getType()) == false) { 03193 // Regular 'uniform' function call--just one function or function 03194 // pointer, so just emit the IR directly. 03195 llvm::Instruction *ci = 03196 llvm::CallInst::Create(func, argVals, name ? name : "", bblock); 03197 AddDebugPos(ci); 03198 return ci; 03199 } 03200 else { 03201 // Emit the code for a varying function call, where we have an 03202 // vector of function pointers, one for each program instance. The 03203 // basic strategy is that we go through the function pointers, and 03204 // for the executing program instances, for each unique function 03205 // pointer that's in the vector, call that function with a mask 03206 // equal to the set of active program instances that also have that 03207 // function pointer. When all unique function pointers have been 03208 // called, we're done. 03209 03210 llvm::BasicBlock *bbTest = CreateBasicBlock("varying_funcall_test"); 03211 llvm::BasicBlock *bbCall = CreateBasicBlock("varying_funcall_call"); 03212 llvm::BasicBlock *bbDone = CreateBasicBlock("varying_funcall_done"); 03213 03214 // Get the current mask value so we can restore it later 03215 llvm::Value *origMask = GetInternalMask(); 03216 03217 // First allocate memory to accumulate the various program 03218 // instances' return values... 03219 const Type *returnType = funcType->GetReturnType(); 03220 llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx); 03221 llvm::Value *resultPtr = NULL; 03222 if (llvmReturnType->isVoidTy() == false) 03223 resultPtr = AllocaInst(llvmReturnType); 03224 03225 // The memory pointed to by maskPointer tracks the set of program 03226 // instances for which we still need to call the function they are 03227 // pointing to. It starts out initialized with the mask of 03228 // currently running program instances. 03229 llvm::Value *maskPtr = AllocaInst(LLVMTypes::MaskType); 03230 StoreInst(GetFullMask(), maskPtr); 03231 03232 // And now we branch to the test to see if there's more work to be 03233 // done. 03234 BranchInst(bbTest); 03235 03236 // bbTest: are any lanes of the mask still on? If so, jump to 03237 // bbCall 03238 SetCurrentBasicBlock(bbTest); { 03239 llvm::Value *maskLoad = LoadInst(maskPtr); 03240 llvm::Value *any = Any(maskLoad); 03241 BranchInst(bbCall, bbDone, any); 03242 } 03243 03244 // bbCall: this is the body of the loop that calls out to one of 03245 // the active function pointer values. 03246 SetCurrentBasicBlock(bbCall); { 03247 // Figure out the first lane that still needs its function 03248 // pointer to be called. 03249 llvm::Value *currentMask = LoadInst(maskPtr); 03250 llvm::Function *cttz = 03251 m->module->getFunction("__count_trailing_zeros_i64"); 03252 AssertPos(currentPos, cttz != NULL); 03253 llvm::Value *firstLane64 = CallInst(cttz, NULL, LaneMask(currentMask), 03254 "first_lane64"); 03255 llvm::Value *firstLane = 03256 TruncInst(firstLane64, LLVMTypes::Int32Type, "first_lane32"); 03257 03258 // Get the pointer to the function we're going to call this 03259 // time through: ftpr = func[firstLane] 03260 llvm::Value *fptr = 03261 llvm::ExtractElementInst::Create(func, firstLane, 03262 "extract_fptr", bblock); 03263 03264 // Smear it out into an array of function pointers 03265 llvm::Value *fptrSmear = SmearUniform(fptr, "func_ptr"); 03266 03267 // fpOverlap = (fpSmearAsVec == fpOrigAsVec). This gives us a 03268 // mask for the set of program instances that have the same 03269 // value for their function pointer. 03270 llvm::Value *fpOverlap = 03271 CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, 03272 fptrSmear, func); 03273 fpOverlap = I1VecToBoolVec(fpOverlap); 03274 03275 // Figure out the mask to use when calling the function 03276 // pointer: we need to AND the current execution mask to handle 03277 // the case of any non-running program instances that happen to 03278 // have this function pointer value. 03279 // callMask = (currentMask & fpOverlap) 03280 llvm::Value *callMask = 03281 BinaryOperator(llvm::Instruction::And, currentMask, fpOverlap, 03282 "call_mask"); 03283 03284 // Set the mask 03285 SetInternalMask(callMask); 03286 03287 // bitcast the i32/64 function pointer to the actual function 03288 // pointer type. 03289 llvm::Type *llvmFuncType = funcType->LLVMFunctionType(g->ctx); 03290 llvm::Type *llvmFPtrType = llvm::PointerType::get(llvmFuncType, 0); 03291 llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType); 03292 03293 // Call the function: callResult = call ftpr(args, args, call mask) 03294 llvm::Value *callResult = CallInst(fptrCast, funcType, args, name); 03295 03296 // Now, do a masked store into the memory allocated to 03297 // accumulate the result using the call mask. 03298 if (callResult != NULL && 03299 callResult->getType() != LLVMTypes::VoidType) { 03300 AssertPos(currentPos, resultPtr != NULL); 03301 StoreInst(callResult, resultPtr, callMask, returnType, 03302 PointerType::GetUniform(returnType)); 03303 } 03304 else 03305 AssertPos(currentPos, resultPtr == NULL); 03306 03307 // Update the mask to turn off the program instances for which 03308 // we just called the function. 03309 // currentMask = currentMask & ~callmask 03310 llvm::Value *notCallMask = 03311 BinaryOperator(llvm::Instruction::Xor, callMask, LLVMMaskAllOn, 03312 "~callMask"); 03313 currentMask = BinaryOperator(llvm::Instruction::And, currentMask, 03314 notCallMask, "currentMask&~callMask"); 03315 StoreInst(currentMask, maskPtr); 03316 03317 // And go back to the test to see if we need to do another 03318 // call. 03319 BranchInst(bbTest); 03320 } 03321 03322 // bbDone: We're all done; clean up and return the result we've 03323 // accumulated in the result memory. 03324 SetCurrentBasicBlock(bbDone); 03325 SetInternalMask(origMask); 03326 return resultPtr ? LoadInst(resultPtr) : NULL; 03327 } 03328 } 03329 03330 03331 llvm::Value * 03332 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, 03333 llvm::Value *arg, const char *name) { 03334 std::vector<llvm::Value *> args; 03335 args.push_back(arg); 03336 return CallInst(func, funcType, args, name); 03337 } 03338 03339 03340 llvm::Value * 03341 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, 03342 llvm::Value *arg0, llvm::Value *arg1, 03343 const char *name) { 03344 std::vector<llvm::Value *> args; 03345 args.push_back(arg0); 03346 args.push_back(arg1); 03347 return CallInst(func, funcType, args, name); 03348 } 03349 03350 03351 llvm::Instruction * 03352 FunctionEmitContext::ReturnInst() { 03353 if (launchedTasks) 03354 // Add a sync call at the end of any function that launched tasks 03355 SyncInst(); 03356 03357 llvm::Instruction *rinst = NULL; 03358 if (returnValuePtr != NULL) { 03359 // We have value(s) to return; load them from their storage 03360 // location 03361 llvm::Value *retVal = LoadInst(returnValuePtr, "return_value"); 03362 rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock); 03363 } 03364 else { 03365 AssertPos(currentPos, Type::Equal(function->GetReturnType(), AtomicType::Void)); 03366 rinst = llvm::ReturnInst::Create(*g->ctx, bblock); 03367 } 03368 03369 AddDebugPos(rinst); 03370 bblock = NULL; 03371 return rinst; 03372 } 03373 03374 03375 llvm::Value * 03376 FunctionEmitContext::LaunchInst(llvm::Value *callee, 03377 std::vector<llvm::Value *> &argVals, 03378 llvm::Value *launchCount) { 03379 if (callee == NULL) { 03380 AssertPos(currentPos, m->errorCount > 0); 03381 return NULL; 03382 } 03383 03384 launchedTasks = true; 03385 03386 AssertPos(currentPos, llvm::isa<llvm::Function>(callee)); 03387 llvm::Type *argType = 03388 (llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType(); 03389 AssertPos(currentPos, llvm::PointerType::classof(argType)); 03390 llvm::PointerType *pt = 03391 llvm::dyn_cast<llvm::PointerType>(argType); 03392 AssertPos(currentPos, llvm::StructType::classof(pt->getElementType())); 03393 llvm::StructType *argStructType = 03394 static_cast<llvm::StructType *>(pt->getElementType()); 03395 03396 llvm::Function *falloc = m->module->getFunction("ISPCAlloc"); 03397 AssertPos(currentPos, falloc != NULL); 03398 llvm::Value *structSize = g->target.SizeOf(argStructType, bblock); 03399 if (structSize->getType() != LLVMTypes::Int64Type) 03400 // ISPCAlloc expects the size as an uint64_t, but on 32-bit 03401 // targets, SizeOf returns a 32-bit value 03402 structSize = ZExtInst(structSize, LLVMTypes::Int64Type, 03403 "struct_size_to_64"); 03404 int align = 4 * RoundUpPow2(g->target.nativeVectorWidth); 03405 03406 std::vector<llvm::Value *> allocArgs; 03407 allocArgs.push_back(launchGroupHandlePtr); 03408 allocArgs.push_back(structSize); 03409 allocArgs.push_back(LLVMInt32(align)); 03410 llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr"); 03411 llvm::Value *argmem = BitCastInst(voidmem, pt); 03412 03413 // Copy the values of the parameters into the appropriate place in 03414 // the argument block 03415 for (unsigned int i = 0; i < argVals.size(); ++i) { 03416 llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg"); 03417 // don't need to do masked store here, I think 03418 StoreInst(argVals[i], ptr); 03419 } 03420 03421 if (argStructType->getNumElements() == argVals.size() + 1) { 03422 // copy in the mask 03423 llvm::Value *mask = GetFullMask(); 03424 llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL, 03425 "funarg_mask"); 03426 StoreInst(mask, ptr); 03427 } 03428 03429 // And emit the call to the user-supplied task launch function, passing 03430 // a pointer to the task function being called and a pointer to the 03431 // argument block we just filled in 03432 llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType); 03433 llvm::Function *flaunch = m->module->getFunction("ISPCLaunch"); 03434 AssertPos(currentPos, flaunch != NULL); 03435 std::vector<llvm::Value *> args; 03436 args.push_back(launchGroupHandlePtr); 03437 args.push_back(fptr); 03438 args.push_back(voidmem); 03439 args.push_back(launchCount); 03440 return CallInst(flaunch, NULL, args, ""); 03441 } 03442 03443 03444 void 03445 FunctionEmitContext::SyncInst() { 03446 llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr); 03447 llvm::Value *nullPtrValue = 03448 llvm::Constant::getNullValue(LLVMTypes::VoidPointerType); 03449 llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp, 03450 llvm::CmpInst::ICMP_NE, 03451 launchGroupHandle, nullPtrValue); 03452 llvm::BasicBlock *bSync = CreateBasicBlock("call_sync"); 03453 llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync"); 03454 BranchInst(bSync, bPostSync, nonNull); 03455 03456 SetCurrentBasicBlock(bSync); 03457 llvm::Function *fsync = m->module->getFunction("ISPCSync"); 03458 if (fsync == NULL) 03459 FATAL("Couldn't find ISPCSync declaration?!"); 03460 CallInst(fsync, NULL, launchGroupHandle, ""); 03461 03462 // zero out the handle so that if ISPCLaunch is called again in this 03463 // function, it knows it's starting out from scratch 03464 StoreInst(nullPtrValue, launchGroupHandlePtr); 03465 03466 BranchInst(bPostSync); 03467 03468 SetCurrentBasicBlock(bPostSync); 03469 } 03470 03471 03472 /** When we gathering from or scattering to a varying atomic type, we need 03473 to add an appropriate offset to the final address for each lane right 03474 before we use it. Given a varying pointer we're about to use and its 03475 type, this function determines whether these offsets are needed and 03476 returns an updated pointer that incorporates these offsets if needed. 03477 */ 03478 llvm::Value * 03479 FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, 03480 const Type *ptrType) { 03481 // This should only be called for varying pointers 03482 const PointerType *pt = CastType<PointerType>(ptrType); 03483 AssertPos(currentPos, pt && pt->IsVaryingType()); 03484 03485 const Type *baseType = ptrType->GetBaseType(); 03486 if (Type::IsBasicType(baseType) == false) 03487 return ptr; 03488 03489 if (baseType->IsVaryingType() == false) 03490 return ptr; 03491 03492 // Find the size of a uniform element of the varying type 03493 llvm::Type *llvmBaseUniformType = 03494 baseType->GetAsUniformType()->LLVMType(g->ctx); 03495 llvm::Value *unifSize = g->target.SizeOf(llvmBaseUniformType, bblock); 03496 unifSize = SmearUniform(unifSize); 03497 03498 // Compute offset = <0, 1, .. > * unifSize 03499 llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType()); 03500 for (int i = 0; i < g->target.vectorWidth; ++i) { 03501 llvm::Value *iValue = (g->target.is32Bit || g->opt.force32BitAddressing) ? 03502 LLVMInt32(i) : LLVMInt64(i); 03503 varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta"); 03504 } 03505 llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize, 03506 varyingOffsets); 03507 03508 if (g->opt.force32BitAddressing == true && g->target.is32Bit == false) 03509 // On 64-bit targets where we're doing 32-bit addressing 03510 // calculations, we need to convert to an i64 vector before adding 03511 // to the pointer 03512 offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64"); 03513 03514 return BinaryOperator(llvm::Instruction::Add, ptr, offset); 03515 } 03516 03517 03518 CFInfo * 03519 FunctionEmitContext::popCFState() { 03520 AssertPos(currentPos, controlFlowInfo.size() > 0); 03521 CFInfo *ci = controlFlowInfo.back(); 03522 controlFlowInfo.pop_back(); 03523 03524 if (ci->IsSwitch()) { 03525 breakTarget = ci->savedBreakTarget; 03526 continueTarget = ci->savedContinueTarget; 03527 breakLanesPtr = ci->savedBreakLanesPtr; 03528 continueLanesPtr = ci->savedContinueLanesPtr; 03529 loopMask = ci->savedLoopMask; 03530 switchExpr = ci->savedSwitchExpr; 03531 defaultBlock = ci->savedDefaultBlock; 03532 caseBlocks = ci->savedCaseBlocks; 03533 nextBlocks = ci->savedNextBlocks; 03534 switchConditionWasUniform = ci->savedSwitchConditionWasUniform; 03535 } 03536 else if (ci->IsLoop() || ci->IsForeach()) { 03537 breakTarget = ci->savedBreakTarget; 03538 continueTarget = ci->savedContinueTarget; 03539 breakLanesPtr = ci->savedBreakLanesPtr; 03540 continueLanesPtr = ci->savedContinueLanesPtr; 03541 loopMask = ci->savedLoopMask; 03542 } 03543 else { 03544 AssertPos(currentPos, ci->IsIf()); 03545 // nothing to do 03546 } 03547 03548 return ci; 03549 }
1.7.5.1