Intel SPMD Program Compiler  1.3.0
ctx.cpp
Go to the documentation of this file.
00001 /*
00002   Copyright (c) 2010-2012, Intel Corporation
00003   All rights reserved.
00004 
00005   Redistribution and use in source and binary forms, with or without
00006   modification, are permitted provided that the following conditions are
00007   met:
00008 
00009     * Redistributions of source code must retain the above copyright
00010       notice, this list of conditions and the following disclaimer.
00011 
00012     * Redistributions in binary form must reproduce the above copyright
00013       notice, this list of conditions and the following disclaimer in the
00014       documentation and/or other materials provided with the distribution.
00015 
00016     * Neither the name of Intel Corporation nor the names of its
00017       contributors may be used to endorse or promote products derived from
00018       this software without specific prior written permission.
00019 
00020 
00021    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
00022    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00023    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00024    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
00025    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
00032 */
00033 
00034 /** @file ctx.cpp
00035     @brief Implementation of the FunctionEmitContext class
00036 */
00037 
00038 #include "ctx.h"
00039 #include "util.h"
00040 #include "func.h"
00041 #include "llvmutil.h"
00042 #include "type.h"
00043 #include "stmt.h"
00044 #include "expr.h"
00045 #include "module.h"
00046 #include "sym.h"
00047 #include <map>
00048 #include <llvm/DerivedTypes.h>
00049 #include <llvm/Instructions.h>
00050 #include <llvm/Support/Dwarf.h>
00051 #include <llvm/Metadata.h>
00052 #include <llvm/Module.h>
00053 
00054 /** This is a small utility structure that records information related to one
00055     level of nested control flow.  It's mostly used in correctly restoring
00056     the mask and other state as we exit control flow nesting levels. 
00057 */
00058 struct CFInfo {
00059     /** Returns a new instance of the structure that represents entering an
00060         'if' statement */
00061     static CFInfo *GetIf(bool isUniform, llvm::Value *savedMask);
00062 
00063     /** Returns a new instance of the structure that represents entering a
00064         loop. */
00065     static CFInfo *GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
00066                            llvm::BasicBlock *continueTarget, 
00067                            llvm::Value *savedBreakLanesPtr,
00068                            llvm::Value *savedContinueLanesPtr,
00069                            llvm::Value *savedMask, llvm::Value *savedLoopMask);
00070 
00071     static CFInfo *GetForeach(FunctionEmitContext::ForeachType ft,
00072                               llvm::BasicBlock *breakTarget,
00073                               llvm::BasicBlock *continueTarget, 
00074                               llvm::Value *savedBreakLanesPtr,
00075                               llvm::Value *savedContinueLanesPtr,
00076                               llvm::Value *savedMask, llvm::Value *savedLoopMask);
00077 
00078     static CFInfo *GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget,
00079                              llvm::BasicBlock *continueTarget, 
00080                              llvm::Value *savedBreakLanesPtr,
00081                              llvm::Value *savedContinueLanesPtr,
00082                              llvm::Value *savedMask, llvm::Value *savedLoopMask,
00083                              llvm::Value *switchExpr,
00084                              llvm::BasicBlock *bbDefault,
00085                              const std::vector<std::pair<int, llvm::BasicBlock *> > *bbCases,
00086                              const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbNext,
00087                              bool scUniform);
00088     
00089     bool IsIf() { return type == If; }
00090     bool IsLoop() { return type == Loop; }
00091     bool IsForeach() { return (type == ForeachRegular ||
00092                                type == ForeachActive ||
00093                                type == ForeachUnique); }
00094     bool IsSwitch() { return type == Switch; }
00095     bool IsVarying() { return !isUniform; }
00096     bool IsUniform() { return isUniform; }
00097 
00098     enum CFType { If, Loop, ForeachRegular, ForeachActive, ForeachUnique, 
00099                   Switch };
00100     CFType type;
00101     bool isUniform;
00102     llvm::BasicBlock *savedBreakTarget, *savedContinueTarget;
00103     llvm::Value *savedBreakLanesPtr, *savedContinueLanesPtr;
00104     llvm::Value *savedMask, *savedLoopMask;
00105     llvm::Value *savedSwitchExpr;
00106     llvm::BasicBlock *savedDefaultBlock;
00107     const std::vector<std::pair<int, llvm::BasicBlock *> > *savedCaseBlocks;
00108     const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNextBlocks;
00109     bool savedSwitchConditionWasUniform;
00110 
00111 private:
00112     CFInfo(CFType t, bool uniformIf, llvm::Value *sm) {
00113         Assert(t == If);
00114         type = t;
00115         isUniform = uniformIf;
00116         savedBreakTarget = savedContinueTarget = NULL;
00117         savedBreakLanesPtr = savedContinueLanesPtr = NULL;
00118         savedMask = savedLoopMask = sm;
00119         savedSwitchExpr = NULL;
00120         savedDefaultBlock = NULL;
00121         savedCaseBlocks = NULL;
00122         savedNextBlocks = NULL;
00123     }
00124     CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
00125            llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
00126            llvm::Value *lm, llvm::Value *sse = NULL, llvm::BasicBlock *bbd = NULL, 
00127            const std::vector<std::pair<int, llvm::BasicBlock *> > *bbc = NULL,
00128            const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbn = NULL,
00129            bool scu = false) {
00130         Assert(t == Loop || t == Switch);
00131         type = t;
00132         isUniform = iu;
00133         savedBreakTarget = bt;
00134         savedContinueTarget = ct;
00135         savedBreakLanesPtr = sb;
00136         savedContinueLanesPtr = sc;
00137         savedMask = sm;
00138         savedLoopMask = lm;
00139         savedSwitchExpr = sse;
00140         savedDefaultBlock = bbd;
00141         savedCaseBlocks = bbc;
00142         savedNextBlocks = bbn;
00143         savedSwitchConditionWasUniform = scu;
00144     }
00145     CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
00146            llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
00147            llvm::Value *lm) {
00148         Assert(t == ForeachRegular || t == ForeachActive || t == ForeachUnique);
00149         type = t;
00150         isUniform = false;
00151         savedBreakTarget = bt;
00152         savedContinueTarget = ct;
00153         savedBreakLanesPtr = sb;
00154         savedContinueLanesPtr = sc;
00155         savedMask = sm;
00156         savedLoopMask = lm;
00157         savedSwitchExpr = NULL;
00158         savedDefaultBlock = NULL;
00159         savedCaseBlocks = NULL;
00160         savedNextBlocks = NULL;
00161     }
00162 };
00163 
00164 
00165 CFInfo *
00166 CFInfo::GetIf(bool isUniform, llvm::Value *savedMask) {
00167     return new CFInfo(If, isUniform, savedMask);
00168 }
00169 
00170 
00171 CFInfo *
00172 CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
00173                 llvm::BasicBlock *continueTarget, 
00174                 llvm::Value *savedBreakLanesPtr,
00175                 llvm::Value *savedContinueLanesPtr,
00176                 llvm::Value *savedMask, llvm::Value *savedLoopMask) {
00177     return new CFInfo(Loop, isUniform, breakTarget, continueTarget,
00178                       savedBreakLanesPtr, savedContinueLanesPtr,
00179                       savedMask, savedLoopMask);
00180 }
00181 
00182 
00183 CFInfo *
00184 CFInfo::GetForeach(FunctionEmitContext::ForeachType ft,
00185                    llvm::BasicBlock *breakTarget,
00186                    llvm::BasicBlock *continueTarget, 
00187                    llvm::Value *savedBreakLanesPtr,
00188                    llvm::Value *savedContinueLanesPtr,
00189                    llvm::Value *savedMask, llvm::Value *savedForeachMask) {
00190     CFType cfType;
00191     switch (ft) {
00192     case FunctionEmitContext::FOREACH_REGULAR:
00193         cfType = ForeachRegular;
00194         break;
00195     case FunctionEmitContext::FOREACH_ACTIVE:
00196         cfType = ForeachActive;
00197         break;
00198     case FunctionEmitContext::FOREACH_UNIQUE:
00199         cfType = ForeachUnique;
00200         break;
00201     default:
00202         FATAL("Unhandled foreach type");
00203         return NULL;
00204     }
00205 
00206     return new CFInfo(cfType, breakTarget, continueTarget,
00207                       savedBreakLanesPtr, savedContinueLanesPtr,
00208                       savedMask, savedForeachMask);
00209 }
00210 
00211 
00212 CFInfo *
00213 CFInfo::GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget,
00214                   llvm::BasicBlock *continueTarget, 
00215                   llvm::Value *savedBreakLanesPtr,
00216                   llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
00217                   llvm::Value *savedLoopMask, llvm::Value *savedSwitchExpr,
00218                   llvm::BasicBlock *savedDefaultBlock,
00219                   const std::vector<std::pair<int, llvm::BasicBlock *> > *savedCases,
00220                   const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNext,
00221                   bool savedSwitchConditionUniform) {
00222     return new CFInfo(Switch, isUniform, breakTarget, continueTarget, 
00223                       savedBreakLanesPtr, savedContinueLanesPtr,
00224                       savedMask, savedLoopMask, savedSwitchExpr, savedDefaultBlock, 
00225                       savedCases, savedNext, savedSwitchConditionUniform);
00226 }
00227 
00228 ///////////////////////////////////////////////////////////////////////////
00229 
00230 FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
00231                                          llvm::Function *lf,
00232                                          SourcePos firstStmtPos) {
00233     function = func;
00234     llvmFunction = lf;
00235 
00236     /* Create a new basic block to store all of the allocas */
00237     allocaBlock = llvm::BasicBlock::Create(*g->ctx, "allocas", llvmFunction, 0);
00238     bblock = llvm::BasicBlock::Create(*g->ctx, "entry", llvmFunction, 0);
00239     /* But jump from it immediately into the real entry block */
00240     llvm::BranchInst::Create(bblock, allocaBlock);
00241 
00242     funcStartPos = funSym->pos;
00243 
00244     internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory");
00245     StoreInst(LLVMMaskAllOn, internalMaskPointer);
00246 
00247     functionMaskValue = LLVMMaskAllOn;
00248 
00249     fullMaskPointer = AllocaInst(LLVMTypes::MaskType, "full_mask_memory");
00250     StoreInst(LLVMMaskAllOn, fullMaskPointer);
00251 
00252     loopMask = NULL;
00253     breakLanesPtr = continueLanesPtr = NULL;
00254     breakTarget = continueTarget = NULL;
00255 
00256     switchExpr = NULL;
00257     caseBlocks = NULL;
00258     defaultBlock = NULL;
00259     nextBlocks = NULL;
00260 
00261     returnedLanesPtr = AllocaInst(LLVMTypes::MaskType, "returned_lanes_memory");
00262     StoreInst(LLVMMaskAllOff, returnedLanesPtr);
00263 
00264     launchedTasks = false;
00265     launchGroupHandlePtr = AllocaInst(LLVMTypes::VoidPointerType, "launch_group_handle");
00266     StoreInst(llvm::Constant::getNullValue(LLVMTypes::VoidPointerType), 
00267               launchGroupHandlePtr);
00268 
00269     disableGSWarningCount = 0;
00270 
00271     const Type *returnType = function->GetReturnType();
00272     if (!returnType || Type::Equal(returnType, AtomicType::Void))
00273         returnValuePtr = NULL;
00274     else {
00275         llvm::Type *ftype = returnType->LLVMType(g->ctx);
00276         returnValuePtr = AllocaInst(ftype, "return_value_memory");
00277     }
00278 
00279     if (g->opt.disableMaskAllOnOptimizations) {
00280         // This is really disgusting.  We want to be able to fool the
00281         // compiler to not be able to reason that the mask is all on, but
00282         // we don't want to pay too much of a price at the start of each
00283         // function to do so.
00284         //
00285         // Therefore: first, we declare a module-static __all_on_mask
00286         // variable that will hold an "all on" mask value.  At the start of
00287         // each function, we'll load its value and call SetInternalMaskAnd
00288         // with the result to set the current internal execution mask.
00289         // (This is a no-op at runtime.)
00290         //
00291         // Then, to fool the optimizer that maybe the value of
00292         // __all_on_mask can't be guaranteed to be "all on", we emit a
00293         // dummy function that sets __all_on_mask be "all off".  (That
00294         // function is never actually called.)
00295         llvm::Value *globalAllOnMaskPtr = 
00296             m->module->getNamedGlobal("__all_on_mask");
00297         if (globalAllOnMaskPtr == NULL) {
00298             globalAllOnMaskPtr = 
00299                 new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false,
00300                                          llvm::GlobalValue::InternalLinkage,
00301                                          LLVMMaskAllOn, "__all_on_mask");
00302 
00303             char buf[256];
00304             sprintf(buf, "__off_all_on_mask_%s", g->target.GetISAString());
00305             llvm::Constant *offFunc = 
00306                 m->module->getOrInsertFunction(buf, LLVMTypes::VoidType,
00307                                                NULL);
00308             AssertPos(currentPos, llvm::isa<llvm::Function>(offFunc));
00309             llvm::BasicBlock *offBB = 
00310                    llvm::BasicBlock::Create(*g->ctx, "entry", 
00311                                             (llvm::Function *)offFunc, 0);
00312             new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
00313             llvm::ReturnInst::Create(*g->ctx, offBB);
00314         }
00315 
00316         llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, "all_on_mask");
00317         SetInternalMaskAnd(LLVMMaskAllOn, allOnMask);
00318     }
00319 
00320     if (m->diBuilder) {
00321         currentPos = funSym->pos;
00322 
00323         /* If debugging is enabled, tell the debug information emission
00324            code about this new function */
00325         diFile = funcStartPos.GetDIFile();
00326         AssertPos(currentPos, diFile.Verify());
00327 
00328         llvm::DIScope scope = llvm::DIScope(m->diBuilder->getCU());
00329         AssertPos(currentPos, scope.Verify());
00330 
00331         const FunctionType *functionType = function->GetType();
00332         llvm::DIType diSubprogramType;
00333         if (functionType == NULL)
00334             AssertPos(currentPos, m->errorCount > 0);
00335         else {
00336             diSubprogramType = functionType->GetDIType(scope);
00337             AssertPos(currentPos, diSubprogramType.Verify());
00338         }
00339 
00340         std::string mangledName = llvmFunction->getName();
00341         if (mangledName == funSym->name)
00342             mangledName = "";
00343 
00344         bool isStatic = (funSym->storageClass == SC_STATIC);
00345         bool isOptimized = (g->opt.level > 0);
00346         int firstLine = funcStartPos.first_line;
00347         int flags =  (llvm::DIDescriptor::FlagPrototyped);
00348 
00349         diSubprogram = 
00350             m->diBuilder->createFunction(diFile /* scope */, funSym->name,
00351                                          mangledName,        diFile,
00352                                          firstLine,          diSubprogramType,
00353                                          isStatic,           true, /* is defn */
00354 #ifndef LLVM_3_0
00355                                          firstLine,
00356 #endif // !LLVM_3_0
00357                                          flags,
00358                                          isOptimized,        llvmFunction);
00359         AssertPos(currentPos, diSubprogram.Verify());
00360 
00361         /* And start a scope representing the initial function scope */
00362         StartScope();
00363     }
00364 }
00365 
00366 
00367 FunctionEmitContext::~FunctionEmitContext() {
00368     AssertPos(currentPos, controlFlowInfo.size() == 0);
00369     AssertPos(currentPos, debugScopes.size() == (m->diBuilder ? 1 : 0));
00370 }
00371 
00372 
00373 const Function *
00374 FunctionEmitContext::GetFunction() const {
00375     return function;
00376 }
00377 
00378 
00379 llvm::BasicBlock *
00380 FunctionEmitContext::GetCurrentBasicBlock() {
00381     return bblock;
00382 }
00383 
00384 
00385 void
00386 FunctionEmitContext::SetCurrentBasicBlock(llvm::BasicBlock *bb) {
00387     bblock = bb;
00388 }
00389 
00390 
00391 llvm::Value *
00392 FunctionEmitContext::GetFunctionMask() {
00393     return functionMaskValue;
00394 }
00395 
00396 
00397 llvm::Value *
00398 FunctionEmitContext::GetInternalMask() {
00399     return LoadInst(internalMaskPointer, "load_mask");
00400 }
00401 
00402 
00403 llvm::Value *
00404 FunctionEmitContext::GetFullMask() {
00405     return BinaryOperator(llvm::Instruction::And, GetInternalMask(), 
00406                           functionMaskValue, "internal_mask&function_mask");
00407 }
00408 
00409 
00410 llvm::Value *
00411 FunctionEmitContext::GetFullMaskPointer() {
00412     return fullMaskPointer;
00413 }
00414 
00415 
00416 void
00417 FunctionEmitContext::SetFunctionMask(llvm::Value *value) {
00418     functionMaskValue = value;
00419     if (bblock != NULL)
00420         StoreInst(GetFullMask(), fullMaskPointer);
00421 }
00422 
00423 
00424 void
00425 FunctionEmitContext::SetLoopMask(llvm::Value *value) {
00426     loopMask = value;
00427 }
00428 
00429 
00430 void
00431 FunctionEmitContext::SetInternalMask(llvm::Value *value) {
00432     StoreInst(value, internalMaskPointer);
00433     // kludge so that __mask returns the right value in ispc code.
00434     StoreInst(GetFullMask(), fullMaskPointer);
00435 }
00436 
00437 
00438 void
00439 FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) {
00440     llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, 
00441                                        test, "oldMask&test");
00442     SetInternalMask(mask);
00443 }
00444 
00445 
00446 void
00447 FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test) {
00448     llvm::Value *notTest = BinaryOperator(llvm::Instruction::Xor, test, LLVMMaskAllOn,
00449                                           "~test");
00450     llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, notTest,
00451                                        "oldMask&~test");
00452     SetInternalMask(mask);
00453 }
00454 
00455 
00456 void
00457 FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
00458     AssertPos(currentPos, bblock != NULL);
00459     llvm::Value *any = Any(GetFullMask());
00460     BranchInst(btrue, bfalse, any);
00461     // It's illegal to add any additional instructions to the basic block
00462     // now that it's terminated, so set bblock to NULL to be safe
00463     bblock = NULL;
00464 }
00465 
00466 
00467 void
00468 FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
00469     AssertPos(currentPos, bblock != NULL);
00470     llvm::Value *all = All(GetFullMask());
00471     BranchInst(btrue, bfalse, all);
00472     // It's illegal to add any additional instructions to the basic block
00473     // now that it's terminated, so set bblock to NULL to be safe
00474     bblock = NULL;
00475 }
00476 
00477 
00478 void
00479 FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
00480     AssertPos(currentPos, bblock != NULL);
00481     // switch sense of true/false bblocks
00482     BranchIfMaskAny(bfalse, btrue);
00483     // It's illegal to add any additional instructions to the basic block
00484     // now that it's terminated, so set bblock to NULL to be safe
00485     bblock = NULL;
00486 }
00487 
00488 
00489 void
00490 FunctionEmitContext::StartUniformIf() {
00491     controlFlowInfo.push_back(CFInfo::GetIf(true, GetInternalMask()));
00492 }
00493 
00494 
00495 void
00496 FunctionEmitContext::StartVaryingIf(llvm::Value *oldMask) {
00497     controlFlowInfo.push_back(CFInfo::GetIf(false, oldMask));
00498 }
00499 
00500 
00501 void
00502 FunctionEmitContext::EndIf() {
00503     CFInfo *ci = popCFState();
00504     // Make sure we match up with a Start{Uniform,Varying}If().
00505     AssertPos(currentPos, ci->IsIf());
00506 
00507     // 'uniform' ifs don't change the mask so we only need to restore the
00508     // mask going into the if for 'varying' if statements
00509     if (ci->IsUniform() || bblock == NULL)
00510         return;
00511 
00512     // We can't just restore the mask as it was going into the 'if'
00513     // statement.  First we have to take into account any program
00514     // instances that have executed 'return' statements; the restored
00515     // mask must be off for those lanes.
00516     restoreMaskGivenReturns(ci->savedMask);
00517 
00518     // If the 'if' statement is inside a loop with a 'varying'
00519     // condition, we also need to account for any break or continue
00520     // statements that executed inside the 'if' statmeent; we also must
00521     // leave the lane masks for the program instances that ran those
00522     // off after we restore the mask after the 'if'.  The code below
00523     // ends up being optimized out in the case that there were no break
00524     // or continue statements (and breakLanesPtr and continueLanesPtr
00525     // have their initial 'all off' values), so we don't need to check
00526     // for that here.
00527     // 
00528     // There are three general cases to deal with here:
00529     // - Loops: both break and continue are allowed, and thus the corresponding
00530     //   lane mask pointers are non-NULL
00531     // - Foreach: only continueLanesPtr may be non-NULL
00532     // - Switch: only breakLanesPtr may be non-NULL
00533     if (continueLanesPtr != NULL || breakLanesPtr != NULL) {
00534         // We want to compute:
00535         // newMask = (oldMask & ~(breakLanes | continueLanes)),
00536         // treading breakLanes or continueLanes as "all off" if the
00537         // corresponding pointer is NULL.
00538         llvm::Value *bcLanes = NULL;
00539 
00540         if (continueLanesPtr != NULL)
00541             bcLanes = LoadInst(continueLanesPtr, "continue_lanes");
00542         else
00543             bcLanes = LLVMMaskAllOff;
00544 
00545         if (breakLanesPtr != NULL) {
00546             llvm::Value *breakLanes = LoadInst(breakLanesPtr, "break_lanes");
00547             bcLanes = BinaryOperator(llvm::Instruction::Or, bcLanes, 
00548                                      breakLanes, "|break_lanes");
00549         }
00550 
00551         llvm::Value *notBreakOrContinue = 
00552             NotOperator(bcLanes, "!(break|continue)_lanes");
00553         llvm::Value *oldMask = GetInternalMask();
00554         llvm::Value *newMask = 
00555             BinaryOperator(llvm::Instruction::And, oldMask, 
00556                            notBreakOrContinue, "new_mask");
00557         SetInternalMask(newMask);
00558     }
00559 }
00560 
00561 
00562 void
00563 FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct, 
00564                                bool uniformCF) {
00565     // Store the current values of various loop-related state so that we
00566     // can restore it when we exit this loop.
00567     llvm::Value *oldMask = GetInternalMask();
00568     controlFlowInfo.push_back(CFInfo::GetLoop(uniformCF, breakTarget, 
00569                                               continueTarget, breakLanesPtr,
00570                                               continueLanesPtr, oldMask, loopMask));
00571     if (uniformCF)
00572         // If the loop has a uniform condition, we don't need to track
00573         // which lanes 'break' or 'continue'; all of the running ones go
00574         // together, so we just jump
00575         breakLanesPtr = continueLanesPtr = NULL;
00576     else {
00577         // For loops with varying conditions, allocate space to store masks
00578         // that record which lanes have done these
00579         continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "continue_lanes_memory");
00580         StoreInst(LLVMMaskAllOff, continueLanesPtr);
00581         breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
00582         StoreInst(LLVMMaskAllOff, breakLanesPtr);
00583     }
00584 
00585     breakTarget = bt;
00586     continueTarget = ct;
00587     loopMask = NULL; // this better be set by the loop!
00588 }
00589 
00590 
00591 void
00592 FunctionEmitContext::EndLoop() {
00593     CFInfo *ci = popCFState();
00594     AssertPos(currentPos, ci->IsLoop());
00595 
00596     if (!ci->IsUniform())
00597         // If the loop had a 'uniform' test, then it didn't make any
00598         // changes to the mask so there's nothing to restore.  If it had a
00599         // varying test, we need to restore the mask to what it was going
00600         // into the loop, but still leaving off any lanes that executed a
00601         // 'return' statement.
00602         restoreMaskGivenReturns(ci->savedMask);
00603 }
00604 
00605 
00606 void
00607 FunctionEmitContext::StartForeach(ForeachType ft) {
00608     // Issue an error if we're in a nested foreach...
00609     if (ft == FOREACH_REGULAR) {
00610         for (int i = 0; i < (int)controlFlowInfo.size(); ++i) {
00611             if (controlFlowInfo[i]->type == CFInfo::ForeachRegular) {
00612                 Error(currentPos, "Nested \"foreach\" statements are currently "
00613                       "illegal.");
00614                 break;
00615                 // Don't return here, however, and in turn allow the caller to
00616                 // do the rest of its codegen and then call EndForeach()
00617                 // normally--the idea being that this gives a chance to find
00618                 // any other errors inside the body of the foreach loop...
00619             }
00620         }
00621     }
00622 
00623     // Store the current values of various loop-related state so that we
00624     // can restore it when we exit this loop.
00625     llvm::Value *oldMask = GetInternalMask();
00626     controlFlowInfo.push_back(CFInfo::GetForeach(ft, breakTarget, continueTarget, 
00627                                                  breakLanesPtr, continueLanesPtr,
00628                                                  oldMask, loopMask));
00629     breakLanesPtr = NULL;
00630     breakTarget = NULL;
00631 
00632     continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "foreach_continue_lanes");
00633     StoreInst(LLVMMaskAllOff, continueLanesPtr);
00634     continueTarget = NULL; // should be set by SetContinueTarget()
00635 
00636     loopMask = NULL;
00637 }
00638 
00639 
00640 void
00641 FunctionEmitContext::EndForeach() {
00642     CFInfo *ci = popCFState();
00643     AssertPos(currentPos, ci->IsForeach());
00644 }
00645 
00646 
00647 void
00648 FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
00649     if (!bblock)
00650         return;
00651 
00652     // Restore the mask to the given old mask, but leave off any lanes that
00653     // executed a return statement.
00654     // newMask = (oldMask & ~returnedLanes)
00655     llvm::Value *returnedLanes = LoadInst(returnedLanesPtr,
00656                                           "returned_lanes");
00657     llvm::Value *notReturned = NotOperator(returnedLanes, "~returned_lanes");
00658     llvm::Value *newMask = BinaryOperator(llvm::Instruction::And,
00659                                           oldMask, notReturned, "new_mask");
00660     SetInternalMask(newMask);
00661 }
00662 
00663 
00664 /** Returns "true" if the first enclosing non-if control flow expression is
00665     a "switch" statement.
00666 */
00667 bool
00668 FunctionEmitContext::inSwitchStatement() const {
00669     // Go backwards through controlFlowInfo, since we add new nested scopes
00670     // to the back.
00671     int i = controlFlowInfo.size() - 1;
00672     while (i >= 0 && controlFlowInfo[i]->IsIf())
00673         --i;
00674     // Got to the first non-if (or end of CF info)
00675     if (i == -1)
00676         return false;
00677     return controlFlowInfo[i]->IsSwitch();
00678 }
00679 
00680 
00681 void
00682 FunctionEmitContext::Break(bool doCoherenceCheck) {
00683     if (breakTarget == NULL) {
00684         Error(currentPos, "\"break\" statement is illegal outside of "
00685               "for/while/do loops and \"switch\" statements.");
00686         return;
00687     }
00688     AssertPos(currentPos, controlFlowInfo.size() > 0);
00689 
00690     if (bblock == NULL)
00691         return;
00692 
00693     if (inSwitchStatement() == true &&
00694         switchConditionWasUniform == true && 
00695         ifsInCFAllUniform(CFInfo::Switch)) {
00696         // We know that all program instances are executing the break, so
00697         // just jump to the block immediately after the switch.
00698         AssertPos(currentPos, breakTarget != NULL);
00699         BranchInst(breakTarget);
00700         bblock = NULL;
00701         return;
00702     }
00703 
00704     // If all of the enclosing 'if' tests in the loop have uniform control
00705     // flow or if we can tell that the mask is all on, then we can just
00706     // jump to the break location.
00707     if (inSwitchStatement() == false && ifsInCFAllUniform(CFInfo::Loop)) {
00708         BranchInst(breakTarget);
00709         if (ifsInCFAllUniform(CFInfo::Loop) && doCoherenceCheck)
00710             Warning(currentPos, "Coherent break statement not necessary in "
00711                     "fully uniform control flow.");
00712         // Set bblock to NULL since the jump has terminated the basic block
00713         bblock = NULL;
00714     }
00715     else {
00716         // Varying switch, uniform switch where the 'break' is under
00717         // varying control flow, or a loop with varying 'if's above the
00718         // break.  In these cases, we need to update the mask of the lanes
00719         // that have executed a 'break' statement: 
00720         // breakLanes = breakLanes | mask
00721         AssertPos(currentPos, breakLanesPtr != NULL);
00722         llvm::Value *mask = GetInternalMask();
00723         llvm::Value *breakMask = LoadInst(breakLanesPtr,
00724                                           "break_mask");
00725         llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
00726                                               mask, breakMask, "mask|break_mask");
00727         StoreInst(newMask, breakLanesPtr);
00728 
00729         // Set the current mask to be all off, just in case there are any
00730         // statements in the same scope after the 'break'.  Most of time
00731         // this will be optimized away since we'll likely end the scope of
00732         // an 'if' statement and restore the mask then.
00733         SetInternalMask(LLVMMaskAllOff);
00734 
00735         if (doCoherenceCheck) {
00736             if (continueTarget != NULL)
00737                 // If the user has indicated that this is a 'coherent'
00738                 // break statement, then check to see if the mask is all
00739                 // off.  If so, we have to conservatively jump to the
00740                 // continueTarget, not the breakTarget, since part of the
00741                 // reason the mask is all off may be due to 'continue'
00742                 // statements that executed in the current loop iteration.
00743                 jumpIfAllLoopLanesAreDone(continueTarget);
00744             else if (breakTarget != NULL)
00745                 // Similarly handle these for switch statements, where we
00746                 // only have a break target.
00747                 jumpIfAllLoopLanesAreDone(breakTarget);
00748         }
00749     }
00750 }
00751 
00752 
00753 static bool
00754 lEnclosingLoopIsForeachActive(const std::vector<CFInfo *> &controlFlowInfo) {
00755     for (int i = (int)controlFlowInfo.size() - 1; i >= 0; --i) {
00756         if (controlFlowInfo[i]->type == CFInfo::ForeachActive)
00757             return true;
00758     }
00759     return false;
00760 }
00761 
00762 
00763 void
00764 FunctionEmitContext::Continue(bool doCoherenceCheck) {
00765     if (!continueTarget) {
00766         Error(currentPos, "\"continue\" statement illegal outside of "
00767               "for/while/do/foreach loops.");
00768         return;
00769     }
00770     AssertPos(currentPos, controlFlowInfo.size() > 0);
00771 
00772     if (ifsInCFAllUniform(CFInfo::Loop) ||
00773         lEnclosingLoopIsForeachActive(controlFlowInfo)) {
00774         // Similarly to 'break' statements, we can immediately jump to the
00775         // continue target if we're only in 'uniform' control flow within
00776         // loop or if we can tell that the mask is all on.  Here, we can
00777         // also jump if the enclosing loop is a 'foreach_active' loop, in
00778         // which case we know that only a single program instance is
00779         // executing.
00780         AddInstrumentationPoint("continue: uniform CF, jumped");
00781         if (doCoherenceCheck)
00782             Warning(currentPos, "Coherent continue statement not necessary in "
00783                     "fully uniform control flow.");
00784         BranchInst(continueTarget);
00785         bblock = NULL;
00786     }
00787     else {
00788         // Otherwise update the stored value of which lanes have 'continue'd.
00789         // continueLanes = continueLanes | mask
00790         AssertPos(currentPos, continueLanesPtr);
00791         llvm::Value *mask = GetInternalMask();
00792         llvm::Value *continueMask = 
00793             LoadInst(continueLanesPtr, "continue_mask");
00794         llvm::Value *newMask = 
00795             BinaryOperator(llvm::Instruction::Or, mask, continueMask,
00796                            "mask|continueMask");
00797         StoreInst(newMask, continueLanesPtr);
00798 
00799         // And set the current mask to be all off in case there are any
00800         // statements in the same scope after the 'continue'
00801         SetInternalMask(LLVMMaskAllOff);
00802 
00803         if (doCoherenceCheck) 
00804             // If this is a 'coherent continue' statement, then emit the
00805             // code to see if all of the lanes are now off due to
00806             // breaks/continues and jump to the continue target if so.
00807             jumpIfAllLoopLanesAreDone(continueTarget);
00808     }
00809 }
00810 
00811 
00812 /** This function checks to see if all of the 'if' statements (if any)
00813     between the current scope and the first enclosing loop/switch of given
00814     control flow type have 'uniform' tests.
00815  */
00816 bool
00817 FunctionEmitContext::ifsInCFAllUniform(int type) const {
00818     AssertPos(currentPos, controlFlowInfo.size() > 0);
00819     // Go backwards through controlFlowInfo, since we add new nested scopes
00820     // to the back.  Stop once we come to the first enclosing control flow
00821     // structure of the desired type.
00822     int i = controlFlowInfo.size() - 1;
00823     while (i >= 0 && controlFlowInfo[i]->type != type) {
00824         if (controlFlowInfo[i]->isUniform == false)
00825             // Found a scope due to an 'if' statement with a varying test
00826             return false;
00827         --i;
00828     }
00829     AssertPos(currentPos, i >= 0); // else we didn't find the expected control flow type!
00830     return true;
00831 }
00832 
00833 
00834 void
00835 FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) {
00836     llvm::Value *allDone = NULL;
00837     AssertPos(currentPos, continueLanesPtr != NULL);
00838     if (breakLanesPtr == NULL) {
00839         // In a foreach loop, break and return are illegal, and
00840         // breakLanesPtr is NULL.  In this case, the mask is guaranteed to
00841         // be all on at the start of each iteration, so we only need to
00842         // check if all lanes have continued..
00843         llvm::Value *continued = LoadInst(continueLanesPtr,
00844                                           "continue_lanes");
00845         allDone = All(continued);
00846     }
00847     else {
00848         // Check to see if (returned lanes | continued lanes | break lanes) is
00849         // equal to the value of mask at the start of the loop iteration.  If
00850         // so, everyone is done and we can jump to the given target
00851         llvm::Value *returned = LoadInst(returnedLanesPtr,
00852                                          "returned_lanes");
00853         llvm::Value *continued = LoadInst(continueLanesPtr,
00854                                           "continue_lanes");
00855         llvm::Value *breaked = LoadInst(breakLanesPtr, "break_lanes");
00856         llvm::Value *returnedOrContinued = BinaryOperator(llvm::Instruction::Or, 
00857                                                           returned, continued,
00858                                                           "returned|continued");
00859         llvm::Value *returnedOrContinuedOrBreaked = 
00860             BinaryOperator(llvm::Instruction::Or, returnedOrContinued,
00861                            breaked, "returned|continued");
00862 
00863         // Do we match the mask at loop entry?
00864         allDone = MasksAllEqual(returnedOrContinuedOrBreaked, loopMask);
00865     }
00866 
00867     llvm::BasicBlock *bAll = CreateBasicBlock("all_continued_or_breaked");
00868     llvm::BasicBlock *bNotAll = CreateBasicBlock("not_all_continued_or_breaked");
00869     BranchInst(bAll, bNotAll, allDone);
00870 
00871     // If so, have an extra basic block along the way to add
00872     // instrumentation, if the user asked for it.
00873     bblock = bAll;
00874     AddInstrumentationPoint("break/continue: all dynamically went");
00875     BranchInst(target);
00876 
00877     // And set the current basic block to a new one for future instructions
00878     // for the path where we weren't able to jump
00879     bblock = bNotAll;
00880     AddInstrumentationPoint("break/continue: not all went");
00881 }
00882 
00883 
00884 void
00885 FunctionEmitContext::RestoreContinuedLanes() {
00886     if (continueLanesPtr == NULL)
00887         return;
00888 
00889     // mask = mask & continueFlags
00890     llvm::Value *mask = GetInternalMask();
00891     llvm::Value *continueMask = LoadInst(continueLanesPtr,
00892                                          "continue_mask");
00893     llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or,
00894                                          mask, continueMask, "mask|continue_mask");
00895     SetInternalMask(orMask);
00896 
00897     // continueLanes = 0
00898     StoreInst(LLVMMaskAllOff, continueLanesPtr);
00899 }
00900 
00901 
00902 void
00903 FunctionEmitContext::StartSwitch(bool cfIsUniform, llvm::BasicBlock *bbBreak) {
00904     llvm::Value *oldMask = GetInternalMask();
00905     controlFlowInfo.push_back(CFInfo::GetSwitch(cfIsUniform, breakTarget, 
00906                                                 continueTarget, breakLanesPtr,
00907                                                 continueLanesPtr, oldMask, 
00908                                                 loopMask, switchExpr, defaultBlock, 
00909                                                 caseBlocks, nextBlocks,
00910                                                 switchConditionWasUniform));
00911 
00912     breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
00913     StoreInst(LLVMMaskAllOff, breakLanesPtr);
00914     breakTarget = bbBreak;
00915 
00916     continueLanesPtr = NULL;
00917     continueTarget = NULL;
00918     loopMask = NULL;
00919 
00920     // These will be set by the SwitchInst() method
00921     switchExpr = NULL;
00922     defaultBlock = NULL;
00923     caseBlocks = NULL;
00924     nextBlocks = NULL;
00925 }
00926 
00927 
00928 void
00929 FunctionEmitContext::EndSwitch() {
00930     AssertPos(currentPos, bblock != NULL);
00931 
00932     CFInfo *ci = popCFState();
00933     if (ci->IsVarying() && bblock != NULL)
00934         restoreMaskGivenReturns(ci->savedMask);
00935 }
00936 
00937 
00938 /** Emit code to check for an "all off" mask before the code for a 
00939     case or default label in a "switch" statement.
00940  */
00941 void
00942 FunctionEmitContext::addSwitchMaskCheck(llvm::Value *mask) {
00943     llvm::Value *allOff = None(mask);
00944     llvm::BasicBlock *bbSome = CreateBasicBlock("case_default_on");
00945 
00946     // Find the basic block for the case or default label immediately after
00947     // the current one in the switch statement--that's where we want to
00948     // jump if the mask is all off at this label.
00949     AssertPos(currentPos, nextBlocks->find(bblock) != nextBlocks->end());
00950     llvm::BasicBlock *bbNext = nextBlocks->find(bblock)->second;
00951 
00952     // Jump to the next one of the mask is all off; otherwise jump to the
00953     // newly created block that will hold the actual code for this label.
00954     BranchInst(bbNext, bbSome, allOff);
00955     SetCurrentBasicBlock(bbSome);
00956 }
00957 
00958 
00959 /** Returns the execution mask at entry to the first enclosing "switch"
00960     statement. */
00961 llvm::Value *
00962 FunctionEmitContext::getMaskAtSwitchEntry() {
00963     AssertPos(currentPos, controlFlowInfo.size() > 0);
00964     int i = controlFlowInfo.size() - 1;
00965     while (i >= 0 && controlFlowInfo[i]->type != CFInfo::Switch)
00966         --i;
00967     AssertPos(currentPos, i != -1);
00968     return controlFlowInfo[i]->savedMask;
00969 }
00970 
00971 
00972 void
00973 FunctionEmitContext::EmitDefaultLabel(bool checkMask, SourcePos pos) {
00974     if (inSwitchStatement() == false) {
00975         Error(pos, "\"default\" label illegal outside of \"switch\" "
00976               "statement.");
00977         return;
00978     }
00979 
00980     // If there's a default label in the switch, a basic block for it
00981     // should have been provided in the previous call to SwitchInst().
00982     AssertPos(currentPos, defaultBlock != NULL);
00983 
00984     if (bblock != NULL)
00985         // The previous case in the switch fell through, or we're in a
00986         // varying switch; terminate the current block with a jump to the
00987         // block for the code for the default label.
00988         BranchInst(defaultBlock);
00989     SetCurrentBasicBlock(defaultBlock);
00990 
00991     if (switchConditionWasUniform)
00992         // Nothing more to do for this case; return back to the caller,
00993         // which will then emit the code for the default case.
00994         return;
00995 
00996     // For a varying switch, we need to update the execution mask.
00997     //
00998     // First, compute the mask that corresponds to which program instances
00999     // should execute the "default" code; this corresponds to the set of
01000     // program instances that don't match any of the case statements.
01001     // Therefore, we generate code that compares the value of the switch
01002     // expression to the value associated with each of the "case"
01003     // statements such that the surviving lanes didn't match any of them.
01004     llvm::Value *matchesDefault = getMaskAtSwitchEntry();
01005     for (int i = 0; i < (int)caseBlocks->size(); ++i) {
01006         int value = (*caseBlocks)[i].first;
01007         llvm::Value *valueVec = (switchExpr->getType() == LLVMTypes::Int32VectorType) ?
01008             LLVMInt32Vector(value) : LLVMInt64Vector(value);
01009         // TODO: for AVX2 at least, the following generates better code
01010         // than doing ICMP_NE and skipping the NotOperator() below; file a
01011         // LLVM bug?
01012         llvm::Value *matchesCaseValue = 
01013             CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr,
01014                     valueVec, "cmp_case_value");
01015         matchesCaseValue = I1VecToBoolVec(matchesCaseValue);
01016 
01017         llvm::Value *notMatchesCaseValue = NotOperator(matchesCaseValue);
01018         matchesDefault = BinaryOperator(llvm::Instruction::And, matchesDefault, 
01019                                         notMatchesCaseValue, "default&~case_match");
01020     }
01021 
01022     // The mask may have some lanes on, which corresponds to the previous
01023     // label falling through; compute the updated mask by ANDing with the
01024     // current mask.
01025     llvm::Value *oldMask = GetInternalMask();
01026     llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask, 
01027                                           matchesDefault, "old_mask|matches_default");
01028     SetInternalMask(newMask);
01029 
01030     if (checkMask)
01031         addSwitchMaskCheck(newMask);
01032 }
01033 
01034 
01035 void
01036 FunctionEmitContext::EmitCaseLabel(int value, bool checkMask, SourcePos pos) {
01037     if (inSwitchStatement() == false) {
01038         Error(pos, "\"case\" label illegal outside of \"switch\" statement.");
01039         return;
01040     }
01041 
01042     // Find the basic block for this case statement.
01043     llvm::BasicBlock *bbCase = NULL;
01044     AssertPos(currentPos, caseBlocks != NULL);
01045     for (int i = 0; i < (int)caseBlocks->size(); ++i)
01046         if ((*caseBlocks)[i].first == value) {
01047             bbCase = (*caseBlocks)[i].second;
01048             break;
01049         }
01050     AssertPos(currentPos, bbCase != NULL);
01051 
01052     if (bblock != NULL)
01053         // fall through from the previous case
01054         BranchInst(bbCase);
01055     SetCurrentBasicBlock(bbCase);
01056 
01057     if (switchConditionWasUniform)
01058         return;
01059 
01060     // update the mask: first, get a mask that indicates which program
01061     // instances have a value for the switch expression that matches this
01062     // case statement.
01063     llvm::Value *valueVec = (switchExpr->getType() == LLVMTypes::Int32VectorType) ?
01064         LLVMInt32Vector(value) : LLVMInt64Vector(value);
01065     llvm::Value *matchesCaseValue = 
01066         CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr,
01067                 valueVec, "cmp_case_value");
01068     matchesCaseValue = I1VecToBoolVec(matchesCaseValue);
01069 
01070     // If a lane was off going into the switch, we don't care if has a
01071     // value in the switch expression that happens to match this case.
01072     llvm::Value *entryMask = getMaskAtSwitchEntry();
01073     matchesCaseValue = BinaryOperator(llvm::Instruction::And, entryMask,
01074                                       matchesCaseValue, "entry_mask&case_match");
01075 
01076     // Take the surviving lanes and turn on the mask for them.
01077     llvm::Value *oldMask = GetInternalMask();
01078     llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask, 
01079                                           matchesCaseValue, "mask|case_match");
01080     SetInternalMask(newMask);
01081 
01082     if (checkMask)
01083         addSwitchMaskCheck(newMask);
01084 }
01085 
01086 
01087 void
01088 FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault,
01089                 const std::vector<std::pair<int, llvm::BasicBlock *> > &bbCases,
01090                 const std::map<llvm::BasicBlock *, llvm::BasicBlock *> &bbNext) {
01091     // The calling code should have called StartSwitch() before calling
01092     // SwitchInst().
01093     AssertPos(currentPos, controlFlowInfo.size() &&
01094            controlFlowInfo.back()->IsSwitch());
01095 
01096     switchExpr = expr;
01097     defaultBlock = bbDefault;
01098     caseBlocks = new std::vector<std::pair<int, llvm::BasicBlock *> >(bbCases);
01099     nextBlocks = new std::map<llvm::BasicBlock *, llvm::BasicBlock *>(bbNext);
01100     switchConditionWasUniform = 
01101         (llvm::isa<llvm::VectorType>(expr->getType()) == false);
01102 
01103     if (switchConditionWasUniform == true) {
01104         // For a uniform switch condition, just wire things up to the LLVM
01105         // switch instruction.
01106         llvm::SwitchInst *s = llvm::SwitchInst::Create(expr, bbDefault, 
01107                                                        bbCases.size(), bblock);
01108         for (int i = 0; i < (int)bbCases.size(); ++i) {
01109             if (expr->getType() == LLVMTypes::Int32Type)
01110                 s->addCase(LLVMInt32(bbCases[i].first), bbCases[i].second);
01111             else {
01112                 AssertPos(currentPos, expr->getType() == LLVMTypes::Int64Type);
01113                 s->addCase(LLVMInt64(bbCases[i].first), bbCases[i].second);
01114             }
01115         }
01116 
01117         AddDebugPos(s);
01118         // switch is a terminator
01119         bblock = NULL;
01120     }
01121     else {
01122         // For a varying switch, we first turn off all lanes of the mask
01123         SetInternalMask(LLVMMaskAllOff);
01124 
01125         if (nextBlocks->size() > 0) {
01126             // If there are any labels inside the switch, jump to the first
01127             // one; any code before the first label won't be executed by
01128             // anyone.
01129             std::map<llvm::BasicBlock *, llvm::BasicBlock *>::const_iterator iter;
01130             iter = nextBlocks->find(NULL);
01131             AssertPos(currentPos, iter != nextBlocks->end());
01132             llvm::BasicBlock *bbFirst = iter->second;
01133             BranchInst(bbFirst);
01134             bblock = NULL;
01135         }
01136     }
01137 }
01138 
01139 
01140 int
01141 FunctionEmitContext::VaryingCFDepth() const { 
01142     int sum = 0;
01143     for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
01144         if (controlFlowInfo[i]->IsVarying())
01145             ++sum;
01146     return sum;
01147 }
01148 
01149 
01150 bool
01151 FunctionEmitContext::InForeachLoop() const {
01152     for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
01153         if (controlFlowInfo[i]->IsForeach())
01154             return true;
01155     return false;
01156 }
01157 
01158 
01159 void
01160 FunctionEmitContext::DisableGatherScatterWarnings() {
01161     ++disableGSWarningCount;
01162 }
01163 
01164 
01165 void
01166 FunctionEmitContext::EnableGatherScatterWarnings() {
01167     --disableGSWarningCount;
01168 }
01169 
01170 
01171 
01172 bool
01173 FunctionEmitContext::initLabelBBlocks(ASTNode *node, void *data) {
01174     LabeledStmt *ls = dynamic_cast<LabeledStmt *>(node);
01175     if (ls == NULL)
01176         return true;
01177 
01178     FunctionEmitContext *ctx = (FunctionEmitContext *)data;
01179 
01180     if (ctx->labelMap.find(ls->name) != ctx->labelMap.end())
01181         Error(ls->pos, "Multiple labels named \"%s\" in function.",
01182               ls->name.c_str());
01183     else {
01184         llvm::BasicBlock *bb = ctx->CreateBasicBlock(ls->name.c_str());
01185         ctx->labelMap[ls->name] = bb;
01186     }
01187     return true;
01188 }
01189 
01190 
01191 void
01192 FunctionEmitContext::InitializeLabelMap(Stmt *code) {
01193     labelMap.erase(labelMap.begin(), labelMap.end());
01194     WalkAST(code, initLabelBBlocks, NULL, this);
01195 }
01196 
01197 
01198 llvm::BasicBlock *
01199 FunctionEmitContext::GetLabeledBasicBlock(const std::string &label) {
01200     if (labelMap.find(label) != labelMap.end())
01201         return labelMap[label];
01202     else
01203         return NULL;
01204 }
01205 
01206 std::vector<std::string>
01207 FunctionEmitContext::GetLabels() {
01208     // Initialize vector to the right size
01209     std::vector<std::string> labels(labelMap.size());
01210 
01211     // Iterate through labelMap and grab only the keys
01212     std::map<std::string, llvm::BasicBlock*>::iterator iter;
01213     for (iter=labelMap.begin(); iter != labelMap.end(); iter++)
01214         labels.push_back(iter->first);
01215 
01216     return labels;
01217 }
01218 
01219 
01220 void
01221 FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
01222     const Type *returnType = function->GetReturnType();
01223     if (Type::Equal(returnType, AtomicType::Void)) {
01224         if (expr != NULL)
01225             Error(expr->pos, "Can't return non-void type \"%s\" from void function.",
01226                   expr->GetType()->GetString().c_str());
01227     }
01228     else {
01229         if (expr == NULL) {
01230             Error(funcStartPos, "Must provide return value for return "
01231                   "statement for non-void function.");
01232             return;
01233         }
01234         
01235         expr = TypeConvertExpr(expr, returnType, "return statement");
01236         if (expr != NULL) {
01237             llvm::Value *retVal = expr->GetValue(this);
01238             if (retVal != NULL) {
01239                 if (returnType->IsUniformType() ||
01240                     CastType<ReferenceType>(returnType) != NULL)
01241                     StoreInst(retVal, returnValuePtr);
01242                 else {
01243                     // Use a masked store to store the value of the expression
01244                     // in the return value memory; this preserves the return
01245                     // values from other lanes that may have executed return
01246                     // statements previously.
01247                     StoreInst(retVal, returnValuePtr, GetInternalMask(), 
01248                               returnType, PointerType::GetUniform(returnType));
01249                 }
01250             }
01251         }
01252     }
01253 
01254     if (VaryingCFDepth() == 0) {
01255         // If there is only uniform control flow between us and the
01256         // function entry, then it's guaranteed that all lanes are running,
01257         // so we can just emit a true return instruction
01258         AddInstrumentationPoint("return: uniform control flow");
01259         ReturnInst();
01260     }
01261     else {
01262         // Otherwise we update the returnedLanes value by ANDing it with
01263         // the current lane mask.
01264         llvm::Value *oldReturnedLanes = 
01265             LoadInst(returnedLanesPtr, "old_returned_lanes");
01266         llvm::Value *newReturnedLanes = 
01267             BinaryOperator(llvm::Instruction::Or, oldReturnedLanes, 
01268                            GetInternalMask(), "old_mask|returned_lanes");
01269         
01270         // For 'coherent' return statements, emit code to check if all
01271         // lanes have returned
01272         if (doCoherenceCheck) {
01273             // if newReturnedLanes == functionMaskValue, get out of here!
01274             llvm::Value *cmp = MasksAllEqual(functionMaskValue, 
01275                                              newReturnedLanes);
01276             llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return");
01277             llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return");
01278             BranchInst(bDoReturn, bNoReturn, cmp);
01279 
01280             bblock = bDoReturn;
01281             AddInstrumentationPoint("return: all lanes have returned");
01282             ReturnInst();
01283 
01284             bblock = bNoReturn;
01285         }
01286         // Otherwise update returnedLanesPtr and turn off all of the lanes
01287         // in the current mask so that any subsequent statements in the
01288         // same scope after the return have no effect
01289         StoreInst(newReturnedLanes, returnedLanesPtr);
01290         AddInstrumentationPoint("return: some but not all lanes have returned");
01291         SetInternalMask(LLVMMaskAllOff);
01292     }
01293 }
01294 
01295 
01296 llvm::Value *
01297 FunctionEmitContext::Any(llvm::Value *mask) {
01298     llvm::Value *mmval = LaneMask(mask);
01299     return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, mmval,
01300                    LLVMInt64(0), LLVMGetName(mask, "_any"));
01301 }
01302 
01303 
01304 llvm::Value *
01305 FunctionEmitContext::All(llvm::Value *mask) {
01306     llvm::Value *mmval = LaneMask(mask);
01307     llvm::Value *allOnMaskValue = (g->target.vectorWidth == 64) ?
01308         LLVMInt64(~0ull) :
01309         LLVMInt64((1ull << g->target.vectorWidth) - 1);
01310 
01311     return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mmval,
01312                    allOnMaskValue, LLVMGetName(mask, "_all"));
01313 }
01314 
01315 
01316 llvm::Value *
01317 FunctionEmitContext::None(llvm::Value *mask) {
01318     llvm::Value *mmval = LaneMask(mask);
01319     return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mmval,
01320                    LLVMInt64(0), LLVMGetName(mask, "_none"));
01321 }
01322 
01323 
01324 llvm::Value *
01325 FunctionEmitContext::LaneMask(llvm::Value *v) {
01326     // Call the target-dependent movmsk function to turn the vector mask
01327     // into an i64 value
01328     std::vector<Symbol *> mm;
01329     m->symbolTable->LookupFunction("__movmsk", &mm);
01330     if (g->target.maskBitCount == 1)
01331         AssertPos(currentPos, mm.size() == 1);
01332     else
01333         // There should be one with signed int signature, one unsigned int.
01334         AssertPos(currentPos, mm.size() == 2); 
01335     // We can actually call either one, since both are i32s as far as
01336     // LLVM's type system is concerned...
01337     llvm::Function *fmm = mm[0]->function;
01338     return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
01339 }
01340 
01341 
01342 llvm::Value *
01343 FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
01344 #if 0
01345     // Compare the two masks to get a vector of i1s
01346     llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
01347                                v1, v2, "v1==v2");
01348     // Turn that into a bool vector type (often i32s)
01349     cmp = I1VecToBoolVec(cmp);
01350     // And see if it's all on
01351     return All(cmp);
01352 #else
01353     llvm::Value *mm1 = LaneMask(v1);
01354     llvm::Value *mm2 = LaneMask(v2);
01355     return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
01356                    LLVMGetName("equal", v1, v2));
01357 #endif
01358 }
01359 
01360 
01361 llvm::Value *
01362 FunctionEmitContext::GetStringPtr(const std::string &str) {
01363 #ifdef LLVM_3_0
01364     llvm::Constant *lstr = llvm::ConstantArray::get(*g->ctx, str);
01365 #else
01366     llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
01367 #endif
01368     llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
01369     llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(),
01370                                                     true /*isConst*/, 
01371                                                     linkage, lstr, "__str");
01372     return new llvm::BitCastInst(lstrPtr, LLVMTypes::VoidPointerType, 
01373                                  "str_void_ptr", bblock);
01374 }
01375 
01376 
01377 llvm::BasicBlock *
01378 FunctionEmitContext::CreateBasicBlock(const char *name) {
01379     return llvm::BasicBlock::Create(*g->ctx, name, llvmFunction);
01380 }
01381 
01382 
01383 llvm::Value *
01384 FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
01385     if (b == NULL) {
01386         AssertPos(currentPos, m->errorCount > 0);
01387         return NULL;
01388     }
01389 
01390     if (g->target.maskBitCount == 1)
01391         return b;
01392 
01393     llvm::ArrayType *at = 
01394         llvm::dyn_cast<llvm::ArrayType>(b->getType());
01395     if (at) {
01396         // If we're given an array of vectors of i1s, then do the
01397         // conversion for each of the elements
01398         llvm::Type *boolArrayType = 
01399             llvm::ArrayType::get(LLVMTypes::BoolVectorType, at->getNumElements());
01400         llvm::Value *ret = llvm::UndefValue::get(boolArrayType);
01401 
01402         for (unsigned int i = 0; i < at->getNumElements(); ++i) {
01403             llvm::Value *elt = ExtractInst(b, i);
01404             llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType, 
01405                                          LLVMGetName(elt, "_to_boolvec32"));
01406             ret = InsertInst(ret, sext, i);
01407         }
01408         return ret;
01409     }
01410     else
01411         return SExtInst(b, LLVMTypes::BoolVectorType, "val_to_boolvec32");
01412 }
01413 
01414 
01415 static llvm::Value *
01416 lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
01417 #ifdef LLVM_3_0
01418     llvm::Constant *sConstant = llvm::ConstantArray::get(*g->ctx, s);
01419 #else
01420     llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s);
01421 #endif
01422     llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(), 
01423                                                  true /* const */,
01424                                                  llvm::GlobalValue::InternalLinkage,
01425                                                  sConstant, s);
01426     llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) };
01427     llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
01428     return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock);
01429 }
01430 
01431 
01432 void
01433 FunctionEmitContext::AddInstrumentationPoint(const char *note) {
01434     AssertPos(currentPos, note != NULL);
01435     if (!g->emitInstrumentation)
01436         return;
01437 
01438     std::vector<llvm::Value *> args;
01439     // arg 1: filename as string
01440     args.push_back(lGetStringAsValue(bblock, currentPos.name));
01441     // arg 2: provided note
01442     args.push_back(lGetStringAsValue(bblock, note));
01443     // arg 3: line number
01444     args.push_back(LLVMInt32(currentPos.first_line));
01445     // arg 4: current mask, movmsk'ed down to an int64
01446     args.push_back(LaneMask(GetFullMask()));
01447 
01448     llvm::Function *finst = m->module->getFunction("ISPCInstrument");
01449     CallInst(finst, NULL, args, "");
01450 }
01451 
01452 
01453 void
01454 FunctionEmitContext::SetDebugPos(SourcePos pos) { 
01455     currentPos = pos; 
01456 }
01457 
01458 
01459 SourcePos
01460 FunctionEmitContext::GetDebugPos() const {
01461     return currentPos;
01462 }
01463 
01464 
01465 void
01466 FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos, 
01467                                  llvm::DIScope *scope) {
01468     llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(value);
01469     if (inst != NULL && m->diBuilder) {
01470         SourcePos p = pos ? *pos : currentPos;
01471         if (p.first_line != 0)
01472             // If first_line == 0, then we're in the middle of setting up
01473             // the standard library or the like; don't add debug positions
01474             // for those functions
01475             inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column, 
01476                                                   scope ? *scope : GetDIScope()));
01477     }
01478 }
01479 
01480 
01481 void
01482 FunctionEmitContext::StartScope() {
01483     if (m->diBuilder != NULL) {
01484         llvm::DIScope parentScope;
01485         if (debugScopes.size() > 0)
01486             parentScope = debugScopes.back();
01487         else
01488             parentScope = diSubprogram;
01489 
01490         llvm::DILexicalBlock lexicalBlock = 
01491             m->diBuilder->createLexicalBlock(parentScope, diFile,
01492                                              currentPos.first_line,
01493                                              currentPos.first_column);
01494         AssertPos(currentPos, lexicalBlock.Verify());
01495         debugScopes.push_back(lexicalBlock);
01496     }
01497 }
01498 
01499 
01500 void
01501 FunctionEmitContext::EndScope() {
01502     if (m->diBuilder != NULL) {
01503         AssertPos(currentPos, debugScopes.size() > 0);
01504         debugScopes.pop_back();
01505     }
01506 }
01507 
01508 
01509 llvm::DIScope 
01510 FunctionEmitContext::GetDIScope() const {
01511     AssertPos(currentPos, debugScopes.size() > 0);
01512     return debugScopes.back();
01513 }
01514 
01515 
01516 void
01517 FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) {
01518     if (m->diBuilder == NULL)
01519         return;
01520 
01521     llvm::DIScope scope = GetDIScope();
01522     llvm::DIType diType = sym->type->GetDIType(scope);
01523     AssertPos(currentPos, diType.Verify());
01524     llvm::DIVariable var = 
01525         m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_auto_variable,
01526                                           scope,
01527                                           sym->name,
01528                                           sym->pos.GetDIFile(),
01529                                           sym->pos.first_line,
01530                                           diType,
01531                                           true /* preserve through opts */);
01532     AssertPos(currentPos, var.Verify());
01533     llvm::Instruction *declareInst = 
01534         m->diBuilder->insertDeclare(sym->storagePtr, var, bblock);
01535     AddDebugPos(declareInst, &sym->pos, &scope);
01536 }
01537 
01538 
01539 void
01540 FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) {
01541     if (m->diBuilder == NULL)
01542         return;
01543 
01544     llvm::DIScope scope = diSubprogram;
01545     llvm::DIType diType = sym->type->GetDIType(scope);
01546     AssertPos(currentPos, diType.Verify());
01547     int flags = 0;
01548 
01549     llvm::DIVariable var = 
01550         m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_arg_variable,
01551                                           scope,
01552                                           sym->name,
01553                                           sym->pos.GetDIFile(),
01554                                           sym->pos.first_line,
01555                                           diType,
01556                                           true /* preserve through opts */,
01557                                           flags,
01558                                           argNum+1);
01559     AssertPos(currentPos, var.Verify());
01560     llvm::Instruction *declareInst = 
01561         m->diBuilder->insertDeclare(sym->storagePtr, var, bblock);
01562     AddDebugPos(declareInst, &sym->pos, &scope);
01563 }
01564 
01565 
01566 /** If the given type is an array of vector types, then it's the
01567     representation of an ispc VectorType with varying elements.  If it is
01568     one of these, return the array size (i.e. the VectorType's size).
01569     Otherwise return zero.
01570  */
01571 static int
01572 lArrayVectorWidth(llvm::Type *t) {
01573     llvm::ArrayType *arrayType = 
01574         llvm::dyn_cast<llvm::ArrayType>(t);
01575     if (arrayType == NULL)
01576         return 0;
01577 
01578     // We shouldn't be seeing arrays of anything but vectors being passed
01579     // to things like FunctionEmitContext::BinaryOperator() as operands.
01580     llvm::VectorType *vectorElementType = 
01581         llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
01582     Assert((vectorElementType != NULL &&
01583             (int)vectorElementType->getNumElements() == g->target.vectorWidth));
01584            
01585     return (int)arrayType->getNumElements();
01586 }
01587 
01588 
01589 llvm::Value *
01590 FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst, 
01591                                     llvm::Value *v0, llvm::Value *v1, 
01592                                     const char *name) {
01593     if (v0 == NULL || v1 == NULL) {
01594         AssertPos(currentPos, m->errorCount > 0);
01595         return NULL;
01596     }
01597 
01598     AssertPos(currentPos, v0->getType() == v1->getType());
01599     llvm::Type *type = v0->getType();
01600     int arraySize = lArrayVectorWidth(type);
01601     if (arraySize == 0) {
01602         llvm::Instruction *bop = 
01603             llvm::BinaryOperator::Create(inst, v0, v1, name ? name : "", bblock);
01604         AddDebugPos(bop);
01605         return bop;
01606     }
01607     else {
01608         // If this is an ispc VectorType, apply the binary operator to each
01609         // of the elements of the array (which in turn should be either
01610         // scalar types or llvm::VectorTypes.)
01611         llvm::Value *ret = llvm::UndefValue::get(type);
01612         for (int i = 0; i < arraySize; ++i) {
01613             llvm::Value *a = ExtractInst(v0, i);
01614             llvm::Value *b = ExtractInst(v1, i);
01615             llvm::Value *op = BinaryOperator(inst, a, b);
01616             ret = InsertInst(ret, op, i);
01617         }
01618         return ret;
01619     }
01620 }
01621 
01622 
01623 llvm::Value *
01624 FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
01625     if (v == NULL) {
01626         AssertPos(currentPos, m->errorCount > 0);
01627         return NULL;
01628     }
01629 
01630     // Similarly to BinaryOperator, do the operation on all the elements of
01631     // the array if we're given an array type; otherwise just do the
01632     // regular llvm operation.
01633     llvm::Type *type = v->getType();
01634     int arraySize = lArrayVectorWidth(type);
01635     if (arraySize == 0) {
01636         llvm::Instruction *binst = 
01637             llvm::BinaryOperator::CreateNot(v, name ? name : "not", bblock);
01638         AddDebugPos(binst);
01639         return binst;
01640     }
01641     else {
01642         llvm::Value *ret = llvm::UndefValue::get(type);
01643         for (int i = 0; i < arraySize; ++i) {
01644             llvm::Value *a = ExtractInst(v, i);
01645             llvm::Value *op = 
01646                 llvm::BinaryOperator::CreateNot(a, name ? name : "not", bblock);
01647             AddDebugPos(op);
01648             ret = InsertInst(ret, op, i);
01649         }
01650         return ret;
01651     }
01652 }
01653 
01654 
01655 // Given the llvm Type that represents an ispc VectorType, return an
01656 // equally-shaped type with boolean elements.  (This is the type that will
01657 // be returned from CmpInst with ispc VectorTypes).
01658 static llvm::Type *
01659 lGetMatchingBoolVectorType(llvm::Type *type) {
01660     llvm::ArrayType *arrayType = 
01661         llvm::dyn_cast<llvm::ArrayType>(type);
01662     Assert(arrayType != NULL);
01663 
01664     llvm::VectorType *vectorElementType = 
01665         llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
01666     Assert(vectorElementType != NULL);
01667     Assert((int)vectorElementType->getNumElements() == g->target.vectorWidth);
01668 
01669     llvm::Type *base = 
01670         llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth);
01671     return llvm::ArrayType::get(base, arrayType->getNumElements());
01672 }
01673 
01674 
01675 llvm::Value *
01676 FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, 
01677                              llvm::CmpInst::Predicate pred,
01678                              llvm::Value *v0, llvm::Value *v1, 
01679                              const char *name) {
01680     if (v0 == NULL || v1 == NULL) {
01681         AssertPos(currentPos, m->errorCount > 0);
01682         return NULL;
01683     }
01684 
01685     AssertPos(currentPos, v0->getType() == v1->getType());
01686     llvm::Type *type = v0->getType();
01687     int arraySize = lArrayVectorWidth(type);
01688     if (arraySize == 0) {
01689         llvm::Instruction *ci = 
01690             llvm::CmpInst::Create(inst, pred, v0, v1, name ? name : "cmp", 
01691                                   bblock);
01692         AddDebugPos(ci);
01693         return ci;
01694     }
01695     else {
01696         llvm::Type *boolType = lGetMatchingBoolVectorType(type);
01697         llvm::Value *ret = llvm::UndefValue::get(boolType);
01698         for (int i = 0; i < arraySize; ++i) {
01699             llvm::Value *a = ExtractInst(v0, i);
01700             llvm::Value *b = ExtractInst(v1, i);
01701             llvm::Value *op = CmpInst(inst, pred, a, b, name);
01702             ret = InsertInst(ret, op, i);
01703         }
01704         return ret;
01705     }
01706 }
01707 
01708 
01709 llvm::Value *
01710 FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
01711     if (value == NULL) {
01712         AssertPos(currentPos, m->errorCount > 0);
01713         return NULL;
01714     }
01715 
01716     llvm::Value *ret = NULL;
01717     llvm::Type *eltType = value->getType();
01718 
01719     llvm::PointerType *pt = 
01720         llvm::dyn_cast<llvm::PointerType>(eltType);
01721     if (pt != NULL) {
01722         // Varying pointers are represented as vectors of i32/i64s
01723         ret = llvm::UndefValue::get(LLVMTypes::VoidPointerVectorType);
01724         value = PtrToIntInst(value);
01725     }
01726     else
01727         // All other varying types are represented as vectors of the
01728         // underlying type.
01729         ret = llvm::UndefValue::get(llvm::VectorType::get(eltType,
01730                                                           g->target.vectorWidth));
01731 
01732     for (int i = 0; i < g->target.vectorWidth; ++i) {
01733         llvm::Twine n = llvm::Twine("smear.") + llvm::Twine(name ? name : "") + 
01734             llvm::Twine(i);
01735         ret = InsertInst(ret, value, i, n.str().c_str());
01736     }
01737 
01738     return ret;
01739 }
01740                                     
01741 
01742 llvm::Value *
01743 FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, 
01744                                  const char *name) {
01745     if (value == NULL) {
01746         AssertPos(currentPos, m->errorCount > 0);
01747         return NULL;
01748     }
01749 
01750     if (name == NULL)
01751         name = LLVMGetName(value, "_bitcast");
01752 
01753     llvm::Instruction *inst = new llvm::BitCastInst(value, type, name, bblock);
01754     AddDebugPos(inst);
01755     return inst;
01756 }
01757 
01758 
01759 llvm::Value *
01760 FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
01761     if (value == NULL) {
01762         AssertPos(currentPos, m->errorCount > 0);
01763         return NULL;
01764     }
01765 
01766     if (llvm::isa<llvm::VectorType>(value->getType()))
01767         // no-op for varying pointers; they're already vectors of ints
01768         return value;
01769 
01770     if (name == NULL)
01771         name = LLVMGetName(value, "_ptr2int");
01772     llvm::Type *type = LLVMTypes::PointerIntType;
01773     llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
01774     AddDebugPos(inst);
01775     return inst;
01776 }
01777 
01778 
01779 llvm::Value *
01780 FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType,
01781                                   const char *name) {
01782     if (value == NULL) {
01783         AssertPos(currentPos, m->errorCount > 0);
01784         return NULL;
01785     }
01786 
01787     if (name == NULL)
01788         name = LLVMGetName(value, "_ptr2int");
01789 
01790     llvm::Type *fromType = value->getType();
01791     if (llvm::isa<llvm::VectorType>(fromType)) {
01792         // varying pointer
01793         if (fromType == toType)
01794             // already the right type--done
01795             return value;
01796         else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
01797             return TruncInst(value, toType, name);
01798         else {
01799             AssertPos(currentPos, fromType->getScalarSizeInBits() <
01800                    toType->getScalarSizeInBits());
01801             return ZExtInst(value, toType, name);
01802         }
01803     }
01804 
01805     llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock);
01806     AddDebugPos(inst);
01807     return inst;
01808 }
01809 
01810 
01811 llvm::Value *
01812 FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType,
01813                                   const char *name) {
01814     if (value == NULL) {
01815         AssertPos(currentPos, m->errorCount > 0);
01816         return NULL;
01817     }
01818 
01819     if (name == NULL)
01820         name = LLVMGetName(value, "_int2ptr");
01821 
01822     llvm::Type *fromType = value->getType();
01823     if (llvm::isa<llvm::VectorType>(fromType)) {
01824         // varying pointer
01825         if (fromType == toType)
01826             // done
01827             return value;
01828         else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
01829             return TruncInst(value, toType, name);
01830         else {
01831             AssertPos(currentPos, fromType->getScalarSizeInBits() <
01832                    toType->getScalarSizeInBits());
01833             return ZExtInst(value, toType, name);
01834         }
01835     }
01836 
01837     llvm::Instruction *inst = new llvm::IntToPtrInst(value, toType, name, 
01838                                                      bblock);
01839     AddDebugPos(inst);
01840     return inst;
01841 }
01842 
01843 
01844 llvm::Instruction *
01845 FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type,
01846                                const char *name) {
01847     if (value == NULL) {
01848         AssertPos(currentPos, m->errorCount > 0);
01849         return NULL;
01850     }
01851 
01852     if (name == NULL)
01853         name = LLVMGetName(value, "_trunc");
01854 
01855     // TODO: we should probably handle the array case as in
01856     // e.g. BitCastInst(), but we don't currently need that functionality
01857     llvm::Instruction *inst = new llvm::TruncInst(value, type, name, bblock);
01858     AddDebugPos(inst);
01859     return inst;
01860 }
01861 
01862 
01863 llvm::Instruction *
01864 FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
01865                               llvm::Type *type, const char *name) {
01866     if (value == NULL) {
01867         AssertPos(currentPos, m->errorCount > 0);
01868         return NULL;
01869     }
01870 
01871     if (name == NULL)
01872         name = LLVMGetName(value, "_cast");
01873 
01874     // TODO: we should probably handle the array case as in
01875     // e.g. BitCastInst(), but we don't currently need that functionality
01876     llvm::Instruction *inst = llvm::CastInst::Create(op, value, type, name,
01877                                                      bblock);
01878     AddDebugPos(inst);
01879     return inst;
01880 }
01881 
01882 
01883 llvm::Instruction *
01884 FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type, 
01885                                 const char *name) {
01886     if (value == NULL) {
01887         AssertPos(currentPos, m->errorCount > 0);
01888         return NULL;
01889     }
01890 
01891     if (name == NULL)
01892         name = LLVMGetName(value, "_cast");
01893 
01894     // TODO: we should probably handle the array case as in
01895     // e.g. BitCastInst(), but we don't currently need that functionality
01896     llvm::Instruction *inst = llvm::CastInst::CreateFPCast(value, type, name, bblock);
01897     AddDebugPos(inst);
01898     return inst;
01899 }
01900 
01901 
01902 llvm::Instruction *
01903 FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type, 
01904                               const char *name) {
01905     if (value == NULL) {
01906         AssertPos(currentPos, m->errorCount > 0);
01907         return NULL;
01908     }
01909 
01910     if (name == NULL)
01911         name = LLVMGetName(value, "_sext");
01912 
01913     // TODO: we should probably handle the array case as in
01914     // e.g. BitCastInst(), but we don't currently need that functionality
01915     llvm::Instruction *inst = new llvm::SExtInst(value, type, name, bblock);
01916     AddDebugPos(inst);
01917     return inst;
01918 }
01919 
01920 
01921 llvm::Instruction *
01922 FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type, 
01923                               const char *name) {
01924     if (value == NULL) {
01925         AssertPos(currentPos, m->errorCount > 0);
01926         return NULL;
01927     }
01928 
01929     if (name == NULL)
01930         name = LLVMGetName(value, "_zext");
01931 
01932     // TODO: we should probably handle the array case as in
01933     // e.g. BitCastInst(), but we don't currently need that functionality
01934     llvm::Instruction *inst = new llvm::ZExtInst(value, type, name, bblock);
01935     AddDebugPos(inst);
01936     return inst;
01937 }
01938 
01939 
01940 /** Utility routine used by the GetElementPtrInst() methods; given a
01941     pointer to some type (either uniform or varying) and an index (also
01942     either uniform or varying), this returns the new pointer (varying if
01943     appropriate) given by offsetting the base pointer by the index times
01944     the size of the object that the pointer points to.
01945  */
01946 llvm::Value *
01947 FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index, 
01948                                      const Type *ptrType) {
01949     // Find the scale factor for the index (i.e. the size of the object
01950     // that the pointer(s) point(s) to.
01951     const Type *scaleType = ptrType->GetBaseType();
01952     llvm::Value *scale = g->target.SizeOf(scaleType->LLVMType(g->ctx), bblock);
01953 
01954     bool indexIsVarying = 
01955         llvm::isa<llvm::VectorType>(index->getType());
01956     llvm::Value *offset = NULL;
01957     if (indexIsVarying == false) {
01958         // Truncate or sign extend the index as appropriate to a 32 or
01959         // 64-bit type.
01960         if ((g->target.is32Bit || g->opt.force32BitAddressing) && 
01961             index->getType() == LLVMTypes::Int64Type)
01962             index = TruncInst(index, LLVMTypes::Int32Type);
01963         else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) &&
01964                  index->getType() == LLVMTypes::Int32Type)
01965             index = SExtInst(index, LLVMTypes::Int64Type);
01966 
01967         // do a scalar multiply to get the offset as index * scale and then
01968         // smear the result out to be a vector; this is more efficient than
01969         // first promoting both the scale and the index to vectors and then
01970         // multiplying.
01971         offset = BinaryOperator(llvm::Instruction::Mul, scale, index);
01972         offset = SmearUniform(offset);
01973     }
01974     else {
01975         // Similarly, truncate or sign extend the index to be a 32 or 64
01976         // bit vector type
01977         if ((g->target.is32Bit || g->opt.force32BitAddressing) && 
01978             index->getType() == LLVMTypes::Int64VectorType)
01979             index = TruncInst(index, LLVMTypes::Int32VectorType); 
01980         else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) &&
01981                  index->getType() == LLVMTypes::Int32VectorType)
01982             index = SExtInst(index, LLVMTypes::Int64VectorType);
01983 
01984         scale = SmearUniform(scale);
01985 
01986         // offset = index * scale
01987         offset = BinaryOperator(llvm::Instruction::Mul, scale, index, 
01988                                 LLVMGetName("mul", scale, index));
01989     }
01990 
01991     // For 64-bit targets, if we've been doing our offset calculations in
01992     // 32 bits, we still have to convert to a 64-bit value before we
01993     // actually add the offset to the pointer.
01994     if (g->target.is32Bit == false && g->opt.force32BitAddressing == true)
01995         offset = SExtInst(offset, LLVMTypes::Int64VectorType, 
01996                           LLVMGetName(offset, "_to_64"));
01997 
01998     // Smear out the pointer to be varying; either the base pointer or the
01999     // index must be varying for this method to be called.
02000     bool baseIsUniform = 
02001         (llvm::isa<llvm::PointerType>(basePtr->getType()));
02002     AssertPos(currentPos, baseIsUniform == false || indexIsVarying == true);
02003     llvm::Value *varyingPtr = baseIsUniform ? SmearUniform(basePtr) : basePtr;
02004 
02005     // newPtr = ptr + offset
02006     return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset, 
02007                           LLVMGetName(basePtr, "_offset"));
02008 }
02009 
02010 
02011 void
02012 FunctionEmitContext::MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1) {
02013     llvm::Type *type0 = (*v0)->getType();
02014     llvm::Type *type1 = (*v1)->getType();
02015 
02016     // First, promote to a vector type if one of the two values is a vector
02017     // type
02018     if (llvm::isa<llvm::VectorType>(type0) &&
02019         !llvm::isa<llvm::VectorType>(type1)) {
02020         *v1 = SmearUniform(*v1, "smear_v1");
02021         type1 = (*v1)->getType();
02022     }
02023     if (!llvm::isa<llvm::VectorType>(type0) &&
02024         llvm::isa<llvm::VectorType>(type1)) {
02025         *v0 = SmearUniform(*v0, "smear_v0");
02026         type0 = (*v0)->getType();
02027     }
02028 
02029     // And then update to match bit widths
02030     if (type0 == LLVMTypes::Int32VectorType &&
02031         type1 == LLVMTypes::Int64VectorType)
02032         *v0 = SExtInst(*v0, LLVMTypes::Int64VectorType);
02033     else if (type1 == LLVMTypes::Int32VectorType &&
02034              type0 == LLVMTypes::Int64VectorType)
02035         *v1 = SExtInst(*v1, LLVMTypes::Int64VectorType);
02036 }
02037 
02038 
02039 /** Given an integer index in indexValue that's indexing into an array of
02040     soa<> structures with given soaWidth, compute the two sub-indices we
02041     need to do the actual indexing calculation:
02042 
02043     subIndices[0] = (indexValue >> log(soaWidth))
02044     subIndices[1] = (indexValue & (soaWidth-1))
02045  */
02046 static llvm::Value *
02047 lComputeSliceIndex(FunctionEmitContext *ctx, int soaWidth,
02048                    llvm::Value *indexValue, llvm::Value *ptrSliceOffset, 
02049                    llvm::Value **newSliceOffset) {
02050     // Compute the log2 of the soaWidth.
02051     Assert(soaWidth > 0);
02052     int logWidth = 0, sw = soaWidth;
02053     while (sw > 1) {
02054         ++logWidth;
02055         sw >>= 1;
02056     }
02057     Assert((1 << logWidth) == soaWidth);
02058 
02059     ctx->MatchIntegerTypes(&indexValue, &ptrSliceOffset);
02060 
02061     llvm::Type *indexType = indexValue->getType();
02062     llvm::Value *shift = LLVMIntAsType(logWidth, indexType);
02063     llvm::Value *mask = LLVMIntAsType(soaWidth-1, indexType);
02064 
02065     llvm::Value *indexSum = 
02066         ctx->BinaryOperator(llvm::Instruction::Add, indexValue, ptrSliceOffset,
02067                             "index_sum");
02068 
02069     // minor index = (index & (soaWidth - 1))
02070     *newSliceOffset = ctx->BinaryOperator(llvm::Instruction::And, indexSum,
02071                                           mask, "slice_index_minor");
02072     // slice offsets are always 32 bits...
02073     if ((*newSliceOffset)->getType() == LLVMTypes::Int64Type)
02074         *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32Type);
02075     else if ((*newSliceOffset)->getType() == LLVMTypes::Int64VectorType)
02076         *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32VectorType);
02077 
02078     // major index = (index >> logWidth)
02079     return ctx->BinaryOperator(llvm::Instruction::AShr, indexSum,
02080                                shift, "slice_index_major");
02081 }
02082 
02083 
02084 llvm::Value *
02085 FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) {
02086     // Create a small struct where the first element is the type of the
02087     // given pointer and the second element is the type of the offset
02088     // value.
02089     std::vector<llvm::Type *> eltTypes;
02090     eltTypes.push_back(ptr->getType());
02091     eltTypes.push_back(offset->getType());
02092     llvm::StructType *st = 
02093         llvm::StructType::get(*g->ctx, eltTypes);
02094 
02095     llvm::Value *ret = llvm::UndefValue::get(st);
02096     ret = InsertInst(ret, ptr, 0, LLVMGetName(ret, "_slice_ptr"));
02097     ret = InsertInst(ret, offset, 1, LLVMGetName(ret, "_slice_offset"));
02098     return ret;
02099 }
02100 
02101 
02102 llvm::Value *
02103 FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, 
02104                                        const Type *ptrRefType, const char *name) {
02105     if (basePtr == NULL || index == NULL) {
02106         AssertPos(currentPos, m->errorCount > 0);
02107         return NULL;
02108     }
02109 
02110     // Regularize to a standard pointer type for basePtr's type
02111     const PointerType *ptrType;
02112     if (CastType<ReferenceType>(ptrRefType) != NULL)
02113         ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
02114     else {
02115         ptrType = CastType<PointerType>(ptrRefType);
02116         AssertPos(currentPos, ptrType != NULL);
02117     }
02118 
02119     if (ptrType->IsSlice()) {
02120         AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));
02121 
02122         llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
02123         if (ptrType->IsFrozenSlice() == false) {
02124             // For slice pointers that aren't frozen, we compute a new
02125             // index based on the given index plus the offset in the slice
02126             // pointer.  This gives us an updated integer slice index for
02127             // the resulting slice pointer and then an index to index into
02128             // the soa<> structs with.
02129             llvm::Value *newSliceOffset;
02130             int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
02131             index = lComputeSliceIndex(this, soaWidth, index, 
02132                                        ptrSliceOffset, &newSliceOffset);
02133             ptrSliceOffset = newSliceOffset;
02134         }
02135 
02136         // Handle the indexing into the soa<> structs with the major
02137         // component of the index through a recursive call
02138         llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index,
02139                                            ptrType->GetAsNonSlice(), name);
02140 
02141         // And mash the results together for the return value
02142         return MakeSlicePointer(p, ptrSliceOffset);
02143     }
02144 
02145     // Double-check consistency between the given pointer type and its LLVM
02146     // type.
02147     if (ptrType->IsUniformType())
02148         AssertPos(currentPos, llvm::isa<llvm::PointerType>(basePtr->getType()));
02149     else if (ptrType->IsVaryingType())
02150         AssertPos(currentPos, llvm::isa<llvm::VectorType>(basePtr->getType()));
02151 
02152     bool indexIsVaryingType = 
02153         llvm::isa<llvm::VectorType>(index->getType());
02154 
02155     if (indexIsVaryingType == false && ptrType->IsUniformType() == true) {
02156         // The easy case: both the base pointer and the indices are
02157         // uniform, so just emit the regular LLVM GEP instruction
02158         llvm::Value *ind[1] = { index };
02159         llvm::ArrayRef<llvm::Value *> arrayRef(&ind[0], &ind[1]);
02160         llvm::Instruction *inst = 
02161             llvm::GetElementPtrInst::Create(basePtr, arrayRef,
02162                                             name ? name : "gep", bblock);
02163         AddDebugPos(inst);
02164         return inst;
02165     }
02166     else
02167         return applyVaryingGEP(basePtr, index, ptrType);
02168 }
02169 
02170 
02171 llvm::Value *
02172 FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0, 
02173                                        llvm::Value *index1, const Type *ptrRefType,
02174                                        const char *name) {
02175     if (basePtr == NULL || index0 == NULL || index1 == NULL) {
02176         AssertPos(currentPos, m->errorCount > 0);
02177         return NULL;
02178     }
02179 
02180     // Regaularize the pointer type for basePtr
02181     const PointerType *ptrType = NULL;
02182     if (CastType<ReferenceType>(ptrRefType) != NULL)
02183         ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
02184     else {
02185         ptrType = CastType<PointerType>(ptrRefType);
02186         AssertPos(currentPos, ptrType != NULL);
02187     }
02188 
02189     if (ptrType->IsSlice()) {
02190         // Similar to the 1D GEP implementation above, for non-frozen slice
02191         // pointers we do the two-step indexing calculation and then pass
02192         // the new major index on to a recursive GEP call.
02193         AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));
02194         llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
02195         if (ptrType->IsFrozenSlice() == false) {
02196             llvm::Value *newSliceOffset;
02197             int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
02198             index1 = lComputeSliceIndex(this, soaWidth, index1,
02199                                         ptrSliceOffset, &newSliceOffset);
02200             ptrSliceOffset = newSliceOffset;
02201         }
02202 
02203         llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index0,
02204                                            index1, ptrType->GetAsNonSlice(), 
02205                                            name);
02206         return MakeSlicePointer(p, ptrSliceOffset);
02207     }
02208 
02209     bool index0IsVaryingType = 
02210         llvm::isa<llvm::VectorType>(index0->getType());
02211     bool index1IsVaryingType = 
02212         llvm::isa<llvm::VectorType>(index1->getType());
02213 
02214     if (index0IsVaryingType == false && index1IsVaryingType == false && 
02215         ptrType->IsUniformType() == true) {
02216         // The easy case: both the base pointer and the indices are
02217         // uniform, so just emit the regular LLVM GEP instruction
02218         llvm::Value *indices[2] = { index0, index1 };
02219         llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
02220         llvm::Instruction *inst = 
02221             llvm::GetElementPtrInst::Create(basePtr, arrayRef,
02222                                             name ? name : "gep", bblock);
02223         AddDebugPos(inst);
02224         return inst;
02225     }
02226     else {
02227         // Handle the first dimension with index0
02228         llvm::Value *ptr0 = GetElementPtrInst(basePtr, index0, ptrType);
02229 
02230         // Now index into the second dimension with index1.  First figure
02231         // out the type of ptr0.
02232         const Type *baseType = ptrType->GetBaseType();
02233         const SequentialType *st = CastType<SequentialType>(baseType);
02234         AssertPos(currentPos, st != NULL);
02235 
02236         bool ptr0IsUniform = 
02237             llvm::isa<llvm::PointerType>(ptr0->getType());
02238         const Type *ptr0BaseType = st->GetElementType();
02239         const Type *ptr0Type = ptr0IsUniform ?
02240             PointerType::GetUniform(ptr0BaseType) : 
02241             PointerType::GetVarying(ptr0BaseType);
02242 
02243         return applyVaryingGEP(ptr0, index1, ptr0Type);
02244     }
02245 }
02246 
02247 
02248 llvm::Value *
02249 FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum,
02250                                       const Type *ptrRefType, const char *name,
02251                                       const PointerType **resultPtrType) {
02252     if (resultPtrType != NULL)
02253         AssertPos(currentPos, ptrRefType != NULL);
02254 
02255     llvm::PointerType *llvmPtrType = 
02256         llvm::dyn_cast<llvm::PointerType>(fullBasePtr->getType());
02257     if (llvmPtrType != NULL) {
02258         llvm::StructType *llvmStructType = 
02259             llvm::dyn_cast<llvm::StructType>(llvmPtrType->getElementType());
02260         if (llvmStructType != NULL && llvmStructType->isSized() == false) {
02261             AssertPos(currentPos, m->errorCount > 0);
02262             return NULL;
02263         }
02264     }
02265 
02266     // (Unfortunately) it's not required to pass a non-NULL ptrRefType, but
02267     // if we have one, regularize into a pointer type.
02268     const PointerType *ptrType = NULL;
02269     if (ptrRefType != NULL) {
02270         // Normalize references to uniform pointers
02271         if (CastType<ReferenceType>(ptrRefType) != NULL)
02272             ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
02273         else
02274             ptrType = CastType<PointerType>(ptrRefType);
02275         AssertPos(currentPos, ptrType != NULL);
02276     }
02277 
02278     // Similarly, we have to see if the pointer type is a struct to see if
02279     // we have a slice pointer instead of looking at ptrType; this is also
02280     // unfortunate...
02281     llvm::Value *basePtr = fullBasePtr;
02282     bool baseIsSlicePtr = 
02283         llvm::isa<llvm::StructType>(fullBasePtr->getType());
02284     const PointerType *rpt;
02285     if (baseIsSlicePtr) {
02286         AssertPos(currentPos, ptrType != NULL);
02287         // Update basePtr to just be the part that actually points to the
02288         // start of an soa<> struct for now; the element offset computation
02289         // doesn't change the slice offset, so we'll incorporate that into
02290         // the final value right before this method returns.
02291         basePtr = ExtractInst(fullBasePtr, 0);
02292         if (resultPtrType == NULL)
02293             resultPtrType = &rpt;
02294     }
02295 
02296     // Return the pointer type of the result of this call, for callers that
02297     // want it.
02298     if (resultPtrType != NULL) {
02299         AssertPos(currentPos, ptrType != NULL);
02300         const CollectionType *ct =
02301             CastType<CollectionType>(ptrType->GetBaseType());
02302         AssertPos(currentPos, ct != NULL);
02303         *resultPtrType = new PointerType(ct->GetElementType(elementNum),
02304                                          ptrType->GetVariability(),
02305                                          ptrType->IsConstType(),
02306                                          ptrType->IsSlice());
02307     }
02308 
02309     llvm::Value *resultPtr = NULL;
02310     if (ptrType == NULL || ptrType->IsUniformType()) {
02311         // If the pointer is uniform, we can use the regular LLVM GEP.
02312         llvm::Value *offsets[2] = { LLVMInt32(0), LLVMInt32(elementNum) };
02313         llvm::ArrayRef<llvm::Value *> arrayRef(&offsets[0], &offsets[2]);
02314         resultPtr = 
02315             llvm::GetElementPtrInst::Create(basePtr, arrayRef,
02316                                             name ? name : "struct_offset", bblock);
02317     }
02318     else {
02319         // Otherwise do the math to find the offset and add it to the given
02320         // varying pointers
02321         const StructType *st = CastType<StructType>(ptrType->GetBaseType());
02322         llvm::Value *offset = NULL;
02323         if (st != NULL)
02324             // If the pointer is to a structure, Target::StructOffset() gives
02325             // us the offset in bytes to the given element of the structure
02326             offset = g->target.StructOffset(st->LLVMType(g->ctx), elementNum,
02327                                             bblock);
02328         else {
02329             // Otherwise we should have a vector or array here and the offset
02330             // is given by the element number times the size of the element
02331             // type of the vector.
02332             const SequentialType *st =
02333                 CastType<SequentialType>(ptrType->GetBaseType());
02334             AssertPos(currentPos, st != NULL);
02335             llvm::Value *size = 
02336                 g->target.SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock);
02337             llvm::Value *scale = (g->target.is32Bit || g->opt.force32BitAddressing) ?
02338                 LLVMInt32(elementNum) : LLVMInt64(elementNum);
02339             offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
02340         }
02341 
02342         offset = SmearUniform(offset, "offset_smear");
02343 
02344         if (g->target.is32Bit == false && g->opt.force32BitAddressing == true)
02345             // If we're doing 32 bit addressing with a 64 bit target, although
02346             // we did the math above in 32 bit, we need to go to 64 bit before
02347             // we add the offset to the varying pointers.
02348             offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
02349 
02350         resultPtr = BinaryOperator(llvm::Instruction::Add, basePtr, offset, 
02351                                    "struct_ptr_offset");
02352     }
02353 
02354     // Finally, if had a slice pointer going in, mash back together with
02355     // the original (unchanged) slice offset.
02356     if (baseIsSlicePtr)
02357         return MakeSlicePointer(resultPtr, ExtractInst(fullBasePtr, 1));
02358     else
02359         return resultPtr;
02360 }
02361     
02362 
02363 llvm::Value *
02364 FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
02365     if (ptr == NULL) {
02366         AssertPos(currentPos, m->errorCount > 0);
02367         return NULL;
02368     }
02369 
02370     llvm::PointerType *pt = 
02371         llvm::dyn_cast<llvm::PointerType>(ptr->getType());
02372     AssertPos(currentPos, pt != NULL);
02373 
02374     if (name == NULL)
02375         name = LLVMGetName(ptr, "_load");
02376 
02377     // FIXME: it's not clear to me that we generate unaligned vector loads
02378     // of varying stuff out of the front-end any more.  (Only by the
02379     // optimization passes that lower gathers to vector loads, I think..)
02380     // So remove this??
02381     int align = 0;
02382     if (llvm::isa<llvm::VectorType>(pt->getElementType()))
02383         align = 1;
02384     llvm::Instruction *inst = new llvm::LoadInst(ptr, name, 
02385                                                  false /* not volatile */,
02386                                                  align, bblock);
02387     AddDebugPos(inst);
02388     return inst;
02389 }
02390 
02391 
02392 /** Given a slice pointer to soa'd data that is a basic type (atomic,
02393     pointer, or enum type), use the slice offset to compute pointer(s) to
02394     the appropriate individual data element(s).
02395  */
02396 static llvm::Value *
02397 lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr,
02398                   const PointerType **ptrType) {
02399     Assert(CastType<PointerType>(*ptrType) != NULL);
02400 
02401     llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, LLVMGetName(ptr, "_ptr"));
02402     llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, LLVMGetName(ptr, "_offset"));
02403 
02404     // slicePtr should be a pointer to an soa-width wide array of the
02405     // final atomic/enum/pointer type
02406     const Type *unifBaseType = (*ptrType)->GetBaseType()->GetAsUniformType();
02407     Assert(Type::IsBasicType(unifBaseType));
02408 
02409     // The final pointer type is a uniform or varying pointer to the
02410     // underlying uniform type, depending on whether the given pointer is
02411     // uniform or varying.
02412     *ptrType = (*ptrType)->IsUniformType() ? 
02413         PointerType::GetUniform(unifBaseType) : 
02414         PointerType::GetVarying(unifBaseType);
02415 
02416     // For uniform pointers, bitcast to a pointer to the uniform element
02417     // type, so that the GEP below does the desired indexing
02418     if ((*ptrType)->IsUniformType())
02419         slicePtr = ctx->BitCastInst(slicePtr, (*ptrType)->LLVMType(g->ctx));
02420 
02421     // And finally index based on the slice offset
02422     return ctx->GetElementPtrInst(slicePtr, sliceOffset, *ptrType,
02423                                   LLVMGetName(slicePtr, "_final_gep"));
02424 }
02425 
02426 
02427 /** Utility routine that loads from a uniform pointer to soa<> data,
02428     returning a regular uniform (non-SOA result).
02429  */
02430 llvm::Value *
02431 FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask,
02432                                         const PointerType *ptrType,
02433                                         const char *name) {
02434     const Type *unifType = ptrType->GetBaseType()->GetAsUniformType();
02435 
02436     const CollectionType *ct = CastType<CollectionType>(ptrType->GetBaseType());
02437     if (ct != NULL) {
02438         // If we have a struct/array, we need to decompose it into
02439         // individual element loads to fill in the result structure since
02440         // the SOA slice of values we need isn't contiguous in memory...
02441         llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx);
02442         llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
02443 
02444         for (int i = 0; i < ct->GetElementCount(); ++i) {
02445             const PointerType *eltPtrType;
02446             llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType, 
02447                                                    "elt_offset", &eltPtrType);
02448             llvm::Value *eltValue = LoadInst(eltPtr, mask, eltPtrType, name);
02449             retValue = InsertInst(retValue, eltValue, i, "set_value");
02450         }
02451 
02452         return retValue;
02453     }
02454     else {
02455         // Otherwise we've made our way to a slice pointer to a basic type;
02456         // we need to apply the slice offset into this terminal SOA array
02457         // and then perform the final load
02458         ptr = lFinalSliceOffset(this, ptr, &ptrType);
02459         return LoadInst(ptr, mask, ptrType, name);
02460     }
02461 }
02462 
02463 
02464 llvm::Value *
02465 FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask,
02466                               const Type *ptrRefType, const char *name) {
02467     if (ptr == NULL) {
02468         AssertPos(currentPos, m->errorCount > 0);
02469         return NULL;
02470     }
02471 
02472     AssertPos(currentPos, ptrRefType != NULL && mask != NULL);
02473 
02474     if (name == NULL)
02475         name = LLVMGetName(ptr, "_load");
02476 
02477     const PointerType *ptrType;
02478     if (CastType<ReferenceType>(ptrRefType) != NULL)
02479         ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
02480     else {
02481         ptrType = CastType<PointerType>(ptrRefType);
02482         AssertPos(currentPos, ptrType != NULL);
02483     }
02484 
02485     if (ptrType->IsUniformType()) {
02486         if (ptrType->IsSlice()) {
02487             return loadUniformFromSOA(ptr, mask, ptrType, name);
02488         }
02489         else {
02490             // FIXME: same issue as above load inst regarding alignment...
02491             //
02492             // If the ptr is a straight up regular pointer, then just issue
02493             // a regular load.  First figure out the alignment; in general we
02494             // can just assume the natural alignment (0 here), but for varying
02495             // atomic types, we need to make sure that the compiler emits
02496             // unaligned vector loads, so we specify a reduced alignment here.
02497             int align = 0;
02498             const AtomicType *atomicType =
02499                 CastType<AtomicType>(ptrType->GetBaseType());
02500             if (atomicType != NULL && atomicType->IsVaryingType())
02501                 // We actually just want to align to the vector element
02502                 // alignment, but can't easily get that here, so just tell LLVM
02503                 // it's totally unaligned.  (This shouldn't make any difference
02504                 // vs the proper alignment in practice.)
02505                 align = 1;
02506             llvm::Instruction *inst = new llvm::LoadInst(ptr, name,
02507                                                          false /* not volatile */,
02508                                                          align, bblock);
02509             AddDebugPos(inst);
02510             return inst;
02511         }
02512     }
02513     else {
02514         // Otherwise we should have a varying ptr and it's time for a
02515         // gather.
02516         return gather(ptr, ptrType, GetFullMask(), name);
02517     }
02518 }
02519 
02520 
02521 llvm::Value *
02522 FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType, 
02523                             llvm::Value *mask, const char *name) {
02524     // We should have a varying pointer if we get here...
02525     AssertPos(currentPos, ptrType->IsVaryingType());
02526 
02527     const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType();
02528     llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
02529 
02530     const CollectionType *collectionType = 
02531         CastType<CollectionType>(ptrType->GetBaseType());
02532     if (collectionType != NULL) {
02533         // For collections, recursively gather element wise to find the
02534         // result.
02535         llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
02536 
02537         for (int i = 0; i < collectionType->GetElementCount(); ++i) {
02538             const PointerType *eltPtrType;
02539             llvm::Value *eltPtr = 
02540                 AddElementOffset(ptr, i, ptrType, "gather_elt_ptr", &eltPtrType);
02541 
02542             eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType);
02543 
02544             // This in turn will be another gather
02545             llvm::Value *eltValues = LoadInst(eltPtr, mask, eltPtrType, name);
02546 
02547             retValue = InsertInst(retValue, eltValues, i, "set_value");
02548         }
02549         return retValue;
02550     }
02551     else if (ptrType->IsSlice()) {
02552         // If we have a slice pointer, we need to add the final slice
02553         // offset here right before issuing the actual gather
02554         //
02555         // FIXME: would it be better to do the corresponding same thing for
02556         // all of the varying offsets stuff here (and in scatter)?
02557         ptr = lFinalSliceOffset(this, ptr, &ptrType);
02558     }
02559 
02560     // Otherwise we should just have a basic scalar or pointer type and we
02561     // can go and do the actual gather
02562     AddInstrumentationPoint("gather");
02563 
02564     // Figure out which gather function to call based on the size of
02565     // the elements.
02566     const PointerType *pt = CastType<PointerType>(returnType);
02567     const char *funcName = NULL;
02568     if (pt != NULL)
02569         funcName = g->target.is32Bit ? "__pseudo_gather32_i32" : 
02570             "__pseudo_gather64_i64";
02571     else if (llvmReturnType == LLVMTypes::DoubleVectorType)
02572         funcName = g->target.is32Bit ? "__pseudo_gather32_double" :
02573             "__pseudo_gather64_double";
02574     else if (llvmReturnType == LLVMTypes::Int64VectorType)
02575         funcName = g->target.is32Bit ? "__pseudo_gather32_i64" : 
02576             "__pseudo_gather64_i64";
02577     else if (llvmReturnType == LLVMTypes::FloatVectorType)
02578         funcName = g->target.is32Bit ? "__pseudo_gather32_float" : 
02579             "__pseudo_gather64_float";
02580     else if (llvmReturnType == LLVMTypes::Int32VectorType)
02581         funcName = g->target.is32Bit ? "__pseudo_gather32_i32" : 
02582             "__pseudo_gather64_i32";
02583     else if (llvmReturnType == LLVMTypes::Int16VectorType)
02584         funcName = g->target.is32Bit ? "__pseudo_gather32_i16" : 
02585             "__pseudo_gather64_i16";
02586     else {
02587         AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType);
02588         funcName = g->target.is32Bit ? "__pseudo_gather32_i8" : 
02589             "__pseudo_gather64_i8";
02590     }
02591 
02592     llvm::Function *gatherFunc = m->module->getFunction(funcName);
02593     AssertPos(currentPos, gatherFunc != NULL);
02594 
02595     llvm::Value *gatherCall = CallInst(gatherFunc, NULL, ptr, mask, name);
02596 
02597     // Add metadata about the source file location so that the
02598     // optimization passes can print useful performance warnings if we
02599     // can't optimize out this gather
02600     if (disableGSWarningCount == 0)
02601         addGSMetadata(gatherCall, currentPos);
02602 
02603     return gatherCall;
02604 }
02605 
02606 
02607 /** Add metadata to the given instruction to encode the current source file
02608     position.  This data is used in the lGetSourcePosFromMetadata()
02609     function in opt.cpp. 
02610 */
02611 void
02612 FunctionEmitContext::addGSMetadata(llvm::Value *v, SourcePos pos) {
02613     llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(v);
02614     if (inst == NULL)
02615         return;
02616 
02617     llvm::Value *str = llvm::MDString::get(*g->ctx, pos.name);
02618     llvm::MDNode *md = llvm::MDNode::get(*g->ctx, str);
02619     inst->setMetadata("filename", md);
02620 
02621     llvm::Value *first_line = LLVMInt32(pos.first_line);
02622     md = llvm::MDNode::get(*g->ctx, first_line);
02623     inst->setMetadata("first_line", md);
02624 
02625     llvm::Value *first_column = LLVMInt32(pos.first_column);
02626     md = llvm::MDNode::get(*g->ctx, first_column);
02627     inst->setMetadata("first_column", md);
02628 
02629     llvm::Value *last_line = LLVMInt32(pos.last_line);
02630     md = llvm::MDNode::get(*g->ctx, last_line);
02631     inst->setMetadata("last_line", md);
02632 
02633     llvm::Value *last_column = LLVMInt32(pos.last_column);
02634     md = llvm::MDNode::get(*g->ctx, last_column);
02635     inst->setMetadata("last_column", md);
02636 }
02637 
02638 
02639 llvm::Value *
02640 FunctionEmitContext::AllocaInst(llvm::Type *llvmType, 
02641                                 const char *name, int align, 
02642                                 bool atEntryBlock) {
02643     if (llvmType == NULL) {
02644         AssertPos(currentPos, m->errorCount > 0);
02645         return NULL;
02646     }
02647 
02648     llvm::AllocaInst *inst = NULL;
02649     if (atEntryBlock) {
02650         // We usually insert it right before the jump instruction at the
02651         // end of allocaBlock
02652         llvm::Instruction *retInst = allocaBlock->getTerminator();
02653         AssertPos(currentPos, retInst);
02654         inst = new llvm::AllocaInst(llvmType, name ? name : "", retInst);
02655     }
02656     else
02657         // Unless the caller overrode the default and wants it in the
02658         // current basic block
02659         inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock);
02660 
02661     // If no alignment was specified but we have an array of a uniform
02662     // type, then align it to 4 * the native vector width; it's not
02663     // unlikely that this array will be loaded into varying variables with
02664     // what will be aligned accesses if the uniform -> varying load is done
02665     // in regular chunks.
02666     llvm::ArrayType *arrayType = 
02667         llvm::dyn_cast<llvm::ArrayType>(llvmType);
02668     if (align == 0 && arrayType != NULL && 
02669         !llvm::isa<llvm::VectorType>(arrayType->getElementType()))
02670         align = 4 * g->target.nativeVectorWidth;
02671 
02672     if (align != 0)
02673         inst->setAlignment(align);
02674     // Don't add debugging info to alloca instructions
02675     return inst;
02676 }
02677 
02678 
02679 /** Code to store the given varying value to the given location, only
02680     storing the elements that correspond to active program instances as
02681     given by the provided storeMask value.  Note that the lvalue is only a
02682     single pointer, not a varying lvalue of one pointer per program
02683     instance (that case is handled by scatters).
02684  */
02685 void
02686 FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
02687                                  const Type *ptrType, llvm::Value *mask) {
02688     if (value == NULL || ptr == NULL) {
02689         AssertPos(currentPos, m->errorCount > 0);
02690         return;
02691     }
02692 
02693     AssertPos(currentPos, CastType<PointerType>(ptrType) != NULL);
02694     AssertPos(currentPos, ptrType->IsUniformType());
02695 
02696     const Type *valueType = ptrType->GetBaseType();
02697     const CollectionType *collectionType = CastType<CollectionType>(valueType);
02698     if (collectionType != NULL) {
02699         // Assigning a structure / array / vector. Handle each element
02700         // individually with what turns into a recursive call to
02701         // makedStore()
02702         for (int i = 0; i < collectionType->GetElementCount(); ++i) {
02703             const Type *eltType = collectionType->GetElementType(i);
02704             if (eltType == NULL) {
02705                 Assert(m->errorCount > 0);
02706                 continue;
02707             }
02708             llvm::Value *eltValue = ExtractInst(value, i, "value_member");
02709             llvm::Value *eltPtr = 
02710                 AddElementOffset(ptr, i, ptrType, "struct_ptr_ptr");
02711             const Type *eltPtrType = PointerType::GetUniform(eltType);
02712             StoreInst(eltValue, eltPtr, mask, eltType, eltPtrType);
02713         }
02714         return;
02715     }
02716 
02717     // We must have a regular atomic, enumerator, or pointer type at this
02718     // point.
02719     AssertPos(currentPos, Type::IsBasicType(valueType));
02720     valueType = valueType->GetAsNonConstType();
02721 
02722     // Figure out if we need a 8, 16, 32 or 64-bit masked store.
02723     llvm::Function *maskedStoreFunc = NULL;
02724 
02725     const PointerType *pt = CastType<PointerType>(valueType);
02726     if (pt != NULL) {
02727         if (pt->IsSlice()) {
02728             // Masked store of (varying) slice pointer.
02729             AssertPos(currentPos, pt->IsVaryingType());
02730                     
02731             // First, extract the pointer from the slice struct and masked
02732             // store that.
02733             llvm::Value *v0 = ExtractInst(value, 0);
02734             llvm::Value *p0 = AddElementOffset(ptr, 0, ptrType);
02735             maskedStore(v0, p0, PointerType::GetUniform(pt->GetAsNonSlice()),
02736                         mask);
02737 
02738             // And then do same for the integer offset
02739             llvm::Value *v1 = ExtractInst(value, 1);
02740             llvm::Value *p1 = AddElementOffset(ptr, 1, ptrType);
02741             const Type *offsetType = AtomicType::VaryingInt32;
02742             maskedStore(v1, p1, PointerType::GetUniform(offsetType), mask);
02743 
02744             return;
02745         }
02746 
02747         if (g->target.is32Bit)
02748             maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
02749         else
02750             maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
02751     }
02752     else if (Type::Equal(valueType, AtomicType::VaryingBool) &&
02753              g->target.maskBitCount == 1) {
02754         llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask,
02755                                               LLVMMaskAllOn, "~mask");
02756         llvm::Value *old = LoadInst(ptr);
02757         llvm::Value *maskedOld = BinaryOperator(llvm::Instruction::And, old,
02758                                                 notMask, "old&~mask");
02759         llvm::Value *maskedNew = BinaryOperator(llvm::Instruction::And, value,
02760                                                 mask, "new&mask");
02761         llvm::Value *final = BinaryOperator(llvm::Instruction::Or, maskedOld,
02762                                             maskedNew, "old_new_result");
02763         StoreInst(final, ptr);
02764         return;
02765     }
02766     else if (Type::Equal(valueType, AtomicType::VaryingDouble)) {
02767         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_double");
02768     }
02769     else if (Type::Equal(valueType, AtomicType::VaryingInt64) ||
02770              Type::Equal(valueType, AtomicType::VaryingUInt64)) {
02771         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
02772     }
02773     else if (Type::Equal(valueType, AtomicType::VaryingFloat)) {
02774         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_float");
02775     }
02776     else if (Type::Equal(valueType, AtomicType::VaryingBool) ||
02777              Type::Equal(valueType, AtomicType::VaryingInt32) ||
02778              Type::Equal(valueType, AtomicType::VaryingUInt32) ||
02779              CastType<EnumType>(valueType) != NULL) {
02780         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
02781     }
02782     else if (Type::Equal(valueType, AtomicType::VaryingInt16) ||
02783              Type::Equal(valueType, AtomicType::VaryingUInt16)) {
02784         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i16");
02785     }
02786     else if (Type::Equal(valueType, AtomicType::VaryingInt8) ||
02787              Type::Equal(valueType, AtomicType::VaryingUInt8)) {
02788         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i8");
02789     }
02790     AssertPos(currentPos, maskedStoreFunc != NULL);
02791 
02792     std::vector<llvm::Value *> args;
02793     args.push_back(ptr);
02794     args.push_back(value);
02795     args.push_back(mask);
02796     CallInst(maskedStoreFunc, NULL, args);
02797 }
02798 
02799 
02800 
02801 /** Scatter the given varying value to the locations given by the varying
02802     lvalue (which should be an array of pointers with size equal to the
02803     target's vector width).  We want to store each rvalue element at the
02804     corresponding pointer's location, *if* the mask for the corresponding
02805     program instance is on.  If it's off, don't do anything.
02806 */
02807 void
02808 FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, 
02809                              const Type *valueType, const Type *origPt,
02810                              llvm::Value *mask) {
02811     const PointerType *ptrType = CastType<PointerType>(origPt);
02812     AssertPos(currentPos, ptrType != NULL);
02813     AssertPos(currentPos, ptrType->IsVaryingType());
02814 
02815     const CollectionType *srcCollectionType = 
02816         CastType<CollectionType>(valueType);
02817     if (srcCollectionType != NULL) {
02818         // We're scattering a collection type--we need to keep track of the
02819         // source type (the type of the data values to be stored) and the
02820         // destination type (the type of objects in memory that will be
02821         // stored into) separately.  This is necessary so that we can get
02822         // all of the addressing calculations right if we're scattering
02823         // from a varying struct to an array of uniform instances of the
02824         // same struct type, versus scattering into an array of varying
02825         // instances of the struct type, etc.
02826         const CollectionType *dstCollectionType =
02827             CastType<CollectionType>(ptrType->GetBaseType());
02828         AssertPos(currentPos, dstCollectionType != NULL);
02829             
02830         // Scatter the collection elements individually
02831         for (int i = 0; i < srcCollectionType->GetElementCount(); ++i) {
02832             // First, get the values for the current element out of the
02833             // source.
02834             llvm::Value *eltValue = ExtractInst(value, i);
02835             const Type *srcEltType = srcCollectionType->GetElementType(i);
02836 
02837             // We may be scattering a uniform atomic element; in this case
02838             // we'll smear it out to be varying before making the recursive
02839             // scatter() call below.
02840             if (srcEltType->IsUniformType() && Type::IsBasicType(srcEltType)) {
02841                 eltValue = SmearUniform(eltValue, "to_varying");
02842                 srcEltType = srcEltType->GetAsVaryingType();
02843             }
02844 
02845             // Get the (varying) pointer to the i'th element of the target
02846             // collection
02847             llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType);
02848 
02849             // The destination element type may be uniform (e.g. if we're
02850             // scattering to an array of uniform structs).  Thus, we need
02851             // to be careful about passing the correct type to
02852             // addVaryingOffsetsIfNeeded() here.
02853             const Type *dstEltType = dstCollectionType->GetElementType(i);
02854             const PointerType *dstEltPtrType = PointerType::GetVarying(dstEltType);
02855             if (ptrType->IsSlice())
02856                 dstEltPtrType = dstEltPtrType->GetAsSlice();
02857 
02858             eltPtr = addVaryingOffsetsIfNeeded(eltPtr, dstEltPtrType);
02859 
02860             // And recursively scatter() until we hit a basic type, at
02861             // which point the actual memory operations can be performed...
02862             scatter(eltValue, eltPtr, srcEltType, dstEltPtrType, mask);
02863         }
02864         return;
02865     }
02866     else if (ptrType->IsSlice()) {
02867         // As with gather, we need to add the final slice offset finally
02868         // once we get to a terminal SOA array of basic types..
02869         ptr = lFinalSliceOffset(this, ptr, &ptrType);
02870     }
02871 
02872     const PointerType *pt = CastType<PointerType>(valueType);
02873 
02874     // And everything should be a pointer or atomic from here on out...
02875     AssertPos(currentPos, pt != NULL || CastType<AtomicType>(valueType) != NULL);
02876 
02877     llvm::Type *type = value->getType();
02878     const char *funcName = NULL;
02879     if (pt != NULL) {
02880         funcName = g->target.is32Bit ? "__pseudo_scatter32_i32" :
02881             "__pseudo_scatter64_i64";
02882     }
02883     else if (type == LLVMTypes::DoubleVectorType) {
02884         funcName = g->target.is32Bit ? "__pseudo_scatter32_double" :
02885             "__pseudo_scatter64_double";
02886     }
02887     else if (type == LLVMTypes::Int64VectorType) {
02888         funcName = g->target.is32Bit ? "__pseudo_scatter32_i64" :
02889             "__pseudo_scatter64_i64";
02890     }
02891     else if (type == LLVMTypes::FloatVectorType) {
02892         funcName = g->target.is32Bit ? "__pseudo_scatter32_float" :
02893             "__pseudo_scatter64_float";
02894     }
02895     else if (type == LLVMTypes::Int32VectorType) {
02896         funcName = g->target.is32Bit ? "__pseudo_scatter32_i32" :
02897             "__pseudo_scatter64_i32";
02898     }
02899     else if (type == LLVMTypes::Int16VectorType) {
02900         funcName = g->target.is32Bit ? "__pseudo_scatter32_i16" :
02901             "__pseudo_scatter64_i16";
02902     }
02903     else if (type == LLVMTypes::Int8VectorType) {
02904         funcName = g->target.is32Bit ? "__pseudo_scatter32_i8" :
02905             "__pseudo_scatter64_i8";
02906     }
02907 
02908     llvm::Function *scatterFunc = m->module->getFunction(funcName);
02909     AssertPos(currentPos, scatterFunc != NULL);
02910     
02911     AddInstrumentationPoint("scatter");
02912 
02913     std::vector<llvm::Value *> args;
02914     args.push_back(ptr);
02915     args.push_back(value);
02916     args.push_back(mask);
02917     llvm::Value *inst = CallInst(scatterFunc, NULL, args);
02918 
02919     if (disableGSWarningCount == 0)
02920         addGSMetadata(inst, currentPos);
02921 }
02922 
02923 
02924 void
02925 FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
02926     if (value == NULL || ptr == NULL) {
02927         // may happen due to error elsewhere
02928         AssertPos(currentPos, m->errorCount > 0);
02929         return;
02930     }
02931 
02932     llvm::Instruction *inst;
02933     if (llvm::isa<llvm::VectorType>(value->getType()))
02934         // FIXME: same for load--do we still need/want this??
02935         // Specify an unaligned store, since we don't know that the ptr
02936         // will in fact be aligned to a vector width here.  (Actually
02937         // should be aligned to the alignment of the vector elment type...)
02938         inst = new llvm::StoreInst(value, ptr, false /* not volatile */,
02939                                    1, bblock);
02940     else
02941         inst = new llvm::StoreInst(value, ptr, bblock);
02942 
02943     AddDebugPos(inst);
02944 }
02945 
02946 
02947 void
02948 FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
02949                                llvm::Value *mask, const Type *valueType,
02950                                const Type *ptrRefType) {
02951     if (value == NULL || ptr == NULL) {
02952         // may happen due to error elsewhere
02953         AssertPos(currentPos, m->errorCount > 0);
02954         return;
02955     }
02956 
02957     const PointerType *ptrType;
02958     if (CastType<ReferenceType>(ptrRefType) != NULL)
02959         ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
02960     else {
02961         ptrType = CastType<PointerType>(ptrRefType);
02962         AssertPos(currentPos, ptrType != NULL);
02963     }
02964 
02965     // Figure out what kind of store we're doing here
02966     if (ptrType->IsUniformType()) {
02967         if (ptrType->IsSlice())
02968             // storing a uniform value to a single slice of a SOA type
02969             storeUniformToSOA(value, ptr, mask, valueType, ptrType);
02970         else if (ptrType->GetBaseType()->IsUniformType())
02971             // the easy case
02972             StoreInst(value, ptr);
02973         else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations)
02974             // Otherwise it is a masked store unless we can determine that the
02975             // mask is all on...  (Unclear if this check is actually useful.)
02976             StoreInst(value, ptr);
02977         else
02978             maskedStore(value, ptr, ptrType, mask);
02979     }
02980     else {
02981         AssertPos(currentPos, ptrType->IsVaryingType());
02982         // We have a varying ptr (an array of pointers), so it's time to
02983         // scatter
02984         scatter(value, ptr, valueType, ptrType, GetFullMask());
02985     }
02986 }
02987 
02988 
02989 /** Store a uniform type to SOA-laid-out memory.
02990  */
02991 void
02992 FunctionEmitContext::storeUniformToSOA(llvm::Value *value, llvm::Value *ptr,
02993                                        llvm::Value *mask, const Type *valueType,
02994                                        const PointerType *ptrType) {
02995     AssertPos(currentPos, Type::EqualIgnoringConst(ptrType->GetBaseType()->GetAsUniformType(), 
02996                                     valueType));
02997 
02998     const CollectionType *ct = CastType<CollectionType>(valueType);
02999     if (ct != NULL) {
03000         // Handle collections element wise...
03001         for (int i = 0; i < ct->GetElementCount(); ++i) {
03002             llvm::Value *eltValue = ExtractInst(value, i);
03003             const Type *eltType = ct->GetElementType(i);
03004             const PointerType *dstEltPtrType;
03005             llvm::Value *dstEltPtr = 
03006                 AddElementOffset(ptr, i, ptrType, "slice_offset",
03007                                  &dstEltPtrType);
03008             StoreInst(eltValue, dstEltPtr, mask, eltType, dstEltPtrType);
03009         }
03010     }
03011     else {
03012         // We're finally at a leaf SOA array; apply the slice offset and
03013         // then we can do a final regular store
03014         AssertPos(currentPos, Type::IsBasicType(valueType));
03015         ptr = lFinalSliceOffset(this, ptr, &ptrType);
03016         StoreInst(value, ptr);
03017     }
03018 }
03019 
03020 
03021 void
03022 FunctionEmitContext::MemcpyInst(llvm::Value *dest, llvm::Value *src, 
03023                                 llvm::Value *count, llvm::Value *align) {
03024     dest = BitCastInst(dest, LLVMTypes::VoidPointerType);
03025     src = BitCastInst(src, LLVMTypes::VoidPointerType);
03026     if (count->getType() != LLVMTypes::Int64Type) {
03027         AssertPos(currentPos, count->getType() == LLVMTypes::Int32Type);
03028         count = ZExtInst(count, LLVMTypes::Int64Type, "count_to_64");
03029     }
03030     if (align == NULL)
03031         align = LLVMInt32(1);
03032 
03033     llvm::Constant *mcFunc = 
03034         m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", 
03035                                        LLVMTypes::VoidType, LLVMTypes::VoidPointerType,
03036                                        LLVMTypes::VoidPointerType, LLVMTypes::Int64Type,
03037                                        LLVMTypes::Int32Type, LLVMTypes::BoolType, NULL);
03038     AssertPos(currentPos, mcFunc != NULL);
03039     AssertPos(currentPos, llvm::isa<llvm::Function>(mcFunc));
03040 
03041     std::vector<llvm::Value *> args;
03042     args.push_back(dest);
03043     args.push_back(src);
03044     args.push_back(count);
03045     args.push_back(align);
03046     args.push_back(LLVMFalse); /* not volatile */
03047     CallInst(mcFunc, NULL, args, "");
03048 }
03049 
03050 
03051 void
03052 FunctionEmitContext::BranchInst(llvm::BasicBlock *dest) {
03053     llvm::Instruction *b = llvm::BranchInst::Create(dest, bblock);
03054     AddDebugPos(b);
03055 }
03056 
03057 
03058 void
03059 FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock, 
03060                                 llvm::BasicBlock *falseBlock,
03061                                 llvm::Value *test) {
03062     if (test == NULL) {
03063         AssertPos(currentPos, m->errorCount > 0);
03064         return;
03065     }
03066 
03067     llvm::Instruction *b = 
03068         llvm::BranchInst::Create(trueBlock, falseBlock, test, bblock);
03069     AddDebugPos(b);
03070 }
03071 
03072 
03073 llvm::Value *
03074 FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) {
03075     if (v == NULL) {
03076         AssertPos(currentPos, m->errorCount > 0);
03077         return NULL;
03078     }
03079 
03080     if (name == NULL) {
03081         char buf[32];
03082         sprintf(buf, "_extract_%d", elt);
03083         name = LLVMGetName(v, buf);
03084     }
03085 
03086     llvm::Instruction *ei = NULL;
03087     if (llvm::isa<llvm::VectorType>(v->getType()))
03088         ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), name, bblock);
03089     else
03090         ei = llvm::ExtractValueInst::Create(v, elt, name, bblock);
03091     AddDebugPos(ei);
03092     return ei;
03093 }
03094 
03095 
03096 llvm::Value *
03097 FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, 
03098                                 const char *name) {
03099     if (v == NULL || eltVal == NULL) {
03100         AssertPos(currentPos, m->errorCount > 0);
03101         return NULL;
03102     }
03103 
03104     if (name == NULL) {
03105         char buf[32];
03106         sprintf(buf, "_insert_%d", elt);
03107         name = LLVMGetName(v, buf);
03108     }
03109 
03110     llvm::Instruction *ii = NULL;
03111     if (llvm::isa<llvm::VectorType>(v->getType()))
03112         ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt), 
03113                                              name, bblock);
03114     else
03115         ii = llvm::InsertValueInst::Create(v, eltVal, elt, name, bblock);
03116     AddDebugPos(ii);
03117     return ii;
03118 }
03119 
03120 
03121 llvm::PHINode *
03122 FunctionEmitContext::PhiNode(llvm::Type *type, int count, 
03123                              const char *name) {
03124     llvm::PHINode *pn = llvm::PHINode::Create(type, count,
03125                                               name ? name : "phi", bblock);
03126     AddDebugPos(pn);
03127     return pn;
03128 }
03129 
03130 
03131 llvm::Instruction *
03132 FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0,
03133                                 llvm::Value *val1, const char *name) {
03134     if (test == NULL || val0 == NULL || val1 == NULL) {
03135         AssertPos(currentPos, m->errorCount > 0);
03136         return NULL;
03137     }
03138 
03139     if (name == NULL)
03140         name = LLVMGetName(test, "_select");
03141 
03142     llvm::Instruction *inst = llvm::SelectInst::Create(test, val0, val1, name,
03143                                                        bblock);
03144     AddDebugPos(inst);
03145     return inst;
03146 }
03147 
03148 
03149 /** Given a value representing a function to be called or possibly-varying
03150     pointer to a function to be called, figure out how many arguments the
03151     function has. */
03152 static unsigned int
03153 lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
03154     llvm::FunctionType *ft = 
03155         llvm::dyn_cast<llvm::FunctionType>(callee->getType());
03156 
03157     if (ft == NULL) {
03158         llvm::PointerType *pt =
03159             llvm::dyn_cast<llvm::PointerType>(callee->getType());
03160         if (pt == NULL) {
03161             // varying--in this case, it must be the version of the
03162             // function that takes a mask
03163             return funcType->GetNumParameters() + 1;
03164         }
03165         ft = llvm::dyn_cast<llvm::FunctionType>(pt->getElementType());
03166     }
03167 
03168     Assert(ft != NULL);
03169     return ft->getNumParams();
03170 }
03171 
03172 
03173 llvm::Value *
03174 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
03175                               const std::vector<llvm::Value *> &args,
03176                               const char *name) {
03177     if (func == NULL) {
03178         AssertPos(currentPos, m->errorCount > 0);
03179         return NULL;
03180     }
03181 
03182     std::vector<llvm::Value *> argVals = args;
03183     // Most of the time, the mask is passed as the last argument.  this
03184     // isn't the case for things like intrinsics, builtins, and extern "C"
03185     // functions from the application.  Add the mask if it's needed.
03186     unsigned int calleeArgCount = lCalleeArgCount(func, funcType);
03187     AssertPos(currentPos, argVals.size() + 1 == calleeArgCount ||
03188            argVals.size() == calleeArgCount);
03189     if (argVals.size() + 1 == calleeArgCount)
03190         argVals.push_back(GetFullMask());
03191 
03192     if (llvm::isa<llvm::VectorType>(func->getType()) == false) {
03193         // Regular 'uniform' function call--just one function or function
03194         // pointer, so just emit the IR directly.
03195         llvm::Instruction *ci = 
03196             llvm::CallInst::Create(func, argVals, name ? name : "", bblock);
03197         AddDebugPos(ci);
03198         return ci;
03199     }
03200     else {
03201         // Emit the code for a varying function call, where we have an
03202         // vector of function pointers, one for each program instance.  The
03203         // basic strategy is that we go through the function pointers, and
03204         // for the executing program instances, for each unique function
03205         // pointer that's in the vector, call that function with a mask
03206         // equal to the set of active program instances that also have that
03207         // function pointer.  When all unique function pointers have been
03208         // called, we're done.
03209 
03210         llvm::BasicBlock *bbTest = CreateBasicBlock("varying_funcall_test");
03211         llvm::BasicBlock *bbCall = CreateBasicBlock("varying_funcall_call");
03212         llvm::BasicBlock *bbDone = CreateBasicBlock("varying_funcall_done");
03213 
03214         // Get the current mask value so we can restore it later
03215         llvm::Value *origMask = GetInternalMask();
03216 
03217         // First allocate memory to accumulate the various program
03218         // instances' return values...
03219         const Type *returnType = funcType->GetReturnType();
03220         llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
03221         llvm::Value *resultPtr = NULL;
03222         if (llvmReturnType->isVoidTy() == false)
03223             resultPtr = AllocaInst(llvmReturnType);
03224 
03225         // The memory pointed to by maskPointer tracks the set of program
03226         // instances for which we still need to call the function they are
03227         // pointing to.  It starts out initialized with the mask of
03228         // currently running program instances.
03229         llvm::Value *maskPtr = AllocaInst(LLVMTypes::MaskType);
03230         StoreInst(GetFullMask(), maskPtr);
03231 
03232         // And now we branch to the test to see if there's more work to be
03233         // done.
03234         BranchInst(bbTest);
03235 
03236         // bbTest: are any lanes of the mask still on?  If so, jump to
03237         // bbCall
03238         SetCurrentBasicBlock(bbTest); {
03239             llvm::Value *maskLoad = LoadInst(maskPtr);
03240             llvm::Value *any = Any(maskLoad);
03241             BranchInst(bbCall, bbDone, any);
03242         }
03243 
03244         // bbCall: this is the body of the loop that calls out to one of
03245         // the active function pointer values.
03246         SetCurrentBasicBlock(bbCall); {
03247             // Figure out the first lane that still needs its function
03248             // pointer to be called.
03249             llvm::Value *currentMask = LoadInst(maskPtr);
03250             llvm::Function *cttz = 
03251                 m->module->getFunction("__count_trailing_zeros_i64");
03252             AssertPos(currentPos, cttz != NULL);
03253             llvm::Value *firstLane64 = CallInst(cttz, NULL, LaneMask(currentMask),
03254                                                 "first_lane64");
03255             llvm::Value *firstLane = 
03256                 TruncInst(firstLane64, LLVMTypes::Int32Type, "first_lane32");
03257 
03258             // Get the pointer to the function we're going to call this
03259             // time through: ftpr = func[firstLane]
03260             llvm::Value *fptr = 
03261                 llvm::ExtractElementInst::Create(func, firstLane, 
03262                                                  "extract_fptr", bblock);
03263 
03264             // Smear it out into an array of function pointers
03265             llvm::Value *fptrSmear = SmearUniform(fptr, "func_ptr");
03266 
03267             // fpOverlap = (fpSmearAsVec == fpOrigAsVec).  This gives us a
03268             // mask for the set of program instances that have the same
03269             // value for their function pointer.
03270             llvm::Value *fpOverlap = 
03271                 CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
03272                         fptrSmear, func);
03273             fpOverlap = I1VecToBoolVec(fpOverlap);
03274 
03275             // Figure out the mask to use when calling the function
03276             // pointer: we need to AND the current execution mask to handle
03277             // the case of any non-running program instances that happen to
03278             // have this function pointer value.
03279             // callMask = (currentMask & fpOverlap)
03280             llvm::Value *callMask = 
03281                 BinaryOperator(llvm::Instruction::And, currentMask, fpOverlap,
03282                                "call_mask");
03283 
03284             // Set the mask
03285             SetInternalMask(callMask);
03286 
03287             // bitcast the i32/64 function pointer to the actual function
03288             // pointer type.
03289             llvm::Type *llvmFuncType = funcType->LLVMFunctionType(g->ctx);
03290             llvm::Type *llvmFPtrType = llvm::PointerType::get(llvmFuncType, 0);
03291             llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType);
03292 
03293             // Call the function: callResult = call ftpr(args, args, call mask)
03294             llvm::Value *callResult = CallInst(fptrCast, funcType, args, name);
03295 
03296             // Now, do a masked store into the memory allocated to
03297             // accumulate the result using the call mask.
03298             if (callResult != NULL && 
03299                 callResult->getType() != LLVMTypes::VoidType) {
03300                 AssertPos(currentPos, resultPtr != NULL);
03301                 StoreInst(callResult, resultPtr, callMask, returnType,
03302                           PointerType::GetUniform(returnType));
03303             }
03304             else
03305                 AssertPos(currentPos, resultPtr == NULL);
03306 
03307             // Update the mask to turn off the program instances for which
03308             // we just called the function.
03309             // currentMask = currentMask & ~callmask
03310             llvm::Value *notCallMask = 
03311                 BinaryOperator(llvm::Instruction::Xor, callMask, LLVMMaskAllOn,
03312                                "~callMask");
03313             currentMask = BinaryOperator(llvm::Instruction::And, currentMask, 
03314                                          notCallMask, "currentMask&~callMask");
03315             StoreInst(currentMask, maskPtr);
03316 
03317             // And go back to the test to see if we need to do another
03318             // call.
03319             BranchInst(bbTest);
03320         }
03321 
03322         // bbDone: We're all done; clean up and return the result we've
03323         // accumulated in the result memory.
03324         SetCurrentBasicBlock(bbDone);
03325         SetInternalMask(origMask);
03326         return resultPtr ? LoadInst(resultPtr) : NULL;
03327     }
03328 }
03329 
03330 
03331 llvm::Value *
03332 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
03333                               llvm::Value *arg, const char *name) {
03334     std::vector<llvm::Value *> args;
03335     args.push_back(arg);
03336     return CallInst(func, funcType, args, name);
03337 }
03338 
03339 
03340 llvm::Value *
03341 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
03342                               llvm::Value *arg0, llvm::Value *arg1, 
03343                               const char *name) {
03344     std::vector<llvm::Value *> args;
03345     args.push_back(arg0);
03346     args.push_back(arg1);
03347     return CallInst(func, funcType, args, name);
03348 }
03349 
03350 
03351 llvm::Instruction *
03352 FunctionEmitContext::ReturnInst() {
03353     if (launchedTasks)
03354         // Add a sync call at the end of any function that launched tasks
03355         SyncInst();
03356 
03357     llvm::Instruction *rinst = NULL;
03358     if (returnValuePtr != NULL) {
03359         // We have value(s) to return; load them from their storage
03360         // location
03361         llvm::Value *retVal = LoadInst(returnValuePtr, "return_value");
03362         rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock);
03363     }
03364     else {
03365         AssertPos(currentPos, Type::Equal(function->GetReturnType(), AtomicType::Void));
03366         rinst = llvm::ReturnInst::Create(*g->ctx, bblock);
03367     }
03368 
03369     AddDebugPos(rinst);
03370     bblock = NULL;
03371     return rinst;
03372 }
03373 
03374 
03375 llvm::Value *
03376 FunctionEmitContext::LaunchInst(llvm::Value *callee, 
03377                                 std::vector<llvm::Value *> &argVals,
03378                                 llvm::Value *launchCount) {
03379     if (callee == NULL) {
03380         AssertPos(currentPos, m->errorCount > 0);
03381         return NULL;
03382     }
03383 
03384     launchedTasks = true;
03385 
03386     AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
03387     llvm::Type *argType = 
03388         (llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
03389     AssertPos(currentPos, llvm::PointerType::classof(argType));
03390     llvm::PointerType *pt = 
03391         llvm::dyn_cast<llvm::PointerType>(argType);
03392     AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
03393     llvm::StructType *argStructType = 
03394         static_cast<llvm::StructType *>(pt->getElementType());
03395 
03396     llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
03397     AssertPos(currentPos, falloc != NULL);
03398     llvm::Value *structSize = g->target.SizeOf(argStructType, bblock);
03399     if (structSize->getType() != LLVMTypes::Int64Type)
03400         // ISPCAlloc expects the size as an uint64_t, but on 32-bit
03401         // targets, SizeOf returns a 32-bit value
03402         structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
03403                               "struct_size_to_64");
03404     int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
03405 
03406     std::vector<llvm::Value *> allocArgs;
03407     allocArgs.push_back(launchGroupHandlePtr);
03408     allocArgs.push_back(structSize);
03409     allocArgs.push_back(LLVMInt32(align));
03410     llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
03411     llvm::Value *argmem = BitCastInst(voidmem, pt);
03412 
03413     // Copy the values of the parameters into the appropriate place in
03414     // the argument block
03415     for (unsigned int i = 0; i < argVals.size(); ++i) {
03416         llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
03417         // don't need to do masked store here, I think
03418         StoreInst(argVals[i], ptr);
03419     }
03420 
03421     if (argStructType->getNumElements() == argVals.size() + 1) {
03422         // copy in the mask
03423         llvm::Value *mask = GetFullMask();
03424         llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
03425                                             "funarg_mask");
03426         StoreInst(mask, ptr);
03427     }
03428 
03429     // And emit the call to the user-supplied task launch function, passing
03430     // a pointer to the task function being called and a pointer to the
03431     // argument block we just filled in
03432     llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
03433     llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
03434     AssertPos(currentPos, flaunch != NULL);
03435     std::vector<llvm::Value *> args;
03436     args.push_back(launchGroupHandlePtr);
03437     args.push_back(fptr);
03438     args.push_back(voidmem);
03439     args.push_back(launchCount);
03440     return CallInst(flaunch, NULL, args, "");
03441 }
03442 
03443 
03444 void
03445 FunctionEmitContext::SyncInst() {
03446     llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
03447     llvm::Value *nullPtrValue = 
03448         llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
03449     llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
03450                                    llvm::CmpInst::ICMP_NE,
03451                                    launchGroupHandle, nullPtrValue);
03452     llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
03453     llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
03454     BranchInst(bSync, bPostSync, nonNull);
03455 
03456     SetCurrentBasicBlock(bSync);
03457     llvm::Function *fsync = m->module->getFunction("ISPCSync");
03458     if (fsync == NULL)
03459         FATAL("Couldn't find ISPCSync declaration?!");
03460     CallInst(fsync, NULL, launchGroupHandle, "");
03461 
03462     // zero out the handle so that if ISPCLaunch is called again in this
03463     // function, it knows it's starting out from scratch
03464     StoreInst(nullPtrValue, launchGroupHandlePtr);
03465 
03466     BranchInst(bPostSync);
03467 
03468     SetCurrentBasicBlock(bPostSync);
03469 }
03470 
03471 
03472 /** When we gathering from or scattering to a varying atomic type, we need
03473     to add an appropriate offset to the final address for each lane right
03474     before we use it.  Given a varying pointer we're about to use and its
03475     type, this function determines whether these offsets are needed and
03476     returns an updated pointer that incorporates these offsets if needed.
03477  */
03478 llvm::Value *
03479 FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, 
03480                                                const Type *ptrType) {
03481     // This should only be called for varying pointers
03482     const PointerType *pt = CastType<PointerType>(ptrType);
03483     AssertPos(currentPos, pt && pt->IsVaryingType());
03484 
03485     const Type *baseType = ptrType->GetBaseType();
03486     if (Type::IsBasicType(baseType) == false)
03487         return ptr;
03488 
03489     if (baseType->IsVaryingType() == false)
03490         return ptr;
03491     
03492     // Find the size of a uniform element of the varying type
03493     llvm::Type *llvmBaseUniformType = 
03494         baseType->GetAsUniformType()->LLVMType(g->ctx);
03495     llvm::Value *unifSize = g->target.SizeOf(llvmBaseUniformType, bblock);
03496     unifSize = SmearUniform(unifSize);
03497 
03498     // Compute offset = <0, 1, .. > * unifSize
03499     llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType());
03500     for (int i = 0; i < g->target.vectorWidth; ++i) {
03501         llvm::Value *iValue = (g->target.is32Bit || g->opt.force32BitAddressing) ?
03502             LLVMInt32(i) : LLVMInt64(i);
03503         varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta");
03504     }
03505     llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize, 
03506                                          varyingOffsets);
03507     
03508     if (g->opt.force32BitAddressing == true && g->target.is32Bit == false)
03509         // On 64-bit targets where we're doing 32-bit addressing
03510         // calculations, we need to convert to an i64 vector before adding
03511         // to the pointer
03512         offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
03513 
03514     return BinaryOperator(llvm::Instruction::Add, ptr, offset);
03515 }
03516 
03517 
03518 CFInfo *
03519 FunctionEmitContext::popCFState() {
03520     AssertPos(currentPos, controlFlowInfo.size() > 0);
03521     CFInfo *ci = controlFlowInfo.back();
03522     controlFlowInfo.pop_back();
03523 
03524     if (ci->IsSwitch()) {
03525         breakTarget = ci->savedBreakTarget;
03526         continueTarget = ci->savedContinueTarget;
03527         breakLanesPtr = ci->savedBreakLanesPtr;
03528         continueLanesPtr = ci->savedContinueLanesPtr;
03529         loopMask = ci->savedLoopMask;
03530         switchExpr = ci->savedSwitchExpr;
03531         defaultBlock = ci->savedDefaultBlock;
03532         caseBlocks = ci->savedCaseBlocks;
03533         nextBlocks = ci->savedNextBlocks;
03534         switchConditionWasUniform = ci->savedSwitchConditionWasUniform;
03535     }
03536     else if (ci->IsLoop() || ci->IsForeach()) {
03537         breakTarget = ci->savedBreakTarget;
03538         continueTarget = ci->savedContinueTarget;
03539         breakLanesPtr = ci->savedBreakLanesPtr;
03540         continueLanesPtr = ci->savedContinueLanesPtr;
03541         loopMask = ci->savedLoopMask;
03542     }
03543     else {
03544         AssertPos(currentPos, ci->IsIf());
03545         // nothing to do
03546     }
03547 
03548     return ci;
03549 }