Intel SPMD Program Compiler  1.3.0
stmt.cpp
Go to the documentation of this file.
00001 /*
00002   Copyright (c) 2010-2012, Intel Corporation
00003   All rights reserved.
00004 
00005   Redistribution and use in source and binary forms, with or without
00006   modification, are permitted provided that the following conditions are
00007   met:
00008 
00009     * Redistributions of source code must retain the above copyright
00010       notice, this list of conditions and the following disclaimer.
00011 
00012     * Redistributions in binary form must reproduce the above copyright
00013       notice, this list of conditions and the following disclaimer in the
00014       documentation and/or other materials provided with the distribution.
00015 
00016     * Neither the name of Intel Corporation nor the names of its
00017       contributors may be used to endorse or promote products derived from
00018       this software without specific prior written permission.
00019 
00020 
00021    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
00022    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00023    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00024    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
00025    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
00032 */
00033 
00034 /** @file stmt.cpp
00035     @brief File with definitions classes related to statements in the language
00036 */
00037 
00038 #include "stmt.h"
00039 #include "ctx.h"
00040 #include "util.h"
00041 #include "expr.h"
00042 #include "type.h"
00043 #include "func.h"
00044 #include "sym.h"
00045 #include "module.h"
00046 #include "llvmutil.h"
00047 
00048 #include <stdio.h>
00049 #include <map>
00050 
00051 #include <llvm/Module.h>
00052 #include <llvm/Function.h>
00053 #include <llvm/Type.h>
00054 #include <llvm/DerivedTypes.h>
00055 #include <llvm/LLVMContext.h>
00056 #include <llvm/Metadata.h>
00057 #include <llvm/Instructions.h>
00058 #include <llvm/CallingConv.h>
00059 #include <llvm/Support/IRBuilder.h>
00060 #include <llvm/Support/raw_ostream.h>
00061 
00062 ///////////////////////////////////////////////////////////////////////////
00063 // Stmt
00064 
00065 Stmt *
00066 Stmt::Optimize() {
00067     return this;
00068 }
00069 
00070 
00071 ///////////////////////////////////////////////////////////////////////////
00072 // ExprStmt
00073 
00074 ExprStmt::ExprStmt(Expr *e, SourcePos p) 
00075   : Stmt(p) {
00076     expr = e;
00077 }
00078 
00079 void
00080 ExprStmt::EmitCode(FunctionEmitContext *ctx) const {
00081     if (!ctx->GetCurrentBasicBlock()) 
00082         return;
00083 
00084     ctx->SetDebugPos(pos);
00085     if (expr) 
00086         expr->GetValue(ctx);
00087 }
00088 
00089 
00090 Stmt *
00091 ExprStmt::TypeCheck() {
00092     return this;
00093 }
00094 
00095 
00096 void
00097 ExprStmt::Print(int indent) const {
00098     if (!expr) 
00099         return;
00100 
00101     printf("%*c", indent, ' ');
00102     printf("Expr stmt: ");
00103     pos.Print();
00104     expr->Print();
00105     printf("\n");
00106 }
00107 
00108 
00109 int
00110 ExprStmt::EstimateCost() const {
00111     return 0;
00112 }
00113 
00114 
00115 ///////////////////////////////////////////////////////////////////////////
00116 // DeclStmt
00117 
00118 DeclStmt::DeclStmt(const std::vector<VariableDeclaration> &v, SourcePos p)
00119     : Stmt(p), vars(v) {
00120 }
00121 
00122 
00123 static bool
00124 lHasUnsizedArrays(const Type *type) {
00125     const ArrayType *at = CastType<ArrayType>(type);
00126     if (at == NULL)
00127         return false;
00128 
00129     if (at->GetElementCount() == 0)
00130         return true;
00131     else
00132         return lHasUnsizedArrays(at->GetElementType());
00133 }
00134     
00135 
00136 void
00137 DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
00138     if (!ctx->GetCurrentBasicBlock()) 
00139         return;
00140 
00141     for (unsigned int i = 0; i < vars.size(); ++i) {
00142         Symbol *sym = vars[i].sym;
00143         AssertPos(pos, sym != NULL);
00144         if (sym->type == NULL)
00145             continue;
00146         Expr *initExpr = vars[i].init;
00147 
00148         // Now that we're in the thick of emitting code, it's easy for us
00149         // to find out the level of nesting of varying control flow we're
00150         // in at this declaration.  So we can finally set that
00151         // Symbol::varyingCFDepth variable.
00152         // @todo It's disgusting to be doing this here.
00153         sym->varyingCFDepth = ctx->VaryingCFDepth();
00154 
00155         ctx->SetDebugPos(sym->pos);
00156 
00157         // If it's an array that was declared without a size but has an
00158         // initializer list, then use the number of elements in the
00159         // initializer list to finally set the array's size.
00160         sym->type = ArrayType::SizeUnsizedArrays(sym->type, initExpr);
00161         if (sym->type == NULL)
00162             continue;
00163 
00164         if (lHasUnsizedArrays(sym->type)) {
00165             Error(pos, "Illegal to declare an unsized array variable without "
00166                   "providing an initializer expression to set its size.");
00167             continue;
00168         }
00169 
00170         // References must have initializer expressions as well.
00171         if (IsReferenceType(sym->type) == true) {
00172             if (initExpr == NULL) {
00173                 Error(sym->pos, "Must provide initializer for reference-type "
00174                       "variable \"%s\".", sym->name.c_str());
00175                 continue;
00176             }
00177             if (IsReferenceType(initExpr->GetType()) == false) {
00178                 const Type *initLVType = initExpr->GetLValueType();
00179                 if (initLVType == NULL) {
00180                     Error(initExpr->pos, "Initializer for reference-type variable "
00181                           "\"%s\" must have an lvalue type.", sym->name.c_str());
00182                     continue;
00183                 }
00184                 if (initLVType->IsUniformType() == false) {
00185                     Error(initExpr->pos, "Initializer for reference-type variable "
00186                           "\"%s\" must have a uniform lvalue type.", sym->name.c_str());
00187                     continue;
00188                 }
00189             }
00190         }
00191 
00192         llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
00193         if (llvmType == NULL) {
00194             AssertPos(pos, m->errorCount > 0);
00195             return;
00196         }
00197 
00198         if (sym->storageClass == SC_STATIC) {
00199             // For static variables, we need a compile-time constant value
00200             // for its initializer; if there's no initializer, we use a
00201             // zero value.
00202             llvm::Constant *cinit = NULL;
00203             if (initExpr != NULL) {
00204                 if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
00205                     continue;
00206                 // FIXME: we only need this for function pointers; it was
00207                 // already done for atomic types and enums in
00208                 // DeclStmt::TypeCheck()...
00209                 if (dynamic_cast<ExprList *>(initExpr) == NULL) {
00210                     initExpr = TypeConvertExpr(initExpr, sym->type,
00211                                                "initializer");
00212                     // FIXME: and this is only needed to re-establish
00213                     // constant-ness so that GetConstant below works for
00214                     // constant artithmetic expressions...
00215                     initExpr = ::Optimize(initExpr);
00216                 }
00217 
00218                 cinit = initExpr->GetConstant(sym->type);
00219                 if (cinit == NULL)
00220                     Error(initExpr->pos, "Initializer for static variable "
00221                           "\"%s\" must be a constant.", sym->name.c_str());
00222             }
00223             if (cinit == NULL)
00224                 cinit = llvm::Constant::getNullValue(llvmType);
00225 
00226             // Allocate space for the static variable in global scope, so
00227             // that it persists across function calls
00228             sym->storagePtr =
00229                 new llvm::GlobalVariable(*m->module, llvmType, 
00230                                          sym->type->IsConstType(),
00231                                          llvm::GlobalValue::InternalLinkage, cinit,
00232                                          llvm::Twine("static.") +
00233                                          llvm::Twine(sym->pos.first_line) + 
00234                                          llvm::Twine(".") + sym->name.c_str());
00235             // Tell the FunctionEmitContext about the variable 
00236             ctx->EmitVariableDebugInfo(sym);
00237         }
00238         else {
00239             // For non-static variables, allocate storage on the stack
00240             sym->storagePtr = ctx->AllocaInst(llvmType, sym->name.c_str());
00241 
00242             // Tell the FunctionEmitContext about the variable; must do
00243             // this before the initializer stuff.
00244             ctx->EmitVariableDebugInfo(sym);
00245 
00246             // And then get it initialized...
00247             sym->parentFunction = ctx->GetFunction();
00248             InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
00249         }
00250     }
00251 }
00252 
00253 
00254 Stmt *
00255 DeclStmt::Optimize() {
00256     for (unsigned int i = 0; i < vars.size(); ++i) {
00257         Expr *init = vars[i].init;
00258         if (init != NULL && dynamic_cast<ExprList *>(init) == NULL) {
00259             // If the variable is const-qualified, after we've optimized
00260             // the initializer expression, see if we have a ConstExpr.  If
00261             // so, save it in Symbol::constValue where it can be used in
00262             // optimizing later expressions that have this symbol in them.
00263             // Note that there are cases where the expression may be
00264             // constant but where we don't have a ConstExpr; an example is
00265             // const arrays--the ConstExpr implementation just can't
00266             // represent an array of values.
00267             //
00268             // All this is fine in terms of the code that's generated in
00269             // the end (LLVM's constant folding stuff is good), but it
00270             // means that the ispc compiler's ability to reason about what
00271             // is definitely a compile-time constant for things like
00272             // computing array sizes from non-trivial expressions is
00273             // consequently limited.
00274             Symbol *sym = vars[i].sym;
00275             if (sym->type && sym->type->IsConstType() && 
00276                 Type::Equal(init->GetType(), sym->type))
00277                 sym->constValue = dynamic_cast<ConstExpr *>(init);
00278         }
00279     }
00280     return this;
00281 }
00282 
00283 
00284 Stmt *
00285 DeclStmt::TypeCheck() {
00286     bool encounteredError = false;
00287     for (unsigned int i = 0; i < vars.size(); ++i) {
00288         if (vars[i].sym == NULL) {
00289             encounteredError = true;
00290             continue;
00291         }
00292 
00293         if (vars[i].init == NULL)
00294             continue;
00295 
00296         // get the right type for stuff like const float foo = 2; so that
00297         // the int->float type conversion is in there and we don't return
00298         // an int as the constValue later...
00299         const Type *type = vars[i].sym->type;
00300         if (CastType<AtomicType>(type) != NULL ||
00301             CastType<EnumType>(type) != NULL) {
00302             // If it's an expr list with an atomic type, we'll later issue
00303             // an error.  Need to leave vars[i].init as is in that case so
00304             // it is in fact caught later, though.
00305             if (dynamic_cast<ExprList *>(vars[i].init) == NULL) {
00306                 vars[i].init = TypeConvertExpr(vars[i].init, type,
00307                                                "initializer");
00308                 if (vars[i].init == NULL)
00309                     encounteredError = true;
00310             }
00311         }
00312     }
00313     return encounteredError ? NULL : this;
00314 }
00315 
00316 
00317 void
00318 DeclStmt::Print(int indent) const {
00319     printf("%*cDecl Stmt:", indent, ' ');
00320     pos.Print();
00321     for (unsigned int i = 0; i < vars.size(); ++i) {
00322         printf("%*cVariable %s (%s)", indent+4, ' ', 
00323                vars[i].sym->name.c_str(),
00324                vars[i].sym->type->GetString().c_str());
00325         if (vars[i].init != NULL) {
00326             printf(" = ");
00327             vars[i].init->Print();
00328         }
00329         printf("\n");
00330     }
00331     printf("\n");
00332 }
00333 
00334 
00335 int
00336 DeclStmt::EstimateCost() const {
00337     return 0;
00338 }
00339 
00340 
00341 ///////////////////////////////////////////////////////////////////////////
00342 // IfStmt
00343 
00344 IfStmt::IfStmt(Expr *t, Stmt *ts, Stmt *fs, bool checkCoherence, SourcePos p) 
00345     : Stmt(p), test(t), trueStmts(ts), falseStmts(fs), 
00346       doAllCheck(checkCoherence &&
00347                  !g->opt.disableCoherentControlFlow) {
00348 }
00349 
00350 
00351 static void
00352 lEmitIfStatements(FunctionEmitContext *ctx, Stmt *stmts, const char *trueOrFalse) {
00353     if (!stmts)
00354         return;
00355 
00356     if (dynamic_cast<StmtList *>(stmts) == NULL)
00357         ctx->StartScope();
00358     ctx->AddInstrumentationPoint(trueOrFalse);
00359     stmts->EmitCode(ctx);
00360     if (dynamic_cast<const StmtList *>(stmts) == NULL)
00361         ctx->EndScope();
00362 }
00363 
00364 
00365 /** Returns true if the "true" block for the if statement consists of a
00366     single 'break' statement, and the "false" block is empty. */
00367 static bool
00368 lCanApplyBreakOptimization(Stmt *trueStmts, Stmt *falseStmts) {
00369     if (falseStmts != NULL) {
00370         if (StmtList *sl = dynamic_cast<StmtList *>(falseStmts)) {
00371             return (sl->stmts.size() == 0);
00372         }
00373         else
00374             return false;
00375     }
00376 
00377     if (dynamic_cast<BreakStmt *>(trueStmts))
00378         return true;
00379     else if (StmtList *sl = dynamic_cast<StmtList *>(trueStmts))
00380         return (sl->stmts.size() == 1 &&
00381                 dynamic_cast<BreakStmt *>(sl->stmts[0]) != NULL);
00382     else
00383         return false;
00384 }
00385 
00386 
00387 void
00388 IfStmt::EmitCode(FunctionEmitContext *ctx) const {
00389     // First check all of the things that might happen due to errors
00390     // earlier in compilation and bail out if needed so that we don't
00391     // dereference NULL pointers in the below...
00392     if (!ctx->GetCurrentBasicBlock()) 
00393         return;
00394     if (!test) 
00395         return;
00396     const Type *testType = test->GetType();
00397     if (!testType)
00398         return;
00399 
00400     ctx->SetDebugPos(pos);
00401     bool isUniform = testType->IsUniformType();
00402 
00403     llvm::Value *testValue = test->GetValue(ctx);
00404     if (testValue == NULL)
00405         return;
00406 
00407     if (isUniform) {
00408         ctx->StartUniformIf();
00409         if (doAllCheck)
00410             Warning(test->pos, "Uniform condition supplied to \"cif\" statement.");
00411 
00412         // 'If' statements with uniform conditions are relatively
00413         // straightforward.  We evaluate the condition and then jump to
00414         // either the 'then' or 'else' clause depending on its value.
00415         llvm::BasicBlock *bthen = ctx->CreateBasicBlock("if_then");
00416         llvm::BasicBlock *belse = ctx->CreateBasicBlock("if_else");
00417         llvm::BasicBlock *bexit = ctx->CreateBasicBlock("if_exit");
00418 
00419         // Jump to the appropriate basic block based on the value of
00420         // the 'if' test
00421         ctx->BranchInst(bthen, belse, testValue);
00422 
00423         // Emit code for the 'true' case
00424         ctx->SetCurrentBasicBlock(bthen);
00425         lEmitIfStatements(ctx, trueStmts, "true");
00426         if (ctx->GetCurrentBasicBlock()) 
00427             ctx->BranchInst(bexit);
00428 
00429         // Emit code for the 'false' case
00430         ctx->SetCurrentBasicBlock(belse);
00431         lEmitIfStatements(ctx, falseStmts, "false");
00432         if (ctx->GetCurrentBasicBlock())
00433             ctx->BranchInst(bexit);
00434 
00435         // Set the active basic block to the newly-created exit block
00436         // so that subsequent emitted code starts there.
00437         ctx->SetCurrentBasicBlock(bexit);
00438         ctx->EndIf();
00439     }
00440     else if (lCanApplyBreakOptimization(trueStmts, falseStmts)) {
00441         // If we have a simple break statement inside the 'if' and are
00442         // under varying control flow, just update the execution mask
00443         // directly and don't emit code for the statements.  This leads to
00444         // better code for this case--this is surprising and should be
00445         // root-caused further, but for now this gives us performance
00446         // benefit in this case.
00447         ctx->SetInternalMaskAndNot(ctx->GetInternalMask(), testValue);
00448     }
00449     else
00450         emitVaryingIf(ctx, testValue);
00451 }
00452 
00453 
00454 Stmt *
00455 IfStmt::TypeCheck() {
00456     if (test != NULL) {
00457         const Type *testType = test->GetType();
00458         if (testType != NULL) {
00459             bool isUniform = (testType->IsUniformType() && 
00460                               !g->opt.disableUniformControlFlow);
00461             test = TypeConvertExpr(test, isUniform ? AtomicType::UniformBool : 
00462                                                      AtomicType::VaryingBool, 
00463                                    "\"if\" statement test");
00464             if (test == NULL)
00465                 return NULL;
00466         }
00467     }
00468 
00469     return this;
00470 }
00471 
00472 
00473 int
00474 IfStmt::EstimateCost() const {
00475     const Type *type;
00476     if (test == NULL || (type = test->GetType()) == NULL)
00477         return 0;
00478 
00479     return type->IsUniformType() ? COST_UNIFORM_IF : COST_VARYING_IF;
00480 }
00481 
00482 
00483 void
00484 IfStmt::Print(int indent) const {
00485     printf("%*cIf Stmt %s", indent, ' ', doAllCheck ? "DO ALL CHECK" : "");
00486     pos.Print();
00487     printf("\n%*cTest: ", indent+4, ' ');
00488     test->Print();
00489     printf("\n");
00490     if (trueStmts) {
00491         printf("%*cTrue:\n", indent+4, ' ');
00492         trueStmts->Print(indent+8);
00493     }
00494     if (falseStmts) {
00495         printf("%*cFalse:\n", indent+4, ' ');
00496         falseStmts->Print(indent+8);
00497     }
00498 }
00499 
00500 
00501 /** Emit code to run both the true and false statements for the if test,
00502     with the mask set appropriately before running each one. 
00503 */
00504 void
00505 IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask, 
00506                                llvm::Value *test) const {
00507     if (trueStmts) {
00508         ctx->SetInternalMaskAnd(oldMask, test);
00509         lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
00510         // under varying control flow,, returns can't stop instruction
00511         // emission, so this better be non-NULL...
00512         AssertPos(ctx->GetDebugPos(), ctx->GetCurrentBasicBlock()); 
00513     }
00514     if (falseStmts) {
00515         ctx->SetInternalMaskAndNot(oldMask, test);
00516         lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
00517         AssertPos(ctx->GetDebugPos(), ctx->GetCurrentBasicBlock());
00518     }
00519 }
00520 
00521 
00522 /** Emit code for an if test that checks the mask and the test values and
00523     tries to be smart about jumping over code that doesn't need to be run.
00524  */
00525 void
00526 IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
00527     llvm::Value *oldMask = ctx->GetInternalMask();
00528     if (doAllCheck) {
00529         // We can't tell if the mask going into the if is all on at the
00530         // compile time.  Emit code to check for this and then either run
00531         // the code for the 'all on' or the 'mixed' case depending on the
00532         // mask's value at runtime.
00533         llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("cif_mask_all");
00534         llvm::BasicBlock *bMixedOn = ctx->CreateBasicBlock("cif_mask_mixed");
00535         llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done");
00536 
00537         // Jump to either bAllOn or bMixedOn, depending on the mask's value 
00538         llvm::Value *maskAllQ = ctx->All(ctx->GetFullMask());
00539         ctx->BranchInst(bAllOn, bMixedOn, maskAllQ);
00540 
00541         // Emit code for the 'mask all on' case
00542         ctx->SetCurrentBasicBlock(bAllOn);
00543         emitMaskAllOn(ctx, ltest, bDone);
00544         
00545         // And emit code for the mixed mask case
00546         ctx->SetCurrentBasicBlock(bMixedOn);
00547         emitMaskMixed(ctx, oldMask, ltest, bDone);
00548 
00549         // When done, set the current basic block to the block that the two
00550         // paths above jump to when they're done.
00551         ctx->SetCurrentBasicBlock(bDone);
00552     }
00553     else if (trueStmts != NULL || falseStmts != NULL) {
00554         // If there is nothing that is potentially unsafe to run with all
00555         // lanes off in the true and false statements and if the total
00556         // complexity of those two is relatively simple, then we'll go
00557         // ahead and emit straightline code that runs both sides, updating
00558         // the mask accordingly.  This is useful for efficiently compiling
00559         // things like:
00560         //
00561         // if (foo) x = 0;
00562         // else     ++x;
00563         //
00564         // Where the overhead of checking if any of the program instances wants
00565         // to run one side or the other is more than the actual computation.
00566         // SafeToRunWithMaskAllOff() checks to make sure that we don't do this
00567         // for potentially dangerous code like:
00568         //
00569         // if (index < count) array[index] = 0;
00570         //
00571         // where our use of blend for conditional assignments doesn't check
00572         // for the 'all lanes' off case.
00573         int trueFalseCost = (::EstimateCost(trueStmts) + 
00574                              ::EstimateCost(falseStmts));
00575         bool costIsAcceptable = (trueFalseCost <
00576                                  PREDICATE_SAFE_IF_STATEMENT_COST);
00577 
00578         bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) &&
00579                                          SafeToRunWithMaskAllOff(falseStmts));
00580 
00581         Debug(pos, "If statement: true cost %d (safe %d), false cost %d (safe %d).",
00582               ::EstimateCost(trueStmts), (int)SafeToRunWithMaskAllOff(trueStmts),
00583               ::EstimateCost(falseStmts), (int)SafeToRunWithMaskAllOff(falseStmts));
00584               
00585         if (safeToRunWithAllLanesOff &&
00586             (costIsAcceptable || g->opt.disableCoherentControlFlow)) {
00587             ctx->StartVaryingIf(oldMask);
00588             emitMaskedTrueAndFalse(ctx, oldMask, ltest);
00589             AssertPos(pos, ctx->GetCurrentBasicBlock());
00590             ctx->EndIf();
00591         }
00592         else {
00593             llvm::BasicBlock *bDone = ctx->CreateBasicBlock("if_done");
00594             emitMaskMixed(ctx, oldMask, ltest, bDone);
00595             ctx->SetCurrentBasicBlock(bDone);
00596         }
00597     }
00598 }
00599 
00600 
00601 /** Emits code for 'if' tests under the case where we know that the program
00602     mask is all on going into the 'if'.
00603  */
00604 void
00605 IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest, 
00606                       llvm::BasicBlock *bDone) const {
00607     // We start by explicitly storing "all on" into the mask mask.  Note
00608     // that this doesn't change its actual value, but doing so lets the
00609     // compiler see what's going on so that subsequent optimizations for
00610     // code emitted here can operate with the knowledge that the mask is
00611     // definitely all on (until it modifies the mask itself).
00612     AssertPos(pos, !g->opt.disableCoherentControlFlow);
00613     if (!g->opt.disableMaskAllOnOptimizations)
00614         ctx->SetInternalMask(LLVMMaskAllOn);
00615     llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
00616     if (!g->opt.disableMaskAllOnOptimizations)
00617         ctx->SetFunctionMask(LLVMMaskAllOn);
00618 
00619     // First, check the value of the test.  If it's all on, then we jump to
00620     // a basic block that will only have code for the true case.
00621     llvm::BasicBlock *bTestAll = ctx->CreateBasicBlock("cif_test_all");
00622     llvm::BasicBlock *bTestNoneCheck = ctx->CreateBasicBlock("cif_test_none_check");
00623     llvm::Value *testAllQ = ctx->All(ltest);
00624     ctx->BranchInst(bTestAll, bTestNoneCheck, testAllQ);
00625 
00626     // Emit code for the 'test is all true' case
00627     ctx->SetCurrentBasicBlock(bTestAll);
00628     ctx->StartVaryingIf(LLVMMaskAllOn);
00629     lEmitIfStatements(ctx, trueStmts, "if: all on mask, expr all true");
00630     ctx->EndIf();
00631     if (ctx->GetCurrentBasicBlock() != NULL)
00632         // bblock may legitimately be NULL since if there's a return stmt
00633         // or break or continue we can actually jump and end emission since
00634         // we know all of the lanes are following this path...
00635         ctx->BranchInst(bDone);
00636 
00637     // The test isn't all true.  Now emit code to determine if it's all
00638     // false, or has mixed values.
00639     ctx->SetCurrentBasicBlock(bTestNoneCheck);
00640     llvm::BasicBlock *bTestNone = ctx->CreateBasicBlock("cif_test_none");
00641     llvm::BasicBlock *bTestMixed = ctx->CreateBasicBlock("cif_test_mixed");
00642     llvm::Value *testMixedQ = ctx->Any(ltest);
00643     ctx->BranchInst(bTestMixed, bTestNone, testMixedQ);
00644 
00645     // Emit code for the 'test is all false' case
00646     ctx->SetCurrentBasicBlock(bTestNone);
00647     ctx->StartVaryingIf(LLVMMaskAllOn);
00648     lEmitIfStatements(ctx, falseStmts, "if: all on mask, expr all false");
00649     ctx->EndIf();
00650     if (ctx->GetCurrentBasicBlock())
00651         // bblock may be NULL since if there's a return stmt or break or
00652         // continue we can actually jump or whatever and end emission...
00653         ctx->BranchInst(bDone);
00654 
00655     // Finally emit code for the 'mixed true/false' case.  We unavoidably
00656     // need to run both the true and the false statements.
00657     ctx->SetCurrentBasicBlock(bTestMixed);
00658     ctx->StartVaryingIf(LLVMMaskAllOn);
00659     emitMaskedTrueAndFalse(ctx, LLVMMaskAllOn, ltest);
00660     // In this case, return/break/continue isn't allowed to jump and end
00661     // emission.
00662     AssertPos(pos, ctx->GetCurrentBasicBlock());
00663     ctx->EndIf();
00664     ctx->BranchInst(bDone);
00665 
00666     ctx->SetCurrentBasicBlock(bDone);
00667     ctx->SetFunctionMask(oldFunctionMask);
00668 }
00669 
00670 
00671 /** Emit code for an 'if' test where the lane mask is known to be mixed
00672     on/off going into it.
00673  */
00674 void
00675 IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask, 
00676                       llvm::Value *ltest, llvm::BasicBlock *bDone) const {
00677     ctx->StartVaryingIf(oldMask);
00678     llvm::BasicBlock *bNext = ctx->CreateBasicBlock("safe_if_after_true");
00679     if (trueStmts != NULL) {
00680         llvm::BasicBlock *bRunTrue = ctx->CreateBasicBlock("safe_if_run_true");
00681         ctx->SetInternalMaskAnd(oldMask, ltest);
00682 
00683         // Do any of the program instances want to run the 'true'
00684         // block?  If not, jump ahead to bNext.
00685         llvm::Value *maskAnyQ = ctx->Any(ctx->GetFullMask());
00686         ctx->BranchInst(bRunTrue, bNext, maskAnyQ);
00687 
00688         // Emit statements for true
00689         ctx->SetCurrentBasicBlock(bRunTrue);
00690         lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
00691         AssertPos(pos, ctx->GetCurrentBasicBlock()); 
00692         ctx->BranchInst(bNext);
00693         ctx->SetCurrentBasicBlock(bNext);
00694     }
00695     if (falseStmts != NULL) {
00696         llvm::BasicBlock *bRunFalse = ctx->CreateBasicBlock("safe_if_run_false");
00697         bNext = ctx->CreateBasicBlock("safe_if_after_false");
00698         ctx->SetInternalMaskAndNot(oldMask, ltest);
00699 
00700         // Similarly, check to see if any of the instances want to
00701         // run the 'false' block...
00702         llvm::Value *maskAnyQ = ctx->Any(ctx->GetFullMask());
00703         ctx->BranchInst(bRunFalse, bNext, maskAnyQ);
00704 
00705         // Emit code for false
00706         ctx->SetCurrentBasicBlock(bRunFalse);
00707         lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
00708         AssertPos(pos, ctx->GetCurrentBasicBlock());
00709         ctx->BranchInst(bNext);
00710         ctx->SetCurrentBasicBlock(bNext);
00711     }
00712     ctx->BranchInst(bDone);
00713     ctx->SetCurrentBasicBlock(bDone);
00714     ctx->EndIf();
00715 }
00716 
00717 
00718 ///////////////////////////////////////////////////////////////////////////
00719 // DoStmt
00720 
00721 struct VaryingBCCheckInfo {
00722     VaryingBCCheckInfo() {
00723         varyingControlFlowDepth = 0;
00724         foundVaryingBreakOrContinue = false;
00725     }
00726 
00727     int varyingControlFlowDepth;
00728     bool foundVaryingBreakOrContinue;
00729 };
00730 
00731 
00732 /** Returns true if the given node is an 'if' statement where the test
00733     condition has varying type. */
00734 static bool
00735 lIsVaryingFor(ASTNode *node) {
00736     IfStmt *ifStmt;
00737     if ((ifStmt = dynamic_cast<IfStmt *>(node)) != NULL &&
00738         ifStmt->test != NULL) {
00739         const Type *type = ifStmt->test->GetType();
00740         return (type != NULL && type->IsVaryingType());
00741     }
00742     else
00743         return false;
00744 }
00745 
00746 
00747 /** Preorder callback function for checking for varying breaks or
00748     continues. */
00749 static bool
00750 lVaryingBCPreFunc(ASTNode *node, void *d) {
00751     VaryingBCCheckInfo *info = (VaryingBCCheckInfo *)d;
00752 
00753     // We found a break or continue statement; if we're under varying
00754     // control flow, then bingo.
00755     if ((dynamic_cast<BreakStmt *>(node) != NULL ||
00756          dynamic_cast<ContinueStmt *>(node) != NULL) &&
00757         info->varyingControlFlowDepth > 0) {
00758         info->foundVaryingBreakOrContinue = true;
00759         return false;
00760     }
00761 
00762     // Update the count of the nesting depth of varying control flow if
00763     // this is an if statement with a varying condition.
00764     if (lIsVaryingFor(node))
00765         ++info->varyingControlFlowDepth;
00766 
00767     if (dynamic_cast<ForStmt *>(node) != NULL ||
00768         dynamic_cast<DoStmt *>(node) != NULL ||
00769         dynamic_cast<ForeachStmt *>(node) != NULL)
00770         // Don't recurse into these guys, since we don't care about varying
00771         // breaks or continues within them...
00772         return false;
00773     else
00774         return true;
00775 }
00776 
00777 
00778 /** Postorder callback function for checking for varying breaks or
00779     continues; decrement the varying control flow depth after the node's
00780     children have been processed, if this is a varying if statement. */
00781 static ASTNode *
00782 lVaryingBCPostFunc(ASTNode *node, void *d) {
00783     VaryingBCCheckInfo *info = (VaryingBCCheckInfo *)d;
00784     if (lIsVaryingFor(node))
00785         --info->varyingControlFlowDepth;
00786     return node;
00787 }
00788 
00789 
00790 /** Given a statment, walk through it to see if there is a 'break' or
00791     'continue' statement inside if its children, under varying control
00792     flow.  We need to detect this case for loops since what might otherwise
00793     look like a 'uniform' loop needs to have code emitted to do all of the
00794     lane management stuff if this is the case.
00795  */ 
00796 static bool
00797 lHasVaryingBreakOrContinue(Stmt *stmt) {
00798     VaryingBCCheckInfo info;
00799     WalkAST(stmt, lVaryingBCPreFunc, lVaryingBCPostFunc, &info);
00800     return info.foundVaryingBreakOrContinue;
00801 }
00802 
00803 
00804 DoStmt::DoStmt(Expr *t, Stmt *s, bool cc, SourcePos p) 
00805     : Stmt(p), testExpr(t), bodyStmts(s), 
00806       doCoherentCheck(cc && !g->opt.disableCoherentControlFlow) {
00807 }
00808 
00809 
00810 void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
00811     // Check for things that could be NULL due to earlier errors during
00812     // compilation.
00813     if (!ctx->GetCurrentBasicBlock()) 
00814         return;
00815     if (!testExpr || !testExpr->GetType()) 
00816         return;
00817 
00818     bool uniformTest = testExpr->GetType()->IsUniformType();
00819     if (uniformTest && doCoherentCheck)
00820         Warning(testExpr->pos, "Uniform condition supplied to \"cdo\" "
00821                 "statement.");
00822 
00823     llvm::BasicBlock *bloop = ctx->CreateBasicBlock("do_loop");
00824     llvm::BasicBlock *bexit = ctx->CreateBasicBlock("do_exit");
00825     llvm::BasicBlock *btest = ctx->CreateBasicBlock("do_test");
00826 
00827     ctx->StartLoop(bexit, btest, uniformTest);
00828 
00829     // Start by jumping into the loop body
00830     ctx->BranchInst(bloop);
00831 
00832     // And now emit code for the loop body
00833     ctx->SetCurrentBasicBlock(bloop);
00834     ctx->SetLoopMask(ctx->GetInternalMask());
00835     ctx->SetDebugPos(pos);
00836     // FIXME: in the StmtList::EmitCode() method takes starts/stops a new
00837     // scope around the statements in the list.  So if the body is just a
00838     // single statement (and thus not a statement list), we need a new
00839     // scope, but we don't want two scopes in the StmtList case.
00840     if (!dynamic_cast<StmtList *>(bodyStmts))
00841         ctx->StartScope();
00842 
00843     ctx->AddInstrumentationPoint("do loop body");
00844     if (doCoherentCheck && !uniformTest) {
00845         // Check to see if the mask is all on
00846         llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("do_all_on");
00847         llvm::BasicBlock *bMixed = ctx->CreateBasicBlock("do_mixed");
00848         ctx->BranchIfMaskAll(bAllOn, bMixed);
00849 
00850         // If so, emit code for the 'mask all on' case.  In particular,
00851         // explicitly set the mask to 'all on' (see rationale in
00852         // IfStmt::emitCoherentTests()), and then emit the code for the
00853         // loop body.
00854         ctx->SetCurrentBasicBlock(bAllOn);
00855         if (!g->opt.disableMaskAllOnOptimizations)
00856             ctx->SetInternalMask(LLVMMaskAllOn);
00857         llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
00858         if (!g->opt.disableMaskAllOnOptimizations)
00859             ctx->SetFunctionMask(LLVMMaskAllOn);
00860         if (bodyStmts)
00861             bodyStmts->EmitCode(ctx);
00862         AssertPos(pos, ctx->GetCurrentBasicBlock());
00863         ctx->SetFunctionMask(oldFunctionMask);
00864         ctx->BranchInst(btest);
00865 
00866         // The mask is mixed.  Just emit the code for the loop body.
00867         ctx->SetCurrentBasicBlock(bMixed);
00868         if (bodyStmts)
00869             bodyStmts->EmitCode(ctx);
00870         AssertPos(pos, ctx->GetCurrentBasicBlock());
00871         ctx->BranchInst(btest);
00872     }
00873     else {
00874         // Otherwise just emit the code for the loop body.  The current
00875         // mask is good.
00876         if (bodyStmts)
00877             bodyStmts->EmitCode(ctx);
00878         if (ctx->GetCurrentBasicBlock())
00879             ctx->BranchInst(btest);
00880     }
00881     // End the scope we started above, if needed.
00882     if (!dynamic_cast<StmtList *>(bodyStmts))
00883         ctx->EndScope();
00884 
00885     // Now emit code for the loop test.
00886     ctx->SetCurrentBasicBlock(btest);
00887     // First, emit code to restore the mask value for any lanes that
00888     // executed a 'continue' during the current loop before we go and emit
00889     // the code for the test.  This is only necessary for varying loops;
00890     // 'uniform' loops just jump when they hit a continue statement and
00891     // don't mess with the mask.
00892     if (!uniformTest)
00893         ctx->RestoreContinuedLanes();
00894     llvm::Value *testValue = testExpr->GetValue(ctx);
00895     if (!testValue)
00896         return;
00897 
00898     if (uniformTest)
00899         // For the uniform case, just jump to the top of the loop or the
00900         // exit basic block depending on the value of the test.
00901         ctx->BranchInst(bloop, bexit, testValue);
00902     else {
00903         // For the varying case, update the mask based on the value of the
00904         // test.  If any program instances still want to be running, jump
00905         // to the top of the loop.  Otherwise, jump out.
00906         llvm::Value *mask = ctx->GetInternalMask();
00907         ctx->SetInternalMaskAnd(mask, testValue);
00908         ctx->BranchIfMaskAny(bloop, bexit);
00909     }
00910 
00911     // ...and we're done.  Set things up for subsequent code to be emitted
00912     // in the right basic block.
00913     ctx->SetCurrentBasicBlock(bexit);
00914     ctx->EndLoop();
00915 }
00916 
00917 
00918 Stmt *
00919 DoStmt::TypeCheck() {
00920     const Type *testType;
00921     if (testExpr != NULL && (testType = testExpr->GetType()) != NULL) {
00922         if (!testType->IsNumericType() && !testType->IsBoolType()) {
00923             Error(testExpr->pos, "Type \"%s\" can't be converted to boolean for \"while\" "
00924                   "test in \"do\" loop.", testExpr->GetType()->GetString().c_str());
00925             return NULL;
00926         }
00927 
00928         // Should the test condition for the loop be uniform or varying?
00929         // It can be uniform only if three conditions are met:
00930         //
00931         // - First and foremost, the type of the test condition must be
00932         //   uniform.
00933         //
00934         // - Second, the user must not have set the dis-optimization option
00935         //   that disables uniform flow control.
00936         //
00937         // - Thirdly, and most subtlely, there must not be any break or
00938         //   continue statements inside the loop that are within the scope
00939         //   of a 'varying' if statement.  If there are, then we type cast
00940         //   the test to be 'varying', so that the code generated for the
00941         //   loop includes masking stuff, so that we can track which lanes
00942         //   actually want to be running, accounting for breaks/continues.
00943         //
00944         bool uniformTest = (testType->IsUniformType() &&
00945                             !g->opt.disableUniformControlFlow &&
00946                             !lHasVaryingBreakOrContinue(bodyStmts));
00947         testExpr = new TypeCastExpr(uniformTest ? AtomicType::UniformBool :
00948                                                   AtomicType::VaryingBool,
00949                                     testExpr, testExpr->pos);
00950     }
00951 
00952     return this;
00953 }
00954 
00955 
00956 int
00957 DoStmt::EstimateCost() const {
00958     bool uniformTest = testExpr ? testExpr->GetType()->IsUniformType() :
00959         (!g->opt.disableUniformControlFlow &&
00960          !lHasVaryingBreakOrContinue(bodyStmts));
00961 
00962     return uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP;
00963 }
00964 
00965 
00966 void
00967 DoStmt::Print(int indent) const {
00968     printf("%*cDo Stmt", indent, ' ');
00969     pos.Print();
00970     printf(":\n");
00971     printf("%*cTest: ", indent+4, ' ');
00972     if (testExpr) testExpr->Print();
00973     printf("\n");
00974     if (bodyStmts) {
00975         printf("%*cStmts:\n", indent+4, ' ');
00976         bodyStmts->Print(indent+8);
00977     }
00978 }
00979 
00980 
00981 ///////////////////////////////////////////////////////////////////////////
00982 // ForStmt
00983 
00984 ForStmt::ForStmt(Stmt *i, Expr *t, Stmt *s, Stmt *st, bool cc, SourcePos p) 
00985     : Stmt(p), init(i), test(t), step(s), stmts(st), 
00986       doCoherentCheck(cc && !g->opt.disableCoherentControlFlow) {
00987 }
00988 
00989 
00990 void
00991 ForStmt::EmitCode(FunctionEmitContext *ctx) const {
00992     if (!ctx->GetCurrentBasicBlock()) 
00993         return;
00994 
00995     llvm::BasicBlock *btest = ctx->CreateBasicBlock("for_test");
00996     llvm::BasicBlock *bstep = ctx->CreateBasicBlock("for_step");
00997     llvm::BasicBlock *bloop = ctx->CreateBasicBlock("for_loop");
00998     llvm::BasicBlock *bexit = ctx->CreateBasicBlock("for_exit");
00999 
01000     bool uniformTest = test ? test->GetType()->IsUniformType() :
01001         (!g->opt.disableUniformControlFlow &&
01002          !lHasVaryingBreakOrContinue(stmts));
01003 
01004     ctx->StartLoop(bexit, bstep, uniformTest);
01005     ctx->SetDebugPos(pos);
01006 
01007     // If we have an initiailizer statement, start by emitting the code for
01008     // it and then jump into the loop test code.  (Also start a new scope
01009     // since the initiailizer may be a declaration statement).
01010     if (init) {
01011         AssertPos(pos, dynamic_cast<StmtList *>(init) == NULL);
01012         ctx->StartScope();
01013         init->EmitCode(ctx);
01014     }
01015     ctx->BranchInst(btest);
01016 
01017     // Emit code to get the value of the loop test.  If no test expression
01018     // was provided, just go with a true value.
01019     ctx->SetCurrentBasicBlock(btest);
01020     llvm::Value *ltest = NULL;
01021     if (test) {
01022         ltest = test->GetValue(ctx);
01023         if (!ltest) {
01024             ctx->EndScope();
01025             ctx->EndLoop();
01026             return;
01027         }
01028     }
01029     else
01030         ltest = uniformTest ? LLVMTrue : LLVMBoolVector(true);
01031 
01032     // Now use the test's value.  For a uniform loop, we can either jump to
01033     // the loop body or the loop exit, based on whether it's true or false.
01034     // For a non-uniform loop, we update the mask and jump into the loop if
01035     // any of the mask values are true.
01036     if (uniformTest) {
01037         if (doCoherentCheck)
01038             Warning(test->pos, "Uniform condition supplied to cfor/cwhile "
01039                     "statement.");
01040         AssertPos(pos, ltest->getType() == LLVMTypes::BoolType);
01041         ctx->BranchInst(bloop, bexit, ltest);
01042     }
01043     else {
01044         llvm::Value *mask = ctx->GetInternalMask();
01045         ctx->SetInternalMaskAnd(mask, ltest);
01046         ctx->BranchIfMaskAny(bloop, bexit);
01047     }
01048 
01049     // On to emitting the code for the loop body.
01050     ctx->SetCurrentBasicBlock(bloop);
01051     ctx->SetLoopMask(ctx->GetInternalMask());
01052     ctx->AddInstrumentationPoint("for loop body");
01053     if (!dynamic_cast<StmtList *>(stmts))
01054         ctx->StartScope();
01055 
01056     if (doCoherentCheck && !uniformTest) {
01057         // For 'varying' loops with the coherence check, we start by
01058         // checking to see if the mask is all on, after it has been updated
01059         // based on the value of the test.
01060         llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("for_all_on");
01061         llvm::BasicBlock *bMixed = ctx->CreateBasicBlock("for_mixed");
01062         ctx->BranchIfMaskAll(bAllOn, bMixed);
01063 
01064         // Emit code for the mask being all on.  Explicitly set the mask to
01065         // be on so that the optimizer can see that it's on (i.e. now that
01066         // the runtime test has passed, make this fact clear for code
01067         // generation at compile time here.)
01068         ctx->SetCurrentBasicBlock(bAllOn);
01069         if (!g->opt.disableMaskAllOnOptimizations)
01070             ctx->SetInternalMask(LLVMMaskAllOn);
01071         llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
01072         if (!g->opt.disableMaskAllOnOptimizations)
01073             ctx->SetFunctionMask(LLVMMaskAllOn);
01074         if (stmts)
01075             stmts->EmitCode(ctx);
01076         AssertPos(pos, ctx->GetCurrentBasicBlock());
01077         ctx->SetFunctionMask(oldFunctionMask);
01078         ctx->BranchInst(bstep);
01079 
01080         // Emit code for the mask being mixed.  We should never run the
01081         // loop with the mask all off, based on the BranchIfMaskAny call
01082         // above.
01083         ctx->SetCurrentBasicBlock(bMixed);
01084         if (stmts)
01085             stmts->EmitCode(ctx);
01086         ctx->BranchInst(bstep);
01087     }
01088     else {
01089         // For both uniform loops and varying loops without the coherence
01090         // check, we know that at least one program instance wants to be
01091         // running the loop, so just emit code for the loop body and jump
01092         // to the loop step code.
01093         if (stmts)
01094             stmts->EmitCode(ctx);
01095         if (ctx->GetCurrentBasicBlock())
01096             ctx->BranchInst(bstep);
01097     }
01098     if (!dynamic_cast<StmtList *>(stmts))
01099         ctx->EndScope();
01100 
01101     // Emit code for the loop step.  First, restore the lane mask of any
01102     // program instances that executed a 'continue' during the previous
01103     // iteration.  Then emit code for the loop step and then jump to the
01104     // test code.
01105     ctx->SetCurrentBasicBlock(bstep);
01106     ctx->RestoreContinuedLanes();
01107     if (step)
01108         step->EmitCode(ctx);
01109     ctx->BranchInst(btest);
01110 
01111     // Set the current emission basic block to the loop exit basic block
01112     ctx->SetCurrentBasicBlock(bexit);
01113     if (init)
01114         ctx->EndScope();
01115     ctx->EndLoop();
01116 }
01117 
01118 
01119 Stmt *
01120 ForStmt::TypeCheck() {
01121     const Type *testType;
01122     if (test && (testType = test->GetType()) != NULL) {
01123         if (!testType->IsNumericType() && !testType->IsBoolType()) {
01124             Error(test->pos, "Type \"%s\" can't be converted to boolean for \"for\" "
01125                   "loop test.", test->GetType()->GetString().c_str());
01126             return NULL;
01127         }
01128 
01129         // See comments in DoStmt::TypeCheck() regarding
01130         // 'uniformTest' and the type cast here.
01131         bool uniformTest = (testType->IsUniformType() &&
01132                             !g->opt.disableUniformControlFlow &&
01133                             !lHasVaryingBreakOrContinue(stmts));
01134         test = new TypeCastExpr(uniformTest ? AtomicType::UniformBool :
01135                                 AtomicType::VaryingBool, test, test->pos);
01136         test = ::TypeCheck(test);
01137         if (test == NULL)
01138             return NULL;
01139     }
01140 
01141     return this;
01142 }
01143 
01144 
01145 int
01146 ForStmt::EstimateCost() const {
01147     bool uniformTest = test ? test->GetType()->IsUniformType() :
01148         (!g->opt.disableUniformControlFlow &&
01149          !lHasVaryingBreakOrContinue(stmts));
01150 
01151     return uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP;
01152 }
01153 
01154 
01155 void
01156 ForStmt::Print(int indent) const {
01157     printf("%*cFor Stmt", indent, ' ');
01158     pos.Print();
01159     printf("\n");
01160     if (init) {
01161         printf("%*cInit:\n", indent+4, ' ');
01162         init->Print(indent+8);
01163     }
01164     if (test) {
01165         printf("%*cTest: ", indent+4, ' ');
01166         test->Print();
01167         printf("\n");
01168     }
01169     if (step) {
01170         printf("%*cStep:\n", indent+4, ' ');
01171         step->Print(indent+8);
01172     }
01173     if (stmts) {
01174         printf("%*cStmts:\n", indent+4, ' ');
01175         stmts->Print(indent+8);
01176     }
01177 }
01178 
01179 ///////////////////////////////////////////////////////////////////////////
01180 // BreakStmt
01181 
01182 BreakStmt::BreakStmt(bool cc, SourcePos p) 
01183     : Stmt(p), doCoherenceCheck(cc && !g->opt.disableCoherentControlFlow) {
01184 }
01185 
01186 
01187 void
01188 BreakStmt::EmitCode(FunctionEmitContext *ctx) const {
01189     if (!ctx->GetCurrentBasicBlock()) 
01190         return;
01191 
01192     ctx->SetDebugPos(pos);
01193     ctx->Break(doCoherenceCheck);
01194 }
01195 
01196 
01197 Stmt *
01198 BreakStmt::TypeCheck() {
01199     return this;
01200 }
01201 
01202 
01203 int
01204 BreakStmt::EstimateCost() const {
01205     return doCoherenceCheck ? COST_COHERENT_BREAK_CONTINE : 
01206         COST_REGULAR_BREAK_CONTINUE;
01207 }
01208 
01209 
01210 void
01211 BreakStmt::Print(int indent) const {
01212     printf("%*c%sBreak Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
01213     pos.Print();
01214     printf("\n");
01215 }
01216 
01217 
01218 ///////////////////////////////////////////////////////////////////////////
01219 // ContinueStmt
01220 
01221 ContinueStmt::ContinueStmt(bool cc, SourcePos p) 
01222     : Stmt(p), doCoherenceCheck(cc && !g->opt.disableCoherentControlFlow) {
01223 }
01224 
01225 
01226 void
01227 ContinueStmt::EmitCode(FunctionEmitContext *ctx) const {
01228     if (!ctx->GetCurrentBasicBlock()) 
01229         return;
01230 
01231     ctx->SetDebugPos(pos);
01232     ctx->Continue(doCoherenceCheck);
01233 }
01234 
01235 
01236 Stmt *
01237 ContinueStmt::TypeCheck() {
01238     return this;
01239 }
01240 
01241 
01242 int
01243 ContinueStmt::EstimateCost() const {
01244     return doCoherenceCheck ? COST_COHERENT_BREAK_CONTINE : 
01245         COST_REGULAR_BREAK_CONTINUE;
01246 }
01247 
01248 
01249 void
01250 ContinueStmt::Print(int indent) const {
01251     printf("%*c%sContinue Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
01252     pos.Print();
01253     printf("\n");
01254 }
01255 
01256 
01257 ///////////////////////////////////////////////////////////////////////////
01258 // ForeachStmt
01259 
01260 ForeachStmt::ForeachStmt(const std::vector<Symbol *> &lvs, 
01261                          const std::vector<Expr *> &se, 
01262                          const std::vector<Expr *> &ee, 
01263                          Stmt *s, bool t, SourcePos pos)
01264     : Stmt(pos), dimVariables(lvs), startExprs(se), endExprs(ee), isTiled(t),
01265       stmts(s) {
01266 }
01267 
01268 
01269 /* Given a uniform counter value in the memory location pointed to by
01270    uniformCounterPtr, compute the corresponding set of varying counter
01271    values for use within the loop body.
01272  */
01273 static llvm::Value *
01274 lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx, 
01275                       llvm::Value *uniformCounterPtr,
01276                       llvm::Value *varyingCounterPtr,
01277                       const std::vector<int> &spans) {
01278     // Smear the uniform counter value out to be varying
01279     llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
01280     llvm::Value *smearCounter = 
01281         llvm::UndefValue::get(LLVMTypes::Int32VectorType);
01282     for (int i = 0; i < g->target.vectorWidth; ++i)
01283         smearCounter = 
01284             ctx->InsertInst(smearCounter, counter, i, "smear_counter");
01285 
01286     // Figure out the offsets; this is a little bit tricky.  As an example,
01287     // consider a 2D tiled foreach loop, where we're running 8-wide and
01288     // where the inner dimension has a stride of 4 and the outer dimension
01289     // has a stride of 2.  For the inner dimension, we want the offsets
01290     // (0,1,2,3,0,1,2,3), and for the outer dimension we want
01291     // (0,0,0,0,1,1,1,1).
01292     int32_t delta[ISPC_MAX_NVEC];
01293     for (int i = 0; i < g->target.vectorWidth; ++i) {
01294         int d = i;
01295         // First, account for the effect of any dimensions at deeper
01296         // nesting levels than the current one.
01297         int prevDimSpanCount = 1;
01298         for (int j = dim; j < nDims-1; ++j)
01299             prevDimSpanCount *= spans[j+1];
01300         d /= prevDimSpanCount;
01301 
01302         // And now with what's left, figure out our own offset
01303         delta[i] = d % spans[dim];
01304     }
01305 
01306     // Add the deltas to compute the varying counter values; store the
01307     // result to memory and then return it directly as well.
01308     llvm::Value *varyingCounter = 
01309         ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
01310                             LLVMInt32Vector(delta), "iter_val");
01311     ctx->StoreInst(varyingCounter, varyingCounterPtr);
01312     return varyingCounter;
01313 }
01314 
01315 
01316 /** Returns the integer log2 of the given integer. */
01317 static int
01318 lLog2(int i) {
01319     int ret = 0;
01320     while (i != 0) {
01321         ++ret;
01322         i >>= 1;
01323     }
01324     return ret-1;
01325 }
01326 
01327 
01328 /* Figure out how many elements to process in each dimension for each time
01329    through a foreach loop.  The untiled case is easy; all of the outer
01330    dimensions up until the innermost one have a span of 1, and the
01331    innermost one takes the entire vector width.  For the tiled case, we
01332    give wider spans to the innermost dimensions while also trying to
01333    generate relatively square domains.
01334 
01335    This code works recursively from outer dimensions to inner dimensions.
01336  */
01337 static void
01338 lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a) {
01339     if (dimsLeft == 0) {
01340         // Nothing left to do but give all of the remaining work to the
01341         // innermost domain.
01342         *a = itemsLeft;
01343         return;
01344     }
01345 
01346     if (isTiled == false || (dimsLeft >= lLog2(itemsLeft)))
01347         // If we're not tiled, or if there are enough dimensions left that
01348         // giving this one any more than a span of one would mean that a
01349         // later dimension would have to have a span of one, give this one
01350         // a span of one to save the available items for later.
01351         *a = 1;
01352     else if (itemsLeft >= 16 && (dimsLeft == 1))
01353         // Special case to have 4x4 domains for the 2D case when running
01354         // 16-wide.
01355         *a = 4;
01356     else
01357         // Otherwise give this dimension a span of two. 
01358         *a = 2;
01359 
01360     lGetSpans(dimsLeft-1, nDims, itemsLeft / *a, isTiled, a+1);
01361 }
01362 
01363 
01364 /* Emit code for a foreach statement.  We effectively emit code to run the
01365    set of n-dimensional nested loops corresponding to the dimensionality of
01366    the foreach statement along with the extra logic to deal with mismatches
01367    between the vector width we're compiling to and the number of elements
01368    to process.
01369  */
01370 void
01371 ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
01372     if (ctx->GetCurrentBasicBlock() == NULL || stmts == NULL) 
01373         return;
01374 
01375     llvm::BasicBlock *bbFullBody = ctx->CreateBasicBlock("foreach_full_body");
01376     llvm::BasicBlock *bbMaskedBody = ctx->CreateBasicBlock("foreach_masked_body");
01377     llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit");
01378 
01379     llvm::Value *oldMask = ctx->GetInternalMask();
01380     llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
01381 
01382     ctx->SetDebugPos(pos);
01383     ctx->StartScope();
01384 
01385     ctx->SetInternalMask(LLVMMaskAllOn);
01386     ctx->SetFunctionMask(LLVMMaskAllOn);
01387 
01388     // This should be caught during typechecking
01389     AssertPos(pos, startExprs.size() == dimVariables.size() && 
01390               endExprs.size() == dimVariables.size());
01391     int nDims = (int)dimVariables.size();
01392 
01393     ///////////////////////////////////////////////////////////////////////
01394     // Setup: compute the number of items we have to work on in each
01395     // dimension and a number of derived values.
01396     std::vector<llvm::BasicBlock *> bbReset, bbStep, bbTest;
01397     std::vector<llvm::Value *> startVals, endVals, uniformCounterPtrs;
01398     std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
01399 
01400     std::vector<int> span(nDims, 0);
01401     lGetSpans(nDims-1, nDims, g->target.vectorWidth, isTiled, &span[0]);
01402 
01403     for (int i = 0; i < nDims; ++i) {
01404         // Basic blocks that we'll fill in later with the looping logic for
01405         // this dimension.
01406         bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
01407         if (i < nDims-1)
01408             // stepping for the innermost dimension is handled specially
01409             bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
01410         bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
01411 
01412         // Start and end value for this loop dimension
01413         llvm::Value *sv = startExprs[i]->GetValue(ctx);
01414         llvm::Value *ev = endExprs[i]->GetValue(ctx);
01415         if (sv == NULL || ev == NULL)
01416             return;
01417         startVals.push_back(sv);
01418         endVals.push_back(ev);
01419 
01420         // nItems = endVal - startVal
01421         llvm::Value *nItems = 
01422             ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
01423 
01424         // nExtras = nItems % (span for this dimension)
01425         // This gives us the number of extra elements we need to deal with
01426         // at the end of the loop for this dimension that don't fit cleanly
01427         // into a vector width.
01428         nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
01429                                               LLVMInt32(span[i]), "nextras"));
01430 
01431         // alignedEnd = endVal - nExtras
01432         alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
01433                                                  nExtras[i], "aligned_end"));
01434 
01435         ///////////////////////////////////////////////////////////////////////
01436         // Each dimension has a loop counter that is a uniform value that
01437         // goes from startVal to endVal, in steps of the span for this
01438         // dimension.  Its value is only used internally here for looping
01439         // logic and isn't directly available in the user's program code.
01440         uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type, 
01441                                                      "counter"));
01442         ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
01443 
01444         // There is also a varying variable that holds the set of index
01445         // values for each dimension in the current loop iteration; this is
01446         // the value that is program-visible.
01447         dimVariables[i]->storagePtr = 
01448             ctx->AllocaInst(LLVMTypes::Int32VectorType, 
01449                             dimVariables[i]->name.c_str());
01450         dimVariables[i]->parentFunction = ctx->GetFunction();
01451         ctx->EmitVariableDebugInfo(dimVariables[i]);
01452 
01453         // Each dimension also maintains a mask that represents which of
01454         // the varying elements in the current iteration should be
01455         // processed.  (i.e. this is used to disable the lanes that have
01456         // out-of-bounds offsets.)
01457         extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
01458         ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
01459     }
01460 
01461     ctx->StartForeach(FunctionEmitContext::FOREACH_REGULAR);
01462 
01463     // On to the outermost loop's test
01464     ctx->BranchInst(bbTest[0]);
01465 
01466     ///////////////////////////////////////////////////////////////////////////
01467     // foreach_reset: this code runs when we need to reset the counter for
01468     // a given dimension in preparation for running through its loop again,
01469     // after the enclosing level advances its counter.
01470     for (int i = 0; i < nDims; ++i) {
01471         ctx->SetCurrentBasicBlock(bbReset[i]);
01472         if (i == 0)
01473             ctx->BranchInst(bbExit);
01474         else {
01475             ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
01476             ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
01477             ctx->BranchInst(bbStep[i-1]);
01478         }
01479     }
01480 
01481     ///////////////////////////////////////////////////////////////////////////
01482     // foreach_step: increment the uniform counter by the vector width.
01483     // Note that we don't increment the varying counter here as well but
01484     // just generate its value when we need it in the loop body.  Don't do
01485     // this for the innermost dimension, which has a more complex stepping
01486     // structure..
01487     for (int i = 0; i < nDims-1; ++i) {
01488         ctx->SetCurrentBasicBlock(bbStep[i]);
01489         llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
01490         llvm::Value *newCounter =  
01491             ctx->BinaryOperator(llvm::Instruction::Add, counter,
01492                                 LLVMInt32(span[i]), "new_counter");
01493         ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
01494         ctx->BranchInst(bbTest[i]);
01495     }
01496 
01497     ///////////////////////////////////////////////////////////////////////////
01498     // foreach_test (for all dimensions other than the innermost...)
01499     std::vector<llvm::Value *> inExtras;
01500     for (int i = 0; i < nDims-1; ++i) {
01501         ctx->SetCurrentBasicBlock(bbTest[i]);
01502 
01503         llvm::Value *haveExtras = 
01504             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
01505                          endVals[i], alignedEnd[i], "have_extras");
01506 
01507         llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
01508         llvm::Value *atAlignedEnd = 
01509             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
01510                          counter, alignedEnd[i], "at_aligned_end");
01511         llvm::Value *inEx = 
01512             ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
01513                                 atAlignedEnd, "in_extras");
01514 
01515         if (i == 0)
01516             inExtras.push_back(inEx);
01517         else
01518             inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
01519                                                    inExtras[i-1], "in_extras_all"));
01520 
01521         llvm::Value *varyingCounter = 
01522             lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i], 
01523                                   dimVariables[i]->storagePtr, span);
01524 
01525         llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
01526         for (int j = 0; j < g->target.vectorWidth; ++j)
01527             smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end");
01528         // Do a vector compare of its value to the end value to generate a
01529         // mask for this last bit of work.
01530         llvm::Value *emask = 
01531             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
01532                          varyingCounter, smearEnd);
01533         emask = ctx->I1VecToBoolVec(emask);
01534 
01535         if (i == 0)
01536             ctx->StoreInst(emask, extrasMaskPtrs[i]);
01537         else {
01538             llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
01539             llvm::Value *newMask =
01540                 ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
01541                                     "extras_mask");
01542             ctx->StoreInst(newMask, extrasMaskPtrs[i]);
01543         }
01544 
01545         llvm::Value *notAtEnd = 
01546             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
01547                          counter, endVals[i]);
01548         ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
01549     }
01550 
01551     ///////////////////////////////////////////////////////////////////////////
01552     // foreach_test (for innermost dimension)
01553     //
01554     // All of the outer dimensions are handled generically--basically as a
01555     // for() loop from the start value to the end value, where at each loop
01556     // test, we compute the mask of active elements for the current
01557     // dimension and then update an overall mask that is the AND
01558     // combination of all of the outer ones.
01559     //
01560     // The innermost loop is handled specially, for performance purposes.
01561     // When starting the innermost dimension, we start by checking once
01562     // whether any of the outer dimensions has set the mask to be
01563     // partially-active or not.  We follow different code paths for these
01564     // two cases, taking advantage of the knowledge that the mask is all
01565     // on, when this is the case.
01566     //
01567     // In each of these code paths, we start with a loop from the starting
01568     // value to the aligned end value for the innermost dimension; we can
01569     // guarantee that the innermost loop will have an "all on" mask (as far
01570     // as its dimension is concerned) for the duration of this loop.  Doing
01571     // so allows us to emit code that assumes the mask is all on (for the
01572     // case where none of the outer dimensions has set the mask to be
01573     // partially on), or allows us to emit code that just uses the mask
01574     // from the outer dimensions directly (for the case where they have).
01575     //
01576     // After this loop, we just need to deal with one vector's worth of
01577     // "ragged extra bits", where the mask used includes the effect of the
01578     // mask for the innermost dimension.
01579     //
01580     // We start out this process by emitting the check that determines
01581     // whether any of the enclosing dimensions is partially active
01582     // (i.e. processing extra elements that don't exactly fit into a
01583     // vector).
01584     llvm::BasicBlock *bbOuterInExtras = 
01585         ctx->CreateBasicBlock("outer_in_extras");
01586     llvm::BasicBlock *bbOuterNotInExtras = 
01587         ctx->CreateBasicBlock("outer_not_in_extras");
01588 
01589     ctx->SetCurrentBasicBlock(bbTest[nDims-1]);
01590     if (inExtras.size())
01591         ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
01592                         inExtras.back());
01593     else
01594         // for a 1D iteration domain, we certainly don't have any enclosing
01595         // dimensions that are processing extra elements.
01596         ctx->BranchInst(bbOuterNotInExtras);
01597 
01598     ///////////////////////////////////////////////////////////////////////////
01599     // One or more outer dimensions in extras, so we need to mask for the loop
01600     // body regardless.  We break this into two cases, roughly:
01601     // for (counter = start; counter < alignedEnd; counter += step) {
01602     //   // mask is all on for inner, so set mask to outer mask
01603     //   // run loop body with mask
01604     // }
01605     // // counter == alignedEnd
01606     // if (counter < end) {
01607     //   // set mask to outermask & (counter+programCounter < end)
01608     //   // run loop body with mask
01609     // }
01610     llvm::BasicBlock *bbAllInnerPartialOuter =
01611         ctx->CreateBasicBlock("all_inner_partial_outer");
01612     llvm::BasicBlock *bbPartial =
01613         ctx->CreateBasicBlock("both_partial");
01614     ctx->SetCurrentBasicBlock(bbOuterInExtras); {
01615         // Update the varying counter value here, since all subsequent
01616         // blocks along this path need it.
01617         lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], 
01618                               dimVariables[nDims-1]->storagePtr, span);
01619 
01620         // here we just check to see if counter < alignedEnd
01621         llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
01622         llvm::Value *beforeAlignedEnd = 
01623             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
01624                          counter, alignedEnd[nDims-1], "before_aligned_end");
01625         ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
01626     }
01627 
01628     // Below we have a basic block that runs the loop body code for the
01629     // case where the mask is partially but not fully on.  This same block
01630     // runs in multiple cases: both for handling any ragged extra data for
01631     // the innermost dimension but also when outer dimensions have set the
01632     // mask to be partially on. 
01633     //
01634     // The value stored in stepIndexAfterMaskedBodyPtr is used after each
01635     // execution of the body code to determine whether the innermost index
01636     // value should be incremented by the step (we're running the "for"
01637     // loop of full vectors at the innermost dimension, with outer
01638     // dimensions having set the mask to be partially on), or whether we're
01639     // running once for the ragged extra bits at the end of the innermost
01640     // dimension, in which case we're done with the innermost dimension and
01641     // should step the loop counter for the next enclosing dimension
01642     // instead.
01643     llvm::Value *stepIndexAfterMaskedBodyPtr =
01644         ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
01645 
01646     ///////////////////////////////////////////////////////////////////////////
01647     // We're in the inner loop part where the only masking is due to outer
01648     // dimensions but the innermost dimension fits fully into a vector's
01649     // width.  Set the mask and jump to the masked loop body.
01650     ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
01651         llvm::Value *mask;
01652         if (nDims == 1)
01653             // 1D loop; we shouldn't ever get here anyway
01654             mask = LLVMMaskAllOff;
01655         else
01656             mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
01657 
01658         ctx->SetInternalMask(mask);
01659 
01660         ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
01661         ctx->BranchInst(bbMaskedBody);
01662     }
01663 
01664     ///////////////////////////////////////////////////////////////////////////
01665     // We need to include the effect of the innermost dimension in the mask
01666     // for the final bits here
01667     ctx->SetCurrentBasicBlock(bbPartial); {
01668         llvm::Value *varyingCounter = 
01669             ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
01670         llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
01671         for (int j = 0; j < g->target.vectorWidth; ++j)
01672             smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
01673         llvm::Value *emask = 
01674             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
01675                          varyingCounter, smearEnd);
01676         emask = ctx->I1VecToBoolVec(emask);
01677 
01678         if (nDims == 1)
01679             ctx->SetInternalMask(emask);
01680         else {
01681             llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
01682             llvm::Value *newMask =
01683                 ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
01684                                     "extras_mask");
01685             ctx->SetInternalMask(newMask);
01686         }
01687 
01688         ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
01689         ctx->BranchInst(bbMaskedBody);
01690     }
01691 
01692     ///////////////////////////////////////////////////////////////////////////
01693     // None of the outer dimensions is processing extras; along the lines
01694     // of above, we can express this as:
01695     // for (counter = start; counter < alignedEnd; counter += step) {
01696     //   // mask is all on
01697     //   // run loop body with mask all on
01698     // }
01699     // // counter == alignedEnd
01700     // if (counter < end) {
01701     //   // set mask to (counter+programCounter < end)
01702     //   // run loop body with mask
01703     // }
01704     llvm::BasicBlock *bbPartialInnerAllOuter =
01705         ctx->CreateBasicBlock("partial_inner_all_outer");
01706     ctx->SetCurrentBasicBlock(bbOuterNotInExtras); {
01707         llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
01708         llvm::Value *beforeAlignedEnd = 
01709             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
01710                          counter, alignedEnd[nDims-1], "before_aligned_end");
01711         ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
01712                         beforeAlignedEnd);
01713     }
01714 
01715     ///////////////////////////////////////////////////////////////////////////
01716     // full_body: do a full vector's worth of work.  We know that all
01717     // lanes will be running here, so we explicitly set the mask to be 'all
01718     // on'.  This ends up being relatively straightforward: just update the
01719     // value of the varying loop counter and have the statements in the
01720     // loop body emit their code.
01721     llvm::BasicBlock *bbFullBodyContinue = 
01722         ctx->CreateBasicBlock("foreach_full_continue");
01723     ctx->SetCurrentBasicBlock(bbFullBody); {
01724         ctx->SetInternalMask(LLVMMaskAllOn);
01725         lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], 
01726                               dimVariables[nDims-1]->storagePtr, span);
01727         ctx->SetContinueTarget(bbFullBodyContinue);
01728         ctx->AddInstrumentationPoint("foreach loop body (all on)");
01729         stmts->EmitCode(ctx);
01730         AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
01731         ctx->BranchInst(bbFullBodyContinue);
01732     }
01733     ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
01734         ctx->RestoreContinuedLanes();
01735         llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
01736         llvm::Value *newCounter =  
01737             ctx->BinaryOperator(llvm::Instruction::Add, counter,
01738                                 LLVMInt32(span[nDims-1]), "new_counter");
01739         ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
01740         ctx->BranchInst(bbOuterNotInExtras);
01741     }
01742 
01743     ///////////////////////////////////////////////////////////////////////////
01744     // We're done running blocks with the mask all on; see if the counter is
01745     // less than the end value, in which case we need to run the body one
01746     // more time to get the extra bits.
01747     llvm::BasicBlock *bbSetInnerMask = 
01748         ctx->CreateBasicBlock("partial_inner_only");
01749     ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); {
01750         llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
01751         llvm::Value *beforeFullEnd = 
01752             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
01753                          counter, endVals[nDims-1], "before_full_end");
01754         ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
01755     }
01756 
01757     ///////////////////////////////////////////////////////////////////////////
01758     // The outer dimensions are all on, so the mask is just given by the
01759     // mask for the innermost dimension
01760     ctx->SetCurrentBasicBlock(bbSetInnerMask); {
01761         llvm::Value *varyingCounter = 
01762             lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], 
01763                                   dimVariables[nDims-1]->storagePtr, span);
01764         llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
01765         for (int j = 0; j < g->target.vectorWidth; ++j)
01766             smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
01767         llvm::Value *emask = 
01768             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
01769                          varyingCounter, smearEnd);
01770         emask = ctx->I1VecToBoolVec(emask);
01771         ctx->SetInternalMask(emask);
01772 
01773         ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
01774         ctx->BranchInst(bbMaskedBody);
01775     }
01776 
01777     ///////////////////////////////////////////////////////////////////////////
01778     // masked_body: set the mask and have the statements emit their
01779     // code again.  Note that it's generally worthwhile having two copies
01780     // of the statements' code, since the code above is emitted with the
01781     // mask known to be all-on, which in turn leads to more efficient code
01782     // for that case.
01783     llvm::BasicBlock *bbStepInnerIndex = 
01784         ctx->CreateBasicBlock("step_inner_index");
01785     llvm::BasicBlock *bbMaskedBodyContinue = 
01786         ctx->CreateBasicBlock("foreach_masked_continue");
01787     ctx->SetCurrentBasicBlock(bbMaskedBody); {
01788         ctx->AddInstrumentationPoint("foreach loop body (masked)");
01789         ctx->SetContinueTarget(bbMaskedBodyContinue);
01790         ctx->DisableGatherScatterWarnings();
01791         stmts->EmitCode(ctx);
01792         ctx->EnableGatherScatterWarnings();
01793         ctx->BranchInst(bbMaskedBodyContinue);
01794     }
01795     ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); {
01796         ctx->RestoreContinuedLanes();
01797         llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
01798         ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
01799     }
01800 
01801     ///////////////////////////////////////////////////////////////////////////
01802     // step the innermost index, for the case where we're doing the
01803     // innermost for loop over full vectors.
01804     ctx->SetCurrentBasicBlock(bbStepInnerIndex); {
01805         llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
01806         llvm::Value *newCounter =  
01807             ctx->BinaryOperator(llvm::Instruction::Add, counter,
01808                                 LLVMInt32(span[nDims-1]), "new_counter");
01809         ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
01810         ctx->BranchInst(bbOuterInExtras);
01811     }
01812 
01813     ///////////////////////////////////////////////////////////////////////////
01814     // foreach_exit: All done.  Restore the old mask and clean up
01815     ctx->SetCurrentBasicBlock(bbExit);
01816 
01817     ctx->SetInternalMask(oldMask);
01818     ctx->SetFunctionMask(oldFunctionMask);
01819 
01820     ctx->EndForeach();
01821     ctx->EndScope();
01822 }
01823 
01824 
01825 Stmt *
01826 ForeachStmt::TypeCheck() {
01827     bool anyErrors = false;
01828     for (unsigned int i = 0; i < startExprs.size(); ++i) {
01829         if (startExprs[i] != NULL)
01830             startExprs[i] = TypeConvertExpr(startExprs[i], 
01831                                             AtomicType::UniformInt32, 
01832                                             "foreach starting value");
01833         anyErrors |= (startExprs[i] == NULL);
01834     }
01835     for (unsigned int i = 0; i < endExprs.size(); ++i) {
01836         if (endExprs[i] != NULL)
01837             endExprs[i] = TypeConvertExpr(endExprs[i], AtomicType::UniformInt32,
01838                                           "foreach ending value");
01839         anyErrors |= (endExprs[i] == NULL);
01840     }
01841 
01842     if (startExprs.size() < dimVariables.size()) {
01843         Error(pos, "Not enough initial values provided for \"foreach\" loop; "
01844               "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size());
01845         anyErrors = true;
01846     }
01847     else if (startExprs.size() > dimVariables.size()) {
01848         Error(pos, "Too many initial values provided for \"foreach\" loop; "
01849               "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size());
01850         anyErrors = true;
01851     }
01852 
01853     if (endExprs.size() < dimVariables.size()) {
01854         Error(pos, "Not enough initial values provided for \"foreach\" loop; "
01855               "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size());
01856         anyErrors = true;
01857     }
01858     else if (endExprs.size() > dimVariables.size()) {
01859         Error(pos, "Too many initial values provided for \"foreach\" loop; "
01860               "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size());
01861         anyErrors = true;
01862     }
01863 
01864     return anyErrors ? NULL : this;
01865 }
01866 
01867 
01868 int
01869 ForeachStmt::EstimateCost() const {
01870     return dimVariables.size() * (COST_UNIFORM_LOOP + COST_SIMPLE_ARITH_LOGIC_OP);
01871 }
01872 
01873 
01874 void
01875 ForeachStmt::Print(int indent) const {
01876     printf("%*cForeach Stmt", indent, ' ');
01877     pos.Print();
01878     printf("\n");
01879     
01880     for (unsigned int i = 0; i < dimVariables.size(); ++i)
01881         if (dimVariables[i] != NULL)
01882             printf("%*cVar %d: %s\n", indent+4, ' ', i, 
01883                    dimVariables[i]->name.c_str());
01884         else 
01885             printf("%*cVar %d: NULL\n", indent+4, ' ', i);
01886 
01887     printf("Start values:\n");
01888     for (unsigned int i = 0; i < startExprs.size(); ++i) {
01889         if (startExprs[i] != NULL)
01890             startExprs[i]->Print();
01891         else
01892             printf("NULL");
01893         if (i != startExprs.size()-1)
01894             printf(", ");
01895         else
01896             printf("\n");
01897     }
01898 
01899     printf("End values:\n");
01900     for (unsigned int i = 0; i < endExprs.size(); ++i) {
01901         if (endExprs[i] != NULL)
01902             endExprs[i]->Print();
01903         else
01904             printf("NULL");
01905         if (i != endExprs.size()-1)
01906             printf(", ");
01907         else
01908             printf("\n");
01909     }
01910 
01911     if (stmts != NULL) {
01912         printf("%*cStmts:\n", indent+4, ' ');
01913         stmts->Print(indent+8);
01914     }
01915 }
01916 
01917 
01918 ///////////////////////////////////////////////////////////////////////////
01919 // ForeachActiveStmt
01920 
01921 ForeachActiveStmt::ForeachActiveStmt(Symbol *s, Stmt *st, SourcePos pos) 
01922     : Stmt(pos) {
01923     sym = s;
01924     stmts = st;
01925 }
01926 
01927 
01928 void
01929 ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
01930     if (!ctx->GetCurrentBasicBlock()) 
01931         return;
01932 
01933     // Allocate storage for the symbol that we'll use for the uniform
01934     // variable that holds the current program instance in each loop
01935     // iteration.
01936     if (sym->type == NULL) {
01937         Assert(m->errorCount > 0);
01938         return;
01939     }
01940     Assert(Type::Equal(sym->type, 
01941                        AtomicType::UniformInt64->GetAsConstType()));
01942     sym->storagePtr = ctx->AllocaInst(LLVMTypes::Int64Type, sym->name.c_str());
01943 
01944     ctx->SetDebugPos(pos);
01945     ctx->EmitVariableDebugInfo(sym);
01946 
01947     // The various basic blocks that we'll need in the below
01948     llvm::BasicBlock *bbFindNext = 
01949         ctx->CreateBasicBlock("foreach_active_find_next");
01950     llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_active_body");
01951     llvm::BasicBlock *bbCheckForMore = 
01952         ctx->CreateBasicBlock("foreach_active_check_for_more");
01953     llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("foreach_active_done");
01954 
01955     // Save the old mask so that we can restore it at the end
01956     llvm::Value *oldInternalMask = ctx->GetInternalMask();
01957     
01958     // Now, *maskBitsPtr will maintain a bitmask for the lanes that remain
01959     // to be processed by a pass through the loop body.  It starts out with
01960     // the current execution mask (which should never be all off going in
01961     // to this)...
01962     llvm::Value *oldFullMask = ctx->GetFullMask();
01963     llvm::Value *maskBitsPtr = 
01964         ctx->AllocaInst(LLVMTypes::Int64Type, "mask_bits");
01965     llvm::Value *movmsk = ctx->LaneMask(oldFullMask);
01966     ctx->StoreInst(movmsk, maskBitsPtr);
01967 
01968     // Officially start the loop.
01969     ctx->StartScope();
01970     ctx->StartForeach(FunctionEmitContext::FOREACH_ACTIVE);
01971     ctx->SetContinueTarget(bbCheckForMore);
01972 
01973     // Onward to find the first set of program instance to run the loop for
01974     ctx->BranchInst(bbFindNext);
01975     
01976     ctx->SetCurrentBasicBlock(bbFindNext); {
01977         // Load the bitmask of the lanes left to be processed
01978         llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
01979 
01980         // Find the index of the first set bit in the mask
01981         llvm::Function *ctlzFunc = 
01982             m->module->getFunction("__count_trailing_zeros_i64");
01983         Assert(ctlzFunc != NULL);
01984         llvm::Value *firstSet = ctx->CallInst(ctlzFunc, NULL, remainingBits,
01985                                               "first_set");
01986 
01987         // Store that value into the storage allocated for the iteration
01988         // variable.
01989         ctx->StoreInst(firstSet, sym->storagePtr);
01990 
01991         // Now set the execution mask to be only on for the current program
01992         // instance.  (TODO: is there a more efficient way to do this? e.g.
01993         // for AVX1, we might want to do this as float rather than int
01994         // math...)
01995 
01996         // Get the "program index" vector value
01997         llvm::Value *programIndex = 
01998             llvm::UndefValue::get(LLVMTypes::Int32VectorType);
01999         for (int i = 0; i < g->target.vectorWidth; ++i)
02000             programIndex = ctx->InsertInst(programIndex, LLVMInt32(i), i,
02001                                            "prog_index");
02002 
02003         // And smear the current lane out to a vector
02004         llvm::Value *firstSet32 = 
02005             ctx->TruncInst(firstSet, LLVMTypes::Int32Type, "first_set32");
02006         llvm::Value *firstSet32Smear = ctx->SmearUniform(firstSet32);
02007 
02008         // Now set the execution mask based on doing a vector compare of
02009         // these two
02010         llvm::Value *iterMask = 
02011             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
02012                          firstSet32Smear, programIndex);
02013         iterMask = ctx->I1VecToBoolVec(iterMask);
02014 
02015         ctx->SetInternalMask(iterMask);
02016 
02017         // Also update the bitvector of lanes left to turn off the bit for
02018         // the lane we're about to run.
02019         llvm::Value *setMask = 
02020             ctx->BinaryOperator(llvm::Instruction::Shl, LLVMInt64(1),
02021                                 firstSet, "set_mask");
02022         llvm::Value *notSetMask = ctx->NotOperator(setMask);
02023         llvm::Value *newRemaining = 
02024             ctx->BinaryOperator(llvm::Instruction::And, remainingBits, 
02025                                 notSetMask, "new_remaining");
02026         ctx->StoreInst(newRemaining, maskBitsPtr);
02027 
02028         // and onward to run the loop body...
02029         ctx->BranchInst(bbBody);
02030     }
02031 
02032     ctx->SetCurrentBasicBlock(bbBody); {
02033         // Run the code in the body of the loop.  This is easy now.
02034         if (stmts)
02035             stmts->EmitCode(ctx);
02036 
02037         Assert(ctx->GetCurrentBasicBlock() != NULL);
02038         ctx->BranchInst(bbCheckForMore);
02039     }
02040 
02041     ctx->SetCurrentBasicBlock(bbCheckForMore); {
02042         // At the end of the loop body (either due to running the
02043         // statements normally, or a continue statement in the middle of
02044         // the loop that jumps to the end, see if there are any lanes left
02045         // to be processed.
02046         llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
02047         llvm::Value *nonZero = 
02048             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
02049                          remainingBits, LLVMInt64(0), "remaining_ne_zero");
02050         ctx->BranchInst(bbFindNext, bbDone, nonZero);
02051     }
02052 
02053     ctx->SetCurrentBasicBlock(bbDone);
02054     ctx->SetInternalMask(oldInternalMask);
02055     ctx->EndForeach();
02056     ctx->EndScope();
02057 }
02058 
02059 
02060 void
02061 ForeachActiveStmt::Print(int indent) const {
02062     printf("%*cForeach_active Stmt", indent, ' ');
02063     pos.Print();
02064     printf("\n");
02065 
02066     printf("%*cIter symbol: ", indent+4, ' ');
02067     if (sym != NULL) {
02068         printf("%s", sym->name.c_str());
02069         if (sym->type != NULL)
02070             printf(" %s", sym->type->GetString().c_str());
02071     }
02072     else
02073         printf("NULL");
02074     printf("\n");
02075 
02076     printf("%*cStmts:\n", indent+4, ' ');
02077     if (stmts != NULL)
02078         stmts->Print(indent+8);
02079     else
02080         printf("NULL");
02081     printf("\n");
02082 }
02083 
02084 
02085 Stmt *
02086 ForeachActiveStmt::TypeCheck() {
02087     if (sym == NULL)
02088         return NULL;
02089 
02090     return this;
02091 }
02092 
02093 
02094 int
02095 ForeachActiveStmt::EstimateCost() const {
02096     return COST_VARYING_LOOP;
02097 }
02098 
02099 
02100 ///////////////////////////////////////////////////////////////////////////
02101 // ForeachUniqueStmt
02102 
02103 ForeachUniqueStmt::ForeachUniqueStmt(const char *iterName, Expr *e, 
02104                                      Stmt *s, SourcePos pos) 
02105     : Stmt(pos) {
02106     sym = m->symbolTable->LookupVariable(iterName);
02107     expr = e;
02108     stmts = s;
02109 }
02110 
02111 
02112 void
02113 ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const {
02114     if (!ctx->GetCurrentBasicBlock()) 
02115         return;
02116 
02117     // First, allocate local storage for the symbol that we'll use for the
02118     // uniform variable that holds the current unique value through each
02119     // loop.
02120     if (sym->type == NULL) {
02121         Assert(m->errorCount > 0);
02122         return;
02123     }
02124     llvm::Type *symType = sym->type->LLVMType(g->ctx);
02125     if (symType == NULL) {
02126         Assert(m->errorCount > 0);
02127         return;
02128     }
02129     sym->storagePtr = ctx->AllocaInst(symType, sym->name.c_str());
02130 
02131     ctx->SetDebugPos(pos);
02132     ctx->EmitVariableDebugInfo(sym);
02133 
02134     // The various basic blocks that we'll need in the below
02135     llvm::BasicBlock *bbFindNext = ctx->CreateBasicBlock("foreach_find_next");
02136     llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_body");
02137     llvm::BasicBlock *bbCheckForMore = ctx->CreateBasicBlock("foreach_check_for_more");
02138     llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("foreach_done");
02139 
02140     // Prepare the FunctionEmitContext
02141     ctx->StartScope();
02142 
02143     // Save the old internal mask so that we can restore it at the end
02144     llvm::Value *oldMask = ctx->GetInternalMask();
02145 
02146     // Now, *maskBitsPtr will maintain a bitmask for the lanes that remain
02147     // to be processed by a pass through the foreach_unique loop body.  It
02148     // starts out with the full execution mask (which should never be all
02149     // off going in to this)...
02150     llvm::Value *oldFullMask = ctx->GetFullMask();
02151     llvm::Value *maskBitsPtr = ctx->AllocaInst(LLVMTypes::Int64Type, "mask_bits");
02152     llvm::Value *movmsk = ctx->LaneMask(oldFullMask);
02153     ctx->StoreInst(movmsk, maskBitsPtr);
02154 
02155     // Officially start the loop.
02156     ctx->StartForeach(FunctionEmitContext::FOREACH_UNIQUE);
02157     ctx->SetContinueTarget(bbCheckForMore);
02158 
02159     // Evaluate the varying expression we're iterating over just once.
02160     llvm::Value *exprValue = expr->GetValue(ctx);
02161 
02162     // And we'll store its value into locally-allocated storage, for ease
02163     // of indexing over it with non-compile-time-constant indices.
02164     const Type *exprType;
02165     llvm::VectorType *llvmExprType;
02166     if (exprValue == NULL ||
02167         (exprType = expr->GetType()) == NULL ||
02168         (llvmExprType = 
02169          llvm::dyn_cast<llvm::VectorType>(exprValue->getType())) == NULL) {
02170         Assert(m->errorCount > 0);
02171         return;
02172     }
02173     ctx->SetDebugPos(pos);
02174     const Type *exprPtrType = PointerType::GetUniform(exprType);
02175     llvm::Value *exprMem = ctx->AllocaInst(llvmExprType, "expr_mem");
02176     ctx->StoreInst(exprValue, exprMem);
02177 
02178     // Onward to find the first set of lanes to run the loop for
02179     ctx->BranchInst(bbFindNext);
02180     
02181     ctx->SetCurrentBasicBlock(bbFindNext); {
02182         // Load the bitmask of the lanes left to be processed
02183         llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
02184 
02185         // Find the index of the first set bit in the mask
02186         llvm::Function *ctlzFunc = 
02187             m->module->getFunction("__count_trailing_zeros_i64");
02188         Assert(ctlzFunc != NULL);
02189         llvm::Value *firstSet = ctx->CallInst(ctlzFunc, NULL, remainingBits,
02190                                               "first_set");
02191 
02192         // And load the corresponding element value from the temporary
02193         // memory storing the value of the varying expr.
02194         llvm::Value *uniqueValuePtr = 
02195             ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
02196                                    "unique_index_ptr");
02197         llvm::Value *uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
02198 
02199         // If it's a varying pointer type, need to convert from the int
02200         // type we store in the vector to the actual pointer type
02201         if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL)
02202             uniqueValue = ctx->IntToPtrInst(uniqueValue, symType);
02203 
02204         // Store that value in sym's storage so that the iteration variable
02205         // has the right value inside the loop body
02206         ctx->StoreInst(uniqueValue, sym->storagePtr);
02207 
02208         // Set the execution mask so that it's on for any lane that a) was
02209         // running at the start of the foreach loop, and b) where that
02210         // lane's value of the varying expression is the same as the value
02211         // we've selected to process this time through--i.e.:
02212         // oldMask & (smear(element) == exprValue)
02213         llvm::Value *uniqueSmear = ctx->SmearUniform(uniqueValue, "unique_semar");
02214         llvm::Value *matchingLanes = NULL;
02215         if (uniqueValue->getType()->isFloatingPointTy())
02216             matchingLanes = 
02217                 ctx->CmpInst(llvm::Instruction::FCmp, llvm::CmpInst::FCMP_OEQ,
02218                              uniqueSmear, exprValue, "matching_lanes");
02219         else
02220             matchingLanes = 
02221                 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
02222                              uniqueSmear, exprValue, "matching_lanes");
02223         matchingLanes = ctx->I1VecToBoolVec(matchingLanes);
02224 
02225         llvm::Value *loopMask = 
02226             ctx->BinaryOperator(llvm::Instruction::And, oldMask, matchingLanes,
02227                                 "foreach_unique_loop_mask");
02228         ctx->SetInternalMask(loopMask);
02229 
02230         // Also update the bitvector of lanes left to process in subsequent
02231         // loop iterations:
02232         // remainingBits &= ~movmsk(current mask)
02233         llvm::Value *loopMaskMM = ctx->LaneMask(loopMask);
02234         llvm::Value *notLoopMaskMM = ctx->NotOperator(loopMaskMM);
02235         llvm::Value *newRemaining = 
02236             ctx->BinaryOperator(llvm::Instruction::And, remainingBits, 
02237                                 notLoopMaskMM, "new_remaining");
02238         ctx->StoreInst(newRemaining, maskBitsPtr);
02239 
02240         // and onward...
02241         ctx->BranchInst(bbBody);
02242     }
02243 
02244     ctx->SetCurrentBasicBlock(bbBody); {
02245         // Run the code in the body of the loop.  This is easy now.
02246         if (stmts)
02247             stmts->EmitCode(ctx);
02248 
02249         Assert(ctx->GetCurrentBasicBlock() != NULL);
02250         ctx->BranchInst(bbCheckForMore);
02251     }
02252 
02253     ctx->SetCurrentBasicBlock(bbCheckForMore); {
02254         // At the end of the loop body (either due to running the
02255         // statements normally, or a continue statement in the middle of
02256         // the loop that jumps to the end, see if there are any lanes left
02257         // to be processed.
02258         llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
02259         llvm::Value *nonZero = 
02260             ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
02261                          remainingBits, LLVMInt64(0), "remaining_ne_zero");
02262         ctx->BranchInst(bbFindNext, bbDone, nonZero);
02263     }
02264 
02265     ctx->SetCurrentBasicBlock(bbDone);
02266     ctx->SetInternalMask(oldMask);
02267     ctx->EndForeach();
02268     ctx->EndScope();
02269 }
02270 
02271 
02272 void
02273 ForeachUniqueStmt::Print(int indent) const {
02274     printf("%*cForeach_unique Stmt", indent, ' ');
02275     pos.Print();
02276     printf("\n");
02277 
02278     printf("%*cIter symbol: ", indent+4, ' ');
02279     if (sym != NULL) {
02280         printf("%s", sym->name.c_str());
02281         if (sym->type != NULL)
02282             printf(" %s", sym->type->GetString().c_str());
02283     }
02284     else
02285         printf("NULL");
02286     printf("\n");
02287 
02288     printf("%*cIter expr: ", indent+4, ' ');
02289     if (expr != NULL)
02290         expr->Print();
02291     else
02292         printf("NULL");
02293     printf("\n");
02294 
02295     printf("%*cStmts:\n", indent+4, ' ');
02296     if (stmts != NULL)
02297         stmts->Print(indent+8);
02298     else
02299         printf("NULL");
02300     printf("\n");
02301 }
02302 
02303 
02304 Stmt *
02305 ForeachUniqueStmt::TypeCheck() {
02306     const Type *type;
02307     if (sym == NULL || expr == NULL || (type = expr->GetType()) == NULL)
02308         return NULL;
02309 
02310     if (type->IsVaryingType() == false) {
02311         Error(expr->pos, "Iteration domain type in \"foreach_tiled\" loop "
02312               "must be \"varying\" type, not \"%s\".",
02313               type->GetString().c_str());
02314         return false;
02315     }
02316 
02317     if (Type::IsBasicType(type) == false) {
02318         Error(expr->pos, "Iteration domain type in \"foreach_tiled\" loop "
02319               "must be an atomic, pointer, or enum type, not \"%s\".",
02320               type->GetString().c_str());
02321         return false;
02322     }
02323 
02324     return this;
02325 }
02326 
02327 
02328 int
02329 ForeachUniqueStmt::EstimateCost() const {
02330     return COST_VARYING_LOOP;
02331 }
02332 
02333 
02334 ///////////////////////////////////////////////////////////////////////////
02335 // CaseStmt
02336 
02337 /** Given the statements following a 'case' or 'default' label, this
02338     function determines whether the mask should be checked to see if it is
02339     "all off" immediately after the label, before executing the code for
02340     the statements.
02341  */
02342 static bool
02343 lCheckMask(Stmt *stmts) {
02344     if (stmts == NULL)
02345         return false;
02346 
02347     int cost = EstimateCost(stmts);
02348     bool safeToRunWithAllLanesOff = SafeToRunWithMaskAllOff(stmts);
02349 
02350     // The mask should be checked if the code following the
02351     // 'case'/'default' is relatively complex, or if it would be unsafe to
02352     // run that code with the execution mask all off.
02353     return (cost > PREDICATE_SAFE_IF_STATEMENT_COST ||
02354             safeToRunWithAllLanesOff == false);
02355 }
02356 
02357 
02358 CaseStmt::CaseStmt(int v, Stmt *s, SourcePos pos) 
02359     : Stmt(pos), value(v) {
02360     stmts = s;
02361 }
02362 
02363 
02364 void
02365 CaseStmt::EmitCode(FunctionEmitContext *ctx) const {
02366     ctx->EmitCaseLabel(value, lCheckMask(stmts), pos);
02367     if (stmts)
02368         stmts->EmitCode(ctx);
02369 }
02370 
02371 
02372 void
02373 CaseStmt::Print(int indent) const {
02374     printf("%*cCase [%d] label", indent, ' ', value);
02375     pos.Print();
02376     printf("\n");
02377     stmts->Print(indent+4);
02378 }
02379 
02380 
02381 Stmt *
02382 CaseStmt::TypeCheck() {
02383     return this;
02384 }
02385 
02386 
02387 int
02388 CaseStmt::EstimateCost() const {
02389     return 0;
02390 }
02391 
02392 
02393 ///////////////////////////////////////////////////////////////////////////
02394 // DefaultStmt
02395 
02396 DefaultStmt::DefaultStmt(Stmt *s, SourcePos pos) 
02397     : Stmt(pos) {
02398     stmts = s;
02399 }
02400 
02401 
02402 void
02403 DefaultStmt::EmitCode(FunctionEmitContext *ctx) const {
02404     ctx->EmitDefaultLabel(lCheckMask(stmts), pos);
02405     if (stmts)
02406         stmts->EmitCode(ctx);
02407 }
02408 
02409 
02410 void
02411 DefaultStmt::Print(int indent) const {
02412     printf("%*cDefault Stmt", indent, ' ');
02413     pos.Print();
02414     printf("\n");
02415     stmts->Print(indent+4);
02416 }
02417 
02418 
02419 Stmt *
02420 DefaultStmt::TypeCheck() {
02421     return this;
02422 }
02423 
02424 
02425 int
02426 DefaultStmt::EstimateCost() const {
02427     return 0;
02428 }
02429 
02430 
02431 ///////////////////////////////////////////////////////////////////////////
02432 // SwitchStmt
02433 
02434 SwitchStmt::SwitchStmt(Expr *e, Stmt *s, SourcePos pos) 
02435     : Stmt(pos) {
02436     expr = e;
02437     stmts = s;
02438 }
02439 
02440 
02441 /* An instance of this structure is carried along as we traverse the AST
02442    nodes for the statements after a "switch" statement.  We use this
02443    structure to record all of the 'case' and 'default' statements after the
02444    "switch". */
02445 struct SwitchVisitInfo {
02446     SwitchVisitInfo(FunctionEmitContext *c) { 
02447         ctx = c;
02448         defaultBlock = NULL; 
02449         lastBlock = NULL;
02450     }
02451 
02452     FunctionEmitContext *ctx;
02453 
02454     /* Basic block for the code following the "default" label (if any). */
02455     llvm::BasicBlock *defaultBlock;
02456 
02457     /* Map from integer values after "case" labels to the basic blocks that
02458        follow the corresponding "case" label. */
02459     std::vector<std::pair<int, llvm::BasicBlock *> > caseBlocks;
02460 
02461     /* For each basic block for a "case" label or a "default" label,
02462        nextBlock[block] stores the basic block pointer for the next
02463        subsequent "case" or "default" label in the program. */
02464     std::map<llvm::BasicBlock *, llvm::BasicBlock *> nextBlock;
02465 
02466     /* The last basic block created for a "case" or "default" label; when
02467        we create the basic block for the next one, we'll use this to update
02468        the nextBlock map<> above. */
02469     llvm::BasicBlock *lastBlock;
02470 };
02471 
02472 
02473 static bool
02474 lSwitchASTPreVisit(ASTNode *node, void *d) {
02475     if (dynamic_cast<SwitchStmt *>(node) != NULL)
02476         // don't continue recursively into a nested switch--we only want
02477         // our own case and default statements!
02478         return false;
02479 
02480     CaseStmt *cs = dynamic_cast<CaseStmt *>(node);
02481     DefaultStmt *ds = dynamic_cast<DefaultStmt *>(node);
02482 
02483     SwitchVisitInfo *svi = (SwitchVisitInfo *)d;
02484     llvm::BasicBlock *bb = NULL;
02485     if (cs != NULL) {
02486         // Complain if we've seen a case statement with the same value
02487         // already
02488         for (int i = 0; i < (int)svi->caseBlocks.size(); ++i) {
02489             if (svi->caseBlocks[i].first == cs->value) {
02490                 Error(cs->pos, "Duplicate case value \"%d\".", cs->value); 
02491                 return true;
02492             }
02493         }
02494         
02495         // Otherwise create a new basic block for the code following this
02496         // 'case' statement and record the mappign between the case label
02497         // value and the basic block
02498         char buf[32];
02499         sprintf(buf, "case_%d", cs->value);
02500         bb = svi->ctx->CreateBasicBlock(buf);
02501         svi->caseBlocks.push_back(std::make_pair(cs->value, bb));
02502     }
02503     else if (ds != NULL) {
02504         // And complain if we've seen another 'default' label..
02505         if (svi->defaultBlock != NULL) {
02506             Error(ds->pos, "Multiple \"default\" lables in switch statement.");
02507             return true;
02508         }
02509         else {
02510             // Otherwise create a basic block for the code following the
02511             // "default".
02512             bb = svi->ctx->CreateBasicBlock("default");
02513             svi->defaultBlock = bb;
02514         }
02515     }
02516 
02517     // If we saw a "case" or "default" label, then update the map to record
02518     // that the block we just created follows the block created for the
02519     // previous label in the "switch".
02520     if (bb != NULL) {
02521         svi->nextBlock[svi->lastBlock] = bb;
02522         svi->lastBlock = bb;
02523     }
02524 
02525     return true;
02526 }
02527 
02528 
02529 void
02530 SwitchStmt::EmitCode(FunctionEmitContext *ctx) const {
02531     if (ctx->GetCurrentBasicBlock() == NULL)
02532         return;
02533 
02534     const Type *type;
02535     if (expr == NULL || ((type = expr->GetType()) == NULL)) {
02536         AssertPos(pos, m->errorCount > 0);
02537         return;
02538     }
02539 
02540     // Basic block we'll end up after the switch statement
02541     llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("switch_done");
02542 
02543     // Walk the AST of the statements after the 'switch' to collect a bunch
02544     // of information about the structure of the 'case' and 'default'
02545     // statements.
02546     SwitchVisitInfo svi(ctx);
02547     WalkAST(stmts, lSwitchASTPreVisit, NULL, &svi);
02548     // Record that the basic block following the last one created for a
02549     // case/default is the block after the end of the switch statement.
02550     svi.nextBlock[svi.lastBlock] = bbDone;
02551 
02552     llvm::Value *exprValue = expr->GetValue(ctx);
02553     if (exprValue == NULL) {
02554         AssertPos(pos, m->errorCount > 0);
02555         return;
02556     }
02557 
02558     bool isUniformCF = (type->IsUniformType() &&
02559                         lHasVaryingBreakOrContinue(stmts) == false);
02560     ctx->StartSwitch(isUniformCF, bbDone);
02561     ctx->SwitchInst(exprValue, svi.defaultBlock ? svi.defaultBlock : bbDone,
02562                     svi.caseBlocks, svi.nextBlock);
02563 
02564     if (stmts != NULL)
02565         stmts->EmitCode(ctx);
02566 
02567     if (ctx->GetCurrentBasicBlock() != NULL)
02568         ctx->BranchInst(bbDone);
02569 
02570     ctx->SetCurrentBasicBlock(bbDone);
02571     ctx->EndSwitch();
02572 }
02573 
02574 
02575 void
02576 SwitchStmt::Print(int indent) const {
02577     printf("%*cSwitch Stmt", indent, ' ');
02578     pos.Print();
02579     printf("\n");
02580     printf("%*cexpr = ", indent, ' ');
02581     expr->Print();
02582     printf("\n");
02583     stmts->Print(indent+4);
02584 }
02585 
02586 
02587 Stmt *
02588 SwitchStmt::TypeCheck() {
02589     const Type *exprType;
02590     if (expr == NULL ||
02591         (exprType = expr->GetType()) == NULL) {
02592         Assert(m->errorCount > 0);
02593         return NULL;
02594     }
02595 
02596     const Type *toType = NULL;
02597     exprType = exprType->GetAsConstType();
02598     bool is64bit = (Type::EqualIgnoringConst(exprType->GetAsUniformType(),
02599                                              AtomicType::UniformUInt64) ||
02600                     Type::EqualIgnoringConst(exprType->GetAsUniformType(),
02601                                              AtomicType::UniformInt64));
02602 
02603     if (exprType->IsUniformType()) {
02604         if (is64bit) toType = AtomicType::UniformInt64;
02605         else         toType = AtomicType::UniformInt32;
02606     }
02607     else {
02608         if (is64bit) toType = AtomicType::VaryingInt64;
02609         else         toType = AtomicType::VaryingInt32;
02610     }
02611 
02612     expr = TypeConvertExpr(expr, toType, "switch expression");
02613     if (expr == NULL)
02614         return NULL;
02615 
02616     return this;
02617 }
02618 
02619 
02620 int
02621 SwitchStmt::EstimateCost() const {
02622     const Type *type = expr->GetType();
02623     if (type && type->IsVaryingType())
02624         return COST_VARYING_SWITCH;
02625     else
02626         return COST_UNIFORM_SWITCH;
02627 }
02628 
02629 
02630 ///////////////////////////////////////////////////////////////////////////
02631 // UnmaskedStmt
02632 
02633 UnmaskedStmt::UnmaskedStmt(Stmt *s, SourcePos pos)
02634     : Stmt(pos) {
02635     stmts = s;
02636 }
02637 
02638 
02639 void
02640 UnmaskedStmt::EmitCode(FunctionEmitContext *ctx) const {
02641     if (!ctx->GetCurrentBasicBlock() || !stmts)
02642         return;
02643 
02644     llvm::Value *oldInternalMask = ctx->GetInternalMask();
02645     llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
02646 
02647     ctx->SetInternalMask(LLVMMaskAllOn);
02648     ctx->SetFunctionMask(LLVMMaskAllOn);
02649 
02650     stmts->EmitCode(ctx);
02651 
02652     ctx->SetInternalMask(oldInternalMask);
02653     ctx->SetFunctionMask(oldFunctionMask);
02654 }
02655 
02656 
02657 void
02658 UnmaskedStmt::Print(int indent) const {
02659     printf("%*cUnmasked Stmt", indent, ' ');
02660     pos.Print();
02661     printf("\n");
02662 
02663     printf("%*cStmts:\n", indent+4, ' ');
02664     if (stmts != NULL)
02665         stmts->Print(indent+8);
02666     else
02667         printf("NULL");
02668     printf("\n");
02669 }
02670 
02671 
02672 Stmt *
02673 UnmaskedStmt::TypeCheck() {
02674     return this;
02675 }
02676 
02677 
02678 int
02679 UnmaskedStmt::EstimateCost() const {
02680     return COST_ASSIGN;
02681 }
02682 
02683 
02684 ///////////////////////////////////////////////////////////////////////////
02685 // ReturnStmt
02686 
02687 ReturnStmt::ReturnStmt(Expr *e, bool cc, SourcePos p) 
02688     : Stmt(p), expr(e), 
02689       doCoherenceCheck(cc && !g->opt.disableCoherentControlFlow) {
02690 }
02691 
02692 
02693 void
02694 ReturnStmt::EmitCode(FunctionEmitContext *ctx) const {
02695     if (!ctx->GetCurrentBasicBlock()) 
02696         return;
02697 
02698     if (ctx->InForeachLoop()) {
02699         Error(pos, "\"return\" statement is illegal inside a \"foreach\" loop.");
02700         return;
02701     }
02702 
02703     // Make sure we're not trying to return a reference to something where
02704     // that doesn't make sense
02705     const Function *func = ctx->GetFunction();
02706     const Type *returnType = func->GetReturnType();
02707     if (IsReferenceType(returnType) == true &&
02708         IsReferenceType(expr->GetType()) == false) {
02709         const Type *lvType = expr->GetLValueType();
02710         if (lvType == NULL) {
02711             Error(expr->pos, "Illegal to return non-lvalue from function "
02712                   "returning reference type \"%s\".",
02713                   returnType->GetString().c_str());
02714             return;
02715         }
02716         else if (lvType->IsUniformType() == false) {
02717             Error(expr->pos, "Illegal to return varying lvalue type from "
02718                   "function returning a reference type \"%s\".",
02719                   returnType->GetString().c_str());
02720             return;
02721         }
02722     }
02723 
02724     ctx->SetDebugPos(pos);
02725     ctx->CurrentLanesReturned(expr, doCoherenceCheck);
02726 }
02727 
02728 
02729 Stmt *
02730 ReturnStmt::TypeCheck() {
02731     return this;
02732 }
02733 
02734 
02735 int
02736 ReturnStmt::EstimateCost() const {
02737     return COST_RETURN;
02738 }
02739 
02740 
02741 void
02742 ReturnStmt::Print(int indent) const {
02743     printf("%*c%sReturn Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
02744     pos.Print();
02745     if (expr)
02746         expr->Print();
02747     else printf("(void)");
02748     printf("\n");
02749 }
02750 
02751 
02752 ///////////////////////////////////////////////////////////////////////////
02753 // GotoStmt
02754 
02755 GotoStmt::GotoStmt(const char *l, SourcePos gotoPos, SourcePos ip) 
02756     : Stmt(gotoPos) {
02757     label = l;
02758     identifierPos = ip;
02759 }
02760 
02761 
02762 void
02763 GotoStmt::EmitCode(FunctionEmitContext *ctx) const {
02764     if (!ctx->GetCurrentBasicBlock()) 
02765         return;
02766 
02767     if (ctx->VaryingCFDepth() > 0) {
02768         Error(pos, "\"goto\" statements are only legal under \"uniform\" "
02769               "control flow.");
02770         return;
02771     }
02772     if (ctx->InForeachLoop()) {
02773         Error(pos, "\"goto\" statements are currently illegal inside "
02774               "\"foreach\" loops.");
02775         return;
02776     }
02777 
02778     llvm::BasicBlock *bb = ctx->GetLabeledBasicBlock(label);
02779     if (bb == NULL) {
02780         /* Label wasn't found. Look for suggestions that are close */
02781         std::vector<std::string> labels = ctx->GetLabels();
02782         std::vector<std::string> matches = MatchStrings(label, labels);
02783         std::string match_output;
02784         if (! matches.empty()) {
02785             /* Print up to 5 matches. Don't want to spew too much */
02786             match_output += "\nDid you mean:";
02787             for (unsigned int i=0; i<matches.size() && i<5; i++)
02788                 match_output += "\n " + matches[i] + "?";
02789         }
02790 
02791         /* Label wasn't found. Emit an error */
02792         Error(identifierPos, 
02793                 "No label named \"%s\" found in current function.%s",
02794               label.c_str(), match_output.c_str());
02795 
02796         return;
02797     }
02798 
02799     ctx->BranchInst(bb);
02800     ctx->SetCurrentBasicBlock(NULL);
02801 }
02802 
02803 
02804 void
02805 GotoStmt::Print(int indent) const {
02806     printf("%*cGoto label \"%s\"\n", indent, ' ', label.c_str());
02807 }
02808 
02809 
02810 Stmt *
02811 GotoStmt::Optimize() {
02812     return this;
02813 }
02814 
02815 
02816 Stmt *
02817 GotoStmt::TypeCheck() {
02818     return this;
02819 }
02820 
02821 
02822 int
02823 GotoStmt::EstimateCost() const {
02824     return COST_GOTO;
02825 }
02826 
02827 
02828 ///////////////////////////////////////////////////////////////////////////
02829 // LabeledStmt
02830 
02831 LabeledStmt::LabeledStmt(const char *n, Stmt *s, SourcePos p) 
02832     : Stmt(p) {
02833     name = n;
02834     stmt = s;
02835 }
02836 
02837 
02838 void
02839 LabeledStmt::EmitCode(FunctionEmitContext *ctx) const {
02840     llvm::BasicBlock *bblock = ctx->GetLabeledBasicBlock(name);
02841     AssertPos(pos, bblock != NULL);
02842 
02843     // End the current basic block with a jump to our basic block and then
02844     // set things up for emission to continue there.  Note that the current
02845     // basic block may validly be NULL going into this statement due to an
02846     // earlier goto that NULLed it out; that doesn't stop us from
02847     // re-establishing a current basic block starting at the label..
02848     if (ctx->GetCurrentBasicBlock() != NULL)
02849         ctx->BranchInst(bblock);
02850     ctx->SetCurrentBasicBlock(bblock);
02851 
02852     if (stmt != NULL)
02853         stmt->EmitCode(ctx);
02854 }
02855 
02856 
02857 void
02858 LabeledStmt::Print(int indent) const {
02859     printf("%*cLabel \"%s\"\n", indent, ' ', name.c_str());
02860     if (stmt != NULL)
02861         stmt->Print(indent);
02862 }
02863 
02864 
02865 Stmt *
02866 LabeledStmt::Optimize() {
02867     return this;
02868 }
02869 
02870 
02871 Stmt *
02872 LabeledStmt::TypeCheck() {
02873     if (!isalpha(name[0]) || name[0] == '_') {
02874         Error(pos, "Label must start with either alphabetic character or '_'.");
02875         return NULL;
02876     }
02877     for (unsigned int i = 1; i < name.size(); ++i) {
02878         if (!isalnum(name[i]) && name[i] != '_') {
02879             Error(pos, "Character \"%c\" is illegal in labels.", name[i]);
02880             return NULL;
02881         }
02882     }
02883     return this;
02884 }
02885 
02886 
02887 int
02888 LabeledStmt::EstimateCost() const {
02889     return 0;
02890 }
02891 
02892 
02893 ///////////////////////////////////////////////////////////////////////////
02894 // StmtList
02895 
02896 void
02897 StmtList::EmitCode(FunctionEmitContext *ctx) const {
02898     ctx->StartScope();
02899     ctx->SetDebugPos(pos);
02900     for (unsigned int i = 0; i < stmts.size(); ++i)
02901         if (stmts[i])
02902             stmts[i]->EmitCode(ctx);
02903     ctx->EndScope();
02904 }
02905 
02906 
02907 Stmt *
02908 StmtList::TypeCheck() {
02909     return this;
02910 }
02911 
02912 
02913 int
02914 StmtList::EstimateCost() const {
02915     return 0;
02916 }
02917 
02918 
02919 void
02920 StmtList::Print(int indent) const {
02921     printf("%*cStmt List", indent, ' ');
02922     pos.Print();
02923     printf(":\n");
02924     for (unsigned int i = 0; i < stmts.size(); ++i)
02925         if (stmts[i])
02926             stmts[i]->Print(indent+4);
02927 }
02928 
02929 
02930 ///////////////////////////////////////////////////////////////////////////
02931 // PrintStmt
02932 
02933 PrintStmt::PrintStmt(const std::string &f, Expr *v, SourcePos p) 
02934     : Stmt(p), format(f), values(v) {
02935 }
02936 
02937 /* Because the pointers to values that are passed to __do_print() are all
02938    void *s (and because ispc print() formatting strings statements don't
02939    encode types), we pass along a string to __do_print() where the i'th
02940    character encodes the type of the i'th value to be printed.  Needless to
02941    say, the encoding chosen here and the decoding code in __do_print() need
02942    to agree on the below!
02943  */
02944 static char
02945 lEncodeType(const Type *t) {
02946     if (Type::Equal(t, AtomicType::UniformBool))   return 'b';
02947     if (Type::Equal(t, AtomicType::VaryingBool))   return 'B';
02948     if (Type::Equal(t, AtomicType::UniformInt32))  return 'i';
02949     if (Type::Equal(t, AtomicType::VaryingInt32))  return 'I';
02950     if (Type::Equal(t, AtomicType::UniformUInt32)) return 'u';
02951     if (Type::Equal(t, AtomicType::VaryingUInt32)) return 'U';
02952     if (Type::Equal(t, AtomicType::UniformFloat))  return 'f';
02953     if (Type::Equal(t, AtomicType::VaryingFloat))  return 'F';
02954     if (Type::Equal(t, AtomicType::UniformInt64))  return 'l';
02955     if (Type::Equal(t, AtomicType::VaryingInt64))  return 'L';
02956     if (Type::Equal(t, AtomicType::UniformUInt64)) return 'v';
02957     if (Type::Equal(t, AtomicType::VaryingUInt64)) return 'V';
02958     if (Type::Equal(t, AtomicType::UniformDouble)) return 'd';
02959     if (Type::Equal(t, AtomicType::VaryingDouble)) return 'D';
02960     if (CastType<PointerType>(t) != NULL) {
02961         if (t->IsUniformType())
02962             return 'p';
02963         else
02964             return 'P';
02965     }
02966     else return '\0';
02967 }
02968 
02969 
02970 /** Given an Expr for a value to be printed, emit the code to evaluate the
02971     expression and store the result to alloca'd memory.  Update the
02972     argTypes string with the type encoding for this expression.
02973  */
02974 static llvm::Value *
02975 lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
02976     const Type *type = expr->GetType();
02977     if (type == NULL)
02978         return NULL;
02979 
02980     if (CastType<ReferenceType>(type) != NULL) {
02981         expr = new RefDerefExpr(expr, expr->pos);
02982         type = expr->GetType();
02983         if (type == NULL)
02984             return NULL;
02985     }
02986 
02987     // Just int8 and int16 types to int32s...
02988     const Type *baseType = type->GetAsNonConstType()->GetAsUniformType();
02989     if (Type::Equal(baseType, AtomicType::UniformInt8) ||
02990         Type::Equal(baseType, AtomicType::UniformUInt8) ||
02991         Type::Equal(baseType, AtomicType::UniformInt16) ||
02992         Type::Equal(baseType, AtomicType::UniformUInt16)) {
02993         expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 :
02994                                                         AtomicType::VaryingInt32, 
02995                                 expr, expr->pos);
02996         type = expr->GetType();
02997     }
02998         
02999     char t = lEncodeType(type->GetAsNonConstType());
03000     if (t == '\0') {
03001         Error(expr->pos, "Only atomic types are allowed in print statements; "
03002               "type \"%s\" is illegal.", type->GetString().c_str());
03003         return NULL;
03004     }
03005     else {
03006         argTypes.push_back(t);
03007 
03008         llvm::Type *llvmExprType = type->LLVMType(g->ctx);
03009         llvm::Value *ptr = ctx->AllocaInst(llvmExprType, "print_arg");
03010         llvm::Value *val = expr->GetValue(ctx);
03011         if (!val)
03012             return NULL;
03013         ctx->StoreInst(val, ptr);
03014 
03015         ptr = ctx->BitCastInst(ptr, LLVMTypes::VoidPointerType);
03016         return ptr;
03017     }
03018 }
03019 
03020 
03021 /* PrintStmt works closely with the __do_print() function implemented in
03022    the builtins-c.c file.  In particular, the EmitCode() method here needs to
03023    take the arguments passed to it from ispc and generate a valid call to
03024    __do_print() with the information that __do_print() then needs to do the
03025    actual printing work at runtime.
03026  */
03027 void
03028 PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
03029     if (!ctx->GetCurrentBasicBlock()) 
03030         return;
03031 
03032     ctx->SetDebugPos(pos);
03033 
03034     // __do_print takes 5 arguments; we'll get them stored in the args[] array
03035     // in the code emitted below
03036     //
03037     // 1. the format string
03038     // 2. a string encoding the types of the values being printed, 
03039     //    one character per value
03040     // 3. the number of running program instances (i.e. the target's
03041     //    vector width)
03042     // 4. the current lane mask
03043     // 5. a pointer to an array of pointers to the values to be printed
03044     llvm::Value *args[5];
03045     std::string argTypes;
03046 
03047     if (values == NULL) {
03048         llvm::Type *ptrPtrType = 
03049             llvm::PointerType::get(LLVMTypes::VoidPointerType, 0);
03050         args[4] = llvm::Constant::getNullValue(ptrPtrType);
03051     }
03052     else {
03053         // Get the values passed to the print() statement evaluated and
03054         // stored in memory so that we set up the array of pointers to them
03055         // for the 5th __do_print() argument
03056         ExprList *elist = dynamic_cast<ExprList *>(values);
03057         int nArgs = elist ? elist->exprs.size() : 1;
03058 
03059         // Allocate space for the array of pointers to values to be printed 
03060         llvm::Type *argPtrArrayType = 
03061             llvm::ArrayType::get(LLVMTypes::VoidPointerType, nArgs);
03062         llvm::Value *argPtrArray = ctx->AllocaInst(argPtrArrayType,
03063                                                    "print_arg_ptrs");
03064         // Store the array pointer as a void **, which is what __do_print()
03065         // expects
03066         args[4] = ctx->BitCastInst(argPtrArray, 
03067                                    llvm::PointerType::get(LLVMTypes::VoidPointerType, 0));
03068 
03069         // Now, for each of the arguments, emit code to evaluate its value
03070         // and store the value into alloca'd storage.  Then store the
03071         // pointer to the alloca'd storage into argPtrArray.
03072         if (elist) {
03073             for (unsigned int i = 0; i < elist->exprs.size(); ++i) {
03074                 Expr *expr = elist->exprs[i];
03075                 if (!expr)
03076                     return;
03077                 llvm::Value *ptr = lProcessPrintArg(expr, ctx, argTypes);
03078                 if (!ptr)
03079                     return;
03080 
03081                 llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, i, NULL);
03082                 ctx->StoreInst(ptr, arrayPtr);
03083             }
03084         }
03085         else {
03086             llvm::Value *ptr = lProcessPrintArg(values, ctx, argTypes);
03087             if (!ptr)
03088                 return;
03089             llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, 0, NULL);
03090             ctx->StoreInst(ptr, arrayPtr);
03091         }
03092     }
03093 
03094     // Now we can emit code to call __do_print()
03095     llvm::Function *printFunc = m->module->getFunction("__do_print");
03096     AssertPos(pos, printFunc);
03097 
03098     llvm::Value *mask = ctx->GetFullMask();
03099     // Set up the rest of the parameters to it
03100     args[0] = ctx->GetStringPtr(format);
03101     args[1] = ctx->GetStringPtr(argTypes);
03102     args[2] = LLVMInt32(g->target.vectorWidth);
03103     args[3] = ctx->LaneMask(mask);
03104     std::vector<llvm::Value *> argVec(&args[0], &args[5]);
03105     ctx->CallInst(printFunc, NULL, argVec, "");
03106 }
03107 
03108 
03109 void
03110 PrintStmt::Print(int indent) const {
03111     printf("%*cPrint Stmt (%s)", indent, ' ', format.c_str());
03112 }
03113 
03114 
03115 Stmt *
03116 PrintStmt::TypeCheck() {
03117     return this;
03118 }
03119 
03120 
03121 int
03122 PrintStmt::EstimateCost() const {
03123     return COST_FUNCALL;
03124 }
03125 
03126 
03127 ///////////////////////////////////////////////////////////////////////////
03128 // AssertStmt
03129 
03130 AssertStmt::AssertStmt(const std::string &msg, Expr *e, SourcePos p) 
03131     : Stmt(p), message(msg), expr(e) {
03132 }
03133 
03134 
03135 void
03136 AssertStmt::EmitCode(FunctionEmitContext *ctx) const {
03137     if (!ctx->GetCurrentBasicBlock()) 
03138         return;
03139 
03140     const Type *type;
03141     if (expr == NULL ||
03142         (type = expr->GetType()) == NULL) {
03143         AssertPos(pos, m->errorCount > 0);
03144         return;
03145     }
03146     bool isUniform = type->IsUniformType();
03147 
03148     // The actual functionality to do the check and then handle falure is
03149     // done via a builtin written in bitcode in builtins.m4.
03150     llvm::Function *assertFunc = 
03151         isUniform ? m->module->getFunction("__do_assert_uniform") :
03152                     m->module->getFunction("__do_assert_varying");
03153     AssertPos(pos, assertFunc != NULL);
03154 
03155     char *errorString;
03156     if (asprintf(&errorString, "%s:%d:%d: Assertion failed: %s\n", 
03157                  pos.name, pos.first_line, pos.first_column, 
03158                  message.c_str()) == -1) {
03159         Error(pos, "Fatal error when generating assert string: asprintf() "
03160               "unable to allocate memory!");
03161         return;
03162     }
03163 
03164     std::vector<llvm::Value *> args;
03165     args.push_back(ctx->GetStringPtr(errorString));
03166     llvm::Value *exprValue = expr->GetValue(ctx);
03167     if (exprValue == NULL) {
03168         AssertPos(pos, m->errorCount > 0);
03169         return;
03170     }
03171     args.push_back(exprValue);
03172     args.push_back(ctx->GetFullMask());
03173     ctx->CallInst(assertFunc, NULL, args, "");
03174 
03175     free(errorString);
03176 }
03177 
03178 
03179 void
03180 AssertStmt::Print(int indent) const {
03181     printf("%*cAssert Stmt (%s)", indent, ' ', message.c_str());
03182 }
03183 
03184 
03185 Stmt *
03186 AssertStmt::TypeCheck() {
03187     const Type *type;
03188     if (expr && (type = expr->GetType()) != NULL) {
03189         bool isUniform = type->IsUniformType();
03190         if (!type->IsNumericType() && !type->IsBoolType()) {
03191             Error(expr->pos, "Type \"%s\" can't be converted to boolean for \"assert\".",
03192                   type->GetString().c_str());
03193             return NULL;
03194         }
03195         expr = new TypeCastExpr(isUniform ? AtomicType::UniformBool : 
03196                                             AtomicType::VaryingBool, 
03197                                 expr, expr->pos);
03198         expr = ::TypeCheck(expr);
03199     }
03200     return this;
03201 }
03202 
03203 
03204 int
03205 AssertStmt::EstimateCost() const {
03206     return COST_ASSERT;
03207 }
03208 
03209 
03210 ///////////////////////////////////////////////////////////////////////////
03211 // DeleteStmt
03212 
03213 DeleteStmt::DeleteStmt(Expr *e, SourcePos p)
03214     : Stmt(p) {
03215     expr = e;
03216 }
03217 
03218 
03219 void
03220 DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
03221     if (!ctx->GetCurrentBasicBlock()) 
03222         return;
03223 
03224     const Type *exprType;
03225     if (expr == NULL || ((exprType = expr->GetType()) == NULL)) {
03226         AssertPos(pos, m->errorCount > 0);
03227         return;
03228     }
03229 
03230     llvm::Value *exprValue = expr->GetValue(ctx);
03231     if (exprValue == NULL) {
03232         AssertPos(pos, m->errorCount > 0);
03233         return;
03234     }
03235 
03236     // Typechecking should catch this
03237     AssertPos(pos, CastType<PointerType>(exprType) != NULL);
03238 
03239     if (exprType->IsUniformType()) {
03240         // For deletion of a uniform pointer, we just need to cast the
03241         // pointer type to a void pointer type, to match what
03242         // __delete_uniform() from the builtins expects.
03243         exprValue = ctx->BitCastInst(exprValue, LLVMTypes::VoidPointerType,
03244                                      "ptr_to_void");
03245         llvm::Function *func = m->module->getFunction("__delete_uniform");
03246         AssertPos(pos, func != NULL);
03247 
03248         ctx->CallInst(func, NULL, exprValue, "");
03249     }
03250     else {
03251         // Varying pointers are arrays of ints, and __delete_varying()
03252         // takes a vector of i64s (even for 32-bit targets).  Therefore, we
03253         // only need to extend to 64-bit values on 32-bit targets before
03254         // calling it.
03255         llvm::Function *func = m->module->getFunction("__delete_varying");
03256         AssertPos(pos, func != NULL);
03257         if (g->target.is32Bit)
03258             exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType,
03259                                       "ptr_to_64");
03260         ctx->CallInst(func, NULL, exprValue, "");
03261     }
03262 }
03263 
03264 
03265 void
03266 DeleteStmt::Print(int indent) const {
03267     printf("%*cDelete Stmt", indent, ' ');
03268 }
03269 
03270 
03271 Stmt *
03272 DeleteStmt::TypeCheck() {
03273     const Type *exprType;
03274     if (expr == NULL || ((exprType = expr->GetType()) == NULL))
03275         return NULL;
03276 
03277     if (CastType<PointerType>(exprType) == NULL) {
03278         Error(pos, "Illegal to delete non-pointer type \"%s\".",
03279               exprType->GetString().c_str());
03280         return NULL;
03281     }
03282 
03283     return this;
03284 }
03285 
03286 
03287 int
03288 DeleteStmt::EstimateCost() const {
03289     return COST_DELETE;
03290 }