|
Intel SPMD Program Compiler
1.3.0
|
00001 /* 00002 Copyright (c) 2010-2012, Intel Corporation 00003 All rights reserved. 00004 00005 Redistribution and use in source and binary forms, with or without 00006 modification, are permitted provided that the following conditions are 00007 met: 00008 00009 * Redistributions of source code must retain the above copyright 00010 notice, this list of conditions and the following disclaimer. 00011 00012 * Redistributions in binary form must reproduce the above copyright 00013 notice, this list of conditions and the following disclaimer in the 00014 documentation and/or other materials provided with the distribution. 00015 00016 * Neither the name of Intel Corporation nor the names of its 00017 contributors may be used to endorse or promote products derived from 00018 this software without specific prior written permission. 00019 00020 00021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 00022 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 00023 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 00024 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 00025 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00026 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00027 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 */ 00033 00034 /** @file stmt.cpp 00035 @brief File with definitions classes related to statements in the language 00036 */ 00037 00038 #include "stmt.h" 00039 #include "ctx.h" 00040 #include "util.h" 00041 #include "expr.h" 00042 #include "type.h" 00043 #include "func.h" 00044 #include "sym.h" 00045 #include "module.h" 00046 #include "llvmutil.h" 00047 00048 #include <stdio.h> 00049 #include <map> 00050 00051 #include <llvm/Module.h> 00052 #include <llvm/Function.h> 00053 #include <llvm/Type.h> 00054 #include <llvm/DerivedTypes.h> 00055 #include <llvm/LLVMContext.h> 00056 #include <llvm/Metadata.h> 00057 #include <llvm/Instructions.h> 00058 #include <llvm/CallingConv.h> 00059 #include <llvm/Support/IRBuilder.h> 00060 #include <llvm/Support/raw_ostream.h> 00061 00062 /////////////////////////////////////////////////////////////////////////// 00063 // Stmt 00064 00065 Stmt * 00066 Stmt::Optimize() { 00067 return this; 00068 } 00069 00070 00071 /////////////////////////////////////////////////////////////////////////// 00072 // ExprStmt 00073 00074 ExprStmt::ExprStmt(Expr *e, SourcePos p) 00075 : Stmt(p) { 00076 expr = e; 00077 } 00078 00079 void 00080 ExprStmt::EmitCode(FunctionEmitContext *ctx) const { 00081 if (!ctx->GetCurrentBasicBlock()) 00082 return; 00083 00084 ctx->SetDebugPos(pos); 00085 if (expr) 00086 expr->GetValue(ctx); 00087 } 00088 00089 00090 Stmt * 00091 ExprStmt::TypeCheck() { 00092 return this; 00093 } 00094 00095 00096 void 00097 ExprStmt::Print(int indent) const { 00098 if (!expr) 00099 return; 00100 00101 printf("%*c", indent, ' '); 00102 printf("Expr stmt: "); 00103 pos.Print(); 00104 expr->Print(); 00105 printf("\n"); 00106 } 00107 00108 00109 int 00110 ExprStmt::EstimateCost() const { 00111 return 0; 00112 } 00113 00114 00115 /////////////////////////////////////////////////////////////////////////// 00116 // DeclStmt 00117 00118 DeclStmt::DeclStmt(const std::vector<VariableDeclaration> &v, SourcePos p) 00119 : Stmt(p), vars(v) { 00120 } 00121 00122 00123 static bool 00124 lHasUnsizedArrays(const Type *type) { 00125 const ArrayType *at = CastType<ArrayType>(type); 00126 if (at == NULL) 00127 return false; 00128 00129 if (at->GetElementCount() == 0) 00130 return true; 00131 else 00132 return lHasUnsizedArrays(at->GetElementType()); 00133 } 00134 00135 00136 void 00137 DeclStmt::EmitCode(FunctionEmitContext *ctx) const { 00138 if (!ctx->GetCurrentBasicBlock()) 00139 return; 00140 00141 for (unsigned int i = 0; i < vars.size(); ++i) { 00142 Symbol *sym = vars[i].sym; 00143 AssertPos(pos, sym != NULL); 00144 if (sym->type == NULL) 00145 continue; 00146 Expr *initExpr = vars[i].init; 00147 00148 // Now that we're in the thick of emitting code, it's easy for us 00149 // to find out the level of nesting of varying control flow we're 00150 // in at this declaration. So we can finally set that 00151 // Symbol::varyingCFDepth variable. 00152 // @todo It's disgusting to be doing this here. 00153 sym->varyingCFDepth = ctx->VaryingCFDepth(); 00154 00155 ctx->SetDebugPos(sym->pos); 00156 00157 // If it's an array that was declared without a size but has an 00158 // initializer list, then use the number of elements in the 00159 // initializer list to finally set the array's size. 00160 sym->type = ArrayType::SizeUnsizedArrays(sym->type, initExpr); 00161 if (sym->type == NULL) 00162 continue; 00163 00164 if (lHasUnsizedArrays(sym->type)) { 00165 Error(pos, "Illegal to declare an unsized array variable without " 00166 "providing an initializer expression to set its size."); 00167 continue; 00168 } 00169 00170 // References must have initializer expressions as well. 00171 if (IsReferenceType(sym->type) == true) { 00172 if (initExpr == NULL) { 00173 Error(sym->pos, "Must provide initializer for reference-type " 00174 "variable \"%s\".", sym->name.c_str()); 00175 continue; 00176 } 00177 if (IsReferenceType(initExpr->GetType()) == false) { 00178 const Type *initLVType = initExpr->GetLValueType(); 00179 if (initLVType == NULL) { 00180 Error(initExpr->pos, "Initializer for reference-type variable " 00181 "\"%s\" must have an lvalue type.", sym->name.c_str()); 00182 continue; 00183 } 00184 if (initLVType->IsUniformType() == false) { 00185 Error(initExpr->pos, "Initializer for reference-type variable " 00186 "\"%s\" must have a uniform lvalue type.", sym->name.c_str()); 00187 continue; 00188 } 00189 } 00190 } 00191 00192 llvm::Type *llvmType = sym->type->LLVMType(g->ctx); 00193 if (llvmType == NULL) { 00194 AssertPos(pos, m->errorCount > 0); 00195 return; 00196 } 00197 00198 if (sym->storageClass == SC_STATIC) { 00199 // For static variables, we need a compile-time constant value 00200 // for its initializer; if there's no initializer, we use a 00201 // zero value. 00202 llvm::Constant *cinit = NULL; 00203 if (initExpr != NULL) { 00204 if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false) 00205 continue; 00206 // FIXME: we only need this for function pointers; it was 00207 // already done for atomic types and enums in 00208 // DeclStmt::TypeCheck()... 00209 if (dynamic_cast<ExprList *>(initExpr) == NULL) { 00210 initExpr = TypeConvertExpr(initExpr, sym->type, 00211 "initializer"); 00212 // FIXME: and this is only needed to re-establish 00213 // constant-ness so that GetConstant below works for 00214 // constant artithmetic expressions... 00215 initExpr = ::Optimize(initExpr); 00216 } 00217 00218 cinit = initExpr->GetConstant(sym->type); 00219 if (cinit == NULL) 00220 Error(initExpr->pos, "Initializer for static variable " 00221 "\"%s\" must be a constant.", sym->name.c_str()); 00222 } 00223 if (cinit == NULL) 00224 cinit = llvm::Constant::getNullValue(llvmType); 00225 00226 // Allocate space for the static variable in global scope, so 00227 // that it persists across function calls 00228 sym->storagePtr = 00229 new llvm::GlobalVariable(*m->module, llvmType, 00230 sym->type->IsConstType(), 00231 llvm::GlobalValue::InternalLinkage, cinit, 00232 llvm::Twine("static.") + 00233 llvm::Twine(sym->pos.first_line) + 00234 llvm::Twine(".") + sym->name.c_str()); 00235 // Tell the FunctionEmitContext about the variable 00236 ctx->EmitVariableDebugInfo(sym); 00237 } 00238 else { 00239 // For non-static variables, allocate storage on the stack 00240 sym->storagePtr = ctx->AllocaInst(llvmType, sym->name.c_str()); 00241 00242 // Tell the FunctionEmitContext about the variable; must do 00243 // this before the initializer stuff. 00244 ctx->EmitVariableDebugInfo(sym); 00245 00246 // And then get it initialized... 00247 sym->parentFunction = ctx->GetFunction(); 00248 InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos); 00249 } 00250 } 00251 } 00252 00253 00254 Stmt * 00255 DeclStmt::Optimize() { 00256 for (unsigned int i = 0; i < vars.size(); ++i) { 00257 Expr *init = vars[i].init; 00258 if (init != NULL && dynamic_cast<ExprList *>(init) == NULL) { 00259 // If the variable is const-qualified, after we've optimized 00260 // the initializer expression, see if we have a ConstExpr. If 00261 // so, save it in Symbol::constValue where it can be used in 00262 // optimizing later expressions that have this symbol in them. 00263 // Note that there are cases where the expression may be 00264 // constant but where we don't have a ConstExpr; an example is 00265 // const arrays--the ConstExpr implementation just can't 00266 // represent an array of values. 00267 // 00268 // All this is fine in terms of the code that's generated in 00269 // the end (LLVM's constant folding stuff is good), but it 00270 // means that the ispc compiler's ability to reason about what 00271 // is definitely a compile-time constant for things like 00272 // computing array sizes from non-trivial expressions is 00273 // consequently limited. 00274 Symbol *sym = vars[i].sym; 00275 if (sym->type && sym->type->IsConstType() && 00276 Type::Equal(init->GetType(), sym->type)) 00277 sym->constValue = dynamic_cast<ConstExpr *>(init); 00278 } 00279 } 00280 return this; 00281 } 00282 00283 00284 Stmt * 00285 DeclStmt::TypeCheck() { 00286 bool encounteredError = false; 00287 for (unsigned int i = 0; i < vars.size(); ++i) { 00288 if (vars[i].sym == NULL) { 00289 encounteredError = true; 00290 continue; 00291 } 00292 00293 if (vars[i].init == NULL) 00294 continue; 00295 00296 // get the right type for stuff like const float foo = 2; so that 00297 // the int->float type conversion is in there and we don't return 00298 // an int as the constValue later... 00299 const Type *type = vars[i].sym->type; 00300 if (CastType<AtomicType>(type) != NULL || 00301 CastType<EnumType>(type) != NULL) { 00302 // If it's an expr list with an atomic type, we'll later issue 00303 // an error. Need to leave vars[i].init as is in that case so 00304 // it is in fact caught later, though. 00305 if (dynamic_cast<ExprList *>(vars[i].init) == NULL) { 00306 vars[i].init = TypeConvertExpr(vars[i].init, type, 00307 "initializer"); 00308 if (vars[i].init == NULL) 00309 encounteredError = true; 00310 } 00311 } 00312 } 00313 return encounteredError ? NULL : this; 00314 } 00315 00316 00317 void 00318 DeclStmt::Print(int indent) const { 00319 printf("%*cDecl Stmt:", indent, ' '); 00320 pos.Print(); 00321 for (unsigned int i = 0; i < vars.size(); ++i) { 00322 printf("%*cVariable %s (%s)", indent+4, ' ', 00323 vars[i].sym->name.c_str(), 00324 vars[i].sym->type->GetString().c_str()); 00325 if (vars[i].init != NULL) { 00326 printf(" = "); 00327 vars[i].init->Print(); 00328 } 00329 printf("\n"); 00330 } 00331 printf("\n"); 00332 } 00333 00334 00335 int 00336 DeclStmt::EstimateCost() const { 00337 return 0; 00338 } 00339 00340 00341 /////////////////////////////////////////////////////////////////////////// 00342 // IfStmt 00343 00344 IfStmt::IfStmt(Expr *t, Stmt *ts, Stmt *fs, bool checkCoherence, SourcePos p) 00345 : Stmt(p), test(t), trueStmts(ts), falseStmts(fs), 00346 doAllCheck(checkCoherence && 00347 !g->opt.disableCoherentControlFlow) { 00348 } 00349 00350 00351 static void 00352 lEmitIfStatements(FunctionEmitContext *ctx, Stmt *stmts, const char *trueOrFalse) { 00353 if (!stmts) 00354 return; 00355 00356 if (dynamic_cast<StmtList *>(stmts) == NULL) 00357 ctx->StartScope(); 00358 ctx->AddInstrumentationPoint(trueOrFalse); 00359 stmts->EmitCode(ctx); 00360 if (dynamic_cast<const StmtList *>(stmts) == NULL) 00361 ctx->EndScope(); 00362 } 00363 00364 00365 /** Returns true if the "true" block for the if statement consists of a 00366 single 'break' statement, and the "false" block is empty. */ 00367 static bool 00368 lCanApplyBreakOptimization(Stmt *trueStmts, Stmt *falseStmts) { 00369 if (falseStmts != NULL) { 00370 if (StmtList *sl = dynamic_cast<StmtList *>(falseStmts)) { 00371 return (sl->stmts.size() == 0); 00372 } 00373 else 00374 return false; 00375 } 00376 00377 if (dynamic_cast<BreakStmt *>(trueStmts)) 00378 return true; 00379 else if (StmtList *sl = dynamic_cast<StmtList *>(trueStmts)) 00380 return (sl->stmts.size() == 1 && 00381 dynamic_cast<BreakStmt *>(sl->stmts[0]) != NULL); 00382 else 00383 return false; 00384 } 00385 00386 00387 void 00388 IfStmt::EmitCode(FunctionEmitContext *ctx) const { 00389 // First check all of the things that might happen due to errors 00390 // earlier in compilation and bail out if needed so that we don't 00391 // dereference NULL pointers in the below... 00392 if (!ctx->GetCurrentBasicBlock()) 00393 return; 00394 if (!test) 00395 return; 00396 const Type *testType = test->GetType(); 00397 if (!testType) 00398 return; 00399 00400 ctx->SetDebugPos(pos); 00401 bool isUniform = testType->IsUniformType(); 00402 00403 llvm::Value *testValue = test->GetValue(ctx); 00404 if (testValue == NULL) 00405 return; 00406 00407 if (isUniform) { 00408 ctx->StartUniformIf(); 00409 if (doAllCheck) 00410 Warning(test->pos, "Uniform condition supplied to \"cif\" statement."); 00411 00412 // 'If' statements with uniform conditions are relatively 00413 // straightforward. We evaluate the condition and then jump to 00414 // either the 'then' or 'else' clause depending on its value. 00415 llvm::BasicBlock *bthen = ctx->CreateBasicBlock("if_then"); 00416 llvm::BasicBlock *belse = ctx->CreateBasicBlock("if_else"); 00417 llvm::BasicBlock *bexit = ctx->CreateBasicBlock("if_exit"); 00418 00419 // Jump to the appropriate basic block based on the value of 00420 // the 'if' test 00421 ctx->BranchInst(bthen, belse, testValue); 00422 00423 // Emit code for the 'true' case 00424 ctx->SetCurrentBasicBlock(bthen); 00425 lEmitIfStatements(ctx, trueStmts, "true"); 00426 if (ctx->GetCurrentBasicBlock()) 00427 ctx->BranchInst(bexit); 00428 00429 // Emit code for the 'false' case 00430 ctx->SetCurrentBasicBlock(belse); 00431 lEmitIfStatements(ctx, falseStmts, "false"); 00432 if (ctx->GetCurrentBasicBlock()) 00433 ctx->BranchInst(bexit); 00434 00435 // Set the active basic block to the newly-created exit block 00436 // so that subsequent emitted code starts there. 00437 ctx->SetCurrentBasicBlock(bexit); 00438 ctx->EndIf(); 00439 } 00440 else if (lCanApplyBreakOptimization(trueStmts, falseStmts)) { 00441 // If we have a simple break statement inside the 'if' and are 00442 // under varying control flow, just update the execution mask 00443 // directly and don't emit code for the statements. This leads to 00444 // better code for this case--this is surprising and should be 00445 // root-caused further, but for now this gives us performance 00446 // benefit in this case. 00447 ctx->SetInternalMaskAndNot(ctx->GetInternalMask(), testValue); 00448 } 00449 else 00450 emitVaryingIf(ctx, testValue); 00451 } 00452 00453 00454 Stmt * 00455 IfStmt::TypeCheck() { 00456 if (test != NULL) { 00457 const Type *testType = test->GetType(); 00458 if (testType != NULL) { 00459 bool isUniform = (testType->IsUniformType() && 00460 !g->opt.disableUniformControlFlow); 00461 test = TypeConvertExpr(test, isUniform ? AtomicType::UniformBool : 00462 AtomicType::VaryingBool, 00463 "\"if\" statement test"); 00464 if (test == NULL) 00465 return NULL; 00466 } 00467 } 00468 00469 return this; 00470 } 00471 00472 00473 int 00474 IfStmt::EstimateCost() const { 00475 const Type *type; 00476 if (test == NULL || (type = test->GetType()) == NULL) 00477 return 0; 00478 00479 return type->IsUniformType() ? COST_UNIFORM_IF : COST_VARYING_IF; 00480 } 00481 00482 00483 void 00484 IfStmt::Print(int indent) const { 00485 printf("%*cIf Stmt %s", indent, ' ', doAllCheck ? "DO ALL CHECK" : ""); 00486 pos.Print(); 00487 printf("\n%*cTest: ", indent+4, ' '); 00488 test->Print(); 00489 printf("\n"); 00490 if (trueStmts) { 00491 printf("%*cTrue:\n", indent+4, ' '); 00492 trueStmts->Print(indent+8); 00493 } 00494 if (falseStmts) { 00495 printf("%*cFalse:\n", indent+4, ' '); 00496 falseStmts->Print(indent+8); 00497 } 00498 } 00499 00500 00501 /** Emit code to run both the true and false statements for the if test, 00502 with the mask set appropriately before running each one. 00503 */ 00504 void 00505 IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask, 00506 llvm::Value *test) const { 00507 if (trueStmts) { 00508 ctx->SetInternalMaskAnd(oldMask, test); 00509 lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements"); 00510 // under varying control flow,, returns can't stop instruction 00511 // emission, so this better be non-NULL... 00512 AssertPos(ctx->GetDebugPos(), ctx->GetCurrentBasicBlock()); 00513 } 00514 if (falseStmts) { 00515 ctx->SetInternalMaskAndNot(oldMask, test); 00516 lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements"); 00517 AssertPos(ctx->GetDebugPos(), ctx->GetCurrentBasicBlock()); 00518 } 00519 } 00520 00521 00522 /** Emit code for an if test that checks the mask and the test values and 00523 tries to be smart about jumping over code that doesn't need to be run. 00524 */ 00525 void 00526 IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const { 00527 llvm::Value *oldMask = ctx->GetInternalMask(); 00528 if (doAllCheck) { 00529 // We can't tell if the mask going into the if is all on at the 00530 // compile time. Emit code to check for this and then either run 00531 // the code for the 'all on' or the 'mixed' case depending on the 00532 // mask's value at runtime. 00533 llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("cif_mask_all"); 00534 llvm::BasicBlock *bMixedOn = ctx->CreateBasicBlock("cif_mask_mixed"); 00535 llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done"); 00536 00537 // Jump to either bAllOn or bMixedOn, depending on the mask's value 00538 llvm::Value *maskAllQ = ctx->All(ctx->GetFullMask()); 00539 ctx->BranchInst(bAllOn, bMixedOn, maskAllQ); 00540 00541 // Emit code for the 'mask all on' case 00542 ctx->SetCurrentBasicBlock(bAllOn); 00543 emitMaskAllOn(ctx, ltest, bDone); 00544 00545 // And emit code for the mixed mask case 00546 ctx->SetCurrentBasicBlock(bMixedOn); 00547 emitMaskMixed(ctx, oldMask, ltest, bDone); 00548 00549 // When done, set the current basic block to the block that the two 00550 // paths above jump to when they're done. 00551 ctx->SetCurrentBasicBlock(bDone); 00552 } 00553 else if (trueStmts != NULL || falseStmts != NULL) { 00554 // If there is nothing that is potentially unsafe to run with all 00555 // lanes off in the true and false statements and if the total 00556 // complexity of those two is relatively simple, then we'll go 00557 // ahead and emit straightline code that runs both sides, updating 00558 // the mask accordingly. This is useful for efficiently compiling 00559 // things like: 00560 // 00561 // if (foo) x = 0; 00562 // else ++x; 00563 // 00564 // Where the overhead of checking if any of the program instances wants 00565 // to run one side or the other is more than the actual computation. 00566 // SafeToRunWithMaskAllOff() checks to make sure that we don't do this 00567 // for potentially dangerous code like: 00568 // 00569 // if (index < count) array[index] = 0; 00570 // 00571 // where our use of blend for conditional assignments doesn't check 00572 // for the 'all lanes' off case. 00573 int trueFalseCost = (::EstimateCost(trueStmts) + 00574 ::EstimateCost(falseStmts)); 00575 bool costIsAcceptable = (trueFalseCost < 00576 PREDICATE_SAFE_IF_STATEMENT_COST); 00577 00578 bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) && 00579 SafeToRunWithMaskAllOff(falseStmts)); 00580 00581 Debug(pos, "If statement: true cost %d (safe %d), false cost %d (safe %d).", 00582 ::EstimateCost(trueStmts), (int)SafeToRunWithMaskAllOff(trueStmts), 00583 ::EstimateCost(falseStmts), (int)SafeToRunWithMaskAllOff(falseStmts)); 00584 00585 if (safeToRunWithAllLanesOff && 00586 (costIsAcceptable || g->opt.disableCoherentControlFlow)) { 00587 ctx->StartVaryingIf(oldMask); 00588 emitMaskedTrueAndFalse(ctx, oldMask, ltest); 00589 AssertPos(pos, ctx->GetCurrentBasicBlock()); 00590 ctx->EndIf(); 00591 } 00592 else { 00593 llvm::BasicBlock *bDone = ctx->CreateBasicBlock("if_done"); 00594 emitMaskMixed(ctx, oldMask, ltest, bDone); 00595 ctx->SetCurrentBasicBlock(bDone); 00596 } 00597 } 00598 } 00599 00600 00601 /** Emits code for 'if' tests under the case where we know that the program 00602 mask is all on going into the 'if'. 00603 */ 00604 void 00605 IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest, 00606 llvm::BasicBlock *bDone) const { 00607 // We start by explicitly storing "all on" into the mask mask. Note 00608 // that this doesn't change its actual value, but doing so lets the 00609 // compiler see what's going on so that subsequent optimizations for 00610 // code emitted here can operate with the knowledge that the mask is 00611 // definitely all on (until it modifies the mask itself). 00612 AssertPos(pos, !g->opt.disableCoherentControlFlow); 00613 if (!g->opt.disableMaskAllOnOptimizations) 00614 ctx->SetInternalMask(LLVMMaskAllOn); 00615 llvm::Value *oldFunctionMask = ctx->GetFunctionMask(); 00616 if (!g->opt.disableMaskAllOnOptimizations) 00617 ctx->SetFunctionMask(LLVMMaskAllOn); 00618 00619 // First, check the value of the test. If it's all on, then we jump to 00620 // a basic block that will only have code for the true case. 00621 llvm::BasicBlock *bTestAll = ctx->CreateBasicBlock("cif_test_all"); 00622 llvm::BasicBlock *bTestNoneCheck = ctx->CreateBasicBlock("cif_test_none_check"); 00623 llvm::Value *testAllQ = ctx->All(ltest); 00624 ctx->BranchInst(bTestAll, bTestNoneCheck, testAllQ); 00625 00626 // Emit code for the 'test is all true' case 00627 ctx->SetCurrentBasicBlock(bTestAll); 00628 ctx->StartVaryingIf(LLVMMaskAllOn); 00629 lEmitIfStatements(ctx, trueStmts, "if: all on mask, expr all true"); 00630 ctx->EndIf(); 00631 if (ctx->GetCurrentBasicBlock() != NULL) 00632 // bblock may legitimately be NULL since if there's a return stmt 00633 // or break or continue we can actually jump and end emission since 00634 // we know all of the lanes are following this path... 00635 ctx->BranchInst(bDone); 00636 00637 // The test isn't all true. Now emit code to determine if it's all 00638 // false, or has mixed values. 00639 ctx->SetCurrentBasicBlock(bTestNoneCheck); 00640 llvm::BasicBlock *bTestNone = ctx->CreateBasicBlock("cif_test_none"); 00641 llvm::BasicBlock *bTestMixed = ctx->CreateBasicBlock("cif_test_mixed"); 00642 llvm::Value *testMixedQ = ctx->Any(ltest); 00643 ctx->BranchInst(bTestMixed, bTestNone, testMixedQ); 00644 00645 // Emit code for the 'test is all false' case 00646 ctx->SetCurrentBasicBlock(bTestNone); 00647 ctx->StartVaryingIf(LLVMMaskAllOn); 00648 lEmitIfStatements(ctx, falseStmts, "if: all on mask, expr all false"); 00649 ctx->EndIf(); 00650 if (ctx->GetCurrentBasicBlock()) 00651 // bblock may be NULL since if there's a return stmt or break or 00652 // continue we can actually jump or whatever and end emission... 00653 ctx->BranchInst(bDone); 00654 00655 // Finally emit code for the 'mixed true/false' case. We unavoidably 00656 // need to run both the true and the false statements. 00657 ctx->SetCurrentBasicBlock(bTestMixed); 00658 ctx->StartVaryingIf(LLVMMaskAllOn); 00659 emitMaskedTrueAndFalse(ctx, LLVMMaskAllOn, ltest); 00660 // In this case, return/break/continue isn't allowed to jump and end 00661 // emission. 00662 AssertPos(pos, ctx->GetCurrentBasicBlock()); 00663 ctx->EndIf(); 00664 ctx->BranchInst(bDone); 00665 00666 ctx->SetCurrentBasicBlock(bDone); 00667 ctx->SetFunctionMask(oldFunctionMask); 00668 } 00669 00670 00671 /** Emit code for an 'if' test where the lane mask is known to be mixed 00672 on/off going into it. 00673 */ 00674 void 00675 IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask, 00676 llvm::Value *ltest, llvm::BasicBlock *bDone) const { 00677 ctx->StartVaryingIf(oldMask); 00678 llvm::BasicBlock *bNext = ctx->CreateBasicBlock("safe_if_after_true"); 00679 if (trueStmts != NULL) { 00680 llvm::BasicBlock *bRunTrue = ctx->CreateBasicBlock("safe_if_run_true"); 00681 ctx->SetInternalMaskAnd(oldMask, ltest); 00682 00683 // Do any of the program instances want to run the 'true' 00684 // block? If not, jump ahead to bNext. 00685 llvm::Value *maskAnyQ = ctx->Any(ctx->GetFullMask()); 00686 ctx->BranchInst(bRunTrue, bNext, maskAnyQ); 00687 00688 // Emit statements for true 00689 ctx->SetCurrentBasicBlock(bRunTrue); 00690 lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements"); 00691 AssertPos(pos, ctx->GetCurrentBasicBlock()); 00692 ctx->BranchInst(bNext); 00693 ctx->SetCurrentBasicBlock(bNext); 00694 } 00695 if (falseStmts != NULL) { 00696 llvm::BasicBlock *bRunFalse = ctx->CreateBasicBlock("safe_if_run_false"); 00697 bNext = ctx->CreateBasicBlock("safe_if_after_false"); 00698 ctx->SetInternalMaskAndNot(oldMask, ltest); 00699 00700 // Similarly, check to see if any of the instances want to 00701 // run the 'false' block... 00702 llvm::Value *maskAnyQ = ctx->Any(ctx->GetFullMask()); 00703 ctx->BranchInst(bRunFalse, bNext, maskAnyQ); 00704 00705 // Emit code for false 00706 ctx->SetCurrentBasicBlock(bRunFalse); 00707 lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements"); 00708 AssertPos(pos, ctx->GetCurrentBasicBlock()); 00709 ctx->BranchInst(bNext); 00710 ctx->SetCurrentBasicBlock(bNext); 00711 } 00712 ctx->BranchInst(bDone); 00713 ctx->SetCurrentBasicBlock(bDone); 00714 ctx->EndIf(); 00715 } 00716 00717 00718 /////////////////////////////////////////////////////////////////////////// 00719 // DoStmt 00720 00721 struct VaryingBCCheckInfo { 00722 VaryingBCCheckInfo() { 00723 varyingControlFlowDepth = 0; 00724 foundVaryingBreakOrContinue = false; 00725 } 00726 00727 int varyingControlFlowDepth; 00728 bool foundVaryingBreakOrContinue; 00729 }; 00730 00731 00732 /** Returns true if the given node is an 'if' statement where the test 00733 condition has varying type. */ 00734 static bool 00735 lIsVaryingFor(ASTNode *node) { 00736 IfStmt *ifStmt; 00737 if ((ifStmt = dynamic_cast<IfStmt *>(node)) != NULL && 00738 ifStmt->test != NULL) { 00739 const Type *type = ifStmt->test->GetType(); 00740 return (type != NULL && type->IsVaryingType()); 00741 } 00742 else 00743 return false; 00744 } 00745 00746 00747 /** Preorder callback function for checking for varying breaks or 00748 continues. */ 00749 static bool 00750 lVaryingBCPreFunc(ASTNode *node, void *d) { 00751 VaryingBCCheckInfo *info = (VaryingBCCheckInfo *)d; 00752 00753 // We found a break or continue statement; if we're under varying 00754 // control flow, then bingo. 00755 if ((dynamic_cast<BreakStmt *>(node) != NULL || 00756 dynamic_cast<ContinueStmt *>(node) != NULL) && 00757 info->varyingControlFlowDepth > 0) { 00758 info->foundVaryingBreakOrContinue = true; 00759 return false; 00760 } 00761 00762 // Update the count of the nesting depth of varying control flow if 00763 // this is an if statement with a varying condition. 00764 if (lIsVaryingFor(node)) 00765 ++info->varyingControlFlowDepth; 00766 00767 if (dynamic_cast<ForStmt *>(node) != NULL || 00768 dynamic_cast<DoStmt *>(node) != NULL || 00769 dynamic_cast<ForeachStmt *>(node) != NULL) 00770 // Don't recurse into these guys, since we don't care about varying 00771 // breaks or continues within them... 00772 return false; 00773 else 00774 return true; 00775 } 00776 00777 00778 /** Postorder callback function for checking for varying breaks or 00779 continues; decrement the varying control flow depth after the node's 00780 children have been processed, if this is a varying if statement. */ 00781 static ASTNode * 00782 lVaryingBCPostFunc(ASTNode *node, void *d) { 00783 VaryingBCCheckInfo *info = (VaryingBCCheckInfo *)d; 00784 if (lIsVaryingFor(node)) 00785 --info->varyingControlFlowDepth; 00786 return node; 00787 } 00788 00789 00790 /** Given a statment, walk through it to see if there is a 'break' or 00791 'continue' statement inside if its children, under varying control 00792 flow. We need to detect this case for loops since what might otherwise 00793 look like a 'uniform' loop needs to have code emitted to do all of the 00794 lane management stuff if this is the case. 00795 */ 00796 static bool 00797 lHasVaryingBreakOrContinue(Stmt *stmt) { 00798 VaryingBCCheckInfo info; 00799 WalkAST(stmt, lVaryingBCPreFunc, lVaryingBCPostFunc, &info); 00800 return info.foundVaryingBreakOrContinue; 00801 } 00802 00803 00804 DoStmt::DoStmt(Expr *t, Stmt *s, bool cc, SourcePos p) 00805 : Stmt(p), testExpr(t), bodyStmts(s), 00806 doCoherentCheck(cc && !g->opt.disableCoherentControlFlow) { 00807 } 00808 00809 00810 void DoStmt::EmitCode(FunctionEmitContext *ctx) const { 00811 // Check for things that could be NULL due to earlier errors during 00812 // compilation. 00813 if (!ctx->GetCurrentBasicBlock()) 00814 return; 00815 if (!testExpr || !testExpr->GetType()) 00816 return; 00817 00818 bool uniformTest = testExpr->GetType()->IsUniformType(); 00819 if (uniformTest && doCoherentCheck) 00820 Warning(testExpr->pos, "Uniform condition supplied to \"cdo\" " 00821 "statement."); 00822 00823 llvm::BasicBlock *bloop = ctx->CreateBasicBlock("do_loop"); 00824 llvm::BasicBlock *bexit = ctx->CreateBasicBlock("do_exit"); 00825 llvm::BasicBlock *btest = ctx->CreateBasicBlock("do_test"); 00826 00827 ctx->StartLoop(bexit, btest, uniformTest); 00828 00829 // Start by jumping into the loop body 00830 ctx->BranchInst(bloop); 00831 00832 // And now emit code for the loop body 00833 ctx->SetCurrentBasicBlock(bloop); 00834 ctx->SetLoopMask(ctx->GetInternalMask()); 00835 ctx->SetDebugPos(pos); 00836 // FIXME: in the StmtList::EmitCode() method takes starts/stops a new 00837 // scope around the statements in the list. So if the body is just a 00838 // single statement (and thus not a statement list), we need a new 00839 // scope, but we don't want two scopes in the StmtList case. 00840 if (!dynamic_cast<StmtList *>(bodyStmts)) 00841 ctx->StartScope(); 00842 00843 ctx->AddInstrumentationPoint("do loop body"); 00844 if (doCoherentCheck && !uniformTest) { 00845 // Check to see if the mask is all on 00846 llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("do_all_on"); 00847 llvm::BasicBlock *bMixed = ctx->CreateBasicBlock("do_mixed"); 00848 ctx->BranchIfMaskAll(bAllOn, bMixed); 00849 00850 // If so, emit code for the 'mask all on' case. In particular, 00851 // explicitly set the mask to 'all on' (see rationale in 00852 // IfStmt::emitCoherentTests()), and then emit the code for the 00853 // loop body. 00854 ctx->SetCurrentBasicBlock(bAllOn); 00855 if (!g->opt.disableMaskAllOnOptimizations) 00856 ctx->SetInternalMask(LLVMMaskAllOn); 00857 llvm::Value *oldFunctionMask = ctx->GetFunctionMask(); 00858 if (!g->opt.disableMaskAllOnOptimizations) 00859 ctx->SetFunctionMask(LLVMMaskAllOn); 00860 if (bodyStmts) 00861 bodyStmts->EmitCode(ctx); 00862 AssertPos(pos, ctx->GetCurrentBasicBlock()); 00863 ctx->SetFunctionMask(oldFunctionMask); 00864 ctx->BranchInst(btest); 00865 00866 // The mask is mixed. Just emit the code for the loop body. 00867 ctx->SetCurrentBasicBlock(bMixed); 00868 if (bodyStmts) 00869 bodyStmts->EmitCode(ctx); 00870 AssertPos(pos, ctx->GetCurrentBasicBlock()); 00871 ctx->BranchInst(btest); 00872 } 00873 else { 00874 // Otherwise just emit the code for the loop body. The current 00875 // mask is good. 00876 if (bodyStmts) 00877 bodyStmts->EmitCode(ctx); 00878 if (ctx->GetCurrentBasicBlock()) 00879 ctx->BranchInst(btest); 00880 } 00881 // End the scope we started above, if needed. 00882 if (!dynamic_cast<StmtList *>(bodyStmts)) 00883 ctx->EndScope(); 00884 00885 // Now emit code for the loop test. 00886 ctx->SetCurrentBasicBlock(btest); 00887 // First, emit code to restore the mask value for any lanes that 00888 // executed a 'continue' during the current loop before we go and emit 00889 // the code for the test. This is only necessary for varying loops; 00890 // 'uniform' loops just jump when they hit a continue statement and 00891 // don't mess with the mask. 00892 if (!uniformTest) 00893 ctx->RestoreContinuedLanes(); 00894 llvm::Value *testValue = testExpr->GetValue(ctx); 00895 if (!testValue) 00896 return; 00897 00898 if (uniformTest) 00899 // For the uniform case, just jump to the top of the loop or the 00900 // exit basic block depending on the value of the test. 00901 ctx->BranchInst(bloop, bexit, testValue); 00902 else { 00903 // For the varying case, update the mask based on the value of the 00904 // test. If any program instances still want to be running, jump 00905 // to the top of the loop. Otherwise, jump out. 00906 llvm::Value *mask = ctx->GetInternalMask(); 00907 ctx->SetInternalMaskAnd(mask, testValue); 00908 ctx->BranchIfMaskAny(bloop, bexit); 00909 } 00910 00911 // ...and we're done. Set things up for subsequent code to be emitted 00912 // in the right basic block. 00913 ctx->SetCurrentBasicBlock(bexit); 00914 ctx->EndLoop(); 00915 } 00916 00917 00918 Stmt * 00919 DoStmt::TypeCheck() { 00920 const Type *testType; 00921 if (testExpr != NULL && (testType = testExpr->GetType()) != NULL) { 00922 if (!testType->IsNumericType() && !testType->IsBoolType()) { 00923 Error(testExpr->pos, "Type \"%s\" can't be converted to boolean for \"while\" " 00924 "test in \"do\" loop.", testExpr->GetType()->GetString().c_str()); 00925 return NULL; 00926 } 00927 00928 // Should the test condition for the loop be uniform or varying? 00929 // It can be uniform only if three conditions are met: 00930 // 00931 // - First and foremost, the type of the test condition must be 00932 // uniform. 00933 // 00934 // - Second, the user must not have set the dis-optimization option 00935 // that disables uniform flow control. 00936 // 00937 // - Thirdly, and most subtlely, there must not be any break or 00938 // continue statements inside the loop that are within the scope 00939 // of a 'varying' if statement. If there are, then we type cast 00940 // the test to be 'varying', so that the code generated for the 00941 // loop includes masking stuff, so that we can track which lanes 00942 // actually want to be running, accounting for breaks/continues. 00943 // 00944 bool uniformTest = (testType->IsUniformType() && 00945 !g->opt.disableUniformControlFlow && 00946 !lHasVaryingBreakOrContinue(bodyStmts)); 00947 testExpr = new TypeCastExpr(uniformTest ? AtomicType::UniformBool : 00948 AtomicType::VaryingBool, 00949 testExpr, testExpr->pos); 00950 } 00951 00952 return this; 00953 } 00954 00955 00956 int 00957 DoStmt::EstimateCost() const { 00958 bool uniformTest = testExpr ? testExpr->GetType()->IsUniformType() : 00959 (!g->opt.disableUniformControlFlow && 00960 !lHasVaryingBreakOrContinue(bodyStmts)); 00961 00962 return uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP; 00963 } 00964 00965 00966 void 00967 DoStmt::Print(int indent) const { 00968 printf("%*cDo Stmt", indent, ' '); 00969 pos.Print(); 00970 printf(":\n"); 00971 printf("%*cTest: ", indent+4, ' '); 00972 if (testExpr) testExpr->Print(); 00973 printf("\n"); 00974 if (bodyStmts) { 00975 printf("%*cStmts:\n", indent+4, ' '); 00976 bodyStmts->Print(indent+8); 00977 } 00978 } 00979 00980 00981 /////////////////////////////////////////////////////////////////////////// 00982 // ForStmt 00983 00984 ForStmt::ForStmt(Stmt *i, Expr *t, Stmt *s, Stmt *st, bool cc, SourcePos p) 00985 : Stmt(p), init(i), test(t), step(s), stmts(st), 00986 doCoherentCheck(cc && !g->opt.disableCoherentControlFlow) { 00987 } 00988 00989 00990 void 00991 ForStmt::EmitCode(FunctionEmitContext *ctx) const { 00992 if (!ctx->GetCurrentBasicBlock()) 00993 return; 00994 00995 llvm::BasicBlock *btest = ctx->CreateBasicBlock("for_test"); 00996 llvm::BasicBlock *bstep = ctx->CreateBasicBlock("for_step"); 00997 llvm::BasicBlock *bloop = ctx->CreateBasicBlock("for_loop"); 00998 llvm::BasicBlock *bexit = ctx->CreateBasicBlock("for_exit"); 00999 01000 bool uniformTest = test ? test->GetType()->IsUniformType() : 01001 (!g->opt.disableUniformControlFlow && 01002 !lHasVaryingBreakOrContinue(stmts)); 01003 01004 ctx->StartLoop(bexit, bstep, uniformTest); 01005 ctx->SetDebugPos(pos); 01006 01007 // If we have an initiailizer statement, start by emitting the code for 01008 // it and then jump into the loop test code. (Also start a new scope 01009 // since the initiailizer may be a declaration statement). 01010 if (init) { 01011 AssertPos(pos, dynamic_cast<StmtList *>(init) == NULL); 01012 ctx->StartScope(); 01013 init->EmitCode(ctx); 01014 } 01015 ctx->BranchInst(btest); 01016 01017 // Emit code to get the value of the loop test. If no test expression 01018 // was provided, just go with a true value. 01019 ctx->SetCurrentBasicBlock(btest); 01020 llvm::Value *ltest = NULL; 01021 if (test) { 01022 ltest = test->GetValue(ctx); 01023 if (!ltest) { 01024 ctx->EndScope(); 01025 ctx->EndLoop(); 01026 return; 01027 } 01028 } 01029 else 01030 ltest = uniformTest ? LLVMTrue : LLVMBoolVector(true); 01031 01032 // Now use the test's value. For a uniform loop, we can either jump to 01033 // the loop body or the loop exit, based on whether it's true or false. 01034 // For a non-uniform loop, we update the mask and jump into the loop if 01035 // any of the mask values are true. 01036 if (uniformTest) { 01037 if (doCoherentCheck) 01038 Warning(test->pos, "Uniform condition supplied to cfor/cwhile " 01039 "statement."); 01040 AssertPos(pos, ltest->getType() == LLVMTypes::BoolType); 01041 ctx->BranchInst(bloop, bexit, ltest); 01042 } 01043 else { 01044 llvm::Value *mask = ctx->GetInternalMask(); 01045 ctx->SetInternalMaskAnd(mask, ltest); 01046 ctx->BranchIfMaskAny(bloop, bexit); 01047 } 01048 01049 // On to emitting the code for the loop body. 01050 ctx->SetCurrentBasicBlock(bloop); 01051 ctx->SetLoopMask(ctx->GetInternalMask()); 01052 ctx->AddInstrumentationPoint("for loop body"); 01053 if (!dynamic_cast<StmtList *>(stmts)) 01054 ctx->StartScope(); 01055 01056 if (doCoherentCheck && !uniformTest) { 01057 // For 'varying' loops with the coherence check, we start by 01058 // checking to see if the mask is all on, after it has been updated 01059 // based on the value of the test. 01060 llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("for_all_on"); 01061 llvm::BasicBlock *bMixed = ctx->CreateBasicBlock("for_mixed"); 01062 ctx->BranchIfMaskAll(bAllOn, bMixed); 01063 01064 // Emit code for the mask being all on. Explicitly set the mask to 01065 // be on so that the optimizer can see that it's on (i.e. now that 01066 // the runtime test has passed, make this fact clear for code 01067 // generation at compile time here.) 01068 ctx->SetCurrentBasicBlock(bAllOn); 01069 if (!g->opt.disableMaskAllOnOptimizations) 01070 ctx->SetInternalMask(LLVMMaskAllOn); 01071 llvm::Value *oldFunctionMask = ctx->GetFunctionMask(); 01072 if (!g->opt.disableMaskAllOnOptimizations) 01073 ctx->SetFunctionMask(LLVMMaskAllOn); 01074 if (stmts) 01075 stmts->EmitCode(ctx); 01076 AssertPos(pos, ctx->GetCurrentBasicBlock()); 01077 ctx->SetFunctionMask(oldFunctionMask); 01078 ctx->BranchInst(bstep); 01079 01080 // Emit code for the mask being mixed. We should never run the 01081 // loop with the mask all off, based on the BranchIfMaskAny call 01082 // above. 01083 ctx->SetCurrentBasicBlock(bMixed); 01084 if (stmts) 01085 stmts->EmitCode(ctx); 01086 ctx->BranchInst(bstep); 01087 } 01088 else { 01089 // For both uniform loops and varying loops without the coherence 01090 // check, we know that at least one program instance wants to be 01091 // running the loop, so just emit code for the loop body and jump 01092 // to the loop step code. 01093 if (stmts) 01094 stmts->EmitCode(ctx); 01095 if (ctx->GetCurrentBasicBlock()) 01096 ctx->BranchInst(bstep); 01097 } 01098 if (!dynamic_cast<StmtList *>(stmts)) 01099 ctx->EndScope(); 01100 01101 // Emit code for the loop step. First, restore the lane mask of any 01102 // program instances that executed a 'continue' during the previous 01103 // iteration. Then emit code for the loop step and then jump to the 01104 // test code. 01105 ctx->SetCurrentBasicBlock(bstep); 01106 ctx->RestoreContinuedLanes(); 01107 if (step) 01108 step->EmitCode(ctx); 01109 ctx->BranchInst(btest); 01110 01111 // Set the current emission basic block to the loop exit basic block 01112 ctx->SetCurrentBasicBlock(bexit); 01113 if (init) 01114 ctx->EndScope(); 01115 ctx->EndLoop(); 01116 } 01117 01118 01119 Stmt * 01120 ForStmt::TypeCheck() { 01121 const Type *testType; 01122 if (test && (testType = test->GetType()) != NULL) { 01123 if (!testType->IsNumericType() && !testType->IsBoolType()) { 01124 Error(test->pos, "Type \"%s\" can't be converted to boolean for \"for\" " 01125 "loop test.", test->GetType()->GetString().c_str()); 01126 return NULL; 01127 } 01128 01129 // See comments in DoStmt::TypeCheck() regarding 01130 // 'uniformTest' and the type cast here. 01131 bool uniformTest = (testType->IsUniformType() && 01132 !g->opt.disableUniformControlFlow && 01133 !lHasVaryingBreakOrContinue(stmts)); 01134 test = new TypeCastExpr(uniformTest ? AtomicType::UniformBool : 01135 AtomicType::VaryingBool, test, test->pos); 01136 test = ::TypeCheck(test); 01137 if (test == NULL) 01138 return NULL; 01139 } 01140 01141 return this; 01142 } 01143 01144 01145 int 01146 ForStmt::EstimateCost() const { 01147 bool uniformTest = test ? test->GetType()->IsUniformType() : 01148 (!g->opt.disableUniformControlFlow && 01149 !lHasVaryingBreakOrContinue(stmts)); 01150 01151 return uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP; 01152 } 01153 01154 01155 void 01156 ForStmt::Print(int indent) const { 01157 printf("%*cFor Stmt", indent, ' '); 01158 pos.Print(); 01159 printf("\n"); 01160 if (init) { 01161 printf("%*cInit:\n", indent+4, ' '); 01162 init->Print(indent+8); 01163 } 01164 if (test) { 01165 printf("%*cTest: ", indent+4, ' '); 01166 test->Print(); 01167 printf("\n"); 01168 } 01169 if (step) { 01170 printf("%*cStep:\n", indent+4, ' '); 01171 step->Print(indent+8); 01172 } 01173 if (stmts) { 01174 printf("%*cStmts:\n", indent+4, ' '); 01175 stmts->Print(indent+8); 01176 } 01177 } 01178 01179 /////////////////////////////////////////////////////////////////////////// 01180 // BreakStmt 01181 01182 BreakStmt::BreakStmt(bool cc, SourcePos p) 01183 : Stmt(p), doCoherenceCheck(cc && !g->opt.disableCoherentControlFlow) { 01184 } 01185 01186 01187 void 01188 BreakStmt::EmitCode(FunctionEmitContext *ctx) const { 01189 if (!ctx->GetCurrentBasicBlock()) 01190 return; 01191 01192 ctx->SetDebugPos(pos); 01193 ctx->Break(doCoherenceCheck); 01194 } 01195 01196 01197 Stmt * 01198 BreakStmt::TypeCheck() { 01199 return this; 01200 } 01201 01202 01203 int 01204 BreakStmt::EstimateCost() const { 01205 return doCoherenceCheck ? COST_COHERENT_BREAK_CONTINE : 01206 COST_REGULAR_BREAK_CONTINUE; 01207 } 01208 01209 01210 void 01211 BreakStmt::Print(int indent) const { 01212 printf("%*c%sBreak Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : ""); 01213 pos.Print(); 01214 printf("\n"); 01215 } 01216 01217 01218 /////////////////////////////////////////////////////////////////////////// 01219 // ContinueStmt 01220 01221 ContinueStmt::ContinueStmt(bool cc, SourcePos p) 01222 : Stmt(p), doCoherenceCheck(cc && !g->opt.disableCoherentControlFlow) { 01223 } 01224 01225 01226 void 01227 ContinueStmt::EmitCode(FunctionEmitContext *ctx) const { 01228 if (!ctx->GetCurrentBasicBlock()) 01229 return; 01230 01231 ctx->SetDebugPos(pos); 01232 ctx->Continue(doCoherenceCheck); 01233 } 01234 01235 01236 Stmt * 01237 ContinueStmt::TypeCheck() { 01238 return this; 01239 } 01240 01241 01242 int 01243 ContinueStmt::EstimateCost() const { 01244 return doCoherenceCheck ? COST_COHERENT_BREAK_CONTINE : 01245 COST_REGULAR_BREAK_CONTINUE; 01246 } 01247 01248 01249 void 01250 ContinueStmt::Print(int indent) const { 01251 printf("%*c%sContinue Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : ""); 01252 pos.Print(); 01253 printf("\n"); 01254 } 01255 01256 01257 /////////////////////////////////////////////////////////////////////////// 01258 // ForeachStmt 01259 01260 ForeachStmt::ForeachStmt(const std::vector<Symbol *> &lvs, 01261 const std::vector<Expr *> &se, 01262 const std::vector<Expr *> &ee, 01263 Stmt *s, bool t, SourcePos pos) 01264 : Stmt(pos), dimVariables(lvs), startExprs(se), endExprs(ee), isTiled(t), 01265 stmts(s) { 01266 } 01267 01268 01269 /* Given a uniform counter value in the memory location pointed to by 01270 uniformCounterPtr, compute the corresponding set of varying counter 01271 values for use within the loop body. 01272 */ 01273 static llvm::Value * 01274 lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx, 01275 llvm::Value *uniformCounterPtr, 01276 llvm::Value *varyingCounterPtr, 01277 const std::vector<int> &spans) { 01278 // Smear the uniform counter value out to be varying 01279 llvm::Value *counter = ctx->LoadInst(uniformCounterPtr); 01280 llvm::Value *smearCounter = 01281 llvm::UndefValue::get(LLVMTypes::Int32VectorType); 01282 for (int i = 0; i < g->target.vectorWidth; ++i) 01283 smearCounter = 01284 ctx->InsertInst(smearCounter, counter, i, "smear_counter"); 01285 01286 // Figure out the offsets; this is a little bit tricky. As an example, 01287 // consider a 2D tiled foreach loop, where we're running 8-wide and 01288 // where the inner dimension has a stride of 4 and the outer dimension 01289 // has a stride of 2. For the inner dimension, we want the offsets 01290 // (0,1,2,3,0,1,2,3), and for the outer dimension we want 01291 // (0,0,0,0,1,1,1,1). 01292 int32_t delta[ISPC_MAX_NVEC]; 01293 for (int i = 0; i < g->target.vectorWidth; ++i) { 01294 int d = i; 01295 // First, account for the effect of any dimensions at deeper 01296 // nesting levels than the current one. 01297 int prevDimSpanCount = 1; 01298 for (int j = dim; j < nDims-1; ++j) 01299 prevDimSpanCount *= spans[j+1]; 01300 d /= prevDimSpanCount; 01301 01302 // And now with what's left, figure out our own offset 01303 delta[i] = d % spans[dim]; 01304 } 01305 01306 // Add the deltas to compute the varying counter values; store the 01307 // result to memory and then return it directly as well. 01308 llvm::Value *varyingCounter = 01309 ctx->BinaryOperator(llvm::Instruction::Add, smearCounter, 01310 LLVMInt32Vector(delta), "iter_val"); 01311 ctx->StoreInst(varyingCounter, varyingCounterPtr); 01312 return varyingCounter; 01313 } 01314 01315 01316 /** Returns the integer log2 of the given integer. */ 01317 static int 01318 lLog2(int i) { 01319 int ret = 0; 01320 while (i != 0) { 01321 ++ret; 01322 i >>= 1; 01323 } 01324 return ret-1; 01325 } 01326 01327 01328 /* Figure out how many elements to process in each dimension for each time 01329 through a foreach loop. The untiled case is easy; all of the outer 01330 dimensions up until the innermost one have a span of 1, and the 01331 innermost one takes the entire vector width. For the tiled case, we 01332 give wider spans to the innermost dimensions while also trying to 01333 generate relatively square domains. 01334 01335 This code works recursively from outer dimensions to inner dimensions. 01336 */ 01337 static void 01338 lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a) { 01339 if (dimsLeft == 0) { 01340 // Nothing left to do but give all of the remaining work to the 01341 // innermost domain. 01342 *a = itemsLeft; 01343 return; 01344 } 01345 01346 if (isTiled == false || (dimsLeft >= lLog2(itemsLeft))) 01347 // If we're not tiled, or if there are enough dimensions left that 01348 // giving this one any more than a span of one would mean that a 01349 // later dimension would have to have a span of one, give this one 01350 // a span of one to save the available items for later. 01351 *a = 1; 01352 else if (itemsLeft >= 16 && (dimsLeft == 1)) 01353 // Special case to have 4x4 domains for the 2D case when running 01354 // 16-wide. 01355 *a = 4; 01356 else 01357 // Otherwise give this dimension a span of two. 01358 *a = 2; 01359 01360 lGetSpans(dimsLeft-1, nDims, itemsLeft / *a, isTiled, a+1); 01361 } 01362 01363 01364 /* Emit code for a foreach statement. We effectively emit code to run the 01365 set of n-dimensional nested loops corresponding to the dimensionality of 01366 the foreach statement along with the extra logic to deal with mismatches 01367 between the vector width we're compiling to and the number of elements 01368 to process. 01369 */ 01370 void 01371 ForeachStmt::EmitCode(FunctionEmitContext *ctx) const { 01372 if (ctx->GetCurrentBasicBlock() == NULL || stmts == NULL) 01373 return; 01374 01375 llvm::BasicBlock *bbFullBody = ctx->CreateBasicBlock("foreach_full_body"); 01376 llvm::BasicBlock *bbMaskedBody = ctx->CreateBasicBlock("foreach_masked_body"); 01377 llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit"); 01378 01379 llvm::Value *oldMask = ctx->GetInternalMask(); 01380 llvm::Value *oldFunctionMask = ctx->GetFunctionMask(); 01381 01382 ctx->SetDebugPos(pos); 01383 ctx->StartScope(); 01384 01385 ctx->SetInternalMask(LLVMMaskAllOn); 01386 ctx->SetFunctionMask(LLVMMaskAllOn); 01387 01388 // This should be caught during typechecking 01389 AssertPos(pos, startExprs.size() == dimVariables.size() && 01390 endExprs.size() == dimVariables.size()); 01391 int nDims = (int)dimVariables.size(); 01392 01393 /////////////////////////////////////////////////////////////////////// 01394 // Setup: compute the number of items we have to work on in each 01395 // dimension and a number of derived values. 01396 std::vector<llvm::BasicBlock *> bbReset, bbStep, bbTest; 01397 std::vector<llvm::Value *> startVals, endVals, uniformCounterPtrs; 01398 std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs; 01399 01400 std::vector<int> span(nDims, 0); 01401 lGetSpans(nDims-1, nDims, g->target.vectorWidth, isTiled, &span[0]); 01402 01403 for (int i = 0; i < nDims; ++i) { 01404 // Basic blocks that we'll fill in later with the looping logic for 01405 // this dimension. 01406 bbReset.push_back(ctx->CreateBasicBlock("foreach_reset")); 01407 if (i < nDims-1) 01408 // stepping for the innermost dimension is handled specially 01409 bbStep.push_back(ctx->CreateBasicBlock("foreach_step")); 01410 bbTest.push_back(ctx->CreateBasicBlock("foreach_test")); 01411 01412 // Start and end value for this loop dimension 01413 llvm::Value *sv = startExprs[i]->GetValue(ctx); 01414 llvm::Value *ev = endExprs[i]->GetValue(ctx); 01415 if (sv == NULL || ev == NULL) 01416 return; 01417 startVals.push_back(sv); 01418 endVals.push_back(ev); 01419 01420 // nItems = endVal - startVal 01421 llvm::Value *nItems = 01422 ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems"); 01423 01424 // nExtras = nItems % (span for this dimension) 01425 // This gives us the number of extra elements we need to deal with 01426 // at the end of the loop for this dimension that don't fit cleanly 01427 // into a vector width. 01428 nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems, 01429 LLVMInt32(span[i]), "nextras")); 01430 01431 // alignedEnd = endVal - nExtras 01432 alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev, 01433 nExtras[i], "aligned_end")); 01434 01435 /////////////////////////////////////////////////////////////////////// 01436 // Each dimension has a loop counter that is a uniform value that 01437 // goes from startVal to endVal, in steps of the span for this 01438 // dimension. Its value is only used internally here for looping 01439 // logic and isn't directly available in the user's program code. 01440 uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type, 01441 "counter")); 01442 ctx->StoreInst(startVals[i], uniformCounterPtrs[i]); 01443 01444 // There is also a varying variable that holds the set of index 01445 // values for each dimension in the current loop iteration; this is 01446 // the value that is program-visible. 01447 dimVariables[i]->storagePtr = 01448 ctx->AllocaInst(LLVMTypes::Int32VectorType, 01449 dimVariables[i]->name.c_str()); 01450 dimVariables[i]->parentFunction = ctx->GetFunction(); 01451 ctx->EmitVariableDebugInfo(dimVariables[i]); 01452 01453 // Each dimension also maintains a mask that represents which of 01454 // the varying elements in the current iteration should be 01455 // processed. (i.e. this is used to disable the lanes that have 01456 // out-of-bounds offsets.) 01457 extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask")); 01458 ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]); 01459 } 01460 01461 ctx->StartForeach(FunctionEmitContext::FOREACH_REGULAR); 01462 01463 // On to the outermost loop's test 01464 ctx->BranchInst(bbTest[0]); 01465 01466 /////////////////////////////////////////////////////////////////////////// 01467 // foreach_reset: this code runs when we need to reset the counter for 01468 // a given dimension in preparation for running through its loop again, 01469 // after the enclosing level advances its counter. 01470 for (int i = 0; i < nDims; ++i) { 01471 ctx->SetCurrentBasicBlock(bbReset[i]); 01472 if (i == 0) 01473 ctx->BranchInst(bbExit); 01474 else { 01475 ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]); 01476 ctx->StoreInst(startVals[i], uniformCounterPtrs[i]); 01477 ctx->BranchInst(bbStep[i-1]); 01478 } 01479 } 01480 01481 /////////////////////////////////////////////////////////////////////////// 01482 // foreach_step: increment the uniform counter by the vector width. 01483 // Note that we don't increment the varying counter here as well but 01484 // just generate its value when we need it in the loop body. Don't do 01485 // this for the innermost dimension, which has a more complex stepping 01486 // structure.. 01487 for (int i = 0; i < nDims-1; ++i) { 01488 ctx->SetCurrentBasicBlock(bbStep[i]); 01489 llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]); 01490 llvm::Value *newCounter = 01491 ctx->BinaryOperator(llvm::Instruction::Add, counter, 01492 LLVMInt32(span[i]), "new_counter"); 01493 ctx->StoreInst(newCounter, uniformCounterPtrs[i]); 01494 ctx->BranchInst(bbTest[i]); 01495 } 01496 01497 /////////////////////////////////////////////////////////////////////////// 01498 // foreach_test (for all dimensions other than the innermost...) 01499 std::vector<llvm::Value *> inExtras; 01500 for (int i = 0; i < nDims-1; ++i) { 01501 ctx->SetCurrentBasicBlock(bbTest[i]); 01502 01503 llvm::Value *haveExtras = 01504 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT, 01505 endVals[i], alignedEnd[i], "have_extras"); 01506 01507 llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter"); 01508 llvm::Value *atAlignedEnd = 01509 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, 01510 counter, alignedEnd[i], "at_aligned_end"); 01511 llvm::Value *inEx = 01512 ctx->BinaryOperator(llvm::Instruction::And, haveExtras, 01513 atAlignedEnd, "in_extras"); 01514 01515 if (i == 0) 01516 inExtras.push_back(inEx); 01517 else 01518 inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx, 01519 inExtras[i-1], "in_extras_all")); 01520 01521 llvm::Value *varyingCounter = 01522 lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i], 01523 dimVariables[i]->storagePtr, span); 01524 01525 llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); 01526 for (int j = 0; j < g->target.vectorWidth; ++j) 01527 smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end"); 01528 // Do a vector compare of its value to the end value to generate a 01529 // mask for this last bit of work. 01530 llvm::Value *emask = 01531 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, 01532 varyingCounter, smearEnd); 01533 emask = ctx->I1VecToBoolVec(emask); 01534 01535 if (i == 0) 01536 ctx->StoreInst(emask, extrasMaskPtrs[i]); 01537 else { 01538 llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]); 01539 llvm::Value *newMask = 01540 ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask, 01541 "extras_mask"); 01542 ctx->StoreInst(newMask, extrasMaskPtrs[i]); 01543 } 01544 01545 llvm::Value *notAtEnd = 01546 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, 01547 counter, endVals[i]); 01548 ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd); 01549 } 01550 01551 /////////////////////////////////////////////////////////////////////////// 01552 // foreach_test (for innermost dimension) 01553 // 01554 // All of the outer dimensions are handled generically--basically as a 01555 // for() loop from the start value to the end value, where at each loop 01556 // test, we compute the mask of active elements for the current 01557 // dimension and then update an overall mask that is the AND 01558 // combination of all of the outer ones. 01559 // 01560 // The innermost loop is handled specially, for performance purposes. 01561 // When starting the innermost dimension, we start by checking once 01562 // whether any of the outer dimensions has set the mask to be 01563 // partially-active or not. We follow different code paths for these 01564 // two cases, taking advantage of the knowledge that the mask is all 01565 // on, when this is the case. 01566 // 01567 // In each of these code paths, we start with a loop from the starting 01568 // value to the aligned end value for the innermost dimension; we can 01569 // guarantee that the innermost loop will have an "all on" mask (as far 01570 // as its dimension is concerned) for the duration of this loop. Doing 01571 // so allows us to emit code that assumes the mask is all on (for the 01572 // case where none of the outer dimensions has set the mask to be 01573 // partially on), or allows us to emit code that just uses the mask 01574 // from the outer dimensions directly (for the case where they have). 01575 // 01576 // After this loop, we just need to deal with one vector's worth of 01577 // "ragged extra bits", where the mask used includes the effect of the 01578 // mask for the innermost dimension. 01579 // 01580 // We start out this process by emitting the check that determines 01581 // whether any of the enclosing dimensions is partially active 01582 // (i.e. processing extra elements that don't exactly fit into a 01583 // vector). 01584 llvm::BasicBlock *bbOuterInExtras = 01585 ctx->CreateBasicBlock("outer_in_extras"); 01586 llvm::BasicBlock *bbOuterNotInExtras = 01587 ctx->CreateBasicBlock("outer_not_in_extras"); 01588 01589 ctx->SetCurrentBasicBlock(bbTest[nDims-1]); 01590 if (inExtras.size()) 01591 ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras, 01592 inExtras.back()); 01593 else 01594 // for a 1D iteration domain, we certainly don't have any enclosing 01595 // dimensions that are processing extra elements. 01596 ctx->BranchInst(bbOuterNotInExtras); 01597 01598 /////////////////////////////////////////////////////////////////////////// 01599 // One or more outer dimensions in extras, so we need to mask for the loop 01600 // body regardless. We break this into two cases, roughly: 01601 // for (counter = start; counter < alignedEnd; counter += step) { 01602 // // mask is all on for inner, so set mask to outer mask 01603 // // run loop body with mask 01604 // } 01605 // // counter == alignedEnd 01606 // if (counter < end) { 01607 // // set mask to outermask & (counter+programCounter < end) 01608 // // run loop body with mask 01609 // } 01610 llvm::BasicBlock *bbAllInnerPartialOuter = 01611 ctx->CreateBasicBlock("all_inner_partial_outer"); 01612 llvm::BasicBlock *bbPartial = 01613 ctx->CreateBasicBlock("both_partial"); 01614 ctx->SetCurrentBasicBlock(bbOuterInExtras); { 01615 // Update the varying counter value here, since all subsequent 01616 // blocks along this path need it. 01617 lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], 01618 dimVariables[nDims-1]->storagePtr, span); 01619 01620 // here we just check to see if counter < alignedEnd 01621 llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter"); 01622 llvm::Value *beforeAlignedEnd = 01623 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, 01624 counter, alignedEnd[nDims-1], "before_aligned_end"); 01625 ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd); 01626 } 01627 01628 // Below we have a basic block that runs the loop body code for the 01629 // case where the mask is partially but not fully on. This same block 01630 // runs in multiple cases: both for handling any ragged extra data for 01631 // the innermost dimension but also when outer dimensions have set the 01632 // mask to be partially on. 01633 // 01634 // The value stored in stepIndexAfterMaskedBodyPtr is used after each 01635 // execution of the body code to determine whether the innermost index 01636 // value should be incremented by the step (we're running the "for" 01637 // loop of full vectors at the innermost dimension, with outer 01638 // dimensions having set the mask to be partially on), or whether we're 01639 // running once for the ragged extra bits at the end of the innermost 01640 // dimension, in which case we're done with the innermost dimension and 01641 // should step the loop counter for the next enclosing dimension 01642 // instead. 01643 llvm::Value *stepIndexAfterMaskedBodyPtr = 01644 ctx->AllocaInst(LLVMTypes::BoolType, "step_index"); 01645 01646 /////////////////////////////////////////////////////////////////////////// 01647 // We're in the inner loop part where the only masking is due to outer 01648 // dimensions but the innermost dimension fits fully into a vector's 01649 // width. Set the mask and jump to the masked loop body. 01650 ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); { 01651 llvm::Value *mask; 01652 if (nDims == 1) 01653 // 1D loop; we shouldn't ever get here anyway 01654 mask = LLVMMaskAllOff; 01655 else 01656 mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]); 01657 01658 ctx->SetInternalMask(mask); 01659 01660 ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr); 01661 ctx->BranchInst(bbMaskedBody); 01662 } 01663 01664 /////////////////////////////////////////////////////////////////////////// 01665 // We need to include the effect of the innermost dimension in the mask 01666 // for the final bits here 01667 ctx->SetCurrentBasicBlock(bbPartial); { 01668 llvm::Value *varyingCounter = 01669 ctx->LoadInst(dimVariables[nDims-1]->storagePtr); 01670 llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); 01671 for (int j = 0; j < g->target.vectorWidth; ++j) 01672 smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end"); 01673 llvm::Value *emask = 01674 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, 01675 varyingCounter, smearEnd); 01676 emask = ctx->I1VecToBoolVec(emask); 01677 01678 if (nDims == 1) 01679 ctx->SetInternalMask(emask); 01680 else { 01681 llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]); 01682 llvm::Value *newMask = 01683 ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask, 01684 "extras_mask"); 01685 ctx->SetInternalMask(newMask); 01686 } 01687 01688 ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr); 01689 ctx->BranchInst(bbMaskedBody); 01690 } 01691 01692 /////////////////////////////////////////////////////////////////////////// 01693 // None of the outer dimensions is processing extras; along the lines 01694 // of above, we can express this as: 01695 // for (counter = start; counter < alignedEnd; counter += step) { 01696 // // mask is all on 01697 // // run loop body with mask all on 01698 // } 01699 // // counter == alignedEnd 01700 // if (counter < end) { 01701 // // set mask to (counter+programCounter < end) 01702 // // run loop body with mask 01703 // } 01704 llvm::BasicBlock *bbPartialInnerAllOuter = 01705 ctx->CreateBasicBlock("partial_inner_all_outer"); 01706 ctx->SetCurrentBasicBlock(bbOuterNotInExtras); { 01707 llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter"); 01708 llvm::Value *beforeAlignedEnd = 01709 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, 01710 counter, alignedEnd[nDims-1], "before_aligned_end"); 01711 ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter, 01712 beforeAlignedEnd); 01713 } 01714 01715 /////////////////////////////////////////////////////////////////////////// 01716 // full_body: do a full vector's worth of work. We know that all 01717 // lanes will be running here, so we explicitly set the mask to be 'all 01718 // on'. This ends up being relatively straightforward: just update the 01719 // value of the varying loop counter and have the statements in the 01720 // loop body emit their code. 01721 llvm::BasicBlock *bbFullBodyContinue = 01722 ctx->CreateBasicBlock("foreach_full_continue"); 01723 ctx->SetCurrentBasicBlock(bbFullBody); { 01724 ctx->SetInternalMask(LLVMMaskAllOn); 01725 lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], 01726 dimVariables[nDims-1]->storagePtr, span); 01727 ctx->SetContinueTarget(bbFullBodyContinue); 01728 ctx->AddInstrumentationPoint("foreach loop body (all on)"); 01729 stmts->EmitCode(ctx); 01730 AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL); 01731 ctx->BranchInst(bbFullBodyContinue); 01732 } 01733 ctx->SetCurrentBasicBlock(bbFullBodyContinue); { 01734 ctx->RestoreContinuedLanes(); 01735 llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]); 01736 llvm::Value *newCounter = 01737 ctx->BinaryOperator(llvm::Instruction::Add, counter, 01738 LLVMInt32(span[nDims-1]), "new_counter"); 01739 ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]); 01740 ctx->BranchInst(bbOuterNotInExtras); 01741 } 01742 01743 /////////////////////////////////////////////////////////////////////////// 01744 // We're done running blocks with the mask all on; see if the counter is 01745 // less than the end value, in which case we need to run the body one 01746 // more time to get the extra bits. 01747 llvm::BasicBlock *bbSetInnerMask = 01748 ctx->CreateBasicBlock("partial_inner_only"); 01749 ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); { 01750 llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter"); 01751 llvm::Value *beforeFullEnd = 01752 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, 01753 counter, endVals[nDims-1], "before_full_end"); 01754 ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd); 01755 } 01756 01757 /////////////////////////////////////////////////////////////////////////// 01758 // The outer dimensions are all on, so the mask is just given by the 01759 // mask for the innermost dimension 01760 ctx->SetCurrentBasicBlock(bbSetInnerMask); { 01761 llvm::Value *varyingCounter = 01762 lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], 01763 dimVariables[nDims-1]->storagePtr, span); 01764 llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); 01765 for (int j = 0; j < g->target.vectorWidth; ++j) 01766 smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end"); 01767 llvm::Value *emask = 01768 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, 01769 varyingCounter, smearEnd); 01770 emask = ctx->I1VecToBoolVec(emask); 01771 ctx->SetInternalMask(emask); 01772 01773 ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr); 01774 ctx->BranchInst(bbMaskedBody); 01775 } 01776 01777 /////////////////////////////////////////////////////////////////////////// 01778 // masked_body: set the mask and have the statements emit their 01779 // code again. Note that it's generally worthwhile having two copies 01780 // of the statements' code, since the code above is emitted with the 01781 // mask known to be all-on, which in turn leads to more efficient code 01782 // for that case. 01783 llvm::BasicBlock *bbStepInnerIndex = 01784 ctx->CreateBasicBlock("step_inner_index"); 01785 llvm::BasicBlock *bbMaskedBodyContinue = 01786 ctx->CreateBasicBlock("foreach_masked_continue"); 01787 ctx->SetCurrentBasicBlock(bbMaskedBody); { 01788 ctx->AddInstrumentationPoint("foreach loop body (masked)"); 01789 ctx->SetContinueTarget(bbMaskedBodyContinue); 01790 ctx->DisableGatherScatterWarnings(); 01791 stmts->EmitCode(ctx); 01792 ctx->EnableGatherScatterWarnings(); 01793 ctx->BranchInst(bbMaskedBodyContinue); 01794 } 01795 ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); { 01796 ctx->RestoreContinuedLanes(); 01797 llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr); 01798 ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex); 01799 } 01800 01801 /////////////////////////////////////////////////////////////////////////// 01802 // step the innermost index, for the case where we're doing the 01803 // innermost for loop over full vectors. 01804 ctx->SetCurrentBasicBlock(bbStepInnerIndex); { 01805 llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]); 01806 llvm::Value *newCounter = 01807 ctx->BinaryOperator(llvm::Instruction::Add, counter, 01808 LLVMInt32(span[nDims-1]), "new_counter"); 01809 ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]); 01810 ctx->BranchInst(bbOuterInExtras); 01811 } 01812 01813 /////////////////////////////////////////////////////////////////////////// 01814 // foreach_exit: All done. Restore the old mask and clean up 01815 ctx->SetCurrentBasicBlock(bbExit); 01816 01817 ctx->SetInternalMask(oldMask); 01818 ctx->SetFunctionMask(oldFunctionMask); 01819 01820 ctx->EndForeach(); 01821 ctx->EndScope(); 01822 } 01823 01824 01825 Stmt * 01826 ForeachStmt::TypeCheck() { 01827 bool anyErrors = false; 01828 for (unsigned int i = 0; i < startExprs.size(); ++i) { 01829 if (startExprs[i] != NULL) 01830 startExprs[i] = TypeConvertExpr(startExprs[i], 01831 AtomicType::UniformInt32, 01832 "foreach starting value"); 01833 anyErrors |= (startExprs[i] == NULL); 01834 } 01835 for (unsigned int i = 0; i < endExprs.size(); ++i) { 01836 if (endExprs[i] != NULL) 01837 endExprs[i] = TypeConvertExpr(endExprs[i], AtomicType::UniformInt32, 01838 "foreach ending value"); 01839 anyErrors |= (endExprs[i] == NULL); 01840 } 01841 01842 if (startExprs.size() < dimVariables.size()) { 01843 Error(pos, "Not enough initial values provided for \"foreach\" loop; " 01844 "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size()); 01845 anyErrors = true; 01846 } 01847 else if (startExprs.size() > dimVariables.size()) { 01848 Error(pos, "Too many initial values provided for \"foreach\" loop; " 01849 "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size()); 01850 anyErrors = true; 01851 } 01852 01853 if (endExprs.size() < dimVariables.size()) { 01854 Error(pos, "Not enough initial values provided for \"foreach\" loop; " 01855 "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size()); 01856 anyErrors = true; 01857 } 01858 else if (endExprs.size() > dimVariables.size()) { 01859 Error(pos, "Too many initial values provided for \"foreach\" loop; " 01860 "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size()); 01861 anyErrors = true; 01862 } 01863 01864 return anyErrors ? NULL : this; 01865 } 01866 01867 01868 int 01869 ForeachStmt::EstimateCost() const { 01870 return dimVariables.size() * (COST_UNIFORM_LOOP + COST_SIMPLE_ARITH_LOGIC_OP); 01871 } 01872 01873 01874 void 01875 ForeachStmt::Print(int indent) const { 01876 printf("%*cForeach Stmt", indent, ' '); 01877 pos.Print(); 01878 printf("\n"); 01879 01880 for (unsigned int i = 0; i < dimVariables.size(); ++i) 01881 if (dimVariables[i] != NULL) 01882 printf("%*cVar %d: %s\n", indent+4, ' ', i, 01883 dimVariables[i]->name.c_str()); 01884 else 01885 printf("%*cVar %d: NULL\n", indent+4, ' ', i); 01886 01887 printf("Start values:\n"); 01888 for (unsigned int i = 0; i < startExprs.size(); ++i) { 01889 if (startExprs[i] != NULL) 01890 startExprs[i]->Print(); 01891 else 01892 printf("NULL"); 01893 if (i != startExprs.size()-1) 01894 printf(", "); 01895 else 01896 printf("\n"); 01897 } 01898 01899 printf("End values:\n"); 01900 for (unsigned int i = 0; i < endExprs.size(); ++i) { 01901 if (endExprs[i] != NULL) 01902 endExprs[i]->Print(); 01903 else 01904 printf("NULL"); 01905 if (i != endExprs.size()-1) 01906 printf(", "); 01907 else 01908 printf("\n"); 01909 } 01910 01911 if (stmts != NULL) { 01912 printf("%*cStmts:\n", indent+4, ' '); 01913 stmts->Print(indent+8); 01914 } 01915 } 01916 01917 01918 /////////////////////////////////////////////////////////////////////////// 01919 // ForeachActiveStmt 01920 01921 ForeachActiveStmt::ForeachActiveStmt(Symbol *s, Stmt *st, SourcePos pos) 01922 : Stmt(pos) { 01923 sym = s; 01924 stmts = st; 01925 } 01926 01927 01928 void 01929 ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const { 01930 if (!ctx->GetCurrentBasicBlock()) 01931 return; 01932 01933 // Allocate storage for the symbol that we'll use for the uniform 01934 // variable that holds the current program instance in each loop 01935 // iteration. 01936 if (sym->type == NULL) { 01937 Assert(m->errorCount > 0); 01938 return; 01939 } 01940 Assert(Type::Equal(sym->type, 01941 AtomicType::UniformInt64->GetAsConstType())); 01942 sym->storagePtr = ctx->AllocaInst(LLVMTypes::Int64Type, sym->name.c_str()); 01943 01944 ctx->SetDebugPos(pos); 01945 ctx->EmitVariableDebugInfo(sym); 01946 01947 // The various basic blocks that we'll need in the below 01948 llvm::BasicBlock *bbFindNext = 01949 ctx->CreateBasicBlock("foreach_active_find_next"); 01950 llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_active_body"); 01951 llvm::BasicBlock *bbCheckForMore = 01952 ctx->CreateBasicBlock("foreach_active_check_for_more"); 01953 llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("foreach_active_done"); 01954 01955 // Save the old mask so that we can restore it at the end 01956 llvm::Value *oldInternalMask = ctx->GetInternalMask(); 01957 01958 // Now, *maskBitsPtr will maintain a bitmask for the lanes that remain 01959 // to be processed by a pass through the loop body. It starts out with 01960 // the current execution mask (which should never be all off going in 01961 // to this)... 01962 llvm::Value *oldFullMask = ctx->GetFullMask(); 01963 llvm::Value *maskBitsPtr = 01964 ctx->AllocaInst(LLVMTypes::Int64Type, "mask_bits"); 01965 llvm::Value *movmsk = ctx->LaneMask(oldFullMask); 01966 ctx->StoreInst(movmsk, maskBitsPtr); 01967 01968 // Officially start the loop. 01969 ctx->StartScope(); 01970 ctx->StartForeach(FunctionEmitContext::FOREACH_ACTIVE); 01971 ctx->SetContinueTarget(bbCheckForMore); 01972 01973 // Onward to find the first set of program instance to run the loop for 01974 ctx->BranchInst(bbFindNext); 01975 01976 ctx->SetCurrentBasicBlock(bbFindNext); { 01977 // Load the bitmask of the lanes left to be processed 01978 llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits"); 01979 01980 // Find the index of the first set bit in the mask 01981 llvm::Function *ctlzFunc = 01982 m->module->getFunction("__count_trailing_zeros_i64"); 01983 Assert(ctlzFunc != NULL); 01984 llvm::Value *firstSet = ctx->CallInst(ctlzFunc, NULL, remainingBits, 01985 "first_set"); 01986 01987 // Store that value into the storage allocated for the iteration 01988 // variable. 01989 ctx->StoreInst(firstSet, sym->storagePtr); 01990 01991 // Now set the execution mask to be only on for the current program 01992 // instance. (TODO: is there a more efficient way to do this? e.g. 01993 // for AVX1, we might want to do this as float rather than int 01994 // math...) 01995 01996 // Get the "program index" vector value 01997 llvm::Value *programIndex = 01998 llvm::UndefValue::get(LLVMTypes::Int32VectorType); 01999 for (int i = 0; i < g->target.vectorWidth; ++i) 02000 programIndex = ctx->InsertInst(programIndex, LLVMInt32(i), i, 02001 "prog_index"); 02002 02003 // And smear the current lane out to a vector 02004 llvm::Value *firstSet32 = 02005 ctx->TruncInst(firstSet, LLVMTypes::Int32Type, "first_set32"); 02006 llvm::Value *firstSet32Smear = ctx->SmearUniform(firstSet32); 02007 02008 // Now set the execution mask based on doing a vector compare of 02009 // these two 02010 llvm::Value *iterMask = 02011 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, 02012 firstSet32Smear, programIndex); 02013 iterMask = ctx->I1VecToBoolVec(iterMask); 02014 02015 ctx->SetInternalMask(iterMask); 02016 02017 // Also update the bitvector of lanes left to turn off the bit for 02018 // the lane we're about to run. 02019 llvm::Value *setMask = 02020 ctx->BinaryOperator(llvm::Instruction::Shl, LLVMInt64(1), 02021 firstSet, "set_mask"); 02022 llvm::Value *notSetMask = ctx->NotOperator(setMask); 02023 llvm::Value *newRemaining = 02024 ctx->BinaryOperator(llvm::Instruction::And, remainingBits, 02025 notSetMask, "new_remaining"); 02026 ctx->StoreInst(newRemaining, maskBitsPtr); 02027 02028 // and onward to run the loop body... 02029 ctx->BranchInst(bbBody); 02030 } 02031 02032 ctx->SetCurrentBasicBlock(bbBody); { 02033 // Run the code in the body of the loop. This is easy now. 02034 if (stmts) 02035 stmts->EmitCode(ctx); 02036 02037 Assert(ctx->GetCurrentBasicBlock() != NULL); 02038 ctx->BranchInst(bbCheckForMore); 02039 } 02040 02041 ctx->SetCurrentBasicBlock(bbCheckForMore); { 02042 // At the end of the loop body (either due to running the 02043 // statements normally, or a continue statement in the middle of 02044 // the loop that jumps to the end, see if there are any lanes left 02045 // to be processed. 02046 llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits"); 02047 llvm::Value *nonZero = 02048 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, 02049 remainingBits, LLVMInt64(0), "remaining_ne_zero"); 02050 ctx->BranchInst(bbFindNext, bbDone, nonZero); 02051 } 02052 02053 ctx->SetCurrentBasicBlock(bbDone); 02054 ctx->SetInternalMask(oldInternalMask); 02055 ctx->EndForeach(); 02056 ctx->EndScope(); 02057 } 02058 02059 02060 void 02061 ForeachActiveStmt::Print(int indent) const { 02062 printf("%*cForeach_active Stmt", indent, ' '); 02063 pos.Print(); 02064 printf("\n"); 02065 02066 printf("%*cIter symbol: ", indent+4, ' '); 02067 if (sym != NULL) { 02068 printf("%s", sym->name.c_str()); 02069 if (sym->type != NULL) 02070 printf(" %s", sym->type->GetString().c_str()); 02071 } 02072 else 02073 printf("NULL"); 02074 printf("\n"); 02075 02076 printf("%*cStmts:\n", indent+4, ' '); 02077 if (stmts != NULL) 02078 stmts->Print(indent+8); 02079 else 02080 printf("NULL"); 02081 printf("\n"); 02082 } 02083 02084 02085 Stmt * 02086 ForeachActiveStmt::TypeCheck() { 02087 if (sym == NULL) 02088 return NULL; 02089 02090 return this; 02091 } 02092 02093 02094 int 02095 ForeachActiveStmt::EstimateCost() const { 02096 return COST_VARYING_LOOP; 02097 } 02098 02099 02100 /////////////////////////////////////////////////////////////////////////// 02101 // ForeachUniqueStmt 02102 02103 ForeachUniqueStmt::ForeachUniqueStmt(const char *iterName, Expr *e, 02104 Stmt *s, SourcePos pos) 02105 : Stmt(pos) { 02106 sym = m->symbolTable->LookupVariable(iterName); 02107 expr = e; 02108 stmts = s; 02109 } 02110 02111 02112 void 02113 ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const { 02114 if (!ctx->GetCurrentBasicBlock()) 02115 return; 02116 02117 // First, allocate local storage for the symbol that we'll use for the 02118 // uniform variable that holds the current unique value through each 02119 // loop. 02120 if (sym->type == NULL) { 02121 Assert(m->errorCount > 0); 02122 return; 02123 } 02124 llvm::Type *symType = sym->type->LLVMType(g->ctx); 02125 if (symType == NULL) { 02126 Assert(m->errorCount > 0); 02127 return; 02128 } 02129 sym->storagePtr = ctx->AllocaInst(symType, sym->name.c_str()); 02130 02131 ctx->SetDebugPos(pos); 02132 ctx->EmitVariableDebugInfo(sym); 02133 02134 // The various basic blocks that we'll need in the below 02135 llvm::BasicBlock *bbFindNext = ctx->CreateBasicBlock("foreach_find_next"); 02136 llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_body"); 02137 llvm::BasicBlock *bbCheckForMore = ctx->CreateBasicBlock("foreach_check_for_more"); 02138 llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("foreach_done"); 02139 02140 // Prepare the FunctionEmitContext 02141 ctx->StartScope(); 02142 02143 // Save the old internal mask so that we can restore it at the end 02144 llvm::Value *oldMask = ctx->GetInternalMask(); 02145 02146 // Now, *maskBitsPtr will maintain a bitmask for the lanes that remain 02147 // to be processed by a pass through the foreach_unique loop body. It 02148 // starts out with the full execution mask (which should never be all 02149 // off going in to this)... 02150 llvm::Value *oldFullMask = ctx->GetFullMask(); 02151 llvm::Value *maskBitsPtr = ctx->AllocaInst(LLVMTypes::Int64Type, "mask_bits"); 02152 llvm::Value *movmsk = ctx->LaneMask(oldFullMask); 02153 ctx->StoreInst(movmsk, maskBitsPtr); 02154 02155 // Officially start the loop. 02156 ctx->StartForeach(FunctionEmitContext::FOREACH_UNIQUE); 02157 ctx->SetContinueTarget(bbCheckForMore); 02158 02159 // Evaluate the varying expression we're iterating over just once. 02160 llvm::Value *exprValue = expr->GetValue(ctx); 02161 02162 // And we'll store its value into locally-allocated storage, for ease 02163 // of indexing over it with non-compile-time-constant indices. 02164 const Type *exprType; 02165 llvm::VectorType *llvmExprType; 02166 if (exprValue == NULL || 02167 (exprType = expr->GetType()) == NULL || 02168 (llvmExprType = 02169 llvm::dyn_cast<llvm::VectorType>(exprValue->getType())) == NULL) { 02170 Assert(m->errorCount > 0); 02171 return; 02172 } 02173 ctx->SetDebugPos(pos); 02174 const Type *exprPtrType = PointerType::GetUniform(exprType); 02175 llvm::Value *exprMem = ctx->AllocaInst(llvmExprType, "expr_mem"); 02176 ctx->StoreInst(exprValue, exprMem); 02177 02178 // Onward to find the first set of lanes to run the loop for 02179 ctx->BranchInst(bbFindNext); 02180 02181 ctx->SetCurrentBasicBlock(bbFindNext); { 02182 // Load the bitmask of the lanes left to be processed 02183 llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits"); 02184 02185 // Find the index of the first set bit in the mask 02186 llvm::Function *ctlzFunc = 02187 m->module->getFunction("__count_trailing_zeros_i64"); 02188 Assert(ctlzFunc != NULL); 02189 llvm::Value *firstSet = ctx->CallInst(ctlzFunc, NULL, remainingBits, 02190 "first_set"); 02191 02192 // And load the corresponding element value from the temporary 02193 // memory storing the value of the varying expr. 02194 llvm::Value *uniqueValuePtr = 02195 ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType, 02196 "unique_index_ptr"); 02197 llvm::Value *uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value"); 02198 02199 // If it's a varying pointer type, need to convert from the int 02200 // type we store in the vector to the actual pointer type 02201 if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL) 02202 uniqueValue = ctx->IntToPtrInst(uniqueValue, symType); 02203 02204 // Store that value in sym's storage so that the iteration variable 02205 // has the right value inside the loop body 02206 ctx->StoreInst(uniqueValue, sym->storagePtr); 02207 02208 // Set the execution mask so that it's on for any lane that a) was 02209 // running at the start of the foreach loop, and b) where that 02210 // lane's value of the varying expression is the same as the value 02211 // we've selected to process this time through--i.e.: 02212 // oldMask & (smear(element) == exprValue) 02213 llvm::Value *uniqueSmear = ctx->SmearUniform(uniqueValue, "unique_semar"); 02214 llvm::Value *matchingLanes = NULL; 02215 if (uniqueValue->getType()->isFloatingPointTy()) 02216 matchingLanes = 02217 ctx->CmpInst(llvm::Instruction::FCmp, llvm::CmpInst::FCMP_OEQ, 02218 uniqueSmear, exprValue, "matching_lanes"); 02219 else 02220 matchingLanes = 02221 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, 02222 uniqueSmear, exprValue, "matching_lanes"); 02223 matchingLanes = ctx->I1VecToBoolVec(matchingLanes); 02224 02225 llvm::Value *loopMask = 02226 ctx->BinaryOperator(llvm::Instruction::And, oldMask, matchingLanes, 02227 "foreach_unique_loop_mask"); 02228 ctx->SetInternalMask(loopMask); 02229 02230 // Also update the bitvector of lanes left to process in subsequent 02231 // loop iterations: 02232 // remainingBits &= ~movmsk(current mask) 02233 llvm::Value *loopMaskMM = ctx->LaneMask(loopMask); 02234 llvm::Value *notLoopMaskMM = ctx->NotOperator(loopMaskMM); 02235 llvm::Value *newRemaining = 02236 ctx->BinaryOperator(llvm::Instruction::And, remainingBits, 02237 notLoopMaskMM, "new_remaining"); 02238 ctx->StoreInst(newRemaining, maskBitsPtr); 02239 02240 // and onward... 02241 ctx->BranchInst(bbBody); 02242 } 02243 02244 ctx->SetCurrentBasicBlock(bbBody); { 02245 // Run the code in the body of the loop. This is easy now. 02246 if (stmts) 02247 stmts->EmitCode(ctx); 02248 02249 Assert(ctx->GetCurrentBasicBlock() != NULL); 02250 ctx->BranchInst(bbCheckForMore); 02251 } 02252 02253 ctx->SetCurrentBasicBlock(bbCheckForMore); { 02254 // At the end of the loop body (either due to running the 02255 // statements normally, or a continue statement in the middle of 02256 // the loop that jumps to the end, see if there are any lanes left 02257 // to be processed. 02258 llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits"); 02259 llvm::Value *nonZero = 02260 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, 02261 remainingBits, LLVMInt64(0), "remaining_ne_zero"); 02262 ctx->BranchInst(bbFindNext, bbDone, nonZero); 02263 } 02264 02265 ctx->SetCurrentBasicBlock(bbDone); 02266 ctx->SetInternalMask(oldMask); 02267 ctx->EndForeach(); 02268 ctx->EndScope(); 02269 } 02270 02271 02272 void 02273 ForeachUniqueStmt::Print(int indent) const { 02274 printf("%*cForeach_unique Stmt", indent, ' '); 02275 pos.Print(); 02276 printf("\n"); 02277 02278 printf("%*cIter symbol: ", indent+4, ' '); 02279 if (sym != NULL) { 02280 printf("%s", sym->name.c_str()); 02281 if (sym->type != NULL) 02282 printf(" %s", sym->type->GetString().c_str()); 02283 } 02284 else 02285 printf("NULL"); 02286 printf("\n"); 02287 02288 printf("%*cIter expr: ", indent+4, ' '); 02289 if (expr != NULL) 02290 expr->Print(); 02291 else 02292 printf("NULL"); 02293 printf("\n"); 02294 02295 printf("%*cStmts:\n", indent+4, ' '); 02296 if (stmts != NULL) 02297 stmts->Print(indent+8); 02298 else 02299 printf("NULL"); 02300 printf("\n"); 02301 } 02302 02303 02304 Stmt * 02305 ForeachUniqueStmt::TypeCheck() { 02306 const Type *type; 02307 if (sym == NULL || expr == NULL || (type = expr->GetType()) == NULL) 02308 return NULL; 02309 02310 if (type->IsVaryingType() == false) { 02311 Error(expr->pos, "Iteration domain type in \"foreach_tiled\" loop " 02312 "must be \"varying\" type, not \"%s\".", 02313 type->GetString().c_str()); 02314 return false; 02315 } 02316 02317 if (Type::IsBasicType(type) == false) { 02318 Error(expr->pos, "Iteration domain type in \"foreach_tiled\" loop " 02319 "must be an atomic, pointer, or enum type, not \"%s\".", 02320 type->GetString().c_str()); 02321 return false; 02322 } 02323 02324 return this; 02325 } 02326 02327 02328 int 02329 ForeachUniqueStmt::EstimateCost() const { 02330 return COST_VARYING_LOOP; 02331 } 02332 02333 02334 /////////////////////////////////////////////////////////////////////////// 02335 // CaseStmt 02336 02337 /** Given the statements following a 'case' or 'default' label, this 02338 function determines whether the mask should be checked to see if it is 02339 "all off" immediately after the label, before executing the code for 02340 the statements. 02341 */ 02342 static bool 02343 lCheckMask(Stmt *stmts) { 02344 if (stmts == NULL) 02345 return false; 02346 02347 int cost = EstimateCost(stmts); 02348 bool safeToRunWithAllLanesOff = SafeToRunWithMaskAllOff(stmts); 02349 02350 // The mask should be checked if the code following the 02351 // 'case'/'default' is relatively complex, or if it would be unsafe to 02352 // run that code with the execution mask all off. 02353 return (cost > PREDICATE_SAFE_IF_STATEMENT_COST || 02354 safeToRunWithAllLanesOff == false); 02355 } 02356 02357 02358 CaseStmt::CaseStmt(int v, Stmt *s, SourcePos pos) 02359 : Stmt(pos), value(v) { 02360 stmts = s; 02361 } 02362 02363 02364 void 02365 CaseStmt::EmitCode(FunctionEmitContext *ctx) const { 02366 ctx->EmitCaseLabel(value, lCheckMask(stmts), pos); 02367 if (stmts) 02368 stmts->EmitCode(ctx); 02369 } 02370 02371 02372 void 02373 CaseStmt::Print(int indent) const { 02374 printf("%*cCase [%d] label", indent, ' ', value); 02375 pos.Print(); 02376 printf("\n"); 02377 stmts->Print(indent+4); 02378 } 02379 02380 02381 Stmt * 02382 CaseStmt::TypeCheck() { 02383 return this; 02384 } 02385 02386 02387 int 02388 CaseStmt::EstimateCost() const { 02389 return 0; 02390 } 02391 02392 02393 /////////////////////////////////////////////////////////////////////////// 02394 // DefaultStmt 02395 02396 DefaultStmt::DefaultStmt(Stmt *s, SourcePos pos) 02397 : Stmt(pos) { 02398 stmts = s; 02399 } 02400 02401 02402 void 02403 DefaultStmt::EmitCode(FunctionEmitContext *ctx) const { 02404 ctx->EmitDefaultLabel(lCheckMask(stmts), pos); 02405 if (stmts) 02406 stmts->EmitCode(ctx); 02407 } 02408 02409 02410 void 02411 DefaultStmt::Print(int indent) const { 02412 printf("%*cDefault Stmt", indent, ' '); 02413 pos.Print(); 02414 printf("\n"); 02415 stmts->Print(indent+4); 02416 } 02417 02418 02419 Stmt * 02420 DefaultStmt::TypeCheck() { 02421 return this; 02422 } 02423 02424 02425 int 02426 DefaultStmt::EstimateCost() const { 02427 return 0; 02428 } 02429 02430 02431 /////////////////////////////////////////////////////////////////////////// 02432 // SwitchStmt 02433 02434 SwitchStmt::SwitchStmt(Expr *e, Stmt *s, SourcePos pos) 02435 : Stmt(pos) { 02436 expr = e; 02437 stmts = s; 02438 } 02439 02440 02441 /* An instance of this structure is carried along as we traverse the AST 02442 nodes for the statements after a "switch" statement. We use this 02443 structure to record all of the 'case' and 'default' statements after the 02444 "switch". */ 02445 struct SwitchVisitInfo { 02446 SwitchVisitInfo(FunctionEmitContext *c) { 02447 ctx = c; 02448 defaultBlock = NULL; 02449 lastBlock = NULL; 02450 } 02451 02452 FunctionEmitContext *ctx; 02453 02454 /* Basic block for the code following the "default" label (if any). */ 02455 llvm::BasicBlock *defaultBlock; 02456 02457 /* Map from integer values after "case" labels to the basic blocks that 02458 follow the corresponding "case" label. */ 02459 std::vector<std::pair<int, llvm::BasicBlock *> > caseBlocks; 02460 02461 /* For each basic block for a "case" label or a "default" label, 02462 nextBlock[block] stores the basic block pointer for the next 02463 subsequent "case" or "default" label in the program. */ 02464 std::map<llvm::BasicBlock *, llvm::BasicBlock *> nextBlock; 02465 02466 /* The last basic block created for a "case" or "default" label; when 02467 we create the basic block for the next one, we'll use this to update 02468 the nextBlock map<> above. */ 02469 llvm::BasicBlock *lastBlock; 02470 }; 02471 02472 02473 static bool 02474 lSwitchASTPreVisit(ASTNode *node, void *d) { 02475 if (dynamic_cast<SwitchStmt *>(node) != NULL) 02476 // don't continue recursively into a nested switch--we only want 02477 // our own case and default statements! 02478 return false; 02479 02480 CaseStmt *cs = dynamic_cast<CaseStmt *>(node); 02481 DefaultStmt *ds = dynamic_cast<DefaultStmt *>(node); 02482 02483 SwitchVisitInfo *svi = (SwitchVisitInfo *)d; 02484 llvm::BasicBlock *bb = NULL; 02485 if (cs != NULL) { 02486 // Complain if we've seen a case statement with the same value 02487 // already 02488 for (int i = 0; i < (int)svi->caseBlocks.size(); ++i) { 02489 if (svi->caseBlocks[i].first == cs->value) { 02490 Error(cs->pos, "Duplicate case value \"%d\".", cs->value); 02491 return true; 02492 } 02493 } 02494 02495 // Otherwise create a new basic block for the code following this 02496 // 'case' statement and record the mappign between the case label 02497 // value and the basic block 02498 char buf[32]; 02499 sprintf(buf, "case_%d", cs->value); 02500 bb = svi->ctx->CreateBasicBlock(buf); 02501 svi->caseBlocks.push_back(std::make_pair(cs->value, bb)); 02502 } 02503 else if (ds != NULL) { 02504 // And complain if we've seen another 'default' label.. 02505 if (svi->defaultBlock != NULL) { 02506 Error(ds->pos, "Multiple \"default\" lables in switch statement."); 02507 return true; 02508 } 02509 else { 02510 // Otherwise create a basic block for the code following the 02511 // "default". 02512 bb = svi->ctx->CreateBasicBlock("default"); 02513 svi->defaultBlock = bb; 02514 } 02515 } 02516 02517 // If we saw a "case" or "default" label, then update the map to record 02518 // that the block we just created follows the block created for the 02519 // previous label in the "switch". 02520 if (bb != NULL) { 02521 svi->nextBlock[svi->lastBlock] = bb; 02522 svi->lastBlock = bb; 02523 } 02524 02525 return true; 02526 } 02527 02528 02529 void 02530 SwitchStmt::EmitCode(FunctionEmitContext *ctx) const { 02531 if (ctx->GetCurrentBasicBlock() == NULL) 02532 return; 02533 02534 const Type *type; 02535 if (expr == NULL || ((type = expr->GetType()) == NULL)) { 02536 AssertPos(pos, m->errorCount > 0); 02537 return; 02538 } 02539 02540 // Basic block we'll end up after the switch statement 02541 llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("switch_done"); 02542 02543 // Walk the AST of the statements after the 'switch' to collect a bunch 02544 // of information about the structure of the 'case' and 'default' 02545 // statements. 02546 SwitchVisitInfo svi(ctx); 02547 WalkAST(stmts, lSwitchASTPreVisit, NULL, &svi); 02548 // Record that the basic block following the last one created for a 02549 // case/default is the block after the end of the switch statement. 02550 svi.nextBlock[svi.lastBlock] = bbDone; 02551 02552 llvm::Value *exprValue = expr->GetValue(ctx); 02553 if (exprValue == NULL) { 02554 AssertPos(pos, m->errorCount > 0); 02555 return; 02556 } 02557 02558 bool isUniformCF = (type->IsUniformType() && 02559 lHasVaryingBreakOrContinue(stmts) == false); 02560 ctx->StartSwitch(isUniformCF, bbDone); 02561 ctx->SwitchInst(exprValue, svi.defaultBlock ? svi.defaultBlock : bbDone, 02562 svi.caseBlocks, svi.nextBlock); 02563 02564 if (stmts != NULL) 02565 stmts->EmitCode(ctx); 02566 02567 if (ctx->GetCurrentBasicBlock() != NULL) 02568 ctx->BranchInst(bbDone); 02569 02570 ctx->SetCurrentBasicBlock(bbDone); 02571 ctx->EndSwitch(); 02572 } 02573 02574 02575 void 02576 SwitchStmt::Print(int indent) const { 02577 printf("%*cSwitch Stmt", indent, ' '); 02578 pos.Print(); 02579 printf("\n"); 02580 printf("%*cexpr = ", indent, ' '); 02581 expr->Print(); 02582 printf("\n"); 02583 stmts->Print(indent+4); 02584 } 02585 02586 02587 Stmt * 02588 SwitchStmt::TypeCheck() { 02589 const Type *exprType; 02590 if (expr == NULL || 02591 (exprType = expr->GetType()) == NULL) { 02592 Assert(m->errorCount > 0); 02593 return NULL; 02594 } 02595 02596 const Type *toType = NULL; 02597 exprType = exprType->GetAsConstType(); 02598 bool is64bit = (Type::EqualIgnoringConst(exprType->GetAsUniformType(), 02599 AtomicType::UniformUInt64) || 02600 Type::EqualIgnoringConst(exprType->GetAsUniformType(), 02601 AtomicType::UniformInt64)); 02602 02603 if (exprType->IsUniformType()) { 02604 if (is64bit) toType = AtomicType::UniformInt64; 02605 else toType = AtomicType::UniformInt32; 02606 } 02607 else { 02608 if (is64bit) toType = AtomicType::VaryingInt64; 02609 else toType = AtomicType::VaryingInt32; 02610 } 02611 02612 expr = TypeConvertExpr(expr, toType, "switch expression"); 02613 if (expr == NULL) 02614 return NULL; 02615 02616 return this; 02617 } 02618 02619 02620 int 02621 SwitchStmt::EstimateCost() const { 02622 const Type *type = expr->GetType(); 02623 if (type && type->IsVaryingType()) 02624 return COST_VARYING_SWITCH; 02625 else 02626 return COST_UNIFORM_SWITCH; 02627 } 02628 02629 02630 /////////////////////////////////////////////////////////////////////////// 02631 // UnmaskedStmt 02632 02633 UnmaskedStmt::UnmaskedStmt(Stmt *s, SourcePos pos) 02634 : Stmt(pos) { 02635 stmts = s; 02636 } 02637 02638 02639 void 02640 UnmaskedStmt::EmitCode(FunctionEmitContext *ctx) const { 02641 if (!ctx->GetCurrentBasicBlock() || !stmts) 02642 return; 02643 02644 llvm::Value *oldInternalMask = ctx->GetInternalMask(); 02645 llvm::Value *oldFunctionMask = ctx->GetFunctionMask(); 02646 02647 ctx->SetInternalMask(LLVMMaskAllOn); 02648 ctx->SetFunctionMask(LLVMMaskAllOn); 02649 02650 stmts->EmitCode(ctx); 02651 02652 ctx->SetInternalMask(oldInternalMask); 02653 ctx->SetFunctionMask(oldFunctionMask); 02654 } 02655 02656 02657 void 02658 UnmaskedStmt::Print(int indent) const { 02659 printf("%*cUnmasked Stmt", indent, ' '); 02660 pos.Print(); 02661 printf("\n"); 02662 02663 printf("%*cStmts:\n", indent+4, ' '); 02664 if (stmts != NULL) 02665 stmts->Print(indent+8); 02666 else 02667 printf("NULL"); 02668 printf("\n"); 02669 } 02670 02671 02672 Stmt * 02673 UnmaskedStmt::TypeCheck() { 02674 return this; 02675 } 02676 02677 02678 int 02679 UnmaskedStmt::EstimateCost() const { 02680 return COST_ASSIGN; 02681 } 02682 02683 02684 /////////////////////////////////////////////////////////////////////////// 02685 // ReturnStmt 02686 02687 ReturnStmt::ReturnStmt(Expr *e, bool cc, SourcePos p) 02688 : Stmt(p), expr(e), 02689 doCoherenceCheck(cc && !g->opt.disableCoherentControlFlow) { 02690 } 02691 02692 02693 void 02694 ReturnStmt::EmitCode(FunctionEmitContext *ctx) const { 02695 if (!ctx->GetCurrentBasicBlock()) 02696 return; 02697 02698 if (ctx->InForeachLoop()) { 02699 Error(pos, "\"return\" statement is illegal inside a \"foreach\" loop."); 02700 return; 02701 } 02702 02703 // Make sure we're not trying to return a reference to something where 02704 // that doesn't make sense 02705 const Function *func = ctx->GetFunction(); 02706 const Type *returnType = func->GetReturnType(); 02707 if (IsReferenceType(returnType) == true && 02708 IsReferenceType(expr->GetType()) == false) { 02709 const Type *lvType = expr->GetLValueType(); 02710 if (lvType == NULL) { 02711 Error(expr->pos, "Illegal to return non-lvalue from function " 02712 "returning reference type \"%s\".", 02713 returnType->GetString().c_str()); 02714 return; 02715 } 02716 else if (lvType->IsUniformType() == false) { 02717 Error(expr->pos, "Illegal to return varying lvalue type from " 02718 "function returning a reference type \"%s\".", 02719 returnType->GetString().c_str()); 02720 return; 02721 } 02722 } 02723 02724 ctx->SetDebugPos(pos); 02725 ctx->CurrentLanesReturned(expr, doCoherenceCheck); 02726 } 02727 02728 02729 Stmt * 02730 ReturnStmt::TypeCheck() { 02731 return this; 02732 } 02733 02734 02735 int 02736 ReturnStmt::EstimateCost() const { 02737 return COST_RETURN; 02738 } 02739 02740 02741 void 02742 ReturnStmt::Print(int indent) const { 02743 printf("%*c%sReturn Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : ""); 02744 pos.Print(); 02745 if (expr) 02746 expr->Print(); 02747 else printf("(void)"); 02748 printf("\n"); 02749 } 02750 02751 02752 /////////////////////////////////////////////////////////////////////////// 02753 // GotoStmt 02754 02755 GotoStmt::GotoStmt(const char *l, SourcePos gotoPos, SourcePos ip) 02756 : Stmt(gotoPos) { 02757 label = l; 02758 identifierPos = ip; 02759 } 02760 02761 02762 void 02763 GotoStmt::EmitCode(FunctionEmitContext *ctx) const { 02764 if (!ctx->GetCurrentBasicBlock()) 02765 return; 02766 02767 if (ctx->VaryingCFDepth() > 0) { 02768 Error(pos, "\"goto\" statements are only legal under \"uniform\" " 02769 "control flow."); 02770 return; 02771 } 02772 if (ctx->InForeachLoop()) { 02773 Error(pos, "\"goto\" statements are currently illegal inside " 02774 "\"foreach\" loops."); 02775 return; 02776 } 02777 02778 llvm::BasicBlock *bb = ctx->GetLabeledBasicBlock(label); 02779 if (bb == NULL) { 02780 /* Label wasn't found. Look for suggestions that are close */ 02781 std::vector<std::string> labels = ctx->GetLabels(); 02782 std::vector<std::string> matches = MatchStrings(label, labels); 02783 std::string match_output; 02784 if (! matches.empty()) { 02785 /* Print up to 5 matches. Don't want to spew too much */ 02786 match_output += "\nDid you mean:"; 02787 for (unsigned int i=0; i<matches.size() && i<5; i++) 02788 match_output += "\n " + matches[i] + "?"; 02789 } 02790 02791 /* Label wasn't found. Emit an error */ 02792 Error(identifierPos, 02793 "No label named \"%s\" found in current function.%s", 02794 label.c_str(), match_output.c_str()); 02795 02796 return; 02797 } 02798 02799 ctx->BranchInst(bb); 02800 ctx->SetCurrentBasicBlock(NULL); 02801 } 02802 02803 02804 void 02805 GotoStmt::Print(int indent) const { 02806 printf("%*cGoto label \"%s\"\n", indent, ' ', label.c_str()); 02807 } 02808 02809 02810 Stmt * 02811 GotoStmt::Optimize() { 02812 return this; 02813 } 02814 02815 02816 Stmt * 02817 GotoStmt::TypeCheck() { 02818 return this; 02819 } 02820 02821 02822 int 02823 GotoStmt::EstimateCost() const { 02824 return COST_GOTO; 02825 } 02826 02827 02828 /////////////////////////////////////////////////////////////////////////// 02829 // LabeledStmt 02830 02831 LabeledStmt::LabeledStmt(const char *n, Stmt *s, SourcePos p) 02832 : Stmt(p) { 02833 name = n; 02834 stmt = s; 02835 } 02836 02837 02838 void 02839 LabeledStmt::EmitCode(FunctionEmitContext *ctx) const { 02840 llvm::BasicBlock *bblock = ctx->GetLabeledBasicBlock(name); 02841 AssertPos(pos, bblock != NULL); 02842 02843 // End the current basic block with a jump to our basic block and then 02844 // set things up for emission to continue there. Note that the current 02845 // basic block may validly be NULL going into this statement due to an 02846 // earlier goto that NULLed it out; that doesn't stop us from 02847 // re-establishing a current basic block starting at the label.. 02848 if (ctx->GetCurrentBasicBlock() != NULL) 02849 ctx->BranchInst(bblock); 02850 ctx->SetCurrentBasicBlock(bblock); 02851 02852 if (stmt != NULL) 02853 stmt->EmitCode(ctx); 02854 } 02855 02856 02857 void 02858 LabeledStmt::Print(int indent) const { 02859 printf("%*cLabel \"%s\"\n", indent, ' ', name.c_str()); 02860 if (stmt != NULL) 02861 stmt->Print(indent); 02862 } 02863 02864 02865 Stmt * 02866 LabeledStmt::Optimize() { 02867 return this; 02868 } 02869 02870 02871 Stmt * 02872 LabeledStmt::TypeCheck() { 02873 if (!isalpha(name[0]) || name[0] == '_') { 02874 Error(pos, "Label must start with either alphabetic character or '_'."); 02875 return NULL; 02876 } 02877 for (unsigned int i = 1; i < name.size(); ++i) { 02878 if (!isalnum(name[i]) && name[i] != '_') { 02879 Error(pos, "Character \"%c\" is illegal in labels.", name[i]); 02880 return NULL; 02881 } 02882 } 02883 return this; 02884 } 02885 02886 02887 int 02888 LabeledStmt::EstimateCost() const { 02889 return 0; 02890 } 02891 02892 02893 /////////////////////////////////////////////////////////////////////////// 02894 // StmtList 02895 02896 void 02897 StmtList::EmitCode(FunctionEmitContext *ctx) const { 02898 ctx->StartScope(); 02899 ctx->SetDebugPos(pos); 02900 for (unsigned int i = 0; i < stmts.size(); ++i) 02901 if (stmts[i]) 02902 stmts[i]->EmitCode(ctx); 02903 ctx->EndScope(); 02904 } 02905 02906 02907 Stmt * 02908 StmtList::TypeCheck() { 02909 return this; 02910 } 02911 02912 02913 int 02914 StmtList::EstimateCost() const { 02915 return 0; 02916 } 02917 02918 02919 void 02920 StmtList::Print(int indent) const { 02921 printf("%*cStmt List", indent, ' '); 02922 pos.Print(); 02923 printf(":\n"); 02924 for (unsigned int i = 0; i < stmts.size(); ++i) 02925 if (stmts[i]) 02926 stmts[i]->Print(indent+4); 02927 } 02928 02929 02930 /////////////////////////////////////////////////////////////////////////// 02931 // PrintStmt 02932 02933 PrintStmt::PrintStmt(const std::string &f, Expr *v, SourcePos p) 02934 : Stmt(p), format(f), values(v) { 02935 } 02936 02937 /* Because the pointers to values that are passed to __do_print() are all 02938 void *s (and because ispc print() formatting strings statements don't 02939 encode types), we pass along a string to __do_print() where the i'th 02940 character encodes the type of the i'th value to be printed. Needless to 02941 say, the encoding chosen here and the decoding code in __do_print() need 02942 to agree on the below! 02943 */ 02944 static char 02945 lEncodeType(const Type *t) { 02946 if (Type::Equal(t, AtomicType::UniformBool)) return 'b'; 02947 if (Type::Equal(t, AtomicType::VaryingBool)) return 'B'; 02948 if (Type::Equal(t, AtomicType::UniformInt32)) return 'i'; 02949 if (Type::Equal(t, AtomicType::VaryingInt32)) return 'I'; 02950 if (Type::Equal(t, AtomicType::UniformUInt32)) return 'u'; 02951 if (Type::Equal(t, AtomicType::VaryingUInt32)) return 'U'; 02952 if (Type::Equal(t, AtomicType::UniformFloat)) return 'f'; 02953 if (Type::Equal(t, AtomicType::VaryingFloat)) return 'F'; 02954 if (Type::Equal(t, AtomicType::UniformInt64)) return 'l'; 02955 if (Type::Equal(t, AtomicType::VaryingInt64)) return 'L'; 02956 if (Type::Equal(t, AtomicType::UniformUInt64)) return 'v'; 02957 if (Type::Equal(t, AtomicType::VaryingUInt64)) return 'V'; 02958 if (Type::Equal(t, AtomicType::UniformDouble)) return 'd'; 02959 if (Type::Equal(t, AtomicType::VaryingDouble)) return 'D'; 02960 if (CastType<PointerType>(t) != NULL) { 02961 if (t->IsUniformType()) 02962 return 'p'; 02963 else 02964 return 'P'; 02965 } 02966 else return '\0'; 02967 } 02968 02969 02970 /** Given an Expr for a value to be printed, emit the code to evaluate the 02971 expression and store the result to alloca'd memory. Update the 02972 argTypes string with the type encoding for this expression. 02973 */ 02974 static llvm::Value * 02975 lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) { 02976 const Type *type = expr->GetType(); 02977 if (type == NULL) 02978 return NULL; 02979 02980 if (CastType<ReferenceType>(type) != NULL) { 02981 expr = new RefDerefExpr(expr, expr->pos); 02982 type = expr->GetType(); 02983 if (type == NULL) 02984 return NULL; 02985 } 02986 02987 // Just int8 and int16 types to int32s... 02988 const Type *baseType = type->GetAsNonConstType()->GetAsUniformType(); 02989 if (Type::Equal(baseType, AtomicType::UniformInt8) || 02990 Type::Equal(baseType, AtomicType::UniformUInt8) || 02991 Type::Equal(baseType, AtomicType::UniformInt16) || 02992 Type::Equal(baseType, AtomicType::UniformUInt16)) { 02993 expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 : 02994 AtomicType::VaryingInt32, 02995 expr, expr->pos); 02996 type = expr->GetType(); 02997 } 02998 02999 char t = lEncodeType(type->GetAsNonConstType()); 03000 if (t == '\0') { 03001 Error(expr->pos, "Only atomic types are allowed in print statements; " 03002 "type \"%s\" is illegal.", type->GetString().c_str()); 03003 return NULL; 03004 } 03005 else { 03006 argTypes.push_back(t); 03007 03008 llvm::Type *llvmExprType = type->LLVMType(g->ctx); 03009 llvm::Value *ptr = ctx->AllocaInst(llvmExprType, "print_arg"); 03010 llvm::Value *val = expr->GetValue(ctx); 03011 if (!val) 03012 return NULL; 03013 ctx->StoreInst(val, ptr); 03014 03015 ptr = ctx->BitCastInst(ptr, LLVMTypes::VoidPointerType); 03016 return ptr; 03017 } 03018 } 03019 03020 03021 /* PrintStmt works closely with the __do_print() function implemented in 03022 the builtins-c.c file. In particular, the EmitCode() method here needs to 03023 take the arguments passed to it from ispc and generate a valid call to 03024 __do_print() with the information that __do_print() then needs to do the 03025 actual printing work at runtime. 03026 */ 03027 void 03028 PrintStmt::EmitCode(FunctionEmitContext *ctx) const { 03029 if (!ctx->GetCurrentBasicBlock()) 03030 return; 03031 03032 ctx->SetDebugPos(pos); 03033 03034 // __do_print takes 5 arguments; we'll get them stored in the args[] array 03035 // in the code emitted below 03036 // 03037 // 1. the format string 03038 // 2. a string encoding the types of the values being printed, 03039 // one character per value 03040 // 3. the number of running program instances (i.e. the target's 03041 // vector width) 03042 // 4. the current lane mask 03043 // 5. a pointer to an array of pointers to the values to be printed 03044 llvm::Value *args[5]; 03045 std::string argTypes; 03046 03047 if (values == NULL) { 03048 llvm::Type *ptrPtrType = 03049 llvm::PointerType::get(LLVMTypes::VoidPointerType, 0); 03050 args[4] = llvm::Constant::getNullValue(ptrPtrType); 03051 } 03052 else { 03053 // Get the values passed to the print() statement evaluated and 03054 // stored in memory so that we set up the array of pointers to them 03055 // for the 5th __do_print() argument 03056 ExprList *elist = dynamic_cast<ExprList *>(values); 03057 int nArgs = elist ? elist->exprs.size() : 1; 03058 03059 // Allocate space for the array of pointers to values to be printed 03060 llvm::Type *argPtrArrayType = 03061 llvm::ArrayType::get(LLVMTypes::VoidPointerType, nArgs); 03062 llvm::Value *argPtrArray = ctx->AllocaInst(argPtrArrayType, 03063 "print_arg_ptrs"); 03064 // Store the array pointer as a void **, which is what __do_print() 03065 // expects 03066 args[4] = ctx->BitCastInst(argPtrArray, 03067 llvm::PointerType::get(LLVMTypes::VoidPointerType, 0)); 03068 03069 // Now, for each of the arguments, emit code to evaluate its value 03070 // and store the value into alloca'd storage. Then store the 03071 // pointer to the alloca'd storage into argPtrArray. 03072 if (elist) { 03073 for (unsigned int i = 0; i < elist->exprs.size(); ++i) { 03074 Expr *expr = elist->exprs[i]; 03075 if (!expr) 03076 return; 03077 llvm::Value *ptr = lProcessPrintArg(expr, ctx, argTypes); 03078 if (!ptr) 03079 return; 03080 03081 llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, i, NULL); 03082 ctx->StoreInst(ptr, arrayPtr); 03083 } 03084 } 03085 else { 03086 llvm::Value *ptr = lProcessPrintArg(values, ctx, argTypes); 03087 if (!ptr) 03088 return; 03089 llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, 0, NULL); 03090 ctx->StoreInst(ptr, arrayPtr); 03091 } 03092 } 03093 03094 // Now we can emit code to call __do_print() 03095 llvm::Function *printFunc = m->module->getFunction("__do_print"); 03096 AssertPos(pos, printFunc); 03097 03098 llvm::Value *mask = ctx->GetFullMask(); 03099 // Set up the rest of the parameters to it 03100 args[0] = ctx->GetStringPtr(format); 03101 args[1] = ctx->GetStringPtr(argTypes); 03102 args[2] = LLVMInt32(g->target.vectorWidth); 03103 args[3] = ctx->LaneMask(mask); 03104 std::vector<llvm::Value *> argVec(&args[0], &args[5]); 03105 ctx->CallInst(printFunc, NULL, argVec, ""); 03106 } 03107 03108 03109 void 03110 PrintStmt::Print(int indent) const { 03111 printf("%*cPrint Stmt (%s)", indent, ' ', format.c_str()); 03112 } 03113 03114 03115 Stmt * 03116 PrintStmt::TypeCheck() { 03117 return this; 03118 } 03119 03120 03121 int 03122 PrintStmt::EstimateCost() const { 03123 return COST_FUNCALL; 03124 } 03125 03126 03127 /////////////////////////////////////////////////////////////////////////// 03128 // AssertStmt 03129 03130 AssertStmt::AssertStmt(const std::string &msg, Expr *e, SourcePos p) 03131 : Stmt(p), message(msg), expr(e) { 03132 } 03133 03134 03135 void 03136 AssertStmt::EmitCode(FunctionEmitContext *ctx) const { 03137 if (!ctx->GetCurrentBasicBlock()) 03138 return; 03139 03140 const Type *type; 03141 if (expr == NULL || 03142 (type = expr->GetType()) == NULL) { 03143 AssertPos(pos, m->errorCount > 0); 03144 return; 03145 } 03146 bool isUniform = type->IsUniformType(); 03147 03148 // The actual functionality to do the check and then handle falure is 03149 // done via a builtin written in bitcode in builtins.m4. 03150 llvm::Function *assertFunc = 03151 isUniform ? m->module->getFunction("__do_assert_uniform") : 03152 m->module->getFunction("__do_assert_varying"); 03153 AssertPos(pos, assertFunc != NULL); 03154 03155 char *errorString; 03156 if (asprintf(&errorString, "%s:%d:%d: Assertion failed: %s\n", 03157 pos.name, pos.first_line, pos.first_column, 03158 message.c_str()) == -1) { 03159 Error(pos, "Fatal error when generating assert string: asprintf() " 03160 "unable to allocate memory!"); 03161 return; 03162 } 03163 03164 std::vector<llvm::Value *> args; 03165 args.push_back(ctx->GetStringPtr(errorString)); 03166 llvm::Value *exprValue = expr->GetValue(ctx); 03167 if (exprValue == NULL) { 03168 AssertPos(pos, m->errorCount > 0); 03169 return; 03170 } 03171 args.push_back(exprValue); 03172 args.push_back(ctx->GetFullMask()); 03173 ctx->CallInst(assertFunc, NULL, args, ""); 03174 03175 free(errorString); 03176 } 03177 03178 03179 void 03180 AssertStmt::Print(int indent) const { 03181 printf("%*cAssert Stmt (%s)", indent, ' ', message.c_str()); 03182 } 03183 03184 03185 Stmt * 03186 AssertStmt::TypeCheck() { 03187 const Type *type; 03188 if (expr && (type = expr->GetType()) != NULL) { 03189 bool isUniform = type->IsUniformType(); 03190 if (!type->IsNumericType() && !type->IsBoolType()) { 03191 Error(expr->pos, "Type \"%s\" can't be converted to boolean for \"assert\".", 03192 type->GetString().c_str()); 03193 return NULL; 03194 } 03195 expr = new TypeCastExpr(isUniform ? AtomicType::UniformBool : 03196 AtomicType::VaryingBool, 03197 expr, expr->pos); 03198 expr = ::TypeCheck(expr); 03199 } 03200 return this; 03201 } 03202 03203 03204 int 03205 AssertStmt::EstimateCost() const { 03206 return COST_ASSERT; 03207 } 03208 03209 03210 /////////////////////////////////////////////////////////////////////////// 03211 // DeleteStmt 03212 03213 DeleteStmt::DeleteStmt(Expr *e, SourcePos p) 03214 : Stmt(p) { 03215 expr = e; 03216 } 03217 03218 03219 void 03220 DeleteStmt::EmitCode(FunctionEmitContext *ctx) const { 03221 if (!ctx->GetCurrentBasicBlock()) 03222 return; 03223 03224 const Type *exprType; 03225 if (expr == NULL || ((exprType = expr->GetType()) == NULL)) { 03226 AssertPos(pos, m->errorCount > 0); 03227 return; 03228 } 03229 03230 llvm::Value *exprValue = expr->GetValue(ctx); 03231 if (exprValue == NULL) { 03232 AssertPos(pos, m->errorCount > 0); 03233 return; 03234 } 03235 03236 // Typechecking should catch this 03237 AssertPos(pos, CastType<PointerType>(exprType) != NULL); 03238 03239 if (exprType->IsUniformType()) { 03240 // For deletion of a uniform pointer, we just need to cast the 03241 // pointer type to a void pointer type, to match what 03242 // __delete_uniform() from the builtins expects. 03243 exprValue = ctx->BitCastInst(exprValue, LLVMTypes::VoidPointerType, 03244 "ptr_to_void"); 03245 llvm::Function *func = m->module->getFunction("__delete_uniform"); 03246 AssertPos(pos, func != NULL); 03247 03248 ctx->CallInst(func, NULL, exprValue, ""); 03249 } 03250 else { 03251 // Varying pointers are arrays of ints, and __delete_varying() 03252 // takes a vector of i64s (even for 32-bit targets). Therefore, we 03253 // only need to extend to 64-bit values on 32-bit targets before 03254 // calling it. 03255 llvm::Function *func = m->module->getFunction("__delete_varying"); 03256 AssertPos(pos, func != NULL); 03257 if (g->target.is32Bit) 03258 exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType, 03259 "ptr_to_64"); 03260 ctx->CallInst(func, NULL, exprValue, ""); 03261 } 03262 } 03263 03264 03265 void 03266 DeleteStmt::Print(int indent) const { 03267 printf("%*cDelete Stmt", indent, ' '); 03268 } 03269 03270 03271 Stmt * 03272 DeleteStmt::TypeCheck() { 03273 const Type *exprType; 03274 if (expr == NULL || ((exprType = expr->GetType()) == NULL)) 03275 return NULL; 03276 03277 if (CastType<PointerType>(exprType) == NULL) { 03278 Error(pos, "Illegal to delete non-pointer type \"%s\".", 03279 exprType->GetString().c_str()); 03280 return NULL; 03281 } 03282 03283 return this; 03284 } 03285 03286 03287 int 03288 DeleteStmt::EstimateCost() const { 03289 return COST_DELETE; 03290 }
1.7.5.1