Intel SPMD Program Compiler  1.9.1
stmt.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2014, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file stmt.cpp
35  @brief File with definitions classes related to statements in the language
36 */
37 
38 #include "stmt.h"
39 #include "ctx.h"
40 #include "util.h"
41 #include "expr.h"
42 #include "type.h"
43 #include "func.h"
44 #include "sym.h"
45 #include "module.h"
46 #include "llvmutil.h"
47 
48 #include <stdio.h>
49 #include <map>
50 
51 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
52  #include <llvm/Module.h>
53  #include <llvm/Type.h>
54  #include <llvm/Instructions.h>
55  #include <llvm/Function.h>
56  #include <llvm/DerivedTypes.h>
57  #include <llvm/LLVMContext.h>
58  #include <llvm/Metadata.h>
59  #include <llvm/CallingConv.h>
60 #else
61  #include <llvm/IR/Module.h>
62  #include <llvm/IR/Type.h>
63  #include <llvm/IR/Instructions.h>
64  #include <llvm/IR/Function.h>
65  #include <llvm/IR/DerivedTypes.h>
66  #include <llvm/IR/LLVMContext.h>
67  #include <llvm/IR/Metadata.h>
68  #include <llvm/IR/CallingConv.h>
69 #endif
70 #include <llvm/Support/raw_ostream.h>
71 
72 ///////////////////////////////////////////////////////////////////////////
73 // Stmt
74 
75 Stmt *
77  return this;
78 }
79 
80 
81 ///////////////////////////////////////////////////////////////////////////
82 // ExprStmt
83 
85  : Stmt(p, ExprStmtID) {
86  expr = e;
87 }
88 
89 void
91  if (!ctx->GetCurrentBasicBlock())
92  return;
93 
94  ctx->SetDebugPos(pos);
95  if (expr)
96  expr->GetValue(ctx);
97 }
98 
99 
100 Stmt *
102  return this;
103 }
104 
105 
106 void
107 ExprStmt::Print(int indent) const {
108  if (!expr)
109  return;
110 
111  printf("%*c", indent, ' ');
112  printf("Expr stmt: ");
113  pos.Print();
114  expr->Print();
115  printf("\n");
116 }
117 
118 
119 int
121  return 0;
122 }
123 
124 
125 ///////////////////////////////////////////////////////////////////////////
126 // DeclStmt
127 
128 DeclStmt::DeclStmt(const std::vector<VariableDeclaration> &v, SourcePos p)
129  : Stmt(p, DeclStmtID), vars(v) {
130 }
131 
132 
133 static bool
134 lHasUnsizedArrays(const Type *type) {
135  const ArrayType *at = CastType<ArrayType>(type);
136  if (at == NULL)
137  return false;
138 
139  if (at->GetElementCount() == 0)
140  return true;
141  else
142  return lHasUnsizedArrays(at->GetElementType());
143 }
144 
145 #ifdef ISPC_NVPTX_ENABLED
146 static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos, const bool variable = false)
147 {
148  if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX)
149  return value;
150  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType());
151  const int addressSpace = pt->getAddressSpace();
152  if (addressSpace != 3 && addressSpace != 4)
153  return value;
154 
155  llvm::Type *elTy = pt->getElementType();
156 
157  /* convert elTy addrspace(3)* to i64* addrspace(3)* */
158  llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace);
159  value = ctx->BitCastInst(value, Int64Ptr3, "gep2gen_cast1");
160 
161  /* convert i64* addrspace(3) to i64* */
162  llvm::Function *__cvt2gen = m->module->getFunction(
163  addressSpace == 3 ? (variable ? "__cvt_loc2gen_var" : "__cvt_loc2gen") : "__cvt_const2gen");
164 
165  std::vector<llvm::Value *> __cvt2gen_args;
166  __cvt2gen_args.push_back(value);
167  value = llvm::CallInst::Create(__cvt2gen, __cvt2gen_args, variable ? "gep2gen_cvt_var" : "gep2gen_cvt", ctx->GetCurrentBasicBlock());
168 
169  /* compute offset */
170  if (addressSpace == 3)
171  {
172  assert(elTy->isArrayTy());
173  const int numElTot = elTy->getArrayNumElements();
174  const int numEl = numElTot/4;
175 #if 0
176  fprintf(stderr, " --- detected addrspace(3) sz= %d --- \n", numEl);
177 #endif
178  llvm::ArrayType *arrTy = llvm::dyn_cast<llvm::ArrayType>(pt->getArrayElementType());
179  assert(arrTy != NULL);
180  llvm::Type *arrElTy = arrTy->getElementType();
181 #if 0
182  if (arrElTy->isArrayTy())
183  Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array");
184 #endif
185 
186  /* convert i64* to errElTy* */
187  llvm::PointerType *arrElTyPt0 = llvm::PointerType::get(arrElTy, 0);
188  value = ctx->BitCastInst(value, arrElTyPt0, "gep2gen_cast2");
189 
190  llvm::Function *func_warp_index = m->module->getFunction("__warp_index");
191  llvm::Value *warpId = ctx->CallInst(func_warp_index, NULL, std::vector<llvm::Value*>(), "gep2gen_warp_index");
192  llvm::Value *offset = ctx->BinaryOperator(llvm::Instruction::Mul, warpId, LLVMInt32(numEl), "gep2gen_offset");
193 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
194  value = llvm::GetElementPtrInst::Create(value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock());
195 #else
196  value = llvm::GetElementPtrInst::Create(NULL, value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock());
197 #endif
198  }
199 
200  /* convert arrElTy* to elTy* */
201  llvm::PointerType *elTyPt0 = llvm::PointerType::get(elTy, 0);
202  value = ctx->BitCastInst(value, elTyPt0, "gep2gen_cast3");
203 
204  return value;
205 }
206 #endif /* ISPC_NVPTX_ENABLED */
207 
208 void
210  if (!ctx->GetCurrentBasicBlock())
211  return;
212 
213  for (unsigned int i = 0; i < vars.size(); ++i) {
214  Symbol *sym = vars[i].sym;
215  AssertPos(pos, sym != NULL);
216  if (sym->type == NULL)
217  continue;
218  Expr *initExpr = vars[i].init;
219 
220  // Now that we're in the thick of emitting code, it's easy for us
221  // to find out the level of nesting of varying control flow we're
222  // in at this declaration. So we can finally set that
223  // Symbol::varyingCFDepth variable.
224  // @todo It's disgusting to be doing this here.
225  sym->varyingCFDepth = ctx->VaryingCFDepth();
226 
227  ctx->SetDebugPos(sym->pos);
228 
229  // If it's an array that was declared without a size but has an
230  // initializer list, then use the number of elements in the
231  // initializer list to finally set the array's size.
232  sym->type = ArrayType::SizeUnsizedArrays(sym->type, initExpr);
233  if (sym->type == NULL)
234  continue;
235 
236  if (lHasUnsizedArrays(sym->type)) {
237  Error(pos, "Illegal to declare an unsized array variable without "
238  "providing an initializer expression to set its size.");
239  continue;
240  }
241 
242  // References must have initializer expressions as well.
243  if (IsReferenceType(sym->type) == true) {
244  if (initExpr == NULL) {
245  Error(sym->pos, "Must provide initializer for reference-type "
246  "variable \"%s\".", sym->name.c_str());
247  continue;
248  }
249  if (IsReferenceType(initExpr->GetType()) == false) {
250  const Type *initLVType = initExpr->GetLValueType();
251  if (initLVType == NULL) {
252  Error(initExpr->pos, "Initializer for reference-type variable "
253  "\"%s\" must have an lvalue type.", sym->name.c_str());
254  continue;
255  }
256  if (initLVType->IsUniformType() == false) {
257  Error(initExpr->pos, "Initializer for reference-type variable "
258  "\"%s\" must have a uniform lvalue type.", sym->name.c_str());
259  continue;
260  }
261  }
262  }
263 
264  llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
265  if (llvmType == NULL) {
266  AssertPos(pos, m->errorCount > 0);
267  return;
268  }
269 
270  if (sym->storageClass == SC_STATIC) {
271 #ifdef ISPC_NVPTX_ENABLED
272  if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType())
273  {
274  Error(sym->pos,
275  "Non-constant static variable ""\"%s\" is not supported with ""\"nvptx\" target.",
276  sym->name.c_str());
277  return;
278  }
279  if (g->target->getISA() == Target::NVPTX && sym->type->IsVaryingType())
280  PerformanceWarning(sym->pos,
281  "\"const static varying\" variable ""\"%s\" is stored in __global address space with ""\"nvptx\" target.",
282  sym->name.c_str());
283  if (g->target->getISA() == Target::NVPTX && sym->type->IsUniformType())
284  PerformanceWarning(sym->pos,
285  "\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.",
286  sym->name.c_str());
287 #endif /* ISPC_NVPTX_ENABLED */
288  // For static variables, we need a compile-time constant value
289  // for its initializer; if there's no initializer, we use a
290  // zero value.
291  llvm::Constant *cinit = NULL;
292  if (initExpr != NULL) {
293  if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
294  continue;
295  // FIXME: we only need this for function pointers; it was
296  // already done for atomic types and enums in
297  // DeclStmt::TypeCheck()...
298  if (llvm::dyn_cast<ExprList>(initExpr) == NULL) {
299  initExpr = TypeConvertExpr(initExpr, sym->type,
300  "initializer");
301  // FIXME: and this is only needed to re-establish
302  // constant-ness so that GetConstant below works for
303  // constant artithmetic expressions...
304  initExpr = ::Optimize(initExpr);
305  }
306 
307  cinit = initExpr->GetConstant(sym->type);
308  if (cinit == NULL)
309  Error(initExpr->pos, "Initializer for static variable "
310  "\"%s\" must be a constant.", sym->name.c_str());
311  }
312  if (cinit == NULL)
313  cinit = llvm::Constant::getNullValue(llvmType);
314 
315  // Allocate space for the static variable in global scope, so
316  // that it persists across function calls
317 #ifdef ISPC_NVPTX_ENABLED
318  int addressSpace = 0;
319  if (g->target->getISA() == Target::NVPTX &&
320  sym->type->IsConstType() &&
321  sym->type->IsUniformType())
322  addressSpace = 4;
323  sym->storagePtr =
324  new llvm::GlobalVariable(*m->module, llvmType,
325  sym->type->IsConstType(),
326  llvm::GlobalValue::InternalLinkage, cinit,
327  llvm::Twine("static.") +
328  llvm::Twine(sym->pos.first_line) +
329  llvm::Twine(".") + sym->name.c_str(),
330  NULL,
331  llvm::GlobalVariable::NotThreadLocal,
332  addressSpace);
333  sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
334 #else /* ISPC_NVPTX_ENABLED */
335  sym->storagePtr =
336  new llvm::GlobalVariable(*m->module, llvmType,
337  sym->type->IsConstType(),
338  llvm::GlobalValue::InternalLinkage, cinit,
339  llvm::Twine("static.") +
340  llvm::Twine(sym->pos.first_line) +
341  llvm::Twine(".") + sym->name.c_str());
342 #endif /* ISPC_NVPTX_ENABLED */
343  // Tell the FunctionEmitContext about the variable
344  ctx->EmitVariableDebugInfo(sym);
345  }
346 #ifdef ISPC_NVPTX_ENABLED
347  else if ((sym->type->IsUniformType() || sym->type->IsSOAType()) &&
348  /* NVPTX:
349  * only non-constant uniform data types are stored in shared memory
350  * constant uniform are automatically promoted to varying
351  */
352  !sym->type->IsConstType() &&
353 #if 1
354  sym->type->IsArrayType() &&
355 #endif
356  g->target->getISA() == Target::NVPTX)
357  {
358  PerformanceWarning(sym->pos,
359  "Non-constant \"uniform\" data types might be slow with \"nvptx\" target. "
360  "Unless data sharing between program instances is desired, try \"const [static] uniform\", \"varying\" or \"uniform new uniform \"+\"delete\" if possible.");
361 
362  /* with __shared__ memory everything must be an array */
363  int nel = 4;
364  ArrayType *nat;
365  bool variable = true;
366  if (sym->type->IsArrayType())
367  {
368  const ArrayType *at = CastType<ArrayType>(sym->type);
369  /* we must scale # elements by 4, because a thread-block will run 4 warps
370  * or 128 threads.
371  * ***note-to-me***:please define these value (128threads/4warps)
372  * in nvptx-target definition
373  * instead of compile-time constants
374  */
375  nel *= at->GetElementCount();
376  if (sym->type->IsSOAType())
377  nel *= sym->type->GetSOAWidth();
378  nat = new ArrayType(at->GetElementType(), nel);
379  variable = false;
380  }
381  else
382  nat = new ArrayType(sym->type, nel);
383 
384  llvm::Type *llvmTypeUn = nat->LLVMType(g->ctx);
385  llvm::Constant *cinit = llvm::UndefValue::get(llvmTypeUn);
386 
387  sym->storagePtr =
388  new llvm::GlobalVariable(*m->module, llvmTypeUn,
389  sym->type->IsConstType(),
390  llvm::GlobalValue::InternalLinkage,
391  cinit,
392  llvm::Twine("local_") +
393  llvm::Twine(sym->pos.first_line) +
394  llvm::Twine("_") + sym->name.c_str(),
395  NULL,
396  llvm::GlobalVariable::NotThreadLocal,
397  /*AddressSpace=*/3);
398  sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos, variable);
399  llvm::PointerType *ptrTy = llvm::PointerType::get(sym->type->LLVMType(g->ctx),0);
400  sym->storagePtr = ctx->BitCastInst(sym->storagePtr, ptrTy, "uniform_decl");
401 
402  // Tell the FunctionEmitContext about the variable; must do
403  // this before the initializer stuff.
404  ctx->EmitVariableDebugInfo(sym);
405 
406  if (initExpr == 0 && sym->type->IsConstType())
407  Error(sym->pos, "Missing initializer for const variable "
408  "\"%s\".", sym->name.c_str());
409 
410  // And then get it initialized...
411  sym->parentFunction = ctx->GetFunction();
412  InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
413  }
414 #endif /* ISPC_NVPTX_ENABLED */
415  else
416  {
417  // For non-static variables, allocate storage on the stack
418  sym->storagePtr = ctx->AllocaInst(llvmType, sym->name.c_str());
419 
420  // Tell the FunctionEmitContext about the variable; must do
421  // this before the initializer stuff.
422  ctx->EmitVariableDebugInfo(sym);
423  if (initExpr == 0 && sym->type->IsConstType())
424  Error(sym->pos, "Missing initializer for const variable "
425  "\"%s\".", sym->name.c_str());
426 
427  // And then get it initialized...
428  sym->parentFunction = ctx->GetFunction();
429  InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
430  }
431  }
432 }
433 
434 
435 Stmt *
437  for (unsigned int i = 0; i < vars.size(); ++i) {
438  Expr *init = vars[i].init;
439  if (init != NULL && llvm::dyn_cast<ExprList>(init) == NULL) {
440  // If the variable is const-qualified, after we've optimized
441  // the initializer expression, see if we have a ConstExpr. If
442  // so, save it in Symbol::constValue where it can be used in
443  // optimizing later expressions that have this symbol in them.
444  // Note that there are cases where the expression may be
445  // constant but where we don't have a ConstExpr; an example is
446  // const arrays--the ConstExpr implementation just can't
447  // represent an array of values.
448  //
449  // All this is fine in terms of the code that's generated in
450  // the end (LLVM's constant folding stuff is good), but it
451  // means that the ispc compiler's ability to reason about what
452  // is definitely a compile-time constant for things like
453  // computing array sizes from non-trivial expressions is
454  // consequently limited.
455  Symbol *sym = vars[i].sym;
456  if (sym->type && sym->type->IsConstType() &&
457  Type::Equal(init->GetType(), sym->type))
458  sym->constValue = llvm::dyn_cast<ConstExpr>(init);
459  }
460  }
461  return this;
462 }
463 
464 
465 Stmt *
467  bool encounteredError = false;
468  for (unsigned int i = 0; i < vars.size(); ++i) {
469  if (vars[i].sym == NULL) {
470  encounteredError = true;
471  continue;
472  }
473 
474  if (vars[i].init == NULL)
475  continue;
476 
477  // get the right type for stuff like const float foo = 2; so that
478  // the int->float type conversion is in there and we don't return
479  // an int as the constValue later...
480  const Type *type = vars[i].sym->type;
481  if (CastType<AtomicType>(type) != NULL ||
482  CastType<EnumType>(type) != NULL) {
483  // If it's an expr list with an atomic type, we'll later issue
484  // an error. Need to leave vars[i].init as is in that case so
485  // it is in fact caught later, though.
486  if (llvm::dyn_cast<ExprList>(vars[i].init) == NULL) {
487  vars[i].init = TypeConvertExpr(vars[i].init, type,
488  "initializer");
489  if (vars[i].init == NULL)
490  encounteredError = true;
491  }
492  }
493  }
494  return encounteredError ? NULL : this;
495 }
496 
497 
498 void
499 DeclStmt::Print(int indent) const {
500  printf("%*cDecl Stmt:", indent, ' ');
501  pos.Print();
502  for (unsigned int i = 0; i < vars.size(); ++i) {
503  printf("%*cVariable %s (%s)", indent+4, ' ',
504  vars[i].sym->name.c_str(),
505  vars[i].sym->type->GetString().c_str());
506  if (vars[i].init != NULL) {
507  printf(" = ");
508  vars[i].init->Print();
509  }
510  printf("\n");
511  }
512  printf("\n");
513 }
514 
515 
516 int
518  return 0;
519 }
520 
521 
522 ///////////////////////////////////////////////////////////////////////////
523 // IfStmt
524 
525 IfStmt::IfStmt(Expr *t, Stmt *ts, Stmt *fs, bool checkCoherence, SourcePos p)
526  : Stmt(p, IfStmtID), test(t), trueStmts(ts), falseStmts(fs),
527  doAllCheck(checkCoherence &&
528  !g->opt.disableCoherentControlFlow) {
529 }
530 
531 
532 static void
533 lEmitIfStatements(FunctionEmitContext *ctx, Stmt *stmts, const char *trueOrFalse) {
534  if (!stmts)
535  return;
536 
537  if (llvm::dyn_cast<StmtList>(stmts) == NULL)
538  ctx->StartScope();
539  ctx->AddInstrumentationPoint(trueOrFalse);
540  stmts->EmitCode(ctx);
541  if (llvm::dyn_cast<const StmtList>(stmts) == NULL)
542  ctx->EndScope();
543 }
544 
545 
546 /** Returns true if the "true" block for the if statement consists of a
547  single 'break' statement, and the "false" block is empty. */
548 /*
549 static bool
550 lCanApplyBreakOptimization(Stmt *trueStmts, Stmt *falseStmts) {
551  if (falseStmts != NULL) {
552  if (StmtList *sl = llvm::dyn_cast<StmtList>(falseStmts)) {
553  return (sl->stmts.size() == 0);
554  }
555  else
556  return false;
557  }
558 
559  if (llvm::dyn_cast<BreakStmt>(trueStmts))
560  return true;
561  else if (StmtList *sl = llvm::dyn_cast<StmtList>(trueStmts))
562  return (sl->stmts.size() == 1 &&
563  llvm::dyn_cast<BreakStmt>(sl->stmts[0]) != NULL);
564  else
565  return false;
566 }
567 */
568 
569 void
571  // First check all of the things that might happen due to errors
572  // earlier in compilation and bail out if needed so that we don't
573  // dereference NULL pointers in the below...
574  if (!ctx->GetCurrentBasicBlock())
575  return;
576  if (!test)
577  return;
578  const Type *testType = test->GetType();
579  if (!testType)
580  return;
581 
582  ctx->SetDebugPos(pos);
583  bool isUniform = testType->IsUniformType();
584 
585  llvm::Value *testValue = test->GetValue(ctx);
586  if (testValue == NULL)
587  return;
588 
589 #ifdef ISPC_NVPTX_ENABLED
590 #if 0
591  if (!isUniform && g->target->getISA() == Target::NVPTX)
592  {
593  /* With "nvptx" target, SIMT hardware takes care of non-uniform
594  * control flow. We trick ISPC to generate uniform control flow.
595  */
596  testValue = ctx->ExtractInst(testValue, 0);
597  isUniform = true;
598  }
599 #endif
600 #endif /* ISPC_NVPTX_ENABLED */
601 
602  if (isUniform) {
603  ctx->StartUniformIf();
604  if (doAllCheck)
605  Warning(test->pos, "Uniform condition supplied to \"cif\" statement.");
606 
607  // 'If' statements with uniform conditions are relatively
608  // straightforward. We evaluate the condition and then jump to
609  // either the 'then' or 'else' clause depending on its value.
610  llvm::BasicBlock *bthen = ctx->CreateBasicBlock("if_then");
611  llvm::BasicBlock *belse = ctx->CreateBasicBlock("if_else");
612  llvm::BasicBlock *bexit = ctx->CreateBasicBlock("if_exit");
613 
614  // Jump to the appropriate basic block based on the value of
615  // the 'if' test
616  ctx->BranchInst(bthen, belse, testValue);
617 
618  // Emit code for the 'true' case
619  ctx->SetCurrentBasicBlock(bthen);
620  lEmitIfStatements(ctx, trueStmts, "true");
621  if (ctx->GetCurrentBasicBlock())
622  ctx->BranchInst(bexit);
623 
624  // Emit code for the 'false' case
625  ctx->SetCurrentBasicBlock(belse);
626  lEmitIfStatements(ctx, falseStmts, "false");
627  if (ctx->GetCurrentBasicBlock())
628  ctx->BranchInst(bexit);
629 
630  // Set the active basic block to the newly-created exit block
631  // so that subsequent emitted code starts there.
632  ctx->SetCurrentBasicBlock(bexit);
633  ctx->EndIf();
634  }
635  /*
636  // Disabled for performance reasons. Change to an optional compile-time opt switch.
637  else if (lCanApplyBreakOptimization(trueStmts, falseStmts)) {
638  // If we have a simple break statement inside the 'if' and are
639  // under varying control flow, just update the execution mask
640  // directly and don't emit code for the statements. This leads to
641  // better code for this case--this is surprising and should be
642  // root-caused further, but for now this gives us performance
643  // benefit in this case.
644  ctx->SetInternalMaskAndNot(ctx->GetInternalMask(), testValue);
645  }
646  */
647  else
648  emitVaryingIf(ctx, testValue);
649 }
650 
651 
652 Stmt *
654  if (test != NULL) {
655  const Type *testType = test->GetType();
656  if (testType != NULL) {
657  bool isUniform = (testType->IsUniformType() &&
661  "\"if\" statement test");
662  if (test == NULL)
663  return NULL;
664  }
665  }
666 
667  return this;
668 }
669 
670 
671 int
673  const Type *type;
674  if (test == NULL || (type = test->GetType()) == NULL)
675  return 0;
676 
677  return type->IsUniformType() ? COST_UNIFORM_IF : COST_VARYING_IF;
678 }
679 
680 
681 void
682 IfStmt::Print(int indent) const {
683  printf("%*cIf Stmt %s", indent, ' ', doAllCheck ? "DO ALL CHECK" : "");
684  pos.Print();
685  printf("\n%*cTest: ", indent+4, ' ');
686  test->Print();
687  printf("\n");
688  if (trueStmts) {
689  printf("%*cTrue:\n", indent+4, ' ');
690  trueStmts->Print(indent+8);
691  }
692  if (falseStmts) {
693  printf("%*cFalse:\n", indent+4, ' ');
694  falseStmts->Print(indent+8);
695  }
696 }
697 
698 
699 /** Emit code to run both the true and false statements for the if test,
700  with the mask set appropriately before running each one.
701 */
702 void
704  llvm::Value *test) const {
705  if (trueStmts) {
706  ctx->SetInternalMaskAnd(oldMask, test);
707  lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
708  // under varying control flow,, returns can't stop instruction
709  // emission, so this better be non-NULL...
711  }
712  if (falseStmts) {
713  ctx->SetInternalMaskAndNot(oldMask, test);
714  lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
716  }
717 }
718 
719 /** Emit code for an if test that checks the mask and the test values and
720  tries to be smart about jumping over code that doesn't need to be run.
721  */
722 void
723 IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
724  llvm::Value *oldMask = ctx->GetInternalMask();
725  if (doAllCheck) {
726  // We can't tell if the mask going into the if is all on at the
727  // compile time. Emit code to check for this and then either run
728  // the code for the 'all on' or the 'mixed' case depending on the
729  // mask's value at runtime.
730  llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("cif_mask_all");
731  llvm::BasicBlock *bMixedOn = ctx->CreateBasicBlock("cif_mask_mixed");
732  llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done");
733 
734  // Jump to either bAllOn or bMixedOn, depending on the mask's value
735  llvm::Value *maskAllQ = ctx->All(ctx->GetFullMask());
736  ctx->BranchInst(bAllOn, bMixedOn, maskAllQ);
737 
738  // Emit code for the 'mask all on' case
739  ctx->SetCurrentBasicBlock(bAllOn);
740  emitMaskAllOn(ctx, ltest, bDone);
741 
742  // And emit code for the mixed mask case
743  ctx->SetCurrentBasicBlock(bMixedOn);
744  emitMaskMixed(ctx, oldMask, ltest, bDone);
745 
746  // When done, set the current basic block to the block that the two
747  // paths above jump to when they're done.
748  ctx->SetCurrentBasicBlock(bDone);
749  }
750  else if (trueStmts != NULL || falseStmts != NULL) {
751  // If there is nothing that is potentially unsafe to run with all
752  // lanes off in the true and false statements and if the total
753  // complexity of those two is relatively simple, then we'll go
754  // ahead and emit straightline code that runs both sides, updating
755  // the mask accordingly. This is useful for efficiently compiling
756  // things like:
757  //
758  // if (foo) x = 0;
759  // else ++x;
760  //
761  // Where the overhead of checking if any of the program instances wants
762  // to run one side or the other is more than the actual computation.
763  // SafeToRunWithMaskAllOff() checks to make sure that we don't do this
764  // for potentially dangerous code like:
765  //
766  // if (index < count) array[index] = 0;
767  //
768  // where our use of blend for conditional assignments doesn't check
769  // for the 'all lanes' off case.
770  int trueFalseCost = (::EstimateCost(trueStmts) +
772  bool costIsAcceptable = (trueFalseCost <
774 
775  bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) &&
777 
778  Debug(pos, "If statement: true cost %d (safe %d), false cost %d (safe %d).",
781 
782  if (safeToRunWithAllLanesOff &&
783  (costIsAcceptable || g->opt.disableCoherentControlFlow)) {
784  ctx->StartVaryingIf(oldMask);
785  emitMaskedTrueAndFalse(ctx, oldMask, ltest);
787  ctx->EndIf();
788  }
789  else {
790  llvm::BasicBlock *bDone = ctx->CreateBasicBlock("if_done");
791  emitMaskMixed(ctx, oldMask, ltest, bDone);
792  ctx->SetCurrentBasicBlock(bDone);
793  }
794  }
795 }
796 
797 
798 /** Emits code for 'if' tests under the case where we know that the program
799  mask is all on going into the 'if'.
800  */
801 void
803  llvm::BasicBlock *bDone) const {
804  // We start by explicitly storing "all on" into the mask mask. Note
805  // that this doesn't change its actual value, but doing so lets the
806  // compiler see what's going on so that subsequent optimizations for
807  // code emitted here can operate with the knowledge that the mask is
808  // definitely all on (until it modifies the mask itself).
812  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
815 
816  // First, check the value of the test. If it's all on, then we jump to
817  // a basic block that will only have code for the true case.
818  llvm::BasicBlock *bTestAll = ctx->CreateBasicBlock("cif_test_all");
819  llvm::BasicBlock *bTestNoneCheck = ctx->CreateBasicBlock("cif_test_none_check");
820  llvm::Value *testAllQ = ctx->All(ltest);
821  ctx->BranchInst(bTestAll, bTestNoneCheck, testAllQ);
822 
823  // Emit code for the 'test is all true' case
824  ctx->SetCurrentBasicBlock(bTestAll);
826  lEmitIfStatements(ctx, trueStmts, "if: all on mask, expr all true");
827  ctx->EndIf();
828  if (ctx->GetCurrentBasicBlock() != NULL)
829  // bblock may legitimately be NULL since if there's a return stmt
830  // or break or continue we can actually jump and end emission since
831  // we know all of the lanes are following this path...
832  ctx->BranchInst(bDone);
833 
834  // The test isn't all true. Now emit code to determine if it's all
835  // false, or has mixed values.
836  ctx->SetCurrentBasicBlock(bTestNoneCheck);
837  llvm::BasicBlock *bTestNone = ctx->CreateBasicBlock("cif_test_none");
838  llvm::BasicBlock *bTestMixed = ctx->CreateBasicBlock("cif_test_mixed");
839  llvm::Value *testMixedQ = ctx->Any(ltest);
840  ctx->BranchInst(bTestMixed, bTestNone, testMixedQ);
841 
842  // Emit code for the 'test is all false' case
843  ctx->SetCurrentBasicBlock(bTestNone);
845  lEmitIfStatements(ctx, falseStmts, "if: all on mask, expr all false");
846  ctx->EndIf();
847  if (ctx->GetCurrentBasicBlock())
848  // bblock may be NULL since if there's a return stmt or break or
849  // continue we can actually jump or whatever and end emission...
850  ctx->BranchInst(bDone);
851 
852  // Finally emit code for the 'mixed true/false' case. We unavoidably
853  // need to run both the true and the false statements.
854  ctx->SetCurrentBasicBlock(bTestMixed);
857  // In this case, return/break/continue isn't allowed to jump and end
858  // emission.
860  ctx->EndIf();
861  ctx->BranchInst(bDone);
862 
863  ctx->SetCurrentBasicBlock(bDone);
864  ctx->SetFunctionMask(oldFunctionMask);
865 }
866 
867 
868 /** Emit code for an 'if' test where the lane mask is known to be mixed
869  on/off going into it.
870  */
871 void
872 IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
873  llvm::Value *ltest, llvm::BasicBlock *bDone) const {
874  ctx->StartVaryingIf(oldMask);
875  llvm::BasicBlock *bNext = ctx->CreateBasicBlock("safe_if_after_true");
876 
877  llvm::BasicBlock *bRunTrue = ctx->CreateBasicBlock("safe_if_run_true");
878  ctx->SetInternalMaskAnd(oldMask, ltest);
879 
880  // Do any of the program instances want to run the 'true'
881  // block? If not, jump ahead to bNext.
882 
883 #ifdef ISPC_NVPTX_ENABLED
884 #if 0
885  llvm::Value *maskAnyTrueQ = ctx->ExtractInst(ctx->GetFullMask(),0);
886 #else
887  llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
888 #endif
889 #else /* ISPC_NVPTX_ENABLED */
890  llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
891 #endif /* ISPC_NVPTX_ENABLED */
892 
893  ctx->BranchInst(bRunTrue, bNext, maskAnyTrueQ);
894 
895  // Emit statements for true
896  ctx->SetCurrentBasicBlock(bRunTrue);
897  if (trueStmts != NULL)
898  lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
900  ctx->BranchInst(bNext);
901  ctx->SetCurrentBasicBlock(bNext);
902 
903  // False...
904  llvm::BasicBlock *bRunFalse = ctx->CreateBasicBlock("safe_if_run_false");
905  ctx->SetInternalMaskAndNot(oldMask, ltest);
906 
907  // Similarly, check to see if any of the instances want to
908  // run the 'false' block...
909 
910 #ifdef ISPC_NVPTX_ENABLED
911 #if 0
912  llvm::Value *maskAnyFalseQ = ctx->ExtractInst(ctx->GetFullMask(),0);
913 #else
914  llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
915 #endif
916 #else /* ISPC_NVPTX_ENABLED */
917  llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
918 #endif /* ISPC_NVPTX_ENABLED */
919  ctx->BranchInst(bRunFalse, bDone, maskAnyFalseQ);
920 
921  // Emit code for false
922  ctx->SetCurrentBasicBlock(bRunFalse);
923  if (falseStmts)
924  lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
926 
927  ctx->BranchInst(bDone);
928  ctx->SetCurrentBasicBlock(bDone);
929  ctx->EndIf();
930 }
931 
932 
933 ///////////////////////////////////////////////////////////////////////////
934 // DoStmt
935 
940  }
941 
944 };
945 
946 
947 /** Returns true if the given node is an 'if' statement where the test
948  condition has varying type. */
949 static bool
951  IfStmt *ifStmt;
952  if ((ifStmt = llvm::dyn_cast<IfStmt>(node)) != NULL &&
953  ifStmt->test != NULL) {
954  const Type *type = ifStmt->test->GetType();
955  return (type != NULL && type->IsVaryingType());
956  }
957  else
958  return false;
959 }
960 
961 
962 /** Preorder callback function for checking for varying breaks or
963  continues. */
964 static bool
965 lVaryingBCPreFunc(ASTNode *node, void *d) {
967 
968  // We found a break or continue statement; if we're under varying
969  // control flow, then bingo.
970  if ((llvm::dyn_cast<BreakStmt>(node) != NULL ||
971  llvm::dyn_cast<ContinueStmt>(node) != NULL) &&
972  info->varyingControlFlowDepth > 0) {
973  info->foundVaryingBreakOrContinue = true;
974  return false;
975  }
976 
977  // Update the count of the nesting depth of varying control flow if
978  // this is an if statement with a varying condition.
979  if (lIsVaryingFor(node))
980  ++info->varyingControlFlowDepth;
981 
982  if (llvm::dyn_cast<ForStmt>(node) != NULL ||
983  llvm::dyn_cast<DoStmt>(node) != NULL ||
984  llvm::dyn_cast<ForeachStmt>(node) != NULL)
985  // Don't recurse into these guys, since we don't care about varying
986  // breaks or continues within them...
987  return false;
988  else
989  return true;
990 }
991 
992 
993 /** Postorder callback function for checking for varying breaks or
994  continues; decrement the varying control flow depth after the node's
995  children have been processed, if this is a varying if statement. */
996 static ASTNode *
997 lVaryingBCPostFunc(ASTNode *node, void *d) {
999  if (lIsVaryingFor(node))
1000  --info->varyingControlFlowDepth;
1001  return node;
1002 }
1003 
1004 
1005 /** Given a statment, walk through it to see if there is a 'break' or
1006  'continue' statement inside if its children, under varying control
1007  flow. We need to detect this case for loops since what might otherwise
1008  look like a 'uniform' loop needs to have code emitted to do all of the
1009  lane management stuff if this is the case.
1010  */
1011 static bool
1013  VaryingBCCheckInfo info;
1015  return info.foundVaryingBreakOrContinue;
1016 }
1017 
1018 
1019 DoStmt::DoStmt(Expr *t, Stmt *s, bool cc, SourcePos p)
1020  : Stmt(p, DoStmtID), testExpr(t), bodyStmts(s),
1021  doCoherentCheck(cc && !g->opt.disableCoherentControlFlow) {
1022 }
1023 
1024 
1026  // Check for things that could be NULL due to earlier errors during
1027  // compilation.
1028  if (!ctx->GetCurrentBasicBlock())
1029  return;
1030  if (!testExpr || !testExpr->GetType())
1031  return;
1032 
1033  bool uniformTest = testExpr->GetType()->IsUniformType();
1034  if (uniformTest && doCoherentCheck)
1035  Warning(testExpr->pos, "Uniform condition supplied to \"cdo\" "
1036  "statement.");
1037 
1038  llvm::BasicBlock *bloop = ctx->CreateBasicBlock("do_loop");
1039  llvm::BasicBlock *bexit = ctx->CreateBasicBlock("do_exit");
1040  llvm::BasicBlock *btest = ctx->CreateBasicBlock("do_test");
1041 
1042  ctx->StartLoop(bexit, btest, uniformTest);
1043 
1044  // Start by jumping into the loop body
1045  ctx->BranchInst(bloop);
1046 
1047  // And now emit code for the loop body
1048  ctx->SetCurrentBasicBlock(bloop);
1049  ctx->SetBlockEntryMask(ctx->GetFullMask());
1050  ctx->SetDebugPos(pos);
1051  // FIXME: in the StmtList::EmitCode() method takes starts/stops a new
1052  // scope around the statements in the list. So if the body is just a
1053  // single statement (and thus not a statement list), we need a new
1054  // scope, but we don't want two scopes in the StmtList case.
1055  if (!llvm::dyn_cast<StmtList>(bodyStmts))
1056  ctx->StartScope();
1057 
1058  ctx->AddInstrumentationPoint("do loop body");
1059  if (doCoherentCheck && !uniformTest) {
1060  // Check to see if the mask is all on
1061  llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("do_all_on");
1062  llvm::BasicBlock *bMixed = ctx->CreateBasicBlock("do_mixed");
1063  ctx->BranchIfMaskAll(bAllOn, bMixed);
1064 
1065  // If so, emit code for the 'mask all on' case. In particular,
1066  // explicitly set the mask to 'all on' (see rationale in
1067  // IfStmt::emitCoherentTests()), and then emit the code for the
1068  // loop body.
1069  ctx->SetCurrentBasicBlock(bAllOn);
1072  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
1075  if (bodyStmts)
1076  bodyStmts->EmitCode(ctx);
1078  ctx->SetFunctionMask(oldFunctionMask);
1079  ctx->BranchInst(btest);
1080 
1081  // The mask is mixed. Just emit the code for the loop body.
1082  ctx->SetCurrentBasicBlock(bMixed);
1083  if (bodyStmts)
1084  bodyStmts->EmitCode(ctx);
1086  ctx->BranchInst(btest);
1087  }
1088  else {
1089  // Otherwise just emit the code for the loop body. The current
1090  // mask is good.
1091  if (bodyStmts)
1092  bodyStmts->EmitCode(ctx);
1093  if (ctx->GetCurrentBasicBlock())
1094  ctx->BranchInst(btest);
1095  }
1096  // End the scope we started above, if needed.
1097  if (!llvm::dyn_cast<StmtList>(bodyStmts))
1098  ctx->EndScope();
1099 
1100  // Now emit code for the loop test.
1101  ctx->SetCurrentBasicBlock(btest);
1102  // First, emit code to restore the mask value for any lanes that
1103  // executed a 'continue' during the current loop before we go and emit
1104  // the code for the test. This is only necessary for varying loops;
1105  // 'uniform' loops just jump when they hit a continue statement and
1106  // don't mess with the mask.
1107  if (!uniformTest) {
1108  ctx->RestoreContinuedLanes();
1109  ctx->ClearBreakLanes();
1110  }
1111  llvm::Value *testValue = testExpr->GetValue(ctx);
1112  if (!testValue)
1113  return;
1114 
1115  if (uniformTest)
1116  // For the uniform case, just jump to the top of the loop or the
1117  // exit basic block depending on the value of the test.
1118  ctx->BranchInst(bloop, bexit, testValue);
1119  else {
1120  // For the varying case, update the mask based on the value of the
1121  // test. If any program instances still want to be running, jump
1122  // to the top of the loop. Otherwise, jump out.
1123  llvm::Value *mask = ctx->GetInternalMask();
1124  ctx->SetInternalMaskAnd(mask, testValue);
1125  ctx->BranchIfMaskAny(bloop, bexit);
1126  }
1127 
1128  // ...and we're done. Set things up for subsequent code to be emitted
1129  // in the right basic block.
1130  ctx->SetCurrentBasicBlock(bexit);
1131  ctx->EndLoop();
1132 }
1133 
1134 
1135 Stmt *
1137  const Type *testType;
1138  if (testExpr != NULL && (testType = testExpr->GetType()) != NULL) {
1139  // Should the test condition for the loop be uniform or varying?
1140  // It can be uniform only if three conditions are met:
1141  //
1142  // - First and foremost, the type of the test condition must be
1143  // uniform.
1144  //
1145  // - Second, the user must not have set the dis-optimization option
1146  // that disables uniform flow control.
1147  //
1148  // - Thirdly, and most subtlely, there must not be any break or
1149  // continue statements inside the loop that are within the scope
1150  // of a 'varying' if statement. If there are, then we type cast
1151  // the test to be 'varying', so that the code generated for the
1152  // loop includes masking stuff, so that we can track which lanes
1153  // actually want to be running, accounting for breaks/continues.
1154  //
1155  bool uniformTest = (testType->IsUniformType() &&
1160  "\"do\" statement");
1161  }
1162 
1163  return this;
1164 }
1165 
1166 
1167 int
1169  bool uniformTest = testExpr ? testExpr->GetType()->IsUniformType() :
1172 
1173  return uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP;
1174 }
1175 
1176 
1177 void
1178 DoStmt::Print(int indent) const {
1179  printf("%*cDo Stmt", indent, ' ');
1180  pos.Print();
1181  printf(":\n");
1182  printf("%*cTest: ", indent+4, ' ');
1183  if (testExpr) testExpr->Print();
1184  printf("\n");
1185  if (bodyStmts) {
1186  printf("%*cStmts:\n", indent+4, ' ');
1187  bodyStmts->Print(indent+8);
1188  }
1189 }
1190 
1191 
1192 ///////////////////////////////////////////////////////////////////////////
1193 // ForStmt
1194 
1195 ForStmt::ForStmt(Stmt *i, Expr *t, Stmt *s, Stmt *st, bool cc, SourcePos p)
1196  : Stmt(p, ForStmtID), init(i), test(t), step(s), stmts(st),
1197  doCoherentCheck(cc && !g->opt.disableCoherentControlFlow) {
1198 }
1199 
1200 
1201 void
1203  if (!ctx->GetCurrentBasicBlock())
1204  return;
1205 
1206  llvm::BasicBlock *btest = ctx->CreateBasicBlock("for_test");
1207  llvm::BasicBlock *bstep = ctx->CreateBasicBlock("for_step");
1208  llvm::BasicBlock *bloop = ctx->CreateBasicBlock("for_loop");
1209  llvm::BasicBlock *bexit = ctx->CreateBasicBlock("for_exit");
1210 
1211  bool uniformTest = test ? test->GetType()->IsUniformType() :
1214 
1215  ctx->StartLoop(bexit, bstep, uniformTest);
1216  ctx->SetDebugPos(pos);
1217 
1218  // If we have an initiailizer statement, start by emitting the code for
1219  // it and then jump into the loop test code. (Also start a new scope
1220  // since the initiailizer may be a declaration statement).
1221  if (init) {
1222  AssertPos(pos, llvm::dyn_cast<StmtList>(init) == NULL);
1223  ctx->StartScope();
1224  init->EmitCode(ctx);
1225  }
1226  ctx->BranchInst(btest);
1227 
1228  // Emit code to get the value of the loop test. If no test expression
1229  // was provided, just go with a true value.
1230  ctx->SetCurrentBasicBlock(btest);
1231  llvm::Value *ltest = NULL;
1232  if (test) {
1233  ltest = test->GetValue(ctx);
1234  if (!ltest) {
1235  ctx->EndScope();
1236  ctx->EndLoop();
1237  return;
1238  }
1239  }
1240  else
1241  ltest = uniformTest ? LLVMTrue : LLVMBoolVector(true);
1242 
1243  // Now use the test's value. For a uniform loop, we can either jump to
1244  // the loop body or the loop exit, based on whether it's true or false.
1245  // For a non-uniform loop, we update the mask and jump into the loop if
1246  // any of the mask values are true.
1247  if (uniformTest) {
1248  if (doCoherentCheck)
1249  Warning(test->pos, "Uniform condition supplied to cfor/cwhile "
1250  "statement.");
1251  AssertPos(pos, ltest->getType() == LLVMTypes::BoolType);
1252  ctx->BranchInst(bloop, bexit, ltest);
1253  }
1254  else {
1255  llvm::Value *mask = ctx->GetInternalMask();
1256  ctx->SetInternalMaskAnd(mask, ltest);
1257  ctx->BranchIfMaskAny(bloop, bexit);
1258  }
1259 
1260  // On to emitting the code for the loop body.
1261  ctx->SetCurrentBasicBlock(bloop);
1262  ctx->SetBlockEntryMask(ctx->GetFullMask());
1263  ctx->AddInstrumentationPoint("for loop body");
1264  if (!llvm::dyn_cast_or_null<StmtList>(stmts))
1265  ctx->StartScope();
1266 
1267  if (doCoherentCheck && !uniformTest) {
1268  // For 'varying' loops with the coherence check, we start by
1269  // checking to see if the mask is all on, after it has been updated
1270  // based on the value of the test.
1271  llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("for_all_on");
1272  llvm::BasicBlock *bMixed = ctx->CreateBasicBlock("for_mixed");
1273  ctx->BranchIfMaskAll(bAllOn, bMixed);
1274 
1275  // Emit code for the mask being all on. Explicitly set the mask to
1276  // be on so that the optimizer can see that it's on (i.e. now that
1277  // the runtime test has passed, make this fact clear for code
1278  // generation at compile time here.)
1279  ctx->SetCurrentBasicBlock(bAllOn);
1282  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
1285  if (stmts)
1286  stmts->EmitCode(ctx);
1288  ctx->SetFunctionMask(oldFunctionMask);
1289  ctx->BranchInst(bstep);
1290 
1291  // Emit code for the mask being mixed. We should never run the
1292  // loop with the mask all off, based on the BranchIfMaskAny call
1293  // above.
1294  ctx->SetCurrentBasicBlock(bMixed);
1295  if (stmts)
1296  stmts->EmitCode(ctx);
1297  ctx->BranchInst(bstep);
1298  }
1299  else {
1300  // For both uniform loops and varying loops without the coherence
1301  // check, we know that at least one program instance wants to be
1302  // running the loop, so just emit code for the loop body and jump
1303  // to the loop step code.
1304  if (stmts)
1305  stmts->EmitCode(ctx);
1306  if (ctx->GetCurrentBasicBlock())
1307  ctx->BranchInst(bstep);
1308  }
1309  if (!llvm::dyn_cast_or_null<StmtList>(stmts))
1310  ctx->EndScope();
1311 
1312  // Emit code for the loop step. First, restore the lane mask of any
1313  // program instances that executed a 'continue' during the previous
1314  // iteration. Then emit code for the loop step and then jump to the
1315  // test code.
1316  ctx->SetCurrentBasicBlock(bstep);
1317  ctx->RestoreContinuedLanes();
1318  ctx->ClearBreakLanes();
1319 
1320  if (step)
1321  step->EmitCode(ctx);
1322  ctx->BranchInst(btest);
1323 
1324  // Set the current emission basic block to the loop exit basic block
1325  ctx->SetCurrentBasicBlock(bexit);
1326  if (init)
1327  ctx->EndScope();
1328  ctx->EndLoop();
1329 }
1330 
1331 
1332 Stmt *
1334  const Type *testType;
1335  if (test && (testType = test->GetType()) != NULL) {
1336  // See comments in DoStmt::TypeCheck() regarding
1337  // 'uniformTest' and the type conversion here.
1338  bool uniformTest = (testType->IsUniformType() &&
1343  "\"for\"/\"while\" statement");
1344  if (test == NULL)
1345  return NULL;
1346  }
1347 
1348  return this;
1349 }
1350 
1351 
1352 int
1354  bool uniformTest = test ? test->GetType()->IsUniformType() :
1357 
1358  return uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP;
1359 }
1360 
1361 
1362 void
1363 ForStmt::Print(int indent) const {
1364  printf("%*cFor Stmt", indent, ' ');
1365  pos.Print();
1366  printf("\n");
1367  if (init) {
1368  printf("%*cInit:\n", indent+4, ' ');
1369  init->Print(indent+8);
1370  }
1371  if (test) {
1372  printf("%*cTest: ", indent+4, ' ');
1373  test->Print();
1374  printf("\n");
1375  }
1376  if (step) {
1377  printf("%*cStep:\n", indent+4, ' ');
1378  step->Print(indent+8);
1379  }
1380  if (stmts) {
1381  printf("%*cStmts:\n", indent+4, ' ');
1382  stmts->Print(indent+8);
1383  }
1384 }
1385 
1386 ///////////////////////////////////////////////////////////////////////////
1387 // BreakStmt
1388 
1390  : Stmt(p, BreakStmtID) {
1391 }
1392 
1393 
1394 void
1396  if (!ctx->GetCurrentBasicBlock())
1397  return;
1398 
1399  ctx->SetDebugPos(pos);
1400  ctx->Break(true);
1401 }
1402 
1403 
1404 Stmt *
1406  return this;
1407 }
1408 
1409 
1410 int
1412  return COST_BREAK_CONTINUE;
1413 }
1414 
1415 
1416 void
1417 BreakStmt::Print(int indent) const {
1418  printf("%*cBreak Stmt", indent, ' ');
1419  pos.Print();
1420  printf("\n");
1421 }
1422 
1423 
1424 ///////////////////////////////////////////////////////////////////////////
1425 // ContinueStmt
1426 
1428  : Stmt(p, ContinueStmtID) {
1429 }
1430 
1431 
1432 void
1434  if (!ctx->GetCurrentBasicBlock())
1435  return;
1436 
1437  ctx->SetDebugPos(pos);
1438  ctx->Continue(true);
1439 }
1440 
1441 
1442 Stmt *
1444  return this;
1445 }
1446 
1447 
1448 int
1450  return COST_BREAK_CONTINUE;
1451 }
1452 
1453 
1454 void
1455 ContinueStmt::Print(int indent) const {
1456  printf("%*cContinue Stmt", indent, ' ');
1457  pos.Print();
1458  printf("\n");
1459 }
1460 
1461 
1462 ///////////////////////////////////////////////////////////////////////////
1463 // ForeachStmt
1464 
1465 ForeachStmt::ForeachStmt(const std::vector<Symbol *> &lvs,
1466  const std::vector<Expr *> &se,
1467  const std::vector<Expr *> &ee,
1468  Stmt *s, bool t, SourcePos pos)
1469  : Stmt(pos, ForeachStmtID), dimVariables(lvs), startExprs(se), endExprs(ee), isTiled(t),
1470  stmts(s) {
1471 }
1472 
1473 
1474 /* Given a uniform counter value in the memory location pointed to by
1475  uniformCounterPtr, compute the corresponding set of varying counter
1476  values for use within the loop body.
1477  */
1478 static llvm::Value *
1480  llvm::Value *uniformCounterPtr,
1481  llvm::Value *varyingCounterPtr,
1482  const std::vector<int> &spans) {
1483 #ifdef ISPC_NVPTX_ENABLED
1484  if (g->target->getISA() == Target::NVPTX)
1485  {
1486  // Smear the uniform counter value out to be varying
1487  llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
1488  llvm::Value *smearCounter = ctx->BroadcastValue(
1489  counter, LLVMTypes::Int32VectorType, "smear_counter");
1490 
1491  // Figure out the offsets; this is a little bit tricky. As an example,
1492  // consider a 2D tiled foreach loop, where we're running 8-wide and
1493  // where the inner dimension has a stride of 4 and the outer dimension
1494  // has a stride of 2. For the inner dimension, we want the offsets
1495  // (0,1,2,3,0,1,2,3), and for the outer dimension we want
1496  // (0,0,0,0,1,1,1,1).
1497  int32_t delta[ISPC_MAX_NVEC];
1498  const int vecWidth = 32;
1499  std::vector<llvm::Constant*> constDeltaList;
1500  for (int i = 0; i < vecWidth; ++i)
1501  {
1502  int d = i;
1503  // First, account for the effect of any dimensions at deeper
1504  // nesting levels than the current one.
1505  int prevDimSpanCount = 1;
1506  for (int j = dim; j < nDims-1; ++j)
1507  prevDimSpanCount *= spans[j+1];
1508  d /= prevDimSpanCount;
1509 
1510  // And now with what's left, figure out our own offset
1511  delta[i] = d % spans[dim];
1512  constDeltaList.push_back(LLVMInt8(delta[i]));
1513  }
1514 
1515  llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
1516  // llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
1517 
1518 
1519  llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
1520  /*Module=*/*m->module,
1521  /*Type=*/ArrayDelta,
1522  /*isConstant=*/true,
1523  /*Linkage=*/llvm::GlobalValue::PrivateLinkage,
1524  /*Initializer=*/0, // has initializer, specified below
1525  /*Name=*/"constDeltaForeach");
1526 #if 0
1527  /*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
1528  /*unsigned AddressSpace=*/4 /*constant*/);
1529 #endif
1530 
1531 
1532  llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
1533 
1534  globalDelta->setInitializer(constDelta);
1535  llvm::Function *func_program_index = m->module->getFunction("__program_index");
1536  llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
1537 
1538  std::vector<llvm::Value*> ptr_arrayidx_indices;
1539  ptr_arrayidx_indices.push_back(LLVMInt32(0));
1540  ptr_arrayidx_indices.push_back(laneIdx);
1541 #if 1
1542 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1543  llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
1544 #else
1545  llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(NULL, globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
1546 #endif
1547  llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
1548  llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
1549 
1550  llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
1551  llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
1552 
1553  llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
1554  // llvm::UndefValue(LLVMInt32Vector),
1555  const_packed_41,
1556  int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
1557 #endif
1558 
1559 
1560  // Add the deltas to compute the varying counter values; store the
1561  // result to memory and then return it directly as well.
1562 #if 0
1563  llvm::Value *varyingCounter =
1564  ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
1565  LLVMInt32Vector(delta), "iter_val");
1566 #else
1567  llvm::Value *varyingCounter =
1568  ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
1569  packed_43, "iter_val");
1570 #endif
1571  ctx->StoreInst(varyingCounter, varyingCounterPtr);
1572  return varyingCounter;
1573  }
1574 #endif /* ISPC_NVPTX_ENABLED */
1575 
1576  // Smear the uniform counter value out to be varying
1577  llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
1578  llvm::Value *smearCounter = ctx->BroadcastValue(
1579  counter, LLVMTypes::Int32VectorType, "smear_counter");
1580 
1581  // Figure out the offsets; this is a little bit tricky. As an example,
1582  // consider a 2D tiled foreach loop, where we're running 8-wide and
1583  // where the inner dimension has a stride of 4 and the outer dimension
1584  // has a stride of 2. For the inner dimension, we want the offsets
1585  // (0,1,2,3,0,1,2,3), and for the outer dimension we want
1586  // (0,0,0,0,1,1,1,1).
1587  int32_t delta[ISPC_MAX_NVEC];
1588  for (int i = 0; i < g->target->getVectorWidth(); ++i) {
1589  int d = i;
1590  // First, account for the effect of any dimensions at deeper
1591  // nesting levels than the current one.
1592  int prevDimSpanCount = 1;
1593  for (int j = dim; j < nDims-1; ++j)
1594  prevDimSpanCount *= spans[j+1];
1595  d /= prevDimSpanCount;
1596 
1597  // And now with what's left, figure out our own offset
1598  delta[i] = d % spans[dim];
1599  }
1600 
1601  // Add the deltas to compute the varying counter values; store the
1602  // result to memory and then return it directly as well.
1603  llvm::Value *varyingCounter =
1604  ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
1605  LLVMInt32Vector(delta), "iter_val");
1606  ctx->StoreInst(varyingCounter, varyingCounterPtr);
1607  return varyingCounter;
1608 }
1609 
1610 
1611 /** Returns the integer log2 of the given integer. */
1612 static int
1613 lLog2(int i) {
1614  int ret = 0;
1615  while (i != 0) {
1616  ++ret;
1617  i >>= 1;
1618  }
1619  return ret-1;
1620 }
1621 
1622 
1623 /* Figure out how many elements to process in each dimension for each time
1624  through a foreach loop. The untiled case is easy; all of the outer
1625  dimensions up until the innermost one have a span of 1, and the
1626  innermost one takes the entire vector width. For the tiled case, we
1627  give wider spans to the innermost dimensions while also trying to
1628  generate relatively square domains.
1629 
1630  This code works recursively from outer dimensions to inner dimensions.
1631  */
1632 static void
1633 lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a) {
1634  if (dimsLeft == 0) {
1635  // Nothing left to do but give all of the remaining work to the
1636  // innermost domain.
1637  *a = itemsLeft;
1638  return;
1639  }
1640 
1641  if (isTiled == false || (dimsLeft >= lLog2(itemsLeft)))
1642  // If we're not tiled, or if there are enough dimensions left that
1643  // giving this one any more than a span of one would mean that a
1644  // later dimension would have to have a span of one, give this one
1645  // a span of one to save the available items for later.
1646  *a = 1;
1647  else if (itemsLeft >= 16 && (dimsLeft == 1))
1648  // Special case to have 4x4 domains for the 2D case when running
1649  // 16-wide.
1650  *a = 4;
1651  else
1652  // Otherwise give this dimension a span of two.
1653  *a = 2;
1654 
1655  lGetSpans(dimsLeft-1, nDims, itemsLeft / *a, isTiled, a+1);
1656 }
1657 
1658 
1659 /* Emit code for a foreach statement. We effectively emit code to run the
1660  set of n-dimensional nested loops corresponding to the dimensionality of
1661  the foreach statement along with the extra logic to deal with mismatches
1662  between the vector width we're compiling to and the number of elements
1663  to process.
1664  */
1665 void
1667  if (ctx->GetCurrentBasicBlock() == NULL || stmts == NULL)
1668  return;
1669 
1670  llvm::BasicBlock *bbFullBody = ctx->CreateBasicBlock("foreach_full_body");
1671  llvm::BasicBlock *bbMaskedBody = ctx->CreateBasicBlock("foreach_masked_body");
1672  llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit");
1673 
1674  llvm::Value *oldMask = ctx->GetInternalMask();
1675  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
1676 
1677  ctx->SetDebugPos(pos);
1678  ctx->StartScope();
1679 
1682 
1683  // This should be caught during typechecking
1684  AssertPos(pos, startExprs.size() == dimVariables.size() &&
1685  endExprs.size() == dimVariables.size());
1686  int nDims = (int)dimVariables.size();
1687 
1688  ///////////////////////////////////////////////////////////////////////
1689  // Setup: compute the number of items we have to work on in each
1690  // dimension and a number of derived values.
1691  std::vector<llvm::BasicBlock *> bbReset, bbStep, bbTest;
1692  std::vector<llvm::Value *> startVals, endVals, uniformCounterPtrs;
1693  std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
1694 
1695  std::vector<int> span(nDims, 0);
1696 #ifdef ISPC_NVPTX_ENABLED
1697  const int vectorWidth =
1698  g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth();
1699  lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]);
1700 #else /* ISPC_NVPTX_ENABLED */
1701  lGetSpans(nDims-1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
1702 #endif /* ISPC_NVPTX_ENABLED */
1703 
1704  for (int i = 0; i < nDims; ++i) {
1705  // Basic blocks that we'll fill in later with the looping logic for
1706  // this dimension.
1707  bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
1708  if (i < nDims-1)
1709  // stepping for the innermost dimension is handled specially
1710  bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
1711  bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
1712 
1713  // Start and end value for this loop dimension
1714  llvm::Value *sv = startExprs[i]->GetValue(ctx);
1715  llvm::Value *ev = endExprs[i]->GetValue(ctx);
1716  if (sv == NULL || ev == NULL)
1717  return;
1718  startVals.push_back(sv);
1719  endVals.push_back(ev);
1720 
1721  // nItems = endVal - startVal
1722  llvm::Value *nItems =
1723  ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
1724 
1725  // nExtras = nItems % (span for this dimension)
1726  // This gives us the number of extra elements we need to deal with
1727  // at the end of the loop for this dimension that don't fit cleanly
1728  // into a vector width.
1729  nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
1730  LLVMInt32(span[i]), "nextras"));
1731 
1732  // alignedEnd = endVal - nExtras
1733  alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
1734  nExtras[i], "aligned_end"));
1735 
1736  ///////////////////////////////////////////////////////////////////////
1737  // Each dimension has a loop counter that is a uniform value that
1738  // goes from startVal to endVal, in steps of the span for this
1739  // dimension. Its value is only used internally here for looping
1740  // logic and isn't directly available in the user's program code.
1741  uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
1742  "counter"));
1743  ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
1744 
1745  // There is also a varying variable that holds the set of index
1746  // values for each dimension in the current loop iteration; this is
1747  // the value that is program-visible.
1748  dimVariables[i]->storagePtr =
1750  dimVariables[i]->name.c_str());
1751  dimVariables[i]->parentFunction = ctx->GetFunction();
1753 
1754  // Each dimension also maintains a mask that represents which of
1755  // the varying elements in the current iteration should be
1756  // processed. (i.e. this is used to disable the lanes that have
1757  // out-of-bounds offsets.)
1758  extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
1759  ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
1760  }
1761 
1763 
1764  // On to the outermost loop's test
1765  ctx->BranchInst(bbTest[0]);
1766 
1767  ///////////////////////////////////////////////////////////////////////////
1768  // foreach_reset: this code runs when we need to reset the counter for
1769  // a given dimension in preparation for running through its loop again,
1770  // after the enclosing level advances its counter.
1771  for (int i = 0; i < nDims; ++i) {
1772  ctx->SetCurrentBasicBlock(bbReset[i]);
1773  if (i == 0)
1774  ctx->BranchInst(bbExit);
1775  else {
1776  ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
1777  ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
1778  ctx->BranchInst(bbStep[i-1]);
1779  }
1780  }
1781 
1782  ///////////////////////////////////////////////////////////////////////////
1783  // foreach_step: increment the uniform counter by the vector width.
1784  // Note that we don't increment the varying counter here as well but
1785  // just generate its value when we need it in the loop body. Don't do
1786  // this for the innermost dimension, which has a more complex stepping
1787  // structure..
1788  for (int i = 0; i < nDims-1; ++i) {
1789  ctx->SetCurrentBasicBlock(bbStep[i]);
1790  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
1791  llvm::Value *newCounter =
1792  ctx->BinaryOperator(llvm::Instruction::Add, counter,
1793  LLVMInt32(span[i]), "new_counter");
1794  ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
1795  ctx->BranchInst(bbTest[i]);
1796  }
1797 
1798  ///////////////////////////////////////////////////////////////////////////
1799  // foreach_test (for all dimensions other than the innermost...)
1800  std::vector<llvm::Value *> inExtras;
1801  for (int i = 0; i < nDims-1; ++i) {
1802  ctx->SetCurrentBasicBlock(bbTest[i]);
1803 
1804  llvm::Value *haveExtras =
1805  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
1806  endVals[i], alignedEnd[i], "have_extras");
1807 
1808  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
1809  llvm::Value *atAlignedEnd =
1810  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
1811  counter, alignedEnd[i], "at_aligned_end");
1812  llvm::Value *inEx =
1813  ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
1814  atAlignedEnd, "in_extras");
1815 
1816  if (i == 0)
1817  inExtras.push_back(inEx);
1818  else
1819  inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
1820  inExtras[i-1], "in_extras_all"));
1821 
1822  llvm::Value *varyingCounter =
1823  lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
1824  dimVariables[i]->storagePtr, span);
1825 
1826  llvm::Value *smearEnd = ctx->BroadcastValue(
1827  endVals[i], LLVMTypes::Int32VectorType, "smear_end");
1828 
1829  // Do a vector compare of its value to the end value to generate a
1830  // mask for this last bit of work.
1831  llvm::Value *emask =
1832  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
1833  varyingCounter, smearEnd);
1834  emask = ctx->I1VecToBoolVec(emask);
1835 
1836  if (i == 0)
1837  ctx->StoreInst(emask, extrasMaskPtrs[i]);
1838  else {
1839  llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
1840  llvm::Value *newMask =
1841  ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
1842  "extras_mask");
1843  ctx->StoreInst(newMask, extrasMaskPtrs[i]);
1844  }
1845 
1846  llvm::Value *notAtEnd =
1847  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
1848  counter, endVals[i]);
1849  ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
1850  }
1851 
1852  ///////////////////////////////////////////////////////////////////////////
1853  // foreach_test (for innermost dimension)
1854  //
1855  // All of the outer dimensions are handled generically--basically as a
1856  // for() loop from the start value to the end value, where at each loop
1857  // test, we compute the mask of active elements for the current
1858  // dimension and then update an overall mask that is the AND
1859  // combination of all of the outer ones.
1860  //
1861  // The innermost loop is handled specially, for performance purposes.
1862  // When starting the innermost dimension, we start by checking once
1863  // whether any of the outer dimensions has set the mask to be
1864  // partially-active or not. We follow different code paths for these
1865  // two cases, taking advantage of the knowledge that the mask is all
1866  // on, when this is the case.
1867  //
1868  // In each of these code paths, we start with a loop from the starting
1869  // value to the aligned end value for the innermost dimension; we can
1870  // guarantee that the innermost loop will have an "all on" mask (as far
1871  // as its dimension is concerned) for the duration of this loop. Doing
1872  // so allows us to emit code that assumes the mask is all on (for the
1873  // case where none of the outer dimensions has set the mask to be
1874  // partially on), or allows us to emit code that just uses the mask
1875  // from the outer dimensions directly (for the case where they have).
1876  //
1877  // After this loop, we just need to deal with one vector's worth of
1878  // "ragged extra bits", where the mask used includes the effect of the
1879  // mask for the innermost dimension.
1880  //
1881  // We start out this process by emitting the check that determines
1882  // whether any of the enclosing dimensions is partially active
1883  // (i.e. processing extra elements that don't exactly fit into a
1884  // vector).
1885  llvm::BasicBlock *bbOuterInExtras =
1886  ctx->CreateBasicBlock("outer_in_extras");
1887  llvm::BasicBlock *bbOuterNotInExtras =
1888  ctx->CreateBasicBlock("outer_not_in_extras");
1889 
1890  ctx->SetCurrentBasicBlock(bbTest[nDims-1]);
1891  if (inExtras.size())
1892  ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
1893  inExtras.back());
1894  else
1895  // for a 1D iteration domain, we certainly don't have any enclosing
1896  // dimensions that are processing extra elements.
1897  ctx->BranchInst(bbOuterNotInExtras);
1898 
1899  ///////////////////////////////////////////////////////////////////////////
1900  // One or more outer dimensions in extras, so we need to mask for the loop
1901  // body regardless. We break this into two cases, roughly:
1902  // for (counter = start; counter < alignedEnd; counter += step) {
1903  // // mask is all on for inner, so set mask to outer mask
1904  // // run loop body with mask
1905  // }
1906  // // counter == alignedEnd
1907  // if (counter < end) {
1908  // // set mask to outermask & (counter+programCounter < end)
1909  // // run loop body with mask
1910  // }
1911  llvm::BasicBlock *bbAllInnerPartialOuter =
1912  ctx->CreateBasicBlock("all_inner_partial_outer");
1913  llvm::BasicBlock *bbPartial =
1914  ctx->CreateBasicBlock("both_partial");
1915  ctx->SetCurrentBasicBlock(bbOuterInExtras); {
1916  // Update the varying counter value here, since all subsequent
1917  // blocks along this path need it.
1918  lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
1919  dimVariables[nDims-1]->storagePtr, span);
1920 
1921  // here we just check to see if counter < alignedEnd
1922  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
1923  llvm::Value *beforeAlignedEnd =
1924  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
1925  counter, alignedEnd[nDims-1], "before_aligned_end");
1926  ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
1927  }
1928 
1929  // Below we have a basic block that runs the loop body code for the
1930  // case where the mask is partially but not fully on. This same block
1931  // runs in multiple cases: both for handling any ragged extra data for
1932  // the innermost dimension but also when outer dimensions have set the
1933  // mask to be partially on.
1934  //
1935  // The value stored in stepIndexAfterMaskedBodyPtr is used after each
1936  // execution of the body code to determine whether the innermost index
1937  // value should be incremented by the step (we're running the "for"
1938  // loop of full vectors at the innermost dimension, with outer
1939  // dimensions having set the mask to be partially on), or whether we're
1940  // running once for the ragged extra bits at the end of the innermost
1941  // dimension, in which case we're done with the innermost dimension and
1942  // should step the loop counter for the next enclosing dimension
1943  // instead.
1944  llvm::Value *stepIndexAfterMaskedBodyPtr =
1945  ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
1946 
1947  ///////////////////////////////////////////////////////////////////////////
1948  // We're in the inner loop part where the only masking is due to outer
1949  // dimensions but the innermost dimension fits fully into a vector's
1950  // width. Set the mask and jump to the masked loop body.
1951  ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
1952  llvm::Value *mask;
1953  if (nDims == 1)
1954  // 1D loop; we shouldn't ever get here anyway
1955  mask = LLVMMaskAllOff;
1956  else
1957  mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
1958 
1959  ctx->SetInternalMask(mask);
1960 
1961  ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
1962  ctx->BranchInst(bbMaskedBody);
1963  }
1964 
1965  ///////////////////////////////////////////////////////////////////////////
1966  // We need to include the effect of the innermost dimension in the mask
1967  // for the final bits here
1968  ctx->SetCurrentBasicBlock(bbPartial); {
1969  llvm::Value *varyingCounter =
1970  ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
1971  llvm::Value *smearEnd = ctx->BroadcastValue(
1972  endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
1973 
1974  llvm::Value *emask =
1975  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
1976  varyingCounter, smearEnd);
1977  emask = ctx->I1VecToBoolVec(emask);
1978 
1979  if (nDims == 1) {
1980  ctx->SetInternalMask(emask);
1981  }
1982  else {
1983  llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
1984  llvm::Value *newMask =
1985  ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
1986  "extras_mask");
1987  ctx->SetInternalMask(newMask);
1988  }
1989 
1990  ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
1991 
1992  // check to see if counter != end, otherwise, the next step is not necessary
1993  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
1994  llvm::Value *atEnd =
1995  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
1996  counter, endVals[nDims-1], "at_end");
1997  ctx->BranchInst(bbMaskedBody, bbReset[nDims-1], atEnd);
1998  }
1999 
2000  ///////////////////////////////////////////////////////////////////////////
2001  // None of the outer dimensions is processing extras; along the lines
2002  // of above, we can express this as:
2003  // for (counter = start; counter < alignedEnd; counter += step) {
2004  // // mask is all on
2005  // // run loop body with mask all on
2006  // }
2007  // // counter == alignedEnd
2008  // if (counter < end) {
2009  // // set mask to (counter+programCounter < end)
2010  // // run loop body with mask
2011  // }
2012  llvm::BasicBlock *bbPartialInnerAllOuter =
2013  ctx->CreateBasicBlock("partial_inner_all_outer");
2014  ctx->SetCurrentBasicBlock(bbOuterNotInExtras); {
2015  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
2016  llvm::Value *beforeAlignedEnd =
2017  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
2018  counter, alignedEnd[nDims-1], "before_aligned_end");
2019  ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
2020  beforeAlignedEnd);
2021  }
2022 
2023  ///////////////////////////////////////////////////////////////////////////
2024  // full_body: do a full vector's worth of work. We know that all
2025  // lanes will be running here, so we explicitly set the mask to be 'all
2026  // on'. This ends up being relatively straightforward: just update the
2027  // value of the varying loop counter and have the statements in the
2028  // loop body emit their code.
2029  llvm::BasicBlock *bbFullBodyContinue =
2030  ctx->CreateBasicBlock("foreach_full_continue");
2031  ctx->SetCurrentBasicBlock(bbFullBody); {
2034  lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
2035  dimVariables[nDims-1]->storagePtr, span);
2036  ctx->SetContinueTarget(bbFullBodyContinue);
2037  ctx->AddInstrumentationPoint("foreach loop body (all on)");
2038  stmts->EmitCode(ctx);
2039  AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
2040  ctx->BranchInst(bbFullBodyContinue);
2041  }
2042  ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
2043  ctx->RestoreContinuedLanes();
2044  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
2045  llvm::Value *newCounter =
2046  ctx->BinaryOperator(llvm::Instruction::Add, counter,
2047  LLVMInt32(span[nDims-1]), "new_counter");
2048  ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
2049  ctx->BranchInst(bbOuterNotInExtras);
2050  }
2051 
2052  ///////////////////////////////////////////////////////////////////////////
2053  // We're done running blocks with the mask all on; see if the counter is
2054  // less than the end value, in which case we need to run the body one
2055  // more time to get the extra bits.
2056  llvm::BasicBlock *bbSetInnerMask =
2057  ctx->CreateBasicBlock("partial_inner_only");
2058  ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); {
2059  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
2060  llvm::Value *beforeFullEnd =
2061  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
2062  counter, endVals[nDims-1], "before_full_end");
2063  ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
2064  }
2065 
2066  ///////////////////////////////////////////////////////////////////////////
2067  // The outer dimensions are all on, so the mask is just given by the
2068  // mask for the innermost dimension
2069  ctx->SetCurrentBasicBlock(bbSetInnerMask); {
2070  llvm::Value *varyingCounter =
2071  lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
2072  dimVariables[nDims-1]->storagePtr, span);
2073  llvm::Value *smearEnd = ctx->BroadcastValue(
2074  endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
2075  llvm::Value *emask =
2076  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
2077  varyingCounter, smearEnd);
2078  emask = ctx->I1VecToBoolVec(emask);
2079  ctx->SetInternalMask(emask);
2080  ctx->SetBlockEntryMask(emask);
2081 
2082  ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
2083  ctx->BranchInst(bbMaskedBody);
2084  }
2085 
2086  ///////////////////////////////////////////////////////////////////////////
2087  // masked_body: set the mask and have the statements emit their
2088  // code again. Note that it's generally worthwhile having two copies
2089  // of the statements' code, since the code above is emitted with the
2090  // mask known to be all-on, which in turn leads to more efficient code
2091  // for that case.
2092  llvm::BasicBlock *bbStepInnerIndex =
2093  ctx->CreateBasicBlock("step_inner_index");
2094  llvm::BasicBlock *bbMaskedBodyContinue =
2095  ctx->CreateBasicBlock("foreach_masked_continue");
2096  ctx->SetCurrentBasicBlock(bbMaskedBody); {
2097  ctx->AddInstrumentationPoint("foreach loop body (masked)");
2098  ctx->SetContinueTarget(bbMaskedBodyContinue);
2100  ctx->SetBlockEntryMask(ctx->GetFullMask());
2101  stmts->EmitCode(ctx);
2103  ctx->BranchInst(bbMaskedBodyContinue);
2104  }
2105  ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); {
2106  ctx->RestoreContinuedLanes();
2107  llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
2108  ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
2109  }
2110 
2111  ///////////////////////////////////////////////////////////////////////////
2112  // step the innermost index, for the case where we're doing the
2113  // innermost for loop over full vectors.
2114  ctx->SetCurrentBasicBlock(bbStepInnerIndex); {
2115  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
2116  llvm::Value *newCounter =
2117  ctx->BinaryOperator(llvm::Instruction::Add, counter,
2118  LLVMInt32(span[nDims-1]), "new_counter");
2119  ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
2120  ctx->BranchInst(bbOuterInExtras);
2121  }
2122 
2123  ///////////////////////////////////////////////////////////////////////////
2124  // foreach_exit: All done. Restore the old mask and clean up
2125  ctx->SetCurrentBasicBlock(bbExit);
2126 
2127  ctx->SetInternalMask(oldMask);
2128  ctx->SetFunctionMask(oldFunctionMask);
2129 
2130  ctx->EndForeach();
2131  ctx->EndScope();
2132 }
2133 
2134 
2135 Stmt *
2137  bool anyErrors = false;
2138  for (unsigned int i = 0; i < startExprs.size(); ++i) {
2139  if (startExprs[i] != NULL)
2142  "foreach starting value");
2143  anyErrors |= (startExprs[i] == NULL);
2144  }
2145  for (unsigned int i = 0; i < endExprs.size(); ++i) {
2146  if (endExprs[i] != NULL)
2148  "foreach ending value");
2149  anyErrors |= (endExprs[i] == NULL);
2150  }
2151 
2152  if (startExprs.size() < dimVariables.size()) {
2153  Error(pos, "Not enough initial values provided for \"foreach\" loop; "
2154  "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size());
2155  anyErrors = true;
2156  }
2157  else if (startExprs.size() > dimVariables.size()) {
2158  Error(pos, "Too many initial values provided for \"foreach\" loop; "
2159  "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size());
2160  anyErrors = true;
2161  }
2162 
2163  if (endExprs.size() < dimVariables.size()) {
2164  Error(pos, "Not enough initial values provided for \"foreach\" loop; "
2165  "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size());
2166  anyErrors = true;
2167  }
2168  else if (endExprs.size() > dimVariables.size()) {
2169  Error(pos, "Too many initial values provided for \"foreach\" loop; "
2170  "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size());
2171  anyErrors = true;
2172  }
2173 
2174  return anyErrors ? NULL : this;
2175 }
2176 
2177 
2178 int
2181 }
2182 
2183 
2184 void
2185 ForeachStmt::Print(int indent) const {
2186  printf("%*cForeach Stmt", indent, ' ');
2187  pos.Print();
2188  printf("\n");
2189 
2190  for (unsigned int i = 0; i < dimVariables.size(); ++i)
2191  if (dimVariables[i] != NULL)
2192  printf("%*cVar %d: %s\n", indent+4, ' ', i,
2193  dimVariables[i]->name.c_str());
2194  else
2195  printf("%*cVar %d: NULL\n", indent+4, ' ', i);
2196 
2197  printf("Start values:\n");
2198  for (unsigned int i = 0; i < startExprs.size(); ++i) {
2199  if (startExprs[i] != NULL)
2200  startExprs[i]->Print();
2201  else
2202  printf("NULL");
2203  if (i != startExprs.size()-1)
2204  printf(", ");
2205  else
2206  printf("\n");
2207  }
2208 
2209  printf("End values:\n");
2210  for (unsigned int i = 0; i < endExprs.size(); ++i) {
2211  if (endExprs[i] != NULL)
2212  endExprs[i]->Print();
2213  else
2214  printf("NULL");
2215  if (i != endExprs.size()-1)
2216  printf(", ");
2217  else
2218  printf("\n");
2219  }
2220 
2221  if (stmts != NULL) {
2222  printf("%*cStmts:\n", indent+4, ' ');
2223  stmts->Print(indent+8);
2224  }
2225 }
2226 
2227 
2228 ///////////////////////////////////////////////////////////////////////////
2229 // ForeachActiveStmt
2230 
2232  : Stmt(pos, ForeachActiveStmtID) {
2233  sym = s;
2234  stmts = st;
2235 }
2236 
2237 
2238 void
2240  if (!ctx->GetCurrentBasicBlock())
2241  return;
2242 
2243  // Allocate storage for the symbol that we'll use for the uniform
2244  // variable that holds the current program instance in each loop
2245  // iteration.
2246  if (sym->type == NULL) {
2247  Assert(m->errorCount > 0);
2248  return;
2249  }
2253 
2254  ctx->SetDebugPos(pos);
2255  ctx->EmitVariableDebugInfo(sym);
2256 
2257  // The various basic blocks that we'll need in the below
2258  llvm::BasicBlock *bbFindNext =
2259  ctx->CreateBasicBlock("foreach_active_find_next");
2260  llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_active_body");
2261  llvm::BasicBlock *bbCheckForMore =
2262  ctx->CreateBasicBlock("foreach_active_check_for_more");
2263  llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("foreach_active_done");
2264 
2265  // Save the old mask so that we can restore it at the end
2266  llvm::Value *oldInternalMask = ctx->GetInternalMask();
2267 
2268  // Now, *maskBitsPtr will maintain a bitmask for the lanes that remain
2269  // to be processed by a pass through the loop body. It starts out with
2270  // the current execution mask (which should never be all off going in
2271  // to this)...
2272  llvm::Value *oldFullMask = ctx->GetFullMask();
2273  llvm::Value *maskBitsPtr =
2274  ctx->AllocaInst(LLVMTypes::Int64Type, "mask_bits");
2275  llvm::Value *movmsk = ctx->LaneMask(oldFullMask);
2276  ctx->StoreInst(movmsk, maskBitsPtr);
2277 
2278  // Officially start the loop.
2279  ctx->StartScope();
2281  ctx->SetContinueTarget(bbCheckForMore);
2282 
2283  // Onward to find the first set of program instance to run the loop for
2284  ctx->BranchInst(bbFindNext);
2285 
2286  ctx->SetCurrentBasicBlock(bbFindNext); {
2287  // Load the bitmask of the lanes left to be processed
2288  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2289 
2290  // Find the index of the first set bit in the mask
2291  llvm::Function *ctlzFunc =
2292  m->module->getFunction("__count_trailing_zeros_i64");
2293  Assert(ctlzFunc != NULL);
2294  llvm::Value *firstSet = ctx->CallInst(ctlzFunc, NULL, remainingBits,
2295  "first_set");
2296 
2297  // Store that value into the storage allocated for the iteration
2298  // variable.
2299  ctx->StoreInst(firstSet, sym->storagePtr);
2300 
2301  // Now set the execution mask to be only on for the current program
2302  // instance. (TODO: is there a more efficient way to do this? e.g.
2303  // for AVX1, we might want to do this as float rather than int
2304  // math...)
2305 
2306  // Get the "program index" vector value
2307 #ifdef ISPC_NVPTX_ENABLED
2308  llvm::Value *programIndex = g->target->getISA() == Target::NVPTX ?
2309  ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector();
2310 #else /* ISPC_NVPTX_ENABLED */
2311  llvm::Value *programIndex = ctx->ProgramIndexVector();
2312 #endif /* ISPC_NVPTX_ENABLED */
2313 
2314  // And smear the current lane out to a vector
2315  llvm::Value *firstSet32 =
2316  ctx->TruncInst(firstSet, LLVMTypes::Int32Type, "first_set32");
2317  llvm::Value *firstSet32Smear = ctx->SmearUniform(firstSet32);
2318 
2319  // Now set the execution mask based on doing a vector compare of
2320  // these two
2321  llvm::Value *iterMask =
2322  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
2323  firstSet32Smear, programIndex);
2324  iterMask = ctx->I1VecToBoolVec(iterMask);
2325 
2326  ctx->SetInternalMask(iterMask);
2327 
2328  // Also update the bitvector of lanes left to turn off the bit for
2329  // the lane we're about to run.
2330  llvm::Value *setMask =
2331  ctx->BinaryOperator(llvm::Instruction::Shl, LLVMInt64(1),
2332  firstSet, "set_mask");
2333  llvm::Value *notSetMask = ctx->NotOperator(setMask);
2334  llvm::Value *newRemaining =
2335  ctx->BinaryOperator(llvm::Instruction::And, remainingBits,
2336  notSetMask, "new_remaining");
2337  ctx->StoreInst(newRemaining, maskBitsPtr);
2338 
2339  // and onward to run the loop body...
2340  ctx->BranchInst(bbBody);
2341  }
2342 
2343  ctx->SetCurrentBasicBlock(bbBody); {
2344  ctx->SetBlockEntryMask(ctx->GetFullMask());
2345 
2346  // Run the code in the body of the loop. This is easy now.
2347  if (stmts)
2348  stmts->EmitCode(ctx);
2349 
2350  Assert(ctx->GetCurrentBasicBlock() != NULL);
2351  ctx->BranchInst(bbCheckForMore);
2352  }
2353 
2354  ctx->SetCurrentBasicBlock(bbCheckForMore); {
2355  ctx->RestoreContinuedLanes();
2356  // At the end of the loop body (either due to running the
2357  // statements normally, or a continue statement in the middle of
2358  // the loop that jumps to the end, see if there are any lanes left
2359  // to be processed.
2360  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2361  llvm::Value *nonZero =
2362  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
2363  remainingBits, LLVMInt64(0), "remaining_ne_zero");
2364  ctx->BranchInst(bbFindNext, bbDone, nonZero);
2365  }
2366 
2367  ctx->SetCurrentBasicBlock(bbDone);
2368  ctx->SetInternalMask(oldInternalMask);
2369  ctx->EndForeach();
2370  ctx->EndScope();
2371 }
2372 
2373 
2374 void
2375 ForeachActiveStmt::Print(int indent) const {
2376  printf("%*cForeach_active Stmt", indent, ' ');
2377  pos.Print();
2378  printf("\n");
2379 
2380  printf("%*cIter symbol: ", indent+4, ' ');
2381  if (sym != NULL) {
2382  printf("%s", sym->name.c_str());
2383  if (sym->type != NULL)
2384  printf(" %s", sym->type->GetString().c_str());
2385  }
2386  else
2387  printf("NULL");
2388  printf("\n");
2389 
2390  printf("%*cStmts:\n", indent+4, ' ');
2391  if (stmts != NULL)
2392  stmts->Print(indent+8);
2393  else
2394  printf("NULL");
2395  printf("\n");
2396 }
2397 
2398 
2399 Stmt *
2401  if (sym == NULL)
2402  return NULL;
2403 
2404  return this;
2405 }
2406 
2407 
2408 int
2410  return COST_VARYING_LOOP;
2411 }
2412 
2413 
2414 ///////////////////////////////////////////////////////////////////////////
2415 // ForeachUniqueStmt
2416 
2418  Stmt *s, SourcePos pos)
2419  : Stmt(pos, ForeachUniqueStmtID) {
2420  sym = m->symbolTable->LookupVariable(iterName);
2421  expr = e;
2422  stmts = s;
2423 }
2424 
2425 
2426 void
2428  if (!ctx->GetCurrentBasicBlock())
2429  return;
2430 
2431  // First, allocate local storage for the symbol that we'll use for the
2432  // uniform variable that holds the current unique value through each
2433  // loop.
2434  if (sym->type == NULL) {
2435  Assert(m->errorCount > 0);
2436  return;
2437  }
2438  llvm::Type *symType = sym->type->LLVMType(g->ctx);
2439  if (symType == NULL) {
2440  Assert(m->errorCount > 0);
2441  return;
2442  }
2443  sym->storagePtr = ctx->AllocaInst(symType, sym->name.c_str());
2444 
2445  ctx->SetDebugPos(pos);
2446  ctx->EmitVariableDebugInfo(sym);
2447 
2448  // The various basic blocks that we'll need in the below
2449  llvm::BasicBlock *bbFindNext = ctx->CreateBasicBlock("foreach_find_next");
2450  llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_body");
2451  llvm::BasicBlock *bbCheckForMore = ctx->CreateBasicBlock("foreach_check_for_more");
2452  llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("foreach_done");
2453 
2454  // Prepare the FunctionEmitContext
2455  ctx->StartScope();
2456 
2457  // Save the old internal mask so that we can restore it at the end
2458  llvm::Value *oldMask = ctx->GetInternalMask();
2459 
2460  // Now, *maskBitsPtr will maintain a bitmask for the lanes that remain
2461  // to be processed by a pass through the foreach_unique loop body. It
2462  // starts out with the full execution mask (which should never be all
2463  // off going in to this)...
2464  llvm::Value *oldFullMask = ctx->GetFullMask();
2465  llvm::Value *maskBitsPtr = ctx->AllocaInst(LLVMTypes::Int64Type, "mask_bits");
2466  llvm::Value *movmsk = ctx->LaneMask(oldFullMask);
2467  ctx->StoreInst(movmsk, maskBitsPtr);
2468 
2469  // Officially start the loop.
2471  ctx->SetContinueTarget(bbCheckForMore);
2472 
2473  // Evaluate the varying expression we're iterating over just once.
2474  llvm::Value *exprValue = expr->GetValue(ctx);
2475 
2476  // And we'll store its value into locally-allocated storage, for ease
2477  // of indexing over it with non-compile-time-constant indices.
2478  const Type *exprType;
2479  llvm::VectorType *llvmExprType;
2480  if (exprValue == NULL ||
2481  (exprType = expr->GetType()) == NULL ||
2482  (llvmExprType =
2483  llvm::dyn_cast<llvm::VectorType>(exprValue->getType())) == NULL) {
2484  Assert(m->errorCount > 0);
2485  return;
2486  }
2487  ctx->SetDebugPos(pos);
2488  const Type *exprPtrType = PointerType::GetUniform(exprType);
2489  llvm::Value *exprMem = ctx->AllocaInst(llvmExprType, "expr_mem");
2490  ctx->StoreInst(exprValue, exprMem);
2491 
2492  // Onward to find the first set of lanes to run the loop for
2493  ctx->BranchInst(bbFindNext);
2494 
2495  ctx->SetCurrentBasicBlock(bbFindNext); {
2496  // Load the bitmask of the lanes left to be processed
2497  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2498 
2499  // Find the index of the first set bit in the mask
2500  llvm::Function *ctlzFunc =
2501  m->module->getFunction("__count_trailing_zeros_i64");
2502  Assert(ctlzFunc != NULL);
2503  llvm::Value *firstSet = ctx->CallInst(ctlzFunc, NULL, remainingBits,
2504  "first_set");
2505 
2506  // And load the corresponding element value from the temporary
2507  // memory storing the value of the varying expr.
2508  llvm::Value *uniqueValue;
2509 #ifdef ISPC_NVPTX_ENABLED
2510  if (g->target->getISA() == Target::NVPTX)
2511  {
2512  llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type);
2513  uniqueValue = ctx->Extract(exprValue, firstSet32);
2514  }
2515  else
2516  {
2517 #endif /* ISPC_NVPTX_ENABLED */
2518  llvm::Value *uniqueValuePtr =
2519  ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
2520  "unique_index_ptr");
2521  uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
2522 #ifdef ISPC_NVPTX_ENABLED
2523  }
2524 #endif /* ISPC_NVPTX_ENABLED */
2525  // If it's a varying pointer type, need to convert from the int
2526  // type we store in the vector to the actual pointer type
2527  if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL)
2528  uniqueValue = ctx->IntToPtrInst(uniqueValue, symType);
2529 
2530  // Store that value in sym's storage so that the iteration variable
2531  // has the right value inside the loop body
2532  ctx->StoreInst(uniqueValue, sym->storagePtr);
2533 
2534  // Set the execution mask so that it's on for any lane that a) was
2535  // running at the start of the foreach loop, and b) where that
2536  // lane's value of the varying expression is the same as the value
2537  // we've selected to process this time through--i.e.:
2538  // oldMask & (smear(element) == exprValue)
2539  llvm::Value *uniqueSmear = ctx->SmearUniform(uniqueValue, "unique_smear");
2540  llvm::Value *matchingLanes = NULL;
2541  if (uniqueValue->getType()->isFloatingPointTy())
2542  matchingLanes =
2543  ctx->CmpInst(llvm::Instruction::FCmp, llvm::CmpInst::FCMP_OEQ,
2544  uniqueSmear, exprValue, "matching_lanes");
2545  else
2546  matchingLanes =
2547  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
2548  uniqueSmear, exprValue, "matching_lanes");
2549  matchingLanes = ctx->I1VecToBoolVec(matchingLanes);
2550 
2551  llvm::Value *loopMask =
2552  ctx->BinaryOperator(llvm::Instruction::And, oldMask, matchingLanes,
2553  "foreach_unique_loop_mask");
2554  ctx->SetInternalMask(loopMask);
2555 
2556  // Also update the bitvector of lanes left to process in subsequent
2557  // loop iterations:
2558  // remainingBits &= ~movmsk(current mask)
2559  llvm::Value *loopMaskMM = ctx->LaneMask(loopMask);
2560  llvm::Value *notLoopMaskMM = ctx->NotOperator(loopMaskMM);
2561  llvm::Value *newRemaining =
2562  ctx->BinaryOperator(llvm::Instruction::And, remainingBits,
2563  notLoopMaskMM, "new_remaining");
2564  ctx->StoreInst(newRemaining, maskBitsPtr);
2565 
2566  // and onward...
2567  ctx->BranchInst(bbBody);
2568  }
2569 
2570  ctx->SetCurrentBasicBlock(bbBody); {
2571  ctx->SetBlockEntryMask(ctx->GetFullMask());
2572  // Run the code in the body of the loop. This is easy now.
2573  if (stmts)
2574  stmts->EmitCode(ctx);
2575 
2576  Assert(ctx->GetCurrentBasicBlock() != NULL);
2577  ctx->BranchInst(bbCheckForMore);
2578  }
2579 
2580  ctx->SetCurrentBasicBlock(bbCheckForMore); {
2581  // At the end of the loop body (either due to running the
2582  // statements normally, or a continue statement in the middle of
2583  // the loop that jumps to the end, see if there are any lanes left
2584  // to be processed.
2585  ctx->RestoreContinuedLanes();
2586  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2587  llvm::Value *nonZero =
2588  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
2589  remainingBits, LLVMInt64(0), "remaining_ne_zero");
2590  ctx->BranchInst(bbFindNext, bbDone, nonZero);
2591  }
2592 
2593  ctx->SetCurrentBasicBlock(bbDone);
2594  ctx->SetInternalMask(oldMask);
2595  ctx->EndForeach();
2596  ctx->EndScope();
2597 }
2598 
2599 
2600 void
2601 ForeachUniqueStmt::Print(int indent) const {
2602  printf("%*cForeach_unique Stmt", indent, ' ');
2603  pos.Print();
2604  printf("\n");
2605 
2606  printf("%*cIter symbol: ", indent+4, ' ');
2607  if (sym != NULL) {
2608  printf("%s", sym->name.c_str());
2609  if (sym->type != NULL)
2610  printf(" %s", sym->type->GetString().c_str());
2611  }
2612  else
2613  printf("NULL");
2614  printf("\n");
2615 
2616  printf("%*cIter expr: ", indent+4, ' ');
2617  if (expr != NULL)
2618  expr->Print();
2619  else
2620  printf("NULL");
2621  printf("\n");
2622 
2623  printf("%*cStmts:\n", indent+4, ' ');
2624  if (stmts != NULL)
2625  stmts->Print(indent+8);
2626  else
2627  printf("NULL");
2628  printf("\n");
2629 }
2630 
2631 
2632 Stmt *
2634  const Type *type;
2635  if (sym == NULL || expr == NULL || (type = expr->GetType()) == NULL)
2636  return NULL;
2637 
2638  if (type->IsVaryingType() == false) {
2639  Error(expr->pos, "Iteration domain type in \"foreach_tiled\" loop "
2640  "must be \"varying\" type, not \"%s\".",
2641  type->GetString().c_str());
2642  return NULL;
2643  }
2644 
2645  if (Type::IsBasicType(type) == false) {
2646  Error(expr->pos, "Iteration domain type in \"foreach_tiled\" loop "
2647  "must be an atomic, pointer, or enum type, not \"%s\".",
2648  type->GetString().c_str());
2649  return NULL;
2650  }
2651 
2652  return this;
2653 }
2654 
2655 
2656 int
2658  return COST_VARYING_LOOP;
2659 }
2660 
2661 
2662 ///////////////////////////////////////////////////////////////////////////
2663 // CaseStmt
2664 
2665 /** Given the statements following a 'case' or 'default' label, this
2666  function determines whether the mask should be checked to see if it is
2667  "all off" immediately after the label, before executing the code for
2668  the statements.
2669  */
2670 static bool
2671 lCheckMask(Stmt *stmts) {
2672  if (stmts == NULL)
2673  return false;
2674 
2675  int cost = EstimateCost(stmts);
2676  bool safeToRunWithAllLanesOff = SafeToRunWithMaskAllOff(stmts);
2677 
2678  // The mask should be checked if the code following the
2679  // 'case'/'default' is relatively complex, or if it would be unsafe to
2680  // run that code with the execution mask all off.
2681  return (cost > PREDICATE_SAFE_IF_STATEMENT_COST ||
2682  safeToRunWithAllLanesOff == false);
2683 }
2684 
2685 
2687  : Stmt(pos, CaseStmtID), value(v) {
2688  stmts = s;
2689 }
2690 
2691 
2692 void
2694  ctx->EmitCaseLabel(value, lCheckMask(stmts), pos);
2695  if (stmts)
2696  stmts->EmitCode(ctx);
2697 }
2698 
2699 
2700 void
2701 CaseStmt::Print(int indent) const {
2702  printf("%*cCase [%d] label", indent, ' ', value);
2703  pos.Print();
2704  printf("\n");
2705  stmts->Print(indent+4);
2706 }
2707 
2708 
2709 Stmt *
2711  return this;
2712 }
2713 
2714 
2715 int
2717  return 0;
2718 }
2719 
2720 
2721 ///////////////////////////////////////////////////////////////////////////
2722 // DefaultStmt
2723 
2725  : Stmt(pos, DefaultStmtID) {
2726  stmts = s;
2727 }
2728 
2729 
2730 void
2733  if (stmts)
2734  stmts->EmitCode(ctx);
2735 }
2736 
2737 
2738 void
2739 DefaultStmt::Print(int indent) const {
2740  printf("%*cDefault Stmt", indent, ' ');
2741  pos.Print();
2742  printf("\n");
2743  stmts->Print(indent+4);
2744 }
2745 
2746 
2747 Stmt *
2749  return this;
2750 }
2751 
2752 
2753 int
2755  return 0;
2756 }
2757 
2758 
2759 ///////////////////////////////////////////////////////////////////////////
2760 // SwitchStmt
2761 
2763  : Stmt(pos, SwitchStmtID) {
2764  expr = e;
2765  stmts = s;
2766 }
2767 
2768 
2769 /* An instance of this structure is carried along as we traverse the AST
2770  nodes for the statements after a "switch" statement. We use this
2771  structure to record all of the 'case' and 'default' statements after the
2772  "switch". */
2775  ctx = c;
2776  defaultBlock = NULL;
2777  lastBlock = NULL;
2778  }
2779 
2781 
2782  /* Basic block for the code following the "default" label (if any). */
2783  llvm::BasicBlock *defaultBlock;
2784 
2785  /* Map from integer values after "case" labels to the basic blocks that
2786  follow the corresponding "case" label. */
2787  std::vector<std::pair<int, llvm::BasicBlock *> > caseBlocks;
2788 
2789  /* For each basic block for a "case" label or a "default" label,
2790  nextBlock[block] stores the basic block pointer for the next
2791  subsequent "case" or "default" label in the program. */
2792  std::map<llvm::BasicBlock *, llvm::BasicBlock *> nextBlock;
2793 
2794  /* The last basic block created for a "case" or "default" label; when
2795  we create the basic block for the next one, we'll use this to update
2796  the nextBlock map<> above. */
2797  llvm::BasicBlock *lastBlock;
2798 };
2799 
2800 
2801 static bool
2802 lSwitchASTPreVisit(ASTNode *node, void *d) {
2803  if (llvm::dyn_cast<SwitchStmt>(node) != NULL)
2804  // don't continue recursively into a nested switch--we only want
2805  // our own case and default statements!
2806  return false;
2807 
2808  CaseStmt *cs = llvm::dyn_cast<CaseStmt>(node);
2809  DefaultStmt *ds = llvm::dyn_cast<DefaultStmt>(node);
2810 
2811  SwitchVisitInfo *svi = (SwitchVisitInfo *)d;
2812  llvm::BasicBlock *bb = NULL;
2813  if (cs != NULL) {
2814  // Complain if we've seen a case statement with the same value
2815  // already
2816  for (int i = 0; i < (int)svi->caseBlocks.size(); ++i) {
2817  if (svi->caseBlocks[i].first == cs->value) {
2818  Error(cs->pos, "Duplicate case value \"%d\".", cs->value);
2819  return true;
2820  }
2821  }
2822 
2823  // Otherwise create a new basic block for the code following this
2824  // 'case' statement and record the mappign between the case label
2825  // value and the basic block
2826  char buf[32];
2827  sprintf(buf, "case_%d", cs->value);
2828  bb = svi->ctx->CreateBasicBlock(buf);
2829  svi->caseBlocks.push_back(std::make_pair(cs->value, bb));
2830  }
2831  else if (ds != NULL) {
2832  // And complain if we've seen another 'default' label..
2833  if (svi->defaultBlock != NULL) {
2834  Error(ds->pos, "Multiple \"default\" lables in switch statement.");
2835  return true;
2836  }
2837  else {
2838  // Otherwise create a basic block for the code following the
2839  // "default".
2840  bb = svi->ctx->CreateBasicBlock("default");
2841  svi->defaultBlock = bb;
2842  }
2843  }
2844 
2845  // If we saw a "case" or "default" label, then update the map to record
2846  // that the block we just created follows the block created for the
2847  // previous label in the "switch".
2848  if (bb != NULL) {
2849  svi->nextBlock[svi->lastBlock] = bb;
2850  svi->lastBlock = bb;
2851  }
2852 
2853  return true;
2854 }
2855 
2856 
2857 void
2859  if (ctx->GetCurrentBasicBlock() == NULL)
2860  return;
2861 
2862  const Type *type;
2863  if (expr == NULL || ((type = expr->GetType()) == NULL)) {
2864  AssertPos(pos, m->errorCount > 0);
2865  return;
2866  }
2867 
2868  // Basic block we'll end up after the switch statement
2869  llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("switch_done");
2870 
2871  // Walk the AST of the statements after the 'switch' to collect a bunch
2872  // of information about the structure of the 'case' and 'default'
2873  // statements.
2874  SwitchVisitInfo svi(ctx);
2875  WalkAST(stmts, lSwitchASTPreVisit, NULL, &svi);
2876  // Record that the basic block following the last one created for a
2877  // case/default is the block after the end of the switch statement.
2878  svi.nextBlock[svi.lastBlock] = bbDone;
2879 
2880  llvm::Value *exprValue = expr->GetValue(ctx);
2881  if (exprValue == NULL) {
2882  AssertPos(pos, m->errorCount > 0);
2883  return;
2884  }
2885 
2886  bool isUniformCF = (type->IsUniformType() &&
2887  lHasVaryingBreakOrContinue(stmts) == false);
2888  ctx->StartSwitch(isUniformCF, bbDone);
2889  ctx->SetBlockEntryMask(ctx->GetFullMask());
2890  ctx->SwitchInst(exprValue, svi.defaultBlock ? svi.defaultBlock : bbDone,
2891  svi.caseBlocks, svi.nextBlock);
2892 
2893  if (stmts != NULL)
2894  stmts->EmitCode(ctx);
2895 
2896  if (ctx->GetCurrentBasicBlock() != NULL)
2897  ctx->BranchInst(bbDone);
2898 
2899  ctx->SetCurrentBasicBlock(bbDone);
2900  ctx->EndSwitch();
2901 }
2902 
2903 
2904 void
2905 SwitchStmt::Print(int indent) const {
2906  printf("%*cSwitch Stmt", indent, ' ');
2907  pos.Print();
2908  printf("\n");
2909  printf("%*cexpr = ", indent, ' ');
2910  expr->Print();
2911  printf("\n");
2912  stmts->Print(indent+4);
2913 }
2914 
2915 
2916 Stmt *
2918  const Type *exprType;
2919  if (expr == NULL ||
2920  (exprType = expr->GetType()) == NULL) {
2921  Assert(m->errorCount > 0);
2922  return NULL;
2923  }
2924 
2925  const Type *toType = NULL;
2926  exprType = exprType->GetAsConstType();
2927  bool is64bit = (Type::EqualIgnoringConst(exprType->GetAsUniformType(),
2931 
2932  if (exprType->IsUniformType()) {
2933  if (is64bit) toType = AtomicType::UniformInt64;
2934  else toType = AtomicType::UniformInt32;
2935  }
2936  else {
2937  if (is64bit) toType = AtomicType::VaryingInt64;
2938  else toType = AtomicType::VaryingInt32;
2939  }
2940 
2941  expr = TypeConvertExpr(expr, toType, "switch expression");
2942  if (expr == NULL)
2943  return NULL;
2944 
2945  return this;
2946 }
2947 
2948 
2949 int
2951  const Type *type = expr->GetType();
2952  if (type && type->IsVaryingType())
2953  return COST_VARYING_SWITCH;
2954  else
2955  return COST_UNIFORM_SWITCH;
2956 }
2957 
2958 
2959 ///////////////////////////////////////////////////////////////////////////
2960 // UnmaskedStmt
2961 
2963  : Stmt(pos, UnmaskedStmtID) {
2964  stmts = s;
2965 }
2966 
2967 
2968 void
2970  if (!ctx->GetCurrentBasicBlock() || !stmts)
2971  return;
2972 
2973  llvm::Value *oldInternalMask = ctx->GetInternalMask();
2974  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
2975 
2978 
2979  stmts->EmitCode(ctx);
2980 
2981 
2982  // Do not restore old mask if our basic block is over. This happends if we emit code
2983  // for something like 'unmasked{return;}', for example.
2984  if (ctx->GetCurrentBasicBlock() == NULL)
2985  return;
2986 
2987  ctx->SetInternalMask(oldInternalMask);
2988  ctx->SetFunctionMask(oldFunctionMask);
2989 }
2990 
2991 
2992 void
2993 UnmaskedStmt::Print(int indent) const {
2994  printf("%*cUnmasked Stmt", indent, ' ');
2995  pos.Print();
2996  printf("\n");
2997 
2998  printf("%*cStmts:\n", indent+4, ' ');
2999  if (stmts != NULL)
3000  stmts->Print(indent+8);
3001  else
3002  printf("NULL");
3003  printf("\n");
3004 }
3005 
3006 
3007 Stmt *
3009  return this;
3010 }
3011 
3012 
3013 int
3015  return COST_ASSIGN;
3016 }
3017 
3018 
3019 ///////////////////////////////////////////////////////////////////////////
3020 // ReturnStmt
3021 
3023  : Stmt(p, ReturnStmtID), expr(e) {
3024 }
3025 
3026 
3027 void
3029  if (!ctx->GetCurrentBasicBlock())
3030  return;
3031 
3032  if (ctx->InForeachLoop()) {
3033  Error(pos, "\"return\" statement is illegal inside a \"foreach\" loop.");
3034  return;
3035  }
3036 
3037  // Make sure we're not trying to return a reference to something where
3038  // that doesn't make sense
3039  const Function *func = ctx->GetFunction();
3040  const Type *returnType = func->GetReturnType();
3041  if (IsReferenceType(returnType) == true &&
3042  IsReferenceType(expr->GetType()) == false) {
3043  const Type *lvType = expr->GetLValueType();
3044  if (lvType == NULL) {
3045  Error(expr->pos, "Illegal to return non-lvalue from function "
3046  "returning reference type \"%s\".",
3047  returnType->GetString().c_str());
3048  return;
3049  }
3050  else if (lvType->IsUniformType() == false) {
3051  Error(expr->pos, "Illegal to return varying lvalue type from "
3052  "function returning a reference type \"%s\".",
3053  returnType->GetString().c_str());
3054  return;
3055  }
3056  }
3057 
3058  ctx->SetDebugPos(pos);
3059  ctx->CurrentLanesReturned(expr, true);
3060 }
3061 
3062 
3063 Stmt *
3065  return this;
3066 }
3067 
3068 
3069 int
3071  return COST_RETURN;
3072 }
3073 
3074 
3075 void
3076 ReturnStmt::Print(int indent) const {
3077  printf("%*cReturn Stmt", indent, ' ');
3078  pos.Print();
3079  if (expr)
3080  expr->Print();
3081  else printf("(void)");
3082  printf("\n");
3083 }
3084 
3085 
3086 ///////////////////////////////////////////////////////////////////////////
3087 // GotoStmt
3088 
3089 GotoStmt::GotoStmt(const char *l, SourcePos gotoPos, SourcePos ip)
3090  : Stmt(gotoPos, GotoStmtID) {
3091  label = l;
3092  identifierPos = ip;
3093 }
3094 
3095 
3096 void
3098  if (!ctx->GetCurrentBasicBlock())
3099  return;
3100 
3101  if (ctx->VaryingCFDepth() > 0) {
3102  Error(pos, "\"goto\" statements are only legal under \"uniform\" "
3103  "control flow.");
3104  return;
3105  }
3106  if (ctx->InForeachLoop()) {
3107  Error(pos, "\"goto\" statements are currently illegal inside "
3108  "\"foreach\" loops.");
3109  return;
3110  }
3111 
3112  llvm::BasicBlock *bb = ctx->GetLabeledBasicBlock(label);
3113  if (bb == NULL) {
3114  /* Label wasn't found. Look for suggestions that are close */
3115  std::vector<std::string> labels = ctx->GetLabels();
3116  std::vector<std::string> matches = MatchStrings(label, labels);
3117  std::string match_output;
3118  if (! matches.empty()) {
3119  /* Print up to 5 matches. Don't want to spew too much */
3120  match_output += "\nDid you mean:";
3121  for (unsigned int i=0; i<matches.size() && i<5; i++)
3122  match_output += "\n " + matches[i] + "?";
3123  }
3124 
3125  /* Label wasn't found. Emit an error */
3127  "No label named \"%s\" found in current function.%s",
3128  label.c_str(), match_output.c_str());
3129 
3130  return;
3131  }
3132 
3133  ctx->BranchInst(bb);
3134  ctx->SetCurrentBasicBlock(NULL);
3135 }
3136 
3137 
3138 void
3139 GotoStmt::Print(int indent) const {
3140  printf("%*cGoto label \"%s\"\n", indent, ' ', label.c_str());
3141 }
3142 
3143 
3144 Stmt *
3146  return this;
3147 }
3148 
3149 
3150 Stmt *
3152  return this;
3153 }
3154 
3155 
3156 int
3158  return COST_GOTO;
3159 }
3160 
3161 
3162 ///////////////////////////////////////////////////////////////////////////
3163 // LabeledStmt
3164 
3166  : Stmt(p, LabeledStmtID) {
3167  name = n;
3168  stmt = s;
3169 }
3170 
3171 
3172 void
3174  llvm::BasicBlock *bblock = ctx->GetLabeledBasicBlock(name);
3175  AssertPos(pos, bblock != NULL);
3176 
3177  // End the current basic block with a jump to our basic block and then
3178  // set things up for emission to continue there. Note that the current
3179  // basic block may validly be NULL going into this statement due to an
3180  // earlier goto that NULLed it out; that doesn't stop us from
3181  // re-establishing a current basic block starting at the label..
3182  if (ctx->GetCurrentBasicBlock() != NULL)
3183  ctx->BranchInst(bblock);
3184  ctx->SetCurrentBasicBlock(bblock);
3185 
3186  if (stmt != NULL)
3187  stmt->EmitCode(ctx);
3188 }
3189 
3190 
3191 void
3192 LabeledStmt::Print(int indent) const {
3193  printf("%*cLabel \"%s\"\n", indent, ' ', name.c_str());
3194  if (stmt != NULL)
3195  stmt->Print(indent);
3196 }
3197 
3198 
3199 Stmt *
3201  return this;
3202 }
3203 
3204 
3205 Stmt *
3207  if (!isalpha(name[0]) || name[0] == '_') {
3208  Error(pos, "Label must start with either alphabetic character or '_'.");
3209  return NULL;
3210  }
3211  for (unsigned int i = 1; i < name.size(); ++i) {
3212  if (!isalnum(name[i]) && name[i] != '_') {
3213  Error(pos, "Character \"%c\" is illegal in labels.", name[i]);
3214  return NULL;
3215  }
3216  }
3217  return this;
3218 }
3219 
3220 
3221 int
3223  return 0;
3224 }
3225 
3226 
3227 ///////////////////////////////////////////////////////////////////////////
3228 // StmtList
3229 
3230 void
3232  ctx->StartScope();
3233  ctx->SetDebugPos(pos);
3234  for (unsigned int i = 0; i < stmts.size(); ++i)
3235  if (stmts[i])
3236  stmts[i]->EmitCode(ctx);
3237  ctx->EndScope();
3238 }
3239 
3240 
3241 Stmt *
3243  return this;
3244 }
3245 
3246 
3247 int
3249  return 0;
3250 }
3251 
3252 
3253 void
3254 StmtList::Print(int indent) const {
3255  printf("%*cStmt List", indent, ' ');
3256  pos.Print();
3257  printf(":\n");
3258  for (unsigned int i = 0; i < stmts.size(); ++i)
3259  if (stmts[i])
3260  stmts[i]->Print(indent+4);
3261 }
3262 
3263 
3264 ///////////////////////////////////////////////////////////////////////////
3265 // PrintStmt
3266 
3267 PrintStmt::PrintStmt(const std::string &f, Expr *v, SourcePos p)
3268  : Stmt(p, PrintStmtID), format(f), values(v) {
3269 }
3270 
3271 /* Because the pointers to values that are passed to __do_print() are all
3272  void *s (and because ispc print() formatting strings statements don't
3273  encode types), we pass along a string to __do_print() where the i'th
3274  character encodes the type of the i'th value to be printed. Needless to
3275  say, the encoding chosen here and the decoding code in __do_print() need
3276  to agree on the below!
3277  */
3278 static char
3279 lEncodeType(const Type *t) {
3280  if (Type::Equal(t, AtomicType::UniformBool)) return 'b';
3281  if (Type::Equal(t, AtomicType::VaryingBool)) return 'B';
3282  if (Type::Equal(t, AtomicType::UniformInt32)) return 'i';
3283  if (Type::Equal(t, AtomicType::VaryingInt32)) return 'I';
3284  if (Type::Equal(t, AtomicType::UniformUInt32)) return 'u';
3285  if (Type::Equal(t, AtomicType::VaryingUInt32)) return 'U';
3286  if (Type::Equal(t, AtomicType::UniformFloat)) return 'f';
3287  if (Type::Equal(t, AtomicType::VaryingFloat)) return 'F';
3288  if (Type::Equal(t, AtomicType::UniformInt64)) return 'l';
3289  if (Type::Equal(t, AtomicType::VaryingInt64)) return 'L';
3290  if (Type::Equal(t, AtomicType::UniformUInt64)) return 'v';
3291  if (Type::Equal(t, AtomicType::VaryingUInt64)) return 'V';
3292  if (Type::Equal(t, AtomicType::UniformDouble)) return 'd';
3293  if (Type::Equal(t, AtomicType::VaryingDouble)) return 'D';
3294  if (CastType<PointerType>(t) != NULL) {
3295  if (t->IsUniformType())
3296  return 'p';
3297  else
3298  return 'P';
3299  }
3300  else return '\0';
3301 }
3302 
3303 
3304 /** Given an Expr for a value to be printed, emit the code to evaluate the
3305  expression and store the result to alloca'd memory. Update the
3306  argTypes string with the type encoding for this expression.
3307  */
3308 static llvm::Value *
3309 lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
3310  const Type *type = expr->GetType();
3311  if (type == NULL)
3312  return NULL;
3313 
3314  if (CastType<ReferenceType>(type) != NULL) {
3315  expr = new RefDerefExpr(expr, expr->pos);
3316  type = expr->GetType();
3317  if (type == NULL)
3318  return NULL;
3319  }
3320 
3321  // Just int8 and int16 types to int32s...
3322  const Type *baseType = type->GetAsNonConstType()->GetAsUniformType();
3323  if (Type::Equal(baseType, AtomicType::UniformInt8) ||
3327  expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 :
3329  expr, expr->pos);
3330  type = expr->GetType();
3331  }
3332 
3333  char t = lEncodeType(type->GetAsNonConstType());
3334  if (t == '\0') {
3335  Error(expr->pos, "Only atomic types are allowed in print statements; "
3336  "type \"%s\" is illegal.", type->GetString().c_str());
3337  return NULL;
3338  }
3339  else {
3340  if (Type::Equal(baseType, AtomicType::UniformBool)) {
3341  // Blast bools to ints, but do it here to preserve encoding for
3342  // printing 'true' or 'false'
3343  expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 :
3345  expr, expr->pos);
3346  type = expr->GetType();
3347  }
3348  argTypes.push_back(t);
3349 
3350  llvm::Type *llvmExprType = type->LLVMType(g->ctx);
3351  llvm::Value *ptr = ctx->AllocaInst(llvmExprType, "print_arg");
3352  llvm::Value *val = expr->GetValue(ctx);
3353  if (!val)
3354  return NULL;
3355  ctx->StoreInst(val, ptr);
3356 
3357  ptr = ctx->BitCastInst(ptr, LLVMTypes::VoidPointerType);
3358  return ptr;
3359  }
3360 }
3361 
3362 
3363 /* PrintStmt works closely with the __do_print() function implemented in
3364  the builtins-c.c file. In particular, the EmitCode() method here needs to
3365  take the arguments passed to it from ispc and generate a valid call to
3366  __do_print() with the information that __do_print() then needs to do the
3367  actual printing work at runtime.
3368  */
3369 void
3371  if (!ctx->GetCurrentBasicBlock())
3372  return;
3373 
3374  ctx->SetDebugPos(pos);
3375 
3376  // __do_print takes 5 arguments; we'll get them stored in the args[] array
3377  // in the code emitted below
3378  //
3379  // 1. the format string
3380  // 2. a string encoding the types of the values being printed,
3381  // one character per value
3382  // 3. the number of running program instances (i.e. the target's
3383  // vector width)
3384  // 4. the current lane mask
3385  // 5. a pointer to an array of pointers to the values to be printed
3386  llvm::Value *args[5];
3387  std::string argTypes;
3388 
3389  if (values == NULL) {
3390  llvm::Type *ptrPtrType =
3391  llvm::PointerType::get(LLVMTypes::VoidPointerType, 0);
3392  args[4] = llvm::Constant::getNullValue(ptrPtrType);
3393  }
3394  else {
3395  // Get the values passed to the print() statement evaluated and
3396  // stored in memory so that we set up the array of pointers to them
3397  // for the 5th __do_print() argument
3398  ExprList *elist = llvm::dyn_cast<ExprList>(values);
3399  int nArgs = elist ? elist->exprs.size() : 1;
3400 
3401  // Allocate space for the array of pointers to values to be printed
3402  llvm::Type *argPtrArrayType =
3403  llvm::ArrayType::get(LLVMTypes::VoidPointerType, nArgs);
3404  llvm::Value *argPtrArray = ctx->AllocaInst(argPtrArrayType,
3405  "print_arg_ptrs");
3406  // Store the array pointer as a void **, which is what __do_print()
3407  // expects
3408  args[4] = ctx->BitCastInst(argPtrArray,
3409  llvm::PointerType::get(LLVMTypes::VoidPointerType, 0));
3410 
3411  // Now, for each of the arguments, emit code to evaluate its value
3412  // and store the value into alloca'd storage. Then store the
3413  // pointer to the alloca'd storage into argPtrArray.
3414  if (elist) {
3415  for (unsigned int i = 0; i < elist->exprs.size(); ++i) {
3416  Expr *expr = elist->exprs[i];
3417  if (!expr)
3418  return;
3419  llvm::Value *ptr = lProcessPrintArg(expr, ctx, argTypes);
3420  if (!ptr)
3421  return;
3422 
3423  llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, i, NULL);
3424  ctx->StoreInst(ptr, arrayPtr);
3425  }
3426  }
3427  else {
3428  llvm::Value *ptr = lProcessPrintArg(values, ctx, argTypes);
3429  if (!ptr)
3430  return;
3431  llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, 0, NULL);
3432  ctx->StoreInst(ptr, arrayPtr);
3433  }
3434  }
3435 
3436  // Now we can emit code to call __do_print()
3437 #ifdef ISPC_NVPTX_ENABLED
3438  llvm::Function *printFunc = g->target->getISA() != Target::NVPTX ?
3439  m->module->getFunction("__do_print") : m->module->getFunction("__do_print_nvptx");
3440 #else /* ISPC_NVPTX_ENABLED */
3441  llvm::Function *printFunc = m->module->getFunction("__do_print");
3442 #endif /* ISPC_NVPTX_ENABLED */
3443  AssertPos(pos, printFunc);
3444 
3445  llvm::Value *mask = ctx->GetFullMask();
3446  // Set up the rest of the parameters to it
3447  args[0] = ctx->GetStringPtr(format);
3448  args[1] = ctx->GetStringPtr(argTypes);
3449  args[2] = LLVMInt32(g->target->getVectorWidth());
3450  args[3] = ctx->LaneMask(mask);
3451  std::vector<llvm::Value *> argVec(&args[0], &args[5]);
3452  ctx->CallInst(printFunc, NULL, argVec, "");
3453 }
3454 
3455 
3456 void
3457 PrintStmt::Print(int indent) const {
3458  printf("%*cPrint Stmt (%s)", indent, ' ', format.c_str());
3459 }
3460 
3461 
3462 Stmt *
3464  return this;
3465 }
3466 
3467 
3468 int
3470  return COST_FUNCALL;
3471 }
3472 
3473 
3474 ///////////////////////////////////////////////////////////////////////////
3475 // AssertStmt
3476 
3477 AssertStmt::AssertStmt(const std::string &msg, Expr *e, SourcePos p)
3478  : Stmt(p, AssertStmtID), message(msg), expr(e) {
3479 }
3480 
3481 
3482 void
3484  if (!ctx->GetCurrentBasicBlock())
3485  return;
3486 
3487  const Type *type;
3488  if (expr == NULL ||
3489  (type = expr->GetType()) == NULL) {
3490  AssertPos(pos, m->errorCount > 0);
3491  return;
3492  }
3493  bool isUniform = type->IsUniformType();
3494 
3495  // The actual functionality to do the check and then handle falure is
3496  // done via a builtin written in bitcode in builtins/util.m4.
3497  llvm::Function *assertFunc =
3498  isUniform ? m->module->getFunction("__do_assert_uniform") :
3499  m->module->getFunction("__do_assert_varying");
3500  AssertPos(pos, assertFunc != NULL);
3501 
3502  char *errorString;
3503  if (asprintf(&errorString, "%s:%d:%d: Assertion failed: %s",
3505  message.c_str()) == -1) {
3506  Error(pos, "Fatal error when generating assert string: asprintf() "
3507  "unable to allocate memory!");
3508  return;
3509  }
3510 
3511  std::vector<llvm::Value *> args;
3512  args.push_back(ctx->GetStringPtr(errorString));
3513  llvm::Value *exprValue = expr->GetValue(ctx);
3514  if (exprValue == NULL) {
3515  AssertPos(pos, m->errorCount > 0);
3516  return;
3517  }
3518  args.push_back(exprValue);
3519  args.push_back(ctx->GetFullMask());
3520  ctx->CallInst(assertFunc, NULL, args, "");
3521 
3522  free(errorString);
3523 }
3524 
3525 
3526 void
3527 AssertStmt::Print(int indent) const {
3528  printf("%*cAssert Stmt (%s)", indent, ' ', message.c_str());
3529 }
3530 
3531 
3532 Stmt *
3534  const Type *type;
3535  if (expr && (type = expr->GetType()) != NULL) {
3536  bool isUniform = type->IsUniformType();
3539  "\"assert\" statement");
3540  if (expr == NULL)
3541  return NULL;
3542  }
3543  return this;
3544 }
3545 
3546 
3547 int
3549  return COST_ASSERT;
3550 }
3551 
3552 
3553 ///////////////////////////////////////////////////////////////////////////
3554 // DeleteStmt
3555 
3557  : Stmt(p, DeleteStmtID) {
3558  expr = e;
3559 }
3560 
3561 
3562 void
3564  if (!ctx->GetCurrentBasicBlock())
3565  return;
3566 
3567  const Type *exprType;
3568  if (expr == NULL || ((exprType = expr->GetType()) == NULL)) {
3569  AssertPos(pos, m->errorCount > 0);
3570  return;
3571  }
3572 
3573  llvm::Value *exprValue = expr->GetValue(ctx);
3574  if (exprValue == NULL) {
3575  AssertPos(pos, m->errorCount > 0);
3576  return;
3577  }
3578 
3579  // Typechecking should catch this
3580  AssertPos(pos, CastType<PointerType>(exprType) != NULL);
3581 
3582  if (exprType->IsUniformType()) {
3583  // For deletion of a uniform pointer, we just need to cast the
3584  // pointer type to a void pointer type, to match what
3585  // __delete_uniform() from the builtins expects.
3586  exprValue = ctx->BitCastInst(exprValue, LLVMTypes::VoidPointerType,
3587  "ptr_to_void");
3588  llvm::Function *func;
3589  if (g->target->is32Bit()) {
3590  func = m->module->getFunction("__delete_uniform_32rt");
3591  } else {
3592  func = m->module->getFunction("__delete_uniform_64rt");
3593  }
3594  AssertPos(pos, func != NULL);
3595 
3596  ctx->CallInst(func, NULL, exprValue, "");
3597  }
3598  else {
3599  // Varying pointers are arrays of ints, and __delete_varying()
3600  // takes a vector of i64s (even for 32-bit targets). Therefore, we
3601  // only need to extend to 64-bit values on 32-bit targets before
3602  // calling it.
3603  llvm::Function *func;
3604  if (g->target->is32Bit()) {
3605  func = m->module->getFunction("__delete_varying_32rt");
3606  } else {
3607  func = m->module->getFunction("__delete_varying_64rt");
3608  }
3609  AssertPos(pos, func != NULL);
3610  if (g->target->is32Bit())
3611  exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType,
3612  "ptr_to_64");
3613  ctx->CallInst(func, NULL, exprValue, "");
3614  }
3615 }
3616 
3617 
3618 void
3619 DeleteStmt::Print(int indent) const {
3620  printf("%*cDelete Stmt", indent, ' ');
3621 }
3622 
3623 
3624 Stmt *
3626  const Type *exprType;
3627  if (expr == NULL || ((exprType = expr->GetType()) == NULL))
3628  return NULL;
3629 
3630  if (CastType<PointerType>(exprType) == NULL) {
3631  Error(pos, "Illegal to delete non-pointer type \"%s\".",
3632  exprType->GetString().c_str());
3633  return NULL;
3634  }
3635 
3636  return this;
3637 }
3638 
3639 
3640 int
3642  return COST_DELETE;
3643 }
int EstimateCost() const
Definition: stmt.cpp:1168
llvm::Value * storagePtr
Definition: sym.h:72
static const AtomicType * VaryingInt32
Definition: type.h:349
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3097
llvm::Value * Any(llvm::Value *mask)
Definition: ctx.cpp:1381
bool IsVaryingType() const
Definition: type.h:150
ForeachStmt(const std::vector< Symbol * > &loopVars, const std::vector< Expr * > &startExprs, const std::vector< Expr * > &endExprs, Stmt *bodyStatements, bool tiled, SourcePos pos)
Definition: stmt.cpp:1465
llvm::Constant * LLVMMaskAllOn
Definition: llvmutil.cpp:92
Expr * expr
Definition: stmt.h:581
int GetElementCount() const
Definition: type.cpp:1470
Stmt * bodyStmts
Definition: stmt.h:190
void Print(int indent) const
Definition: stmt.cpp:107
ForStmt(Stmt *initializer, Expr *testExpr, Stmt *stepStatements, Stmt *bodyStatements, bool doCoherentCheck, SourcePos pos)
Definition: stmt.cpp:1195
std::vector< VariableDeclaration > vars
Definition: stmt.h:122
std::vector< std::pair< int, llvm::BasicBlock * > > caseBlocks
Definition: stmt.cpp:2787
static bool lIsVaryingFor(ASTNode *node)
Definition: stmt.cpp:950
static bool lHasVaryingBreakOrContinue(Stmt *stmt)
Definition: stmt.cpp:1012
Definition: func.h:44
int EstimateCost() const
Definition: stmt.cpp:672
void Print(int indent) const
Definition: stmt.cpp:3139
llvm::Value * AddElementOffset(llvm::Value *basePtr, int elementNum, const Type *ptrType, const char *name=NULL, const PointerType **resultPtrType=NULL)
Definition: ctx.cpp:2589
Opt opt
Definition: ispc.h:541
void StartUniformIf()
Definition: ctx.cpp:560
void SwitchInst(llvm::Value *expr, llvm::BasicBlock *defaultBlock, const std::vector< std::pair< int, llvm::BasicBlock * > > &caseBlocks, const std::map< llvm::BasicBlock *, llvm::BasicBlock * > &nextBlocks)
Definition: ctx.cpp:1172
void StartSwitch(bool isUniform, llvm::BasicBlock *bbAfterSwitch)
Definition: ctx.cpp:987
llvm::Value * ProgramIndexVector(bool is32bits=true)
Definition: ctx.cpp:1542
Stmt * stmts
Definition: stmt.h:404
static llvm::Value * lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx, llvm::Value *uniformCounterPtr, llvm::Value *varyingCounterPtr, const std::vector< int > &spans)
Definition: stmt.cpp:1479
void SetInternalMask(llvm::Value *val)
Definition: ctx.cpp:501
void StartLoop(llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, bool uniformControlFlow)
Definition: ctx.cpp:635
const bool doCoherentCheck
Definition: stmt.h:224
const AtomicType * GetAsConstType() const
Definition: type.cpp:308
Declaration of the FunctionEmitContext class
void EmitVariableDebugInfo(Symbol *sym)
Definition: ctx.cpp:1762
Stmt * trueStmts
Definition: stmt.h:149
void StartScope()
Definition: ctx.cpp:1700
void PerformanceWarning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:426
static const AtomicType * VaryingUInt64
Definition: type.h:355
int EstimateCost() const
Definition: stmt.cpp:517
Expr * expr
Definition: stmt.h:380
std::vector< Expr * > endExprs
Definition: stmt.h:288
void emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *test, llvm::BasicBlock *bDone) const
Definition: stmt.cpp:802
SwitchVisitInfo(FunctionEmitContext *c)
Definition: stmt.cpp:2774
virtual void Print() const =0
void SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *val)
Definition: ctx.cpp:509
void BranchInst(llvm::BasicBlock *block)
Definition: ctx.cpp:3457
void emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *test) const
Definition: stmt.cpp:723
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:570
static bool lSwitchASTPreVisit(ASTNode *node, void *d)
Definition: stmt.cpp:2802
llvm::Instruction * ZExtInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2242
bool IsArrayType() const
Definition: type.cpp:236
int getVectorWidth() const
Definition: ispc.h:283
int EstimateCost() const
Definition: stmt.cpp:3157
Module * m
Definition: ispc.cpp:89
Stmt * TypeCheck()
Definition: stmt.cpp:3064
Interface class for statements in the ispc language.
Definition: stmt.h:49
Stmt * TypeCheck()
Definition: stmt.cpp:2136
llvm::Value * NotOperator(llvm::Value *v, const char *name=NULL)
Definition: ctx.cpp:1939
int first_line
Definition: ispc.h:139
virtual void EmitCode(FunctionEmitContext *ctx) const =0
Target * target
Definition: ispc.h:543
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3173
llvm::Value * LoadInst(llvm::Value *ptr, llvm::Value *mask, const Type *ptrType, const char *name=NULL, bool one_elem=false)
Definition: ctx.cpp:2808
void Print(int indent) const
Definition: stmt.cpp:2375
void BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse)
Definition: ctx.cpp:538
BreakStmt(SourcePos pos)
Definition: stmt.cpp:1389
const std::string message
Definition: stmt.h:579
std::vector< std::string > GetLabels()
Definition: ctx.cpp:1291
const std::string format
Definition: stmt.h:548
static const AtomicType * VaryingDouble
Definition: type.h:356
llvm::Instruction * TruncInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2165
llvm::Value * AllocaInst(llvm::Type *llvmType, const char *name=NULL, int align=0, bool atEntryBlock=true)
Definition: ctx.cpp:3042
Expression representing a compile-time constant value.
Definition: expr.h:390
llvm::Value * CmpInst(llvm::Instruction::OtherOps inst, llvm::CmpInst::Predicate pred, llvm::Value *v0, llvm::Value *v1, const char *name=NULL)
Definition: ctx.cpp:1991
void EndSwitch()
Definition: ctx.cpp:1013
int EstimateCost() const
Definition: stmt.cpp:2657
void StartVaryingIf(llvm::Value *oldMask)
Definition: ctx.cpp:566
void Print(int indent) const
Definition: stmt.cpp:3527
static llvm::Type * BoolType
Definition: llvmutil.h:70
void SetContinueTarget(llvm::BasicBlock *bb)
Definition: ctx.h:258
#define Assert(expr)
Definition: ispc.h:170
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2858
void StartForeach(ForeachType ft)
Definition: ctx.cpp:679
llvm::Constant * LLVMInt32Vector(int32_t i)
Definition: llvmutil.cpp:379
int EstimateCost() const
Definition: stmt.cpp:3641
ASTNode * WalkAST(ASTNode *root, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc, void *data)
Definition: ast.cpp:74
static llvm::VectorType * Int32VectorType
Definition: llvmutil.h:92
static const AtomicType * UniformUInt32
Definition: type.h:352
int EstimateCost() const
Definition: stmt.cpp:2950
Stmt * Optimize()
Definition: stmt.cpp:3200
void Continue(bool doCoherenceCheck)
Definition: ctx.cpp:836
llvm::ConstantInt * LLVMInt8(int8_t i)
Definition: llvmutil.cpp:235
llvm::Value * GetFullMask()
Definition: ctx.cpp:474
virtual llvm::Constant * GetConstant(const Type *type) const
Definition: expr.cpp:102
IfStmt(Expr *testExpr, Stmt *trueStmts, Stmt *falseStmts, bool doAllCheck, SourcePos pos)
Definition: stmt.cpp:525
Stmt * TypeCheck()
Definition: stmt.cpp:2400
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3563
int EstimateCost() const
Definition: stmt.cpp:3248
virtual const Type * GetAsUniformType() const =0
Stmt * TypeCheck()
Definition: stmt.cpp:1136
Stmt * TypeCheck()
Definition: stmt.cpp:1405
void AddInstrumentationPoint(const char *note)
Definition: ctx.cpp:1640
llvm::BasicBlock * lastBlock
Definition: stmt.cpp:2797
Stmt * TypeCheck()
Definition: stmt.cpp:2748
std::string name
Definition: sym.h:71
static int lLog2(int i)
Definition: stmt.cpp:1613
A list of expressions.
Definition: expr.h:252
CaseStmt(int value, Stmt *stmt, SourcePos pos)
Definition: stmt.cpp:2686
static bool lCheckMask(Stmt *stmts)
Definition: stmt.cpp:2671
Symbol * LookupVariable(const char *name)
Definition: sym.cpp:131
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2239
Stmt * TypeCheck()
Definition: stmt.cpp:3463
Statement implementation representing a 'do' statement in the program.
Definition: stmt.h:173
void BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse)
Definition: ctx.cpp:527
int EstimateCost() const
Definition: stmt.cpp:2754
void RestoreContinuedLanes()
Definition: ctx.cpp:959
llvm::Constant * LLVMFalse
Definition: llvmutil.cpp:91
llvm::Constant * LLVMMaskAllOff
Definition: llvmutil.cpp:93
virtual std::string GetString() const =0
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:1395
void Print(int indent) const
Definition: stmt.cpp:3192
llvm::BasicBlock * GetCurrentBasicBlock()
Definition: ctx.cpp:450
static const AtomicType * UniformUInt16
Definition: type.h:351
static bool lHasUnsizedArrays(const Type *type)
Definition: stmt.cpp:134
int GetSOAWidth() const
Definition: type.h:160
static PointerType * GetUniform(const Type *t, bool isSlice=false)
Definition: type.cpp:963
ReturnStmt(Expr *e, SourcePos p)
Definition: stmt.cpp:3022
void Break(bool doCoherenceCheck)
Definition: ctx.cpp:756
const int value
Definition: stmt.h:403
int EstimateCost() const
Definition: stmt.cpp:120
ConstExpr * constValue
Definition: sym.h:87
llvm::BasicBlock * CreateBasicBlock(const char *name)
Definition: ctx.cpp:1582
header file with declarations for symbol and symbol table classes.
static const AtomicType * UniformBool
Definition: type.h:346
llvm::Value * BroadcastValue(llvm::Value *v, llvm::Type *vecType, const char *name=NULL)
Definition: ctx.cpp:3548
Symbol * sym
Definition: stmt.h:335
bool disableMaskAllOnOptimizations
Definition: ispc.h:470
Expr * test
Definition: stmt.h:147
int EstimateCost() const
Definition: stmt.cpp:2409
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:263
void StoreInst(llvm::Value *value, llvm::Value *ptr)
Definition: ctx.cpp:3324
static ASTNode * lVaryingBCPostFunc(ASTNode *node, void *d)
Definition: stmt.cpp:997
void Print(int indent) const
Definition: stmt.cpp:1363
llvm::Module * module
Definition: module.h:158
Stmt * stmts
Definition: stmt.h:448
File with declarations for classes related to statements in the language.
static void lEmitIfStatements(FunctionEmitContext *ctx, Stmt *stmts, const char *trueOrFalse)
Definition: stmt.cpp:533
ContinueStmt(SourcePos pos)
Definition: stmt.cpp:1427
void EmitCaseLabel(int value, bool checkMask, SourcePos pos)
Definition: ctx.cpp:1120
void Print(int indent) const
Definition: stmt.cpp:2905
Stmt * stmts
Definition: stmt.h:425
Globals * g
Definition: ispc.cpp:88
bool IsReferenceType(const Type *t)
Definition: type.h:1093
Expr * expr
Definition: stmt.h:446
Expression representing a type cast of the given expression to a probably-different type...
Definition: expr.h:509
ExprStmt(Expr *expr, SourcePos pos)
Definition: stmt.cpp:84
LabeledStmt(const char *label, Stmt *stmt, SourcePos p)
Definition: stmt.cpp:3165
static const AtomicType * UniformUInt64
Definition: type.h:355
bool IsUniformType() const
Definition: type.h:145
void EndLoop()
Definition: ctx.cpp:664
llvm::Value * GetFunctionMask()
Definition: ctx.cpp:462
bool disableCoherentControlFlow
Definition: ispc.h:492
Abstract base class for nodes in the abstract syntax tree (AST).
Definition: ast.h:50
llvm::Value * GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, const Type *ptrType, const char *name=NULL)
Definition: ctx.cpp:2429
void Print(int indent) const
Definition: stmt.cpp:3457
Stmt * TypeCheck()
Definition: stmt.cpp:2710
void CurrentLanesReturned(Expr *value, bool doCoherenceCheck)
Definition: ctx.cpp:1305
llvm::Constant * LLVMTrue
Definition: llvmutil.cpp:90
Stmt * TypeCheck()
Definition: stmt.cpp:3242
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:385
virtual bool IsConstType() const =0
ForeachUniqueStmt(const char *iterName, Expr *expr, Stmt *stmts, SourcePos pos)
Definition: stmt.cpp:2417
Stmt * stmts
Definition: stmt.h:337
ForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos)
Definition: stmt.cpp:2231
SourcePos GetDebugPos() const
Definition: ctx.cpp:1667
Stmt * TypeCheck()
Definition: stmt.cpp:101
llvm::Value * LaneMask(llvm::Value *mask)
Definition: ctx.cpp:1434
Stmt * Optimize()
Definition: stmt.cpp:3145
DeclStmt(const std::vector< VariableDeclaration > &v, SourcePos pos)
Definition: stmt.cpp:128
Stmt * step
Definition: stmt.h:221
Stmt * TypeCheck()
Definition: stmt.cpp:3206
static char lEncodeType(const Type *t)
Definition: stmt.cpp:3279
bool SafeToRunWithMaskAllOff(ASTNode *root)
Definition: ast.cpp:506
Stmt * TypeCheck()
Definition: stmt.cpp:3625
static llvm::Type * Int64Type
Definition: llvmutil.h:75
static llvm::Type * Int8Type
Definition: llvmutil.h:72
llvm::Constant * LLVMBoolVector(bool v)
Definition: llvmutil.cpp:493
Stmt * TypeCheck()
Definition: stmt.cpp:2917
bool isTiled
Definition: stmt.h:289
void Print(int indent) const
Definition: stmt.cpp:2993
FunctionEmitContext * ctx
Definition: stmt.cpp:2780
static llvm::VectorType * Int64VectorType
Definition: llvmutil.h:93
Header file with declarations for various LLVM utility stuff.
llvm::ArrayType * LLVMType(llvm::LLVMContext *ctx) const
Definition: type.cpp:1325
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3231
#define AssertPos(pos, expr)
Definition: ispc.h:173
virtual const Type * GetLValueType() const
Definition: expr.cpp:94
AssertStmt(const std::string &msg, Expr *e, SourcePos p)
Definition: stmt.cpp:3477
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2969
ISA getISA() const
Definition: ispc.h:267
virtual const Type * GetAsConstType() const =0
static bool IsBasicType(const Type *type)
Definition: type.cpp:3509
SourcePos pos
Definition: sym.h:70
llvm::Value * CallInst(llvm::Value *func, const FunctionType *funcType, const std::vector< llvm::Value * > &args, const char *name=NULL)
Definition: ctx.cpp:3639
static void lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a)
Definition: stmt.cpp:1633
Stmt * TypeCheck()
Definition: stmt.cpp:1443
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3028
void Print(int indent) const
Definition: stmt.cpp:3254
void SetBlockEntryMask(llvm::Value *mask)
Definition: ctx.cpp:495
virtual llvm::Value * GetValue(FunctionEmitContext *ctx) const =0
void Print(int indent) const
Definition: stmt.cpp:1178
StorageClass storageClass
Definition: sym.h:96
Representation of a range of positions in a source file.
Definition: ispc.h:134
Stmt * stmts
Definition: stmt.h:312
DoStmt(Expr *testExpr, Stmt *bodyStmts, bool doCoherentCheck, SourcePos pos)
Definition: stmt.cpp:1019
Stmt * TypeCheck()
Definition: stmt.cpp:3151
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3483
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2693
bool InForeachLoop() const
Definition: ctx.cpp:1235
Stmt * stmts
Definition: stmt.h:223
int VaryingCFDepth() const
Definition: ctx.cpp:1225
void ClearBreakLanes()
Definition: ctx.cpp:977
static const AtomicType * VaryingBool
Definition: type.h:346
GotoStmt(const char *label, SourcePos gotoPos, SourcePos idPos)
Definition: stmt.cpp:3089
const char * name
Definition: ispc.h:138
virtual void Print(int indent) const =0
SourcePos pos
Definition: ast.h:77
void Print(int indent) const
Definition: stmt.cpp:3619
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:410
Stmt * TypeCheck()
Definition: stmt.cpp:1333
static llvm::PointerType * VoidPointerType
Definition: llvmutil.h:68
static const AtomicType * VaryingInt64
Definition: type.h:354
const bool doCoherentCheck
Definition: stmt.h:191
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2731
Expr * values
Definition: stmt.h:551
const Type * GetReturnType() const
Definition: func.cpp:180
bool foundVaryingBreakOrContinue
Definition: stmt.cpp:943
const Function * GetFunction() const
Definition: ctx.cpp:444
void InitSymbol(llvm::Value *lvalue, const Type *symType, Expr *initExpr, FunctionEmitContext *ctx, SourcePos pos)
Definition: expr.cpp:648
llvm::Value * GetStringPtr(const std::string &str)
Definition: ctx.cpp:1570
Expr * testExpr
Definition: stmt.h:189
SwitchStmt(Expr *expr, Stmt *stmts, SourcePos pos)
Definition: stmt.cpp:2762
virtual llvm::Type * LLVMType(llvm::LLVMContext *ctx) const =0
int EstimateCost() const
Definition: stmt.cpp:1411
static const AtomicType * UniformUInt8
Definition: type.h:350
void DisableGatherScatterWarnings()
Definition: ctx.cpp:1244
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:1025
static llvm::Type * Int32Type
Definition: llvmutil.h:74
void emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask, llvm::Value *test) const
Definition: stmt.cpp:703
void SetDebugPos(SourcePos pos)
Definition: ctx.cpp:1661
void Print(int indent) const
Definition: stmt.cpp:2601
#define ISPC_MAX_NVEC
Definition: ispc.h:68
bool disableUniformControlFlow
Definition: ispc.h:498
Representation of a function in a source file.
Stmt * falseStmts
Definition: stmt.h:151
Stmt * TypeCheck()
Definition: stmt.cpp:2633
int first_column
Definition: ispc.h:140
void Print() const
Definition: ispc.cpp:1588
Expr * test
Definition: stmt.h:218
Stmt * init
Definition: stmt.h:215
static bool Equal(const Type *a, const Type *b)
Definition: type.cpp:3619
static bool lVaryingBCPreFunc(ASTNode *node, void *d)
Definition: stmt.cpp:965
Stmt * stmts
Definition: stmt.h:290
Definition: stmt.h:387
std::vector< Expr * > exprs
Definition: expr.h:270
llvm::BasicBlock * defaultBlock
Definition: stmt.cpp:2783
virtual const Type * GetType() const =0
std::vector< Expr * > startExprs
Definition: stmt.h:287
static const AtomicType * UniformFloat
Definition: type.h:353
llvm::Value * GetInternalMask()
Definition: ctx.cpp:468
int EstimateCost() const
Definition: stmt.cpp:3222
void Print(int indent) const
Definition: stmt.cpp:2739
llvm::Value * BitCastInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2063
static const AtomicType * UniformInt32
Definition: type.h:349
Symbol * sym
Definition: stmt.h:311
void SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test)
Definition: ctx.cpp:517
SourcePos identifierPos
Definition: stmt.h:471
Representation of a program symbol.
Definition: sym.h:63
llvm::Value * ExtractInst(llvm::Value *v, int elt, const char *name=NULL)
Definition: ctx.cpp:3479
Stmt * stmt
Definition: stmt.h:496
int varyingCFDepth
Definition: sym.h:98
void EndForeach()
Definition: ctx.cpp:713
int EstimateCost() const
Definition: stmt.cpp:2179
void EnableGatherScatterWarnings()
Definition: ctx.cpp:1250
Stmt * TypeCheck()
Definition: stmt.cpp:3008
Interface class that defines the type abstraction.
Definition: type.h:101
static const AtomicType * UniformDouble
Definition: type.h:356
std::vector< Stmt * > stmts
Definition: stmt.h:519
int EstimateCost() const
Definition: stmt.cpp:1449
Expr abstract base class and expression implementations.
void SetCurrentBasicBlock(llvm::BasicBlock *bblock)
Definition: ctx.cpp:456
static llvm::VectorType * MaskType
Definition: llvmutil.h:86
void EmitDefaultLabel(bool checkMask, SourcePos pos)
Definition: ctx.cpp:1057
Expr * expr
Definition: stmt.h:92
int EstimateCost(ASTNode *root)
Definition: ast.cpp:352
Stmt * stmts
Definition: stmt.h:358
Stmt * Optimize()
Definition: stmt.cpp:436
DefaultStmt(Stmt *stmt, SourcePos pos)
Definition: stmt.cpp:2724
Expr * TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase)
Definition: expr.cpp:595
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2427
Statement representing a single if statement, possibly with an else clause.
Definition: stmt.h:128
Expr is the abstract base class that defines the interface that all expression types must implement...
Definition: expr.h:48
Expr * expr
Definition: stmt.h:603
virtual Stmt * Optimize()
Definition: stmt.cpp:76
void Print(int indent) const
Definition: stmt.cpp:2185
std::vector< Symbol * > dimVariables
Definition: stmt.h:286
llvm::Value * IntToPtrInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2132
const Function * parentFunction
Definition: sym.h:104
int EstimateCost() const
Definition: stmt.cpp:3469
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:1433
virtual const Type * GetAsNonConstType() const =0
UnmaskedStmt(Stmt *stmt, SourcePos pos)
Definition: stmt.cpp:2962
int EstimateCost() const
Definition: stmt.cpp:3014
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3370
llvm::Value * All(llvm::Value *mask)
Definition: ctx.cpp:1398
std::string name
Definition: stmt.h:494
Expression that represents dereferencing a reference to get its value.
Definition: expr.h:601
static const Type * SizeUnsizedArrays(const Type *type, Expr *initExpr)
Definition: type.cpp:1601
void Print(int indent) const
Definition: stmt.cpp:682
std::vector< std::string > MatchStrings(const std::string &str, const std::vector< std::string > &options)
Definition: util.cpp:519
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:1202
Stmt * TypeCheck()
Definition: stmt.cpp:466
llvm::Value * SmearUniform(llvm::Value *value, const char *name=NULL)
Definition: ctx.cpp:2025
Expr * expr
Definition: stmt.h:336
std::string label
Definition: stmt.h:470
void Print(int indent) const
Definition: stmt.cpp:499
bool PossiblyResolveFunctionOverloads(Expr *expr, const Type *type)
Definition: expr.cpp:615
PrintStmt(const std::string &f, Expr *v, SourcePos p)
Definition: stmt.cpp:3267
const Type * GetElementType() const
Definition: type.cpp:1476
int EstimateCost() const
Definition: stmt.cpp:3070
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
llvm::ConstantInt * LLVMInt64(int64_t i)
Definition: llvmutil.cpp:277
static const AtomicType * UniformInt64
Definition: type.h:354
int errorCount
Definition: module.h:151
llvm::LLVMContext * ctx
Definition: ispc.h:632
static const AtomicType * UniformInt16
Definition: type.h:348
const Type * type
Definition: sym.h:84
void Print(int indent) const
Definition: stmt.cpp:1455
int EstimateCost() const
Definition: stmt.cpp:3548
static llvm::Value * lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes)
Definition: stmt.cpp:3309
int varyingControlFlowDepth
Definition: stmt.cpp:942
static bool EqualIgnoringConst(const Type *a, const Type *b)
Definition: type.cpp:3625
Stmt * TypeCheck()
Definition: stmt.cpp:653
bool IsSOAType() const
Definition: type.h:156
DeleteStmt(Expr *e, SourcePos p)
Definition: stmt.cpp:3556
void emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask, llvm::Value *test, llvm::BasicBlock *bDone) const
Definition: stmt.cpp:872
void Debug(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:398
void Print(int indent) const
Definition: stmt.cpp:3076
void Print(int indent) const
Definition: stmt.cpp:2701
bool is32Bit() const
Definition: ispc.h:273
const bool doAllCheck
Definition: stmt.h:158
static const AtomicType * VaryingUInt32
Definition: type.h:352
llvm::Value * BinaryOperator(llvm::Instruction::BinaryOps inst, llvm::Value *v0, llvm::Value *v1, const char *name=NULL)
Definition: ctx.cpp:1905
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:90
void Print(int indent) const
Definition: stmt.cpp:1417
llvm::BasicBlock * GetLabeledBasicBlock(const std::string &label)
Definition: ctx.cpp:1283
void SetFunctionMask(llvm::Value *val)
Definition: ctx.cpp:487
static const AtomicType * VaryingFloat
Definition: type.h:353
SymbolTable * symbolTable
Definition: module.h:155
static const AtomicType * UniformInt8
Definition: type.h:347
int EstimateCost() const
Definition: stmt.cpp:1353
File with declarations for classes related to type representation.
Stmt * TypeCheck()
Definition: stmt.cpp:3533
llvm::Value * I1VecToBoolVec(llvm::Value *b)
Definition: ctx.cpp:1588
std::map< llvm::BasicBlock *, llvm::BasicBlock * > nextBlock
Definition: stmt.cpp:2792
int EstimateCost() const
Definition: stmt.cpp:2716
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:209
One-dimensional array type.
Definition: type.h:555
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:1666