Intel SPMD Program Compiler  1.9.2
stmt.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2014, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file stmt.cpp
35  @brief File with definitions classes related to statements in the language
36 */
37 
38 #include "stmt.h"
39 #include "ctx.h"
40 #include "util.h"
41 #include "expr.h"
42 #include "type.h"
43 #include "func.h"
44 #include "sym.h"
45 #include "module.h"
46 #include "llvmutil.h"
47 
48 #include <stdio.h>
49 #include <map>
50 
51 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
52  #include <llvm/Module.h>
53  #include <llvm/Type.h>
54  #include <llvm/Instructions.h>
55  #include <llvm/Function.h>
56  #include <llvm/DerivedTypes.h>
57  #include <llvm/LLVMContext.h>
58  #include <llvm/Metadata.h>
59  #include <llvm/CallingConv.h>
60 #else
61  #include <llvm/IR/Module.h>
62  #include <llvm/IR/Type.h>
63  #include <llvm/IR/Instructions.h>
64  #include <llvm/IR/Function.h>
65  #include <llvm/IR/DerivedTypes.h>
66  #include <llvm/IR/LLVMContext.h>
67  #include <llvm/IR/Metadata.h>
68  #include <llvm/IR/CallingConv.h>
69 #endif
70 #include <llvm/Support/raw_ostream.h>
71 
72 ///////////////////////////////////////////////////////////////////////////
73 // Stmt
74 
75 Stmt *
77  return this;
78 }
79 
80 
81 ///////////////////////////////////////////////////////////////////////////
82 // ExprStmt
83 
85  : Stmt(p, ExprStmtID) {
86  expr = e;
87 }
88 
89 void
91  if (!ctx->GetCurrentBasicBlock())
92  return;
93 
94  ctx->SetDebugPos(pos);
95  if (expr)
96  expr->GetValue(ctx);
97 }
98 
99 
100 Stmt *
102  return this;
103 }
104 
105 
106 void
107 ExprStmt::Print(int indent) const {
108  if (!expr)
109  return;
110 
111  printf("%*c", indent, ' ');
112  printf("Expr stmt: ");
113  pos.Print();
114  expr->Print();
115  printf("\n");
116 }
117 
118 
119 int
121  return 0;
122 }
123 
124 
125 ///////////////////////////////////////////////////////////////////////////
126 // DeclStmt
127 
128 DeclStmt::DeclStmt(const std::vector<VariableDeclaration> &v, SourcePos p)
129  : Stmt(p, DeclStmtID), vars(v) {
130 }
131 
132 
133 static bool
134 lHasUnsizedArrays(const Type *type) {
135  const ArrayType *at = CastType<ArrayType>(type);
136  if (at == NULL)
137  return false;
138 
139  if (at->GetElementCount() == 0)
140  return true;
141  else
142  return lHasUnsizedArrays(at->GetElementType());
143 }
144 
145 #ifdef ISPC_NVPTX_ENABLED
146 static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos, const bool variable = false)
147 {
148  if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX)
149  return value;
150  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType());
151  const int addressSpace = pt->getAddressSpace();
152  if (addressSpace != 3 && addressSpace != 4)
153  return value;
154 
155  llvm::Type *elTy = pt->getElementType();
156 
157  /* convert elTy addrspace(3)* to i64* addrspace(3)* */
158  llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace);
159  value = ctx->BitCastInst(value, Int64Ptr3, "gep2gen_cast1");
160 
161  /* convert i64* addrspace(3) to i64* */
162  llvm::Function *__cvt2gen = m->module->getFunction(
163  addressSpace == 3 ? (variable ? "__cvt_loc2gen_var" : "__cvt_loc2gen") : "__cvt_const2gen");
164 
165  std::vector<llvm::Value *> __cvt2gen_args;
166  __cvt2gen_args.push_back(value);
167  value = llvm::CallInst::Create(__cvt2gen, __cvt2gen_args, variable ? "gep2gen_cvt_var" : "gep2gen_cvt", ctx->GetCurrentBasicBlock());
168 
169  /* compute offset */
170  if (addressSpace == 3)
171  {
172  assert(elTy->isArrayTy());
173  const int numElTot = elTy->getArrayNumElements();
174  const int numEl = numElTot/4;
175 #if 0
176  fprintf(stderr, " --- detected addrspace(3) sz= %d --- \n", numEl);
177 #endif
178  llvm::ArrayType *arrTy = llvm::dyn_cast<llvm::ArrayType>(pt->getArrayElementType());
179  assert(arrTy != NULL);
180  llvm::Type *arrElTy = arrTy->getElementType();
181 #if 0
182  if (arrElTy->isArrayTy())
183  Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array");
184 #endif
185 
186  /* convert i64* to errElTy* */
187  llvm::PointerType *arrElTyPt0 = llvm::PointerType::get(arrElTy, 0);
188  value = ctx->BitCastInst(value, arrElTyPt0, "gep2gen_cast2");
189 
190  llvm::Function *func_warp_index = m->module->getFunction("__warp_index");
191  llvm::Value *warpId = ctx->CallInst(func_warp_index, NULL, std::vector<llvm::Value*>(), "gep2gen_warp_index");
192  llvm::Value *offset = ctx->BinaryOperator(llvm::Instruction::Mul, warpId, LLVMInt32(numEl), "gep2gen_offset");
193 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
194  value = llvm::GetElementPtrInst::Create(value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock());
195 #else
196  value = llvm::GetElementPtrInst::Create(NULL, value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock());
197 #endif
198  }
199 
200  /* convert arrElTy* to elTy* */
201  llvm::PointerType *elTyPt0 = llvm::PointerType::get(elTy, 0);
202  value = ctx->BitCastInst(value, elTyPt0, "gep2gen_cast3");
203 
204  return value;
205 }
206 #endif /* ISPC_NVPTX_ENABLED */
207 
208 void
210  if (!ctx->GetCurrentBasicBlock())
211  return;
212 
213  for (unsigned int i = 0; i < vars.size(); ++i) {
214  Symbol *sym = vars[i].sym;
215  AssertPos(pos, sym != NULL);
216  if (sym->type == NULL)
217  continue;
218  Expr *initExpr = vars[i].init;
219 
220  // Now that we're in the thick of emitting code, it's easy for us
221  // to find out the level of nesting of varying control flow we're
222  // in at this declaration. So we can finally set that
223  // Symbol::varyingCFDepth variable.
224  // @todo It's disgusting to be doing this here.
225  sym->varyingCFDepth = ctx->VaryingCFDepth();
226 
227  ctx->SetDebugPos(sym->pos);
228 
229  // If it's an array that was declared without a size but has an
230  // initializer list, then use the number of elements in the
231  // initializer list to finally set the array's size.
232  sym->type = ArrayType::SizeUnsizedArrays(sym->type, initExpr);
233  if (sym->type == NULL)
234  continue;
235 
236  if (lHasUnsizedArrays(sym->type)) {
237  Error(pos, "Illegal to declare an unsized array variable without "
238  "providing an initializer expression to set its size.");
239  continue;
240  }
241 
242  // References must have initializer expressions as well.
243  if (IsReferenceType(sym->type) == true) {
244  if (initExpr == NULL) {
245  Error(sym->pos, "Must provide initializer for reference-type "
246  "variable \"%s\".", sym->name.c_str());
247  continue;
248  }
249  if (IsReferenceType(initExpr->GetType()) == false) {
250  const Type *initLVType = initExpr->GetLValueType();
251  if (initLVType == NULL) {
252  Error(initExpr->pos, "Initializer for reference-type variable "
253  "\"%s\" must have an lvalue type.", sym->name.c_str());
254  continue;
255  }
256  if (initLVType->IsUniformType() == false) {
257  Error(initExpr->pos, "Initializer for reference-type variable "
258  "\"%s\" must have a uniform lvalue type.", sym->name.c_str());
259  continue;
260  }
261  }
262  }
263 
264  llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
265  if (llvmType == NULL) {
266  AssertPos(pos, m->errorCount > 0);
267  return;
268  }
269 
270  if (sym->storageClass == SC_STATIC) {
271 #ifdef ISPC_NVPTX_ENABLED
272  if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType())
273  {
274  Error(sym->pos,
275  "Non-constant static variable ""\"%s\" is not supported with ""\"nvptx\" target.",
276  sym->name.c_str());
277  return;
278  }
279  if (g->target->getISA() == Target::NVPTX && sym->type->IsVaryingType())
280  PerformanceWarning(sym->pos,
281  "\"const static varying\" variable ""\"%s\" is stored in __global address space with ""\"nvptx\" target.",
282  sym->name.c_str());
283  if (g->target->getISA() == Target::NVPTX && sym->type->IsUniformType())
284  PerformanceWarning(sym->pos,
285  "\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.",
286  sym->name.c_str());
287 #endif /* ISPC_NVPTX_ENABLED */
288  // For static variables, we need a compile-time constant value
289  // for its initializer; if there's no initializer, we use a
290  // zero value.
291  llvm::Constant *cinit = NULL;
292  if (initExpr != NULL) {
293  if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
294  continue;
295  // FIXME: we only need this for function pointers; it was
296  // already done for atomic types and enums in
297  // DeclStmt::TypeCheck()...
298  if (llvm::dyn_cast<ExprList>(initExpr) == NULL) {
299  initExpr = TypeConvertExpr(initExpr, sym->type,
300  "initializer");
301  // FIXME: and this is only needed to re-establish
302  // constant-ness so that GetConstant below works for
303  // constant artithmetic expressions...
304  initExpr = ::Optimize(initExpr);
305  }
306 
307  cinit = initExpr->GetConstant(sym->type);
308  if (cinit == NULL)
309  Error(initExpr->pos, "Initializer for static variable "
310  "\"%s\" must be a constant.", sym->name.c_str());
311  }
312  if (cinit == NULL)
313  cinit = llvm::Constant::getNullValue(llvmType);
314 
315  // Allocate space for the static variable in global scope, so
316  // that it persists across function calls
317 #ifdef ISPC_NVPTX_ENABLED
318  int addressSpace = 0;
319  if (g->target->getISA() == Target::NVPTX &&
320  sym->type->IsConstType() &&
321  sym->type->IsUniformType())
322  addressSpace = 4;
323  sym->storagePtr =
324  new llvm::GlobalVariable(*m->module, llvmType,
325  sym->type->IsConstType(),
326  llvm::GlobalValue::InternalLinkage, cinit,
327  llvm::Twine("static.") +
328  llvm::Twine(sym->pos.first_line) +
329  llvm::Twine(".") + sym->name.c_str(),
330  NULL,
331  llvm::GlobalVariable::NotThreadLocal,
332  addressSpace);
333  sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
334 #else /* ISPC_NVPTX_ENABLED */
335  sym->storagePtr =
336  new llvm::GlobalVariable(*m->module, llvmType,
337  sym->type->IsConstType(),
338  llvm::GlobalValue::InternalLinkage, cinit,
339  llvm::Twine("static.") +
340  llvm::Twine(sym->pos.first_line) +
341  llvm::Twine(".") + sym->name.c_str());
342 #endif /* ISPC_NVPTX_ENABLED */
343  // Tell the FunctionEmitContext about the variable
344  ctx->EmitVariableDebugInfo(sym);
345  }
346 #ifdef ISPC_NVPTX_ENABLED
347  else if ((sym->type->IsUniformType() || sym->type->IsSOAType()) &&
348  /* NVPTX:
349  * only non-constant uniform data types are stored in shared memory
350  * constant uniform are automatically promoted to varying
351  */
352  !sym->type->IsConstType() &&
353 #if 1
354  sym->type->IsArrayType() &&
355 #endif
356  g->target->getISA() == Target::NVPTX)
357  {
358  PerformanceWarning(sym->pos,
359  "Non-constant \"uniform\" data types might be slow with \"nvptx\" target. "
360  "Unless data sharing between program instances is desired, try \"const [static] uniform\", \"varying\" or \"uniform new uniform \"+\"delete\" if possible.");
361 
362  /* with __shared__ memory everything must be an array */
363  int nel = 4;
364  ArrayType *nat;
365  bool variable = true;
366  if (sym->type->IsArrayType())
367  {
368  const ArrayType *at = CastType<ArrayType>(sym->type);
369  /* we must scale # elements by 4, because a thread-block will run 4 warps
370  * or 128 threads.
371  * ***note-to-me***:please define these value (128threads/4warps)
372  * in nvptx-target definition
373  * instead of compile-time constants
374  */
375  nel *= at->GetElementCount();
376  if (sym->type->IsSOAType())
377  nel *= sym->type->GetSOAWidth();
378  nat = new ArrayType(at->GetElementType(), nel);
379  variable = false;
380  }
381  else
382  nat = new ArrayType(sym->type, nel);
383 
384  llvm::Type *llvmTypeUn = nat->LLVMType(g->ctx);
385  llvm::Constant *cinit = llvm::UndefValue::get(llvmTypeUn);
386 
387  sym->storagePtr =
388  new llvm::GlobalVariable(*m->module, llvmTypeUn,
389  sym->type->IsConstType(),
390  llvm::GlobalValue::InternalLinkage,
391  cinit,
392  llvm::Twine("local_") +
393  llvm::Twine(sym->pos.first_line) +
394  llvm::Twine("_") + sym->name.c_str(),
395  NULL,
396  llvm::GlobalVariable::NotThreadLocal,
397  /*AddressSpace=*/3);
398  sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos, variable);
399  llvm::PointerType *ptrTy = llvm::PointerType::get(sym->type->LLVMType(g->ctx),0);
400  sym->storagePtr = ctx->BitCastInst(sym->storagePtr, ptrTy, "uniform_decl");
401 
402  // Tell the FunctionEmitContext about the variable; must do
403  // this before the initializer stuff.
404  ctx->EmitVariableDebugInfo(sym);
405 
406  if (initExpr == 0 && sym->type->IsConstType())
407  Error(sym->pos, "Missing initializer for const variable "
408  "\"%s\".", sym->name.c_str());
409 
410  // And then get it initialized...
411  sym->parentFunction = ctx->GetFunction();
412  InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
413  }
414 #endif /* ISPC_NVPTX_ENABLED */
415  else
416  {
417  // For non-static variables, allocate storage on the stack
418  sym->storagePtr = ctx->AllocaInst(llvmType, sym->name.c_str());
419 
420  // Tell the FunctionEmitContext about the variable; must do
421  // this before the initializer stuff.
422  ctx->EmitVariableDebugInfo(sym);
423  if (initExpr == 0 && sym->type->IsConstType())
424  Error(sym->pos, "Missing initializer for const variable "
425  "\"%s\".", sym->name.c_str());
426 
427  // And then get it initialized...
428  sym->parentFunction = ctx->GetFunction();
429  InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
430  }
431  }
432 }
433 
434 
435 Stmt *
437  for (unsigned int i = 0; i < vars.size(); ++i) {
438  Expr *init = vars[i].init;
439  if (init != NULL && llvm::dyn_cast<ExprList>(init) == NULL) {
440  // If the variable is const-qualified, after we've optimized
441  // the initializer expression, see if we have a ConstExpr. If
442  // so, save it in Symbol::constValue where it can be used in
443  // optimizing later expressions that have this symbol in them.
444  // Note that there are cases where the expression may be
445  // constant but where we don't have a ConstExpr; an example is
446  // const arrays--the ConstExpr implementation just can't
447  // represent an array of values.
448  //
449  // All this is fine in terms of the code that's generated in
450  // the end (LLVM's constant folding stuff is good), but it
451  // means that the ispc compiler's ability to reason about what
452  // is definitely a compile-time constant for things like
453  // computing array sizes from non-trivial expressions is
454  // consequently limited.
455  Symbol *sym = vars[i].sym;
456  if (sym->type && sym->type->IsConstType() &&
457  Type::Equal(init->GetType(), sym->type))
458  sym->constValue = llvm::dyn_cast<ConstExpr>(init);
459  }
460  }
461  return this;
462 }
463 
464 // Do type conversion if needed and check for not initializing array with
465 // another array (array assignment is not allowed).
466 // Do that recursively to handle brace initialization, which may contain
467 // another brace initialization.
468 static
469 bool checkInit(const Type *type, Expr *init) {
470  bool encounteredError = false;
471 
472  // get the right type for stuff like const float foo = 2; so that
473  // the int->float type conversion is in there and we don't return
474  // an int as the constValue later...
475  if (CastType<AtomicType>(type) != NULL ||
476  CastType<EnumType>(type) != NULL) {
477  // If it's an expr list with an atomic type, we'll later issue
478  // an error. Need to leave vars[i].init as is in that case so
479  // it is in fact caught later, though.
480  if (llvm::dyn_cast<ExprList>(init) == NULL) {
481  init = TypeConvertExpr(init, type, "initializer");
482  if (init == NULL)
483  encounteredError = true;
484  }
485  } else if (CastType<ArrayType>(type) != NULL &&
486  llvm::dyn_cast<ExprList>(init) == NULL) {
487  encounteredError = true;
488  Error(init->pos, "Array initializer must be an initializer list");
489  } else if (CastType<StructType>(type) != NULL &&
490  llvm::dyn_cast<ExprList>(init) != NULL) {
491  const StructType *st = CastType<StructType>(type);
492  ExprList *el = llvm::dyn_cast<ExprList>(init);
493  int elt_count = st->GetElementCount() < el->exprs.size() ?
494  st->GetElementCount() : el->exprs.size();
495  for (int i=0; i < elt_count; i++) {
496  encounteredError |= checkInit(st->GetElementType(i), el->exprs[i]);
497  }
498  }
499 
500  return encounteredError;
501 }
502 
503 Stmt *
505  bool encounteredError = false;
506  for (unsigned int i = 0; i < vars.size(); ++i) {
507  if (vars[i].sym == NULL) {
508  encounteredError = true;
509  continue;
510  }
511 
512  if (vars[i].init == NULL)
513  continue;
514 
515  // Check an init.
516  encounteredError |= checkInit(vars[i].sym->type, vars[i].init);
517  }
518  return encounteredError ? NULL : this;
519 }
520 
521 
522 void
523 DeclStmt::Print(int indent) const {
524  printf("%*cDecl Stmt:", indent, ' ');
525  pos.Print();
526  for (unsigned int i = 0; i < vars.size(); ++i) {
527  printf("%*cVariable %s (%s)", indent+4, ' ',
528  vars[i].sym->name.c_str(),
529  vars[i].sym->type->GetString().c_str());
530  if (vars[i].init != NULL) {
531  printf(" = ");
532  vars[i].init->Print();
533  }
534  printf("\n");
535  }
536  printf("\n");
537 }
538 
539 
540 int
542  return 0;
543 }
544 
545 
546 ///////////////////////////////////////////////////////////////////////////
547 // IfStmt
548 
549 IfStmt::IfStmt(Expr *t, Stmt *ts, Stmt *fs, bool checkCoherence, SourcePos p)
550  : Stmt(p, IfStmtID), test(t), trueStmts(ts), falseStmts(fs),
551  doAllCheck(checkCoherence &&
552  !g->opt.disableCoherentControlFlow) {
553 }
554 
555 
556 static void
557 lEmitIfStatements(FunctionEmitContext *ctx, Stmt *stmts, const char *trueOrFalse) {
558  if (!stmts)
559  return;
560 
561  if (llvm::dyn_cast<StmtList>(stmts) == NULL)
562  ctx->StartScope();
563  ctx->AddInstrumentationPoint(trueOrFalse);
564  stmts->EmitCode(ctx);
565  if (llvm::dyn_cast<const StmtList>(stmts) == NULL)
566  ctx->EndScope();
567 }
568 
569 
570 /** Returns true if the "true" block for the if statement consists of a
571  single 'break' statement, and the "false" block is empty. */
572 /*
573 static bool
574 lCanApplyBreakOptimization(Stmt *trueStmts, Stmt *falseStmts) {
575  if (falseStmts != NULL) {
576  if (StmtList *sl = llvm::dyn_cast<StmtList>(falseStmts)) {
577  return (sl->stmts.size() == 0);
578  }
579  else
580  return false;
581  }
582 
583  if (llvm::dyn_cast<BreakStmt>(trueStmts))
584  return true;
585  else if (StmtList *sl = llvm::dyn_cast<StmtList>(trueStmts))
586  return (sl->stmts.size() == 1 &&
587  llvm::dyn_cast<BreakStmt>(sl->stmts[0]) != NULL);
588  else
589  return false;
590 }
591 */
592 
593 void
595  // First check all of the things that might happen due to errors
596  // earlier in compilation and bail out if needed so that we don't
597  // dereference NULL pointers in the below...
598  if (!ctx->GetCurrentBasicBlock())
599  return;
600  if (!test)
601  return;
602  const Type *testType = test->GetType();
603  if (!testType)
604  return;
605 
606  ctx->SetDebugPos(pos);
607  bool isUniform = testType->IsUniformType();
608 
609  llvm::Value *testValue = test->GetValue(ctx);
610  if (testValue == NULL)
611  return;
612 
613 #ifdef ISPC_NVPTX_ENABLED
614 #if 0
615  if (!isUniform && g->target->getISA() == Target::NVPTX)
616  {
617  /* With "nvptx" target, SIMT hardware takes care of non-uniform
618  * control flow. We trick ISPC to generate uniform control flow.
619  */
620  testValue = ctx->ExtractInst(testValue, 0);
621  isUniform = true;
622  }
623 #endif
624 #endif /* ISPC_NVPTX_ENABLED */
625 
626  if (isUniform) {
627  ctx->StartUniformIf();
628  if (doAllCheck)
629  Warning(test->pos, "Uniform condition supplied to \"cif\" statement.");
630 
631  // 'If' statements with uniform conditions are relatively
632  // straightforward. We evaluate the condition and then jump to
633  // either the 'then' or 'else' clause depending on its value.
634  llvm::BasicBlock *bthen = ctx->CreateBasicBlock("if_then");
635  llvm::BasicBlock *belse = ctx->CreateBasicBlock("if_else");
636  llvm::BasicBlock *bexit = ctx->CreateBasicBlock("if_exit");
637 
638  // Jump to the appropriate basic block based on the value of
639  // the 'if' test
640  ctx->BranchInst(bthen, belse, testValue);
641 
642  // Emit code for the 'true' case
643  ctx->SetCurrentBasicBlock(bthen);
644  lEmitIfStatements(ctx, trueStmts, "true");
645  if (ctx->GetCurrentBasicBlock())
646  ctx->BranchInst(bexit);
647 
648  // Emit code for the 'false' case
649  ctx->SetCurrentBasicBlock(belse);
650  lEmitIfStatements(ctx, falseStmts, "false");
651  if (ctx->GetCurrentBasicBlock())
652  ctx->BranchInst(bexit);
653 
654  // Set the active basic block to the newly-created exit block
655  // so that subsequent emitted code starts there.
656  ctx->SetCurrentBasicBlock(bexit);
657  ctx->EndIf();
658  }
659  /*
660  // Disabled for performance reasons. Change to an optional compile-time opt switch.
661  else if (lCanApplyBreakOptimization(trueStmts, falseStmts)) {
662  // If we have a simple break statement inside the 'if' and are
663  // under varying control flow, just update the execution mask
664  // directly and don't emit code for the statements. This leads to
665  // better code for this case--this is surprising and should be
666  // root-caused further, but for now this gives us performance
667  // benefit in this case.
668  ctx->SetInternalMaskAndNot(ctx->GetInternalMask(), testValue);
669  }
670  */
671  else
672  emitVaryingIf(ctx, testValue);
673 }
674 
675 
676 Stmt *
678  if (test != NULL) {
679  const Type *testType = test->GetType();
680  if (testType != NULL) {
681  bool isUniform = (testType->IsUniformType() &&
685  "\"if\" statement test");
686  if (test == NULL)
687  return NULL;
688  }
689  }
690 
691  return this;
692 }
693 
694 
695 int
697  const Type *type;
698  if (test == NULL || (type = test->GetType()) == NULL)
699  return 0;
700 
701  return type->IsUniformType() ? COST_UNIFORM_IF : COST_VARYING_IF;
702 }
703 
704 
705 void
706 IfStmt::Print(int indent) const {
707  printf("%*cIf Stmt %s", indent, ' ', doAllCheck ? "DO ALL CHECK" : "");
708  pos.Print();
709  printf("\n%*cTest: ", indent+4, ' ');
710  test->Print();
711  printf("\n");
712  if (trueStmts) {
713  printf("%*cTrue:\n", indent+4, ' ');
714  trueStmts->Print(indent+8);
715  }
716  if (falseStmts) {
717  printf("%*cFalse:\n", indent+4, ' ');
718  falseStmts->Print(indent+8);
719  }
720 }
721 
722 
723 /** Emit code to run both the true and false statements for the if test,
724  with the mask set appropriately before running each one.
725 */
726 void
728  llvm::Value *test) const {
729  if (trueStmts) {
730  ctx->SetInternalMaskAnd(oldMask, test);
731  lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
732  // under varying control flow,, returns can't stop instruction
733  // emission, so this better be non-NULL...
735  }
736  if (falseStmts) {
737  ctx->SetInternalMaskAndNot(oldMask, test);
738  lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
740  }
741 }
742 
743 /** Emit code for an if test that checks the mask and the test values and
744  tries to be smart about jumping over code that doesn't need to be run.
745  */
746 void
747 IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
748  llvm::Value *oldMask = ctx->GetInternalMask();
749  if (doAllCheck) {
750  // We can't tell if the mask going into the if is all on at the
751  // compile time. Emit code to check for this and then either run
752  // the code for the 'all on' or the 'mixed' case depending on the
753  // mask's value at runtime.
754  llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("cif_mask_all");
755  llvm::BasicBlock *bMixedOn = ctx->CreateBasicBlock("cif_mask_mixed");
756  llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done");
757 
758  // Jump to either bAllOn or bMixedOn, depending on the mask's value
759  llvm::Value *maskAllQ = ctx->All(ctx->GetFullMask());
760  ctx->BranchInst(bAllOn, bMixedOn, maskAllQ);
761 
762  // Emit code for the 'mask all on' case
763  ctx->SetCurrentBasicBlock(bAllOn);
764  emitMaskAllOn(ctx, ltest, bDone);
765 
766  // And emit code for the mixed mask case
767  ctx->SetCurrentBasicBlock(bMixedOn);
768  emitMaskMixed(ctx, oldMask, ltest, bDone);
769 
770  // When done, set the current basic block to the block that the two
771  // paths above jump to when they're done.
772  ctx->SetCurrentBasicBlock(bDone);
773  }
774  else if (trueStmts != NULL || falseStmts != NULL) {
775  // If there is nothing that is potentially unsafe to run with all
776  // lanes off in the true and false statements and if the total
777  // complexity of those two is relatively simple, then we'll go
778  // ahead and emit straightline code that runs both sides, updating
779  // the mask accordingly. This is useful for efficiently compiling
780  // things like:
781  //
782  // if (foo) x = 0;
783  // else ++x;
784  //
785  // Where the overhead of checking if any of the program instances wants
786  // to run one side or the other is more than the actual computation.
787  // SafeToRunWithMaskAllOff() checks to make sure that we don't do this
788  // for potentially dangerous code like:
789  //
790  // if (index < count) array[index] = 0;
791  //
792  // where our use of blend for conditional assignments doesn't check
793  // for the 'all lanes' off case.
794  int trueFalseCost = (::EstimateCost(trueStmts) +
796  bool costIsAcceptable = (trueFalseCost <
798 
799  bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) &&
801 
802  Debug(pos, "If statement: true cost %d (safe %d), false cost %d (safe %d).",
805 
806  if (safeToRunWithAllLanesOff &&
807  (costIsAcceptable || g->opt.disableCoherentControlFlow)) {
808  ctx->StartVaryingIf(oldMask);
809  emitMaskedTrueAndFalse(ctx, oldMask, ltest);
811  ctx->EndIf();
812  }
813  else {
814  llvm::BasicBlock *bDone = ctx->CreateBasicBlock("if_done");
815  emitMaskMixed(ctx, oldMask, ltest, bDone);
816  ctx->SetCurrentBasicBlock(bDone);
817  }
818  }
819 }
820 
821 
822 /** Emits code for 'if' tests under the case where we know that the program
823  mask is all on going into the 'if'.
824  */
825 void
827  llvm::BasicBlock *bDone) const {
828  // We start by explicitly storing "all on" into the mask mask. Note
829  // that this doesn't change its actual value, but doing so lets the
830  // compiler see what's going on so that subsequent optimizations for
831  // code emitted here can operate with the knowledge that the mask is
832  // definitely all on (until it modifies the mask itself).
836  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
839 
840  // First, check the value of the test. If it's all on, then we jump to
841  // a basic block that will only have code for the true case.
842  llvm::BasicBlock *bTestAll = ctx->CreateBasicBlock("cif_test_all");
843  llvm::BasicBlock *bTestNoneCheck = ctx->CreateBasicBlock("cif_test_none_check");
844  llvm::Value *testAllQ = ctx->All(ltest);
845  ctx->BranchInst(bTestAll, bTestNoneCheck, testAllQ);
846 
847  // Emit code for the 'test is all true' case
848  ctx->SetCurrentBasicBlock(bTestAll);
850  lEmitIfStatements(ctx, trueStmts, "if: all on mask, expr all true");
851  ctx->EndIf();
852  if (ctx->GetCurrentBasicBlock() != NULL)
853  // bblock may legitimately be NULL since if there's a return stmt
854  // or break or continue we can actually jump and end emission since
855  // we know all of the lanes are following this path...
856  ctx->BranchInst(bDone);
857 
858  // The test isn't all true. Now emit code to determine if it's all
859  // false, or has mixed values.
860  ctx->SetCurrentBasicBlock(bTestNoneCheck);
861  llvm::BasicBlock *bTestNone = ctx->CreateBasicBlock("cif_test_none");
862  llvm::BasicBlock *bTestMixed = ctx->CreateBasicBlock("cif_test_mixed");
863  llvm::Value *testMixedQ = ctx->Any(ltest);
864  ctx->BranchInst(bTestMixed, bTestNone, testMixedQ);
865 
866  // Emit code for the 'test is all false' case
867  ctx->SetCurrentBasicBlock(bTestNone);
869  lEmitIfStatements(ctx, falseStmts, "if: all on mask, expr all false");
870  ctx->EndIf();
871  if (ctx->GetCurrentBasicBlock())
872  // bblock may be NULL since if there's a return stmt or break or
873  // continue we can actually jump or whatever and end emission...
874  ctx->BranchInst(bDone);
875 
876  // Finally emit code for the 'mixed true/false' case. We unavoidably
877  // need to run both the true and the false statements.
878  ctx->SetCurrentBasicBlock(bTestMixed);
881  // In this case, return/break/continue isn't allowed to jump and end
882  // emission.
884  ctx->EndIf();
885  ctx->BranchInst(bDone);
886 
887  ctx->SetCurrentBasicBlock(bDone);
888  ctx->SetFunctionMask(oldFunctionMask);
889 }
890 
891 
892 /** Emit code for an 'if' test where the lane mask is known to be mixed
893  on/off going into it.
894  */
895 void
896 IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
897  llvm::Value *ltest, llvm::BasicBlock *bDone) const {
898  ctx->StartVaryingIf(oldMask);
899  llvm::BasicBlock *bNext = ctx->CreateBasicBlock("safe_if_after_true");
900 
901  llvm::BasicBlock *bRunTrue = ctx->CreateBasicBlock("safe_if_run_true");
902  ctx->SetInternalMaskAnd(oldMask, ltest);
903 
904  // Do any of the program instances want to run the 'true'
905  // block? If not, jump ahead to bNext.
906 
907 #ifdef ISPC_NVPTX_ENABLED
908 #if 0
909  llvm::Value *maskAnyTrueQ = ctx->ExtractInst(ctx->GetFullMask(),0);
910 #else
911  llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
912 #endif
913 #else /* ISPC_NVPTX_ENABLED */
914  llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
915 #endif /* ISPC_NVPTX_ENABLED */
916 
917  ctx->BranchInst(bRunTrue, bNext, maskAnyTrueQ);
918 
919  // Emit statements for true
920  ctx->SetCurrentBasicBlock(bRunTrue);
921  if (trueStmts != NULL)
922  lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
924  ctx->BranchInst(bNext);
925  ctx->SetCurrentBasicBlock(bNext);
926 
927  // False...
928  llvm::BasicBlock *bRunFalse = ctx->CreateBasicBlock("safe_if_run_false");
929  ctx->SetInternalMaskAndNot(oldMask, ltest);
930 
931  // Similarly, check to see if any of the instances want to
932  // run the 'false' block...
933 
934 #ifdef ISPC_NVPTX_ENABLED
935 #if 0
936  llvm::Value *maskAnyFalseQ = ctx->ExtractInst(ctx->GetFullMask(),0);
937 #else
938  llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
939 #endif
940 #else /* ISPC_NVPTX_ENABLED */
941  llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
942 #endif /* ISPC_NVPTX_ENABLED */
943  ctx->BranchInst(bRunFalse, bDone, maskAnyFalseQ);
944 
945  // Emit code for false
946  ctx->SetCurrentBasicBlock(bRunFalse);
947  if (falseStmts)
948  lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
950 
951  ctx->BranchInst(bDone);
952  ctx->SetCurrentBasicBlock(bDone);
953  ctx->EndIf();
954 }
955 
956 
957 ///////////////////////////////////////////////////////////////////////////
958 // DoStmt
959 
962  varyingControlFlowDepth = 0;
963  foundVaryingBreakOrContinue = false;
964  }
965 
968 };
969 
970 
971 /** Returns true if the given node is an 'if' statement where the test
972  condition has varying type. */
973 static bool
975  IfStmt *ifStmt;
976  if ((ifStmt = llvm::dyn_cast<IfStmt>(node)) != NULL &&
977  ifStmt->test != NULL) {
978  const Type *type = ifStmt->test->GetType();
979  return (type != NULL && type->IsVaryingType());
980  }
981  else
982  return false;
983 }
984 
985 
986 /** Preorder callback function for checking for varying breaks or
987  continues. */
988 static bool
989 lVaryingBCPreFunc(ASTNode *node, void *d) {
991 
992  // We found a break or continue statement; if we're under varying
993  // control flow, then bingo.
994  if ((llvm::dyn_cast<BreakStmt>(node) != NULL ||
995  llvm::dyn_cast<ContinueStmt>(node) != NULL) &&
996  info->varyingControlFlowDepth > 0) {
997  info->foundVaryingBreakOrContinue = true;
998  return false;
999  }
1000 
1001  // Update the count of the nesting depth of varying control flow if
1002  // this is an if statement with a varying condition.
1003  if (lIsVaryingFor(node))
1004  ++info->varyingControlFlowDepth;
1005 
1006  if (llvm::dyn_cast<ForStmt>(node) != NULL ||
1007  llvm::dyn_cast<DoStmt>(node) != NULL ||
1008  llvm::dyn_cast<ForeachStmt>(node) != NULL)
1009  // Don't recurse into these guys, since we don't care about varying
1010  // breaks or continues within them...
1011  return false;
1012  else
1013  return true;
1014 }
1015 
1016 
1017 /** Postorder callback function for checking for varying breaks or
1018  continues; decrement the varying control flow depth after the node's
1019  children have been processed, if this is a varying if statement. */
1020 static ASTNode *
1021 lVaryingBCPostFunc(ASTNode *node, void *d) {
1023  if (lIsVaryingFor(node))
1024  --info->varyingControlFlowDepth;
1025  return node;
1026 }
1027 
1028 
1029 /** Given a statment, walk through it to see if there is a 'break' or
1030  'continue' statement inside if its children, under varying control
1031  flow. We need to detect this case for loops since what might otherwise
1032  look like a 'uniform' loop needs to have code emitted to do all of the
1033  lane management stuff if this is the case.
1034  */
1035 static bool
1037  VaryingBCCheckInfo info;
1039  return info.foundVaryingBreakOrContinue;
1040 }
1041 
1042 
1043 DoStmt::DoStmt(Expr *t, Stmt *s, bool cc, SourcePos p)
1044  : Stmt(p, DoStmtID), testExpr(t), bodyStmts(s),
1045  doCoherentCheck(cc && !g->opt.disableCoherentControlFlow) {
1046 }
1047 
1048 
1050  // Check for things that could be NULL due to earlier errors during
1051  // compilation.
1052  if (!ctx->GetCurrentBasicBlock())
1053  return;
1054  if (!testExpr || !testExpr->GetType())
1055  return;
1056 
1057  bool uniformTest = testExpr->GetType()->IsUniformType();
1058  if (uniformTest && doCoherentCheck)
1059  Warning(testExpr->pos, "Uniform condition supplied to \"cdo\" "
1060  "statement.");
1061 
1062  llvm::BasicBlock *bloop = ctx->CreateBasicBlock("do_loop");
1063  llvm::BasicBlock *bexit = ctx->CreateBasicBlock("do_exit");
1064  llvm::BasicBlock *btest = ctx->CreateBasicBlock("do_test");
1065 
1066  ctx->StartLoop(bexit, btest, uniformTest);
1067 
1068  // Start by jumping into the loop body
1069  ctx->BranchInst(bloop);
1070 
1071  // And now emit code for the loop body
1072  ctx->SetCurrentBasicBlock(bloop);
1073  ctx->SetBlockEntryMask(ctx->GetFullMask());
1074  ctx->SetDebugPos(pos);
1075  // FIXME: in the StmtList::EmitCode() method takes starts/stops a new
1076  // scope around the statements in the list. So if the body is just a
1077  // single statement (and thus not a statement list), we need a new
1078  // scope, but we don't want two scopes in the StmtList case.
1079  if (!llvm::dyn_cast<StmtList>(bodyStmts))
1080  ctx->StartScope();
1081 
1082  ctx->AddInstrumentationPoint("do loop body");
1083  if (doCoherentCheck && !uniformTest) {
1084  // Check to see if the mask is all on
1085  llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("do_all_on");
1086  llvm::BasicBlock *bMixed = ctx->CreateBasicBlock("do_mixed");
1087  ctx->BranchIfMaskAll(bAllOn, bMixed);
1088 
1089  // If so, emit code for the 'mask all on' case. In particular,
1090  // explicitly set the mask to 'all on' (see rationale in
1091  // IfStmt::emitCoherentTests()), and then emit the code for the
1092  // loop body.
1093  ctx->SetCurrentBasicBlock(bAllOn);
1096  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
1099  if (bodyStmts)
1100  bodyStmts->EmitCode(ctx);
1102  ctx->SetFunctionMask(oldFunctionMask);
1103  ctx->BranchInst(btest);
1104 
1105  // The mask is mixed. Just emit the code for the loop body.
1106  ctx->SetCurrentBasicBlock(bMixed);
1107  if (bodyStmts)
1108  bodyStmts->EmitCode(ctx);
1110  ctx->BranchInst(btest);
1111  }
1112  else {
1113  // Otherwise just emit the code for the loop body. The current
1114  // mask is good.
1115  if (bodyStmts)
1116  bodyStmts->EmitCode(ctx);
1117  if (ctx->GetCurrentBasicBlock())
1118  ctx->BranchInst(btest);
1119  }
1120  // End the scope we started above, if needed.
1121  if (!llvm::dyn_cast<StmtList>(bodyStmts))
1122  ctx->EndScope();
1123 
1124  // Now emit code for the loop test.
1125  ctx->SetCurrentBasicBlock(btest);
1126  // First, emit code to restore the mask value for any lanes that
1127  // executed a 'continue' during the current loop before we go and emit
1128  // the code for the test. This is only necessary for varying loops;
1129  // 'uniform' loops just jump when they hit a continue statement and
1130  // don't mess with the mask.
1131  if (!uniformTest) {
1132  ctx->RestoreContinuedLanes();
1133  ctx->ClearBreakLanes();
1134  }
1135  llvm::Value *testValue = testExpr->GetValue(ctx);
1136  if (!testValue)
1137  return;
1138 
1139  if (uniformTest)
1140  // For the uniform case, just jump to the top of the loop or the
1141  // exit basic block depending on the value of the test.
1142  ctx->BranchInst(bloop, bexit, testValue);
1143  else {
1144  // For the varying case, update the mask based on the value of the
1145  // test. If any program instances still want to be running, jump
1146  // to the top of the loop. Otherwise, jump out.
1147  llvm::Value *mask = ctx->GetInternalMask();
1148  ctx->SetInternalMaskAnd(mask, testValue);
1149  ctx->BranchIfMaskAny(bloop, bexit);
1150  }
1151 
1152  // ...and we're done. Set things up for subsequent code to be emitted
1153  // in the right basic block.
1154  ctx->SetCurrentBasicBlock(bexit);
1155  ctx->EndLoop();
1156 }
1157 
1158 
1159 Stmt *
1161  const Type *testType;
1162  if (testExpr != NULL && (testType = testExpr->GetType()) != NULL) {
1163  // Should the test condition for the loop be uniform or varying?
1164  // It can be uniform only if three conditions are met:
1165  //
1166  // - First and foremost, the type of the test condition must be
1167  // uniform.
1168  //
1169  // - Second, the user must not have set the dis-optimization option
1170  // that disables uniform flow control.
1171  //
1172  // - Thirdly, and most subtlely, there must not be any break or
1173  // continue statements inside the loop that are within the scope
1174  // of a 'varying' if statement. If there are, then we type cast
1175  // the test to be 'varying', so that the code generated for the
1176  // loop includes masking stuff, so that we can track which lanes
1177  // actually want to be running, accounting for breaks/continues.
1178  //
1179  bool uniformTest = (testType->IsUniformType() &&
1184  "\"do\" statement");
1185  }
1186 
1187  return this;
1188 }
1189 
1190 
1191 int
1193  bool uniformTest = testExpr ? testExpr->GetType()->IsUniformType() :
1196 
1197  return uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP;
1198 }
1199 
1200 
1201 void
1202 DoStmt::Print(int indent) const {
1203  printf("%*cDo Stmt", indent, ' ');
1204  pos.Print();
1205  printf(":\n");
1206  printf("%*cTest: ", indent+4, ' ');
1207  if (testExpr) testExpr->Print();
1208  printf("\n");
1209  if (bodyStmts) {
1210  printf("%*cStmts:\n", indent+4, ' ');
1211  bodyStmts->Print(indent+8);
1212  }
1213 }
1214 
1215 
1216 ///////////////////////////////////////////////////////////////////////////
1217 // ForStmt
1218 
1219 ForStmt::ForStmt(Stmt *i, Expr *t, Stmt *s, Stmt *st, bool cc, SourcePos p)
1220  : Stmt(p, ForStmtID), init(i), test(t), step(s), stmts(st),
1221  doCoherentCheck(cc && !g->opt.disableCoherentControlFlow) {
1222 }
1223 
1224 
1225 void
1227  if (!ctx->GetCurrentBasicBlock())
1228  return;
1229 
1230  llvm::BasicBlock *btest = ctx->CreateBasicBlock("for_test");
1231  llvm::BasicBlock *bstep = ctx->CreateBasicBlock("for_step");
1232  llvm::BasicBlock *bloop = ctx->CreateBasicBlock("for_loop");
1233  llvm::BasicBlock *bexit = ctx->CreateBasicBlock("for_exit");
1234 
1235  bool uniformTest = test ? test->GetType()->IsUniformType() :
1238 
1239  ctx->StartLoop(bexit, bstep, uniformTest);
1240  ctx->SetDebugPos(pos);
1241 
1242  // If we have an initiailizer statement, start by emitting the code for
1243  // it and then jump into the loop test code. (Also start a new scope
1244  // since the initiailizer may be a declaration statement).
1245  if (init) {
1246  AssertPos(pos, llvm::dyn_cast<StmtList>(init) == NULL);
1247  ctx->StartScope();
1248  init->EmitCode(ctx);
1249  }
1250  ctx->BranchInst(btest);
1251 
1252  // Emit code to get the value of the loop test. If no test expression
1253  // was provided, just go with a true value.
1254  ctx->SetCurrentBasicBlock(btest);
1255  llvm::Value *ltest = NULL;
1256  if (test) {
1257  ltest = test->GetValue(ctx);
1258  if (!ltest) {
1259  ctx->EndScope();
1260  ctx->EndLoop();
1261  return;
1262  }
1263  }
1264  else
1265  ltest = uniformTest ? LLVMTrue : LLVMBoolVector(true);
1266 
1267  // Now use the test's value. For a uniform loop, we can either jump to
1268  // the loop body or the loop exit, based on whether it's true or false.
1269  // For a non-uniform loop, we update the mask and jump into the loop if
1270  // any of the mask values are true.
1271  if (uniformTest) {
1272  if (doCoherentCheck)
1273  Warning(test->pos, "Uniform condition supplied to cfor/cwhile "
1274  "statement.");
1275  AssertPos(pos, ltest->getType() == LLVMTypes::BoolType);
1276  ctx->BranchInst(bloop, bexit, ltest);
1277  }
1278  else {
1279  llvm::Value *mask = ctx->GetInternalMask();
1280  ctx->SetInternalMaskAnd(mask, ltest);
1281  ctx->BranchIfMaskAny(bloop, bexit);
1282  }
1283 
1284  // On to emitting the code for the loop body.
1285  ctx->SetCurrentBasicBlock(bloop);
1286  ctx->SetBlockEntryMask(ctx->GetFullMask());
1287  ctx->AddInstrumentationPoint("for loop body");
1288  if (!llvm::dyn_cast_or_null<StmtList>(stmts))
1289  ctx->StartScope();
1290 
1291  if (doCoherentCheck && !uniformTest) {
1292  // For 'varying' loops with the coherence check, we start by
1293  // checking to see if the mask is all on, after it has been updated
1294  // based on the value of the test.
1295  llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("for_all_on");
1296  llvm::BasicBlock *bMixed = ctx->CreateBasicBlock("for_mixed");
1297  ctx->BranchIfMaskAll(bAllOn, bMixed);
1298 
1299  // Emit code for the mask being all on. Explicitly set the mask to
1300  // be on so that the optimizer can see that it's on (i.e. now that
1301  // the runtime test has passed, make this fact clear for code
1302  // generation at compile time here.)
1303  ctx->SetCurrentBasicBlock(bAllOn);
1306  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
1309  if (stmts)
1310  stmts->EmitCode(ctx);
1312  ctx->SetFunctionMask(oldFunctionMask);
1313  ctx->BranchInst(bstep);
1314 
1315  // Emit code for the mask being mixed. We should never run the
1316  // loop with the mask all off, based on the BranchIfMaskAny call
1317  // above.
1318  ctx->SetCurrentBasicBlock(bMixed);
1319  if (stmts)
1320  stmts->EmitCode(ctx);
1321  ctx->BranchInst(bstep);
1322  }
1323  else {
1324  // For both uniform loops and varying loops without the coherence
1325  // check, we know that at least one program instance wants to be
1326  // running the loop, so just emit code for the loop body and jump
1327  // to the loop step code.
1328  if (stmts)
1329  stmts->EmitCode(ctx);
1330  if (ctx->GetCurrentBasicBlock())
1331  ctx->BranchInst(bstep);
1332  }
1333  if (!llvm::dyn_cast_or_null<StmtList>(stmts))
1334  ctx->EndScope();
1335 
1336  // Emit code for the loop step. First, restore the lane mask of any
1337  // program instances that executed a 'continue' during the previous
1338  // iteration. Then emit code for the loop step and then jump to the
1339  // test code.
1340  ctx->SetCurrentBasicBlock(bstep);
1341  ctx->RestoreContinuedLanes();
1342  ctx->ClearBreakLanes();
1343 
1344  if (step)
1345  step->EmitCode(ctx);
1346  ctx->BranchInst(btest);
1347 
1348  // Set the current emission basic block to the loop exit basic block
1349  ctx->SetCurrentBasicBlock(bexit);
1350  if (init)
1351  ctx->EndScope();
1352  ctx->EndLoop();
1353 }
1354 
1355 
1356 Stmt *
1358  const Type *testType;
1359  if (test && (testType = test->GetType()) != NULL) {
1360  // See comments in DoStmt::TypeCheck() regarding
1361  // 'uniformTest' and the type conversion here.
1362  bool uniformTest = (testType->IsUniformType() &&
1367  "\"for\"/\"while\" statement");
1368  if (test == NULL)
1369  return NULL;
1370  }
1371 
1372  return this;
1373 }
1374 
1375 
1376 int
1378  bool uniformTest = test ? test->GetType()->IsUniformType() :
1381 
1382  return uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP;
1383 }
1384 
1385 
1386 void
1387 ForStmt::Print(int indent) const {
1388  printf("%*cFor Stmt", indent, ' ');
1389  pos.Print();
1390  printf("\n");
1391  if (init) {
1392  printf("%*cInit:\n", indent+4, ' ');
1393  init->Print(indent+8);
1394  }
1395  if (test) {
1396  printf("%*cTest: ", indent+4, ' ');
1397  test->Print();
1398  printf("\n");
1399  }
1400  if (step) {
1401  printf("%*cStep:\n", indent+4, ' ');
1402  step->Print(indent+8);
1403  }
1404  if (stmts) {
1405  printf("%*cStmts:\n", indent+4, ' ');
1406  stmts->Print(indent+8);
1407  }
1408 }
1409 
1410 ///////////////////////////////////////////////////////////////////////////
1411 // BreakStmt
1412 
1414  : Stmt(p, BreakStmtID) {
1415 }
1416 
1417 
1418 void
1420  if (!ctx->GetCurrentBasicBlock())
1421  return;
1422 
1423  ctx->SetDebugPos(pos);
1424  ctx->Break(true);
1425 }
1426 
1427 
1428 Stmt *
1430  return this;
1431 }
1432 
1433 
1434 int
1436  return COST_BREAK_CONTINUE;
1437 }
1438 
1439 
1440 void
1441 BreakStmt::Print(int indent) const {
1442  printf("%*cBreak Stmt", indent, ' ');
1443  pos.Print();
1444  printf("\n");
1445 }
1446 
1447 
1448 ///////////////////////////////////////////////////////////////////////////
1449 // ContinueStmt
1450 
1452  : Stmt(p, ContinueStmtID) {
1453 }
1454 
1455 
1456 void
1458  if (!ctx->GetCurrentBasicBlock())
1459  return;
1460 
1461  ctx->SetDebugPos(pos);
1462  ctx->Continue(true);
1463 }
1464 
1465 
1466 Stmt *
1468  return this;
1469 }
1470 
1471 
1472 int
1474  return COST_BREAK_CONTINUE;
1475 }
1476 
1477 
1478 void
1479 ContinueStmt::Print(int indent) const {
1480  printf("%*cContinue Stmt", indent, ' ');
1481  pos.Print();
1482  printf("\n");
1483 }
1484 
1485 
1486 ///////////////////////////////////////////////////////////////////////////
1487 // ForeachStmt
1488 
1489 ForeachStmt::ForeachStmt(const std::vector<Symbol *> &lvs,
1490  const std::vector<Expr *> &se,
1491  const std::vector<Expr *> &ee,
1492  Stmt *s, bool t, SourcePos pos)
1493  : Stmt(pos, ForeachStmtID), dimVariables(lvs), startExprs(se), endExprs(ee), isTiled(t),
1494  stmts(s) {
1495 }
1496 
1497 
1498 /* Given a uniform counter value in the memory location pointed to by
1499  uniformCounterPtr, compute the corresponding set of varying counter
1500  values for use within the loop body.
1501  */
1502 static llvm::Value *
1504  llvm::Value *uniformCounterPtr,
1505  llvm::Value *varyingCounterPtr,
1506  const std::vector<int> &spans) {
1507 #ifdef ISPC_NVPTX_ENABLED
1508  if (g->target->getISA() == Target::NVPTX)
1509  {
1510  // Smear the uniform counter value out to be varying
1511  llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
1512  llvm::Value *smearCounter = ctx->BroadcastValue(
1513  counter, LLVMTypes::Int32VectorType, "smear_counter");
1514 
1515  // Figure out the offsets; this is a little bit tricky. As an example,
1516  // consider a 2D tiled foreach loop, where we're running 8-wide and
1517  // where the inner dimension has a stride of 4 and the outer dimension
1518  // has a stride of 2. For the inner dimension, we want the offsets
1519  // (0,1,2,3,0,1,2,3), and for the outer dimension we want
1520  // (0,0,0,0,1,1,1,1).
1521  int32_t delta[ISPC_MAX_NVEC];
1522  const int vecWidth = 32;
1523  std::vector<llvm::Constant*> constDeltaList;
1524  for (int i = 0; i < vecWidth; ++i)
1525  {
1526  int d = i;
1527  // First, account for the effect of any dimensions at deeper
1528  // nesting levels than the current one.
1529  int prevDimSpanCount = 1;
1530  for (int j = dim; j < nDims-1; ++j)
1531  prevDimSpanCount *= spans[j+1];
1532  d /= prevDimSpanCount;
1533 
1534  // And now with what's left, figure out our own offset
1535  delta[i] = d % spans[dim];
1536  constDeltaList.push_back(LLVMInt8(delta[i]));
1537  }
1538 
1539  llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
1540  // llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
1541 
1542 
1543  llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
1544  /*Module=*/*m->module,
1545  /*Type=*/ArrayDelta,
1546  /*isConstant=*/true,
1547  /*Linkage=*/llvm::GlobalValue::PrivateLinkage,
1548  /*Initializer=*/0, // has initializer, specified below
1549  /*Name=*/"constDeltaForeach");
1550 #if 0
1551  /*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
1552  /*unsigned AddressSpace=*/4 /*constant*/);
1553 #endif
1554 
1555 
1556  llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
1557 
1558  globalDelta->setInitializer(constDelta);
1559  llvm::Function *func_program_index = m->module->getFunction("__program_index");
1560  llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
1561 
1562  std::vector<llvm::Value*> ptr_arrayidx_indices;
1563  ptr_arrayidx_indices.push_back(LLVMInt32(0));
1564  ptr_arrayidx_indices.push_back(laneIdx);
1565 #if 1
1566 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1567  llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
1568 #else
1569  llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(NULL, globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
1570 #endif
1571  llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
1572  llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
1573 
1574  llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
1575  llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
1576 
1577  llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
1578  // llvm::UndefValue(LLVMInt32Vector),
1579  const_packed_41,
1580  int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
1581 #endif
1582 
1583 
1584  // Add the deltas to compute the varying counter values; store the
1585  // result to memory and then return it directly as well.
1586 #if 0
1587  llvm::Value *varyingCounter =
1588  ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
1589  LLVMInt32Vector(delta), "iter_val");
1590 #else
1591  llvm::Value *varyingCounter =
1592  ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
1593  packed_43, "iter_val");
1594 #endif
1595  ctx->StoreInst(varyingCounter, varyingCounterPtr);
1596  return varyingCounter;
1597  }
1598 #endif /* ISPC_NVPTX_ENABLED */
1599 
1600  // Smear the uniform counter value out to be varying
1601  llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
1602  llvm::Value *smearCounter = ctx->BroadcastValue(
1603  counter, LLVMTypes::Int32VectorType, "smear_counter");
1604 
1605  // Figure out the offsets; this is a little bit tricky. As an example,
1606  // consider a 2D tiled foreach loop, where we're running 8-wide and
1607  // where the inner dimension has a stride of 4 and the outer dimension
1608  // has a stride of 2. For the inner dimension, we want the offsets
1609  // (0,1,2,3,0,1,2,3), and for the outer dimension we want
1610  // (0,0,0,0,1,1,1,1).
1611  int32_t delta[ISPC_MAX_NVEC];
1612  for (int i = 0; i < g->target->getVectorWidth(); ++i) {
1613  int d = i;
1614  // First, account for the effect of any dimensions at deeper
1615  // nesting levels than the current one.
1616  int prevDimSpanCount = 1;
1617  for (int j = dim; j < nDims-1; ++j)
1618  prevDimSpanCount *= spans[j+1];
1619  d /= prevDimSpanCount;
1620 
1621  // And now with what's left, figure out our own offset
1622  delta[i] = d % spans[dim];
1623  }
1624 
1625  // Add the deltas to compute the varying counter values; store the
1626  // result to memory and then return it directly as well.
1627  llvm::Value *varyingCounter =
1628  ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
1629  LLVMInt32Vector(delta), "iter_val");
1630  ctx->StoreInst(varyingCounter, varyingCounterPtr);
1631  return varyingCounter;
1632 }
1633 
1634 
1635 /** Returns the integer log2 of the given integer. */
1636 static int
1637 lLog2(int i) {
1638  int ret = 0;
1639  while (i != 0) {
1640  ++ret;
1641  i >>= 1;
1642  }
1643  return ret-1;
1644 }
1645 
1646 
1647 /* Figure out how many elements to process in each dimension for each time
1648  through a foreach loop. The untiled case is easy; all of the outer
1649  dimensions up until the innermost one have a span of 1, and the
1650  innermost one takes the entire vector width. For the tiled case, we
1651  give wider spans to the innermost dimensions while also trying to
1652  generate relatively square domains.
1653 
1654  This code works recursively from outer dimensions to inner dimensions.
1655  */
1656 static void
1657 lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a) {
1658  if (dimsLeft == 0) {
1659  // Nothing left to do but give all of the remaining work to the
1660  // innermost domain.
1661  *a = itemsLeft;
1662  return;
1663  }
1664 
1665  if (isTiled == false || (dimsLeft >= lLog2(itemsLeft)))
1666  // If we're not tiled, or if there are enough dimensions left that
1667  // giving this one any more than a span of one would mean that a
1668  // later dimension would have to have a span of one, give this one
1669  // a span of one to save the available items for later.
1670  *a = 1;
1671  else if (itemsLeft >= 16 && (dimsLeft == 1))
1672  // Special case to have 4x4 domains for the 2D case when running
1673  // 16-wide.
1674  *a = 4;
1675  else
1676  // Otherwise give this dimension a span of two.
1677  *a = 2;
1678 
1679  lGetSpans(dimsLeft-1, nDims, itemsLeft / *a, isTiled, a+1);
1680 }
1681 
1682 
1683 /* Emit code for a foreach statement. We effectively emit code to run the
1684  set of n-dimensional nested loops corresponding to the dimensionality of
1685  the foreach statement along with the extra logic to deal with mismatches
1686  between the vector width we're compiling to and the number of elements
1687  to process.
1688  */
1689 void
1691  if (ctx->GetCurrentBasicBlock() == NULL || stmts == NULL)
1692  return;
1693 
1694  llvm::BasicBlock *bbFullBody = ctx->CreateBasicBlock("foreach_full_body");
1695  llvm::BasicBlock *bbMaskedBody = ctx->CreateBasicBlock("foreach_masked_body");
1696  llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit");
1697 
1698  llvm::Value *oldMask = ctx->GetInternalMask();
1699  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
1700 
1701  ctx->SetDebugPos(pos);
1702  ctx->StartScope();
1703 
1706 
1707  // This should be caught during typechecking
1708  AssertPos(pos, startExprs.size() == dimVariables.size() &&
1709  endExprs.size() == dimVariables.size());
1710  int nDims = (int)dimVariables.size();
1711 
1712  ///////////////////////////////////////////////////////////////////////
1713  // Setup: compute the number of items we have to work on in each
1714  // dimension and a number of derived values.
1715  std::vector<llvm::BasicBlock *> bbReset, bbStep, bbTest;
1716  std::vector<llvm::Value *> startVals, endVals, uniformCounterPtrs;
1717  std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
1718 
1719  std::vector<int> span(nDims, 0);
1720 #ifdef ISPC_NVPTX_ENABLED
1721  const int vectorWidth =
1722  g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth();
1723  lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]);
1724 #else /* ISPC_NVPTX_ENABLED */
1725  lGetSpans(nDims-1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
1726 #endif /* ISPC_NVPTX_ENABLED */
1727 
1728  for (int i = 0; i < nDims; ++i) {
1729  // Basic blocks that we'll fill in later with the looping logic for
1730  // this dimension.
1731  bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
1732  if (i < nDims-1)
1733  // stepping for the innermost dimension is handled specially
1734  bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
1735  bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
1736 
1737  // Start and end value for this loop dimension
1738  llvm::Value *sv = startExprs[i]->GetValue(ctx);
1739  llvm::Value *ev = endExprs[i]->GetValue(ctx);
1740  if (sv == NULL || ev == NULL)
1741  return;
1742  startVals.push_back(sv);
1743  endVals.push_back(ev);
1744 
1745  // nItems = endVal - startVal
1746  llvm::Value *nItems =
1747  ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
1748 
1749  // nExtras = nItems % (span for this dimension)
1750  // This gives us the number of extra elements we need to deal with
1751  // at the end of the loop for this dimension that don't fit cleanly
1752  // into a vector width.
1753  nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
1754  LLVMInt32(span[i]), "nextras"));
1755 
1756  // alignedEnd = endVal - nExtras
1757  alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
1758  nExtras[i], "aligned_end"));
1759 
1760  ///////////////////////////////////////////////////////////////////////
1761  // Each dimension has a loop counter that is a uniform value that
1762  // goes from startVal to endVal, in steps of the span for this
1763  // dimension. Its value is only used internally here for looping
1764  // logic and isn't directly available in the user's program code.
1765  uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
1766  "counter"));
1767  ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
1768 
1769  // There is also a varying variable that holds the set of index
1770  // values for each dimension in the current loop iteration; this is
1771  // the value that is program-visible.
1772  dimVariables[i]->storagePtr =
1774  dimVariables[i]->name.c_str());
1775  dimVariables[i]->parentFunction = ctx->GetFunction();
1777 
1778  // Each dimension also maintains a mask that represents which of
1779  // the varying elements in the current iteration should be
1780  // processed. (i.e. this is used to disable the lanes that have
1781  // out-of-bounds offsets.)
1782  extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
1783  ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
1784  }
1785 
1787 
1788  // On to the outermost loop's test
1789  ctx->BranchInst(bbTest[0]);
1790 
1791  ///////////////////////////////////////////////////////////////////////////
1792  // foreach_reset: this code runs when we need to reset the counter for
1793  // a given dimension in preparation for running through its loop again,
1794  // after the enclosing level advances its counter.
1795  for (int i = 0; i < nDims; ++i) {
1796  ctx->SetCurrentBasicBlock(bbReset[i]);
1797  if (i == 0)
1798  ctx->BranchInst(bbExit);
1799  else {
1800  ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
1801  ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
1802  ctx->BranchInst(bbStep[i-1]);
1803  }
1804  }
1805 
1806  ///////////////////////////////////////////////////////////////////////////
1807  // foreach_step: increment the uniform counter by the vector width.
1808  // Note that we don't increment the varying counter here as well but
1809  // just generate its value when we need it in the loop body. Don't do
1810  // this for the innermost dimension, which has a more complex stepping
1811  // structure..
1812  for (int i = 0; i < nDims-1; ++i) {
1813  ctx->SetCurrentBasicBlock(bbStep[i]);
1814  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
1815  llvm::Value *newCounter =
1816  ctx->BinaryOperator(llvm::Instruction::Add, counter,
1817  LLVMInt32(span[i]), "new_counter");
1818  ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
1819  ctx->BranchInst(bbTest[i]);
1820  }
1821 
1822  ///////////////////////////////////////////////////////////////////////////
1823  // foreach_test (for all dimensions other than the innermost...)
1824  std::vector<llvm::Value *> inExtras;
1825  for (int i = 0; i < nDims-1; ++i) {
1826  ctx->SetCurrentBasicBlock(bbTest[i]);
1827 
1828  llvm::Value *haveExtras =
1829  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
1830  endVals[i], alignedEnd[i], "have_extras");
1831 
1832  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
1833  llvm::Value *atAlignedEnd =
1834  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
1835  counter, alignedEnd[i], "at_aligned_end");
1836  llvm::Value *inEx =
1837  ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
1838  atAlignedEnd, "in_extras");
1839 
1840  if (i == 0)
1841  inExtras.push_back(inEx);
1842  else
1843  inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
1844  inExtras[i-1], "in_extras_all"));
1845 
1846  llvm::Value *varyingCounter =
1847  lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
1848  dimVariables[i]->storagePtr, span);
1849 
1850  llvm::Value *smearEnd = ctx->BroadcastValue(
1851  endVals[i], LLVMTypes::Int32VectorType, "smear_end");
1852 
1853  // Do a vector compare of its value to the end value to generate a
1854  // mask for this last bit of work.
1855  llvm::Value *emask =
1856  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
1857  varyingCounter, smearEnd);
1858  emask = ctx->I1VecToBoolVec(emask);
1859 
1860  if (i == 0)
1861  ctx->StoreInst(emask, extrasMaskPtrs[i]);
1862  else {
1863  llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
1864  llvm::Value *newMask =
1865  ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
1866  "extras_mask");
1867  ctx->StoreInst(newMask, extrasMaskPtrs[i]);
1868  }
1869 
1870  llvm::Value *notAtEnd =
1871  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
1872  counter, endVals[i]);
1873  ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
1874  }
1875 
1876  ///////////////////////////////////////////////////////////////////////////
1877  // foreach_test (for innermost dimension)
1878  //
1879  // All of the outer dimensions are handled generically--basically as a
1880  // for() loop from the start value to the end value, where at each loop
1881  // test, we compute the mask of active elements for the current
1882  // dimension and then update an overall mask that is the AND
1883  // combination of all of the outer ones.
1884  //
1885  // The innermost loop is handled specially, for performance purposes.
1886  // When starting the innermost dimension, we start by checking once
1887  // whether any of the outer dimensions has set the mask to be
1888  // partially-active or not. We follow different code paths for these
1889  // two cases, taking advantage of the knowledge that the mask is all
1890  // on, when this is the case.
1891  //
1892  // In each of these code paths, we start with a loop from the starting
1893  // value to the aligned end value for the innermost dimension; we can
1894  // guarantee that the innermost loop will have an "all on" mask (as far
1895  // as its dimension is concerned) for the duration of this loop. Doing
1896  // so allows us to emit code that assumes the mask is all on (for the
1897  // case where none of the outer dimensions has set the mask to be
1898  // partially on), or allows us to emit code that just uses the mask
1899  // from the outer dimensions directly (for the case where they have).
1900  //
1901  // After this loop, we just need to deal with one vector's worth of
1902  // "ragged extra bits", where the mask used includes the effect of the
1903  // mask for the innermost dimension.
1904  //
1905  // We start out this process by emitting the check that determines
1906  // whether any of the enclosing dimensions is partially active
1907  // (i.e. processing extra elements that don't exactly fit into a
1908  // vector).
1909  llvm::BasicBlock *bbOuterInExtras =
1910  ctx->CreateBasicBlock("outer_in_extras");
1911  llvm::BasicBlock *bbOuterNotInExtras =
1912  ctx->CreateBasicBlock("outer_not_in_extras");
1913 
1914  ctx->SetCurrentBasicBlock(bbTest[nDims-1]);
1915  if (inExtras.size())
1916  ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
1917  inExtras.back());
1918  else
1919  // for a 1D iteration domain, we certainly don't have any enclosing
1920  // dimensions that are processing extra elements.
1921  ctx->BranchInst(bbOuterNotInExtras);
1922 
1923  ///////////////////////////////////////////////////////////////////////////
1924  // One or more outer dimensions in extras, so we need to mask for the loop
1925  // body regardless. We break this into two cases, roughly:
1926  // for (counter = start; counter < alignedEnd; counter += step) {
1927  // // mask is all on for inner, so set mask to outer mask
1928  // // run loop body with mask
1929  // }
1930  // // counter == alignedEnd
1931  // if (counter < end) {
1932  // // set mask to outermask & (counter+programCounter < end)
1933  // // run loop body with mask
1934  // }
1935  llvm::BasicBlock *bbAllInnerPartialOuter =
1936  ctx->CreateBasicBlock("all_inner_partial_outer");
1937  llvm::BasicBlock *bbPartial =
1938  ctx->CreateBasicBlock("both_partial");
1939  ctx->SetCurrentBasicBlock(bbOuterInExtras); {
1940  // Update the varying counter value here, since all subsequent
1941  // blocks along this path need it.
1942  lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
1943  dimVariables[nDims-1]->storagePtr, span);
1944 
1945  // here we just check to see if counter < alignedEnd
1946  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
1947  llvm::Value *beforeAlignedEnd =
1948  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
1949  counter, alignedEnd[nDims-1], "before_aligned_end");
1950  ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
1951  }
1952 
1953  // Below we have a basic block that runs the loop body code for the
1954  // case where the mask is partially but not fully on. This same block
1955  // runs in multiple cases: both for handling any ragged extra data for
1956  // the innermost dimension but also when outer dimensions have set the
1957  // mask to be partially on.
1958  //
1959  // The value stored in stepIndexAfterMaskedBodyPtr is used after each
1960  // execution of the body code to determine whether the innermost index
1961  // value should be incremented by the step (we're running the "for"
1962  // loop of full vectors at the innermost dimension, with outer
1963  // dimensions having set the mask to be partially on), or whether we're
1964  // running once for the ragged extra bits at the end of the innermost
1965  // dimension, in which case we're done with the innermost dimension and
1966  // should step the loop counter for the next enclosing dimension
1967  // instead.
1968  llvm::Value *stepIndexAfterMaskedBodyPtr =
1969  ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
1970 
1971  ///////////////////////////////////////////////////////////////////////////
1972  // We're in the inner loop part where the only masking is due to outer
1973  // dimensions but the innermost dimension fits fully into a vector's
1974  // width. Set the mask and jump to the masked loop body.
1975  ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
1976  llvm::Value *mask;
1977  if (nDims == 1)
1978  // 1D loop; we shouldn't ever get here anyway
1979  mask = LLVMMaskAllOff;
1980  else
1981  mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
1982 
1983  ctx->SetInternalMask(mask);
1984 
1985  ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
1986  ctx->BranchInst(bbMaskedBody);
1987  }
1988 
1989  ///////////////////////////////////////////////////////////////////////////
1990  // We need to include the effect of the innermost dimension in the mask
1991  // for the final bits here
1992  ctx->SetCurrentBasicBlock(bbPartial); {
1993  llvm::Value *varyingCounter =
1994  ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
1995  llvm::Value *smearEnd = ctx->BroadcastValue(
1996  endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
1997 
1998  llvm::Value *emask =
1999  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
2000  varyingCounter, smearEnd);
2001  emask = ctx->I1VecToBoolVec(emask);
2002 
2003  if (nDims == 1) {
2004  ctx->SetInternalMask(emask);
2005  }
2006  else {
2007  llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
2008  llvm::Value *newMask =
2009  ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
2010  "extras_mask");
2011  ctx->SetInternalMask(newMask);
2012  }
2013 
2014  ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
2015 
2016  // check to see if counter != end, otherwise, the next step is not necessary
2017  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
2018  llvm::Value *atEnd =
2019  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
2020  counter, endVals[nDims-1], "at_end");
2021  ctx->BranchInst(bbMaskedBody, bbReset[nDims-1], atEnd);
2022  }
2023 
2024  ///////////////////////////////////////////////////////////////////////////
2025  // None of the outer dimensions is processing extras; along the lines
2026  // of above, we can express this as:
2027  // for (counter = start; counter < alignedEnd; counter += step) {
2028  // // mask is all on
2029  // // run loop body with mask all on
2030  // }
2031  // // counter == alignedEnd
2032  // if (counter < end) {
2033  // // set mask to (counter+programCounter < end)
2034  // // run loop body with mask
2035  // }
2036  llvm::BasicBlock *bbPartialInnerAllOuter =
2037  ctx->CreateBasicBlock("partial_inner_all_outer");
2038  ctx->SetCurrentBasicBlock(bbOuterNotInExtras); {
2039  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
2040  llvm::Value *beforeAlignedEnd =
2041  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
2042  counter, alignedEnd[nDims-1], "before_aligned_end");
2043  ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
2044  beforeAlignedEnd);
2045  }
2046 
2047  ///////////////////////////////////////////////////////////////////////////
2048  // full_body: do a full vector's worth of work. We know that all
2049  // lanes will be running here, so we explicitly set the mask to be 'all
2050  // on'. This ends up being relatively straightforward: just update the
2051  // value of the varying loop counter and have the statements in the
2052  // loop body emit their code.
2053  llvm::BasicBlock *bbFullBodyContinue =
2054  ctx->CreateBasicBlock("foreach_full_continue");
2055  ctx->SetCurrentBasicBlock(bbFullBody); {
2058  lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
2059  dimVariables[nDims-1]->storagePtr, span);
2060  ctx->SetContinueTarget(bbFullBodyContinue);
2061  ctx->AddInstrumentationPoint("foreach loop body (all on)");
2062  stmts->EmitCode(ctx);
2063  AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
2064  ctx->BranchInst(bbFullBodyContinue);
2065  }
2066  ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
2067  ctx->RestoreContinuedLanes();
2068  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
2069  llvm::Value *newCounter =
2070  ctx->BinaryOperator(llvm::Instruction::Add, counter,
2071  LLVMInt32(span[nDims-1]), "new_counter");
2072  ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
2073  ctx->BranchInst(bbOuterNotInExtras);
2074  }
2075 
2076  ///////////////////////////////////////////////////////////////////////////
2077  // We're done running blocks with the mask all on; see if the counter is
2078  // less than the end value, in which case we need to run the body one
2079  // more time to get the extra bits.
2080  llvm::BasicBlock *bbSetInnerMask =
2081  ctx->CreateBasicBlock("partial_inner_only");
2082  ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); {
2083  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
2084  llvm::Value *beforeFullEnd =
2085  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
2086  counter, endVals[nDims-1], "before_full_end");
2087  ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
2088  }
2089 
2090  ///////////////////////////////////////////////////////////////////////////
2091  // The outer dimensions are all on, so the mask is just given by the
2092  // mask for the innermost dimension
2093  ctx->SetCurrentBasicBlock(bbSetInnerMask); {
2094  llvm::Value *varyingCounter =
2095  lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
2096  dimVariables[nDims-1]->storagePtr, span);
2097  llvm::Value *smearEnd = ctx->BroadcastValue(
2098  endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
2099  llvm::Value *emask =
2100  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
2101  varyingCounter, smearEnd);
2102  emask = ctx->I1VecToBoolVec(emask);
2103  ctx->SetInternalMask(emask);
2104  ctx->SetBlockEntryMask(emask);
2105 
2106  ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
2107  ctx->BranchInst(bbMaskedBody);
2108  }
2109 
2110  ///////////////////////////////////////////////////////////////////////////
2111  // masked_body: set the mask and have the statements emit their
2112  // code again. Note that it's generally worthwhile having two copies
2113  // of the statements' code, since the code above is emitted with the
2114  // mask known to be all-on, which in turn leads to more efficient code
2115  // for that case.
2116  llvm::BasicBlock *bbStepInnerIndex =
2117  ctx->CreateBasicBlock("step_inner_index");
2118  llvm::BasicBlock *bbMaskedBodyContinue =
2119  ctx->CreateBasicBlock("foreach_masked_continue");
2120  ctx->SetCurrentBasicBlock(bbMaskedBody); {
2121  ctx->AddInstrumentationPoint("foreach loop body (masked)");
2122  ctx->SetContinueTarget(bbMaskedBodyContinue);
2124  ctx->SetBlockEntryMask(ctx->GetFullMask());
2125  stmts->EmitCode(ctx);
2127  ctx->BranchInst(bbMaskedBodyContinue);
2128  }
2129  ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); {
2130  ctx->RestoreContinuedLanes();
2131  llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
2132  ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
2133  }
2134 
2135  ///////////////////////////////////////////////////////////////////////////
2136  // step the innermost index, for the case where we're doing the
2137  // innermost for loop over full vectors.
2138  ctx->SetCurrentBasicBlock(bbStepInnerIndex); {
2139  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
2140  llvm::Value *newCounter =
2141  ctx->BinaryOperator(llvm::Instruction::Add, counter,
2142  LLVMInt32(span[nDims-1]), "new_counter");
2143  ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
2144  ctx->BranchInst(bbOuterInExtras);
2145  }
2146 
2147  ///////////////////////////////////////////////////////////////////////////
2148  // foreach_exit: All done. Restore the old mask and clean up
2149  ctx->SetCurrentBasicBlock(bbExit);
2150 
2151  ctx->SetInternalMask(oldMask);
2152  ctx->SetFunctionMask(oldFunctionMask);
2153 
2154  ctx->EndForeach();
2155  ctx->EndScope();
2156 }
2157 
2158 
2159 Stmt *
2161  bool anyErrors = false;
2162  for (unsigned int i = 0; i < startExprs.size(); ++i) {
2163  if (startExprs[i] != NULL)
2166  "foreach starting value");
2167  anyErrors |= (startExprs[i] == NULL);
2168  }
2169  for (unsigned int i = 0; i < endExprs.size(); ++i) {
2170  if (endExprs[i] != NULL)
2172  "foreach ending value");
2173  anyErrors |= (endExprs[i] == NULL);
2174  }
2175 
2176  if (startExprs.size() < dimVariables.size()) {
2177  Error(pos, "Not enough initial values provided for \"foreach\" loop; "
2178  "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size());
2179  anyErrors = true;
2180  }
2181  else if (startExprs.size() > dimVariables.size()) {
2182  Error(pos, "Too many initial values provided for \"foreach\" loop; "
2183  "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size());
2184  anyErrors = true;
2185  }
2186 
2187  if (endExprs.size() < dimVariables.size()) {
2188  Error(pos, "Not enough initial values provided for \"foreach\" loop; "
2189  "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size());
2190  anyErrors = true;
2191  }
2192  else if (endExprs.size() > dimVariables.size()) {
2193  Error(pos, "Too many initial values provided for \"foreach\" loop; "
2194  "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size());
2195  anyErrors = true;
2196  }
2197 
2198  return anyErrors ? NULL : this;
2199 }
2200 
2201 
2202 int
2205 }
2206 
2207 
2208 void
2209 ForeachStmt::Print(int indent) const {
2210  printf("%*cForeach Stmt", indent, ' ');
2211  pos.Print();
2212  printf("\n");
2213 
2214  for (unsigned int i = 0; i < dimVariables.size(); ++i)
2215  if (dimVariables[i] != NULL)
2216  printf("%*cVar %d: %s\n", indent+4, ' ', i,
2217  dimVariables[i]->name.c_str());
2218  else
2219  printf("%*cVar %d: NULL\n", indent+4, ' ', i);
2220 
2221  printf("Start values:\n");
2222  for (unsigned int i = 0; i < startExprs.size(); ++i) {
2223  if (startExprs[i] != NULL)
2224  startExprs[i]->Print();
2225  else
2226  printf("NULL");
2227  if (i != startExprs.size()-1)
2228  printf(", ");
2229  else
2230  printf("\n");
2231  }
2232 
2233  printf("End values:\n");
2234  for (unsigned int i = 0; i < endExprs.size(); ++i) {
2235  if (endExprs[i] != NULL)
2236  endExprs[i]->Print();
2237  else
2238  printf("NULL");
2239  if (i != endExprs.size()-1)
2240  printf(", ");
2241  else
2242  printf("\n");
2243  }
2244 
2245  if (stmts != NULL) {
2246  printf("%*cStmts:\n", indent+4, ' ');
2247  stmts->Print(indent+8);
2248  }
2249 }
2250 
2251 
2252 ///////////////////////////////////////////////////////////////////////////
2253 // ForeachActiveStmt
2254 
2256  : Stmt(pos, ForeachActiveStmtID) {
2257  sym = s;
2258  stmts = st;
2259 }
2260 
2261 
2262 void
2264  if (!ctx->GetCurrentBasicBlock())
2265  return;
2266 
2267  // Allocate storage for the symbol that we'll use for the uniform
2268  // variable that holds the current program instance in each loop
2269  // iteration.
2270  if (sym->type == NULL) {
2271  Assert(m->errorCount > 0);
2272  return;
2273  }
2277 
2278  ctx->SetDebugPos(pos);
2279  ctx->EmitVariableDebugInfo(sym);
2280 
2281  // The various basic blocks that we'll need in the below
2282  llvm::BasicBlock *bbFindNext =
2283  ctx->CreateBasicBlock("foreach_active_find_next");
2284  llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_active_body");
2285  llvm::BasicBlock *bbCheckForMore =
2286  ctx->CreateBasicBlock("foreach_active_check_for_more");
2287  llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("foreach_active_done");
2288 
2289  // Save the old mask so that we can restore it at the end
2290  llvm::Value *oldInternalMask = ctx->GetInternalMask();
2291 
2292  // Now, *maskBitsPtr will maintain a bitmask for the lanes that remain
2293  // to be processed by a pass through the loop body. It starts out with
2294  // the current execution mask (which should never be all off going in
2295  // to this)...
2296  llvm::Value *oldFullMask = ctx->GetFullMask();
2297  llvm::Value *maskBitsPtr =
2298  ctx->AllocaInst(LLVMTypes::Int64Type, "mask_bits");
2299  llvm::Value *movmsk = ctx->LaneMask(oldFullMask);
2300  ctx->StoreInst(movmsk, maskBitsPtr);
2301 
2302  // Officially start the loop.
2303  ctx->StartScope();
2305  ctx->SetContinueTarget(bbCheckForMore);
2306 
2307  // Onward to find the first set of program instance to run the loop for
2308  ctx->BranchInst(bbFindNext);
2309 
2310  ctx->SetCurrentBasicBlock(bbFindNext); {
2311  // Load the bitmask of the lanes left to be processed
2312  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2313 
2314  // Find the index of the first set bit in the mask
2315  llvm::Function *ctlzFunc =
2316  m->module->getFunction("__count_trailing_zeros_i64");
2317  Assert(ctlzFunc != NULL);
2318  llvm::Value *firstSet = ctx->CallInst(ctlzFunc, NULL, remainingBits,
2319  "first_set");
2320 
2321  // Store that value into the storage allocated for the iteration
2322  // variable.
2323  ctx->StoreInst(firstSet, sym->storagePtr);
2324 
2325  // Now set the execution mask to be only on for the current program
2326  // instance. (TODO: is there a more efficient way to do this? e.g.
2327  // for AVX1, we might want to do this as float rather than int
2328  // math...)
2329 
2330  // Get the "program index" vector value
2331 #ifdef ISPC_NVPTX_ENABLED
2332  llvm::Value *programIndex = g->target->getISA() == Target::NVPTX ?
2333  ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector();
2334 #else /* ISPC_NVPTX_ENABLED */
2335  llvm::Value *programIndex = ctx->ProgramIndexVector();
2336 #endif /* ISPC_NVPTX_ENABLED */
2337 
2338  // And smear the current lane out to a vector
2339  llvm::Value *firstSet32 =
2340  ctx->TruncInst(firstSet, LLVMTypes::Int32Type, "first_set32");
2341  llvm::Value *firstSet32Smear = ctx->SmearUniform(firstSet32);
2342 
2343  // Now set the execution mask based on doing a vector compare of
2344  // these two
2345  llvm::Value *iterMask =
2346  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
2347  firstSet32Smear, programIndex);
2348  iterMask = ctx->I1VecToBoolVec(iterMask);
2349 
2350  ctx->SetInternalMask(iterMask);
2351 
2352  // Also update the bitvector of lanes left to turn off the bit for
2353  // the lane we're about to run.
2354  llvm::Value *setMask =
2355  ctx->BinaryOperator(llvm::Instruction::Shl, LLVMInt64(1),
2356  firstSet, "set_mask");
2357  llvm::Value *notSetMask = ctx->NotOperator(setMask);
2358  llvm::Value *newRemaining =
2359  ctx->BinaryOperator(llvm::Instruction::And, remainingBits,
2360  notSetMask, "new_remaining");
2361  ctx->StoreInst(newRemaining, maskBitsPtr);
2362 
2363  // and onward to run the loop body...
2364  ctx->BranchInst(bbBody);
2365  }
2366 
2367  ctx->SetCurrentBasicBlock(bbBody); {
2368  ctx->SetBlockEntryMask(ctx->GetFullMask());
2369 
2370  // Run the code in the body of the loop. This is easy now.
2371  if (stmts)
2372  stmts->EmitCode(ctx);
2373 
2374  Assert(ctx->GetCurrentBasicBlock() != NULL);
2375  ctx->BranchInst(bbCheckForMore);
2376  }
2377 
2378  ctx->SetCurrentBasicBlock(bbCheckForMore); {
2379  ctx->RestoreContinuedLanes();
2380  // At the end of the loop body (either due to running the
2381  // statements normally, or a continue statement in the middle of
2382  // the loop that jumps to the end, see if there are any lanes left
2383  // to be processed.
2384  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2385  llvm::Value *nonZero =
2386  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
2387  remainingBits, LLVMInt64(0), "remaining_ne_zero");
2388  ctx->BranchInst(bbFindNext, bbDone, nonZero);
2389  }
2390 
2391  ctx->SetCurrentBasicBlock(bbDone);
2392  ctx->SetInternalMask(oldInternalMask);
2393  ctx->EndForeach();
2394  ctx->EndScope();
2395 }
2396 
2397 
2398 void
2399 ForeachActiveStmt::Print(int indent) const {
2400  printf("%*cForeach_active Stmt", indent, ' ');
2401  pos.Print();
2402  printf("\n");
2403 
2404  printf("%*cIter symbol: ", indent+4, ' ');
2405  if (sym != NULL) {
2406  printf("%s", sym->name.c_str());
2407  if (sym->type != NULL)
2408  printf(" %s", sym->type->GetString().c_str());
2409  }
2410  else
2411  printf("NULL");
2412  printf("\n");
2413 
2414  printf("%*cStmts:\n", indent+4, ' ');
2415  if (stmts != NULL)
2416  stmts->Print(indent+8);
2417  else
2418  printf("NULL");
2419  printf("\n");
2420 }
2421 
2422 
2423 Stmt *
2425  if (sym == NULL)
2426  return NULL;
2427 
2428  return this;
2429 }
2430 
2431 
2432 int
2434  return COST_VARYING_LOOP;
2435 }
2436 
2437 
2438 ///////////////////////////////////////////////////////////////////////////
2439 // ForeachUniqueStmt
2440 
2442  Stmt *s, SourcePos pos)
2443  : Stmt(pos, ForeachUniqueStmtID) {
2444  sym = m->symbolTable->LookupVariable(iterName);
2445  expr = e;
2446  stmts = s;
2447 }
2448 
2449 
2450 void
2452  if (!ctx->GetCurrentBasicBlock())
2453  return;
2454 
2455  // First, allocate local storage for the symbol that we'll use for the
2456  // uniform variable that holds the current unique value through each
2457  // loop.
2458  if (sym->type == NULL) {
2459  Assert(m->errorCount > 0);
2460  return;
2461  }
2462  llvm::Type *symType = sym->type->LLVMType(g->ctx);
2463  if (symType == NULL) {
2464  Assert(m->errorCount > 0);
2465  return;
2466  }
2467  sym->storagePtr = ctx->AllocaInst(symType, sym->name.c_str());
2468 
2469  ctx->SetDebugPos(pos);
2470  ctx->EmitVariableDebugInfo(sym);
2471 
2472  // The various basic blocks that we'll need in the below
2473  llvm::BasicBlock *bbFindNext = ctx->CreateBasicBlock("foreach_find_next");
2474  llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_body");
2475  llvm::BasicBlock *bbCheckForMore = ctx->CreateBasicBlock("foreach_check_for_more");
2476  llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("foreach_done");
2477 
2478  // Prepare the FunctionEmitContext
2479  ctx->StartScope();
2480 
2481  // Save the old internal mask so that we can restore it at the end
2482  llvm::Value *oldMask = ctx->GetInternalMask();
2483 
2484  // Now, *maskBitsPtr will maintain a bitmask for the lanes that remain
2485  // to be processed by a pass through the foreach_unique loop body. It
2486  // starts out with the full execution mask (which should never be all
2487  // off going in to this)...
2488  llvm::Value *oldFullMask = ctx->GetFullMask();
2489  llvm::Value *maskBitsPtr = ctx->AllocaInst(LLVMTypes::Int64Type, "mask_bits");
2490  llvm::Value *movmsk = ctx->LaneMask(oldFullMask);
2491  ctx->StoreInst(movmsk, maskBitsPtr);
2492 
2493  // Officially start the loop.
2495  ctx->SetContinueTarget(bbCheckForMore);
2496 
2497  // Evaluate the varying expression we're iterating over just once.
2498  llvm::Value *exprValue = expr->GetValue(ctx);
2499 
2500  // And we'll store its value into locally-allocated storage, for ease
2501  // of indexing over it with non-compile-time-constant indices.
2502  const Type *exprType;
2503  llvm::VectorType *llvmExprType;
2504  if (exprValue == NULL ||
2505  (exprType = expr->GetType()) == NULL ||
2506  (llvmExprType =
2507  llvm::dyn_cast<llvm::VectorType>(exprValue->getType())) == NULL) {
2508  Assert(m->errorCount > 0);
2509  return;
2510  }
2511  ctx->SetDebugPos(pos);
2512  const Type *exprPtrType = PointerType::GetUniform(exprType);
2513  llvm::Value *exprMem = ctx->AllocaInst(llvmExprType, "expr_mem");
2514  ctx->StoreInst(exprValue, exprMem);
2515 
2516  // Onward to find the first set of lanes to run the loop for
2517  ctx->BranchInst(bbFindNext);
2518 
2519  ctx->SetCurrentBasicBlock(bbFindNext); {
2520  // Load the bitmask of the lanes left to be processed
2521  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2522 
2523  // Find the index of the first set bit in the mask
2524  llvm::Function *ctlzFunc =
2525  m->module->getFunction("__count_trailing_zeros_i64");
2526  Assert(ctlzFunc != NULL);
2527  llvm::Value *firstSet = ctx->CallInst(ctlzFunc, NULL, remainingBits,
2528  "first_set");
2529 
2530  // And load the corresponding element value from the temporary
2531  // memory storing the value of the varying expr.
2532  llvm::Value *uniqueValue;
2533 #ifdef ISPC_NVPTX_ENABLED
2534  if (g->target->getISA() == Target::NVPTX)
2535  {
2536  llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type);
2537  uniqueValue = ctx->Extract(exprValue, firstSet32);
2538  }
2539  else
2540  {
2541 #endif /* ISPC_NVPTX_ENABLED */
2542  llvm::Value *uniqueValuePtr =
2543  ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
2544  "unique_index_ptr");
2545  uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
2546 #ifdef ISPC_NVPTX_ENABLED
2547  }
2548 #endif /* ISPC_NVPTX_ENABLED */
2549  // If it's a varying pointer type, need to convert from the int
2550  // type we store in the vector to the actual pointer type
2551  if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL)
2552  uniqueValue = ctx->IntToPtrInst(uniqueValue, symType);
2553 
2554  // Store that value in sym's storage so that the iteration variable
2555  // has the right value inside the loop body
2556  ctx->StoreInst(uniqueValue, sym->storagePtr);
2557 
2558  // Set the execution mask so that it's on for any lane that a) was
2559  // running at the start of the foreach loop, and b) where that
2560  // lane's value of the varying expression is the same as the value
2561  // we've selected to process this time through--i.e.:
2562  // oldMask & (smear(element) == exprValue)
2563  llvm::Value *uniqueSmear = ctx->SmearUniform(uniqueValue, "unique_smear");
2564  llvm::Value *matchingLanes = NULL;
2565  if (uniqueValue->getType()->isFloatingPointTy())
2566  matchingLanes =
2567  ctx->CmpInst(llvm::Instruction::FCmp, llvm::CmpInst::FCMP_OEQ,
2568  uniqueSmear, exprValue, "matching_lanes");
2569  else
2570  matchingLanes =
2571  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
2572  uniqueSmear, exprValue, "matching_lanes");
2573  matchingLanes = ctx->I1VecToBoolVec(matchingLanes);
2574 
2575  llvm::Value *loopMask =
2576  ctx->BinaryOperator(llvm::Instruction::And, oldMask, matchingLanes,
2577  "foreach_unique_loop_mask");
2578  ctx->SetInternalMask(loopMask);
2579 
2580  // Also update the bitvector of lanes left to process in subsequent
2581  // loop iterations:
2582  // remainingBits &= ~movmsk(current mask)
2583  llvm::Value *loopMaskMM = ctx->LaneMask(loopMask);
2584  llvm::Value *notLoopMaskMM = ctx->NotOperator(loopMaskMM);
2585  llvm::Value *newRemaining =
2586  ctx->BinaryOperator(llvm::Instruction::And, remainingBits,
2587  notLoopMaskMM, "new_remaining");
2588  ctx->StoreInst(newRemaining, maskBitsPtr);
2589 
2590  // and onward...
2591  ctx->BranchInst(bbBody);
2592  }
2593 
2594  ctx->SetCurrentBasicBlock(bbBody); {
2595  ctx->SetBlockEntryMask(ctx->GetFullMask());
2596  // Run the code in the body of the loop. This is easy now.
2597  if (stmts)
2598  stmts->EmitCode(ctx);
2599 
2600  Assert(ctx->GetCurrentBasicBlock() != NULL);
2601  ctx->BranchInst(bbCheckForMore);
2602  }
2603 
2604  ctx->SetCurrentBasicBlock(bbCheckForMore); {
2605  // At the end of the loop body (either due to running the
2606  // statements normally, or a continue statement in the middle of
2607  // the loop that jumps to the end, see if there are any lanes left
2608  // to be processed.
2609  ctx->RestoreContinuedLanes();
2610  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2611  llvm::Value *nonZero =
2612  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
2613  remainingBits, LLVMInt64(0), "remaining_ne_zero");
2614  ctx->BranchInst(bbFindNext, bbDone, nonZero);
2615  }
2616 
2617  ctx->SetCurrentBasicBlock(bbDone);
2618  ctx->SetInternalMask(oldMask);
2619  ctx->EndForeach();
2620  ctx->EndScope();
2621 }
2622 
2623 
2624 void
2625 ForeachUniqueStmt::Print(int indent) const {
2626  printf("%*cForeach_unique Stmt", indent, ' ');
2627  pos.Print();
2628  printf("\n");
2629 
2630  printf("%*cIter symbol: ", indent+4, ' ');
2631  if (sym != NULL) {
2632  printf("%s", sym->name.c_str());
2633  if (sym->type != NULL)
2634  printf(" %s", sym->type->GetString().c_str());
2635  }
2636  else
2637  printf("NULL");
2638  printf("\n");
2639 
2640  printf("%*cIter expr: ", indent+4, ' ');
2641  if (expr != NULL)
2642  expr->Print();
2643  else
2644  printf("NULL");
2645  printf("\n");
2646 
2647  printf("%*cStmts:\n", indent+4, ' ');
2648  if (stmts != NULL)
2649  stmts->Print(indent+8);
2650  else
2651  printf("NULL");
2652  printf("\n");
2653 }
2654 
2655 
2656 Stmt *
2658  const Type *type;
2659  if (sym == NULL || expr == NULL || (type = expr->GetType()) == NULL)
2660  return NULL;
2661 
2662  if (type->IsVaryingType() == false) {
2663  Error(expr->pos, "Iteration domain type in \"foreach_tiled\" loop "
2664  "must be \"varying\" type, not \"%s\".",
2665  type->GetString().c_str());
2666  return NULL;
2667  }
2668 
2669  if (Type::IsBasicType(type) == false) {
2670  Error(expr->pos, "Iteration domain type in \"foreach_tiled\" loop "
2671  "must be an atomic, pointer, or enum type, not \"%s\".",
2672  type->GetString().c_str());
2673  return NULL;
2674  }
2675 
2676  return this;
2677 }
2678 
2679 
2680 int
2682  return COST_VARYING_LOOP;
2683 }
2684 
2685 
2686 ///////////////////////////////////////////////////////////////////////////
2687 // CaseStmt
2688 
2689 /** Given the statements following a 'case' or 'default' label, this
2690  function determines whether the mask should be checked to see if it is
2691  "all off" immediately after the label, before executing the code for
2692  the statements.
2693  */
2694 static bool
2696  if (stmts == NULL)
2697  return false;
2698 
2699  int cost = EstimateCost(stmts);
2700  bool safeToRunWithAllLanesOff = SafeToRunWithMaskAllOff(stmts);
2701 
2702  // The mask should be checked if the code following the
2703  // 'case'/'default' is relatively complex, or if it would be unsafe to
2704  // run that code with the execution mask all off.
2705  return (cost > PREDICATE_SAFE_IF_STATEMENT_COST ||
2706  safeToRunWithAllLanesOff == false);
2707 }
2708 
2709 
2711  : Stmt(pos, CaseStmtID), value(v) {
2712  stmts = s;
2713 }
2714 
2715 
2716 void
2719  if (stmts)
2720  stmts->EmitCode(ctx);
2721 }
2722 
2723 
2724 void
2725 CaseStmt::Print(int indent) const {
2726  printf("%*cCase [%d] label", indent, ' ', value);
2727  pos.Print();
2728  printf("\n");
2729  stmts->Print(indent+4);
2730 }
2731 
2732 
2733 Stmt *
2735  return this;
2736 }
2737 
2738 
2739 int
2741  return 0;
2742 }
2743 
2744 
2745 ///////////////////////////////////////////////////////////////////////////
2746 // DefaultStmt
2747 
2749  : Stmt(pos, DefaultStmtID) {
2750  stmts = s;
2751 }
2752 
2753 
2754 void
2757  if (stmts)
2758  stmts->EmitCode(ctx);
2759 }
2760 
2761 
2762 void
2763 DefaultStmt::Print(int indent) const {
2764  printf("%*cDefault Stmt", indent, ' ');
2765  pos.Print();
2766  printf("\n");
2767  stmts->Print(indent+4);
2768 }
2769 
2770 
2771 Stmt *
2773  return this;
2774 }
2775 
2776 
2777 int
2779  return 0;
2780 }
2781 
2782 
2783 ///////////////////////////////////////////////////////////////////////////
2784 // SwitchStmt
2785 
2787  : Stmt(pos, SwitchStmtID) {
2788  expr = e;
2789  stmts = s;
2790 }
2791 
2792 
2793 /* An instance of this structure is carried along as we traverse the AST
2794  nodes for the statements after a "switch" statement. We use this
2795  structure to record all of the 'case' and 'default' statements after the
2796  "switch". */
2799  ctx = c;
2800  defaultBlock = NULL;
2801  lastBlock = NULL;
2802  }
2803 
2805 
2806  /* Basic block for the code following the "default" label (if any). */
2807  llvm::BasicBlock *defaultBlock;
2808 
2809  /* Map from integer values after "case" labels to the basic blocks that
2810  follow the corresponding "case" label. */
2811  std::vector<std::pair<int, llvm::BasicBlock *> > caseBlocks;
2812 
2813  /* For each basic block for a "case" label or a "default" label,
2814  nextBlock[block] stores the basic block pointer for the next
2815  subsequent "case" or "default" label in the program. */
2816  std::map<llvm::BasicBlock *, llvm::BasicBlock *> nextBlock;
2817 
2818  /* The last basic block created for a "case" or "default" label; when
2819  we create the basic block for the next one, we'll use this to update
2820  the nextBlock map<> above. */
2821  llvm::BasicBlock *lastBlock;
2822 };
2823 
2824 
2825 static bool
2826 lSwitchASTPreVisit(ASTNode *node, void *d) {
2827  if (llvm::dyn_cast<SwitchStmt>(node) != NULL)
2828  // don't continue recursively into a nested switch--we only want
2829  // our own case and default statements!
2830  return false;
2831 
2832  CaseStmt *cs = llvm::dyn_cast<CaseStmt>(node);
2833  DefaultStmt *ds = llvm::dyn_cast<DefaultStmt>(node);
2834 
2835  SwitchVisitInfo *svi = (SwitchVisitInfo *)d;
2836  llvm::BasicBlock *bb = NULL;
2837  if (cs != NULL) {
2838  // Complain if we've seen a case statement with the same value
2839  // already
2840  for (int i = 0; i < (int)svi->caseBlocks.size(); ++i) {
2841  if (svi->caseBlocks[i].first == cs->value) {
2842  Error(cs->pos, "Duplicate case value \"%d\".", cs->value);
2843  return true;
2844  }
2845  }
2846 
2847  // Otherwise create a new basic block for the code following this
2848  // 'case' statement and record the mappign between the case label
2849  // value and the basic block
2850  char buf[32];
2851  sprintf(buf, "case_%d", cs->value);
2852  bb = svi->ctx->CreateBasicBlock(buf);
2853  svi->caseBlocks.push_back(std::make_pair(cs->value, bb));
2854  }
2855  else if (ds != NULL) {
2856  // And complain if we've seen another 'default' label..
2857  if (svi->defaultBlock != NULL) {
2858  Error(ds->pos, "Multiple \"default\" lables in switch statement.");
2859  return true;
2860  }
2861  else {
2862  // Otherwise create a basic block for the code following the
2863  // "default".
2864  bb = svi->ctx->CreateBasicBlock("default");
2865  svi->defaultBlock = bb;
2866  }
2867  }
2868 
2869  // If we saw a "case" or "default" label, then update the map to record
2870  // that the block we just created follows the block created for the
2871  // previous label in the "switch".
2872  if (bb != NULL) {
2873  svi->nextBlock[svi->lastBlock] = bb;
2874  svi->lastBlock = bb;
2875  }
2876 
2877  return true;
2878 }
2879 
2880 
2881 void
2883  if (ctx->GetCurrentBasicBlock() == NULL)
2884  return;
2885 
2886  const Type *type;
2887  if (expr == NULL || ((type = expr->GetType()) == NULL)) {
2888  AssertPos(pos, m->errorCount > 0);
2889  return;
2890  }
2891 
2892  // Basic block we'll end up after the switch statement
2893  llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("switch_done");
2894 
2895  // Walk the AST of the statements after the 'switch' to collect a bunch
2896  // of information about the structure of the 'case' and 'default'
2897  // statements.
2898  SwitchVisitInfo svi(ctx);
2899  WalkAST(stmts, lSwitchASTPreVisit, NULL, &svi);
2900  // Record that the basic block following the last one created for a
2901  // case/default is the block after the end of the switch statement.
2902  svi.nextBlock[svi.lastBlock] = bbDone;
2903 
2904  llvm::Value *exprValue = expr->GetValue(ctx);
2905  if (exprValue == NULL) {
2906  AssertPos(pos, m->errorCount > 0);
2907  return;
2908  }
2909 
2910  bool isUniformCF = (type->IsUniformType() &&
2911  lHasVaryingBreakOrContinue(stmts) == false);
2912  ctx->StartSwitch(isUniformCF, bbDone);
2913  ctx->SetBlockEntryMask(ctx->GetFullMask());
2914  ctx->SwitchInst(exprValue, svi.defaultBlock ? svi.defaultBlock : bbDone,
2915  svi.caseBlocks, svi.nextBlock);
2916 
2917  if (stmts != NULL)
2918  stmts->EmitCode(ctx);
2919 
2920  if (ctx->GetCurrentBasicBlock() != NULL)
2921  ctx->BranchInst(bbDone);
2922 
2923  ctx->SetCurrentBasicBlock(bbDone);
2924  ctx->EndSwitch();
2925 }
2926 
2927 
2928 void
2929 SwitchStmt::Print(int indent) const {
2930  printf("%*cSwitch Stmt", indent, ' ');
2931  pos.Print();
2932  printf("\n");
2933  printf("%*cexpr = ", indent, ' ');
2934  expr->Print();
2935  printf("\n");
2936  stmts->Print(indent+4);
2937 }
2938 
2939 
2940 Stmt *
2942  const Type *exprType;
2943  if (expr == NULL ||
2944  (exprType = expr->GetType()) == NULL) {
2945  Assert(m->errorCount > 0);
2946  return NULL;
2947  }
2948 
2949  const Type *toType = NULL;
2950  exprType = exprType->GetAsConstType();
2951  bool is64bit = (Type::EqualIgnoringConst(exprType->GetAsUniformType(),
2955 
2956  if (exprType->IsUniformType()) {
2957  if (is64bit) toType = AtomicType::UniformInt64;
2958  else toType = AtomicType::UniformInt32;
2959  }
2960  else {
2961  if (is64bit) toType = AtomicType::VaryingInt64;
2962  else toType = AtomicType::VaryingInt32;
2963  }
2964 
2965  expr = TypeConvertExpr(expr, toType, "switch expression");
2966  if (expr == NULL)
2967  return NULL;
2968 
2969  return this;
2970 }
2971 
2972 
2973 int
2975  const Type *type = expr->GetType();
2976  if (type && type->IsVaryingType())
2977  return COST_VARYING_SWITCH;
2978  else
2979  return COST_UNIFORM_SWITCH;
2980 }
2981 
2982 
2983 ///////////////////////////////////////////////////////////////////////////
2984 // UnmaskedStmt
2985 
2987  : Stmt(pos, UnmaskedStmtID) {
2988  stmts = s;
2989 }
2990 
2991 
2992 void
2994  if (!ctx->GetCurrentBasicBlock() || !stmts)
2995  return;
2996 
2997  llvm::Value *oldInternalMask = ctx->GetInternalMask();
2998  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
2999 
3002 
3003  stmts->EmitCode(ctx);
3004 
3005 
3006  // Do not restore old mask if our basic block is over. This happends if we emit code
3007  // for something like 'unmasked{return;}', for example.
3008  if (ctx->GetCurrentBasicBlock() == NULL)
3009  return;
3010 
3011  ctx->SetInternalMask(oldInternalMask);
3012  ctx->SetFunctionMask(oldFunctionMask);
3013 }
3014 
3015 
3016 void
3017 UnmaskedStmt::Print(int indent) const {
3018  printf("%*cUnmasked Stmt", indent, ' ');
3019  pos.Print();
3020  printf("\n");
3021 
3022  printf("%*cStmts:\n", indent+4, ' ');
3023  if (stmts != NULL)
3024  stmts->Print(indent+8);
3025  else
3026  printf("NULL");
3027  printf("\n");
3028 }
3029 
3030 
3031 Stmt *
3033  return this;
3034 }
3035 
3036 
3037 int
3039  return COST_ASSIGN;
3040 }
3041 
3042 
3043 ///////////////////////////////////////////////////////////////////////////
3044 // ReturnStmt
3045 
3047  : Stmt(p, ReturnStmtID), expr(e) {
3048 }
3049 
3050 
3051 void
3053  if (!ctx->GetCurrentBasicBlock())
3054  return;
3055 
3056  if (ctx->InForeachLoop()) {
3057  Error(pos, "\"return\" statement is illegal inside a \"foreach\" loop.");
3058  return;
3059  }
3060 
3061  // Make sure we're not trying to return a reference to something where
3062  // that doesn't make sense
3063  const Function *func = ctx->GetFunction();
3064  const Type *returnType = func->GetReturnType();
3065  if (IsReferenceType(returnType) == true &&
3066  IsReferenceType(expr->GetType()) == false) {
3067  const Type *lvType = expr->GetLValueType();
3068  if (lvType == NULL) {
3069  Error(expr->pos, "Illegal to return non-lvalue from function "
3070  "returning reference type \"%s\".",
3071  returnType->GetString().c_str());
3072  return;
3073  }
3074  else if (lvType->IsUniformType() == false) {
3075  Error(expr->pos, "Illegal to return varying lvalue type from "
3076  "function returning a reference type \"%s\".",
3077  returnType->GetString().c_str());
3078  return;
3079  }
3080  }
3081 
3082  ctx->SetDebugPos(pos);
3083  ctx->CurrentLanesReturned(expr, true);
3084 }
3085 
3086 
3087 Stmt *
3089  return this;
3090 }
3091 
3092 
3093 int
3095  return COST_RETURN;
3096 }
3097 
3098 
3099 void
3100 ReturnStmt::Print(int indent) const {
3101  printf("%*cReturn Stmt", indent, ' ');
3102  pos.Print();
3103  if (expr)
3104  expr->Print();
3105  else printf("(void)");
3106  printf("\n");
3107 }
3108 
3109 
3110 ///////////////////////////////////////////////////////////////////////////
3111 // GotoStmt
3112 
3113 GotoStmt::GotoStmt(const char *l, SourcePos gotoPos, SourcePos ip)
3114  : Stmt(gotoPos, GotoStmtID) {
3115  label = l;
3116  identifierPos = ip;
3117 }
3118 
3119 
3120 void
3122  if (!ctx->GetCurrentBasicBlock())
3123  return;
3124 
3125  if (ctx->VaryingCFDepth() > 0) {
3126  Error(pos, "\"goto\" statements are only legal under \"uniform\" "
3127  "control flow.");
3128  return;
3129  }
3130  if (ctx->InForeachLoop()) {
3131  Error(pos, "\"goto\" statements are currently illegal inside "
3132  "\"foreach\" loops.");
3133  return;
3134  }
3135 
3136  llvm::BasicBlock *bb = ctx->GetLabeledBasicBlock(label);
3137  if (bb == NULL) {
3138  /* Label wasn't found. Look for suggestions that are close */
3139  std::vector<std::string> labels = ctx->GetLabels();
3140  std::vector<std::string> matches = MatchStrings(label, labels);
3141  std::string match_output;
3142  if (! matches.empty()) {
3143  /* Print up to 5 matches. Don't want to spew too much */
3144  match_output += "\nDid you mean:";
3145  for (unsigned int i=0; i<matches.size() && i<5; i++)
3146  match_output += "\n " + matches[i] + "?";
3147  }
3148 
3149  /* Label wasn't found. Emit an error */
3151  "No label named \"%s\" found in current function.%s",
3152  label.c_str(), match_output.c_str());
3153 
3154  return;
3155  }
3156 
3157  ctx->BranchInst(bb);
3158  ctx->SetCurrentBasicBlock(NULL);
3159 }
3160 
3161 
3162 void
3163 GotoStmt::Print(int indent) const {
3164  printf("%*cGoto label \"%s\"\n", indent, ' ', label.c_str());
3165 }
3166 
3167 
3168 Stmt *
3170  return this;
3171 }
3172 
3173 
3174 Stmt *
3176  return this;
3177 }
3178 
3179 
3180 int
3182  return COST_GOTO;
3183 }
3184 
3185 
3186 ///////////////////////////////////////////////////////////////////////////
3187 // LabeledStmt
3188 
3190  : Stmt(p, LabeledStmtID) {
3191  name = n;
3192  stmt = s;
3193 }
3194 
3195 
3196 void
3198  llvm::BasicBlock *bblock = ctx->GetLabeledBasicBlock(name);
3199  AssertPos(pos, bblock != NULL);
3200 
3201  // End the current basic block with a jump to our basic block and then
3202  // set things up for emission to continue there. Note that the current
3203  // basic block may validly be NULL going into this statement due to an
3204  // earlier goto that NULLed it out; that doesn't stop us from
3205  // re-establishing a current basic block starting at the label..
3206  if (ctx->GetCurrentBasicBlock() != NULL)
3207  ctx->BranchInst(bblock);
3208  ctx->SetCurrentBasicBlock(bblock);
3209 
3210  if (stmt != NULL)
3211  stmt->EmitCode(ctx);
3212 }
3213 
3214 
3215 void
3216 LabeledStmt::Print(int indent) const {
3217  printf("%*cLabel \"%s\"\n", indent, ' ', name.c_str());
3218  if (stmt != NULL)
3219  stmt->Print(indent);
3220 }
3221 
3222 
3223 Stmt *
3225  return this;
3226 }
3227 
3228 
3229 Stmt *
3231  if (!isalpha(name[0]) || name[0] == '_') {
3232  Error(pos, "Label must start with either alphabetic character or '_'.");
3233  return NULL;
3234  }
3235  for (unsigned int i = 1; i < name.size(); ++i) {
3236  if (!isalnum(name[i]) && name[i] != '_') {
3237  Error(pos, "Character \"%c\" is illegal in labels.", name[i]);
3238  return NULL;
3239  }
3240  }
3241  return this;
3242 }
3243 
3244 
3245 int
3247  return 0;
3248 }
3249 
3250 
3251 ///////////////////////////////////////////////////////////////////////////
3252 // StmtList
3253 
3254 void
3256  ctx->StartScope();
3257  ctx->SetDebugPos(pos);
3258  for (unsigned int i = 0; i < stmts.size(); ++i)
3259  if (stmts[i])
3260  stmts[i]->EmitCode(ctx);
3261  ctx->EndScope();
3262 }
3263 
3264 
3265 Stmt *
3267  return this;
3268 }
3269 
3270 
3271 int
3273  return 0;
3274 }
3275 
3276 
3277 void
3278 StmtList::Print(int indent) const {
3279  printf("%*cStmt List", indent, ' ');
3280  pos.Print();
3281  printf(":\n");
3282  for (unsigned int i = 0; i < stmts.size(); ++i)
3283  if (stmts[i])
3284  stmts[i]->Print(indent+4);
3285 }
3286 
3287 
3288 ///////////////////////////////////////////////////////////////////////////
3289 // PrintStmt
3290 
3291 PrintStmt::PrintStmt(const std::string &f, Expr *v, SourcePos p)
3292  : Stmt(p, PrintStmtID), format(f), values(v) {
3293 }
3294 
3295 /* Because the pointers to values that are passed to __do_print() are all
3296  void *s (and because ispc print() formatting strings statements don't
3297  encode types), we pass along a string to __do_print() where the i'th
3298  character encodes the type of the i'th value to be printed. Needless to
3299  say, the encoding chosen here and the decoding code in __do_print() need
3300  to agree on the below!
3301  */
3302 static char
3303 lEncodeType(const Type *t) {
3304  if (Type::Equal(t, AtomicType::UniformBool)) return 'b';
3305  if (Type::Equal(t, AtomicType::VaryingBool)) return 'B';
3306  if (Type::Equal(t, AtomicType::UniformInt32)) return 'i';
3307  if (Type::Equal(t, AtomicType::VaryingInt32)) return 'I';
3308  if (Type::Equal(t, AtomicType::UniformUInt32)) return 'u';
3309  if (Type::Equal(t, AtomicType::VaryingUInt32)) return 'U';
3310  if (Type::Equal(t, AtomicType::UniformFloat)) return 'f';
3311  if (Type::Equal(t, AtomicType::VaryingFloat)) return 'F';
3312  if (Type::Equal(t, AtomicType::UniformInt64)) return 'l';
3313  if (Type::Equal(t, AtomicType::VaryingInt64)) return 'L';
3314  if (Type::Equal(t, AtomicType::UniformUInt64)) return 'v';
3315  if (Type::Equal(t, AtomicType::VaryingUInt64)) return 'V';
3316  if (Type::Equal(t, AtomicType::UniformDouble)) return 'd';
3317  if (Type::Equal(t, AtomicType::VaryingDouble)) return 'D';
3318  if (CastType<PointerType>(t) != NULL) {
3319  if (t->IsUniformType())
3320  return 'p';
3321  else
3322  return 'P';
3323  }
3324  else return '\0';
3325 }
3326 
3327 
3328 /** Given an Expr for a value to be printed, emit the code to evaluate the
3329  expression and store the result to alloca'd memory. Update the
3330  argTypes string with the type encoding for this expression.
3331  */
3332 static llvm::Value *
3333 lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
3334  const Type *type = expr->GetType();
3335  if (type == NULL)
3336  return NULL;
3337 
3338  if (CastType<ReferenceType>(type) != NULL) {
3339  expr = new RefDerefExpr(expr, expr->pos);
3340  type = expr->GetType();
3341  if (type == NULL)
3342  return NULL;
3343  }
3344 
3345  // Just int8 and int16 types to int32s...
3346  const Type *baseType = type->GetAsNonConstType()->GetAsUniformType();
3347  if (Type::Equal(baseType, AtomicType::UniformInt8) ||
3351  expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 :
3353  expr, expr->pos);
3354  type = expr->GetType();
3355  }
3356 
3357  char t = lEncodeType(type->GetAsNonConstType());
3358  if (t == '\0') {
3359  Error(expr->pos, "Only atomic types are allowed in print statements; "
3360  "type \"%s\" is illegal.", type->GetString().c_str());
3361  return NULL;
3362  }
3363  else {
3364  if (Type::Equal(baseType, AtomicType::UniformBool)) {
3365  // Blast bools to ints, but do it here to preserve encoding for
3366  // printing 'true' or 'false'
3367  expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 :
3369  expr, expr->pos);
3370  type = expr->GetType();
3371  }
3372  argTypes.push_back(t);
3373 
3374  llvm::Type *llvmExprType = type->LLVMType(g->ctx);
3375  llvm::Value *ptr = ctx->AllocaInst(llvmExprType, "print_arg");
3376  llvm::Value *val = expr->GetValue(ctx);
3377  if (!val)
3378  return NULL;
3379  ctx->StoreInst(val, ptr);
3380 
3381  ptr = ctx->BitCastInst(ptr, LLVMTypes::VoidPointerType);
3382  return ptr;
3383  }
3384 }
3385 
3386 
3387 /* PrintStmt works closely with the __do_print() function implemented in
3388  the builtins-c.c file. In particular, the EmitCode() method here needs to
3389  take the arguments passed to it from ispc and generate a valid call to
3390  __do_print() with the information that __do_print() then needs to do the
3391  actual printing work at runtime.
3392  */
3393 void
3395  if (!ctx->GetCurrentBasicBlock())
3396  return;
3397 
3398  ctx->SetDebugPos(pos);
3399 
3400  // __do_print takes 5 arguments; we'll get them stored in the args[] array
3401  // in the code emitted below
3402  //
3403  // 1. the format string
3404  // 2. a string encoding the types of the values being printed,
3405  // one character per value
3406  // 3. the number of running program instances (i.e. the target's
3407  // vector width)
3408  // 4. the current lane mask
3409  // 5. a pointer to an array of pointers to the values to be printed
3410  llvm::Value *args[5];
3411  std::string argTypes;
3412 
3413  if (values == NULL) {
3414  llvm::Type *ptrPtrType =
3415  llvm::PointerType::get(LLVMTypes::VoidPointerType, 0);
3416  args[4] = llvm::Constant::getNullValue(ptrPtrType);
3417  }
3418  else {
3419  // Get the values passed to the print() statement evaluated and
3420  // stored in memory so that we set up the array of pointers to them
3421  // for the 5th __do_print() argument
3422  ExprList *elist = llvm::dyn_cast<ExprList>(values);
3423  int nArgs = elist ? elist->exprs.size() : 1;
3424 
3425  // Allocate space for the array of pointers to values to be printed
3426  llvm::Type *argPtrArrayType =
3427  llvm::ArrayType::get(LLVMTypes::VoidPointerType, nArgs);
3428  llvm::Value *argPtrArray = ctx->AllocaInst(argPtrArrayType,
3429  "print_arg_ptrs");
3430  // Store the array pointer as a void **, which is what __do_print()
3431  // expects
3432  args[4] = ctx->BitCastInst(argPtrArray,
3433  llvm::PointerType::get(LLVMTypes::VoidPointerType, 0));
3434 
3435  // Now, for each of the arguments, emit code to evaluate its value
3436  // and store the value into alloca'd storage. Then store the
3437  // pointer to the alloca'd storage into argPtrArray.
3438  if (elist) {
3439  for (unsigned int i = 0; i < elist->exprs.size(); ++i) {
3440  Expr *expr = elist->exprs[i];
3441  if (!expr)
3442  return;
3443  llvm::Value *ptr = lProcessPrintArg(expr, ctx, argTypes);
3444  if (!ptr)
3445  return;
3446 
3447  llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, i, NULL);
3448  ctx->StoreInst(ptr, arrayPtr);
3449  }
3450  }
3451  else {
3452  llvm::Value *ptr = lProcessPrintArg(values, ctx, argTypes);
3453  if (!ptr)
3454  return;
3455  llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, 0, NULL);
3456  ctx->StoreInst(ptr, arrayPtr);
3457  }
3458  }
3459 
3460  // Now we can emit code to call __do_print()
3461 #ifdef ISPC_NVPTX_ENABLED
3462  llvm::Function *printFunc = g->target->getISA() != Target::NVPTX ?
3463  m->module->getFunction("__do_print") : m->module->getFunction("__do_print_nvptx");
3464 #else /* ISPC_NVPTX_ENABLED */
3465  llvm::Function *printFunc = m->module->getFunction("__do_print");
3466 #endif /* ISPC_NVPTX_ENABLED */
3467  AssertPos(pos, printFunc);
3468 
3469  llvm::Value *mask = ctx->GetFullMask();
3470  // Set up the rest of the parameters to it
3471  args[0] = ctx->GetStringPtr(format);
3472  args[1] = ctx->GetStringPtr(argTypes);
3473  args[2] = LLVMInt32(g->target->getVectorWidth());
3474  args[3] = ctx->LaneMask(mask);
3475  std::vector<llvm::Value *> argVec(&args[0], &args[5]);
3476  ctx->CallInst(printFunc, NULL, argVec, "");
3477 }
3478 
3479 
3480 void
3481 PrintStmt::Print(int indent) const {
3482  printf("%*cPrint Stmt (%s)", indent, ' ', format.c_str());
3483 }
3484 
3485 
3486 Stmt *
3488  return this;
3489 }
3490 
3491 
3492 int
3494  return COST_FUNCALL;
3495 }
3496 
3497 
3498 ///////////////////////////////////////////////////////////////////////////
3499 // AssertStmt
3500 
3501 AssertStmt::AssertStmt(const std::string &msg, Expr *e, SourcePos p)
3502  : Stmt(p, AssertStmtID), message(msg), expr(e) {
3503 }
3504 
3505 
3506 void
3508  if (!ctx->GetCurrentBasicBlock())
3509  return;
3510 
3511  const Type *type;
3512  if (expr == NULL ||
3513  (type = expr->GetType()) == NULL) {
3514  AssertPos(pos, m->errorCount > 0);
3515  return;
3516  }
3517  bool isUniform = type->IsUniformType();
3518 
3519  // The actual functionality to do the check and then handle falure is
3520  // done via a builtin written in bitcode in builtins/util.m4.
3521  llvm::Function *assertFunc =
3522  isUniform ? m->module->getFunction("__do_assert_uniform") :
3523  m->module->getFunction("__do_assert_varying");
3524  AssertPos(pos, assertFunc != NULL);
3525 
3526  char *errorString;
3527  if (asprintf(&errorString, "%s:%d:%d: Assertion failed: %s",
3529  message.c_str()) == -1) {
3530  Error(pos, "Fatal error when generating assert string: asprintf() "
3531  "unable to allocate memory!");
3532  return;
3533  }
3534 
3535  std::vector<llvm::Value *> args;
3536  args.push_back(ctx->GetStringPtr(errorString));
3537  llvm::Value *exprValue = expr->GetValue(ctx);
3538  if (exprValue == NULL) {
3539  AssertPos(pos, m->errorCount > 0);
3540  return;
3541  }
3542  args.push_back(exprValue);
3543  args.push_back(ctx->GetFullMask());
3544  ctx->CallInst(assertFunc, NULL, args, "");
3545 
3546  free(errorString);
3547 }
3548 
3549 
3550 void
3551 AssertStmt::Print(int indent) const {
3552  printf("%*cAssert Stmt (%s)", indent, ' ', message.c_str());
3553 }
3554 
3555 
3556 Stmt *
3558  const Type *type;
3559  if (expr && (type = expr->GetType()) != NULL) {
3560  bool isUniform = type->IsUniformType();
3563  "\"assert\" statement");
3564  if (expr == NULL)
3565  return NULL;
3566  }
3567  return this;
3568 }
3569 
3570 
3571 int
3573  return COST_ASSERT;
3574 }
3575 
3576 
3577 ///////////////////////////////////////////////////////////////////////////
3578 // DeleteStmt
3579 
3581  : Stmt(p, DeleteStmtID) {
3582  expr = e;
3583 }
3584 
3585 
3586 void
3588  if (!ctx->GetCurrentBasicBlock())
3589  return;
3590 
3591  const Type *exprType;
3592  if (expr == NULL || ((exprType = expr->GetType()) == NULL)) {
3593  AssertPos(pos, m->errorCount > 0);
3594  return;
3595  }
3596 
3597  llvm::Value *exprValue = expr->GetValue(ctx);
3598  if (exprValue == NULL) {
3599  AssertPos(pos, m->errorCount > 0);
3600  return;
3601  }
3602 
3603  // Typechecking should catch this
3604  AssertPos(pos, CastType<PointerType>(exprType) != NULL);
3605 
3606  if (exprType->IsUniformType()) {
3607  // For deletion of a uniform pointer, we just need to cast the
3608  // pointer type to a void pointer type, to match what
3609  // __delete_uniform() from the builtins expects.
3610  exprValue = ctx->BitCastInst(exprValue, LLVMTypes::VoidPointerType,
3611  "ptr_to_void");
3612  llvm::Function *func;
3613  if (g->target->is32Bit()) {
3614  func = m->module->getFunction("__delete_uniform_32rt");
3615  } else {
3616  func = m->module->getFunction("__delete_uniform_64rt");
3617  }
3618  AssertPos(pos, func != NULL);
3619 
3620  ctx->CallInst(func, NULL, exprValue, "");
3621  }
3622  else {
3623  // Varying pointers are arrays of ints, and __delete_varying()
3624  // takes a vector of i64s (even for 32-bit targets). Therefore, we
3625  // only need to extend to 64-bit values on 32-bit targets before
3626  // calling it.
3627  llvm::Function *func;
3628  if (g->target->is32Bit()) {
3629  func = m->module->getFunction("__delete_varying_32rt");
3630  } else {
3631  func = m->module->getFunction("__delete_varying_64rt");
3632  }
3633  AssertPos(pos, func != NULL);
3634  if (g->target->is32Bit())
3635  exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType,
3636  "ptr_to_64");
3637  ctx->CallInst(func, NULL, exprValue, "");
3638  }
3639 }
3640 
3641 
3642 void
3643 DeleteStmt::Print(int indent) const {
3644  printf("%*cDelete Stmt", indent, ' ');
3645 }
3646 
3647 
3648 Stmt *
3650  const Type *exprType;
3651  if (expr == NULL || ((exprType = expr->GetType()) == NULL))
3652  return NULL;
3653 
3654  if (CastType<PointerType>(exprType) == NULL) {
3655  Error(pos, "Illegal to delete non-pointer type \"%s\".",
3656  exprType->GetString().c_str());
3657  return NULL;
3658  }
3659 
3660  return this;
3661 }
3662 
3663 
3664 int
3666  return COST_DELETE;
3667 }
int EstimateCost() const
Definition: stmt.cpp:1192
llvm::Value * storagePtr
Definition: sym.h:72
static const AtomicType * VaryingInt32
Definition: type.h:349
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3121
llvm::Value * Any(llvm::Value *mask)
Definition: ctx.cpp:1401
bool IsVaryingType() const
Definition: type.h:150
ForeachStmt(const std::vector< Symbol * > &loopVars, const std::vector< Expr * > &startExprs, const std::vector< Expr * > &endExprs, Stmt *bodyStatements, bool tiled, SourcePos pos)
Definition: stmt.cpp:1489
llvm::Constant * LLVMMaskAllOn
Definition: llvmutil.cpp:92
Expr * expr
Definition: stmt.h:581
int GetElementCount() const
Definition: type.cpp:1522
Stmt * bodyStmts
Definition: stmt.h:190
void Print(int indent) const
Definition: stmt.cpp:107
ForStmt(Stmt *initializer, Expr *testExpr, Stmt *stepStatements, Stmt *bodyStatements, bool doCoherentCheck, SourcePos pos)
Definition: stmt.cpp:1219
std::vector< VariableDeclaration > vars
Definition: stmt.h:122
std::vector< std::pair< int, llvm::BasicBlock * > > caseBlocks
Definition: stmt.cpp:2811
static bool lIsVaryingFor(ASTNode *node)
Definition: stmt.cpp:974
static bool lHasVaryingBreakOrContinue(Stmt *stmt)
Definition: stmt.cpp:1036
Definition: func.h:44
int EstimateCost() const
Definition: stmt.cpp:696
void Print(int indent) const
Definition: stmt.cpp:3163
llvm::Value * AddElementOffset(llvm::Value *basePtr, int elementNum, const Type *ptrType, const char *name=NULL, const PointerType **resultPtrType=NULL)
Definition: ctx.cpp:2598
Opt opt
Definition: ispc.h:547
void StartUniformIf()
Definition: ctx.cpp:580
void SwitchInst(llvm::Value *expr, llvm::BasicBlock *defaultBlock, const std::vector< std::pair< int, llvm::BasicBlock * > > &caseBlocks, const std::map< llvm::BasicBlock *, llvm::BasicBlock * > &nextBlocks)
Definition: ctx.cpp:1192
void StartSwitch(bool isUniform, llvm::BasicBlock *bbAfterSwitch)
Definition: ctx.cpp:1007
llvm::Value * ProgramIndexVector(bool is32bits=true)
Definition: ctx.cpp:1562
Stmt * stmts
Definition: stmt.h:404
static llvm::Value * lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx, llvm::Value *uniformCounterPtr, llvm::Value *varyingCounterPtr, const std::vector< int > &spans)
Definition: stmt.cpp:1503
void SetInternalMask(llvm::Value *val)
Definition: ctx.cpp:521
void StartLoop(llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, bool uniformControlFlow)
Definition: ctx.cpp:655
const bool doCoherentCheck
Definition: stmt.h:224
const AtomicType * GetAsConstType() const
Definition: type.cpp:312
Declaration of the FunctionEmitContext class
void EmitVariableDebugInfo(Symbol *sym)
Definition: ctx.cpp:1773
Stmt * trueStmts
Definition: stmt.h:149
void StartScope()
Definition: ctx.cpp:1715
void PerformanceWarning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:426
static const AtomicType * VaryingUInt64
Definition: type.h:355
int EstimateCost() const
Definition: stmt.cpp:541
Expr * expr
Definition: stmt.h:380
std::vector< Expr * > endExprs
Definition: stmt.h:288
void emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *test, llvm::BasicBlock *bDone) const
Definition: stmt.cpp:826
SwitchVisitInfo(FunctionEmitContext *c)
Definition: stmt.cpp:2798
virtual void Print() const =0
void SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *val)
Definition: ctx.cpp:529
void BranchInst(llvm::BasicBlock *block)
Definition: ctx.cpp:3485
void emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *test) const
Definition: stmt.cpp:747
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:594
static bool lSwitchASTPreVisit(ASTNode *node, void *d)
Definition: stmt.cpp:2826
llvm::Instruction * ZExtInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2251
bool IsArrayType() const
Definition: type.cpp:240
int getVectorWidth() const
Definition: ispc.h:285
int EstimateCost() const
Definition: stmt.cpp:3181
Module * m
Definition: ispc.cpp:93
Stmt * TypeCheck()
Definition: stmt.cpp:3088
Interface class for statements in the ispc language.
Definition: stmt.h:49
Stmt * TypeCheck()
Definition: stmt.cpp:2160
llvm::Value * NotOperator(llvm::Value *v, const char *name=NULL)
Definition: ctx.cpp:1948
int first_line
Definition: ispc.h:141
virtual void EmitCode(FunctionEmitContext *ctx) const =0
Target * target
Definition: ispc.h:549
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3197
llvm::Value * LoadInst(llvm::Value *ptr, llvm::Value *mask, const Type *ptrType, const char *name=NULL, bool one_elem=false)
Definition: ctx.cpp:2817
void Print(int indent) const
Definition: stmt.cpp:2399
void BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse)
Definition: ctx.cpp:558
BreakStmt(SourcePos pos)
Definition: stmt.cpp:1413
const std::string message
Definition: stmt.h:579
std::vector< std::string > GetLabels()
Definition: ctx.cpp:1311
const std::string format
Definition: stmt.h:548
static const AtomicType * VaryingDouble
Definition: type.h:356
llvm::Instruction * TruncInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2174
llvm::Value * AllocaInst(llvm::Type *llvmType, const char *name=NULL, int align=0, bool atEntryBlock=true)
Definition: ctx.cpp:3051
Expression representing a compile-time constant value.
Definition: expr.h:390
llvm::Value * CmpInst(llvm::Instruction::OtherOps inst, llvm::CmpInst::Predicate pred, llvm::Value *v0, llvm::Value *v1, const char *name=NULL)
Definition: ctx.cpp:2000
void EndSwitch()
Definition: ctx.cpp:1033
int EstimateCost() const
Definition: stmt.cpp:2681
void StartVaryingIf(llvm::Value *oldMask)
Definition: ctx.cpp:586
void Print(int indent) const
Definition: stmt.cpp:3551
static llvm::Type * BoolType
Definition: llvmutil.h:74
void SetContinueTarget(llvm::BasicBlock *bb)
Definition: ctx.h:258
#define Assert(expr)
Definition: ispc.h:172
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2882
void StartForeach(ForeachType ft)
Definition: ctx.cpp:699
llvm::Constant * LLVMInt32Vector(int32_t i)
Definition: llvmutil.cpp:379
int EstimateCost() const
Definition: stmt.cpp:3665
ASTNode * WalkAST(ASTNode *root, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc, void *data)
Definition: ast.cpp:74
static llvm::VectorType * Int32VectorType
Definition: llvmutil.h:96
static const AtomicType * UniformUInt32
Definition: type.h:352
int EstimateCost() const
Definition: stmt.cpp:2974
const Type * GetElementType(const std::string &name) const
Definition: type.cpp:2484
Stmt * Optimize()
Definition: stmt.cpp:3224
void Continue(bool doCoherenceCheck)
Definition: ctx.cpp:856
llvm::ConstantInt * LLVMInt8(int8_t i)
Definition: llvmutil.cpp:235
llvm::Value * GetFullMask()
Definition: ctx.cpp:494
virtual llvm::Constant * GetConstant(const Type *type) const
Definition: expr.cpp:102
IfStmt(Expr *testExpr, Stmt *trueStmts, Stmt *falseStmts, bool doAllCheck, SourcePos pos)
Definition: stmt.cpp:549
Stmt * TypeCheck()
Definition: stmt.cpp:2424
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3587
int EstimateCost() const
Definition: stmt.cpp:3272
virtual const Type * GetAsUniformType() const =0
Stmt * TypeCheck()
Definition: stmt.cpp:1160
Stmt * TypeCheck()
Definition: stmt.cpp:1429
void AddInstrumentationPoint(const char *note)
Definition: ctx.cpp:1660
llvm::BasicBlock * lastBlock
Definition: stmt.cpp:2821
Stmt * TypeCheck()
Definition: stmt.cpp:2772
std::string name
Definition: sym.h:71
static int lLog2(int i)
Definition: stmt.cpp:1637
A list of expressions.
Definition: expr.h:252
CaseStmt(int value, Stmt *stmt, SourcePos pos)
Definition: stmt.cpp:2710
static bool lCheckMask(Stmt *stmts)
Definition: stmt.cpp:2695
Symbol * LookupVariable(const char *name)
Definition: sym.cpp:131
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2263
Stmt * TypeCheck()
Definition: stmt.cpp:3487
Statement implementation representing a &#39;do&#39; statement in the program.
Definition: stmt.h:173
void BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse)
Definition: ctx.cpp:547
int EstimateCost() const
Definition: stmt.cpp:2778
void RestoreContinuedLanes()
Definition: ctx.cpp:979
llvm::Constant * LLVMFalse
Definition: llvmutil.cpp:91
llvm::Constant * LLVMMaskAllOff
Definition: llvmutil.cpp:93
virtual std::string GetString() const =0
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:1419
void Print(int indent) const
Definition: stmt.cpp:3216
llvm::BasicBlock * GetCurrentBasicBlock()
Definition: ctx.cpp:470
static const AtomicType * UniformUInt16
Definition: type.h:351
static bool lHasUnsizedArrays(const Type *type)
Definition: stmt.cpp:134
int GetSOAWidth() const
Definition: type.h:160
static PointerType * GetUniform(const Type *t, bool isSlice=false)
Definition: type.cpp:1015
ReturnStmt(Expr *e, SourcePos p)
Definition: stmt.cpp:3046
void Break(bool doCoherenceCheck)
Definition: ctx.cpp:776
const int value
Definition: stmt.h:403
int EstimateCost() const
Definition: stmt.cpp:120
ConstExpr * constValue
Definition: sym.h:87
llvm::BasicBlock * CreateBasicBlock(const char *name)
Definition: ctx.cpp:1602
header file with declarations for symbol and symbol table classes.
static const AtomicType * UniformBool
Definition: type.h:346
llvm::Value * BroadcastValue(llvm::Value *v, llvm::Type *vecType, const char *name=NULL)
Definition: ctx.cpp:3576
Symbol * sym
Definition: stmt.h:335
bool disableMaskAllOnOptimizations
Definition: ispc.h:476
Expr * test
Definition: stmt.h:147
int EstimateCost() const
Definition: stmt.cpp:2433
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:263
void StoreInst(llvm::Value *value, llvm::Value *ptr)
Definition: ctx.cpp:3344
static ASTNode * lVaryingBCPostFunc(ASTNode *node, void *d)
Definition: stmt.cpp:1021
void Print(int indent) const
Definition: stmt.cpp:1387
llvm::Module * module
Definition: module.h:158
Stmt * stmts
Definition: stmt.h:448
File with declarations for classes related to statements in the language.
static void lEmitIfStatements(FunctionEmitContext *ctx, Stmt *stmts, const char *trueOrFalse)
Definition: stmt.cpp:557
ContinueStmt(SourcePos pos)
Definition: stmt.cpp:1451
void EmitCaseLabel(int value, bool checkMask, SourcePos pos)
Definition: ctx.cpp:1140
void Print(int indent) const
Definition: stmt.cpp:2929
Stmt * stmts
Definition: stmt.h:425
Globals * g
Definition: ispc.cpp:92
bool IsReferenceType(const Type *t)
Definition: type.h:1094
Expr * expr
Definition: stmt.h:446
Expression representing a type cast of the given expression to a probably-different type...
Definition: expr.h:509
ExprStmt(Expr *expr, SourcePos pos)
Definition: stmt.cpp:84
LabeledStmt(const char *label, Stmt *stmt, SourcePos p)
Definition: stmt.cpp:3189
static const AtomicType * UniformUInt64
Definition: type.h:355
bool IsUniformType() const
Definition: type.h:145
void EndLoop()
Definition: ctx.cpp:684
llvm::Value * GetFunctionMask()
Definition: ctx.cpp:482
bool disableCoherentControlFlow
Definition: ispc.h:498
Abstract base class for nodes in the abstract syntax tree (AST).
Definition: ast.h:50
llvm::Value * GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, const Type *ptrType, const char *name=NULL)
Definition: ctx.cpp:2438
void Print(int indent) const
Definition: stmt.cpp:3481
Stmt * TypeCheck()
Definition: stmt.cpp:2734
void CurrentLanesReturned(Expr *value, bool doCoherenceCheck)
Definition: ctx.cpp:1325
llvm::Constant * LLVMTrue
Definition: llvmutil.cpp:90
Stmt * TypeCheck()
Definition: stmt.cpp:3266
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:385
virtual bool IsConstType() const =0
ForeachUniqueStmt(const char *iterName, Expr *expr, Stmt *stmts, SourcePos pos)
Definition: stmt.cpp:2441
Stmt * stmts
Definition: stmt.h:337
ForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos)
Definition: stmt.cpp:2255
SourcePos GetDebugPos() const
Definition: ctx.cpp:1687
Stmt * TypeCheck()
Definition: stmt.cpp:101
llvm::Value * LaneMask(llvm::Value *mask)
Definition: ctx.cpp:1454
Stmt * Optimize()
Definition: stmt.cpp:3169
DeclStmt(const std::vector< VariableDeclaration > &v, SourcePos pos)
Definition: stmt.cpp:128
Stmt * step
Definition: stmt.h:221
Stmt * TypeCheck()
Definition: stmt.cpp:3230
static char lEncodeType(const Type *t)
Definition: stmt.cpp:3303
bool SafeToRunWithMaskAllOff(ASTNode *root)
Definition: ast.cpp:506
Stmt * TypeCheck()
Definition: stmt.cpp:3649
static llvm::Type * Int64Type
Definition: llvmutil.h:79
static llvm::Type * Int8Type
Definition: llvmutil.h:76
llvm::Constant * LLVMBoolVector(bool v)
Definition: llvmutil.cpp:493
Stmt * TypeCheck()
Definition: stmt.cpp:2941
bool isTiled
Definition: stmt.h:289
Representation of a structure holding a number of members.
Definition: type.h:691
void Print(int indent) const
Definition: stmt.cpp:3017
FunctionEmitContext * ctx
Definition: stmt.cpp:2804
static llvm::VectorType * Int64VectorType
Definition: llvmutil.h:97
Header file with declarations for various LLVM utility stuff.
llvm::ArrayType * LLVMType(llvm::LLVMContext *ctx) const
Definition: type.cpp:1377
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3255
#define AssertPos(pos, expr)
Definition: ispc.h:175
virtual const Type * GetLValueType() const
Definition: expr.cpp:94
AssertStmt(const std::string &msg, Expr *e, SourcePos p)
Definition: stmt.cpp:3501
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2993
ISA getISA() const
Definition: ispc.h:269
virtual const Type * GetAsConstType() const =0
static bool IsBasicType(const Type *type)
Definition: type.cpp:3625
SourcePos pos
Definition: sym.h:70
llvm::Value * CallInst(llvm::Value *func, const FunctionType *funcType, const std::vector< llvm::Value * > &args, const char *name=NULL)
Definition: ctx.cpp:3667
static void lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a)
Definition: stmt.cpp:1657
Stmt * TypeCheck()
Definition: stmt.cpp:1467
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3052
void Print(int indent) const
Definition: stmt.cpp:3278
void SetBlockEntryMask(llvm::Value *mask)
Definition: ctx.cpp:515
virtual llvm::Value * GetValue(FunctionEmitContext *ctx) const =0
void Print(int indent) const
Definition: stmt.cpp:1202
StorageClass storageClass
Definition: sym.h:96
Representation of a range of positions in a source file.
Definition: ispc.h:136
Stmt * stmts
Definition: stmt.h:312
DoStmt(Expr *testExpr, Stmt *bodyStmts, bool doCoherentCheck, SourcePos pos)
Definition: stmt.cpp:1043
Stmt * TypeCheck()
Definition: stmt.cpp:3175
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3507
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2717
bool InForeachLoop() const
Definition: ctx.cpp:1255
Stmt * stmts
Definition: stmt.h:223
int VaryingCFDepth() const
Definition: ctx.cpp:1245
void ClearBreakLanes()
Definition: ctx.cpp:997
static const AtomicType * VaryingBool
Definition: type.h:346
GotoStmt(const char *label, SourcePos gotoPos, SourcePos idPos)
Definition: stmt.cpp:3113
const char * name
Definition: ispc.h:140
virtual void Print(int indent) const =0
static bool checkInit(const Type *type, Expr *init)
Definition: stmt.cpp:469
SourcePos pos
Definition: ast.h:77
void Print(int indent) const
Definition: stmt.cpp:3643
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:410
Stmt * TypeCheck()
Definition: stmt.cpp:1357
static llvm::PointerType * VoidPointerType
Definition: llvmutil.h:72
static const AtomicType * VaryingInt64
Definition: type.h:354
const bool doCoherentCheck
Definition: stmt.h:191
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2755
Expr * values
Definition: stmt.h:551
const Type * GetReturnType() const
Definition: func.cpp:180
bool foundVaryingBreakOrContinue
Definition: stmt.cpp:967
const Function * GetFunction() const
Definition: ctx.cpp:464
void InitSymbol(llvm::Value *lvalue, const Type *symType, Expr *initExpr, FunctionEmitContext *ctx, SourcePos pos)
Definition: expr.cpp:648
llvm::Value * GetStringPtr(const std::string &str)
Definition: ctx.cpp:1590
Expr * testExpr
Definition: stmt.h:189
SwitchStmt(Expr *expr, Stmt *stmts, SourcePos pos)
Definition: stmt.cpp:2786
virtual llvm::Type * LLVMType(llvm::LLVMContext *ctx) const =0
int EstimateCost() const
Definition: stmt.cpp:1435
static const AtomicType * UniformUInt8
Definition: type.h:350
void DisableGatherScatterWarnings()
Definition: ctx.cpp:1264
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:1049
static llvm::Type * Int32Type
Definition: llvmutil.h:78
void emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask, llvm::Value *test) const
Definition: stmt.cpp:727
void SetDebugPos(SourcePos pos)
Definition: ctx.cpp:1681
void Print(int indent) const
Definition: stmt.cpp:2625
#define ISPC_MAX_NVEC
Definition: ispc.h:68
bool disableUniformControlFlow
Definition: ispc.h:504
Representation of a function in a source file.
Stmt * falseStmts
Definition: stmt.h:151
Stmt * TypeCheck()
Definition: stmt.cpp:2657
int first_column
Definition: ispc.h:142
void Print() const
Definition: ispc.cpp:1599
Expr * test
Definition: stmt.h:218
Stmt * init
Definition: stmt.h:215
static bool Equal(const Type *a, const Type *b)
Definition: type.cpp:3735
static bool lVaryingBCPreFunc(ASTNode *node, void *d)
Definition: stmt.cpp:989
Stmt * stmts
Definition: stmt.h:290
Definition: stmt.h:387
std::vector< Expr * > exprs
Definition: expr.h:270
llvm::BasicBlock * defaultBlock
Definition: stmt.cpp:2807
virtual const Type * GetType() const =0
std::vector< Expr * > startExprs
Definition: stmt.h:287
static const AtomicType * UniformFloat
Definition: type.h:353
llvm::Value * GetInternalMask()
Definition: ctx.cpp:488
int EstimateCost() const
Definition: stmt.cpp:3246
void Print(int indent) const
Definition: stmt.cpp:2763
llvm::Value * BitCastInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2072
static const AtomicType * UniformInt32
Definition: type.h:349
Symbol * sym
Definition: stmt.h:311
void SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test)
Definition: ctx.cpp:537
SourcePos identifierPos
Definition: stmt.h:471
Representation of a program symbol.
Definition: sym.h:63
llvm::Value * ExtractInst(llvm::Value *v, int elt, const char *name=NULL)
Definition: ctx.cpp:3507
Stmt * stmt
Definition: stmt.h:496
int varyingCFDepth
Definition: sym.h:98
void EndForeach()
Definition: ctx.cpp:733
int EstimateCost() const
Definition: stmt.cpp:2203
void EnableGatherScatterWarnings()
Definition: ctx.cpp:1270
Stmt * TypeCheck()
Definition: stmt.cpp:3032
Interface class that defines the type abstraction.
Definition: type.h:101
static const AtomicType * UniformDouble
Definition: type.h:356
int EstimateCost() const
Definition: stmt.cpp:1473
Expr abstract base class and expression implementations.
void SetCurrentBasicBlock(llvm::BasicBlock *bblock)
Definition: ctx.cpp:476
static llvm::VectorType * MaskType
Definition: llvmutil.h:90
void EmitDefaultLabel(bool checkMask, SourcePos pos)
Definition: ctx.cpp:1077
Expr * expr
Definition: stmt.h:92
Stmt * stmts
Definition: stmt.h:358
Stmt * Optimize()
Definition: stmt.cpp:436
DefaultStmt(Stmt *stmt, SourcePos pos)
Definition: stmt.cpp:2748
Expr * TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase)
Definition: expr.cpp:595
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:2451
Statement representing a single if statement, possibly with an else clause.
Definition: stmt.h:128
Expr is the abstract base class that defines the interface that all expression types must implement...
Definition: expr.h:48
Expr * expr
Definition: stmt.h:603
virtual Stmt * Optimize()
Definition: stmt.cpp:76
void Print(int indent) const
Definition: stmt.cpp:2209
std::vector< Symbol * > dimVariables
Definition: stmt.h:286
llvm::Value * IntToPtrInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2141
const Function * parentFunction
Definition: sym.h:104
int EstimateCost() const
Definition: stmt.cpp:3493
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:1457
virtual const Type * GetAsNonConstType() const =0
UnmaskedStmt(Stmt *stmt, SourcePos pos)
Definition: stmt.cpp:2986
int EstimateCost() const
Definition: stmt.cpp:3038
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:3394
llvm::Value * All(llvm::Value *mask)
Definition: ctx.cpp:1418
std::string name
Definition: stmt.h:494
Expression that represents dereferencing a reference to get its value.
Definition: expr.h:601
static const Type * SizeUnsizedArrays(const Type *type, Expr *initExpr)
Definition: type.cpp:1653
void Print(int indent) const
Definition: stmt.cpp:706
std::vector< std::string > MatchStrings(const std::string &str, const std::vector< std::string > &options)
Definition: util.cpp:519
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:1226
Stmt * TypeCheck()
Definition: stmt.cpp:504
llvm::Value * SmearUniform(llvm::Value *value, const char *name=NULL)
Definition: ctx.cpp:2034
Expr * expr
Definition: stmt.h:336
std::string label
Definition: stmt.h:470
void Print(int indent) const
Definition: stmt.cpp:523
bool PossiblyResolveFunctionOverloads(Expr *expr, const Type *type)
Definition: expr.cpp:615
PrintStmt(const std::string &f, Expr *v, SourcePos p)
Definition: stmt.cpp:3291
const Type * GetElementType() const
Definition: type.cpp:1528
int EstimateCost() const
Definition: stmt.cpp:3094
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
llvm::ConstantInt * LLVMInt64(int64_t i)
Definition: llvmutil.cpp:277
static const AtomicType * UniformInt64
Definition: type.h:354
int errorCount
Definition: module.h:151
llvm::LLVMContext * ctx
Definition: ispc.h:638
static const AtomicType * UniformInt16
Definition: type.h:348
const Type * type
Definition: sym.h:84
void Print(int indent) const
Definition: stmt.cpp:1479
int EstimateCost() const
Definition: stmt.cpp:3572
static llvm::Value * lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes)
Definition: stmt.cpp:3333
int varyingControlFlowDepth
Definition: stmt.cpp:966
static bool EqualIgnoringConst(const Type *a, const Type *b)
Definition: type.cpp:3741
Stmt * TypeCheck()
Definition: stmt.cpp:677
bool IsSOAType() const
Definition: type.h:156
DeleteStmt(Expr *e, SourcePos p)
Definition: stmt.cpp:3580
void emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask, llvm::Value *test, llvm::BasicBlock *bDone) const
Definition: stmt.cpp:896
void Debug(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:398
void Print(int indent) const
Definition: stmt.cpp:3100
void Print(int indent) const
Definition: stmt.cpp:2725
bool is32Bit() const
Definition: ispc.h:275
const bool doAllCheck
Definition: stmt.h:158
static const AtomicType * VaryingUInt32
Definition: type.h:352
llvm::Value * BinaryOperator(llvm::Instruction::BinaryOps inst, llvm::Value *v0, llvm::Value *v1, const char *name=NULL)
Definition: ctx.cpp:1914
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:90
void Print(int indent) const
Definition: stmt.cpp:1441
llvm::BasicBlock * GetLabeledBasicBlock(const std::string &label)
Definition: ctx.cpp:1303
void SetFunctionMask(llvm::Value *val)
Definition: ctx.cpp:507
static const AtomicType * VaryingFloat
Definition: type.h:353
SymbolTable * symbolTable
Definition: module.h:155
static const AtomicType * UniformInt8
Definition: type.h:347
int EstimateCost() const
Definition: stmt.cpp:1377
int GetElementCount() const
Definition: type.h:744
File with declarations for classes related to type representation.
Stmt * TypeCheck()
Definition: stmt.cpp:3557
llvm::Value * I1VecToBoolVec(llvm::Value *b)
Definition: ctx.cpp:1608
std::map< llvm::BasicBlock *, llvm::BasicBlock * > nextBlock
Definition: stmt.cpp:2816
int EstimateCost() const
Definition: stmt.cpp:2740
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:209
One-dimensional array type.
Definition: type.h:555
void EmitCode(FunctionEmitContext *ctx) const
Definition: stmt.cpp:1690