Intel SPMD Program Compiler  1.11.0
stmt.cpp
1 /*
2  Copyright (c) 2010-2019, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file stmt.cpp
35  @brief File with definitions of classes related to statements in the language
36 */
37 
38 #include "stmt.h"
39 #include "ctx.h"
40 #include "expr.h"
41 #include "func.h"
42 #include "llvmutil.h"
43 #include "module.h"
44 #include "sym.h"
45 #include "type.h"
46 #include "util.h"
47 
48 #include <map>
49 #include <stdio.h>
50 
51 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
52 #include <llvm/CallingConv.h>
53 #include <llvm/DerivedTypes.h>
54 #include <llvm/Function.h>
55 #include <llvm/Instructions.h>
56 #include <llvm/LLVMContext.h>
57 #include <llvm/Metadata.h>
58 #include <llvm/Module.h>
59 #include <llvm/Type.h>
60 #else
61 #include <llvm/IR/CallingConv.h>
62 #include <llvm/IR/DerivedTypes.h>
63 #include <llvm/IR/Function.h>
64 #include <llvm/IR/Instructions.h>
65 #include <llvm/IR/LLVMContext.h>
66 #include <llvm/IR/Metadata.h>
67 #include <llvm/IR/Module.h>
68 #include <llvm/IR/Type.h>
69 #endif
70 #include <llvm/Support/raw_ostream.h>
71 
72 ///////////////////////////////////////////////////////////////////////////
73 // Stmt
74 
75 Stmt *Stmt::Optimize() { return this; }
76 
77 ///////////////////////////////////////////////////////////////////////////
78 // ExprStmt
79 
80 ExprStmt::ExprStmt(Expr *e, SourcePos p) : Stmt(p, ExprStmtID) { expr = e; }
81 
82 void ExprStmt::EmitCode(FunctionEmitContext *ctx) const {
83  if (!ctx->GetCurrentBasicBlock())
84  return;
85 
86  ctx->SetDebugPos(pos);
87  if (expr)
88  expr->GetValue(ctx);
89 }
90 
91 Stmt *ExprStmt::TypeCheck() { return this; }
92 
93 void ExprStmt::Print(int indent) const {
94  if (!expr)
95  return;
96 
97  printf("%*c", indent, ' ');
98  printf("Expr stmt: ");
99  pos.Print();
100  expr->Print();
101  printf("\n");
102 }
103 
104 int ExprStmt::EstimateCost() const { return 0; }
105 
106 ///////////////////////////////////////////////////////////////////////////
107 // DeclStmt
108 
109 DeclStmt::DeclStmt(const std::vector<VariableDeclaration> &v, SourcePos p) : Stmt(p, DeclStmtID), vars(v) {}
110 
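/** Returns true if the given type is an array type (possibly nested, i.e. an
    array of arrays) that has an unsized dimension at some nesting level. */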
111 static bool lHasUnsizedArrays(const Type *type) {
112  const ArrayType *at = CastType<ArrayType>(type);
113  if (at == NULL)
114  return false;
115 
116  if (at->GetElementCount() == 0)
117  return true;
118  else
119  return lHasUnsizedArrays(at->GetElementType());
120 }
121 
122 #ifdef ISPC_NVPTX_ENABLED
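/* Helper for the "nvptx" target: given a pointer into addrspace(3) (__shared__)
   or addrspace(4) (__constant__) memory, returns an equivalent generic
   (addrspace(0)) pointer. For __shared__ arrays it also adds a per-warp offset
   (via __warp_index) so that each of the four warps in a thread block works on
   its own slice of the 4x-scaled allocation made in DeclStmt::EmitCode() below. */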
123 static llvm::Value *lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos,
124  const bool variable = false) {
125  if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX)
126  return value;
127  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType());
128  const int addressSpace = pt->getAddressSpace();
129  if (addressSpace != 3 && addressSpace != 4)
130  return value;
131 
132  llvm::Type *elTy = pt->getElementType();
133 
134  /* convert elTy addrspace(3)* to i64* addrspace(3)* */
135  llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace);
136  value = ctx->BitCastInst(value, Int64Ptr3, "gep2gen_cast1");
137 
138  /* convert i64* addrspace(3) to i64* */
139  llvm::Function *__cvt2gen = m->module->getFunction(
140  addressSpace == 3 ? (variable ? "__cvt_loc2gen_var" : "__cvt_loc2gen") : "__cvt_const2gen");
141 
142  std::vector<llvm::Value *> __cvt2gen_args;
143  __cvt2gen_args.push_back(value);
144  value = llvm::CallInst::Create(__cvt2gen, __cvt2gen_args, variable ? "gep2gen_cvt_var" : "gep2gen_cvt",
145  ctx->GetCurrentBasicBlock());
146 
147  /* compute offset */
148  if (addressSpace == 3) {
149  assert(elTy->isArrayTy());
150  const int numElTot = elTy->getArrayNumElements();
151  const int numEl = numElTot / 4;
152 #if 0
153  fprintf(stderr, " --- detected addrspace(3) sz= %d --- \n", numEl);
154 #endif
155  llvm::ArrayType *arrTy = llvm::dyn_cast<llvm::ArrayType>(pt->getArrayElementType());
156  assert(arrTy != NULL);
157  llvm::Type *arrElTy = arrTy->getElementType();
158 #if 0
159  if (arrElTy->isArrayTy())
160  Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array");
161 #endif
162 
163  /* convert i64* to arrElTy* */
164  llvm::PointerType *arrElTyPt0 = llvm::PointerType::get(arrElTy, 0);
165  value = ctx->BitCastInst(value, arrElTyPt0, "gep2gen_cast2");
166 
167  llvm::Function *func_warp_index = m->module->getFunction("__warp_index");
168  llvm::Value *warpId = ctx->CallInst(func_warp_index, NULL, std::vector<llvm::Value *>(), "gep2gen_warp_index");
169  llvm::Value *offset = ctx->BinaryOperator(llvm::Instruction::Mul, warpId, LLVMInt32(numEl), "gep2gen_offset");
170 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
171  value = llvm::GetElementPtrInst::Create(value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock());
172 #else
173  value = llvm::GetElementPtrInst::Create(NULL, value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock());
174 #endif
175  }
176 
177  /* convert arrElTy* to elTy* */
178  llvm::PointerType *elTyPt0 = llvm::PointerType::get(elTy, 0);
179  value = ctx->BitCastInst(value, elTyPt0, "gep2gen_cast3");
180 
181  return value;
182 }
183 #endif /* ISPC_NVPTX_ENABLED */
184 
185 void DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
186  if (!ctx->GetCurrentBasicBlock())
187  return;
188 
189  for (unsigned int i = 0; i < vars.size(); ++i) {
190  Symbol *sym = vars[i].sym;
191  AssertPos(pos, sym != NULL);
192  if (sym->type == NULL)
193  continue;
194  Expr *initExpr = vars[i].init;
195 
196  // Now that we're in the thick of emitting code, it's easy for us
197  // to find out the level of nesting of varying control flow we're
198  // in at this declaration. So we can finally set that
199  // Symbol::varyingCFDepth variable.
200  // @todo It's disgusting to be doing this here.
201  sym->varyingCFDepth = ctx->VaryingCFDepth();
202 
203  ctx->SetDebugPos(sym->pos);
204 
205  // If it's an array that was declared without a size but has an
206  // initializer list, then use the number of elements in the
207  // initializer list to finally set the array's size.
208  sym->type = ArrayType::SizeUnsizedArrays(sym->type, initExpr);
209  if (sym->type == NULL)
210  continue;
211 
212  if (lHasUnsizedArrays(sym->type)) {
213  Error(pos, "Illegal to declare an unsized array variable without "
214  "providing an initializer expression to set its size.");
215  continue;
216  }
217 
218  // References must have initializer expressions as well.
219  if (IsReferenceType(sym->type) == true) {
220  if (initExpr == NULL) {
221  Error(sym->pos,
222  "Must provide initializer for reference-type "
223  "variable \"%s\".",
224  sym->name.c_str());
225  continue;
226  }
227  if (IsReferenceType(initExpr->GetType()) == false) {
228  const Type *initLVType = initExpr->GetLValueType();
229  if (initLVType == NULL) {
230  Error(initExpr->pos,
231  "Initializer for reference-type variable "
232  "\"%s\" must have an lvalue type.",
233  sym->name.c_str());
234  continue;
235  }
236  if (initLVType->IsUniformType() == false) {
237  Error(initExpr->pos,
238  "Initializer for reference-type variable "
239  "\"%s\" must have a uniform lvalue type.",
240  sym->name.c_str());
241  continue;
242  }
243  }
244  }
245 
246  llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
247  if (llvmType == NULL) {
248  AssertPos(pos, m->errorCount > 0);
249  return;
250  }
251 
252  if (sym->storageClass == SC_STATIC) {
253 #ifdef ISPC_NVPTX_ENABLED
254  if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType()) {
255  Error(sym->pos,
256  "Non-constant static variable "
257  "\"%s\" is not supported with "
258  "\"nvptx\" target.",
259  sym->name.c_str());
260  return;
261  }
262  if (g->target->getISA() == Target::NVPTX && sym->type->IsVaryingType())
263  PerformanceWarning(sym->pos,
264  "\"const static varying\" variable "
265  "\"%s\" is stored in __global address space with "
266  "\"nvptx\" target.",
267  sym->name.c_str());
268  if (g->target->getISA() == Target::NVPTX && sym->type->IsUniformType())
269  PerformanceWarning(sym->pos,
270  "\"const static uniform\" variable "
271  "\"%s\" is stored in __constant address space with "
272  "\"nvptx\" target.",
273  sym->name.c_str());
274 #endif /* ISPC_NVPTX_ENABLED */
275  // For static variables, we need a compile-time constant value
276  // for its initializer; if there's no initializer, we use a
277  // zero value.
278  llvm::Constant *cinit = NULL;
279  if (initExpr != NULL) {
280  if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
281  continue;
282  // FIXME: we only need this for function pointers; it was
283  // already done for atomic types and enums in
284  // DeclStmt::TypeCheck()...
285  if (llvm::dyn_cast<ExprList>(initExpr) == NULL) {
286  initExpr = TypeConvertExpr(initExpr, sym->type, "initializer");
287  // FIXME: and this is only needed to re-establish
288  // constant-ness so that GetConstant below works for
289  // constant arithmetic expressions...
290  initExpr = ::Optimize(initExpr);
291  }
292 
293  cinit = initExpr->GetConstant(sym->type);
294  if (cinit == NULL)
295  Error(initExpr->pos,
296  "Initializer for static variable "
297  "\"%s\" must be a constant.",
298  sym->name.c_str());
299  }
300  if (cinit == NULL)
301  cinit = llvm::Constant::getNullValue(llvmType);
302 
303  // Allocate space for the static variable in global scope, so
304  // that it persists across function calls
305 #ifdef ISPC_NVPTX_ENABLED
306  int addressSpace = 0;
307  if (g->target->getISA() == Target::NVPTX && sym->type->IsConstType() && sym->type->IsUniformType())
308  addressSpace = 4;
309  sym->storagePtr = new llvm::GlobalVariable(
310  *m->module, llvmType, sym->type->IsConstType(), llvm::GlobalValue::InternalLinkage, cinit,
311  llvm::Twine("static.") + llvm::Twine(sym->pos.first_line) + llvm::Twine(".") + sym->name.c_str(), NULL,
312  llvm::GlobalVariable::NotThreadLocal, addressSpace);
313  sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
314 #else /* ISPC_NVPTX_ENABLED */
315  sym->storagePtr = new llvm::GlobalVariable(
316  *m->module, llvmType, sym->type->IsConstType(), llvm::GlobalValue::InternalLinkage, cinit,
317  llvm::Twine("static.") + llvm::Twine(sym->pos.first_line) + llvm::Twine(".") + sym->name.c_str());
318 #endif /* ISPC_NVPTX_ENABLED */
319  // Tell the FunctionEmitContext about the variable
320  ctx->EmitVariableDebugInfo(sym);
321  }
322 #ifdef ISPC_NVPTX_ENABLED
323  else if ((sym->type->IsUniformType() || sym->type->IsSOAType()) &&
324  /* NVPTX:
325  * only non-constant uniform data types are stored in shared memory
326  * constant uniform are automatically promoted to varying
327  */
328  !sym->type->IsConstType() &&
329 #if 1
330  sym->type->IsArrayType() &&
331 #endif
332  g->target->getISA() == Target::NVPTX) {
333  PerformanceWarning(sym->pos,
334  "Non-constant \"uniform\" data types might be slow with \"nvptx\" target. "
335  "Unless data sharing between program instances is desired, try \"const [static] "
336  "uniform\", \"varying\" or \"uniform new uniform \"+\"delete\" if possible.");
337 
338  /* with __shared__ memory everything must be an array */
339  int nel = 4;
340  ArrayType *nat;
341  bool variable = true;
342  if (sym->type->IsArrayType()) {
343  const ArrayType *at = CastType<ArrayType>(sym->type);
344  /* we must scale # elements by 4, because a thread-block will run 4 warps
345  * or 128 threads.
346  * ***note-to-me***: please define these values (128 threads / 4 warps)
347  * in nvptx-target definition
348  * instead of compile-time constants
349  */
350  nel *= at->GetElementCount();
351  if (sym->type->IsSOAType())
352  nel *= sym->type->GetSOAWidth();
353  nat = new ArrayType(at->GetElementType(), nel);
354  variable = false;
355  } else
356  nat = new ArrayType(sym->type, nel);
357 
358  llvm::Type *llvmTypeUn = nat->LLVMType(g->ctx);
359  llvm::Constant *cinit = llvm::UndefValue::get(llvmTypeUn);
360 
361  sym->storagePtr = new llvm::GlobalVariable(
362  *m->module, llvmTypeUn, sym->type->IsConstType(), llvm::GlobalValue::InternalLinkage, cinit,
363  llvm::Twine("local_") + llvm::Twine(sym->pos.first_line) + llvm::Twine("_") + sym->name.c_str(), NULL,
364  llvm::GlobalVariable::NotThreadLocal,
365  /*AddressSpace=*/3);
366  sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos, variable);
367  llvm::PointerType *ptrTy = llvm::PointerType::get(sym->type->LLVMType(g->ctx), 0);
368  sym->storagePtr = ctx->BitCastInst(sym->storagePtr, ptrTy, "uniform_decl");
369 
370  // Tell the FunctionEmitContext about the variable; must do
371  // this before the initializer stuff.
372  ctx->EmitVariableDebugInfo(sym);
373 
374  if (initExpr == 0 && sym->type->IsConstType())
375  Error(sym->pos,
376  "Missing initializer for const variable "
377  "\"%s\".",
378  sym->name.c_str());
379 
380  // And then get it initialized...
381  sym->parentFunction = ctx->GetFunction();
382  InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
383  }
384 #endif /* ISPC_NVPTX_ENABLED */
385  else {
386  // For non-static variables, allocate storage on the stack
387  sym->storagePtr = ctx->AllocaInst(llvmType, sym->name.c_str());
388 
389  // Tell the FunctionEmitContext about the variable; must do
390  // this before the initializer stuff.
391  ctx->EmitVariableDebugInfo(sym);
392  if (initExpr == 0 && sym->type->IsConstType())
393  Error(sym->pos,
394  "Missing initializer for const variable "
395  "\"%s\".",
396  sym->name.c_str());
397 
398  // And then get it initialized...
399  sym->parentFunction = ctx->GetFunction();
400  InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
401  }
402  }
403 }
404 
405 Stmt *DeclStmt::Optimize() {
406  for (unsigned int i = 0; i < vars.size(); ++i) {
407  Expr *init = vars[i].init;
408  if (init != NULL && llvm::dyn_cast<ExprList>(init) == NULL) {
409  // If the variable is const-qualified, after we've optimized
410  // the initializer expression, see if we have a ConstExpr. If
411  // so, save it in Symbol::constValue where it can be used in
412  // optimizing later expressions that have this symbol in them.
413  // Note that there are cases where the expression may be
414  // constant but where we don't have a ConstExpr; an example is
415  // const arrays--the ConstExpr implementation just can't
416  // represent an array of values.
417  //
418  // All this is fine in terms of the code that's generated in
419  // the end (LLVM's constant folding stuff is good), but it
420  // means that the ispc compiler's ability to reason about what
421  // is definitely a compile-time constant for things like
422  // computing array sizes from non-trivial expressions is
423  // consequently limited.
424  Symbol *sym = vars[i].sym;
425  if (sym->type && sym->type->IsConstType() && Type::Equal(init->GetType(), sym->type))
426  sym->constValue = llvm::dyn_cast<ConstExpr>(init);
427  }
428  }
429  return this;
430 }
431 
432 // Do type conversion if needed and check that an array is not being
433 // initialized with another array (array assignment is not allowed).
434 // Do this recursively to handle brace initialization, which may contain
435 // another brace initialization.
436 static bool checkInit(const Type *type, Expr **init) {
437  bool encounteredError = false;
438 
439  // get the right type for stuff like const float foo = 2; so that
440  // the int->float type conversion is in there and we don't return
441  // an int as the constValue later...
442  if (CastType<AtomicType>(type) != NULL || CastType<EnumType>(type) != NULL) {
443  // If it's an expr list with an atomic type, we'll later issue
444  // an error. Need to leave vars[i].init as is in that case so
445  // it is in fact caught later, though.
446  if (llvm::dyn_cast<ExprList>(*init) == NULL) {
447  *init = TypeConvertExpr(*init, type, "initializer");
448  if (*init == NULL)
449  encounteredError = true;
450  }
451  } else if (CastType<ArrayType>(type) != NULL && llvm::dyn_cast<ExprList>(*init) == NULL) {
452  encounteredError = true;
453  Error((*init)->pos, "Array initializer must be an initializer list");
454  } else if (CastType<StructType>(type) != NULL && llvm::dyn_cast<ExprList>(*init) != NULL) {
455  const StructType *st = CastType<StructType>(type);
456  ExprList *el = llvm::dyn_cast<ExprList>(*init);
457  int elt_count = st->GetElementCount() < el->exprs.size() ? st->GetElementCount() : el->exprs.size();
458  for (int i = 0; i < elt_count; i++) {
459  encounteredError |= checkInit(st->GetElementType(i), &(el->exprs[i]));
460  }
461  }
462 
463  return encounteredError;
464 }
465 
466 Stmt *DeclStmt::TypeCheck() {
467  bool encounteredError = false;
468  for (unsigned int i = 0; i < vars.size(); ++i) {
469  if (vars[i].sym == NULL) {
470  encounteredError = true;
471  continue;
472  }
473 
474  if (vars[i].init == NULL)
475  continue;
476 
477  // Check an init.
478  encounteredError |= checkInit(vars[i].sym->type, &(vars[i].init));
479  }
480  return encounteredError ? NULL : this;
481 }
482 
483 void DeclStmt::Print(int indent) const {
484  printf("%*cDecl Stmt:", indent, ' ');
485  pos.Print();
486  for (unsigned int i = 0; i < vars.size(); ++i) {
487  printf("%*cVariable %s (%s)", indent + 4, ' ', vars[i].sym->name.c_str(),
488  vars[i].sym->type->GetString().c_str());
489  if (vars[i].init != NULL) {
490  printf(" = ");
491  vars[i].init->Print();
492  }
493  printf("\n");
494  }
495  printf("\n");
496 }
497 
498 int DeclStmt::EstimateCost() const { return 0; }
499 
500 ///////////////////////////////////////////////////////////////////////////
501 // IfStmt
502 
503 IfStmt::IfStmt(Expr *t, Stmt *ts, Stmt *fs, bool checkCoherence, SourcePos p)
504  : Stmt(p, IfStmtID), test(t), trueStmts(ts), falseStmts(fs),
505  doAllCheck(checkCoherence && !g->opt.disableCoherentControlFlow) {}
506 
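/** Emit the code for one arm of an 'if' statement: add an instrumentation
    point labeled with the given string and emit the statements, wrapping them
    in a new scope unless they are a StmtList (which manages its own scope). */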
507 static void lEmitIfStatements(FunctionEmitContext *ctx, Stmt *stmts, const char *trueOrFalse) {
508  if (!stmts)
509  return;
510 
511  if (llvm::dyn_cast<StmtList>(stmts) == NULL)
512  ctx->StartScope();
513  ctx->AddInstrumentationPoint(trueOrFalse);
514  stmts->EmitCode(ctx);
515  if (llvm::dyn_cast<const StmtList>(stmts) == NULL)
516  ctx->EndScope();
517 }
518 
519 /** Returns true if the "true" block for the if statement consists of a
520  single 'break' statement, and the "false" block is empty. */
521 /*
522 static bool
523 lCanApplyBreakOptimization(Stmt *trueStmts, Stmt *falseStmts) {
524  if (falseStmts != NULL) {
525  if (StmtList *sl = llvm::dyn_cast<StmtList>(falseStmts)) {
526  return (sl->stmts.size() == 0);
527  }
528  else
529  return false;
530  }
531 
532  if (llvm::dyn_cast<BreakStmt>(trueStmts))
533  return true;
534  else if (StmtList *sl = llvm::dyn_cast<StmtList>(trueStmts))
535  return (sl->stmts.size() == 1 &&
536  llvm::dyn_cast<BreakStmt>(sl->stmts[0]) != NULL);
537  else
538  return false;
539 }
540 */
541 
542 void IfStmt::EmitCode(FunctionEmitContext *ctx) const {
543  // First check all of the things that might happen due to errors
544  // earlier in compilation and bail out if needed so that we don't
545  // dereference NULL pointers in the below...
546  if (!ctx->GetCurrentBasicBlock())
547  return;
548  if (!test)
549  return;
550  const Type *testType = test->GetType();
551  if (!testType)
552  return;
553 
554  ctx->SetDebugPos(pos);
555  bool isUniform = testType->IsUniformType();
556 
557  llvm::Value *testValue = test->GetValue(ctx);
558  if (testValue == NULL)
559  return;
560 
561 #ifdef ISPC_NVPTX_ENABLED
562 #if 0
563  if (!isUniform && g->target->getISA() == Target::NVPTX)
564  {
565  /* With "nvptx" target, SIMT hardware takes care of non-uniform
566  * control flow. We trick ISPC to generate uniform control flow.
567  */
568  testValue = ctx->ExtractInst(testValue, 0);
569  isUniform = true;
570  }
571 #endif
572 #endif /* ISPC_NVPTX_ENABLED */
573 
574  if (isUniform) {
575  ctx->StartUniformIf();
576  if (doAllCheck)
577  Warning(test->pos, "Uniform condition supplied to \"cif\" statement.");
578 
579  // 'If' statements with uniform conditions are relatively
580  // straightforward. We evaluate the condition and then jump to
581  // either the 'then' or 'else' clause depending on its value.
582  llvm::BasicBlock *bthen = ctx->CreateBasicBlock("if_then");
583  llvm::BasicBlock *belse = ctx->CreateBasicBlock("if_else");
584  llvm::BasicBlock *bexit = ctx->CreateBasicBlock("if_exit");
585 
586  // Jump to the appropriate basic block based on the value of
587  // the 'if' test
588  ctx->BranchInst(bthen, belse, testValue);
589 
590  // Emit code for the 'true' case
591  ctx->SetCurrentBasicBlock(bthen);
592  lEmitIfStatements(ctx, trueStmts, "true");
593  if (ctx->GetCurrentBasicBlock())
594  ctx->BranchInst(bexit);
595 
596  // Emit code for the 'false' case
597  ctx->SetCurrentBasicBlock(belse);
598  lEmitIfStatements(ctx, falseStmts, "false");
599  if (ctx->GetCurrentBasicBlock())
600  ctx->BranchInst(bexit);
601 
602  // Set the active basic block to the newly-created exit block
603  // so that subsequent emitted code starts there.
604  ctx->SetCurrentBasicBlock(bexit);
605  ctx->EndIf();
606  }
607  /*
608  // Disabled for performance reasons. Change to an optional compile-time opt switch.
609  else if (lCanApplyBreakOptimization(trueStmts, falseStmts)) {
610  // If we have a simple break statement inside the 'if' and are
611  // under varying control flow, just update the execution mask
612  // directly and don't emit code for the statements. This leads to
613  // better code for this case--this is surprising and should be
614  // root-caused further, but for now this gives us performance
615  // benefit in this case.
616  ctx->SetInternalMaskAndNot(ctx->GetInternalMask(), testValue);
617  }
618  */
619  else
620  emitVaryingIf(ctx, testValue);
621 }
622 
623 Stmt *IfStmt::TypeCheck() {
624  if (test != NULL) {
625  const Type *testType = test->GetType();
626  if (testType != NULL) {
627  bool isUniform = (testType->IsUniformType() && !g->opt.disableUniformControlFlow);
628  test = TypeConvertExpr(test, isUniform ? AtomicType::UniformBool : AtomicType::VaryingBool,
629  "\"if\" statement test");
630  if (test == NULL)
631  return NULL;
632  }
633  }
634 
635  return this;
636 }
637 
638 int IfStmt::EstimateCost() const {
639  const Type *type;
640  if (test == NULL || (type = test->GetType()) == NULL)
641  return 0;
642 
643  return type->IsUniformType() ? COST_UNIFORM_IF : COST_VARYING_IF;
644 }
645 
646 void IfStmt::Print(int indent) const {
647  printf("%*cIf Stmt %s", indent, ' ', doAllCheck ? "DO ALL CHECK" : "");
648  pos.Print();
649  printf("\n%*cTest: ", indent + 4, ' ');
650  test->Print();
651  printf("\n");
652  if (trueStmts) {
653  printf("%*cTrue:\n", indent + 4, ' ');
654  trueStmts->Print(indent + 8);
655  }
656  if (falseStmts) {
657  printf("%*cFalse:\n", indent + 4, ' ');
658  falseStmts->Print(indent + 8);
659  }
660 }
661 
662 /** Emit code to run both the true and false statements for the if test,
663  with the mask set appropriately before running each one.
664 */
665 void IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask, llvm::Value *test) const {
666  if (trueStmts) {
667  ctx->SetInternalMaskAnd(oldMask, test);
668  lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
669  // under varying control flow, returns can't stop instruction
670  // emission, so this better be non-NULL...
671  AssertPos(pos, ctx->GetCurrentBasicBlock());
672  }
673  if (falseStmts) {
674  ctx->SetInternalMaskAndNot(oldMask, test);
675  lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
676  AssertPos(pos, ctx->GetCurrentBasicBlock());
677  }
678 }
679 
680 /** Emit code for an if test that checks the mask and the test values and
681  tries to be smart about jumping over code that doesn't need to be run.
682  */
683 void IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
684  llvm::Value *oldMask = ctx->GetInternalMask();
685  if (doAllCheck) {
686  // We can't tell if the mask going into the if is all on at
687  // compile time. Emit code to check for this and then either run
688  // the code for the 'all on' or the 'mixed' case depending on the
689  // mask's value at runtime.
690  llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("cif_mask_all");
691  llvm::BasicBlock *bMixedOn = ctx->CreateBasicBlock("cif_mask_mixed");
692  llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done");
693 
694  // Jump to either bAllOn or bMixedOn, depending on the mask's value
695  llvm::Value *maskAllQ = ctx->All(ctx->GetFullMask());
696  ctx->BranchInst(bAllOn, bMixedOn, maskAllQ);
697 
698  // Emit code for the 'mask all on' case
699  ctx->SetCurrentBasicBlock(bAllOn);
700  emitMaskAllOn(ctx, ltest, bDone);
701 
702  // And emit code for the mixed mask case
703  ctx->SetCurrentBasicBlock(bMixedOn);
704  emitMaskMixed(ctx, oldMask, ltest, bDone);
705 
706  // When done, set the current basic block to the block that the two
707  // paths above jump to when they're done.
708  ctx->SetCurrentBasicBlock(bDone);
709  } else if (trueStmts != NULL || falseStmts != NULL) {
710  // If there is nothing that is potentially unsafe to run with all
711  // lanes off in the true and false statements and if the total
712  // complexity of those two is relatively simple, then we'll go
713  // ahead and emit straightline code that runs both sides, updating
714  // the mask accordingly. This is useful for efficiently compiling
715  // things like:
716  //
717  // if (foo) x = 0;
718  // else ++x;
719  //
720  // Where the overhead of checking if any of the program instances wants
721  // to run one side or the other is more than the actual computation.
722  // SafeToRunWithMaskAllOff() checks to make sure that we don't do this
723  // for potentially dangerous code like:
724  //
725  // if (index < count) array[index] = 0;
726  //
727  // where our use of blend for conditional assignments doesn't check
728  // for the 'all lanes' off case.
729  int trueFalseCost = (::EstimateCost(trueStmts) + ::EstimateCost(falseStmts));
730  bool costIsAcceptable = (trueFalseCost < PREDICATE_SAFE_IF_STATEMENT_COST);
731 
732  bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) && SafeToRunWithMaskAllOff(falseStmts));
733 
734  Debug(pos, "If statement: true cost %d (safe %d), false cost %d (safe %d).", ::EstimateCost(trueStmts),
735  (int)SafeToRunWithMaskAllOff(trueStmts), ::EstimateCost(falseStmts),
736  (int)SafeToRunWithMaskAllOff(falseStmts));
737 
738  if (safeToRunWithAllLanesOff && (costIsAcceptable || g->opt.disableCoherentControlFlow)) {
739  ctx->StartVaryingIf(oldMask);
740  emitMaskedTrueAndFalse(ctx, oldMask, ltest);
741  AssertPos(pos, ctx->GetCurrentBasicBlock());
742  ctx->EndIf();
743  } else {
744  llvm::BasicBlock *bDone = ctx->CreateBasicBlock("if_done");
745  emitMaskMixed(ctx, oldMask, ltest, bDone);
746  ctx->SetCurrentBasicBlock(bDone);
747  }
748  }
749 }
750 
751 /** Emits code for 'if' tests under the case where we know that the program
752  mask is all on going into the 'if'.
753  */
754 void IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest, llvm::BasicBlock *bDone) const {
755  // We start by explicitly storing "all on" into the mask. Note
756  // that this doesn't change its actual value, but doing so lets the
757  // compiler see what's going on so that subsequent optimizations for
758  // code emitted here can operate with the knowledge that the mask is
759  // definitely all on (until it modifies the mask itself).
760  AssertPos(pos, !g->opt.disableCoherentControlFlow);
761  if (!g->opt.disableMaskAllOnOptimizations)
762  ctx->SetInternalMask(LLVMMaskAllOn);
763  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
764  if (!g->opt.disableMaskAllOnOptimizations)
765  ctx->SetFunctionMask(LLVMMaskAllOn);
766 
767  // First, check the value of the test. If it's all on, then we jump to
768  // a basic block that will only have code for the true case.
769  llvm::BasicBlock *bTestAll = ctx->CreateBasicBlock("cif_test_all");
770  llvm::BasicBlock *bTestNoneCheck = ctx->CreateBasicBlock("cif_test_none_check");
771  llvm::Value *testAllQ = ctx->All(ltest);
772  ctx->BranchInst(bTestAll, bTestNoneCheck, testAllQ);
773 
774  // Emit code for the 'test is all true' case
775  ctx->SetCurrentBasicBlock(bTestAll);
776  ctx->StartVaryingIf(LLVMMaskAllOn);
777  lEmitIfStatements(ctx, trueStmts, "if: all on mask, expr all true");
778  ctx->EndIf();
779  if (ctx->GetCurrentBasicBlock() != NULL)
780  // bblock may legitimately be NULL since if there's a return stmt
781  // or break or continue we can actually jump and end emission since
782  // we know all of the lanes are following this path...
783  ctx->BranchInst(bDone);
784 
785  // The test isn't all true. Now emit code to determine if it's all
786  // false, or has mixed values.
787  ctx->SetCurrentBasicBlock(bTestNoneCheck);
788  llvm::BasicBlock *bTestNone = ctx->CreateBasicBlock("cif_test_none");
789  llvm::BasicBlock *bTestMixed = ctx->CreateBasicBlock("cif_test_mixed");
790  llvm::Value *testMixedQ = ctx->Any(ltest);
791  ctx->BranchInst(bTestMixed, bTestNone, testMixedQ);
792 
793  // Emit code for the 'test is all false' case
794  ctx->SetCurrentBasicBlock(bTestNone);
795  ctx->StartVaryingIf(LLVMMaskAllOn);
796  lEmitIfStatements(ctx, falseStmts, "if: all on mask, expr all false");
797  ctx->EndIf();
798  if (ctx->GetCurrentBasicBlock())
799  // bblock may be NULL since if there's a return stmt or break or
800  // continue we can actually jump or whatever and end emission...
801  ctx->BranchInst(bDone);
802 
803  // Finally emit code for the 'mixed true/false' case. We unavoidably
804  // need to run both the true and the false statements.
805  ctx->SetCurrentBasicBlock(bTestMixed);
806  ctx->StartVaryingIf(LLVMMaskAllOn);
807  emitMaskedTrueAndFalse(ctx, LLVMMaskAllOn, ltest);
808  // In this case, return/break/continue isn't allowed to jump and end
809  // emission.
810  AssertPos(pos, ctx->GetCurrentBasicBlock());
811  ctx->EndIf();
812  ctx->BranchInst(bDone);
813 
814  ctx->SetCurrentBasicBlock(bDone);
815  ctx->SetFunctionMask(oldFunctionMask);
816 }
817 
818 /** Emit code for an 'if' test where the lane mask is known to be mixed
819  on/off going into it.
820  */
821 void IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask, llvm::Value *ltest,
822  llvm::BasicBlock *bDone) const {
823  ctx->StartVaryingIf(oldMask);
824  llvm::BasicBlock *bNext = ctx->CreateBasicBlock("safe_if_after_true");
825 
826  llvm::BasicBlock *bRunTrue = ctx->CreateBasicBlock("safe_if_run_true");
827  ctx->SetInternalMaskAnd(oldMask, ltest);
828 
829  // Do any of the program instances want to run the 'true'
830  // block? If not, jump ahead to bNext.
831 
832 #ifdef ISPC_NVPTX_ENABLED
833 #if 0
834  llvm::Value *maskAnyTrueQ = ctx->ExtractInst(ctx->GetFullMask(),0);
835 #else
836  llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
837 #endif
838 #else /* ISPC_NVPTX_ENABLED */
839  llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
840 #endif /* ISPC_NVPTX_ENABLED */
841 
842  ctx->BranchInst(bRunTrue, bNext, maskAnyTrueQ);
843 
844  // Emit statements for true
845  ctx->SetCurrentBasicBlock(bRunTrue);
846  if (trueStmts != NULL)
847  lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
848  AssertPos(pos, ctx->GetCurrentBasicBlock());
849  ctx->BranchInst(bNext);
850  ctx->SetCurrentBasicBlock(bNext);
851 
852  // False...
853  llvm::BasicBlock *bRunFalse = ctx->CreateBasicBlock("safe_if_run_false");
854  ctx->SetInternalMaskAndNot(oldMask, ltest);
855 
856  // Similarly, check to see if any of the instances want to
857  // run the 'false' block...
858 
859 #ifdef ISPC_NVPTX_ENABLED
860 #if 0
861  llvm::Value *maskAnyFalseQ = ctx->ExtractInst(ctx->GetFullMask(),0);
862 #else
863  llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
864 #endif
865 #else /* ISPC_NVPTX_ENABLED */
866  llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
867 #endif /* ISPC_NVPTX_ENABLED */
868  ctx->BranchInst(bRunFalse, bDone, maskAnyFalseQ);
869 
870  // Emit code for false
871  ctx->SetCurrentBasicBlock(bRunFalse);
872  if (falseStmts)
873  lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
874  AssertPos(pos, ctx->GetCurrentBasicBlock());
875 
876  ctx->BranchInst(bDone);
877  ctx->SetCurrentBasicBlock(bDone);
878  ctx->EndIf();
879 }
880 
881 ///////////////////////////////////////////////////////////////////////////
882 // DoStmt
883 
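/** Bookkeeping structure used while walking the AST to look for 'break' or
    'continue' statements that appear under varying control flow (see
    lHasVaryingBreakOrContinue() below). */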
884 struct VaryingBCCheckInfo {
885  VaryingBCCheckInfo() {
886  varyingControlFlowDepth = 0;
887  foundVaryingBreakOrContinue = false;
888  }
889 
890  int varyingControlFlowDepth;
891  bool foundVaryingBreakOrContinue;
892 };
893 
894 /** Returns true if the given node is an 'if' statement where the test
895  condition has varying type. */
896 static bool lIsVaryingFor(ASTNode *node) {
897  IfStmt *ifStmt;
898  if ((ifStmt = llvm::dyn_cast<IfStmt>(node)) != NULL && ifStmt->test != NULL) {
899  const Type *type = ifStmt->test->GetType();
900  return (type != NULL && type->IsVaryingType());
901  } else
902  return false;
903 }
904 
905 /** Preorder callback function for checking for varying breaks or
906  continues. */
907 static bool lVaryingBCPreFunc(ASTNode *node, void *d) {
908  VaryingBCCheckInfo *info = (VaryingBCCheckInfo *)d;
909 
910  // We found a break or continue statement; if we're under varying
911  // control flow, then bingo.
912  if ((llvm::dyn_cast<BreakStmt>(node) != NULL || llvm::dyn_cast<ContinueStmt>(node) != NULL) &&
913  info->varyingControlFlowDepth > 0) {
914  info->foundVaryingBreakOrContinue = true;
915  return false;
916  }
917 
918  // Update the count of the nesting depth of varying control flow if
919  // this is an if statement with a varying condition.
920  if (lIsVaryingFor(node))
921  ++info->varyingControlFlowDepth;
922 
923  if (llvm::dyn_cast<ForStmt>(node) != NULL || llvm::dyn_cast<DoStmt>(node) != NULL ||
924  llvm::dyn_cast<ForeachStmt>(node) != NULL)
925  // Don't recurse into these guys, since we don't care about varying
926  // breaks or continues within them...
927  return false;
928  else
929  return true;
930 }
931 
932 /** Postorder callback function for checking for varying breaks or
933  continues; decrement the varying control flow depth after the node's
934  children have been processed, if this is a varying if statement. */
935 static ASTNode *lVaryingBCPostFunc(ASTNode *node, void *d) {
936  VaryingBCCheckInfo *info = (VaryingBCCheckInfo *)d;
937  if (lIsVaryingFor(node))
938  --info->varyingControlFlowDepth;
939  return node;
940 }
941 
942 /** Given a statement, walk through it to see if there is a 'break' or
943  'continue' statement inside of its children, under varying control
944  flow. We need to detect this case for loops since what might otherwise
945  look like a 'uniform' loop needs to have code emitted to do all of the
946  lane management stuff if this is the case.
947  */
948 static bool lHasVaryingBreakOrContinue(Stmt *stmt) {
949  VaryingBCCheckInfo info;
950  WalkAST(stmt, lVaryingBCPreFunc, lVaryingBCPostFunc, &info);
951  return info.foundVaryingBreakOrContinue;
952 }
953 
954 DoStmt::DoStmt(Expr *t, Stmt *s, bool cc, SourcePos p)
955  : Stmt(p, DoStmtID), testExpr(t), bodyStmts(s), doCoherentCheck(cc && !g->opt.disableCoherentControlFlow) {}
956 
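/* Emit code for a 'do' statement: the loop body is emitted first and the test
   afterwards; StartLoop() registers the exit and test blocks as the targets
   for any 'break' and 'continue' statements inside the body. */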
957 void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
958  // Check for things that could be NULL due to earlier errors during
959  // compilation.
960  if (!ctx->GetCurrentBasicBlock())
961  return;
962  if (!testExpr || !testExpr->GetType())
963  return;
964 
965  bool uniformTest = testExpr->GetType()->IsUniformType();
966  if (uniformTest && doCoherentCheck)
967  Warning(testExpr->pos, "Uniform condition supplied to \"cdo\" "
968  "statement.");
969 
970  llvm::BasicBlock *bloop = ctx->CreateBasicBlock("do_loop");
971  llvm::BasicBlock *bexit = ctx->CreateBasicBlock("do_exit");
972  llvm::BasicBlock *btest = ctx->CreateBasicBlock("do_test");
973 
974  ctx->StartLoop(bexit, btest, uniformTest);
975 
976  // Start by jumping into the loop body
977  ctx->BranchInst(bloop);
978 
979  // And now emit code for the loop body
980  ctx->SetCurrentBasicBlock(bloop);
981  ctx->SetBlockEntryMask(ctx->GetFullMask());
982  ctx->SetDebugPos(pos);
983  // FIXME: the StmtList::EmitCode() method starts/stops a new
984  // scope around the statements in the list. So if the body is just a
985  // single statement (and thus not a statement list), we need a new
986  // scope, but we don't want two scopes in the StmtList case.
987  if (!bodyStmts || !llvm::dyn_cast<StmtList>(bodyStmts))
988  ctx->StartScope();
989 
990  ctx->AddInstrumentationPoint("do loop body");
991  if (doCoherentCheck && !uniformTest) {
992  // Check to see if the mask is all on
993  llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("do_all_on");
994  llvm::BasicBlock *bMixed = ctx->CreateBasicBlock("do_mixed");
995  ctx->BranchIfMaskAll(bAllOn, bMixed);
996 
997  // If so, emit code for the 'mask all on' case. In particular,
998  // explicitly set the mask to 'all on' (see rationale in
999  // IfStmt::emitCoherentTests()), and then emit the code for the
1000  // loop body.
1001  ctx->SetCurrentBasicBlock(bAllOn);
1002  if (!g->opt.disableMaskAllOnOptimizations)
1003  ctx->SetInternalMask(LLVMMaskAllOn);
1004  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
1005  if (!g->opt.disableMaskAllOnOptimizations)
1006  ctx->SetFunctionMask(LLVMMaskAllOn);
1007  if (bodyStmts)
1008  bodyStmts->EmitCode(ctx);
1009  AssertPos(pos, ctx->GetCurrentBasicBlock());
1010  ctx->SetFunctionMask(oldFunctionMask);
1011  ctx->BranchInst(btest);
1012 
1013  // The mask is mixed. Just emit the code for the loop body.
1014  ctx->SetCurrentBasicBlock(bMixed);
1015  if (bodyStmts)
1016  bodyStmts->EmitCode(ctx);
1017  AssertPos(pos, ctx->GetCurrentBasicBlock());
1018  ctx->BranchInst(btest);
1019  } else {
1020  // Otherwise just emit the code for the loop body. The current
1021  // mask is good.
1022  if (bodyStmts)
1023  bodyStmts->EmitCode(ctx);
1024  if (ctx->GetCurrentBasicBlock())
1025  ctx->BranchInst(btest);
1026  }
1027  // End the scope we started above, if needed.
1028  if (!bodyStmts || !llvm::dyn_cast<StmtList>(bodyStmts))
1029  ctx->EndScope();
1030 
1031  // Now emit code for the loop test.
1032  ctx->SetCurrentBasicBlock(btest);
1033  // First, emit code to restore the mask value for any lanes that
1034  // executed a 'continue' during the current loop before we go and emit
1035  // the code for the test. This is only necessary for varying loops;
1036  // 'uniform' loops just jump when they hit a continue statement and
1037  // don't mess with the mask.
1038  if (!uniformTest) {
1039  ctx->RestoreContinuedLanes();
1040  ctx->ClearBreakLanes();
1041  }
1042  llvm::Value *testValue = testExpr->GetValue(ctx);
1043  if (!testValue)
1044  return;
1045 
1046  if (uniformTest)
1047  // For the uniform case, just jump to the top of the loop or the
1048  // exit basic block depending on the value of the test.
1049  ctx->BranchInst(bloop, bexit, testValue);
1050  else {
1051  // For the varying case, update the mask based on the value of the
1052  // test. If any program instances still want to be running, jump
1053  // to the top of the loop. Otherwise, jump out.
1054  llvm::Value *mask = ctx->GetInternalMask();
1055  ctx->SetInternalMaskAnd(mask, testValue);
1056  ctx->BranchIfMaskAny(bloop, bexit);
1057  }
1058 
1059  // ...and we're done. Set things up for subsequent code to be emitted
1060  // in the right basic block.
1061  ctx->SetCurrentBasicBlock(bexit);
1062  ctx->EndLoop();
1063 }
1064 
1065 Stmt *DoStmt::TypeCheck() {
1066  const Type *testType;
1067  if (testExpr != NULL && (testType = testExpr->GetType()) != NULL) {
1068  // Should the test condition for the loop be uniform or varying?
1069  // It can be uniform only if three conditions are met:
1070  //
1071  // - First and foremost, the type of the test condition must be
1072  // uniform.
1073  //
1074  // - Second, the user must not have set the dis-optimization option
1075  // that disables uniform flow control.
1076  //
1077  // - Thirdly, and most subtly, there must not be any break or
1078  // continue statements inside the loop that are within the scope
1079  // of a 'varying' if statement. If there are, then we type cast
1080  // the test to be 'varying', so that the code generated for the
1081  // loop includes masking stuff, so that we can track which lanes
1082  // actually want to be running, accounting for breaks/continues.
1083  //
1084  bool uniformTest =
1085  (testType->IsUniformType() && !g->opt.disableUniformControlFlow && !lHasVaryingBreakOrContinue(bodyStmts));
1086  testExpr = TypeConvertExpr(testExpr, uniformTest ? AtomicType::UniformBool : AtomicType::VaryingBool,
1087  "\"do\" statement");
1088  }
1089 
1090  return this;
1091 }
1092 
1093 int DoStmt::EstimateCost() const {
1094  bool uniformTest = testExpr ? testExpr->GetType()->IsUniformType()
1095  : (!g->opt.disableUniformControlFlow && !lHasVaryingBreakOrContinue(bodyStmts));
1096 
1097  return uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP;
1098 }
1099 
1100 void DoStmt::Print(int indent) const {
1101  printf("%*cDo Stmt", indent, ' ');
1102  pos.Print();
1103  printf(":\n");
1104  printf("%*cTest: ", indent + 4, ' ');
1105  if (testExpr)
1106  testExpr->Print();
1107  printf("\n");
1108  if (bodyStmts) {
1109  printf("%*cStmts:\n", indent + 4, ' ');
1110  bodyStmts->Print(indent + 8);
1111  }
1112 }
1113 
1114 ///////////////////////////////////////////////////////////////////////////
1115 // ForStmt
1116 
1117 ForStmt::ForStmt(Stmt *i, Expr *t, Stmt *s, Stmt *st, bool cc, SourcePos p)
1118  : Stmt(p, ForStmtID), init(i), test(t), step(s), stmts(st),
1119  doCoherentCheck(cc && !g->opt.disableCoherentControlFlow) {}
1120 
1121 void ForStmt::EmitCode(FunctionEmitContext *ctx) const {
1122  if (!ctx->GetCurrentBasicBlock())
1123  return;
1124 
1125  llvm::BasicBlock *btest = ctx->CreateBasicBlock("for_test");
1126  llvm::BasicBlock *bstep = ctx->CreateBasicBlock("for_step");
1127  llvm::BasicBlock *bloop = ctx->CreateBasicBlock("for_loop");
1128  llvm::BasicBlock *bexit = ctx->CreateBasicBlock("for_exit");
1129 
1130  bool uniformTest = test ? test->GetType()->IsUniformType()
1131  : (!g->opt.disableUniformControlFlow && !lHasVaryingBreakOrContinue(stmts));
1132 
1133  ctx->StartLoop(bexit, bstep, uniformTest);
1134  ctx->SetDebugPos(pos);
1135 
1136  // If we have an initializer statement, start by emitting the code for
1137  // it and then jump into the loop test code. (Also start a new scope
1138  // since the initializer may be a declaration statement).
1139  if (init) {
1140  AssertPos(pos, llvm::dyn_cast<StmtList>(init) == NULL);
1141  ctx->StartScope();
1142  init->EmitCode(ctx);
1143  }
1144  ctx->BranchInst(btest);
1145 
1146  // Emit code to get the value of the loop test. If no test expression
1147  // was provided, just go with a true value.
1148  ctx->SetCurrentBasicBlock(btest);
1149  llvm::Value *ltest = NULL;
1150  if (test) {
1151  ltest = test->GetValue(ctx);
1152  if (!ltest) {
1153  // We need to end scope only if we had initializer statement.
1154  if (init) {
1155  ctx->EndScope();
1156  }
1157  ctx->EndLoop();
1158  return;
1159  }
1160  } else
1161  ltest = uniformTest ? LLVMTrue : LLVMBoolVector(true);
1162 
1163  // Now use the test's value. For a uniform loop, we can either jump to
1164  // the loop body or the loop exit, based on whether it's true or false.
1165  // For a non-uniform loop, we update the mask and jump into the loop if
1166  // any of the mask values are true.
1167  if (uniformTest) {
1168  if (doCoherentCheck)
1169  if (test)
1170  Warning(test->pos, "Uniform condition supplied to cfor/cwhile "
1171  "statement.");
1172  AssertPos(pos, ltest->getType() == LLVMTypes::BoolType);
1173  ctx->BranchInst(bloop, bexit, ltest);
1174  } else {
1175  llvm::Value *mask = ctx->GetInternalMask();
1176  ctx->SetInternalMaskAnd(mask, ltest);
1177  ctx->BranchIfMaskAny(bloop, bexit);
1178  }
1179 
1180  // On to emitting the code for the loop body.
1181  ctx->SetCurrentBasicBlock(bloop);
1182  ctx->SetBlockEntryMask(ctx->GetFullMask());
1183  ctx->AddInstrumentationPoint("for loop body");
1184  if (!llvm::dyn_cast_or_null<StmtList>(stmts))
1185  ctx->StartScope();
1186 
1187  if (doCoherentCheck && !uniformTest) {
1188  // For 'varying' loops with the coherence check, we start by
1189  // checking to see if the mask is all on, after it has been updated
1190  // based on the value of the test.
1191  llvm::BasicBlock *bAllOn = ctx->CreateBasicBlock("for_all_on");
1192  llvm::BasicBlock *bMixed = ctx->CreateBasicBlock("for_mixed");
1193  ctx->BranchIfMaskAll(bAllOn, bMixed);
1194 
1195  // Emit code for the mask being all on. Explicitly set the mask to
1196  // be on so that the optimizer can see that it's on (i.e. now that
1197  // the runtime test has passed, make this fact clear for code
1198  // generation at compile time here.)
1199  ctx->SetCurrentBasicBlock(bAllOn);
1200  if (!g->opt.disableMaskAllOnOptimizations)
1201  ctx->SetInternalMask(LLVMMaskAllOn);
1202  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
1203  if (!g->opt.disableMaskAllOnOptimizations)
1204  ctx->SetFunctionMask(LLVMMaskAllOn);
1205  if (stmts)
1206  stmts->EmitCode(ctx);
1207  AssertPos(pos, ctx->GetCurrentBasicBlock());
1208  ctx->SetFunctionMask(oldFunctionMask);
1209  ctx->BranchInst(bstep);
1210 
1211  // Emit code for the mask being mixed. We should never run the
1212  // loop with the mask all off, based on the BranchIfMaskAny call
1213  // above.
1214  ctx->SetCurrentBasicBlock(bMixed);
1215  if (stmts)
1216  stmts->EmitCode(ctx);
1217  ctx->BranchInst(bstep);
1218  } else {
1219  // For both uniform loops and varying loops without the coherence
1220  // check, we know that at least one program instance wants to be
1221  // running the loop, so just emit code for the loop body and jump
1222  // to the loop step code.
1223  if (stmts)
1224  stmts->EmitCode(ctx);
1225  if (ctx->GetCurrentBasicBlock())
1226  ctx->BranchInst(bstep);
1227  }
1228  if (!llvm::dyn_cast_or_null<StmtList>(stmts))
1229  ctx->EndScope();
1230 
1231  // Emit code for the loop step. First, restore the lane mask of any
1232  // program instances that executed a 'continue' during the previous
1233  // iteration. Then emit code for the loop step and then jump to the
1234  // test code.
1235  ctx->SetCurrentBasicBlock(bstep);
1236  ctx->RestoreContinuedLanes();
1237  ctx->ClearBreakLanes();
1238 
1239  if (step)
1240  step->EmitCode(ctx);
1241  ctx->BranchInst(btest);
1242 
1243  // Set the current emission basic block to the loop exit basic block
1244  ctx->SetCurrentBasicBlock(bexit);
1245  if (init)
1246  ctx->EndScope();
1247  ctx->EndLoop();
1248 }
1249 
1250 Stmt *ForStmt::TypeCheck() {
1251  const Type *testType;
1252  if (test && (testType = test->GetType()) != NULL) {
1253  // See comments in DoStmt::TypeCheck() regarding
1254  // 'uniformTest' and the type conversion here.
1255  bool uniformTest =
1256  (testType->IsUniformType() && !g->opt.disableUniformControlFlow && !lHasVaryingBreakOrContinue(stmts));
1257  test = TypeConvertExpr(test, uniformTest ? AtomicType::UniformBool : AtomicType::VaryingBool,
1258  "\"for\"/\"while\" statement");
1259  if (test == NULL)
1260  return NULL;
1261  }
1262 
1263  return this;
1264 }
1265 
1266 int ForStmt::EstimateCost() const {
1267  bool uniformTest = test ? test->GetType()->IsUniformType()
1268  : (!g->opt.disableUniformControlFlow && !lHasVaryingBreakOrContinue(stmts));
1269 
1270  return uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP;
1271 }
1272 
1273 void ForStmt::Print(int indent) const {
1274  printf("%*cFor Stmt", indent, ' ');
1275  pos.Print();
1276  printf("\n");
1277  if (init) {
1278  printf("%*cInit:\n", indent + 4, ' ');
1279  init->Print(indent + 8);
1280  }
1281  if (test) {
1282  printf("%*cTest: ", indent + 4, ' ');
1283  test->Print();
1284  printf("\n");
1285  }
1286  if (step) {
1287  printf("%*cStep:\n", indent + 4, ' ');
1288  step->Print(indent + 8);
1289  }
1290  if (stmts) {
1291  printf("%*cStmts:\n", indent + 4, ' ');
1292  stmts->Print(indent + 8);
1293  }
1294 }
1295 
1296 ///////////////////////////////////////////////////////////////////////////
1297 // BreakStmt
1298 
1299 BreakStmt::BreakStmt(SourcePos p) : Stmt(p, BreakStmtID) {}
1300 
1301 void BreakStmt::EmitCode(FunctionEmitContext *ctx) const {
1302  if (!ctx->GetCurrentBasicBlock())
1303  return;
1304 
1305  ctx->SetDebugPos(pos);
1306  ctx->Break(true);
1307 }
1308 
1309 Stmt *BreakStmt::TypeCheck() { return this; }
1310 
1311 int BreakStmt::EstimateCost() const { return COST_BREAK_CONTINUE; }
1312 
1313 void BreakStmt::Print(int indent) const {
1314  printf("%*cBreak Stmt", indent, ' ');
1315  pos.Print();
1316  printf("\n");
1317 }
1318 
1319 ///////////////////////////////////////////////////////////////////////////
1320 // ContinueStmt
1321 
1322 ContinueStmt::ContinueStmt(SourcePos p) : Stmt(p, ContinueStmtID) {}
1323 
1324 void ContinueStmt::EmitCode(FunctionEmitContext *ctx) const {
1325  if (!ctx->GetCurrentBasicBlock())
1326  return;
1327 
1328  ctx->SetDebugPos(pos);
1329  ctx->Continue(true);
1330 }
1331 
1332 Stmt *ContinueStmt::TypeCheck() { return this; }
1333 
1334 int ContinueStmt::EstimateCost() const { return COST_BREAK_CONTINUE; }
1335 
1336 void ContinueStmt::Print(int indent) const {
1337  printf("%*cContinue Stmt", indent, ' ');
1338  pos.Print();
1339  printf("\n");
1340 }
1341 
1342 ///////////////////////////////////////////////////////////////////////////
1343 // ForeachStmt
1344 
1345 ForeachStmt::ForeachStmt(const std::vector<Symbol *> &lvs, const std::vector<Expr *> &se, const std::vector<Expr *> &ee,
1346  Stmt *s, bool t, SourcePos pos)
1347  : Stmt(pos, ForeachStmtID), dimVariables(lvs), startExprs(se), endExprs(ee), isTiled(t), stmts(s) {}
1348 
1349 /* Given a uniform counter value in the memory location pointed to by
1350  uniformCounterPtr, compute the corresponding set of varying counter
1351  values for use within the loop body.
1352  */
1353 static llvm::Value *lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx, llvm::Value *uniformCounterPtr,
1354  llvm::Value *varyingCounterPtr, const std::vector<int> &spans) {
1355 #ifdef ISPC_NVPTX_ENABLED
1356  if (g->target->getISA() == Target::NVPTX) {
1357  // Smear the uniform counter value out to be varying
1358  llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
1359  llvm::Value *smearCounter = ctx->BroadcastValue(counter, LLVMTypes::Int32VectorType, "smear_counter");
1360 
1361  // Figure out the offsets; this is a little bit tricky. As an example,
1362  // consider a 2D tiled foreach loop, where we're running 8-wide and
1363  // where the inner dimension has a stride of 4 and the outer dimension
1364  // has a stride of 2. For the inner dimension, we want the offsets
1365  // (0,1,2,3,0,1,2,3), and for the outer dimension we want
1366  // (0,0,0,0,1,1,1,1).
1367  int32_t delta[ISPC_MAX_NVEC];
1368  const int vecWidth = 32;
1369  std::vector<llvm::Constant *> constDeltaList;
1370  for (int i = 0; i < vecWidth; ++i) {
1371  int d = i;
1372  // First, account for the effect of any dimensions at deeper
1373  // nesting levels than the current one.
1374  int prevDimSpanCount = 1;
1375  for (int j = dim; j < nDims - 1; ++j)
1376  prevDimSpanCount *= spans[j + 1];
1377  d /= prevDimSpanCount;
1378 
1379  // And now with what's left, figure out our own offset
1380  delta[i] = d % spans[dim];
1381  constDeltaList.push_back(LLVMInt8(delta[i]));
1382  }
1383 
1384  llvm::ArrayType *ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
1385  // llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
1386 
1387  llvm::GlobalVariable *globalDelta = new llvm::GlobalVariable(
1388  /*Module=*/*m->module,
1389  /*Type=*/ArrayDelta,
1390  /*isConstant=*/true,
1391  /*Linkage=*/llvm::GlobalValue::PrivateLinkage,
1392  /*Initializer=*/0, // has initializer, specified below
1393  /*Name=*/"constDeltaForeach");
1394 #if 0
1395  /*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
1396  /*unsigned AddressSpace=*/4 /*constant*/);
1397 #endif
1398 
1399  llvm::Constant *constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
1400 
1401  globalDelta->setInitializer(constDelta);
1402  llvm::Function *func_program_index = m->module->getFunction("__program_index");
1403  llvm::Value *laneIdx =
1404  ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value *>(), "foreach__programIndex");
1405 
1406  std::vector<llvm::Value *> ptr_arrayidx_indices;
1407  ptr_arrayidx_indices.push_back(LLVMInt32(0));
1408  ptr_arrayidx_indices.push_back(laneIdx);
1409 #if 1
1410 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1411  llvm::Instruction *ptr_arrayidx =
1412  llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
1413 #else
1414  llvm::Instruction *ptr_arrayidx = llvm::GetElementPtrInst::Create(NULL, globalDelta, ptr_arrayidx_indices,
1415  "arrayidx", ctx->GetCurrentBasicBlock());
1416 #endif
1417  llvm::LoadInst *int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
1418  llvm::Value *int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
1419 
1420  llvm::VectorType *VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
1421  llvm::UndefValue *const_packed_41 = llvm::UndefValue::get(VectorTy_2);
1422 
1423  llvm::InsertElementInst *packed_43 = llvm::InsertElementInst::Create(
1424  // llvm::UndefValue(LLVMInt32Vector),
1425  const_packed_41, int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
1426 #endif
1427 
1428  // Add the deltas to compute the varying counter values; store the
1429  // result to memory and then return it directly as well.
1430 #if 0
1431  llvm::Value *varyingCounter =
1432  ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
1433  LLVMInt32Vector(delta), "iter_val");
1434 #else
1435  llvm::Value *varyingCounter = ctx->BinaryOperator(llvm::Instruction::Add, smearCounter, packed_43, "iter_val");
1436 #endif
1437  ctx->StoreInst(varyingCounter, varyingCounterPtr);
1438  return varyingCounter;
1439  }
1440 #endif /* ISPC_NVPTX_ENABLED */
1441 
1442  // Smear the uniform counter value out to be varying
1443  llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
1444  llvm::Value *smearCounter = ctx->BroadcastValue(counter, LLVMTypes::Int32VectorType, "smear_counter");
1445 
1446  // Figure out the offsets; this is a little bit tricky. As an example,
1447  // consider a 2D tiled foreach loop, where we're running 8-wide and
1448  // where the inner dimension has a stride of 4 and the outer dimension
1449  // has a stride of 2. For the inner dimension, we want the offsets
1450  // (0,1,2,3,0,1,2,3), and for the outer dimension we want
1451  // (0,0,0,0,1,1,1,1).
1452  int32_t delta[ISPC_MAX_NVEC];
1453  for (int i = 0; i < g->target->getVectorWidth(); ++i) {
1454  int d = i;
1455  // First, account for the effect of any dimensions at deeper
1456  // nesting levels than the current one.
1457  int prevDimSpanCount = 1;
1458  for (int j = dim; j < nDims - 1; ++j)
1459  prevDimSpanCount *= spans[j + 1];
1460  d /= prevDimSpanCount;
1461 
1462  // And now with what's left, figure out our own offset
1463  delta[i] = d % spans[dim];
1464  }
1465 
1466  // Add the deltas to compute the varying counter values; store the
1467  // result to memory and then return it directly as well.
1468  llvm::Value *varyingCounter =
1469  ctx->BinaryOperator(llvm::Instruction::Add, smearCounter, LLVMInt32Vector(delta), "iter_val");
1470  ctx->StoreInst(varyingCounter, varyingCounterPtr);
1471  return varyingCounter;
1472 }
1473 
1474 /** Returns the integer log2 of the given integer. */
1475 static int lLog2(int i) {
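 // e.g., lLog2(16) == 4 and lLog2(15) == 3; i.e., floor(log2(i)) for i > 0.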
1476  int ret = 0;
1477  while (i != 0) {
1478  ++ret;
1479  i >>= 1;
1480  }
1481  return ret - 1;
1482 }
1483 
1484 /* Figure out how many elements to process in each dimension for each time
1485  through a foreach loop. The untiled case is easy; all of the outer
1486  dimensions up until the innermost one have a span of 1, and the
1487  innermost one takes the entire vector width. For the tiled case, we
1488  give wider spans to the innermost dimensions while also trying to
1489  generate relatively square domains.
1490 
1491  This code works recursively from outer dimensions to inner dimensions.
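 For example, with this logic an 8-wide tiled 2D foreach gets spans of (2, 4)
 for the (outer, inner) dimensions, a 16-wide tiled 2D foreach gets (4, 4),
 and an untiled loop always gets spans of 1 for all but the innermost
 dimension, which gets the full vector width.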
1492  */
1493 static void lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a) {
1494  if (dimsLeft == 0) {
1495  // Nothing left to do but give all of the remaining work to the
1496  // innermost domain.
1497  *a = itemsLeft;
1498  return;
1499  }
1500 
1501  if (isTiled == false || (dimsLeft >= lLog2(itemsLeft)))
1502  // If we're not tiled, or if there are enough dimensions left that
1503  // giving this one any more than a span of one would mean that a
1504  // later dimension would have to have a span of one, give this one
1505  // a span of one to save the available items for later.
1506  *a = 1;
1507  else if (itemsLeft >= 16 && (dimsLeft == 1))
1508  // Special case to have 4x4 domains for the 2D case when running
1509  // 16-wide.
1510  *a = 4;
1511  else
1512  // Otherwise give this dimension a span of two.
1513  *a = 2;
1514 
1515  lGetSpans(dimsLeft - 1, nDims, itemsLeft / *a, isTiled, a + 1);
1516 }
1517 
1518 /* Emit code for a foreach statement. We effectively emit code to run the
1519  set of n-dimensional nested loops corresponding to the dimensionality of
1520  the foreach statement along with the extra logic to deal with mismatches
1521  between the vector width we're compiling to and the number of elements
1522  to process.
1523  */
1524 void ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
1525  if (ctx->GetCurrentBasicBlock() == NULL || stmts == NULL)
1526  return;
1527 
1528  llvm::BasicBlock *bbFullBody = ctx->CreateBasicBlock("foreach_full_body");
1529  llvm::BasicBlock *bbMaskedBody = ctx->CreateBasicBlock("foreach_masked_body");
1530  llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit");
1531 
1532  llvm::Value *oldMask = ctx->GetInternalMask();
1533  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
1534 
1535  ctx->SetDebugPos(pos);
1536  ctx->StartScope();
1537 
1538  ctx->SetInternalMask(LLVMMaskAllOn);
1539  ctx->SetFunctionMask(LLVMMaskAllOn);
1540 
1541  // This should be caught during typechecking
1542  AssertPos(pos, startExprs.size() == dimVariables.size() && endExprs.size() == dimVariables.size());
1543  int nDims = (int)dimVariables.size();
1544 
1545  ///////////////////////////////////////////////////////////////////////
1546  // Setup: compute the number of items we have to work on in each
1547  // dimension and a number of derived values.
1548  std::vector<llvm::BasicBlock *> bbReset, bbStep, bbTest;
1549  std::vector<llvm::Value *> startVals, endVals, uniformCounterPtrs;
1550  std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
1551 
1552  std::vector<int> span(nDims, 0);
1553 #ifdef ISPC_NVPTX_ENABLED
1554  const int vectorWidth = g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth();
1555  lGetSpans(nDims - 1, nDims, vectorWidth, isTiled, &span[0]);
1556 #else /* ISPC_NVPTX_ENABLED */
1557  lGetSpans(nDims - 1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
1558 #endif /* ISPC_NVPTX_ENABLED */
1559 
1560  for (int i = 0; i < nDims; ++i) {
1561  // Basic blocks that we'll fill in later with the looping logic for
1562  // this dimension.
1563  bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
1564  if (i < nDims - 1)
1565  // stepping for the innermost dimension is handled specially
1566  bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
1567  bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
1568 
1569  // Start and end value for this loop dimension
1570  llvm::Value *sv = startExprs[i]->GetValue(ctx);
1571  llvm::Value *ev = endExprs[i]->GetValue(ctx);
1572  if (sv == NULL || ev == NULL)
1573  return;
1574  startVals.push_back(sv);
1575  endVals.push_back(ev);
1576 
1577  // nItems = endVal - startVal
1578  llvm::Value *nItems = ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
1579 
1580  // nExtras = nItems % (span for this dimension)
1581  // This gives us the number of extra elements we need to deal with
1582  // at the end of the loop for this dimension that don't fit cleanly
1583  // into a vector width.
1584  nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems, LLVMInt32(span[i]), "nextras"));
1585 
1586  // alignedEnd = endVal - nExtras
1587  alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev, nExtras[i], "aligned_end"));
1588 
1589  ///////////////////////////////////////////////////////////////////////
1590  // Each dimension has a loop counter that is a uniform value that
1591  // goes from startVal to endVal, in steps of the span for this
1592  // dimension. Its value is only used internally here for looping
1593  // logic and isn't directly available in the user's program code.
1594  uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type, "counter"));
1595  ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
1596 
1597  // There is also a varying variable that holds the set of index
1598  // values for each dimension in the current loop iteration; this is
1599  // the value that is program-visible.
1600  dimVariables[i]->storagePtr = ctx->AllocaInst(LLVMTypes::Int32VectorType, dimVariables[i]->name.c_str());
1601  dimVariables[i]->parentFunction = ctx->GetFunction();
1603 
1604  // Each dimension also maintains a mask that represents which of
1605  // the varying elements in the current iteration should be
1606  // processed. (i.e. this is used to disable the lanes that have
1607  // out-of-bounds offsets.)
1608  extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
1609  ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
1610  }
1611 
1613 
1614  // On to the outermost loop's test
1615  ctx->BranchInst(bbTest[0]);
1616 
1617  ///////////////////////////////////////////////////////////////////////////
1618  // foreach_reset: this code runs when we need to reset the counter for
1619  // a given dimension in preparation for running through its loop again,
1620  // after the enclosing level advances its counter.
1621  for (int i = 0; i < nDims; ++i) {
1622  ctx->SetCurrentBasicBlock(bbReset[i]);
1623  if (i == 0)
1624  ctx->BranchInst(bbExit);
1625  else {
1626  ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
1627  ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
1628  ctx->BranchInst(bbStep[i - 1]);
1629  }
1630  }
1631 
1632  ///////////////////////////////////////////////////////////////////////////
1633  // foreach_step: increment the uniform counter by this dimension's span.
1634  // Note that we don't also increment the varying counter here; we just
1635  // generate its value when we need it in the loop body. We don't do
1636  // this for the innermost dimension, which has a more complex stepping
1637  // structure.
1638  for (int i = 0; i < nDims - 1; ++i) {
1639  ctx->SetCurrentBasicBlock(bbStep[i]);
1640  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
1641  llvm::Value *newCounter =
1642  ctx->BinaryOperator(llvm::Instruction::Add, counter, LLVMInt32(span[i]), "new_counter");
1643  ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
1644  ctx->BranchInst(bbTest[i]);
1645  }
1646 
1647  ///////////////////////////////////////////////////////////////////////////
1648  // foreach_test (for all dimensions other than the innermost...)
1649  std::vector<llvm::Value *> inExtras;
1650  for (int i = 0; i < nDims - 1; ++i) {
1651  ctx->SetCurrentBasicBlock(bbTest[i]);
1652 
1653  llvm::Value *haveExtras =
1654  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT, endVals[i], alignedEnd[i], "have_extras");
1655 
1656  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
1657  llvm::Value *atAlignedEnd =
1658  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, counter, alignedEnd[i], "at_aligned_end");
1659  llvm::Value *inEx = ctx->BinaryOperator(llvm::Instruction::And, haveExtras, atAlignedEnd, "in_extras");
1660 
1661  if (i == 0)
1662  inExtras.push_back(inEx);
1663  else
1664  inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx, inExtras[i - 1], "in_extras_all"));
1665 
1666  llvm::Value *varyingCounter =
1667  lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i], dimVariables[i]->storagePtr, span);
1668 
1669  llvm::Value *smearEnd = ctx->BroadcastValue(endVals[i], LLVMTypes::Int32VectorType, "smear_end");
1670 
1671  // Do a vector compare of its value to the end value to generate a
1672  // mask for this last bit of work.
1673  llvm::Value *emask = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, varyingCounter, smearEnd);
1674  emask = ctx->I1VecToBoolVec(emask);
1675 
1676  if (i == 0)
1677  ctx->StoreInst(emask, extrasMaskPtrs[i]);
1678  else {
1679  llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i - 1]);
1680  llvm::Value *newMask = ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask, "extras_mask");
1681  ctx->StoreInst(newMask, extrasMaskPtrs[i]);
1682  }
1683 
1684  llvm::Value *notAtEnd = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, counter, endVals[i]);
1685  ctx->BranchInst(bbTest[i + 1], bbReset[i], notAtEnd);
1686  }
1687 
1688  ///////////////////////////////////////////////////////////////////////////
1689  // foreach_test (for innermost dimension)
1690  //
1691  // All of the outer dimensions are handled generically--basically as a
1692  // for() loop from the start value to the end value, where at each loop
1693  // test, we compute the mask of active elements for the current
1694  // dimension and then update an overall mask that is the AND
1695  // combination of all of the outer ones.
1696  //
1697  // The innermost loop is handled specially, for performance purposes.
1698  // When starting the innermost dimension, we start by checking once
1699  // whether any of the outer dimensions has set the mask to be
1700  // partially-active or not. We follow different code paths for these
1701  // two cases, taking advantage of the knowledge that the mask is all
1702  // on, when this is the case.
1703  //
1704  // In each of these code paths, we start with a loop from the starting
1705  // value to the aligned end value for the innermost dimension; we can
1706  // guarantee that the innermost loop will have an "all on" mask (as far
1707  // as its dimension is concerned) for the duration of this loop. Doing
1708  // so allows us to emit code that assumes the mask is all on (for the
1709  // case where none of the outer dimensions has set the mask to be
1710  // partially on), or allows us to emit code that just uses the mask
1711  // from the outer dimensions directly (for the case where they have).
1712  //
1713  // After this loop, we just need to deal with one vector's worth of
1714  // "ragged extra bits", where the mask used includes the effect of the
1715  // mask for the innermost dimension.
1716  //
1717  // We start out this process by emitting the check that determines
1718  // whether any of the enclosing dimensions is partially active
1719  // (i.e. processing extra elements that don't exactly fit into a
1720  // vector).
1721  llvm::BasicBlock *bbOuterInExtras = ctx->CreateBasicBlock("outer_in_extras");
1722  llvm::BasicBlock *bbOuterNotInExtras = ctx->CreateBasicBlock("outer_not_in_extras");
1723 
1724  ctx->SetCurrentBasicBlock(bbTest[nDims - 1]);
1725  if (inExtras.size())
1726  ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras, inExtras.back());
1727  else
1728  // for a 1D iteration domain, we certainly don't have any enclosing
1729  // dimensions that are processing extra elements.
1730  ctx->BranchInst(bbOuterNotInExtras);
1731 
1732  ///////////////////////////////////////////////////////////////////////////
1733  // One or more outer dimensions in extras, so we need to mask for the loop
1734  // body regardless. We break this into two cases, roughly:
1735  // for (counter = start; counter < alignedEnd; counter += step) {
1736  // // mask is all on for inner, so set mask to outer mask
1737  // // run loop body with mask
1738  // }
1739  // // counter == alignedEnd
1740  // if (counter < end) {
1741  // // set mask to outermask & (counter+programCounter < end)
1742  // // run loop body with mask
1743  // }
1744  llvm::BasicBlock *bbAllInnerPartialOuter = ctx->CreateBasicBlock("all_inner_partial_outer");
1745  llvm::BasicBlock *bbPartial = ctx->CreateBasicBlock("both_partial");
1746  ctx->SetCurrentBasicBlock(bbOuterInExtras);
1747  {
1748  // Update the varying counter value here, since all subsequent
1749  // blocks along this path need it.
1750  lUpdateVaryingCounter(nDims - 1, nDims, ctx, uniformCounterPtrs[nDims - 1], dimVariables[nDims - 1]->storagePtr,
1751  span);
1752 
1753  // here we just check to see if counter < alignedEnd
1754  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims - 1], "counter");
1755  llvm::Value *beforeAlignedEnd = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, counter,
1756  alignedEnd[nDims - 1], "before_aligned_end");
1757  ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
1758  }
1759 
1760  // Below we have a basic block that runs the loop body code for the
1761  // case where the mask is partially but not fully on. This same block
1762  // runs in multiple cases: both for handling any ragged extra data for
1763  // the innermost dimension and when outer dimensions have set the
1764  // mask to be partially on.
1765  //
1766  // The value stored in stepIndexAfterMaskedBodyPtr is used after each
1767  // execution of the body code to determine whether the innermost index
1768  // value should be incremented by the step (we're running the "for"
1769  // loop of full vectors at the innermost dimension, with outer
1770  // dimensions having set the mask to be partially on), or whether we're
1771  // running once for the ragged extra bits at the end of the innermost
1772  // dimension, in which case we're done with the innermost dimension and
1773  // should step the loop counter for the next enclosing dimension
1774  // instead.
1775  llvm::Value *stepIndexAfterMaskedBodyPtr = ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
1776 
1777  ///////////////////////////////////////////////////////////////////////////
1778  // We're in the inner loop part where the only masking is due to outer
1779  // dimensions but the innermost dimension fits fully into a vector's
1780  // width. Set the mask and jump to the masked loop body.
1781  ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter);
1782  {
1783  llvm::Value *mask;
1784  if (nDims == 1)
1785  // 1D loop; we shouldn't ever get here anyway
1786  mask = LLVMMaskAllOff;
1787  else
1788  mask = ctx->LoadInst(extrasMaskPtrs[nDims - 2]);
1789 
1790  ctx->SetInternalMask(mask);
1791 
1792  ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
1793  ctx->BranchInst(bbMaskedBody);
1794  }
1795 
1796  ///////////////////////////////////////////////////////////////////////////
1797  // We need to include the effect of the innermost dimension in the mask
1798  // for the final bits here
1799  ctx->SetCurrentBasicBlock(bbPartial);
1800  {
1801  llvm::Value *varyingCounter = ctx->LoadInst(dimVariables[nDims - 1]->storagePtr);
1802  llvm::Value *smearEnd = ctx->BroadcastValue(endVals[nDims - 1], LLVMTypes::Int32VectorType, "smear_end");
1803 
1804  llvm::Value *emask = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, varyingCounter, smearEnd);
1805  emask = ctx->I1VecToBoolVec(emask);
1806 
1807  if (nDims == 1) {
1808  ctx->SetInternalMask(emask);
1809  } else {
1810  llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims - 2]);
1811  llvm::Value *newMask = ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask, "extras_mask");
1812  ctx->SetInternalMask(newMask);
1813  }
1814 
1815  ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
1816 
1817  // check to see if counter != end; otherwise, the next step is not necessary
1818  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims - 1], "counter");
1819  llvm::Value *atEnd =
1820  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, counter, endVals[nDims - 1], "at_end");
1821  ctx->BranchInst(bbMaskedBody, bbReset[nDims - 1], atEnd);
1822  }
1823 
1824  ///////////////////////////////////////////////////////////////////////////
1825  // None of the outer dimensions is processing extras; along the lines
1826  // of above, we can express this as:
1827  // for (counter = start; counter < alignedEnd; counter += step) {
1828  // // mask is all on
1829  // // run loop body with mask all on
1830  // }
1831  // // counter == alignedEnd
1832  // if (counter < end) {
1833  // // set mask to (counter+programCounter < end)
1834  // // run loop body with mask
1835  // }
1836  llvm::BasicBlock *bbPartialInnerAllOuter = ctx->CreateBasicBlock("partial_inner_all_outer");
1837  ctx->SetCurrentBasicBlock(bbOuterNotInExtras);
1838  {
1839  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims - 1], "counter");
1840  llvm::Value *beforeAlignedEnd = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, counter,
1841  alignedEnd[nDims - 1], "before_aligned_end");
1842  ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter, beforeAlignedEnd);
1843  }
1844 
1845  ///////////////////////////////////////////////////////////////////////////
1846  // full_body: do a full vector's worth of work. We know that all
1847  // lanes will be running here, so we explicitly set the mask to be 'all
1848  // on'. This ends up being relatively straightforward: just update the
1849  // value of the varying loop counter and have the statements in the
1850  // loop body emit their code.
1851  llvm::BasicBlock *bbFullBodyContinue = ctx->CreateBasicBlock("foreach_full_continue");
1852  ctx->SetCurrentBasicBlock(bbFullBody);
1853  {
1856  lUpdateVaryingCounter(nDims - 1, nDims, ctx, uniformCounterPtrs[nDims - 1], dimVariables[nDims - 1]->storagePtr,
1857  span);
1858  ctx->SetContinueTarget(bbFullBodyContinue);
1859  ctx->AddInstrumentationPoint("foreach loop body (all on)");
1860  stmts->EmitCode(ctx);
1861  AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
1862  ctx->BranchInst(bbFullBodyContinue);
1863  }
1864  ctx->SetCurrentBasicBlock(bbFullBodyContinue);
1865  {
1866  ctx->RestoreContinuedLanes();
1867  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims - 1]);
1868  llvm::Value *newCounter =
1869  ctx->BinaryOperator(llvm::Instruction::Add, counter, LLVMInt32(span[nDims - 1]), "new_counter");
1870  ctx->StoreInst(newCounter, uniformCounterPtrs[nDims - 1]);
1871  ctx->BranchInst(bbOuterNotInExtras);
1872  }
1873 
1874  ///////////////////////////////////////////////////////////////////////////
1875  // We're done running blocks with the mask all on; see if the counter is
1876  // less than the end value, in which case we need to run the body one
1877  // more time to get the extra bits.
1878  llvm::BasicBlock *bbSetInnerMask = ctx->CreateBasicBlock("partial_inner_only");
1879  ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter);
1880  {
1881  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims - 1], "counter");
1882  llvm::Value *beforeFullEnd = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, counter,
1883  endVals[nDims - 1], "before_full_end");
1884  ctx->BranchInst(bbSetInnerMask, bbReset[nDims - 1], beforeFullEnd);
1885  }
1886 
1887  ///////////////////////////////////////////////////////////////////////////
1888  // The outer dimensions are all on, so the mask is just given by the
1889  // mask for the innermost dimension
1890  ctx->SetCurrentBasicBlock(bbSetInnerMask);
1891  {
1892  llvm::Value *varyingCounter = lUpdateVaryingCounter(nDims - 1, nDims, ctx, uniformCounterPtrs[nDims - 1],
1893  dimVariables[nDims - 1]->storagePtr, span);
1894  llvm::Value *smearEnd = ctx->BroadcastValue(endVals[nDims - 1], LLVMTypes::Int32VectorType, "smear_end");
1895  llvm::Value *emask = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, varyingCounter, smearEnd);
1896  emask = ctx->I1VecToBoolVec(emask);
1897  ctx->SetInternalMask(emask);
1898  ctx->SetBlockEntryMask(emask);
1899 
1900  ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
1901  ctx->BranchInst(bbMaskedBody);
1902  }
1903 
1904  ///////////////////////////////////////////////////////////////////////////
1905  // masked_body: set the mask and have the statements emit their
1906  // code again. Note that it's generally worthwhile having two copies
1907  // of the statements' code, since the code above is emitted with the
1908  // mask known to be all-on, which in turn leads to more efficient code
1909  // for that case.
1910  llvm::BasicBlock *bbStepInnerIndex = ctx->CreateBasicBlock("step_inner_index");
1911  llvm::BasicBlock *bbMaskedBodyContinue = ctx->CreateBasicBlock("foreach_masked_continue");
1912  ctx->SetCurrentBasicBlock(bbMaskedBody);
1913  {
1914  ctx->AddInstrumentationPoint("foreach loop body (masked)");
1915  ctx->SetContinueTarget(bbMaskedBodyContinue);
1917  ctx->SetBlockEntryMask(ctx->GetFullMask());
1918  stmts->EmitCode(ctx);
1920  ctx->BranchInst(bbMaskedBodyContinue);
1921  }
1922  ctx->SetCurrentBasicBlock(bbMaskedBodyContinue);
1923  {
1924  ctx->RestoreContinuedLanes();
1925  llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
1926  ctx->BranchInst(bbStepInnerIndex, bbReset[nDims - 1], stepIndex);
1927  }
1928 
1929  ///////////////////////////////////////////////////////////////////////////
1930  // step the innermost index, for the case where we're doing the
1931  // innermost for loop over full vectors.
1932  ctx->SetCurrentBasicBlock(bbStepInnerIndex);
1933  {
1934  llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims - 1]);
1935  llvm::Value *newCounter =
1936  ctx->BinaryOperator(llvm::Instruction::Add, counter, LLVMInt32(span[nDims - 1]), "new_counter");
1937  ctx->StoreInst(newCounter, uniformCounterPtrs[nDims - 1]);
1938  ctx->BranchInst(bbOuterInExtras);
1939  }
1940 
1941  ///////////////////////////////////////////////////////////////////////////
1942  // foreach_exit: All done. Restore the old mask and clean up
1943  ctx->SetCurrentBasicBlock(bbExit);
1944 
1945  ctx->SetInternalMask(oldMask);
1946  ctx->SetFunctionMask(oldFunctionMask);
1947 
1948  ctx->EndForeach();
1949  ctx->EndScope();
1950 }
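// Rough shape (illustrative pseudocode, not part of the original source) of
// the code emitted above for the simple 1D case "foreach (i = start ... end)"
// with vector width W and no enclosing partial dimensions:
//
//     for (counter = start; counter < alignedEnd; counter += W) {
//         i = counter + (0, 1, ..., W-1);   // full_body: mask all on
//         <loop body>
//     }
//     if (counter < end) {                  // partial_inner_only
//         i = counter + (0, 1, ..., W-1);
//         mask = (i < end);
//         <loop body>                       // masked_body
//     }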
1951 
1953  bool anyErrors = false;
1954  for (unsigned int i = 0; i < startExprs.size(); ++i) {
1955  if (startExprs[i] != NULL)
1956  startExprs[i] = TypeConvertExpr(startExprs[i], AtomicType::UniformInt32, "foreach starting value");
1957  anyErrors |= (startExprs[i] == NULL);
1958  }
1959  for (unsigned int i = 0; i < endExprs.size(); ++i) {
1960  if (endExprs[i] != NULL)
1961  endExprs[i] = TypeConvertExpr(endExprs[i], AtomicType::UniformInt32, "foreach ending value");
1962  anyErrors |= (endExprs[i] == NULL);
1963  }
1964 
1965  if (startExprs.size() < dimVariables.size()) {
1966  Error(pos,
1967  "Not enough initial values provided for \"foreach\" loop; "
1968  "got %d, expected %d\n",
1969  (int)startExprs.size(), (int)dimVariables.size());
1970  anyErrors = true;
1971  } else if (startExprs.size() > dimVariables.size()) {
1972  Error(pos,
1973  "Too many initial values provided for \"foreach\" loop; "
1974  "got %d, expected %d\n",
1975  (int)startExprs.size(), (int)dimVariables.size());
1976  anyErrors = true;
1977  }
1978 
1979  if (endExprs.size() < dimVariables.size()) {
1980  Error(pos,
1981  "Not enough initial values provided for \"foreach\" loop; "
1982  "got %d, expected %d\n",
1983  (int)endExprs.size(), (int)dimVariables.size());
1984  anyErrors = true;
1985  } else if (endExprs.size() > dimVariables.size()) {
1986  Error(pos,
1987  "Too many initial values provided for \"foreach\" loop; "
1988  "got %d, expected %d\n",
1989  (int)endExprs.size(), (int)dimVariables.size());
1990  anyErrors = true;
1991  }
1992 
1993  return anyErrors ? NULL : this;
1994 }
1995 
1997 
1998 void ForeachStmt::Print(int indent) const {
1999  printf("%*cForeach Stmt", indent, ' ');
2000  pos.Print();
2001  printf("\n");
2002 
2003  for (unsigned int i = 0; i < dimVariables.size(); ++i)
2004  if (dimVariables[i] != NULL)
2005  printf("%*cVar %d: %s\n", indent + 4, ' ', i, dimVariables[i]->name.c_str());
2006  else
2007  printf("%*cVar %d: NULL\n", indent + 4, ' ', i);
2008 
2009  printf("Start values:\n");
2010  for (unsigned int i = 0; i < startExprs.size(); ++i) {
2011  if (startExprs[i] != NULL)
2012  startExprs[i]->Print();
2013  else
2014  printf("NULL");
2015  if (i != startExprs.size() - 1)
2016  printf(", ");
2017  else
2018  printf("\n");
2019  }
2020 
2021  printf("End values:\n");
2022  for (unsigned int i = 0; i < endExprs.size(); ++i) {
2023  if (endExprs[i] != NULL)
2024  endExprs[i]->Print();
2025  else
2026  printf("NULL");
2027  if (i != endExprs.size() - 1)
2028  printf(", ");
2029  else
2030  printf("\n");
2031  }
2032 
2033  if (stmts != NULL) {
2034  printf("%*cStmts:\n", indent + 4, ' ');
2035  stmts->Print(indent + 8);
2036  }
2037 }
2038 
2039 ///////////////////////////////////////////////////////////////////////////
2040 // ForeachActiveStmt
2041 
2042 ForeachActiveStmt::ForeachActiveStmt(Symbol *s, Stmt *st, SourcePos pos) : Stmt(pos, ForeachActiveStmtID) {
2043  sym = s;
2044  stmts = st;
2045 }
2046 
2048  if (!ctx->GetCurrentBasicBlock())
2049  return;
2050 
2051  // Allocate storage for the symbol that we'll use for the uniform
2052  // variable that holds the current program instance in each loop
2053  // iteration.
2054  if (sym->type == NULL) {
2055  Assert(m->errorCount > 0);
2056  return;
2057  }
2060 
2061  ctx->SetDebugPos(pos);
2062  ctx->EmitVariableDebugInfo(sym);
2063 
2064  // The various basic blocks that we'll need in the below
2065  llvm::BasicBlock *bbFindNext = ctx->CreateBasicBlock("foreach_active_find_next");
2066  llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_active_body");
2067  llvm::BasicBlock *bbCheckForMore = ctx->CreateBasicBlock("foreach_active_check_for_more");
2068  llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("foreach_active_done");
2069 
2070  // Save the old mask so that we can restore it at the end
2071  llvm::Value *oldInternalMask = ctx->GetInternalMask();
2072 
2073  // Now, *maskBitsPtr will maintain a bitmask for the lanes that remain
2074  // to be processed by a pass through the loop body. It starts out with
2075  // the current execution mask (which should never be all off going
2076  // into this)...
2077  llvm::Value *oldFullMask = ctx->GetFullMask();
2078  llvm::Value *maskBitsPtr = ctx->AllocaInst(LLVMTypes::Int64Type, "mask_bits");
2079  llvm::Value *movmsk = ctx->LaneMask(oldFullMask);
2080  ctx->StoreInst(movmsk, maskBitsPtr);
2081 
2082  // Officially start the loop.
2083  ctx->StartScope();
2085  ctx->SetContinueTarget(bbCheckForMore);
2086 
2087  // Onward to find the first program instance to run the loop body for
2088  ctx->BranchInst(bbFindNext);
2089 
2090  ctx->SetCurrentBasicBlock(bbFindNext);
2091  {
2092  // Load the bitmask of the lanes left to be processed
2093  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2094 
2095  // Find the index of the first set bit in the mask
2096  llvm::Function *ctlzFunc = m->module->getFunction("__count_trailing_zeros_i64");
2097  Assert(ctlzFunc != NULL);
2098  llvm::Value *firstSet = ctx->CallInst(ctlzFunc, NULL, remainingBits, "first_set");
2099 
2100  // Store that value into the storage allocated for the iteration
2101  // variable.
2102  ctx->StoreInst(firstSet, sym->storagePtr);
2103 
2104  // Now set the execution mask to be only on for the current program
2105  // instance. (TODO: is there a more efficient way to do this? e.g.
2106  // for AVX1, we might want to do this as float rather than int
2107  // math...)
2108 
2109  // Get the "program index" vector value
2110 #ifdef ISPC_NVPTX_ENABLED
2111  llvm::Value *programIndex =
2112  g->target->getISA() == Target::NVPTX ? ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector();
2113 #else /* ISPC_NVPTX_ENABLED */
2114  llvm::Value *programIndex = ctx->ProgramIndexVector();
2115 #endif /* ISPC_NVPTX_ENABLED */
2116 
2117  // And smear the current lane out to a vector
2118  llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type, "first_set32");
2119  llvm::Value *firstSet32Smear = ctx->SmearUniform(firstSet32);
2120 
2121  // Now set the execution mask based on doing a vector compare of
2122  // these two
2123  llvm::Value *iterMask =
2124  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, firstSet32Smear, programIndex);
2125  iterMask = ctx->I1VecToBoolVec(iterMask);
2126 
2127  ctx->SetInternalMask(iterMask);
2128 
2129  // Also update the bitvector of lanes left to turn off the bit for
2130  // the lane we're about to run.
2131  llvm::Value *setMask = ctx->BinaryOperator(llvm::Instruction::Shl, LLVMInt64(1), firstSet, "set_mask");
2132  llvm::Value *notSetMask = ctx->NotOperator(setMask);
2133  llvm::Value *newRemaining =
2134  ctx->BinaryOperator(llvm::Instruction::And, remainingBits, notSetMask, "new_remaining");
2135  ctx->StoreInst(newRemaining, maskBitsPtr);
2136 
2137  // and onward to run the loop body...
2138  ctx->BranchInst(bbBody);
2139  }
2140 
2141  ctx->SetCurrentBasicBlock(bbBody);
2142  {
2143  ctx->SetBlockEntryMask(ctx->GetFullMask());
2144 
2145  // Run the code in the body of the loop. This is easy now.
2146  if (stmts)
2147  stmts->EmitCode(ctx);
2148 
2149  Assert(ctx->GetCurrentBasicBlock() != NULL);
2150  ctx->BranchInst(bbCheckForMore);
2151  }
2152 
2153  ctx->SetCurrentBasicBlock(bbCheckForMore);
2154  {
2155  ctx->RestoreContinuedLanes();
2156  // At the end of the loop body (either due to running the
2157  // statements normally, or a continue statement in the middle of
2158  // the loop that jumps to the end), see if there are any lanes left
2159  // to be processed.
2160  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2161  llvm::Value *nonZero = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, remainingBits,
2162  LLVMInt64(0), "remaining_ne_zero");
2163  ctx->BranchInst(bbFindNext, bbDone, nonZero);
2164  }
2165 
2166  ctx->SetCurrentBasicBlock(bbDone);
2167  ctx->SetInternalMask(oldInternalMask);
2168  ctx->EndForeach();
2169  ctx->EndScope();
2170 }
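// Conceptual sketch (illustrative scalar pseudocode) of the control flow
// emitted above; "laneMask" stands for the movmsk of the execution mask on
// entry and "count_trailing_zeros" for the builtin called above:
//
//     uint64_t remaining = laneMask;
//     while (remaining != 0) {
//         uint64_t lane = count_trailing_zeros(remaining);
//         // run the body with only "lane" active; the iteration
//         // variable holds the value of "lane"
//         remaining &= ~(1ull << lane);
//     }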
2171 
2172 void ForeachActiveStmt::Print(int indent) const {
2173  printf("%*cForeach_active Stmt", indent, ' ');
2174  pos.Print();
2175  printf("\n");
2176 
2177  printf("%*cIter symbol: ", indent + 4, ' ');
2178  if (sym != NULL) {
2179  printf("%s", sym->name.c_str());
2180  if (sym->type != NULL)
2181  printf(" %s", sym->type->GetString().c_str());
2182  } else
2183  printf("NULL");
2184  printf("\n");
2185 
2186  printf("%*cStmts:\n", indent + 4, ' ');
2187  if (stmts != NULL)
2188  stmts->Print(indent + 8);
2189  else
2190  printf("NULL");
2191  printf("\n");
2192 }
2193 
2195  if (sym == NULL)
2196  return NULL;
2197 
2198  return this;
2199 }
2200 
2202 
2203 ///////////////////////////////////////////////////////////////////////////
2204 // ForeachUniqueStmt
2205 
2206 ForeachUniqueStmt::ForeachUniqueStmt(const char *iterName, Expr *e, Stmt *s, SourcePos pos)
2207  : Stmt(pos, ForeachUniqueStmtID) {
2208  sym = m->symbolTable->LookupVariable(iterName);
2209  expr = e;
2210  stmts = s;
2211 }
2212 
2214  if (!ctx->GetCurrentBasicBlock())
2215  return;
2216 
2217  // First, allocate local storage for the symbol that we'll use for the
2218  // uniform variable that holds the current unique value through each
2219  // loop.
2220  if (sym->type == NULL) {
2221  Assert(m->errorCount > 0);
2222  return;
2223  }
2224  llvm::Type *symType = sym->type->LLVMType(g->ctx);
2225  if (symType == NULL) {
2226  Assert(m->errorCount > 0);
2227  return;
2228  }
2229  sym->storagePtr = ctx->AllocaInst(symType, sym->name.c_str());
2230 
2231  ctx->SetDebugPos(pos);
2232  ctx->EmitVariableDebugInfo(sym);
2233 
2234  // The various basic blocks that we'll need in the below
2235  llvm::BasicBlock *bbFindNext = ctx->CreateBasicBlock("foreach_find_next");
2236  llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_body");
2237  llvm::BasicBlock *bbCheckForMore = ctx->CreateBasicBlock("foreach_check_for_more");
2238  llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("foreach_done");
2239 
2240  // Prepare the FunctionEmitContext
2241  ctx->StartScope();
2242 
2243  // Save the old internal mask so that we can restore it at the end
2244  llvm::Value *oldMask = ctx->GetInternalMask();
2245 
2246  // Now, *maskBitsPtr will maintain a bitmask for the lanes that remain
2247  // to be processed by a pass through the foreach_unique loop body. It
2248  // starts out with the full execution mask (which should never be all
2249  // off going into this)...
2250  llvm::Value *oldFullMask = ctx->GetFullMask();
2251  llvm::Value *maskBitsPtr = ctx->AllocaInst(LLVMTypes::Int64Type, "mask_bits");
2252  llvm::Value *movmsk = ctx->LaneMask(oldFullMask);
2253  ctx->StoreInst(movmsk, maskBitsPtr);
2254 
2255  // Officially start the loop.
2257  ctx->SetContinueTarget(bbCheckForMore);
2258 
2259  // Evaluate the varying expression we're iterating over just once.
2260  llvm::Value *exprValue = expr->GetValue(ctx);
2261 
2262  // And we'll store its value into locally-allocated storage, for ease
2263  // of indexing over it with non-compile-time-constant indices.
2264  const Type *exprType;
2265  llvm::VectorType *llvmExprType;
2266  if (exprValue == NULL || (exprType = expr->GetType()) == NULL ||
2267  (llvmExprType = llvm::dyn_cast<llvm::VectorType>(exprValue->getType())) == NULL) {
2268  Assert(m->errorCount > 0);
2269  return;
2270  }
2271  ctx->SetDebugPos(pos);
2272  const Type *exprPtrType = PointerType::GetUniform(exprType);
2273  llvm::Value *exprMem = ctx->AllocaInst(llvmExprType, "expr_mem");
2274  ctx->StoreInst(exprValue, exprMem);
2275 
2276  // Onward to find the first set of lanes to run the loop for
2277  ctx->BranchInst(bbFindNext);
2278 
2279  ctx->SetCurrentBasicBlock(bbFindNext);
2280  {
2281  // Load the bitmask of the lanes left to be processed
2282  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2283 
2284  // Find the index of the first set bit in the mask
2285  llvm::Function *ctlzFunc = m->module->getFunction("__count_trailing_zeros_i64");
2286  Assert(ctlzFunc != NULL);
2287  llvm::Value *firstSet = ctx->CallInst(ctlzFunc, NULL, remainingBits, "first_set");
2288 
2289  // And load the corresponding element value from the temporary
2290  // memory storing the value of the varying expr.
2291  llvm::Value *uniqueValue;
2292 #ifdef ISPC_NVPTX_ENABLED
2293  if (g->target->getISA() == Target::NVPTX) {
2294  llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type);
2295  uniqueValue = ctx->Extract(exprValue, firstSet32);
2296  } else {
2297 #endif /* ISPC_NVPTX_ENABLED */
2298  llvm::Value *uniqueValuePtr =
2299  ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType, "unique_index_ptr");
2300  uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
2301 #ifdef ISPC_NVPTX_ENABLED
2302  }
2303 #endif /* ISPC_NVPTX_ENABLED */
2304  // If it's a varying pointer type, need to convert from the int
2305  // type we store in the vector to the actual pointer type
2306  if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL)
2307  uniqueValue = ctx->IntToPtrInst(uniqueValue, symType);
2308 
2309  // Store that value in sym's storage so that the iteration variable
2310  // has the right value inside the loop body
2311  ctx->StoreInst(uniqueValue, sym->storagePtr);
2312 
2313  // Set the execution mask so that it's on for any lane that a) was
2314  // running at the start of the foreach loop, and b) where that
2315  // lane's value of the varying expression is the same as the value
2316  // we've selected to process this time through--i.e.:
2317  // oldMask & (smear(element) == exprValue)
2318  llvm::Value *uniqueSmear = ctx->SmearUniform(uniqueValue, "unique_smear");
2319  llvm::Value *matchingLanes = NULL;
2320  if (uniqueValue->getType()->isFloatingPointTy())
2321  matchingLanes = ctx->CmpInst(llvm::Instruction::FCmp, llvm::CmpInst::FCMP_OEQ, uniqueSmear, exprValue,
2322  "matching_lanes");
2323  else
2324  matchingLanes =
2325  ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, uniqueSmear, exprValue, "matching_lanes");
2326  matchingLanes = ctx->I1VecToBoolVec(matchingLanes);
2327 
2328  llvm::Value *loopMask =
2329  ctx->BinaryOperator(llvm::Instruction::And, oldMask, matchingLanes, "foreach_unique_loop_mask");
2330  ctx->SetInternalMask(loopMask);
2331 
2332  // Also update the bitvector of lanes left to process in subsequent
2333  // loop iterations:
2334  // remainingBits &= ~movmsk(current mask)
2335  llvm::Value *loopMaskMM = ctx->LaneMask(loopMask);
2336  llvm::Value *notLoopMaskMM = ctx->NotOperator(loopMaskMM);
2337  llvm::Value *newRemaining =
2338  ctx->BinaryOperator(llvm::Instruction::And, remainingBits, notLoopMaskMM, "new_remaining");
2339  ctx->StoreInst(newRemaining, maskBitsPtr);
2340 
2341  // and onward...
2342  ctx->BranchInst(bbBody);
2343  }
2344 
2345  ctx->SetCurrentBasicBlock(bbBody);
2346  {
2347  ctx->SetBlockEntryMask(ctx->GetFullMask());
2348  // Run the code in the body of the loop. This is easy now.
2349  if (stmts)
2350  stmts->EmitCode(ctx);
2351 
2352  Assert(ctx->GetCurrentBasicBlock() != NULL);
2353  ctx->BranchInst(bbCheckForMore);
2354  }
2355 
2356  ctx->SetCurrentBasicBlock(bbCheckForMore);
2357  {
2358  // At the end of the loop body (either due to running the
2359  // statements normally, or a continue statement in the middle of
2360  // the loop that jumps to the end), see if there are any lanes left
2361  // to be processed.
2362  ctx->RestoreContinuedLanes();
2363  llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
2364  llvm::Value *nonZero = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, remainingBits,
2365  LLVMInt64(0), "remaining_ne_zero");
2366  ctx->BranchInst(bbFindNext, bbDone, nonZero);
2367  }
2368 
2369  ctx->SetCurrentBasicBlock(bbDone);
2370  ctx->SetInternalMask(oldMask);
2371  ctx->EndForeach();
2372  ctx->EndScope();
2373 }
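// Conceptual sketch (illustrative scalar pseudocode) of the loop above,
// where "expr" is the varying value being iterated over, "mask" is the
// execution mask on entry, and extract/broadcast/movmsk are stand-ins for
// the operations emitted above:
//
//     uint64_t remaining = movmsk(mask);
//     while (remaining != 0) {
//         int lane = count_trailing_zeros(remaining);
//         T unique = extract(expr, lane);
//         activeMask = mask & (expr == broadcast(unique));
//         // run the body with activeMask; the iteration variable == unique
//         remaining &= ~movmsk(activeMask);
//     }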
2374 
2375 void ForeachUniqueStmt::Print(int indent) const {
2376  printf("%*cForeach_unique Stmt", indent, ' ');
2377  pos.Print();
2378  printf("\n");
2379 
2380  printf("%*cIter symbol: ", indent + 4, ' ');
2381  if (sym != NULL) {
2382  printf("%s", sym->name.c_str());
2383  if (sym->type != NULL)
2384  printf(" %s", sym->type->GetString().c_str());
2385  } else
2386  printf("NULL");
2387  printf("\n");
2388 
2389  printf("%*cIter expr: ", indent + 4, ' ');
2390  if (expr != NULL)
2391  expr->Print();
2392  else
2393  printf("NULL");
2394  printf("\n");
2395 
2396  printf("%*cStmts:\n", indent + 4, ' ');
2397  if (stmts != NULL)
2398  stmts->Print(indent + 8);
2399  else
2400  printf("NULL");
2401  printf("\n");
2402 }
2403 
2405  const Type *type;
2406  if (sym == NULL || expr == NULL || (type = expr->GetType()) == NULL)
2407  return NULL;
2408 
2409  if (type->IsVaryingType() == false) {
2410  Error(expr->pos,
2411  "Iteration domain type in \"foreach_tiled\" loop "
2412  "must be \"varying\" type, not \"%s\".",
2413  type->GetString().c_str());
2414  return NULL;
2415  }
2416 
2417  if (Type::IsBasicType(type) == false) {
2418  Error(expr->pos,
2419  "Iteration domain type in \"foreach_tiled\" loop "
2420  "must be an atomic, pointer, or enum type, not \"%s\".",
2421  type->GetString().c_str());
2422  return NULL;
2423  }
2424 
2425  return this;
2426 }
2427 
2429 
2430 ///////////////////////////////////////////////////////////////////////////
2431 // CaseStmt
2432 
2433 /** Given the statements following a 'case' or 'default' label, this
2434  function determines whether the mask should be checked to see if it is
2435  "all off" immediately after the label, before executing the code for
2436  the statements.
2437  */
2438 static bool lCheckMask(Stmt *stmts) {
2439  if (stmts == NULL)
2440  return false;
2441 
2442  int cost = EstimateCost(stmts);
2443  bool safeToRunWithAllLanesOff = SafeToRunWithMaskAllOff(stmts);
2444 
2445  // The mask should be checked if the code following the
2446  // 'case'/'default' is relatively complex, or if it would be unsafe to
2447  // run that code with the execution mask all off.
2448  return (cost > PREDICATE_SAFE_IF_STATEMENT_COST || safeToRunWithAllLanesOff == false);
2449 }
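// For instance (illustrative): a label whose body is a single cheap,
// side-effect-safe statement, e.g.
//     case 0: x += 1; break;
// is emitted without a preceding mask check, while a label whose body
// contains a call or a memory write, e.g.
//     case 1: out[i] = foo(i); break;
// gets an "is the mask all off?" test before its statements run.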
2450 
2451 CaseStmt::CaseStmt(int v, Stmt *s, SourcePos pos) : Stmt(pos, CaseStmtID), value(v) { stmts = s; }
2452 
2455  if (stmts)
2456  stmts->EmitCode(ctx);
2457 }
2458 
2459 void CaseStmt::Print(int indent) const {
2460  printf("%*cCase [%d] label", indent, ' ', value);
2461  pos.Print();
2462  printf("\n");
2463  stmts->Print(indent + 4);
2464 }
2465 
2466 Stmt *CaseStmt::TypeCheck() { return this; }
2467 
2468 int CaseStmt::EstimateCost() const { return 0; }
2469 
2470 ///////////////////////////////////////////////////////////////////////////
2471 // DefaultStmt
2472 
2473 DefaultStmt::DefaultStmt(Stmt *s, SourcePos pos) : Stmt(pos, DefaultStmtID) { stmts = s; }
2474 
2477  if (stmts)
2478  stmts->EmitCode(ctx);
2479 }
2480 
2481 void DefaultStmt::Print(int indent) const {
2482  printf("%*cDefault Stmt", indent, ' ');
2483  pos.Print();
2484  printf("\n");
2485  stmts->Print(indent + 4);
2486 }
2487 
2488 Stmt *DefaultStmt::TypeCheck() { return this; }
2489 
2490 int DefaultStmt::EstimateCost() const { return 0; }
2491 
2492 ///////////////////////////////////////////////////////////////////////////
2493 // SwitchStmt
2494 
2495 SwitchStmt::SwitchStmt(Expr *e, Stmt *s, SourcePos pos) : Stmt(pos, SwitchStmtID) {
2496  expr = e;
2497  stmts = s;
2498 }
2499 
2500 /* An instance of this structure is carried along as we traverse the AST
2501  nodes for the statements after a "switch" statement. We use this
2502  structure to record all of the 'case' and 'default' statements after the
2503  "switch". */
2506  ctx = c;
2507  defaultBlock = NULL;
2508  lastBlock = NULL;
2509  }
2510 
2512 
2513  /* Basic block for the code following the "default" label (if any). */
2514  llvm::BasicBlock *defaultBlock;
2515 
2516  /* Map from integer values after "case" labels to the basic blocks that
2517  follow the corresponding "case" label. */
2518  std::vector<std::pair<int, llvm::BasicBlock *>> caseBlocks;
2519 
2520  /* For each basic block for a "case" label or a "default" label,
2521  nextBlock[block] stores the basic block pointer for the next
2522  subsequent "case" or "default" label in the program. */
2523  std::map<llvm::BasicBlock *, llvm::BasicBlock *> nextBlock;
2524 
2525  /* The last basic block created for a "case" or "default" label; when
2526  we create the basic block for the next one, we'll use this to update
2527  the nextBlock map<> above. */
2528  llvm::BasicBlock *lastBlock;
2529 };
2530 
2531 static bool lSwitchASTPreVisit(ASTNode *node, void *d) {
2532  if (llvm::dyn_cast<SwitchStmt>(node) != NULL)
2533  // don't continue recursively into a nested switch--we only want
2534  // our own case and default statements!
2535  return false;
2536 
2537  CaseStmt *cs = llvm::dyn_cast<CaseStmt>(node);
2538  DefaultStmt *ds = llvm::dyn_cast<DefaultStmt>(node);
2539 
2540  SwitchVisitInfo *svi = (SwitchVisitInfo *)d;
2541  llvm::BasicBlock *bb = NULL;
2542  if (cs != NULL) {
2543  // Complain if we've seen a case statement with the same value
2544  // already
2545  for (int i = 0; i < (int)svi->caseBlocks.size(); ++i) {
2546  if (svi->caseBlocks[i].first == cs->value) {
2547  Error(cs->pos, "Duplicate case value \"%d\".", cs->value);
2548  return true;
2549  }
2550  }
2551 
2552  // Otherwise create a new basic block for the code following this
2553  // 'case' statement and record the mapping between the case label
2554  // value and the basic block
2555  char buf[32];
2556  snprintf(buf, sizeof(buf), "case_%d", cs->value);
2557  bb = svi->ctx->CreateBasicBlock(buf);
2558  svi->caseBlocks.push_back(std::make_pair(cs->value, bb));
2559  } else if (ds != NULL) {
2560  // And complain if we've seen another 'default' label.
2561  if (svi->defaultBlock != NULL) {
2562  Error(ds->pos, "Multiple \"default\" labels in switch statement.");
2563  return true;
2564  } else {
2565  // Otherwise create a basic block for the code following the
2566  // "default".
2567  bb = svi->ctx->CreateBasicBlock("default");
2568  svi->defaultBlock = bb;
2569  }
2570  }
2571 
2572  // If we saw a "case" or "default" label, then update the map to record
2573  // that the block we just created follows the block created for the
2574  // previous label in the "switch".
2575  if (bb != NULL) {
2576  svi->nextBlock[svi->lastBlock] = bb;
2577  svi->lastBlock = bb;
2578  }
2579 
2580  return true;
2581 }
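// Illustration (not from the original source): for a statement like
//     switch (x) { case 0: ...; case 1: ...; default: ...; }
// this visitor produces caseBlocks = { (0, "case_0"), (1, "case_1") },
// defaultBlock = "default", and a nextBlock chain
// case_0 -> case_1 -> default; SwitchStmt::EmitCode() later records
// "switch_done" as the block following the last label.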
2582 
2584  if (ctx->GetCurrentBasicBlock() == NULL)
2585  return;
2586 
2587  const Type *type;
2588  if (expr == NULL || ((type = expr->GetType()) == NULL)) {
2589  AssertPos(pos, m->errorCount > 0);
2590  return;
2591  }
2592 
2593  // Basic block we'll end up after the switch statement
2594  llvm::BasicBlock *bbDone = ctx->CreateBasicBlock("switch_done");
2595 
2596  // Walk the AST of the statements after the 'switch' to collect a bunch
2597  // of information about the structure of the 'case' and 'default'
2598  // statements.
2599  SwitchVisitInfo svi(ctx);
2600  WalkAST(stmts, lSwitchASTPreVisit, NULL, &svi);
2601  // Record that the basic block following the last one created for a
2602  // case/default is the block after the end of the switch statement.
2603  svi.nextBlock[svi.lastBlock] = bbDone;
2604 
2605  llvm::Value *exprValue = expr->GetValue(ctx);
2606  if (exprValue == NULL) {
2607  AssertPos(pos, m->errorCount > 0);
2608  return;
2609  }
2610 
2611  bool isUniformCF = (type->IsUniformType() && lHasVaryingBreakOrContinue(stmts) == false);
2612  ctx->StartSwitch(isUniformCF, bbDone);
2613  ctx->SetBlockEntryMask(ctx->GetFullMask());
2614  ctx->SwitchInst(exprValue, svi.defaultBlock ? svi.defaultBlock : bbDone, svi.caseBlocks, svi.nextBlock);
2615 
2616  if (stmts != NULL)
2617  stmts->EmitCode(ctx);
2618 
2619  if (ctx->GetCurrentBasicBlock() != NULL)
2620  ctx->BranchInst(bbDone);
2621 
2622  ctx->SetCurrentBasicBlock(bbDone);
2623  ctx->EndSwitch();
2624 }
2625 
2626 void SwitchStmt::Print(int indent) const {
2627  printf("%*cSwitch Stmt", indent, ' ');
2628  pos.Print();
2629  printf("\n");
2630  printf("%*cexpr = ", indent, ' ');
2631  expr->Print();
2632  printf("\n");
2633  stmts->Print(indent + 4);
2634 }
2635 
2637  const Type *exprType;
2638  if (expr == NULL || (exprType = expr->GetType()) == NULL) {
2639  Assert(m->errorCount > 0);
2640  return NULL;
2641  }
2642 
2643  const Type *toType = NULL;
2644  exprType = exprType->GetAsConstType();
2647 
2648  if (exprType->IsUniformType()) {
2649  if (is64bit)
2650  toType = AtomicType::UniformInt64;
2651  else
2652  toType = AtomicType::UniformInt32;
2653  } else {
2654  if (is64bit)
2655  toType = AtomicType::VaryingInt64;
2656  else
2657  toType = AtomicType::VaryingInt32;
2658  }
2659 
2660  expr = TypeConvertExpr(expr, toType, "switch expression");
2661  if (expr == NULL)
2662  return NULL;
2663 
2664  return this;
2665 }
2666 
2668  const Type *type = expr->GetType();
2669  if (type && type->IsVaryingType())
2670  return COST_VARYING_SWITCH;
2671  else
2672  return COST_UNIFORM_SWITCH;
2673 }
2674 
2675 ///////////////////////////////////////////////////////////////////////////
2676 // UnmaskedStmt
2677 
2678 UnmaskedStmt::UnmaskedStmt(Stmt *s, SourcePos pos) : Stmt(pos, UnmaskedStmtID) { stmts = s; }
2679 
2681  if (!ctx->GetCurrentBasicBlock() || !stmts)
2682  return;
2683 
2684  llvm::Value *oldInternalMask = ctx->GetInternalMask();
2685  llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
2686 
2687  ctx->SetInternalMask(LLVMMaskAllOn);
2688  ctx->SetFunctionMask(LLVMMaskAllOn);
2689 
2690  stmts->EmitCode(ctx);
2691 
2692  // Do not restore the old masks if the current basic block is terminated. This happens if we emit code
2693  // for something like 'unmasked{return;}', for example.
2694  if (ctx->GetCurrentBasicBlock() == NULL)
2695  return;
2696 
2697  ctx->SetInternalMask(oldInternalMask);
2698  ctx->SetFunctionMask(oldFunctionMask);
2699 }
2700 
2701 void UnmaskedStmt::Print(int indent) const {
2702  printf("%*cUnmasked Stmt", indent, ' ');
2703  pos.Print();
2704  printf("\n");
2705 
2706  printf("%*cStmts:\n", indent + 4, ' ');
2707  if (stmts != NULL)
2708  stmts->Print(indent + 8);
2709  else
2710  printf("NULL");
2711  printf("\n");
2712 }
2713 
2714 Stmt *UnmaskedStmt::TypeCheck() { return this; }
2715 
2717 
2718 ///////////////////////////////////////////////////////////////////////////
2719 // ReturnStmt
2720 
2721 ReturnStmt::ReturnStmt(Expr *e, SourcePos p) : Stmt(p, ReturnStmtID), expr(e) {}
2722 
2724  if (!ctx->GetCurrentBasicBlock())
2725  return;
2726 
2727  if (ctx->InForeachLoop()) {
2728  Error(pos, "\"return\" statement is illegal inside a \"foreach\" loop.");
2729  return;
2730  }
2731 
2732  // Make sure we're not trying to return a reference to something where
2733  // that doesn't make sense
2734  const Function *func = ctx->GetFunction();
2735  const Type *returnType = func->GetReturnType();
2736  if (IsReferenceType(returnType) == true && IsReferenceType(expr->GetType()) == false) {
2737  const Type *lvType = expr->GetLValueType();
2738  if (lvType == NULL) {
2739  Error(expr->pos,
2740  "Illegal to return non-lvalue from function "
2741  "returning reference type \"%s\".",
2742  returnType->GetString().c_str());
2743  return;
2744  } else if (lvType->IsUniformType() == false) {
2745  Error(expr->pos,
2746  "Illegal to return varying lvalue type from "
2747  "function returning a reference type \"%s\".",
2748  returnType->GetString().c_str());
2749  return;
2750  }
2751  }
2752 
2753  ctx->SetDebugPos(pos);
2754  ctx->CurrentLanesReturned(expr, true);
2755 }
2756 
2757 Stmt *ReturnStmt::TypeCheck() { return this; }
2758 
2759 int ReturnStmt::EstimateCost() const { return COST_RETURN; }
2760 
2761 void ReturnStmt::Print(int indent) const {
2762  printf("%*cReturn Stmt", indent, ' ');
2763  pos.Print();
2764  if (expr)
2765  expr->Print();
2766  else
2767  printf("(void)");
2768  printf("\n");
2769 }
2770 
2771 ///////////////////////////////////////////////////////////////////////////
2772 // GotoStmt
2773 
2774 GotoStmt::GotoStmt(const char *l, SourcePos gotoPos, SourcePos ip) : Stmt(gotoPos, GotoStmtID) {
2775  label = l;
2776  identifierPos = ip;
2777 }
2778 
2780  if (!ctx->GetCurrentBasicBlock())
2781  return;
2782 
2783  if (ctx->VaryingCFDepth() > 0) {
2784  Error(pos, "\"goto\" statements are only legal under \"uniform\" "
2785  "control flow.");
2786  return;
2787  }
2788  if (ctx->InForeachLoop()) {
2789  Error(pos, "\"goto\" statements are currently illegal inside "
2790  "\"foreach\" loops.");
2791  return;
2792  }
2793 
2794  llvm::BasicBlock *bb = ctx->GetLabeledBasicBlock(label);
2795  if (bb == NULL) {
2796  /* Label wasn't found. Look for suggestions that are close */
2797  std::vector<std::string> labels = ctx->GetLabels();
2798  std::vector<std::string> matches = MatchStrings(label, labels);
2799  std::string match_output;
2800  if (!matches.empty()) {
2801  /* Print up to 5 matches. Don't want to spew too much */
2802  match_output += "\nDid you mean:";
2803  for (unsigned int i = 0; i < matches.size() && i < 5; i++)
2804  match_output += "\n " + matches[i] + "?";
2805  }
2806 
2807  /* Label wasn't found. Emit an error */
2808  Error(identifierPos, "No label named \"%s\" found in current function.%s", label.c_str(), match_output.c_str());
2809 
2810  return;
2811  }
2812 
2813  ctx->BranchInst(bb);
2814  ctx->SetCurrentBasicBlock(NULL);
2815 }
2816 
2817 void GotoStmt::Print(int indent) const { printf("%*cGoto label \"%s\"\n", indent, ' ', label.c_str()); }
2818 
2819 Stmt *GotoStmt::Optimize() { return this; }
2820 
2821 Stmt *GotoStmt::TypeCheck() { return this; }
2822 
2823 int GotoStmt::EstimateCost() const { return COST_GOTO; }
2824 
2825 ///////////////////////////////////////////////////////////////////////////
2826 // LabeledStmt
2827 
2828 LabeledStmt::LabeledStmt(const char *n, Stmt *s, SourcePos p) : Stmt(p, LabeledStmtID) {
2829  name = n;
2830  stmt = s;
2831 }
2832 
2834  llvm::BasicBlock *bblock = ctx->GetLabeledBasicBlock(name);
2835  AssertPos(pos, bblock != NULL);
2836 
2837  // End the current basic block with a jump to our basic block and then
2838  // set things up for emission to continue there. Note that the current
2839  // basic block may validly be NULL going into this statement due to an
2840  // earlier goto that NULLed it out; that doesn't stop us from
2841  // re-establishing a current basic block starting at the label.
2842  if (ctx->GetCurrentBasicBlock() != NULL)
2843  ctx->BranchInst(bblock);
2844  ctx->SetCurrentBasicBlock(bblock);
2845 
2846  if (stmt != NULL)
2847  stmt->EmitCode(ctx);
2848 }
2849 
2850 void LabeledStmt::Print(int indent) const {
2851  printf("%*cLabel \"%s\"\n", indent, ' ', name.c_str());
2852  if (stmt != NULL)
2853  stmt->Print(indent);
2854 }
2855 
2856 Stmt *LabeledStmt::Optimize() { return this; }
2857 
2859  if (!isalpha(name[0]) && name[0] != '_') {
2860  Error(pos, "Label must start with either an alphabetic character or '_'.");
2861  return NULL;
2862  }
2863  for (unsigned int i = 1; i < name.size(); ++i) {
2864  if (!isalnum(name[i]) && name[i] != '_') {
2865  Error(pos, "Character \"%c\" is illegal in labels.", name[i]);
2866  return NULL;
2867  }
2868  }
2869  return this;
2870 }
2871 
2872 int LabeledStmt::EstimateCost() const { return 0; }
2873 
2874 ///////////////////////////////////////////////////////////////////////////
2875 // StmtList
2876 
2878  ctx->StartScope();
2879  ctx->SetDebugPos(pos);
2880  for (unsigned int i = 0; i < stmts.size(); ++i)
2881  if (stmts[i])
2882  stmts[i]->EmitCode(ctx);
2883  ctx->EndScope();
2884 }
2885 
2886 Stmt *StmtList::TypeCheck() { return this; }
2887 
2888 int StmtList::EstimateCost() const { return 0; }
2889 
2890 void StmtList::Print(int indent) const {
2891  printf("%*cStmt List", indent, ' ');
2892  pos.Print();
2893  printf(":\n");
2894  for (unsigned int i = 0; i < stmts.size(); ++i)
2895  if (stmts[i])
2896  stmts[i]->Print(indent + 4);
2897 }
2898 
2899 ///////////////////////////////////////////////////////////////////////////
2900 // PrintStmt
2901 
2902 PrintStmt::PrintStmt(const std::string &f, Expr *v, SourcePos p) : Stmt(p, PrintStmtID), format(f), values(v) {}
2903 
2904 /* Because the pointers to values that are passed to __do_print() are all
2905  void *s (and because ispc print() format strings don't
2906  encode types), we pass along a string to __do_print() where the i'th
2907  character encodes the type of the i'th value to be printed. Needless to
2908  say, the encoding chosen here and the decoding code in __do_print() need
2909  to agree on the below!
2910  */
2911 static char lEncodeType(const Type *t) {
2912  if (Type::Equal(t, AtomicType::UniformBool))
2913  return 'b';
2914  if (Type::Equal(t, AtomicType::VaryingBool))
2915  return 'B';
2916  if (Type::Equal(t, AtomicType::UniformInt32))
2917  return 'i';
2918  if (Type::Equal(t, AtomicType::VaryingInt32))
2919  return 'I';
2920  if (Type::Equal(t, AtomicType::UniformUInt32))
2921  return 'u';
2922  if (Type::Equal(t, AtomicType::VaryingUInt32))
2923  return 'U';
2924  if (Type::Equal(t, AtomicType::UniformFloat))
2925  return 'f';
2926  if (Type::Equal(t, AtomicType::VaryingFloat))
2927  return 'F';
2928  if (Type::Equal(t, AtomicType::UniformInt64))
2929  return 'l';
2930  if (Type::Equal(t, AtomicType::VaryingInt64))
2931  return 'L';
2932  if (Type::Equal(t, AtomicType::UniformUInt64))
2933  return 'v';
2934  if (Type::Equal(t, AtomicType::VaryingUInt64))
2935  return 'V';
2936  if (Type::Equal(t, AtomicType::UniformDouble))
2937  return 'd';
2938  if (Type::Equal(t, AtomicType::VaryingDouble))
2939  return 'D';
2940  if (CastType<PointerType>(t) != NULL) {
2941  if (t->IsUniformType())
2942  return 'p';
2943  else
2944  return 'P';
2945  } else
2946  return '\0';
2947 }
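// Example encoding (illustrative): for an ispc statement such as
//     print("% %\n", programIndex, 3.5);
// the argTypes string built up below would be "If" -- a varying int32
// followed by a uniform float -- which __do_print() decodes one character
// at a time to know how to format each value pointer it is handed.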
2948 
2949 /** Given an Expr for a value to be printed, emit the code to evaluate the
2950  expression and store the result to alloca'd memory. Update the
2951  argTypes string with the type encoding for this expression.
2952  */
2953 static llvm::Value *lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
2954  const Type *type = expr->GetType();
2955  if (type == NULL)
2956  return NULL;
2957 
2958  if (CastType<ReferenceType>(type) != NULL) {
2959  expr = new RefDerefExpr(expr, expr->pos);
2960  type = expr->GetType();
2961  if (type == NULL)
2962  return NULL;
2963  }
2964 
2965  // Promote int8 and int16 types to int32s...
2966  const Type *baseType = type->GetAsNonConstType()->GetAsUniformType();
2967  if (Type::Equal(baseType, AtomicType::UniformInt8) || Type::Equal(baseType, AtomicType::UniformUInt8) ||
2968  Type::Equal(baseType, AtomicType::UniformInt16) || Type::Equal(baseType, AtomicType::UniformUInt16)) {
2969  expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 : AtomicType::VaryingInt32, expr,
2970  expr->pos);
2971  type = expr->GetType();
2972  }
2973 
2974  char t = lEncodeType(type->GetAsNonConstType());
2975  if (t == '\0') {
2976  Error(expr->pos,
2977  "Only atomic types are allowed in print statements; "
2978  "type \"%s\" is illegal.",
2979  type->GetString().c_str());
2980  return NULL;
2981  } else {
2982  if (Type::Equal(baseType, AtomicType::UniformBool)) {
2983  // Blast bools to ints, but do it here to preserve encoding for
2984  // printing 'true' or 'false'
2985  expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 : AtomicType::VaryingInt32, expr,
2986  expr->pos);
2987  type = expr->GetType();
2988  }
2989  argTypes.push_back(t);
2990 
2991  llvm::Type *llvmExprType = type->LLVMType(g->ctx);
2992  llvm::Value *ptr = ctx->AllocaInst(llvmExprType, "print_arg");
2993  llvm::Value *val = expr->GetValue(ctx);
2994  if (!val)
2995  return NULL;
2996  ctx->StoreInst(val, ptr);
2997 
2998  ptr = ctx->BitCastInst(ptr, LLVMTypes::VoidPointerType);
2999  return ptr;
3000  }
3001 }
3002 
3003 /* PrintStmt works closely with the __do_print() function implemented in
3004  the builtins-c.c file. In particular, the EmitCode() method here needs to
3005  take the arguments passed to it from ispc and generate a valid call to
3006  __do_print() with the information that __do_print() then needs to do the
3007  actual printing work at runtime.
3008  */
3010  if (!ctx->GetCurrentBasicBlock())
3011  return;
3012 
3013  ctx->SetDebugPos(pos);
3014 
3015  // __do_print takes 5 arguments; we'll get them stored in the args[] array
3016  // in the code emitted below
3017  //
3018  // 1. the format string
3019  // 2. a string encoding the types of the values being printed,
3020  // one character per value
3021  // 3. the number of running program instances (i.e. the target's
3022  // vector width)
3023  // 4. the current lane mask
3024  // 5. a pointer to an array of pointers to the values to be printed
3025  llvm::Value *args[5];
3026  std::string argTypes;
3027 
3028  if (values == NULL) {
3029  llvm::Type *ptrPtrType = llvm::PointerType::get(LLVMTypes::VoidPointerType, 0);
3030  args[4] = llvm::Constant::getNullValue(ptrPtrType);
3031  } else {
3032  // Get the values passed to the print() statement evaluated and
3033  // stored in memory so that we set up the array of pointers to them
3034  // for the 5th __do_print() argument
3035  ExprList *elist = llvm::dyn_cast<ExprList>(values);
3036  int nArgs = elist ? elist->exprs.size() : 1;
3037 
3038  // Allocate space for the array of pointers to values to be printed
3039  llvm::Type *argPtrArrayType = llvm::ArrayType::get(LLVMTypes::VoidPointerType, nArgs);
3040  llvm::Value *argPtrArray = ctx->AllocaInst(argPtrArrayType, "print_arg_ptrs");
3041  // Store the array pointer as a void **, which is what __do_print()
3042  // expects
3043  args[4] = ctx->BitCastInst(argPtrArray, llvm::PointerType::get(LLVMTypes::VoidPointerType, 0));
3044 
3045  // Now, for each of the arguments, emit code to evaluate its value
3046  // and store the value into alloca'd storage. Then store the
3047  // pointer to the alloca'd storage into argPtrArray.
3048  if (elist) {
3049  for (unsigned int i = 0; i < elist->exprs.size(); ++i) {
3050  Expr *expr = elist->exprs[i];
3051  if (!expr)
3052  return;
3053  llvm::Value *ptr = lProcessPrintArg(expr, ctx, argTypes);
3054  if (!ptr)
3055  return;
3056 
3057  llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, i, NULL);
3058  ctx->StoreInst(ptr, arrayPtr);
3059  }
3060  } else {
3061  llvm::Value *ptr = lProcessPrintArg(values, ctx, argTypes);
3062  if (!ptr)
3063  return;
3064  llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, 0, NULL);
3065  ctx->StoreInst(ptr, arrayPtr);
3066  }
3067  }
3068 
3069  // Now we can emit code to call __do_print()
3070 #ifdef ISPC_NVPTX_ENABLED
3071  llvm::Function *printFunc = g->target->getISA() != Target::NVPTX ? m->module->getFunction("__do_print")
3072  : m->module->getFunction("__do_print_nvptx");
3073 #else /* ISPC_NVPTX_ENABLED */
3074  llvm::Function *printFunc = m->module->getFunction("__do_print");
3075 #endif /* ISPC_NVPTX_ENABLED */
3076  AssertPos(pos, printFunc);
3077 
3078  llvm::Value *mask = ctx->GetFullMask();
3079  // Set up the rest of the parameters to it
3080  args[0] = ctx->GetStringPtr(format);
3081  args[1] = ctx->GetStringPtr(argTypes);
3082  args[2] = LLVMInt32(g->target->getVectorWidth());
3083  args[3] = ctx->LaneMask(mask);
3084  std::vector<llvm::Value *> argVec(&args[0], &args[5]);
3085  ctx->CallInst(printFunc, NULL, argVec, "");
3086 }
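For reference, a hedged sketch of the runtime interface this method targets; the real __do_print() prototype lives in builtins-c.c, so the signature and values below are illustrative assumptions only:

   // Assumed shape of the runtime hook (see builtins-c.c for the real one):
   // void __do_print(const char *format,    // args[0]: format string
   //                 const char *argTypes,  // args[1]: one encoding char per value
   //                 int width,             // args[2]: target vector width
   //                 uint64_t mask,         // args[3]: current lane mask
   //                 void **argPtrs);       // args[4]: pointers to the values
   //
   // e.g. on a hypothetical 8-wide target with all lanes active, printing a
   // single varying pointer would amount to a call like
   //     __do_print("ptr = %\n", "P", 8, 0xff, argPtrs);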
3087 
3088 void PrintStmt::Print(int indent) const { printf("%*cPrint Stmt (%s)", indent, ' ', format.c_str()); }
3089 
3090 Stmt *PrintStmt::TypeCheck() { return this; }
3091 
3092 int PrintStmt::EstimateCost() const { return COST_FUNCALL; }
3093 
3094 ///////////////////////////////////////////////////////////////////////////
3095 // AssertStmt
3096 
3097 AssertStmt::AssertStmt(const std::string &msg, Expr *e, SourcePos p) : Stmt(p, AssertStmtID), message(msg), expr(e) {}
3098 
3099 void AssertStmt::EmitCode(FunctionEmitContext *ctx) const {
3100  if (!ctx->GetCurrentBasicBlock())
3101  return;
3102 
3103  const Type *type;
3104  if (expr == NULL || (type = expr->GetType()) == NULL) {
3105  AssertPos(pos, m->errorCount > 0);
3106  return;
3107  }
3108  bool isUniform = type->IsUniformType();
3109 
3110  // The actual functionality to do the check and then handle failure is
3111  // done via a builtin written in bitcode in builtins/util.m4.
3112  llvm::Function *assertFunc =
3113  isUniform ? m->module->getFunction("__do_assert_uniform") : m->module->getFunction("__do_assert_varying");
3114  AssertPos(pos, assertFunc != NULL);
3115 
3116  char *errorString;
3117  if (asprintf(&errorString, "%s:%d:%d: Assertion failed: %s", pos.name, pos.first_line, pos.first_column,
3118  message.c_str()) == -1) {
3119  Error(pos, "Fatal error when generating assert string: asprintf() "
3120  "unable to allocate memory!");
3121  return;
3122  }
3123 
3124  std::vector<llvm::Value *> args;
3125  args.push_back(ctx->GetStringPtr(errorString));
3126  llvm::Value *exprValue = expr->GetValue(ctx);
3127  if (exprValue == NULL) {
3128  AssertPos(pos, m->errorCount > 0);
3129  return;
3130  }
3131  args.push_back(exprValue);
3132  args.push_back(ctx->GetFullMask());
3133  ctx->CallInst(assertFunc, NULL, args, "");
3134 
3135  free(errorString);
3136 }
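A minimal sketch of what the emitted assert amounts to (hypothetical source file and position; the builtin name comes from the lookup above, everything else is illustrative):

   // assert(x > 0);   // hypothetical ispc source at foo.ispc:10:5
   //
   // For a varying condition this conceptually lowers to
   //     __do_assert_varying("foo.ispc:10:5: Assertion failed: x > 0",
   //                         testValue, fullMask);
   // i.e. the asprintf()-built error string, the evaluated condition, and
   // the current full execution mask, matching the three args pushed above.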
3137 
3138 void AssertStmt::Print(int indent) const { printf("%*cAssert Stmt (%s)", indent, ' ', message.c_str()); }
3139 
3140 Stmt *AssertStmt::TypeCheck() {
3141  const Type *type;
3142  if (expr && (type = expr->GetType()) != NULL) {
3143  bool isUniform = type->IsUniformType();
3145  "\"assert\" statement");
3146  if (expr == NULL)
3147  return NULL;
3148  }
3149  return this;
3150 }
3151 
3152 int AssertStmt::EstimateCost() const { return COST_ASSERT; }
3153 
3154 ///////////////////////////////////////////////////////////////////////////
3155 // DeleteStmt
3156 
3157 DeleteStmt::DeleteStmt(Expr *e, SourcePos p) : Stmt(p, DeleteStmtID) { expr = e; }
3158 
3159 void DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
3160  if (!ctx->GetCurrentBasicBlock())
3161  return;
3162 
3163  const Type *exprType;
3164  if (expr == NULL || ((exprType = expr->GetType()) == NULL)) {
3165  AssertPos(pos, m->errorCount > 0);
3166  return;
3167  }
3168 
3169  llvm::Value *exprValue = expr->GetValue(ctx);
3170  if (exprValue == NULL) {
3171  AssertPos(pos, m->errorCount > 0);
3172  return;
3173  }
3174 
3175  // Typechecking should catch this
3176  AssertPos(pos, CastType<PointerType>(exprType) != NULL);
3177 
3178  if (exprType->IsUniformType()) {
3179  // For deletion of a uniform pointer, we just need to cast the
3180  // pointer type to a void pointer type, to match what
3181  // __delete_uniform() from the builtins expects.
3182  exprValue = ctx->BitCastInst(exprValue, LLVMTypes::VoidPointerType, "ptr_to_void");
3183  llvm::Function *func;
3184  if (g->target->is32Bit()) {
3185  func = m->module->getFunction("__delete_uniform_32rt");
3186  } else {
3187  func = m->module->getFunction("__delete_uniform_64rt");
3188  }
3189  AssertPos(pos, func != NULL);
3190 
3191  ctx->CallInst(func, NULL, exprValue, "");
3192  } else {
3193  // Varying pointers are arrays of ints, and __delete_varying()
3194  // takes a vector of i64s (even for 32-bit targets). Therefore, we
3195  // only need to extend to 64-bit values on 32-bit targets before
3196  // calling it.
3197  llvm::Function *func;
3198  if (g->target->is32Bit()) {
3199  func = m->module->getFunction("__delete_varying_32rt");
3200  } else {
3201  func = m->module->getFunction("__delete_varying_64rt");
3202  }
3203  AssertPos(pos, func != NULL);
3204  if (g->target->is32Bit())
3205  exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType, "ptr_to_64");
3206  ctx->CallInst(func, NULL, exprValue, "");
3207  }
3208 }
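A hedged sketch of the two lowering paths above (hypothetical pointers; only the builtin names come from this file):

   // delete up;   // 'up' a uniform pointer, 64-bit target:
   //     __delete_uniform_64rt((void *)up);
   //
   // delete vp;   // 'vp' a varying pointer, 32-bit target:
   //     the per-lane pointers are zero-extended to an <N x i64> vector and
   //     passed to __delete_varying_32rt(), as in the ZExtInst() call above.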
3209 
3210 void DeleteStmt::Print(int indent) const { printf("%*cDelete Stmt", indent, ' '); }
3211 
3212 Stmt *DeleteStmt::TypeCheck() {
3213  const Type *exprType;
3214  if (expr == NULL || ((exprType = expr->GetType()) == NULL))
3215  return NULL;
3216 
3217  if (CastType<PointerType>(exprType) == NULL) {
3218  Error(pos, "Illegal to delete non-pointer type \"%s\".", exprType->GetString().c_str());
3219  return NULL;
3220  }
3221 
3222  return this;
3223 }
3224 
3225 int DeleteStmt::EstimateCost() const { return COST_DELETE; }