Intel SPMD Program Compiler  1.11.0
ctx.cpp
1 /*
2  Copyright (c) 2010-2019, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ctx.cpp
35  @brief Implementation of the FunctionEmitContext class
36 */
37 
38 #include "ctx.h"
39 #include "expr.h"
40 #include "func.h"
41 #include "llvmutil.h"
42 #include "module.h"
43 #include "stmt.h"
44 #include "sym.h"
45 #include "type.h"
46 #include "util.h"
47 #include <map>
48 #if ISPC_LLVM_VERSION >= ISPC_LLVM_5_0 // LLVM 5.0+
49 #include <llvm/BinaryFormat/Dwarf.h>
50 #else // LLVM up to 4.x
51 #include <llvm/Support/Dwarf.h>
52 #endif
53 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
54 #include <llvm/DerivedTypes.h>
55 #include <llvm/Instructions.h>
56 #include <llvm/Metadata.h>
57 #include <llvm/Module.h>
58 #else
59 #include <llvm/IR/DerivedTypes.h>
60 #include <llvm/IR/Instructions.h>
61 #include <llvm/IR/Metadata.h>
62 #include <llvm/IR/Module.h>
63 #endif
64 #ifdef ISPC_NVPTX_ENABLED
65 #include <llvm/Support/FormattedStream.h>
66 #include <llvm/Support/raw_ostream.h>
67 #endif /* ISPC_NVPTX_ENABLED */
68 
69 /** This is a small utility structure that records information related to one
70  level of nested control flow. It's mostly used in correctly restoring
71  the mask and other state as we exit control flow nesting levels.
72 */
73 struct CFInfo {
74  /** Returns a new instance of the structure that represents entering an
75  'if' statement */
76  static CFInfo *GetIf(bool isUniform, llvm::Value *savedMask);
77 
78  /** Returns a new instance of the structure that represents entering a
79  loop. */
80  static CFInfo *GetLoop(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget,
81  llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
82  llvm::Value *savedBlockEntryMask);
83 
84  static CFInfo *GetForeach(FunctionEmitContext::ForeachType ft, llvm::BasicBlock *breakTarget,
85  llvm::BasicBlock *continueTarget, llvm::Value *savedBreakLanesPtr,
86  llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
87  llvm::Value *savedBlockEntryMask);
88 
89  static CFInfo *GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget,
90  llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr,
91  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask, llvm::Value *switchExpr,
92  llvm::BasicBlock *bbDefault,
93  const std::vector<std::pair<int, llvm::BasicBlock *>> *bbCases,
94  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbNext, bool scUniform);
95 
96  bool IsIf() { return type == If; }
97  bool IsLoop() { return type == Loop; }
98  bool IsForeach() { return (type == ForeachRegular || type == ForeachActive || type == ForeachUnique); }
99  bool IsSwitch() { return type == Switch; }
100  bool IsVarying() { return !isUniform; }
101  bool IsUniform() { return isUniform; }
102 
103  enum CFType { If, Loop, ForeachRegular, ForeachActive, ForeachUnique, Switch };
104  CFType type;
105  bool isUniform;
106  llvm::BasicBlock *savedBreakTarget, *savedContinueTarget;
107  llvm::Value *savedBreakLanesPtr, *savedContinueLanesPtr;
108  llvm::Value *savedMask, *savedBlockEntryMask;
109  llvm::Value *savedSwitchExpr;
110  llvm::BasicBlock *savedDefaultBlock;
111  const std::vector<std::pair<int, llvm::BasicBlock *>> *savedCaseBlocks;
112  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNextBlocks;
113  bool savedSwitchConditionWasUniform;
114 
115  private:
116  CFInfo(CFType t, bool uniformIf, llvm::Value *sm) {
117  Assert(t == If);
118  type = t;
119  isUniform = uniformIf;
120  savedBreakTarget = savedContinueTarget = NULL;
121  savedBreakLanesPtr = savedContinueLanesPtr = NULL;
122  savedMask = savedBlockEntryMask = sm;
123  savedSwitchExpr = NULL;
124  savedDefaultBlock = NULL;
125  savedCaseBlocks = NULL;
126  savedNextBlocks = NULL;
127  }
128  CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct, llvm::Value *sb, llvm::Value *sc,
129  llvm::Value *sm, llvm::Value *lm, llvm::Value *sse = NULL, llvm::BasicBlock *bbd = NULL,
130  const std::vector<std::pair<int, llvm::BasicBlock *>> *bbc = NULL,
131  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbn = NULL, bool scu = false) {
132  Assert(t == Loop || t == Switch);
133  type = t;
134  isUniform = iu;
135  savedBreakTarget = bt;
136  savedContinueTarget = ct;
137  savedBreakLanesPtr = sb;
138  savedContinueLanesPtr = sc;
139  savedMask = sm;
140  savedBlockEntryMask = lm;
141  savedSwitchExpr = sse;
142  savedDefaultBlock = bbd;
143  savedCaseBlocks = bbc;
144  savedNextBlocks = bbn;
145  savedSwitchConditionWasUniform = scu;
146  }
147  CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct, llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
148  llvm::Value *lm) {
149  Assert(t == ForeachRegular || t == ForeachActive || t == ForeachUnique);
150  type = t;
151  isUniform = false;
152  savedBreakTarget = bt;
153  savedContinueTarget = ct;
154  savedBreakLanesPtr = sb;
155  savedContinueLanesPtr = sc;
156  savedMask = sm;
157  savedBlockEntryMask = lm;
158  savedSwitchExpr = NULL;
159  savedDefaultBlock = NULL;
160  savedCaseBlocks = NULL;
161  savedNextBlocks = NULL;
162  }
163 };
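// A sketch of how CFInfo is used (inferred from the Start*/End* methods
// below): each Start{Uniform,Varying}If()/StartLoop()/StartForeach()/
// StartSwitch() pushes a CFInfo recording the state it is about to
// overwrite, and the matching End*() pops it via popCFState() to restore
// that state when the corresponding nesting level is exited.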
164 
165 CFInfo *CFInfo::GetIf(bool isUniform, llvm::Value *savedMask) { return new CFInfo(If, isUniform, savedMask); }
166 
167 CFInfo *CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget,
168  llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
169  llvm::Value *savedBlockEntryMask) {
170  return new CFInfo(Loop, isUniform, breakTarget, continueTarget, savedBreakLanesPtr, savedContinueLanesPtr,
171  savedMask, savedBlockEntryMask);
172 }
173 
174 CFInfo *CFInfo::GetForeach(FunctionEmitContext::ForeachType ft, llvm::BasicBlock *breakTarget,
175  llvm::BasicBlock *continueTarget, llvm::Value *savedBreakLanesPtr,
176  llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask, llvm::Value *savedForeachMask) {
177  CFType cfType;
178  switch (ft) {
179  case FunctionEmitContext::FOREACH_REGULAR:
180  cfType = ForeachRegular;
181  break;
182  case FunctionEmitContext::FOREACH_ACTIVE:
183  cfType = ForeachActive;
184  break;
185  case FunctionEmitContext::FOREACH_UNIQUE:
186  cfType = ForeachUnique;
187  break;
188  default:
189  FATAL("Unhandled foreach type");
190  return NULL;
191  }
192 
193  return new CFInfo(cfType, breakTarget, continueTarget, savedBreakLanesPtr, savedContinueLanesPtr, savedMask,
194  savedForeachMask);
195 }
196 
197 CFInfo *CFInfo::GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget,
198  llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
199  llvm::Value *savedBlockEntryMask, llvm::Value *savedSwitchExpr,
200  llvm::BasicBlock *savedDefaultBlock,
201  const std::vector<std::pair<int, llvm::BasicBlock *>> *savedCases,
202  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNext,
203  bool savedSwitchConditionUniform) {
204  return new CFInfo(Switch, isUniform, breakTarget, continueTarget, savedBreakLanesPtr, savedContinueLanesPtr,
205  savedMask, savedBlockEntryMask, savedSwitchExpr, savedDefaultBlock, savedCases, savedNext,
206  savedSwitchConditionUniform);
207 }
208 
209 ///////////////////////////////////////////////////////////////////////////
210 
211 FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, llvm::Function *lf, SourcePos firstStmtPos) {
212  function = func;
213  llvmFunction = lf;
214  switchConditionWasUniform = false;
215 
216  /* Create a new basic block to store all of the allocas */
217  allocaBlock = llvm::BasicBlock::Create(*g->ctx, "allocas", llvmFunction, 0);
218  bblock = llvm::BasicBlock::Create(*g->ctx, "entry", llvmFunction, 0);
219  /* But jump from it immediately into the real entry block */
220  llvm::BranchInst::Create(bblock, allocaBlock);
221 
222  funcStartPos = funSym->pos;
223 
224  internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory");
225  StoreInst(LLVMMaskAllOn, internalMaskPointer);
226 
227  functionMaskValue = LLVMMaskAllOn;
228 
229  fullMaskPointer = AllocaInst(LLVMTypes::MaskType, "full_mask_memory");
230  StoreInst(LLVMMaskAllOn, fullMaskPointer);
231 
232  blockEntryMask = NULL;
233  breakLanesPtr = continueLanesPtr = NULL;
234  breakTarget = continueTarget = NULL;
235 
236  switchExpr = NULL;
237  caseBlocks = NULL;
238  defaultBlock = NULL;
239  nextBlocks = NULL;
240 
241  returnedLanesPtr = AllocaInst(LLVMTypes::MaskType, "returned_lanes_memory");
242  StoreInst(LLVMMaskAllOff, returnedLanesPtr);
243 
244  launchedTasks = false;
245  launchGroupHandlePtr = AllocaInst(LLVMTypes::VoidPointerType, "launch_group_handle");
246  StoreInst(llvm::Constant::getNullValue(LLVMTypes::VoidPointerType), launchGroupHandlePtr);
247 
248  disableGSWarningCount = 0;
249 
250  const Type *returnType = function->GetReturnType();
251  if (!returnType || returnType->IsVoidType())
252  returnValuePtr = NULL;
253  else {
254  llvm::Type *ftype = returnType->LLVMType(g->ctx);
255  returnValuePtr = AllocaInst(ftype, "return_value_memory");
256  }
257 
258  if (g->opt.disableMaskAllOnOptimizations) {
259  // This is really disgusting. We want to fool the compiler so
260  // that it can't reason that the mask is all on, but we don't
261  // want to pay too much of a price at the start of each
262  // function to do so.
263  //
264  // Therefore: first, we declare a module-static __all_on_mask
265  // variable that will hold an "all on" mask value. At the start of
266  // each function, we'll load its value and call SetInternalMaskAnd
267  // with the result to set the current internal execution mask.
268  // (This is a no-op at runtime.)
269  //
270  // Then, to fool the optimizer that maybe the value of
271  // __all_on_mask can't be guaranteed to be "all on", we emit a
272  // dummy function that sets __all_on_mask to be "all off". (That
273  // function is never actually called.)
274  llvm::Value *globalAllOnMaskPtr = m->module->getNamedGlobal("__all_on_mask");
275  if (globalAllOnMaskPtr == NULL) {
276  globalAllOnMaskPtr =
277  new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false, llvm::GlobalValue::InternalLinkage,
278  LLVMMaskAllOn, "__all_on_mask");
279 
280  char buf[256];
281  snprintf(buf, sizeof(buf), "__off_all_on_mask_%s", g->target->GetISAString());
282 
283 #if ISPC_LLVM_VERSION <= ISPC_LLVM_8_0
284  llvm::Constant *offFunc =
285 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
286  m->module->getOrInsertFunction(buf, LLVMTypes::VoidType, NULL);
287 #else // LLVM 5.0+
288  m->module->getOrInsertFunction(buf, LLVMTypes::VoidType);
289 #endif
290 #else // LLVM 9.0+
291  llvm::FunctionCallee offFuncCallee = m->module->getOrInsertFunction(buf, LLVMTypes::VoidType);
292  llvm::Constant *offFunc = llvm::cast<llvm::Constant>(offFuncCallee.getCallee());
293 #endif
294  AssertPos(currentPos, llvm::isa<llvm::Function>(offFunc));
295  llvm::BasicBlock *offBB = llvm::BasicBlock::Create(*g->ctx, "entry", (llvm::Function *)offFunc, 0);
296  llvm::StoreInst *inst = new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
297  if (g->opt.forceAlignedMemory) {
298  inst->setAlignment(g->target->getNativeVectorAlignment());
299  }
300  llvm::ReturnInst::Create(*g->ctx, offBB);
301  }
302 
303  llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, "all_on_mask");
304  SetInternalMaskAnd(LLVMMaskAllOn, allOnMask);
305  }
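// (How this plays out in practice: since __all_on_mask has internal
// linkage and the only store of an "all off" value to it sits in a
// function that is never called, the load above always yields "all on"
// at runtime, yet the optimizer can't prove that--so transformations
// that rely on the mask being provably all-on are suppressed, which is
// the point of this testing path.)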
306 
307  if (m->diBuilder) {
308  currentPos = funSym->pos;
309 
310  /* If debugging is enabled, tell the debug information emission
311  code about this new function */
312 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
313  diFile = funcStartPos.GetDIFile();
314  AssertPos(currentPos, diFile.Verify());
315 #else /* LLVM 3.7+ */
316  diFile = funcStartPos.GetDIFile();
317 #endif
318 
319 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 /* 3.2, 3.3 */
320  llvm::DIScope scope = llvm::DIScope(m->diBuilder->getCU());
321 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.4, 3.5, 3.6 */
322  llvm::DIScope scope = llvm::DIScope(m->diCompileUnit);
323 #else /* LLVM 3.7+ */
324  llvm::DIScope *scope = m->diCompileUnit;
325 #endif
326 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
327  llvm::DIType diSubprogramType;
328  AssertPos(currentPos, scope.Verify());
329 #else /* LLVM 3.7+ */
330  llvm::DIType *diSubprogramType = NULL;
331 #endif
332 
333  const FunctionType *functionType = function->GetType();
334  if (functionType == NULL)
335  AssertPos(currentPos, m->errorCount > 0);
336  else {
337  diSubprogramType = functionType->GetDIType(scope);
338 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
339  AssertPos(currentPos, diSubprogramType.Verify());
340 #else /* LLVM 3.7+ */
341  // nothing to verify here for LLVM 3.7+
342 #endif
343  }
344 
345 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 /* 3.2, 3.3 */
346  llvm::DIType diSubprogramType_n = diSubprogramType;
347  int flags = llvm::DIDescriptor::FlagPrototyped;
348 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.4, 3.5, 3.6 */
349  Assert(diSubprogramType.isCompositeType());
350  llvm::DICompositeType diSubprogramType_n = static_cast<llvm::DICompositeType>(diSubprogramType);
351  int flags = llvm::DIDescriptor::FlagPrototyped;
352 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_7 /* LLVM 3.7 */
353  Assert(llvm::isa<llvm::DICompositeTypeBase>(diSubprogramType));
354  llvm::DISubroutineType *diSubprogramType_n =
355  llvm::cast<llvm::DISubroutineType>(getDICompositeType(diSubprogramType));
356  int flags = llvm::DINode::FlagPrototyped;
357 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_8 || ISPC_LLVM_VERSION == ISPC_LLVM_3_9 /* LLVM 3.8, 3.9 */
358  Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
359  llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
360  int flags = llvm::DINode::FlagPrototyped;
361 #else /* LLVM 4.0+ */
362  Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
363  llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
364  llvm::DINode::DIFlags flags = llvm::DINode::FlagPrototyped;
365 
366 #endif
367 
368  std::string mangledName = llvmFunction->getName();
369  if (mangledName == funSym->name)
370  mangledName = "";
371 
372  bool isStatic = (funSym->storageClass == SC_STATIC);
373  bool isOptimized = (g->opt.level > 0);
374  int firstLine = funcStartPos.first_line;
375 
376 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
377  diSubprogram = m->diBuilder->createFunction(diFile /* scope */, funSym->name, mangledName, diFile, firstLine,
378  diSubprogramType_n, isStatic, true, /* is defn */
379  firstLine, flags, isOptimized, llvmFunction);
380  AssertPos(currentPos, diSubprogram.Verify());
381 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_7 /* LLVM 3.7 */
382  diSubprogram = m->diBuilder->createFunction(diFile /* scope */, funSym->name, mangledName, diFile, firstLine,
383  diSubprogramType_n, isStatic, true, /* is defn */
384  firstLine, flags, isOptimized, llvmFunction);
385 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_8 || ISPC_LLVM_VERSION == ISPC_LLVM_3_9 /* LLVM 3.8, 3.9 */
386  diSubprogram = m->diBuilder->createFunction(diFile /* scope */, funSym->name, mangledName, diFile, firstLine,
387  diSubprogramType_n, isStatic, true, /* is defn */
388  firstLine, flags, isOptimized);
389  llvmFunction->setSubprogram(diSubprogram);
390 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_4_0 && ISPC_LLVM_VERSION <= ISPC_LLVM_7_1 /* LLVM 4.0 to 7.1 */
391  diSubprogram = m->diBuilder->createFunction(diFile /* scope */, funSym->name, mangledName, diFile, firstLine,
392  diSubprogramType_n, isStatic, true, /* is defn */
393  firstLine, flags, isOptimized);
394  llvmFunction->setSubprogram(diSubprogram);
395 #else /* LLVM 8.0+ */
396  /* isDefinition is always set to 'true' */
397  llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagDefinition;
398  if (isOptimized)
399  SPFlags |= llvm::DISubprogram::SPFlagOptimized;
400  if (isStatic)
401  SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
402 
403  diSubprogram = m->diBuilder->createFunction(diFile /* scope */, funSym->name, mangledName, diFile, firstLine,
404  diSubprogramType_n, firstLine, flags, SPFlags);
405  llvmFunction->setSubprogram(diSubprogram);
406 #endif
407 
408  /* And start a scope representing the initial function scope */
409  StartScope();
410  } else {
411  diSubprogram = NULL;
412  diFile = NULL;
413  }
414 }
415 
416 FunctionEmitContext::~FunctionEmitContext() {
417  AssertPos(currentPos, controlFlowInfo.size() == 0);
418  AssertPos(currentPos, debugScopes.size() == (m->diBuilder ? 1 : 0));
419 }
420 
421 const Function *FunctionEmitContext::GetFunction() const { return function; }
422 
423 llvm::BasicBlock *FunctionEmitContext::GetCurrentBasicBlock() { return bblock; }
424 
425 void FunctionEmitContext::SetCurrentBasicBlock(llvm::BasicBlock *bb) { bblock = bb; }
426 
427 llvm::Value *FunctionEmitContext::GetFunctionMask() { return functionMaskValue; }
428 
429 llvm::Value *FunctionEmitContext::GetInternalMask() { return LoadInst(internalMaskPointer, "load_mask"); }
430 
431 llvm::Value *FunctionEmitContext::GetFullMask() {
432  return BinaryOperator(llvm::Instruction::And, GetInternalMask(), functionMaskValue, "internal_mask&function_mask");
433 }
434 
435 llvm::Value *FunctionEmitContext::GetFullMaskPointer() { return fullMaskPointer; }
436 
437 void FunctionEmitContext::SetFunctionMask(llvm::Value *value) {
438  functionMaskValue = value;
439  if (bblock != NULL)
440  StoreInst(GetFullMask(), fullMaskPointer);
441 }
442 
443 void FunctionEmitContext::SetBlockEntryMask(llvm::Value *value) { blockEntryMask = value; }
444 
445 void FunctionEmitContext::SetInternalMask(llvm::Value *value) {
446  StoreInst(value, internalMaskPointer);
447  // kludge so that __mask returns the right value in ispc code.
448  StoreInst(GetFullMask(), fullMaskPointer);
449 }
450 
451 void FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) {
452  llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, test, "oldMask&test");
453  SetInternalMask(mask);
454 }
455 
456 void FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test) {
457  llvm::Value *notTest = BinaryOperator(llvm::Instruction::Xor, test, LLVMMaskAllOn, "~test");
458  llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, notTest, "oldMask&~test");
459  SetInternalMask(mask);
460 }
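// Illustrative use of the two helpers above (mirroring how a varying
// 'if' is emitted; a sketch, not additional machinery): for ispc code
//     if (test) { ... } else { ... }
// the then-clause is emitted under SetInternalMaskAnd(oldMask, test) and
// the else-clause under SetInternalMaskAndNot(oldMask, test), so between
// them the two clauses cover exactly the lanes active at the 'if'.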
461 
462 void FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
463  AssertPos(currentPos, bblock != NULL);
464  llvm::Value *any = Any(GetFullMask());
465  BranchInst(btrue, bfalse, any);
466  // It's illegal to add any additional instructions to the basic block
467  // now that it's terminated, so set bblock to NULL to be safe
468  bblock = NULL;
469 }
470 
471 void FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
472  AssertPos(currentPos, bblock != NULL);
473  llvm::Value *all = All(GetFullMask());
474  BranchInst(btrue, bfalse, all);
475  // It's illegal to add any additional instructions to the basic block
476  // now that it's terminated, so set bblock to NULL to be safe
477  bblock = NULL;
478 }
479 
480 void FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
481  AssertPos(currentPos, bblock != NULL);
482  // switch sense of true/false bblocks
483  BranchIfMaskAny(bfalse, btrue);
484  // It's illegal to add any additional instructions to the basic block
485  // now that it's terminated, so set bblock to NULL to be safe
486  bblock = NULL;
487 }
488 
489 void FunctionEmitContext::StartUniformIf() { controlFlowInfo.push_back(CFInfo::GetIf(true, GetInternalMask())); }
490 
491 void FunctionEmitContext::StartVaryingIf(llvm::Value *oldMask) {
492  controlFlowInfo.push_back(CFInfo::GetIf(false, oldMask));
493 }
494 
495 void FunctionEmitContext::EndIf() {
496  CFInfo *ci = popCFState();
497  // Make sure we match up with a Start{Uniform,Varying}If().
498  AssertPos(currentPos, ci->IsIf());
499 
500  // 'uniform' ifs don't change the mask so we only need to restore the
501  // mask going into the if for 'varying' if statements
502  if (ci->IsUniform() || bblock == NULL)
503  return;
504 
505  // We can't just restore the mask as it was going into the 'if'
506  // statement. First we have to take into account any program
507  // instances that have executed 'return' statements; the restored
508  // mask must be off for those lanes.
509  restoreMaskGivenReturns(ci->savedMask);
510 
511  // If the 'if' statement is inside a loop with a 'varying'
512  // condition, we also need to account for any break or continue
513  // statements that executed inside the 'if' statement; we also must
514  // leave the lane masks for the program instances that ran those
515  // off after we restore the mask after the 'if'. The code below
516  // ends up being optimized out in the case that there were no break
517  // or continue statements (and breakLanesPtr and continueLanesPtr
518  // have their initial 'all off' values), so we don't need to check
519  // for that here.
520  //
521  // There are three general cases to deal with here:
522  // - Loops: both break and continue are allowed, and thus the corresponding
523  // lane mask pointers are non-NULL
524  // - Foreach: only continueLanesPtr may be non-NULL
525  // - Switch: only breakLanesPtr may be non-NULL
526  if (continueLanesPtr != NULL || breakLanesPtr != NULL) {
527  // We want to compute:
528  // newMask = (oldMask & ~(breakLanes | continueLanes)),
529  // treating breakLanes or continueLanes as "all off" if the
530  // corresponding pointer is NULL.
531  llvm::Value *bcLanes = NULL;
532 
533  if (continueLanesPtr != NULL)
534  bcLanes = LoadInst(continueLanesPtr, "continue_lanes");
535  else
536  bcLanes = LLVMMaskAllOff;
537 
538  if (breakLanesPtr != NULL) {
539  llvm::Value *breakLanes = LoadInst(breakLanesPtr, "break_lanes");
540  bcLanes = BinaryOperator(llvm::Instruction::Or, bcLanes, breakLanes, "|break_lanes");
541  }
542 
543  llvm::Value *notBreakOrContinue =
544  BinaryOperator(llvm::Instruction::Xor, bcLanes, LLVMMaskAllOn, "!(break|continue)_lanes");
545  llvm::Value *oldMask = GetInternalMask();
546  llvm::Value *newMask = BinaryOperator(llvm::Instruction::And, oldMask, notBreakOrContinue, "new_mask");
547  SetInternalMask(newMask);
548  }
549 }
550 
551 void FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct, bool uniformCF) {
552  // Store the current values of various loop-related state so that we
553  // can restore it when we exit this loop.
554  llvm::Value *oldMask = GetInternalMask();
555  controlFlowInfo.push_back(CFInfo::GetLoop(uniformCF, breakTarget, continueTarget, breakLanesPtr, continueLanesPtr,
556  oldMask, blockEntryMask));
557  if (uniformCF)
558  // If the loop has a uniform condition, we don't need to track
559  // which lanes 'break' or 'continue'; all of the running ones go
560  // together, so we just jump
561  breakLanesPtr = continueLanesPtr = NULL;
562  else {
563  // For loops with varying conditions, allocate space to store masks
564  // that record which lanes have done these
565  continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "continue_lanes_memory");
566  StoreInst(LLVMMaskAllOff, continueLanesPtr);
567  breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
568  StoreInst(LLVMMaskAllOff, breakLanesPtr);
569  }
570 
571  breakTarget = bt;
572  continueTarget = ct;
573  blockEntryMask = NULL; // this better be set by the loop!
574 }
575 
576 void FunctionEmitContext::EndLoop() {
577  CFInfo *ci = popCFState();
578  AssertPos(currentPos, ci->IsLoop());
579 
580  if (!ci->IsUniform())
581  // If the loop had a 'uniform' test, then it didn't make any
582  // changes to the mask so there's nothing to restore. If it had a
583  // varying test, we need to restore the mask to what it was going
584  // into the loop, but still leaving off any lanes that executed a
585  // 'return' statement.
586  restoreMaskGivenReturns(ci->savedMask);
587 }
588 
589 void FunctionEmitContext::StartForeach(ForeachType ft) {
590  // Issue an error if we're in a nested foreach...
591  if (ft == FOREACH_REGULAR) {
592  for (int i = 0; i < (int)controlFlowInfo.size(); ++i) {
593  if (controlFlowInfo[i]->type == CFInfo::ForeachRegular) {
594  Error(currentPos, "Nested \"foreach\" statements are currently "
595  "illegal.");
596  break;
597  // Don't return here, though; instead allow the caller to
598  // do the rest of its codegen and then call EndForeach()
599  // normally--the idea being that this gives a chance to find
600  // any other errors inside the body of the foreach loop...
601  }
602  }
603  }
604 
605  // Store the current values of various loop-related state so that we
606  // can restore it when we exit this loop.
607  llvm::Value *oldMask = GetInternalMask();
608  controlFlowInfo.push_back(
609  CFInfo::GetForeach(ft, breakTarget, continueTarget, breakLanesPtr, continueLanesPtr, oldMask, blockEntryMask));
610  breakLanesPtr = NULL;
611  breakTarget = NULL;
612 
613  continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "foreach_continue_lanes");
614  StoreInst(LLVMMaskAllOff, continueLanesPtr);
615  continueTarget = NULL; // should be set by SetContinueTarget()
616 
617  blockEntryMask = NULL;
618 }
619 
620 void FunctionEmitContext::EndForeach() {
621  CFInfo *ci = popCFState();
622  AssertPos(currentPos, ci->IsForeach());
623 }
624 
625 void FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
626  if (!bblock)
627  return;
628 
629  // Restore the mask to the given old mask, but leave off any lanes that
630  // executed a return statement.
631  // newMask = (oldMask & ~returnedLanes)
632  llvm::Value *returnedLanes = LoadInst(returnedLanesPtr, "returned_lanes");
633  llvm::Value *notReturned = BinaryOperator(llvm::Instruction::Xor, returnedLanes, LLVMMaskAllOn, "~returned_lanes");
634  llvm::Value *newMask = BinaryOperator(llvm::Instruction::And, oldMask, notReturned, "new_mask");
635  SetInternalMask(newMask);
636 }
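// Worked example (a hypothetical 4-wide target): with oldMask = <1,1,1,0>
// and returnedLanes = <0,1,0,0>, newMask = oldMask & ~returnedLanes =
// <1,0,1,0>; lane 1 stays disabled after the control-flow merge, as
// required for lanes that already executed a 'return'.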
637 
638 /** Returns "true" if the first enclosing non-if control flow expression is
639  a "switch" statement.
640 */
641 bool FunctionEmitContext::inSwitchStatement() const {
642  // Go backwards through controlFlowInfo, since we add new nested scopes
643  // to the back.
644  int i = controlFlowInfo.size() - 1;
645  while (i >= 0 && controlFlowInfo[i]->IsIf())
646  --i;
647  // Got to the first non-if (or end of CF info)
648  if (i == -1)
649  return false;
650  return controlFlowInfo[i]->IsSwitch();
651 }
652 
653 void FunctionEmitContext::Break(bool doCoherenceCheck) {
654  if (breakTarget == NULL) {
655  Error(currentPos, "\"break\" statement is illegal outside of "
656  "for/while/do loops and \"switch\" statements.");
657  return;
658  }
659  AssertPos(currentPos, controlFlowInfo.size() > 0);
660 
661  if (bblock == NULL)
662  return;
663 
664  if (inSwitchStatement() == true && switchConditionWasUniform == true && ifsInCFAllUniform(CFInfo::Switch)) {
665  // We know that all program instances are executing the break, so
666  // just jump to the block immediately after the switch.
667  AssertPos(currentPos, breakTarget != NULL);
668  BranchInst(breakTarget);
669  bblock = NULL;
670  return;
671  }
672 
673  // If all of the enclosing 'if' tests in the loop have uniform control
674  // flow or if we can tell that the mask is all on, then we can just
675  // jump to the break location.
676  if (inSwitchStatement() == false && ifsInCFAllUniform(CFInfo::Loop)) {
677  BranchInst(breakTarget);
678  // Set bblock to NULL since the jump has terminated the basic block
679  bblock = NULL;
680  } else {
681  // Varying switch, uniform switch where the 'break' is under
682  // varying control flow, or a loop with varying 'if's above the
683  // break. In these cases, we need to update the mask of the lanes
684  // that have executed a 'break' statement:
685  // breakLanes = breakLanes | mask
686  AssertPos(currentPos, breakLanesPtr != NULL);
687 
688  llvm::Value *mask = GetInternalMask();
689  llvm::Value *breakMask = LoadInst(breakLanesPtr, "break_mask");
690  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, mask, breakMask, "mask|break_mask");
691  StoreInst(newMask, breakLanesPtr);
692 
693  // Set the current mask to be all off, just in case there are any
694  // statements in the same scope after the 'break'. Most of the time
695  // this will be optimized away since we'll likely end the scope of
696  // an 'if' statement and restore the mask then.
697  SetInternalMask(LLVMMaskAllOff);
698 
699  if (doCoherenceCheck) {
700  if (continueTarget != NULL)
701  // If the user has indicated that this is a 'coherent'
702  // break statement, then check to see if the mask is all
703  // off. If so, we have to conservatively jump to the
704  // continueTarget, not the breakTarget, since part of the
705  // reason the mask is all off may be due to 'continue'
706  // statements that executed in the current loop iteration.
707  jumpIfAllLoopLanesAreDone(continueTarget);
708  else if (breakTarget != NULL)
709  // Similarly handle these for switch statements, where we
710  // only have a break target.
711  jumpIfAllLoopLanesAreDone(breakTarget);
712  }
713  }
714 }
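// Worked example (a hypothetical 4-wide target): suppose a loop iteration
// starts with mask <1,1,1,1> and a varying 'if' leaves mask <1,1,0,0> on a
// path that hits 'break'. The code above ORs <1,1,0,0> into breakLanesPtr
// and zeroes the current mask; when the enclosing 'if' scope ends, EndIf()
// restores the entry mask minus the break lanes, so only lanes 2 and 3
// keep executing the rest of the loop body.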
715 
716 static bool lEnclosingLoopIsForeachActive(const std::vector<CFInfo *> &controlFlowInfo) {
717  for (int i = (int)controlFlowInfo.size() - 1; i >= 0; --i) {
718  if (controlFlowInfo[i]->type == CFInfo::ForeachActive)
719  return true;
720  }
721  return false;
722 }
723 
724 void FunctionEmitContext::Continue(bool doCoherenceCheck) {
725  if (!continueTarget) {
726  Error(currentPos, "\"continue\" statement illegal outside of "
727  "for/while/do/foreach loops.");
728  return;
729  }
730  AssertPos(currentPos, controlFlowInfo.size() > 0);
731 
732  if (ifsInCFAllUniform(CFInfo::Loop) || lEnclosingLoopIsForeachActive(controlFlowInfo)) {
733  // Similarly to 'break' statements, we can immediately jump to the
734  // continue target if we're only in 'uniform' control flow within
735  // loop or if we can tell that the mask is all on. Here, we can
736  // also jump if the enclosing loop is a 'foreach_active' loop, in
737  // which case we know that only a single program instance is
738  // executing.
739  AddInstrumentationPoint("continue: uniform CF, jumped");
740  BranchInst(continueTarget);
741  bblock = NULL;
742  } else {
743  // Otherwise update the stored value of which lanes have 'continue'd.
744  // continueLanes = continueLanes | mask
745  AssertPos(currentPos, continueLanesPtr);
746  llvm::Value *mask = GetInternalMask();
747  llvm::Value *continueMask = LoadInst(continueLanesPtr, "continue_mask");
748  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, mask, continueMask, "mask|continueMask");
749  StoreInst(newMask, continueLanesPtr);
750 
751  // And set the current mask to be all off in case there are any
752  // statements in the same scope after the 'continue'
753  SetInternalMask(LLVMMaskAllOff);
754 
755  if (doCoherenceCheck)
756  // If this is a 'coherent continue' statement, then emit the
757  // code to see if all of the lanes are now off due to
758  // breaks/continues and jump to the continue target if so.
759  jumpIfAllLoopLanesAreDone(continueTarget);
760  }
761 }
762 
763 /** This function checks to see if all of the 'if' statements (if any)
764  between the current scope and the first enclosing loop/switch of given
765  control flow type have 'uniform' tests.
766  */
767 bool FunctionEmitContext::ifsInCFAllUniform(int type) const {
768  AssertPos(currentPos, controlFlowInfo.size() > 0);
769  // Go backwards through controlFlowInfo, since we add new nested scopes
770  // to the back. Stop once we come to the first enclosing control flow
771  // structure of the desired type.
772  int i = controlFlowInfo.size() - 1;
773  while (i >= 0 && controlFlowInfo[i]->type != type) {
774  if (controlFlowInfo[i]->isUniform == false)
775  // Found a scope due to an 'if' statement with a varying test
776  return false;
777  --i;
778  }
779  AssertPos(currentPos, i >= 0); // else we didn't find the expected control flow type!
780  return true;
781 }
782 
783 void FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) {
784  llvm::Value *allDone = NULL;
785 
786  if (breakLanesPtr == NULL) {
787  llvm::Value *continued = LoadInst(continueLanesPtr, "continue_lanes");
788  continued = BinaryOperator(llvm::Instruction::And, continued, GetFunctionMask(), "continued&func");
789  allDone = MasksAllEqual(continued, blockEntryMask);
790  } else {
791  // Check to see if (returned lanes | continued lanes | break lanes) is
792  // equal to the value of mask at the start of the loop iteration. If
793  // so, everyone is done and we can jump to the given target
794  llvm::Value *returned = LoadInst(returnedLanesPtr, "returned_lanes");
795  llvm::Value *breaked = LoadInst(breakLanesPtr, "break_lanes");
796  llvm::Value *finishedLanes = BinaryOperator(llvm::Instruction::Or, returned, breaked, "returned|breaked");
797  if (continueLanesPtr != NULL) {
798  // It's NULL for "switch" statements...
799  llvm::Value *continued = LoadInst(continueLanesPtr, "continue_lanes");
800  finishedLanes =
801  BinaryOperator(llvm::Instruction::Or, finishedLanes, continued, "returned|breaked|continued");
802  }
803 
804  finishedLanes = BinaryOperator(llvm::Instruction::And, finishedLanes, GetFunctionMask(), "finished&func");
805 
806  // Do we match the mask at loop or switch statement entry?
807  allDone = MasksAllEqual(finishedLanes, blockEntryMask);
808  }
809 
810  llvm::BasicBlock *bAll = CreateBasicBlock("all_continued_or_breaked");
811  llvm::BasicBlock *bNotAll = CreateBasicBlock("not_all_continued_or_breaked");
812  BranchInst(bAll, bNotAll, allDone);
813 
814  // If so, have an extra basic block along the way to add
815  // instrumentation, if the user asked for it.
816  bblock = bAll;
817  AddInstrumentationPoint("break/continue: all dynamically went");
818  BranchInst(target);
819 
820  // And set the current basic block to a new one for future instructions
821  // for the path where we weren't able to jump
822  bblock = bNotAll;
823  AddInstrumentationPoint("break/continue: not all went");
824 }
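// Continuing the example from Break() above: if lanes <1,1,0,0> have hit
// 'break' and lanes <0,0,1,1> have since hit 'continue', finishedLanes is
// <1,1,1,1>; if that equals blockEntryMask, no lane has work left in this
// iteration and control jumps straight to 'target'.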
825 
826 void FunctionEmitContext::RestoreContinuedLanes() {
827  if (continueLanesPtr == NULL)
828  return;
829 
830  // mask = mask & continueFlags
831  llvm::Value *mask = GetInternalMask();
832  llvm::Value *continueMask = LoadInst(continueLanesPtr, "continue_mask");
833  llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or, mask, continueMask, "mask|continue_mask");
834  SetInternalMask(orMask);
835 
836  // continueLanes = 0
837  StoreInst(LLVMMaskAllOff, continueLanesPtr);
838 }
839 
840 void FunctionEmitContext::ClearBreakLanes() {
841  if (breakLanesPtr == NULL)
842  return;
843 
844  // breakLanes = 0
845  StoreInst(LLVMMaskAllOff, breakLanesPtr);
846 }
847 
848 void FunctionEmitContext::StartSwitch(bool cfIsUniform, llvm::BasicBlock *bbBreak) {
849  llvm::Value *oldMask = GetInternalMask();
850  controlFlowInfo.push_back(CFInfo::GetSwitch(cfIsUniform, breakTarget, continueTarget, breakLanesPtr,
851  continueLanesPtr, oldMask, blockEntryMask, switchExpr, defaultBlock,
852  caseBlocks, nextBlocks, switchConditionWasUniform));
853 
854  breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
855  StoreInst(LLVMMaskAllOff, breakLanesPtr);
856  breakTarget = bbBreak;
857 
858  continueLanesPtr = NULL;
859  continueTarget = NULL;
860  blockEntryMask = NULL;
861 
862  // These will be set by the SwitchInst() method
863  switchExpr = NULL;
864  defaultBlock = NULL;
865  caseBlocks = NULL;
866  nextBlocks = NULL;
867 }
868 
869 void FunctionEmitContext::EndSwitch() {
870  AssertPos(currentPos, bblock != NULL);
871 
872  CFInfo *ci = popCFState();
873  if (ci->IsVarying() && bblock != NULL)
874  restoreMaskGivenReturns(ci->savedMask);
875 }
876 
877 /** Emit code to check for an "all off" mask before the code for a
878  case or default label in a "switch" statement.
879  */
880 void FunctionEmitContext::addSwitchMaskCheck(llvm::Value *mask) {
881  llvm::Value *allOff = None(mask);
882  llvm::BasicBlock *bbSome = CreateBasicBlock("case_default_on");
883 
884  // Find the basic block for the case or default label immediately after
885  // the current one in the switch statement--that's where we want to
886  // jump if the mask is all off at this label.
887  AssertPos(currentPos, nextBlocks->find(bblock) != nextBlocks->end());
888  llvm::BasicBlock *bbNext = nextBlocks->find(bblock)->second;
889 
890  // Jump to the next one if the mask is all off; otherwise jump to the
891  // newly created block that will hold the actual code for this label.
892  BranchInst(bbNext, bbSome, allOff);
893  SetCurrentBasicBlock(bbSome);
894 }
895 
896 /** Returns the execution mask at entry to the first enclosing "switch"
897  statement. */
898 llvm::Value *FunctionEmitContext::getMaskAtSwitchEntry() {
899  AssertPos(currentPos, controlFlowInfo.size() > 0);
900  int i = controlFlowInfo.size() - 1;
901  while (i >= 0 && controlFlowInfo[i]->type != CFInfo::Switch)
902  --i;
903  AssertPos(currentPos, i != -1);
904  return controlFlowInfo[i]->savedMask;
905 }
906 
907 void FunctionEmitContext::EmitDefaultLabel(bool checkMask, SourcePos pos) {
908  if (inSwitchStatement() == false) {
909  Error(pos, "\"default\" label illegal outside of \"switch\" "
910  "statement.");
911  return;
912  }
913 
914  // If there's a default label in the switch, a basic block for it
915  // should have been provided in the previous call to SwitchInst().
916  AssertPos(currentPos, defaultBlock != NULL);
917 
918  if (bblock != NULL)
919  // The previous case in the switch fell through, or we're in a
920  // varying switch; terminate the current block with a jump to the
921  // block for the code for the default label.
922  BranchInst(defaultBlock);
923  SetCurrentBasicBlock(defaultBlock);
924 
925  if (switchConditionWasUniform)
926  // Nothing more to do for this case; return back to the caller,
927  // which will then emit the code for the default case.
928  return;
929 
930  // For a varying switch, we need to update the execution mask.
931  //
932  // First, compute the mask that corresponds to which program instances
933  // should execute the "default" code; this corresponds to the set of
934  // program instances that don't match any of the case statements.
935  // Therefore, we generate code that compares the value of the switch
936  // expression to the value associated with each of the "case"
937  // statements such that the surviving lanes didn't match any of them.
938  llvm::Value *matchesDefault = getMaskAtSwitchEntry();
939  for (int i = 0; i < (int)caseBlocks->size(); ++i) {
940  int value = (*caseBlocks)[i].first;
941  llvm::Value *valueVec =
942  (switchExpr->getType() == LLVMTypes::Int32VectorType) ? LLVMInt32Vector(value) : LLVMInt64Vector(value);
943  // TODO: for AVX2 at least, the following generates better code
944  // than doing ICMP_NE and skipping the NotOperator() below; file a
945  // than doing ICMP_NE and skipping the NotOperator() below; file an
946  llvm::Value *matchesCaseValue =
947  CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr, valueVec, "cmp_case_value");
948  matchesCaseValue = I1VecToBoolVec(matchesCaseValue);
949 
950  llvm::Value *notMatchesCaseValue = NotOperator(matchesCaseValue);
951  matchesDefault =
952  BinaryOperator(llvm::Instruction::And, matchesDefault, notMatchesCaseValue, "default&~case_match");
953  }
954 
955  // The mask may have some lanes on, which corresponds to the previous
956  // label falling through; compute the updated mask by ORing with the
957  // current mask.
958  llvm::Value *oldMask = GetInternalMask();
959  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask, matchesDefault, "old_mask|matches_default");
960  SetInternalMask(newMask);
961 
962  if (checkMask)
963  addSwitchMaskCheck(newMask);
964 }
965 
966 void FunctionEmitContext::EmitCaseLabel(int value, bool checkMask, SourcePos pos) {
967  if (inSwitchStatement() == false) {
968  Error(pos, "\"case\" label illegal outside of \"switch\" statement.");
969  return;
970  }
971 
972  // Find the basic block for this case statement.
973  llvm::BasicBlock *bbCase = NULL;
974  AssertPos(currentPos, caseBlocks != NULL);
975  for (int i = 0; i < (int)caseBlocks->size(); ++i)
976  if ((*caseBlocks)[i].first == value) {
977  bbCase = (*caseBlocks)[i].second;
978  break;
979  }
980  AssertPos(currentPos, bbCase != NULL);
981 
982  if (bblock != NULL)
983  // fall through from the previous case
984  BranchInst(bbCase);
985  SetCurrentBasicBlock(bbCase);
986 
987  if (switchConditionWasUniform)
988  return;
989 
990  // update the mask: first, get a mask that indicates which program
991  // instances have a value for the switch expression that matches this
992  // case statement.
993  llvm::Value *valueVec =
994  (switchExpr->getType() == LLVMTypes::Int32VectorType) ? LLVMInt32Vector(value) : LLVMInt64Vector(value);
995  llvm::Value *matchesCaseValue =
996  CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr, valueVec, "cmp_case_value");
997  matchesCaseValue = I1VecToBoolVec(matchesCaseValue);
998 
999  // If a lane was off going into the switch, we don't care if it has a
1000  // value in the switch expression that happens to match this case.
1001  llvm::Value *entryMask = getMaskAtSwitchEntry();
1002  matchesCaseValue = BinaryOperator(llvm::Instruction::And, entryMask, matchesCaseValue, "entry_mask&case_match");
1003 
1004  // Take the surviving lanes and turn on the mask for them.
1005  llvm::Value *oldMask = GetInternalMask();
1006  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask, matchesCaseValue, "mask|case_match");
1007  SetInternalMask(newMask);
1008 
1009  if (checkMask)
1010  addSwitchMaskCheck(newMask);
1011 }
1012 
1013 void FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault,
1014  const std::vector<std::pair<int, llvm::BasicBlock *>> &bbCases,
1015  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> &bbNext) {
1016  // The calling code should have called StartSwitch() before calling
1017  // SwitchInst().
1018  AssertPos(currentPos, controlFlowInfo.size() && controlFlowInfo.back()->IsSwitch());
1019 
1020  switchExpr = expr;
1021  defaultBlock = bbDefault;
1022  caseBlocks = new std::vector<std::pair<int, llvm::BasicBlock *>>(bbCases);
1023  nextBlocks = new std::map<llvm::BasicBlock *, llvm::BasicBlock *>(bbNext);
1024  switchConditionWasUniform = (llvm::isa<llvm::VectorType>(expr->getType()) == false);
1025 
1026  if (switchConditionWasUniform == true) {
1027  // For a uniform switch condition, just wire things up to the LLVM
1028  // switch instruction.
1029  llvm::SwitchInst *s = llvm::SwitchInst::Create(expr, bbDefault, bbCases.size(), bblock);
1030  for (int i = 0; i < (int)bbCases.size(); ++i) {
1031  if (expr->getType() == LLVMTypes::Int32Type)
1032  s->addCase(LLVMInt32(bbCases[i].first), bbCases[i].second);
1033  else {
1034  AssertPos(currentPos, expr->getType() == LLVMTypes::Int64Type);
1035  s->addCase(LLVMInt64(bbCases[i].first), bbCases[i].second);
1036  }
1037  }
1038 
1039  AddDebugPos(s);
1040  // switch is a terminator
1041  bblock = NULL;
1042  } else {
1043  // For a varying switch, we first turn off all lanes of the mask
1044  SetInternalMask(LLVMMaskAllOff);
1045 
1046  if (nextBlocks->size() > 0) {
1047  // If there are any labels inside the switch, jump to the first
1048  // one; any code before the first label won't be executed by
1049  // anyone.
1050  std::map<llvm::BasicBlock *, llvm::BasicBlock *>::const_iterator iter;
1051  iter = nextBlocks->find(NULL);
1052  AssertPos(currentPos, iter != nextBlocks->end());
1053  llvm::BasicBlock *bbFirst = iter->second;
1054  BranchInst(bbFirst);
1055  bblock = NULL;
1056  }
1057  }
1058 }
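// Sketch of the varying lowering, taken together with EmitCaseLabel() and
// EmitDefaultLabel() above: the mask starts all-off, control falls
// linearly through the case blocks, and each label ORs back in the lanes
// whose switch value matches it (plus any lanes left on by the previous
// label falling through). So for ispc code like
//     switch (x) { case 0: a(); case 1: b(); break; default: c(); }
// lanes with x == 0 execute a() and then, via fall-through, b() as well.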
1059 
1060 int FunctionEmitContext::VaryingCFDepth() const {
1061  int sum = 0;
1062  for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
1063  if (controlFlowInfo[i]->IsVarying())
1064  ++sum;
1065  return sum;
1066 }
1067 
1068 bool FunctionEmitContext::InForeachLoop() const {
1069  for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
1070  if (controlFlowInfo[i]->IsForeach())
1071  return true;
1072  return false;
1073 }
1074 
1075 void FunctionEmitContext::DisableGatherScatterWarnings() { ++disableGSWarningCount; }
1076 
1077 void FunctionEmitContext::EnableGatherScatterWarnings() { --disableGSWarningCount; }
1078 
1079 bool FunctionEmitContext::initLabelBBlocks(ASTNode *node, void *data) {
1080  LabeledStmt *ls = llvm::dyn_cast<LabeledStmt>(node);
1081  if (ls == NULL)
1082  return true;
1083 
1084  FunctionEmitContext *ctx = (FunctionEmitContext *)data;
1085 
1086  if (ctx->labelMap.find(ls->name) != ctx->labelMap.end())
1087  Error(ls->pos, "Multiple labels named \"%s\" in function.", ls->name.c_str());
1088  else {
1089  llvm::BasicBlock *bb = ctx->CreateBasicBlock(ls->name.c_str());
1090  ctx->labelMap[ls->name] = bb;
1091  }
1092  return true;
1093 }
1094 
1095 void FunctionEmitContext::InitializeLabelMap(Stmt *code) {
1096  labelMap.erase(labelMap.begin(), labelMap.end());
1097  WalkAST(code, initLabelBBlocks, NULL, this);
1098 }
1099 
1100 llvm::BasicBlock *FunctionEmitContext::GetLabeledBasicBlock(const std::string &label) {
1101  if (labelMap.find(label) != labelMap.end())
1102  return labelMap[label];
1103  else
1104  return NULL;
1105 }
1106 
1107 std::vector<std::string> FunctionEmitContext::GetLabels() {
1108  // Reserve space up front; sizing the vector instead would leave empty entries
1109  std::vector<std::string> labels; labels.reserve(labelMap.size());
1110 
1111  // Iterate through labelMap and grab only the keys
1112  std::map<std::string, llvm::BasicBlock *>::iterator iter;
1113  for (iter = labelMap.begin(); iter != labelMap.end(); iter++)
1114  labels.push_back(iter->first);
1115 
1116  return labels;
1117 }
1118 
1119 void FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
1120  const Type *returnType = function->GetReturnType();
1121  if (returnType->IsVoidType()) {
1122  if (expr != NULL)
1123  Error(expr->pos, "Can't return non-void type \"%s\" from void function.",
1124  expr->GetType()->GetString().c_str());
1125  } else {
1126  if (expr == NULL) {
1127  Error(funcStartPos, "Must provide return value for return "
1128  "statement for non-void function.");
1129  return;
1130  }
1131 
1132  expr = TypeConvertExpr(expr, returnType, "return statement");
1133  if (expr != NULL) {
1134  llvm::Value *retVal = expr->GetValue(this);
1135  if (retVal != NULL) {
1136  if (returnType->IsUniformType() || CastType<ReferenceType>(returnType) != NULL)
1137  StoreInst(retVal, returnValuePtr);
1138  else {
1139  // Use a masked store to store the value of the expression
1140  // in the return value memory; this preserves the return
1141  // values from other lanes that may have executed return
1142  // statements previously.
1143  StoreInst(retVal, returnValuePtr, GetInternalMask(), returnType,
1144  PointerType::GetUniform(returnType));
1145  }
1146  }
1147  }
1148  }
1149 
1150  if (VaryingCFDepth() == 0) {
1151  // If there is only uniform control flow between us and the
1152  // function entry, then it's guaranteed that all lanes are running,
1153  // so we can just emit a true return instruction
1154  AddInstrumentationPoint("return: uniform control flow");
1155  ReturnInst();
1156  } else {
1157  // Otherwise we update the returnedLanes value by ANDing it with
1158  // the current lane mask.
1159  llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, "old_returned_lanes");
1160  llvm::Value *newReturnedLanes =
1161  BinaryOperator(llvm::Instruction::Or, oldReturnedLanes, GetFullMask(), "old_mask|returned_lanes");
1162 
1163  // For 'coherent' return statements, emit code to check if all
1164  // lanes have returned
1165  if (doCoherenceCheck) {
1166  // if newReturnedLanes == functionMaskValue, get out of here!
1167  llvm::Value *cmp = MasksAllEqual(functionMaskValue, newReturnedLanes);
1168  llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return");
1169  llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return");
1170  BranchInst(bDoReturn, bNoReturn, cmp);
1171 
1172  bblock = bDoReturn;
1173  AddInstrumentationPoint("return: all lanes have returned");
1174  ReturnInst();
1175 
1176  bblock = bNoReturn;
1177  }
1178  // Otherwise update returnedLanesPtr and turn off all of the lanes
1179  // in the current mask so that any subsequent statements in the
1180  // same scope after the return have no effect
1181  StoreInst(newReturnedLanes, returnedLanesPtr);
1182  AddInstrumentationPoint("return: some but not all lanes have returned");
1183  SetInternalMask(LLVMMaskAllOff);
1184  }
1185 }
1186 
1187 llvm::Value *FunctionEmitContext::Any(llvm::Value *mask) {
1188  // Call the target-dependent any function to test that the mask is non-zero
1189  std::vector<Symbol *> mm;
1190  m->symbolTable->LookupFunction("__any", &mm);
1191  if (g->target->getMaskBitCount() == 1)
1192  AssertPos(currentPos, mm.size() == 1);
1193  else
1194  // There should be one with signed int signature, one unsigned int.
1195  AssertPos(currentPos, mm.size() == 2);
1196  // We can actually call either one, since both are i32s as far as
1197  // LLVM's type system is concerned...
1198  llvm::Function *fmm = mm[0]->function;
1199  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_any"));
1200 }
1201 
1202 llvm::Value *FunctionEmitContext::All(llvm::Value *mask) {
1203  // Call the target-dependent all function to test whether the mask
1204  // is fully on
1205  std::vector<Symbol *> mm;
1206  m->symbolTable->LookupFunction("__all", &mm);
1207  if (g->target->getMaskBitCount() == 1)
1208  AssertPos(currentPos, mm.size() == 1);
1209  else
1210  // There should be one with signed int signature, one unsigned int.
1211  AssertPos(currentPos, mm.size() == 2);
1212  // We can actually call either one, since both are i32s as far as
1213  // LLVM's type system is concerned...
1214  llvm::Function *fmm = mm[0]->function;
1215  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_all"));
1216 }
1217 
1218 llvm::Value *FunctionEmitContext::None(llvm::Value *mask) {
1219  // Call the target-dependent none function to test whether the mask
1220  // is fully off
1221  std::vector<Symbol *> mm;
1222  m->symbolTable->LookupFunction("__none", &mm);
1223  if (g->target->getMaskBitCount() == 1)
1224  AssertPos(currentPos, mm.size() == 1);
1225  else
1226  // There should be one with signed int signature, one unsigned int.
1227  AssertPos(currentPos, mm.size() == 2);
1228  // We can actually call either one, since both are i32s as far as
1229  // LLVM's type system is concerned...
1230  llvm::Function *fmm = mm[0]->function;
1231  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_none"));
1232 }
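// These three wrappers are how dynamic mask tests get emitted elsewhere in
// this file; e.g., BranchIfMaskAny() branches on Any(GetFullMask()) and
// addSwitchMaskCheck() uses None() to skip over case bodies that have no
// active lanes.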
1233 
1234 llvm::Value *FunctionEmitContext::LaneMask(llvm::Value *v) {
1235 #ifdef ISPC_NVPTX_ENABLED
1236  /* this makes the mandelbrot example slower with the "nvptx" target.
1237  * Needs further investigation. */
1238  const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
1239 #else
1240  const char *__movmsk = "__movmsk";
1241 #endif
1242  // Call the target-dependent movmsk function to turn the vector mask
1243  // into an i64 value
1244  std::vector<Symbol *> mm;
1245  m->symbolTable->LookupFunction(__movmsk, &mm);
1246  if (g->target->getMaskBitCount() == 1)
1247  AssertPos(currentPos, mm.size() == 1);
1248  else
1249  // There should be one with signed int signature, one unsigned int.
1250  AssertPos(currentPos, mm.size() == 2);
1251  // We can actually call either one, since both are i32s as far as
1252  // LLVM's type system is concerned...
1253  llvm::Function *fmm = mm[0]->function;
1254  return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
1255 }
1256 
1257 #ifdef ISPC_NVPTX_ENABLED
1258 bool lAppendInsertExtractName(llvm::Value *vector, std::string &funcName) {
1259  llvm::Type *type = vector->getType();
1260  if (type == LLVMTypes::Int8VectorType)
1261  funcName += "_int8";
1262  else if (type == LLVMTypes::Int16VectorType)
1263  funcName += "_int16";
1264  else if (type == LLVMTypes::Int32VectorType)
1265  funcName += "_int32";
1266  else if (type == LLVMTypes::Int64VectorType)
1267  funcName += "_int64";
1268  else if (type == LLVMTypes::FloatVectorType)
1269  funcName += "_float";
1270  else if (type == LLVMTypes::DoubleVectorType)
1271  funcName += "_double";
1272  else
1273  return false;
1274  return true;
1275 }
1276 
1277 llvm::Value *FunctionEmitContext::Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar) {
1278  std::string funcName = "__insert";
1279  bool ok = lAppendInsertExtractName(vector, funcName); // must run even when assert() compiles away
1280  assert(ok && lane->getType() == LLVMTypes::Int32Type);
1281 
1282  llvm::Function *func = m->module->getFunction(funcName.c_str());
1283  assert(func != NULL);
1284  std::vector<llvm::Value *> args;
1285  args.push_back(vector);
1286  args.push_back(lane);
1287  args.push_back(scalar);
1288  llvm::Value *ret =
1289  llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
1290  return ret;
1291 }
1292 
1293 llvm::Value *FunctionEmitContext::Extract(llvm::Value *vector, llvm::Value *lane) {
1294  std::string funcName = "__extract";
1295  bool ok = lAppendInsertExtractName(vector, funcName); // must run even when assert() compiles away
1296  assert(ok && lane->getType() == LLVMTypes::Int32Type);
1297 
1298  llvm::Function *func = m->module->getFunction(funcName.c_str());
1299  assert(func != NULL);
1300  std::vector<llvm::Value *> args;
1301  args.push_back(vector);
1302  args.push_back(lane);
1303  llvm::Value *ret =
1304  llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
1305  return ret;
1306 }
1307 #endif /* ISPC_NVPTX_ENABLED */
1308 
1309 llvm::Value *FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
1310 #ifdef ISPC_NVPTX_ENABLED
1311  if (g->target->getISA() == Target::NVPTX) {
1312  // Compare the two masks to get a vector of i1s
1313  llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, v1, v2, "v1==v2");
1314  return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
1315  }
1316 #endif /* ISPC_NVPTX_ENABLED */
1317 
1318 #if 0
1319  // Compare the two masks to get a vector of i1s
1320  llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
1321  v1, v2, "v1==v2");
1322  // Turn that into a bool vector type (often i32s)
1323  cmp = I1VecToBoolVec(cmp);
1324  // And see if it's all on
1325  return All(cmp);
1326 #else
1327  llvm::Value *mm1 = LaneMask(v1);
1328  llvm::Value *mm2 = LaneMask(v2);
1329  return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2, LLVMGetName("equal", v1, v2));
1330 #endif
1331 }
1332 
1333 llvm::Value *FunctionEmitContext::ProgramIndexVector(bool is32bits) {
1334  llvm::SmallVector<llvm::Constant *, 16> array;
1335  for (int i = 0; i < g->target->getVectorWidth(); ++i) {
1336  llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i);
1337  array.push_back(C);
1338  }
1339 
1340  llvm::Constant *index = llvm::ConstantVector::get(array);
1341 
1342  return index;
1343 }
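// For example, on a hypothetical 8-wide target with is32bits == true this
// returns the constant vector <0,1,2,3,4,5,6,7>, i.e. the value of
// 'programIndex' in ispc code.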
1344 
1345 #ifdef ISPC_NVPTX_ENABLED
1346 llvm::Value *FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
1347  llvm::Function *func_program_index = m->module->getFunction("__program_index");
1348  llvm::Value *__program_index =
1349  CallInst(func_program_index, NULL, std::vector<llvm::Value *>(), "foreach__program_indexS");
1350  llvm::Value *index =
1351  InsertInst(llvm::UndefValue::get(LLVMTypes::Int32VectorType), __program_index, 0, "foreach__program_indexV");
1352 #if 0
1353  if (!is32bits)
1354  index = ZExtInst(index, LLVMTypes::Int64VectorType);
1355 #endif
1356  return index;
1357 }
1358 #endif /* ISPC_NVPTX_ENABLED */
1359 
1360 llvm::Value *FunctionEmitContext::GetStringPtr(const std::string &str) {
1361  llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
1362  llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
1363  llvm::Value *lstrPtr =
1364  new llvm::GlobalVariable(*m->module, lstr->getType(), true /*isConst*/, linkage, lstr, "__str");
1365  return new llvm::BitCastInst(lstrPtr, LLVMTypes::VoidPointerType, "str_void_ptr", bblock);
1366 }
1367 
1368 llvm::BasicBlock *FunctionEmitContext::CreateBasicBlock(const char *name) {
1369  return llvm::BasicBlock::Create(*g->ctx, name, llvmFunction);
1370 }
1371 
1372 llvm::Value *FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
1373  if (b == NULL) {
1374  AssertPos(currentPos, m->errorCount > 0);
1375  return NULL;
1376  }
1377 
1378  if (g->target->getMaskBitCount() == 1)
1379  return b;
1380 
1381  llvm::ArrayType *at = llvm::dyn_cast<llvm::ArrayType>(b->getType());
1382  if (at) {
1383  // If we're given an array of vectors of i1s, then do the
1384  // conversion for each of the elements
1385  llvm::Type *boolArrayType = llvm::ArrayType::get(LLVMTypes::BoolVectorType, at->getNumElements());
1386  llvm::Value *ret = llvm::UndefValue::get(boolArrayType);
1387 
1388  for (unsigned int i = 0; i < at->getNumElements(); ++i) {
1389  llvm::Value *elt = ExtractInst(b, i);
1390  llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType, LLVMGetName(elt, "_to_boolvec"));
1391  ret = InsertInst(ret, sext, i);
1392  }
1393  return ret;
1394  } else
1395  return SExtInst(b, LLVMTypes::BoolVectorType, LLVMGetName(b, "_to_boolvec"));
1396 }
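// Example: if the mask type is <4 x i32>, an i1 vector <1,0,1,1>
// sign-extends to <-1,0,-1,-1>--an all-ones bit pattern for each lane
// that is on.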
1397 
1398 static llvm::Value *lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
1399  llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s, true);
1400  std::string var_name = "_";
1401  var_name = var_name + s;
1402  llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(), true /* const */,
1403  llvm::GlobalValue::InternalLinkage, sConstant, var_name.c_str());
1404  llvm::Value *indices[2] = {LLVMInt32(0), LLVMInt32(0)};
1405  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1406 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1407  return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock);
1408 #else /* LLVM 3.7+ */
1409  return llvm::GetElementPtrInst::Create(PTYPE(sPtr), sPtr, arrayRef, "sptr", bblock);
1410 #endif
1411 }
1412 
1413 void FunctionEmitContext::AddInstrumentationPoint(const char *note) {
1414  AssertPos(currentPos, note != NULL);
1415  if (!g->emitInstrumentation)
1416  return;
1417 
1418  std::vector<llvm::Value *> args;
1419  // arg 1: filename as string
1420  args.push_back(lGetStringAsValue(bblock, currentPos.name));
1421  // arg 2: provided note
1422  args.push_back(lGetStringAsValue(bblock, note));
1423  // arg 3: line number
1424  args.push_back(LLVMInt32(currentPos.first_line));
1425  // arg 4: current mask, movmsk'ed down to an int64
1426  args.push_back(LaneMask(GetFullMask()));
1427 
1428  llvm::Function *finst = m->module->getFunction("ISPCInstrument");
1429  CallInst(finst, NULL, args, "");
1430 }
1431 
1432 void FunctionEmitContext::SetDebugPos(SourcePos pos) { currentPos = pos; }
1433 
1434 SourcePos FunctionEmitContext::GetDebugPos() const { return currentPos; }
1435 
1436 void FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos, llvm::DIScope *scope) {
1437  llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(value);
1438  if (inst != NULL && m->diBuilder) {
1439  SourcePos p = pos ? *pos : currentPos;
1440  if (p.first_line != 0)
1441  // If first_line == 0, then we're in the middle of setting up
1442  // the standard library or the like; don't add debug positions
1443  // for those functions
1444  inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column,
1445  scope ?
1446 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1447  *scope
1448 #else /* LLVM 3.7+ */
1449  scope
1450 #endif
1451  : GetDIScope()));
1452  }
1453 }
1454 
1455 void FunctionEmitContext::StartScope() {
1456  if (m->diBuilder != NULL) {
1457 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1458  llvm::DIScope parentScope;
1459  llvm::DILexicalBlock lexicalBlock;
1460 #else /* LLVM 3.7+ */
1461  llvm::DIScope *parentScope;
1462  llvm::DILexicalBlock *lexicalBlock;
1463 #endif
1464  if (debugScopes.size() > 0)
1465  parentScope = debugScopes.back();
1466  else
1467  parentScope = diSubprogram;
1468 
1469  lexicalBlock = m->diBuilder->createLexicalBlock(parentScope, diFile, currentPos.first_line,
1470 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_5
1471  // Revision 202736 in LLVM adds support of DWARF discriminator
1472  // to the last argument and revision 202737 in clang adds 0
1473  // for the last argument by default.
1474  currentPos.first_column, 0);
1475 #else
1476  // Revision 216239 in LLVM removes support of DWARF
1477  // discriminator as the last argument
1478  currentPos.first_column);
1479 #endif // LLVM 3.2, 3.3, 3.4 and 3.6+
1480 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1481  AssertPos(currentPos, lexicalBlock.Verify());
1482  debugScopes.push_back(lexicalBlock);
1483 #else /* LLVM 3.7+ */
1484  debugScopes.push_back(llvm::cast<llvm::DILexicalBlockBase>(lexicalBlock));
1485 #endif
1486  }
1487 }
1488 
1489 void FunctionEmitContext::EndScope() {
1490  if (m->diBuilder != NULL) {
1491  AssertPos(currentPos, debugScopes.size() > 0);
1492  debugScopes.pop_back();
1493  }
1494 }
1495 
1496 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1497 llvm::DIScope
1498 #else /* LLVM 3.7+ */
1499 llvm::DIScope *
1500 #endif
1501 FunctionEmitContext::GetDIScope() const {
1502  AssertPos(currentPos, debugScopes.size() > 0);
1503  return debugScopes.back();
1504 }
1505 
1506 void FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) {
1507  if (m->diBuilder == NULL)
1508  return;
1509 
1510 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1511  llvm::DIScope scope = GetDIScope();
1512  llvm::DIType diType = sym->type->GetDIType(scope);
1513  AssertPos(currentPos, diType.Verify());
1514  llvm::DIVariable var =
1515 #else /* LLVM 3.7+ */
1516  llvm::DIScope *scope = GetDIScope();
1517  llvm::DIType *diType = sym->type->GetDIType(scope);
1518  llvm::DILocalVariable *var =
1519 #endif
1520 
1521 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7*/
1522  m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_auto_variable, scope, sym->name, sym->pos.GetDIFile(),
1523  sym->pos.first_line, diType, true /* preserve through opts */);
1524 #else /* LLVM 3.8+ */
1525  m->diBuilder->createAutoVariable(scope, sym->name, sym->pos.GetDIFile(), sym->pos.first_line, diType,
1526  true /* preserve through opts */);
1527 #endif
1528 
1529 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1530  AssertPos(currentPos, var.Verify());
1531  llvm::Instruction *declareInst = m->diBuilder->insertDeclare(sym->storagePtr, var,
1532 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
1533  m->diBuilder->createExpression(),
1534 #endif
1535  bblock);
1536  AddDebugPos(declareInst, &sym->pos, &scope);
1537 #else /* LLVM 3.7+ */
1538  llvm::Instruction *declareInst =
1539  m->diBuilder->insertDeclare(sym->storagePtr, var, m->diBuilder->createExpression(),
1540  llvm::DebugLoc::get(sym->pos.first_line, sym->pos.first_column, scope), bblock);
1541  AddDebugPos(declareInst, &sym->pos, scope);
1542 #endif
1543 }
1544 
1545 void FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) {
1546  if (m->diBuilder == NULL)
1547  return;
1548 
1549 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
1550  int flags = 0;
1551 #else // LLVM 4.0+
1552  llvm::DINode::DIFlags flags = llvm::DINode::FlagZero;
1553 #endif
1554 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1555  llvm::DIScope scope = diSubprogram;
1556  llvm::DIType diType = sym->type->GetDIType(scope);
1557  AssertPos(currentPos, diType.Verify());
1558  llvm::DIVariable var =
1559 #else /* LLVM 3.7+ */
1560  llvm::DIScope *scope = diSubprogram;
1561  llvm::DIType *diType = sym->type->GetDIType(scope);
1562  llvm::DILocalVariable *var =
1563 #endif
1564 
1565 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
1566  m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_arg_variable, scope, sym->name, sym->pos.GetDIFile(),
1567  sym->pos.first_line, diType, true /* preserve through opts */, flags,
1568  argNum + 1);
1569 #else /* LLVM 3.8+ */
1570  m->diBuilder->createParameterVariable(scope, sym->name, argNum + 1, sym->pos.GetDIFile(), sym->pos.first_line,
1571  diType, true /* preserve through opts */, flags);
1572 #endif
1573 
1574 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1575  AssertPos(currentPos, var.Verify());
1576  llvm::Instruction *declareInst = m->diBuilder->insertDeclare(sym->storagePtr, var,
1577 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
1578  m->diBuilder->createExpression(),
1579 #endif
1580  bblock);
1581  AddDebugPos(declareInst, &sym->pos, &scope);
1582 #else /* LLVM 3.7+ */
1583  llvm::Instruction *declareInst =
1584  m->diBuilder->insertDeclare(sym->storagePtr, var, m->diBuilder->createExpression(),
1585  llvm::DebugLoc::get(sym->pos.first_line, sym->pos.first_column, scope), bblock);
1586  AddDebugPos(declareInst, &sym->pos, scope);
1587 #endif
1588 }
1589 
1590 /** If the given type is an array of vector types, then it's the
1591  representation of an ispc VectorType with varying elements. If it is
1592  one of these, return the array size (i.e. the VectorType's size).
1593  Otherwise return zero.
1594  */
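// For example, with an 8-wide target, [4 x <8 x float>] is the representation
// of a varying "float<4>", so lArrayVectorWidth() returns 4 for it; for a
// non-array type such as <8 x float> it returns 0.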
1595 static int lArrayVectorWidth(llvm::Type *t) {
1596  llvm::ArrayType *arrayType = llvm::dyn_cast<llvm::ArrayType>(t);
1597  if (arrayType == NULL)
1598  return 0;
1599 
1600  // We shouldn't be seeing arrays of anything but vectors being passed
1601  // to things like FunctionEmitContext::BinaryOperator() as operands.
1602  llvm::VectorType *vectorElementType = llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
1603  Assert((vectorElementType != NULL && (int)vectorElementType->getNumElements() == g->target->getVectorWidth()));
1604 
1605  return (int)arrayType->getNumElements();
1606 }
1607 
1608 llvm::Value *FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst, llvm::Value *v0, llvm::Value *v1,
1609  const char *name) {
1610  if (v0 == NULL || v1 == NULL) {
1611  AssertPos(currentPos, m->errorCount > 0);
1612  return NULL;
1613  }
1614 
1615  AssertPos(currentPos, v0->getType() == v1->getType());
1616  llvm::Type *type = v0->getType();
1617  int arraySize = lArrayVectorWidth(type);
1618  if (arraySize == 0) {
1619  llvm::Instruction *bop = llvm::BinaryOperator::Create(inst, v0, v1, name ? name : "", bblock);
1620  AddDebugPos(bop);
1621  return bop;
1622  } else {
1623  // If this is an ispc VectorType, apply the binary operator to each
1624  // of the elements of the array (which in turn should be either
1625  // scalar types or llvm::VectorTypes.)
1626  llvm::Value *ret = llvm::UndefValue::get(type);
1627  for (int i = 0; i < arraySize; ++i) {
1628  llvm::Value *a = ExtractInst(v0, i);
1629  llvm::Value *b = ExtractInst(v1, i);
1630  llvm::Value *op = BinaryOperator(inst, a, b);
1631  ret = InsertInst(ret, op, i);
1632  }
1633  return ret;
1634  }
1635 }
1636 
1637 llvm::Value *FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
1638  if (v == NULL) {
1639  AssertPos(currentPos, m->errorCount > 0);
1640  return NULL;
1641  }
1642 
1643  // Similarly to BinaryOperator, do the operation on all the elements of
1644  // the array if we're given an array type; otherwise just do the
1645  // regular llvm operation.
1646  llvm::Type *type = v->getType();
1647  int arraySize = lArrayVectorWidth(type);
1648  if (arraySize == 0) {
1649  llvm::Instruction *binst = llvm::BinaryOperator::CreateNot(v, name ? name : "not", bblock);
1650  AddDebugPos(binst);
1651  return binst;
1652  } else {
1653  llvm::Value *ret = llvm::UndefValue::get(type);
1654  for (int i = 0; i < arraySize; ++i) {
1655  llvm::Value *a = ExtractInst(v, i);
1656  llvm::Value *op = llvm::BinaryOperator::CreateNot(a, name ? name : "not", bblock);
1657  AddDebugPos(op);
1658  ret = InsertInst(ret, op, i);
1659  }
1660  return ret;
1661  }
1662 }
1663 
1664 // Given the llvm Type that represents an ispc VectorType, return an
1665 // equally-shaped type with boolean elements. (This is the type that will
1666 // be returned from CmpInst with ispc VectorTypes).
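// For example, with an 8-wide target, [4 x <8 x float>] maps to an array of
// four 8-wide vectors of LLVMTypes::BoolType elements (whatever boolean
// representation the target's mask uses).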
1667 static llvm::Type *lGetMatchingBoolVectorType(llvm::Type *type) {
1668  llvm::ArrayType *arrayType = llvm::dyn_cast<llvm::ArrayType>(type);
1669  Assert(arrayType != NULL);
1670 
1671  llvm::VectorType *vectorElementType = llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
1672  Assert(vectorElementType != NULL);
1673  Assert((int)vectorElementType->getNumElements() == g->target->getVectorWidth());
1674 
1675  llvm::Type *base = llvm::VectorType::get(LLVMTypes::BoolType, g->target->getVectorWidth());
1676  return llvm::ArrayType::get(base, arrayType->getNumElements());
1677 }
1678 
1679 llvm::Value *FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, llvm::CmpInst::Predicate pred,
1680  llvm::Value *v0, llvm::Value *v1, const char *name) {
1681  if (v0 == NULL || v1 == NULL) {
1682  AssertPos(currentPos, m->errorCount > 0);
1683  return NULL;
1684  }
1685 
1686  AssertPos(currentPos, v0->getType() == v1->getType());
1687  llvm::Type *type = v0->getType();
1688  int arraySize = lArrayVectorWidth(type);
1689  if (arraySize == 0) {
1690  llvm::Instruction *ci = llvm::CmpInst::Create(inst, pred, v0, v1, name ? name : "cmp", bblock);
1691  AddDebugPos(ci);
1692  return ci;
1693  } else {
1694  llvm::Type *boolType = lGetMatchingBoolVectorType(type);
1695  llvm::Value *ret = llvm::UndefValue::get(boolType);
1696  for (int i = 0; i < arraySize; ++i) {
1697  llvm::Value *a = ExtractInst(v0, i);
1698  llvm::Value *b = ExtractInst(v1, i);
1699  llvm::Value *op = CmpInst(inst, pred, a, b, name);
1700  ret = InsertInst(ret, op, i);
1701  }
1702  return ret;
1703  }
1704 }
1705 
1706 llvm::Value *FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
1707  if (value == NULL) {
1708  AssertPos(currentPos, m->errorCount > 0);
1709  return NULL;
1710  }
1711 
1712  llvm::Value *ret = NULL;
1713  llvm::Type *eltType = value->getType();
1714  llvm::Type *vecType = NULL;
1715 
1716  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(eltType);
1717  if (pt != NULL) {
1718  // Varying pointers are represented as vectors of i32/i64s
1719  vecType = LLVMTypes::VoidPointerVectorType;
1720  value = PtrToIntInst(value);
1721  } else {
1722  // All other varying types are represented as vectors of the
1723  // underlying type.
1724  vecType = llvm::VectorType::get(eltType, g->target->getVectorWidth());
1725  }
1726 
1727  // Check for a constant case.
1728  if (llvm::Constant *const_val = llvm::dyn_cast<llvm::Constant>(value)) {
1729  ret = llvm::ConstantVector::getSplat(g->target->getVectorWidth(), const_val);
1730  return ret;
1731  }
1732 
1733  ret = BroadcastValue(value, vecType, name);
1734 
1735  return ret;
1736 }
1737 
1738 llvm::Value *FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, const char *name) {
1739  if (value == NULL) {
1740  AssertPos(currentPos, m->errorCount > 0);
1741  return NULL;
1742  }
1743 
1744  if (name == NULL)
1745  name = LLVMGetName(value, "_bitcast");
1746 
1747  llvm::Instruction *inst = new llvm::BitCastInst(value, type, name, bblock);
1748  AddDebugPos(inst);
1749  return inst;
1750 }
1751 
1752 llvm::Value *FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
1753  if (value == NULL) {
1754  AssertPos(currentPos, m->errorCount > 0);
1755  return NULL;
1756  }
1757 
1758  if (llvm::isa<llvm::VectorType>(value->getType()))
1759  // no-op for varying pointers; they're already vectors of ints
1760  return value;
1761 
1762  if (name == NULL)
1763  name = LLVMGetName(value, "_ptr2int");
1764  llvm::Type *type = LLVMTypes::PointerIntType;
1765  llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
1766  AddDebugPos(inst);
1767  return inst;
1768 }
1769 
1770 llvm::Value *FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, const char *name) {
1771  if (value == NULL) {
1772  AssertPos(currentPos, m->errorCount > 0);
1773  return NULL;
1774  }
1775 
1776  if (name == NULL)
1777  name = LLVMGetName(value, "_ptr2int");
1778 
1779  llvm::Type *fromType = value->getType();
1780  if (llvm::isa<llvm::VectorType>(fromType)) {
1781  // varying pointer
1782  if (fromType == toType)
1783  // already the right type--done
1784  return value;
1785  else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
1786  return TruncInst(value, toType, name);
1787  else {
1788  AssertPos(currentPos, fromType->getScalarSizeInBits() < toType->getScalarSizeInBits());
1789  return ZExtInst(value, toType, name);
1790  }
1791  }
1792 
1793  llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock);
1794  AddDebugPos(inst);
1795  return inst;
1796 }
1797 
1798 llvm::Value *FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType, const char *name) {
1799  if (value == NULL) {
1800  AssertPos(currentPos, m->errorCount > 0);
1801  return NULL;
1802  }
1803 
1804  if (name == NULL)
1805  name = LLVMGetName(value, "_int2ptr");
1806 
1807  llvm::Type *fromType = value->getType();
1808  if (llvm::isa<llvm::VectorType>(fromType)) {
1809  // varying pointer
1810  if (fromType == toType)
1811  // done
1812  return value;
1813  else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
1814  return TruncInst(value, toType, name);
1815  else {
1816  AssertPos(currentPos, fromType->getScalarSizeInBits() < toType->getScalarSizeInBits());
1817  return ZExtInst(value, toType, name);
1818  }
1819  }
1820 
1821  llvm::Instruction *inst = new llvm::IntToPtrInst(value, toType, name, bblock);
1822  AddDebugPos(inst);
1823  return inst;
1824 }
1825 
1826 llvm::Instruction *FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type, const char *name) {
1827  if (value == NULL) {
1828  AssertPos(currentPos, m->errorCount > 0);
1829  return NULL;
1830  }
1831 
1832  if (name == NULL)
1833  name = LLVMGetName(value, "_trunc");
1834 
1835  // TODO: we should probably handle the array case as in
1836  // e.g. BitCastInst(), but we don't currently need that functionality
1837  llvm::Instruction *inst = new llvm::TruncInst(value, type, name, bblock);
1838  AddDebugPos(inst);
1839  return inst;
1840 }
1841 
1842 llvm::Instruction *FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value, llvm::Type *type,
1843  const char *name) {
1844  if (value == NULL) {
1845  AssertPos(currentPos, m->errorCount > 0);
1846  return NULL;
1847  }
1848 
1849  if (name == NULL)
1850  name = LLVMGetName(value, "_cast");
1851 
1852  // TODO: we should probably handle the array case as in
1853  // e.g. BitCastInst(), but we don't currently need that functionality
1854  llvm::Instruction *inst = llvm::CastInst::Create(op, value, type, name, bblock);
1855  AddDebugPos(inst);
1856  return inst;
1857 }
1858 
1859 llvm::Instruction *FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type, const char *name) {
1860  if (value == NULL) {
1861  AssertPos(currentPos, m->errorCount > 0);
1862  return NULL;
1863  }
1864 
1865  if (name == NULL)
1866  name = LLVMGetName(value, "_cast");
1867 
1868  // TODO: we should probably handle the array case as in
1869  // e.g. BitCastInst(), but we don't currently need that functionality
1870  llvm::Instruction *inst = llvm::CastInst::CreateFPCast(value, type, name, bblock);
1871  AddDebugPos(inst);
1872  return inst;
1873 }
1874 
1875 llvm::Instruction *FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type, const char *name) {
1876  if (value == NULL) {
1877  AssertPos(currentPos, m->errorCount > 0);
1878  return NULL;
1879  }
1880 
1881  if (name == NULL)
1882  name = LLVMGetName(value, "_sext");
1883 
1884  // TODO: we should probably handle the array case as in
1885  // e.g. BitCastInst(), but we don't currently need that functionality
1886  llvm::Instruction *inst = new llvm::SExtInst(value, type, name, bblock);
1887  AddDebugPos(inst);
1888  return inst;
1889 }
1890 
1891 llvm::Instruction *FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type, const char *name) {
1892  if (value == NULL) {
1893  AssertPos(currentPos, m->errorCount > 0);
1894  return NULL;
1895  }
1896 
1897  if (name == NULL)
1898  name = LLVMGetName(value, "_zext");
1899 
1900  // TODO: we should probably handle the array case as in
1901  // e.g. BitCastInst(), but we don't currently need that functionality
1902  llvm::Instruction *inst = new llvm::ZExtInst(value, type, name, bblock);
1903  AddDebugPos(inst);
1904  return inst;
1905 }
1906 
1907 /** Utility routine used by the GetElementPtrInst() methods; given a
1908  pointer to some type (either uniform or varying) and an index (also
1909  either uniform or varying), this returns the new pointer (varying if
1910  appropriate) given by offsetting the base pointer by the index times
1911  the size of the object that the pointer points to.
1912  */
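// Conceptually, for a uniform "float *base" and a varying index, the result
// is a vector of pointer-sized integers whose lane values are
// result[i] = (intptr_t)base + index[i] * sizeof(float).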
1913 llvm::Value *FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index, const Type *ptrType) {
1914  // Find the scale factor for the index (i.e. the size of the object
1915  // that the pointer(s) point(s) to).
1916  const Type *scaleType = ptrType->GetBaseType();
1917  llvm::Value *scale = g->target->SizeOf(scaleType->LLVMType(g->ctx), bblock);
1918 
1919  bool indexIsVarying = llvm::isa<llvm::VectorType>(index->getType());
1920  llvm::Value *offset = NULL;
1921  if (indexIsVarying == false) {
1922  // Truncate or sign extend the index as appropriate to a 32 or
1923  // 64-bit type.
1924  if ((g->target->is32Bit() || g->opt.force32BitAddressing) && index->getType() == LLVMTypes::Int64Type)
1925  index = TruncInst(index, LLVMTypes::Int32Type);
1926  else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) && index->getType() == LLVMTypes::Int32Type)
1927  index = SExtInst(index, LLVMTypes::Int64Type);
1928 
1929  // do a scalar multiply to get the offset as index * scale and then
1930  // smear the result out to be a vector; this is more efficient than
1931  // first promoting both the scale and the index to vectors and then
1932  // multiplying.
1933  offset = BinaryOperator(llvm::Instruction::Mul, scale, index);
1934  offset = SmearUniform(offset);
1935  } else {
1936  // Similarly, truncate or sign extend the index to be a 32 or 64
1937  // bit vector type
1938  if ((g->target->is32Bit() || g->opt.force32BitAddressing) && index->getType() == LLVMTypes::Int64VectorType)
1939  index = TruncInst(index, LLVMTypes::Int32VectorType);
1940  else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
1941  index->getType() == LLVMTypes::Int32VectorType)
1942  index = SExtInst(index, LLVMTypes::Int64VectorType);
1943 
1944  scale = SmearUniform(scale);
1945 
1946  // offset = index * scale
1947  offset = BinaryOperator(llvm::Instruction::Mul, scale, index, LLVMGetName("mul", scale, index));
1948  }
1949 
1950  // For 64-bit targets, if we've been doing our offset calculations in
1951  // 32 bits, we still have to convert to a 64-bit value before we
1952  // actually add the offset to the pointer.
1953  if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
1954  offset = SExtInst(offset, LLVMTypes::Int64VectorType, LLVMGetName(offset, "_to_64"));
1955 
1956  // Smear out the pointer to be varying; either the base pointer or the
1957  // index must be varying for this method to be called.
1958  bool baseIsUniform = (llvm::isa<llvm::PointerType>(basePtr->getType()));
1959  AssertPos(currentPos, baseIsUniform == false || indexIsVarying == true);
1960  llvm::Value *varyingPtr = baseIsUniform ? SmearUniform(basePtr) : basePtr;
1961 
1962  // newPtr = ptr + offset
1963  return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset, LLVMGetName(basePtr, "_offset"));
1964 }
1965 
1966 void FunctionEmitContext::MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1) {
1967  llvm::Type *type0 = (*v0)->getType();
1968  llvm::Type *type1 = (*v1)->getType();
1969 
1970  // First, promote to a vector type if one of the two values is a vector
1971  // type
1972  if (llvm::isa<llvm::VectorType>(type0) && !llvm::isa<llvm::VectorType>(type1)) {
1973  *v1 = SmearUniform(*v1, "smear_v1");
1974  type1 = (*v1)->getType();
1975  }
1976  if (!llvm::isa<llvm::VectorType>(type0) && llvm::isa<llvm::VectorType>(type1)) {
1977  *v0 = SmearUniform(*v0, "smear_v0");
1978  type0 = (*v0)->getType();
1979  }
1980 
1981  // And then update to match bit widths
1982  if (type0 == LLVMTypes::Int32Type && type1 == LLVMTypes::Int64Type)
1983  *v0 = SExtInst(*v0, LLVMTypes::Int64Type);
1984  else if (type1 == LLVMTypes::Int32Type && type0 == LLVMTypes::Int64Type)
1985  *v1 = SExtInst(*v1, LLVMTypes::Int64Type);
1986  else if (type0 == LLVMTypes::Int32VectorType && type1 == LLVMTypes::Int64VectorType)
1987  *v0 = SExtInst(*v0, LLVMTypes::Int64VectorType);
1988  else if (type1 == LLVMTypes::Int32VectorType && type0 == LLVMTypes::Int64VectorType)
1989  *v1 = SExtInst(*v1, LLVMTypes::Int64VectorType);
1990 }
1991 
1992 /** Given an integer index in indexValue that's indexing into an array of
1993  soa<> structures with given soaWidth, compute the two sub-indices we
1994  need to do the actual indexing calculation:
1995 
1996  subIndices[0] = (indexValue >> log(soaWidth))
1997  subIndices[1] = (indexValue & (soaWidth-1))
1998  */
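// Worked example: with soaWidth = 8 (so logWidth = 3) and an effective index
// of 21, the major index is 21 >> 3 = 2 (the third soa<8> chunk) and the
// minor index is 21 & 7 = 5 (lane 5 within that chunk).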
1999 static llvm::Value *lComputeSliceIndex(FunctionEmitContext *ctx, int soaWidth, llvm::Value *indexValue,
2000  llvm::Value *ptrSliceOffset, llvm::Value **newSliceOffset) {
2001  // Compute the log2 of the soaWidth.
2002  Assert(soaWidth > 0);
2003  int logWidth = 0, sw = soaWidth;
2004  while (sw > 1) {
2005  ++logWidth;
2006  sw >>= 1;
2007  }
2008  Assert((1 << logWidth) == soaWidth);
2009 
2010  ctx->MatchIntegerTypes(&indexValue, &ptrSliceOffset);
2011 
2012  llvm::Type *indexType = indexValue->getType();
2013  llvm::Value *shift = LLVMIntAsType(logWidth, indexType);
2014  llvm::Value *mask = LLVMIntAsType(soaWidth - 1, indexType);
2015 
2016  llvm::Value *indexSum = ctx->BinaryOperator(llvm::Instruction::Add, indexValue, ptrSliceOffset, "index_sum");
2017 
2018  // minor index = (index & (soaWidth - 1))
2019  *newSliceOffset = ctx->BinaryOperator(llvm::Instruction::And, indexSum, mask, "slice_index_minor");
2020  // slice offsets are always 32 bits...
2021  if ((*newSliceOffset)->getType() == LLVMTypes::Int64Type)
2022  *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32Type);
2023  else if ((*newSliceOffset)->getType() == LLVMTypes::Int64VectorType)
2024  *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32VectorType);
2025 
2026  // major index = (index >> logWidth)
2027  return ctx->BinaryOperator(llvm::Instruction::AShr, indexSum, shift, "slice_index_major");
2028 }
2029 
2030 llvm::Value *FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) {
2031  // Create a small struct where the first element is the type of the
2032  // given pointer and the second element is the type of the offset
2033  // value.
2034  std::vector<llvm::Type *> eltTypes;
2035  eltTypes.push_back(ptr->getType());
2036  eltTypes.push_back(offset->getType());
2037  llvm::StructType *st = llvm::StructType::get(*g->ctx, eltTypes);
2038 
2039  llvm::Value *ret = llvm::UndefValue::get(st);
2040  ret = InsertInst(ret, ptr, 0, LLVMGetName(ret, "_slice_ptr"));
2041  ret = InsertInst(ret, offset, 1, LLVMGetName(ret, "_slice_offset"));
2042  return ret;
2043 }
2044 
2045 llvm::Value *FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, const Type *ptrRefType,
2046  const char *name) {
2047  if (basePtr == NULL || index == NULL) {
2048  AssertPos(currentPos, m->errorCount > 0);
2049  return NULL;
2050  }
2051 
2052  // Regularize to a standard pointer type for basePtr's type
2053  const PointerType *ptrType;
2054  if (CastType<ReferenceType>(ptrRefType) != NULL)
2055  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2056  else {
2057  ptrType = CastType<PointerType>(ptrRefType);
2058  }
2059  AssertPos(currentPos, ptrType != NULL);
2060 
2061  if (ptrType->IsSlice()) {
2062  AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));
2063 
2064  llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
2065  if (ptrType->IsFrozenSlice() == false) {
2066  // For slice pointers that aren't frozen, we compute a new
2067  // index based on the given index plus the offset in the slice
2068  // pointer. This gives us an updated integer slice index for
2069  // the resulting slice pointer and then an index to index into
2070  // the soa<> structs with.
2071  llvm::Value *newSliceOffset;
2072  int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
2073  index = lComputeSliceIndex(this, soaWidth, index, ptrSliceOffset, &newSliceOffset);
2074  ptrSliceOffset = newSliceOffset;
2075  }
2076 
2077  // Handle the indexing into the soa<> structs with the major
2078  // component of the index through a recursive call
2079  llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index, ptrType->GetAsNonSlice(), name);
2080 
2081  // And mash the results together for the return value
2082  return MakeSlicePointer(p, ptrSliceOffset);
2083  }
2084 
2085  // Double-check consistency between the given pointer type and its LLVM
2086  // type.
2087  if (ptrType->IsUniformType())
2088  AssertPos(currentPos, llvm::isa<llvm::PointerType>(basePtr->getType()));
2089  else if (ptrType->IsVaryingType())
2090  AssertPos(currentPos, llvm::isa<llvm::VectorType>(basePtr->getType()));
2091 
2092  bool indexIsVaryingType = llvm::isa<llvm::VectorType>(index->getType());
2093 
2094  if (indexIsVaryingType == false && ptrType->IsUniformType() == true) {
2095  // The easy case: both the base pointer and the indices are
2096  // uniform, so just emit the regular LLVM GEP instruction
2097  llvm::Value *ind[1] = {index};
2098  llvm::ArrayRef<llvm::Value *> arrayRef(&ind[0], &ind[1]);
2099 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
2100  llvm::Instruction *inst = llvm::GetElementPtrInst::Create(basePtr, arrayRef, name ? name : "gep", bblock);
2101 #else /* LLVM 3.7+ */
2102  llvm::Instruction *inst =
2103  llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, name ? name : "gep", bblock);
2104 #endif
2105  AddDebugPos(inst);
2106  return inst;
2107  } else
2108  return applyVaryingGEP(basePtr, index, ptrType);
2109 }
2110 
2111 llvm::Value *FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0, llvm::Value *index1,
2112  const Type *ptrRefType, const char *name) {
2113  if (basePtr == NULL || index0 == NULL || index1 == NULL) {
2114  AssertPos(currentPos, m->errorCount > 0);
2115  return NULL;
2116  }
2117 
2118  // Regularize the pointer type for basePtr
2119  const PointerType *ptrType = NULL;
2120  if (CastType<ReferenceType>(ptrRefType) != NULL)
2121  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2122  else {
2123  ptrType = CastType<PointerType>(ptrRefType);
2124  AssertPos(currentPos, ptrType != NULL);
2125  }
2126 
2127  if (ptrType->IsSlice()) {
2128  // Similar to the 1D GEP implementation above, for non-frozen slice
2129  // pointers we do the two-step indexing calculation and then pass
2130  // the new major index on to a recursive GEP call.
2131  AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));
2132  llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
2133  if (ptrType->IsFrozenSlice() == false) {
2134  llvm::Value *newSliceOffset;
2135  int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
2136  index1 = lComputeSliceIndex(this, soaWidth, index1, ptrSliceOffset, &newSliceOffset);
2137  ptrSliceOffset = newSliceOffset;
2138  }
2139 
2140  llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index0, index1, ptrType->GetAsNonSlice(), name);
2141  return MakeSlicePointer(p, ptrSliceOffset);
2142  }
2143 
2144  bool index0IsVaryingType = llvm::isa<llvm::VectorType>(index0->getType());
2145  bool index1IsVaryingType = llvm::isa<llvm::VectorType>(index1->getType());
2146 
2147  if (index0IsVaryingType == false && index1IsVaryingType == false && ptrType->IsUniformType() == true) {
2148  // The easy case: both the base pointer and the indices are
2149  // uniform, so just emit the regular LLVM GEP instruction
2150  llvm::Value *indices[2] = {index0, index1};
2151  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
2152 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
2153  llvm::Instruction *inst = llvm::GetElementPtrInst::Create(basePtr, arrayRef, name ? name : "gep", bblock);
2154 #else /* LLVM 3.7+ */
2155  llvm::Instruction *inst =
2156  llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, name ? name : "gep", bblock);
2157 #endif
2158  AddDebugPos(inst);
2159  return inst;
2160  } else {
2161  // Handle the first dimension with index0
2162  llvm::Value *ptr0 = GetElementPtrInst(basePtr, index0, ptrType);
2163 
2164  // Now index into the second dimension with index1. First figure
2165  // out the type of ptr0.
2166  const Type *baseType = ptrType->GetBaseType();
2167  const SequentialType *st = CastType<SequentialType>(baseType);
2168  AssertPos(currentPos, st != NULL);
2169 
2170  bool ptr0IsUniform = llvm::isa<llvm::PointerType>(ptr0->getType());
2171  const Type *ptr0BaseType = st->GetElementType();
2172  const Type *ptr0Type =
2173  ptr0IsUniform ? PointerType::GetUniform(ptr0BaseType) : PointerType::GetVarying(ptr0BaseType);
2174 
2175  return applyVaryingGEP(ptr0, index1, ptr0Type);
2176  }
2177 }
2178 
2179 llvm::Value *FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, const Type *ptrRefType,
2180  const char *name, const PointerType **resultPtrType) {
2181  if (resultPtrType != NULL)
2182  AssertPos(currentPos, ptrRefType != NULL);
2183 
2184  llvm::PointerType *llvmPtrType = llvm::dyn_cast<llvm::PointerType>(fullBasePtr->getType());
2185  if (llvmPtrType != NULL) {
2186  llvm::StructType *llvmStructType = llvm::dyn_cast<llvm::StructType>(llvmPtrType->getElementType());
2187  if (llvmStructType != NULL && llvmStructType->isSized() == false) {
2188  AssertPos(currentPos, m->errorCount > 0);
2189  return NULL;
2190  }
2191  }
2192 
2193  // (Unfortunately) it's not required to pass a non-NULL ptrRefType, but
2194  // if we have one, regularize into a pointer type.
2195  const PointerType *ptrType = NULL;
2196  if (ptrRefType != NULL) {
2197  // Normalize references to uniform pointers
2198  if (CastType<ReferenceType>(ptrRefType) != NULL)
2199  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2200  else
2201  ptrType = CastType<PointerType>(ptrRefType);
2202  AssertPos(currentPos, ptrType != NULL);
2203  }
2204 
2205  // Similarly, we have to look at the LLVM type of the given pointer
2206  // value (rather than at ptrType) to see whether we have a slice
2207  // pointer; this is also unfortunate...
2208  llvm::Value *basePtr = fullBasePtr;
2209  bool baseIsSlicePtr = llvm::isa<llvm::StructType>(fullBasePtr->getType());
2210  const PointerType *rpt;
2211  if (baseIsSlicePtr) {
2212  AssertPos(currentPos, ptrType != NULL);
2213  // Update basePtr to just be the part that actually points to the
2214  // start of an soa<> struct for now; the element offset computation
2215  // doesn't change the slice offset, so we'll incorporate that into
2216  // the final value right before this method returns.
2217  basePtr = ExtractInst(fullBasePtr, 0);
2218  if (resultPtrType == NULL)
2219  resultPtrType = &rpt;
2220  }
2221 
2222  // Return the pointer type of the result of this call, for callers that
2223  // want it.
2224  if (resultPtrType != NULL) {
2225  AssertPos(currentPos, ptrType != NULL);
2226  const CollectionType *ct = CastType<CollectionType>(ptrType->GetBaseType());
2227  AssertPos(currentPos, ct != NULL);
2228  *resultPtrType = new PointerType(ct->GetElementType(elementNum), ptrType->GetVariability(),
2229  ptrType->IsConstType(), ptrType->IsSlice());
2230  }
2231 
2232  llvm::Value *resultPtr = NULL;
2233  if (ptrType == NULL || ptrType->IsUniformType()) {
2234  // If the pointer is uniform, we can use the regular LLVM GEP.
2235  llvm::Value *offsets[2] = {LLVMInt32(0), LLVMInt32(elementNum)};
2236  llvm::ArrayRef<llvm::Value *> arrayRef(&offsets[0], &offsets[2]);
2237 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
2238  resultPtr = llvm::GetElementPtrInst::Create(basePtr, arrayRef, name ? name : "struct_offset", bblock);
2239 #else /* LLVM 3.7+ */
2240  resultPtr =
2241  llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, name ? name : "struct_offset", bblock);
2242 #endif
2243  } else {
2244  // Otherwise do the math to find the offset and add it to the given
2245  // varying pointers
2246  const StructType *st = CastType<StructType>(ptrType->GetBaseType());
2247  llvm::Value *offset = NULL;
2248  if (st != NULL)
2249  // If the pointer is to a structure, Target::StructOffset() gives
2250  // us the offset in bytes to the given element of the structure
2251  offset = g->target->StructOffset(st->LLVMType(g->ctx), elementNum, bblock);
2252  else {
2253  // Otherwise we should have a vector or array here and the offset
2254  // is given by the element number times the size of the element
2255  // type of the vector.
2256  const SequentialType *st = CastType<SequentialType>(ptrType->GetBaseType());
2257  AssertPos(currentPos, st != NULL);
2258  llvm::Value *size = g->target->SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock);
2259  llvm::Value *scale =
2260  (g->target->is32Bit() || g->opt.force32BitAddressing) ? LLVMInt32(elementNum) : LLVMInt64(elementNum);
2261  offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
2262  }
2263 
2264  offset = SmearUniform(offset, "offset_smear");
2265 
2266  if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
2267  // If we're doing 32 bit addressing with a 64 bit target, although
2268  // we did the math above in 32 bit, we need to go to 64 bit before
2269  // we add the offset to the varying pointers.
2270  offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
2271 
2272  resultPtr = BinaryOperator(llvm::Instruction::Add, basePtr, offset, "struct_ptr_offset");
2273  }
2274 
2275  // Finally, if we had a slice pointer going in, mash it back together with
2276  // the original (unchanged) slice offset.
2277  if (baseIsSlicePtr)
2278  return MakeSlicePointer(resultPtr, ExtractInst(fullBasePtr, 1));
2279  else
2280  return resultPtr;
2281 }
2282 
2283 llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
2284  if (ptr == NULL) {
2285  AssertPos(currentPos, m->errorCount > 0);
2286  return NULL;
2287  }
2288 
2289  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(ptr->getType());
2290  AssertPos(currentPos, pt != NULL);
2291 
2292  if (name == NULL)
2293  name = LLVMGetName(ptr, "_load");
2294 
2295  llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock);
2296 
2297  if (g->opt.forceAlignedMemory && llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
2298  inst->setAlignment(g->target->getNativeVectorAlignment());
2299  }
2300 
2301  AddDebugPos(inst);
2302  return inst;
2303 }
2304 
2305 /** Given a slice pointer to soa'd data that is a basic type (atomic,
2306  pointer, or enum type), use the slice offset to compute pointer(s) to
2307  the appropriate individual data element(s).
2308  */
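// Conceptually, if the slice pointer is the pair { T (*ptr)[soaWidth], int
// offset }, the final element address computed here is &(*ptr)[offset]: the
// start of the SOA chunk plus the slice offset into its soa-width wide array.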
2309 static llvm::Value *lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr, const PointerType **ptrType) {
2310  Assert(CastType<PointerType>(*ptrType) != NULL);
2311 
2312  llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, LLVMGetName(ptr, "_ptr"));
2313  llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, LLVMGetName(ptr, "_offset"));
2314 
2315  // slicePtr should be a pointer to an soa-width wide array of the
2316  // final atomic/enum/pointer type
2317  const Type *unifBaseType = (*ptrType)->GetBaseType()->GetAsUniformType();
2318  Assert(Type::IsBasicType(unifBaseType));
2319 
2320  // The final pointer type is a uniform or varying pointer to the
2321  // underlying uniform type, depending on whether the given pointer is
2322  // uniform or varying.
2323  *ptrType =
2324  (*ptrType)->IsUniformType() ? PointerType::GetUniform(unifBaseType) : PointerType::GetVarying(unifBaseType);
2325 
2326  // For uniform pointers, bitcast to a pointer to the uniform element
2327  // type, so that the GEP below does the desired indexing
2328  if ((*ptrType)->IsUniformType())
2329  slicePtr = ctx->BitCastInst(slicePtr, (*ptrType)->LLVMType(g->ctx));
2330 
2331  // And finally index based on the slice offset
2332  return ctx->GetElementPtrInst(slicePtr, sliceOffset, *ptrType, LLVMGetName(slicePtr, "_final_gep"));
2333 }
2334 
2335 /** Utility routine that loads from a uniform pointer to soa<> data,
2336  returning a regular uniform (non-SOA result).
2337  */
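// For example, loading a "uniform struct { float a; int b; }" from soa<8>
// storage issues one element load for a and one for b (the two slices are
// not contiguous in memory) and reassembles the result with InsertInst().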
2338 llvm::Value *FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask, const PointerType *ptrType,
2339  const char *name) {
2340  const Type *unifType = ptrType->GetBaseType()->GetAsUniformType();
2341 
2342  const CollectionType *ct = CastType<CollectionType>(ptrType->GetBaseType());
2343  if (ct != NULL) {
2344  // If we have a struct/array, we need to decompose it into
2345  // individual element loads to fill in the result structure since
2346  // the SOA slice of values we need isn't contiguous in memory...
2347  llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx);
2348  llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
2349 
2350  for (int i = 0; i < ct->GetElementCount(); ++i) {
2351  const PointerType *eltPtrType;
2352  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType, "elt_offset", &eltPtrType);
2353  llvm::Value *eltValue = LoadInst(eltPtr, mask, eltPtrType, name);
2354  retValue = InsertInst(retValue, eltValue, i, "set_value");
2355  }
2356 
2357  return retValue;
2358  } else {
2359  // Otherwise we've made our way to a slice pointer to a basic type;
2360  // we need to apply the slice offset into this terminal SOA array
2361  // and then perform the final load
2362  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2363  return LoadInst(ptr, mask, ptrType, name);
2364  }
2365 }
2366 
2367 llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, const Type *ptrRefType,
2368  const char *name, bool one_elem) {
2369  if (ptr == NULL) {
2370  AssertPos(currentPos, m->errorCount > 0);
2371  return NULL;
2372  }
2373 
2374  AssertPos(currentPos, ptrRefType != NULL && mask != NULL);
2375 
2376  if (name == NULL)
2377  name = LLVMGetName(ptr, "_load");
2378 
2379  const PointerType *ptrType;
2380  if (CastType<ReferenceType>(ptrRefType) != NULL)
2381  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2382  else {
2383  ptrType = CastType<PointerType>(ptrRefType);
2384  AssertPos(currentPos, ptrType != NULL);
2385  }
2386 
2387  if (CastType<UndefinedStructType>(ptrType->GetBaseType())) {
2388  Error(currentPos, "Unable to load to undefined struct type \"%s\".",
2389  ptrType->GetBaseType()->GetString().c_str());
2390  return NULL;
2391  }
2392 
2393  if (ptrType->IsUniformType()) {
2394  if (ptrType->IsSlice()) {
2395  return loadUniformFromSOA(ptr, mask, ptrType, name);
2396  } else {
2397  // FIXME: same issue as above load inst regarding alignment...
2398  //
2399  // If the ptr is a straight up regular pointer, then just issue
2400  // a regular load. First figure out the alignment; in general we
2401  // can just assume the natural alignment (0 here), but for varying
2402  // atomic types, we need to make sure that the compiler emits
2403  // unaligned vector loads, so we specify a reduced alignment here.
2404  int align = 0;
2405  const AtomicType *atomicType = CastType<AtomicType>(ptrType->GetBaseType());
2406  if (atomicType != NULL && atomicType->IsVaryingType())
2407  // We actually just want to align to the vector element
2408  // alignment, but can't easily get that here, so just tell LLVM
2409  // it's totally unaligned. (This shouldn't make any difference
2410  // vs the proper alignment in practice.)
2411  align = 1;
2412  llvm::Instruction *inst = new llvm::LoadInst(ptr, name, false /* not volatile */, align, bblock);
2413  AddDebugPos(inst);
2414  return inst;
2415  }
2416  } else {
2417  // Otherwise we should have a varying ptr and it's time for a
2418  // gather.
2419  llvm::Value *gather_result = gather(ptr, ptrType, GetFullMask(), name);
2420  if (!one_elem)
2421  return gather_result;
2422 
2423  // This is a kludge: when we dereference a varying pointer to a uniform
2424  // struct with a "bound uniform" member, we should return the first unmasked member.
2425  Warning(currentPos, "Dereferencing varying pointer to uniform struct with 'bound uniform' member,\n"
2426  " only one value will survive. Possible loss of data.");
2427  // Call the target-dependent movmsk function to turn the vector mask
2428  // into an i64 value
2429  std::vector<Symbol *> mm;
2430  m->symbolTable->LookupFunction("__movmsk", &mm);
2431  if (g->target->getMaskBitCount() == 1)
2432  AssertPos(currentPos, mm.size() == 1);
2433  else
2434  // There should be one with signed int signature, one unsigned int.
2435  AssertPos(currentPos, mm.size() == 2);
2436  // We can actually call either one, since both are i32s as far as
2437  // LLVM's type system is concerned...
2438  llvm::Function *fmm = mm[0]->function;
2439  llvm::Value *int_mask = CallInst(fmm, NULL, mask, LLVMGetName(mask, "_movmsk"));
2440  std::vector<Symbol *> lz;
2441  m->symbolTable->LookupFunction("__count_trailing_zeros_i64", &lz);
2442  llvm::Function *flz = lz[0]->function;
2443  llvm::Value *elem_idx = CallInst(flz, NULL, int_mask, LLVMGetName(mask, "_clz"));
2444  llvm::Value *elem = llvm::ExtractElementInst::Create(gather_result, elem_idx,
2445  LLVMGetName(gather_result, "_umasked_elem"), bblock);
2446  return elem;
2447  }
2448 }
2449 
2450 llvm::Value *FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType, llvm::Value *mask,
2451  const char *name) {
2452  // We should have a varying pointer if we get here...
2453  AssertPos(currentPos, ptrType->IsVaryingType());
2454 
2455  const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType();
2456  llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
2457 
2458  const CollectionType *collectionType = CastType<CollectionType>(ptrType->GetBaseType());
2459  if (collectionType != NULL) {
2460  // For collections, recursively gather element wise to find the
2461  // result.
2462  llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
2463 
2464  const CollectionType *returnCollectionType = CastType<CollectionType>(returnType->GetBaseType());
2465 
2466  for (int i = 0; i < collectionType->GetElementCount(); ++i) {
2467  const PointerType *eltPtrType;
2468  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType, "gather_elt_ptr", &eltPtrType);
2469 
2470  eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType);
2471 
2472  // This is a kludge: when we dereference a varying pointer to a uniform
2473  // struct with a "bound uniform" member, we should return the first unmasked member.
2474  int need_one_elem = CastType<StructType>(ptrType->GetBaseType()) &&
2475  returnCollectionType->GetElementType(i)->IsUniformType();
2476  // This in turn will be another gather
2477  llvm::Value *eltValues = LoadInst(eltPtr, mask, eltPtrType, name, need_one_elem);
2478 
2479  retValue = InsertInst(retValue, eltValues, i, "set_value");
2480  }
2481  return retValue;
2482  } else if (ptrType->IsSlice()) {
2483  // If we have a slice pointer, we need to add the final slice
2484  // offset here right before issuing the actual gather
2485  //
2486  // FIXME: would it be better to do the corresponding same thing for
2487  // all of the varying offsets stuff here (and in scatter)?
2488  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2489  }
2490 
2491  // Otherwise we should just have a basic scalar or pointer type and we
2492  // can go and do the actual gather
2493  AddInstrumentationPoint("gather");
2494 
2495  // Figure out which gather function to call based on the size of
2496  // the elements.
2497  const PointerType *pt = CastType<PointerType>(returnType);
2498  const char *funcName = NULL;
2499  if (pt != NULL)
2500  funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" : "__pseudo_gather64_i64";
2501  else if (llvmReturnType == LLVMTypes::DoubleVectorType)
2502  funcName = g->target->is32Bit() ? "__pseudo_gather32_double" : "__pseudo_gather64_double";
2503  else if (llvmReturnType == LLVMTypes::Int64VectorType)
2504  funcName = g->target->is32Bit() ? "__pseudo_gather32_i64" : "__pseudo_gather64_i64";
2505  else if (llvmReturnType == LLVMTypes::FloatVectorType)
2506  funcName = g->target->is32Bit() ? "__pseudo_gather32_float" : "__pseudo_gather64_float";
2507  else if (llvmReturnType == LLVMTypes::Int32VectorType)
2508  funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" : "__pseudo_gather64_i32";
2509  else if (llvmReturnType == LLVMTypes::Int16VectorType)
2510  funcName = g->target->is32Bit() ? "__pseudo_gather32_i16" : "__pseudo_gather64_i16";
2511  else {
2512  AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType);
2513  funcName = g->target->is32Bit() ? "__pseudo_gather32_i8" : "__pseudo_gather64_i8";
2514  }
2515 
2516  llvm::Function *gatherFunc = m->module->getFunction(funcName);
2517  AssertPos(currentPos, gatherFunc != NULL);
2518 
2519  llvm::Value *gatherCall = CallInst(gatherFunc, NULL, ptr, mask, name);
2520 
2521  // Add metadata about the source file location so that the
2522  // optimization passes can print useful performance warnings if we
2523  // can't optimize out this gather
2524  if (disableGSWarningCount == 0)
2525  addGSMetadata(gatherCall, currentPos);
2526 
2527  return gatherCall;
2528 }
2529 
2530 /** Add metadata to the given instruction to encode the current source file
2531  position. This data is used in the lGetSourcePosFromMetadata()
2532  function in opt.cpp.
2533 */
2534 void FunctionEmitContext::addGSMetadata(llvm::Value *v, SourcePos pos) {
2535  llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(v);
2536  if (inst == NULL)
2537  return;
2538 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
2539  llvm::Value *str = llvm::MDString::get(*g->ctx, pos.name);
2541 #else /* LLVM 3.6+ */
2541  llvm::MDString *str = llvm::MDString::get(*g->ctx, pos.name);
2542 #endif
2543  llvm::MDNode *md = llvm::MDNode::get(*g->ctx, str);
2544  inst->setMetadata("filename", md);
2545 
2546 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
2547  llvm::Value *first_line = LLVMInt32(pos.first_line);
2549 #else /* LLVM 3.6+ */
2549  llvm::Metadata *first_line = llvm::ConstantAsMetadata::get(LLVMInt32(pos.first_line));
2550 #endif
2551  md = llvm::MDNode::get(*g->ctx, first_line);
2552  inst->setMetadata("first_line", md);
2553 
2554 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
2555  llvm::Value *first_column = LLVMInt32(pos.first_column);
2557 #else /* LLVM 3.6+ */
2557  llvm::Metadata *first_column = llvm::ConstantAsMetadata::get(LLVMInt32(pos.first_column));
2558 #endif
2559  md = llvm::MDNode::get(*g->ctx, first_column);
2560  inst->setMetadata("first_column", md);
2561 
2562 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
2563  llvm::Value *last_line = LLVMInt32(pos.last_line);
2565 #else /* LLVM 3.6+ */
2565  llvm::Metadata *last_line = llvm::ConstantAsMetadata::get(LLVMInt32(pos.last_line));
2566 #endif
2567  md = llvm::MDNode::get(*g->ctx, last_line);
2568  inst->setMetadata("last_line", md);
2569 
2570 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
2571  llvm::Value *last_column = LLVMInt32(pos.last_column);
2573 #else /* LLVM 3.6+ */
2573  llvm::Metadata *last_column = llvm::ConstantAsMetadata::get(LLVMInt32(pos.last_column));
2574 #endif
2575  md = llvm::MDNode::get(*g->ctx, last_column);
2576  inst->setMetadata("last_column", md);
2577 }
2578 
2579 llvm::Value *FunctionEmitContext::AllocaInst(llvm::Type *llvmType, const char *name, int align, bool atEntryBlock) {
2580  if (llvmType == NULL) {
2581  AssertPos(currentPos, m->errorCount > 0);
2582  return NULL;
2583  }
2584 
2585  llvm::AllocaInst *inst = NULL;
2586  if (atEntryBlock) {
2587  // We usually insert it right before the jump instruction at the
2588  // end of allocaBlock
2589  llvm::Instruction *retInst = allocaBlock->getTerminator();
2590  AssertPos(currentPos, retInst);
2591 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
2592  inst = new llvm::AllocaInst(llvmType, name ? name : "", retInst);
2593 #else // LLVM 5.0+
2594  unsigned AS = llvmFunction->getParent()->getDataLayout().getAllocaAddrSpace();
2595  inst = new llvm::AllocaInst(llvmType, AS, name ? name : "", retInst);
2596 #endif
2597  } else {
2598  // Unless the caller overrode the default and wants it in the
2599  // current basic block
2600 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
2601  inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock);
2602 #else // LLVM 5.0+
2603  unsigned AS = llvmFunction->getParent()->getDataLayout().getAllocaAddrSpace();
2604  inst = new llvm::AllocaInst(llvmType, AS, name ? name : "", bblock);
2605 #endif
2606  }
2607 
2608  // If no alignment was specified but we have an array of a uniform
2609  // type, then align it to the native vector alignment; it's not
2610  // unlikely that this array will be loaded into varying variables with
2611  // what will be aligned accesses if the uniform -> varying load is done
2612  // in regular chunks.
2613  llvm::ArrayType *arrayType = llvm::dyn_cast<llvm::ArrayType>(llvmType);
2614  if (align == 0 && arrayType != NULL && !llvm::isa<llvm::VectorType>(arrayType->getElementType()))
2615  align = g->target->getNativeVectorAlignment();
2616 
2617  if (align != 0)
2618  inst->setAlignment(align);
2619  // Don't add debugging info to alloca instructions
2620  return inst;
2621 }
2622 
2623 /** Code to store the given varying value to the given location, only
2624  storing the elements that correspond to active program instances as
2625  given by the provided storeMask value. Note that the lvalue is only a
2626  single pointer, not a varying lvalue of one pointer per program
2627  instance (that case is handled by scatters).
2628  */
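// The per-lane semantics, roughly, for an N-wide target:
// for (int i = 0; i < N; ++i)
//  if (mask[i]) ((element_type *)ptr)[i] = value[i];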
2629 void FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, const Type *ptrType, llvm::Value *mask) {
2630  if (value == NULL || ptr == NULL) {
2631  AssertPos(currentPos, m->errorCount > 0);
2632  return;
2633  }
2634 
2635  AssertPos(currentPos, CastType<PointerType>(ptrType) != NULL);
2636  AssertPos(currentPos, ptrType->IsUniformType());
2637 
2638  const Type *valueType = ptrType->GetBaseType();
2639  const CollectionType *collectionType = CastType<CollectionType>(valueType);
2640  if (collectionType != NULL) {
2641  // Assigning a structure / array / vector. Handle each element
2642  // individually with what turns into a recursive call to
2643  // maskedStore()
2644  for (int i = 0; i < collectionType->GetElementCount(); ++i) {
2645  const Type *eltType = collectionType->GetElementType(i);
2646  if (eltType == NULL) {
2647  Assert(m->errorCount > 0);
2648  continue;
2649  }
2650  llvm::Value *eltValue = ExtractInst(value, i, "value_member");
2651  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType, "struct_ptr_ptr");
2652  const Type *eltPtrType = PointerType::GetUniform(eltType);
2653  StoreInst(eltValue, eltPtr, mask, eltType, eltPtrType);
2654  }
2655  return;
2656  }
2657 
2658  // We must have a regular atomic, enumerator, or pointer type at this
2659  // point.
2660  AssertPos(currentPos, Type::IsBasicType(valueType));
2661  valueType = valueType->GetAsNonConstType();
2662 
2663  // Figure out if we need a 8, 16, 32 or 64-bit masked store.
2664  llvm::Function *maskedStoreFunc = NULL;
2665  llvm::Type *llvmValueType = value->getType();
2666 
2667  const PointerType *pt = CastType<PointerType>(valueType);
2668  if (pt != NULL) {
2669  if (pt->IsSlice()) {
2670  // Masked store of (varying) slice pointer.
2671  AssertPos(currentPos, pt->IsVaryingType());
2672 
2673  // First, extract the pointer from the slice struct and masked
2674  // store that.
2675  llvm::Value *v0 = ExtractInst(value, 0);
2676  llvm::Value *p0 = AddElementOffset(ptr, 0, ptrType);
2677  maskedStore(v0, p0, PointerType::GetUniform(pt->GetAsNonSlice()), mask);
2678 
2679  // And then do same for the integer offset
2680  llvm::Value *v1 = ExtractInst(value, 1);
2681  llvm::Value *p1 = AddElementOffset(ptr, 1, ptrType);
2682  const Type *offsetType = AtomicType::VaryingInt32;
2683  maskedStore(v1, p1, PointerType::GetUniform(offsetType), mask);
2684 
2685  return;
2686  }
2687 
2688  if (g->target->is32Bit())
2689  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
2690  else
2691  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
2692  } else if (llvmValueType == LLVMTypes::Int1VectorType) {
2693  llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask, LLVMMaskAllOn, "~mask");
2694  llvm::Value *old = LoadInst(ptr);
2695  llvm::Value *maskedOld = BinaryOperator(llvm::Instruction::And, old, notMask, "old&~mask");
2696  llvm::Value *maskedNew = BinaryOperator(llvm::Instruction::And, value, mask, "new&mask");
2697  llvm::Value *final = BinaryOperator(llvm::Instruction::Or, maskedOld, maskedNew, "old_new_result");
2698  StoreInst(final, ptr);
2699  return;
2700  } else if (llvmValueType == LLVMTypes::DoubleVectorType) {
2701  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_double");
2702  } else if (llvmValueType == LLVMTypes::Int64VectorType) {
2703  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
2704  } else if (llvmValueType == LLVMTypes::FloatVectorType) {
2705  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_float");
2706  } else if (llvmValueType == LLVMTypes::Int32VectorType) {
2707  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
2708  } else if (llvmValueType == LLVMTypes::Int16VectorType) {
2709  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i16");
2710  } else if (llvmValueType == LLVMTypes::Int8VectorType) {
2711  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i8");
2712  }
2713  AssertPos(currentPos, maskedStoreFunc != NULL);
2714 
2715  std::vector<llvm::Value *> args;
2716  args.push_back(ptr);
2717  args.push_back(value);
2718  args.push_back(mask);
2719  CallInst(maskedStoreFunc, NULL, args);
2720 }
2721 
2722 /** Scatter the given varying value to the locations given by the varying
2723  lvalue (which should be an array of pointers with size equal to the
2724  target's vector width). We want to store each rvalue element at the
2725  corresponding pointer's location, *if* the mask for the corresponding
2726  program instance is on. If it's off, don't do anything.
2727 */
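// The per-lane semantics, roughly, for an N-wide target:
// for (int i = 0; i < N; ++i)
//  if (mask[i]) *ptr[i] = value[i];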
2728 void FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, const Type *valueType, const Type *origPt,
2729  llvm::Value *mask) {
2730  const PointerType *ptrType = CastType<PointerType>(origPt);
2731  AssertPos(currentPos, ptrType != NULL);
2732  AssertPos(currentPos, ptrType->IsVaryingType());
2733 
2734  const CollectionType *srcCollectionType = CastType<CollectionType>(valueType);
2735  if (srcCollectionType != NULL) {
2736  // We're scattering a collection type--we need to keep track of the
2737  // source type (the type of the data values to be stored) and the
2738  // destination type (the type of objects in memory that will be
2739  // stored into) separately. This is necessary so that we can get
2740  // all of the addressing calculations right if we're scattering
2741  // from a varying struct to an array of uniform instances of the
2742  // same struct type, versus scattering into an array of varying
2743  // instances of the struct type, etc.
2744  const CollectionType *dstCollectionType = CastType<CollectionType>(ptrType->GetBaseType());
2745  AssertPos(currentPos, dstCollectionType != NULL);
2746 
2747  // Scatter the collection elements individually
2748  for (int i = 0; i < srcCollectionType->GetElementCount(); ++i) {
2749  // First, get the values for the current element out of the
2750  // source.
2751  llvm::Value *eltValue = ExtractInst(value, i);
2752  const Type *srcEltType = srcCollectionType->GetElementType(i);
2753 
2754  // We may be scattering a uniform atomic element; in this case
2755  // we'll smear it out to be varying before making the recursive
2756  // scatter() call below.
2757  if (srcEltType->IsUniformType() && Type::IsBasicType(srcEltType)) {
2758  eltValue = SmearUniform(eltValue, "to_varying");
2759  srcEltType = srcEltType->GetAsVaryingType();
2760  }
2761 
2762  // Get the (varying) pointer to the i'th element of the target
2763  // collection
2764  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType);
2765 
2766  // The destination element type may be uniform (e.g. if we're
2767  // scattering to an array of uniform structs). Thus, we need
2768  // to be careful about passing the correct type to
2769  // addVaryingOffsetsIfNeeded() here.
2770  const Type *dstEltType = dstCollectionType->GetElementType(i);
2771  const PointerType *dstEltPtrType = PointerType::GetVarying(dstEltType);
2772  if (ptrType->IsSlice())
2773  dstEltPtrType = dstEltPtrType->GetAsSlice();
2774 
2775  eltPtr = addVaryingOffsetsIfNeeded(eltPtr, dstEltPtrType);
2776 
2777  // And recursively scatter() until we hit a basic type, at
2778  // which point the actual memory operations can be performed...
2779  scatter(eltValue, eltPtr, srcEltType, dstEltPtrType, mask);
2780  }
2781  return;
2782  } else if (ptrType->IsSlice()) {
2783  // As with gather, we need to add the final slice offset once we
2784  // get to a terminal SOA array of basic types.
2785  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2786  }
2787 
2788  const PointerType *pt = CastType<PointerType>(valueType);
2789 
2790  // And everything should be a pointer or atomic (or enum) from here on out...
2791  AssertPos(currentPos,
2792  pt != NULL || CastType<AtomicType>(valueType) != NULL || CastType<EnumType>(valueType) != NULL);
2793 
2794  llvm::Type *type = value->getType();
2795  const char *funcName = NULL;
2796  if (pt != NULL) {
2797  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" : "__pseudo_scatter64_i64";
2798  } else if (type == LLVMTypes::DoubleVectorType) {
2799  funcName = g->target->is32Bit() ? "__pseudo_scatter32_double" : "__pseudo_scatter64_double";
2800  } else if (type == LLVMTypes::Int64VectorType) {
2801  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i64" : "__pseudo_scatter64_i64";
2802  } else if (type == LLVMTypes::FloatVectorType) {
2803  funcName = g->target->is32Bit() ? "__pseudo_scatter32_float" : "__pseudo_scatter64_float";
2804  } else if (type == LLVMTypes::Int32VectorType) {
2805  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" : "__pseudo_scatter64_i32";
2806  } else if (type == LLVMTypes::Int16VectorType) {
2807  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i16" : "__pseudo_scatter64_i16";
2808  } else if (type == LLVMTypes::Int8VectorType) {
2809  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i8" : "__pseudo_scatter64_i8";
2810  }
2811 
2812  llvm::Function *scatterFunc = m->module->getFunction(funcName);
2813  AssertPos(currentPos, scatterFunc != NULL);
2814 
2815  AddInstrumentationPoint("scatter");
2816 
2817  std::vector<llvm::Value *> args;
2818  args.push_back(ptr);
2819  args.push_back(value);
2820  args.push_back(mask);
2821  llvm::Value *inst = CallInst(scatterFunc, NULL, args);
2822 
2823  if (disableGSWarningCount == 0)
2824  addGSMetadata(inst, currentPos);
2825 }
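
// Editor's note: a minimal sketch (not ispc source) of the per-lane
// semantics the __pseudo_scatter* calls above are expected to provide; the
// pseudo functions are placeholders that later compilation stages replace
// with target-specific code. The function name, pointer representation,
// and bitmask encoding here are illustrative assumptions.
static void lScatterReferenceSemantics(int32_t *ptrs[], const int32_t vals[], uint64_t mask, int width) {
    for (int lane = 0; lane < width; ++lane)
        if (mask & (1ull << lane)) // only active program instances store
            *ptrs[lane] = vals[lane];
}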
2826 
2827 void FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
2828  if (value == NULL || ptr == NULL) {
2829  // may happen due to error elsewhere
2830  AssertPos(currentPos, m->errorCount > 0);
2831  return;
2832  }
2833 
2834  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(ptr->getType());
2835  AssertPos(currentPos, pt != NULL);
2836 
2837  llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock);
2838 
2839  if (g->opt.forceAlignedMemory && llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
2840  inst->setAlignment(g->target->getNativeVectorAlignment());
2841  }
2842 
2843  AddDebugPos(inst);
2844 }
2845 
2846 void FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr, llvm::Value *mask, const Type *valueType,
2847  const Type *ptrRefType) {
2848  if (value == NULL || ptr == NULL) {
2849  // may happen due to error elsewhere
2850  AssertPos(currentPos, m->errorCount > 0);
2851  return;
2852  }
2853 
2854  const PointerType *ptrType;
2855  if (CastType<ReferenceType>(ptrRefType) != NULL)
2856  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2857  else {
2858  ptrType = CastType<PointerType>(ptrRefType);
2859  AssertPos(currentPos, ptrType != NULL);
2860  }
2861 
2862  if (CastType<UndefinedStructType>(ptrType->GetBaseType())) {
2863  Error(currentPos, "Unable to store to undefined struct type \"%s\".",
2864  ptrType->GetBaseType()->GetString().c_str());
2865  return;
2866  }
2867 
2868  // Figure out what kind of store we're doing here
2869  if (ptrType->IsUniformType()) {
2870  if (ptrType->IsSlice())
2871  // storing a uniform value to a single slice of a SOA type
2872  storeUniformToSOA(value, ptr, mask, valueType, ptrType);
2873  else if (ptrType->GetBaseType()->IsUniformType())
2874  // the easy case
2875  StoreInst(value, ptr);
2876  else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations)
2877  // Otherwise it is a masked store, unless we can determine that the
2878  // mask is all on. (It's unclear if this check is actually useful.)
2879  StoreInst(value, ptr);
2880  else
2881  maskedStore(value, ptr, ptrType, mask);
2882  } else {
2883  AssertPos(currentPos, ptrType->IsVaryingType());
2884  // We have a varying ptr (an array of pointers), so it's time to
2885  // scatter
2886  scatter(value, ptr, valueType, ptrType, GetFullMask());
2887  }
2888 }
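
// Editor's note: an illustrative summary (assumed, not exhaustive) of how
// the dispatch above maps to ispc-level stores, for uniform int j and
// varying float v:
//     uniform float u; soa<4> S s[10]; varying float *varying p;
//     u = 1.;      // uniform ptr, uniform base type   -> plain StoreInst
//     s[j].f = 2.; // uniform slice ptr into SOA data  -> storeUniformToSOA
//     v = 3.;      // varying base, partially-on mask  -> maskedStore
//     *p = v;      // varying ptr (array of pointers)  -> scatter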
2889 
2890 /** Store a uniform type to SOA-laid-out memory.
2891  */
2892 void FunctionEmitContext::storeUniformToSOA(llvm::Value *value, llvm::Value *ptr, llvm::Value *mask,
2893  const Type *valueType, const PointerType *ptrType) {
2894  AssertPos(currentPos, Type::EqualIgnoringConst(valueType, ptrType->GetBaseType()->GetAsUniformType()));
2895 
2896  const CollectionType *ct = CastType<CollectionType>(valueType);
2897  if (ct != NULL) {
2898  // Handle collections element-wise...
2899  for (int i = 0; i < ct->GetElementCount(); ++i) {
2900  llvm::Value *eltValue = ExtractInst(value, i);
2901  const Type *eltType = ct->GetElementType(i);
2902  const PointerType *dstEltPtrType;
2903  llvm::Value *dstEltPtr = AddElementOffset(ptr, i, ptrType, "slice_offset", &dstEltPtrType);
2904  StoreInst(eltValue, dstEltPtr, mask, eltType, dstEltPtrType);
2905  }
2906  } else {
2907  // We're finally at a leaf SOA array; apply the slice offset and
2908  // then we can do a final regular store
2909  AssertPos(currentPos, Type::IsBasicType(valueType));
2910  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2911  StoreInst(value, ptr);
2912  }
2913 }
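
// Editor's note: an illustrative trace under an assumed soa<4> layout:
// storing a uniform "struct { float a; int32 b; }" recurses per element,
// so the leaf stores land at the slice offset within the float[4] block
// for a and within the int32[4] block for b.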
2914 
2915 void FunctionEmitContext::MemcpyInst(llvm::Value *dest, llvm::Value *src, llvm::Value *count, llvm::Value *align) {
2916  dest = BitCastInst(dest, LLVMTypes::VoidPointerType, "dest_void");
2917  src = BitCastInst(src, LLVMTypes::VoidPointerType, "src_void");
2918  if (count->getType() != LLVMTypes::Int64Type) {
2919  AssertPos(currentPos, count->getType() == LLVMTypes::Int32Type);
2920  count = ZExtInst(count, LLVMTypes::Int64Type, "count_to_64");
2921  }
2922  if (align == NULL)
2923  align = LLVMInt32(1);
2924 #if ISPC_LLVM_VERSION <= ISPC_LLVM_8_0
2925  llvm::Constant *mcFunc =
2926 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
2927  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", LLVMTypes::VoidType, LLVMTypes::VoidPointerType,
2928  LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, LLVMTypes::Int32Type,
2929  LLVMTypes::BoolType, NULL);
2930 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_6_0 // LLVM 5.0-6.0
2931  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", LLVMTypes::VoidType, LLVMTypes::VoidPointerType,
2932  LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, LLVMTypes::Int32Type,
2933  LLVMTypes::BoolType);
2934 #else // LLVM 7.0+
2935  // Now alignment goes as an attribute, not as a parameter.
2936  // See LLVM r322965/r323597 for more details.
2937  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", LLVMTypes::VoidType, LLVMTypes::VoidPointerType,
2938  LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, LLVMTypes::BoolType);
2939 #endif
2940 #else // LLVM 9.0+
2941  llvm::FunctionCallee mcFuncCallee =
2942  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", LLVMTypes::VoidType, LLVMTypes::VoidPointerType,
2943  LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, LLVMTypes::BoolType);
2944  llvm::Constant *mcFunc = llvm::cast<llvm::Constant>(mcFuncCallee.getCallee());
2945 #endif
2946  AssertPos(currentPos, mcFunc != NULL);
2947  AssertPos(currentPos, llvm::isa<llvm::Function>(mcFunc));
2948 
2949  std::vector<llvm::Value *> args;
2950  args.push_back(dest);
2951  args.push_back(src);
2952  args.push_back(count);
2953 #if ISPC_LLVM_VERSION < ISPC_LLVM_7_0
2954  // Don't bother setting alignment for 7.0+, as this parameter is never really used by ISPC.
2955  args.push_back(align);
2956 #endif
2957  args.push_back(LLVMFalse); /* not volatile */
2958  CallInst(mcFunc, NULL, args, "");
2959 }
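
// Editor's note: for reference, the emitted call should correspond to IR
// roughly like the following (LLVM 7.0+ form, where alignment is an
// attribute rather than a parameter; pre-7.0 intrinsics take an extra i32
// alignment argument before the i1 volatile flag):
//     call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src,
//                                          i64 %count, i1 false)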
2960 
2961 void FunctionEmitContext::BranchInst(llvm::BasicBlock *dest) {
2962  llvm::Instruction *b = llvm::BranchInst::Create(dest, bblock);
2963  AddDebugPos(b);
2964 }
2965 
2966 void FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock, llvm::BasicBlock *falseBlock, llvm::Value *test) {
2967  if (test == NULL) {
2968  AssertPos(currentPos, m->errorCount > 0);
2969  return;
2970  }
2971 
2972  llvm::Instruction *b = llvm::BranchInst::Create(trueBlock, falseBlock, test, bblock);
2973  AddDebugPos(b);
2974 }
2975 
2976 llvm::Value *FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) {
2977  if (v == NULL) {
2978  AssertPos(currentPos, m->errorCount > 0);
2979  return NULL;
2980  }
2981 
2982  if (name == NULL) {
2983  char buf[32];
2984  snprintf(buf, sizeof(buf), "_extract_%d", elt);
2985  name = LLVMGetName(v, buf);
2986  }
2987 
2988  llvm::Instruction *ei = NULL;
2989  if (llvm::isa<llvm::VectorType>(v->getType()))
2990  ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), name, bblock);
2991  else
2992  ei = llvm::ExtractValueInst::Create(v, elt, name, bblock);
2993  AddDebugPos(ei);
2994  return ei;
2995 }
2996 
2997 llvm::Value *FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const char *name) {
2998  if (v == NULL || eltVal == NULL) {
2999  AssertPos(currentPos, m->errorCount > 0);
3000  return NULL;
3001  }
3002 
3003  if (name == NULL) {
3004  char buf[32];
3005  snprintf(buf, sizeof(buf), "_insert_%d", elt);
3006  name = LLVMGetName(v, buf);
3007  }
3008 
3009  llvm::Instruction *ii = NULL;
3010  if (llvm::isa<llvm::VectorType>(v->getType()))
3011  ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt), name, bblock);
3012  else
3013  ii = llvm::InsertValueInst::Create(v, eltVal, elt, name, bblock);
3014  AddDebugPos(ii);
3015  return ii;
3016 }
3017 
3018 llvm::Value *FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, const char *name) {
3019  if (v1 == NULL || v2 == NULL || mask == NULL) {
3020  AssertPos(currentPos, m->errorCount > 0);
3021  return NULL;
3022  }
3023 
3024  if (name == NULL) {
3025  char buf[32];
3026  snprintf(buf, sizeof(buf), "_shuffle");
3027  name = LLVMGetName(v1, buf);
3028  }
3029 
3030  llvm::Instruction *ii = new llvm::ShuffleVectorInst(v1, v2, mask, name, bblock);
3031 
3032  AddDebugPos(ii);
3033  return ii;
3034 }
3035 
3036 llvm::Value *FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type *vecType, const char *name) {
3037  if (v == NULL || vecType == NULL) {
3038  AssertPos(currentPos, m->errorCount > 0);
3039  return NULL;
3040  }
3041 
3042  llvm::VectorType *ty = llvm::dyn_cast<llvm::VectorType>(vecType);
3043  Assert(ty && ty->getVectorElementType() == v->getType());
3044 
3045  if (name == NULL) {
3046  char buf[32];
3047  snprintf(buf, sizeof(buf), "_broadcast");
3048  name = LLVMGetName(v, buf);
3049  }
3050 
3051  // Generate the following sequence:
3052  // %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0
3053  // %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef,
3054  // <4 x i32> zeroinitializer
3055 
3056  llvm::Value *undef1 = llvm::UndefValue::get(vecType);
3057  llvm::Value *undef2 = llvm::UndefValue::get(vecType);
3058 
3059  // InsertElement
3060  llvm::Twine tw = llvm::Twine(name) + llvm::Twine("_init");
3061  llvm::Value *insert = InsertInst(undef1, v, 0, tw.str().c_str());
3062 
3063  // ShuffleVector
3064  llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
3065  vecType->getVectorNumElements(), llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
3066  llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, name);
3067 
3068  return ret;
3069 }
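
// Editor's note: a hypothetical usage sketch: splatting a uniform i32
// across the target's 32-bit integer vector type,
//     llvm::Value *splat = ctx->BroadcastValue(LLVMInt32(1), LLVMTypes::Int32VectorType);
// produces exactly the insertelement/shufflevector pair shown in the
// comment above ("ctx" stands for some FunctionEmitContext pointer).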
3070 
3071 llvm::PHINode *FunctionEmitContext::PhiNode(llvm::Type *type, int count, const char *name) {
3072  llvm::PHINode *pn = llvm::PHINode::Create(type, count, name ? name : "phi", bblock);
3073  AddDebugPos(pn);
3074  return pn;
3075 }
3076 
3077 llvm::Instruction *FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Value *val1,
3078  const char *name) {
3079  if (test == NULL || val0 == NULL || val1 == NULL) {
3080  AssertPos(currentPos, m->errorCount > 0);
3081  return NULL;
3082  }
3083 
3084  if (name == NULL)
3085  name = LLVMGetName(test, "_select");
3086 
3087  llvm::Instruction *inst = llvm::SelectInst::Create(test, val0, val1, name, bblock);
3088  AddDebugPos(inst);
3089  return inst;
3090 }
3091 
3092 /** Given a value representing a function to be called, or a
3093  possibly-varying pointer to a function to be called, figure out how
3094  many arguments the function has. */
3095 static unsigned int lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
3096  llvm::FunctionType *ft = llvm::dyn_cast<llvm::FunctionType>(callee->getType());
3097 
3098  if (ft == NULL) {
3099  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(callee->getType());
3100  if (pt == NULL) {
3101  // varying--in this case, it must be the version of the
3102  // function that takes a mask
3103  return funcType->GetNumParameters() + 1;
3104  }
3105  ft = llvm::dyn_cast<llvm::FunctionType>(pt->getElementType());
3106  }
3107 
3108  Assert(ft != NULL);
3109  return ft->getNumParams();
3110 }
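
// Editor's note: an illustrative example of the convention assumed above:
// for an ispc function "float f(float x, float y)" on an 8-wide target,
// the mask-taking variant has an LLVM signature along the lines of
//     define <8 x float> @f(<8 x float> %x, <8 x float> %y, <8 x i32> %mask)
// so a call site holding only the two user arguments appends the current
// execution mask as the third argument (exact types depend on the target).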
3111 
3112 llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
3113  const std::vector<llvm::Value *> &args, const char *name) {
3114  if (func == NULL) {
3115  AssertPos(currentPos, m->errorCount > 0);
3116  return NULL;
3117  }
3118 
3119  std::vector<llvm::Value *> argVals = args;
3120  // Most of the time, the mask is passed as the last argument. This
3121  // isn't the case for things like intrinsics, builtins, and extern "C"
3122  // functions from the application. Add the mask if it's needed.
3123  unsigned int calleeArgCount = lCalleeArgCount(func, funcType);
3124  AssertPos(currentPos, argVals.size() + 1 == calleeArgCount || argVals.size() == calleeArgCount);
3125  if (argVals.size() + 1 == calleeArgCount)
3126  argVals.push_back(GetFullMask());
3127 
3128  if (llvm::isa<llvm::VectorType>(func->getType()) == false) {
3129  // Regular 'uniform' function call--just one function or function
3130  // pointer, so just emit the IR directly.
3131  llvm::Instruction *ci = llvm::CallInst::Create(func, argVals, name ? name : "", bblock);
3132 
3133  // Copy noalias attribute to call instruction, to enable better
3134  // alias analysis.
3135  // TODO: what other attributes need to be copied?
3136  // TODO: do the same for the varying path.
3137 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3 && ISPC_LLVM_VERSION < ISPC_LLVM_5_0 // LLVM 3.3-4.0
3138  llvm::CallInst *cc = llvm::dyn_cast<llvm::CallInst>(ci);
3139  if (cc && cc->getCalledFunction() && cc->getCalledFunction()->doesNotAlias(0)) {
3140  cc->addAttribute(0, llvm::Attribute::NoAlias);
3141  }
3142 #else // LLVM 5.0+
3143  llvm::CallInst *cc = llvm::dyn_cast<llvm::CallInst>(ci);
3144  if (cc && cc->getCalledFunction() && cc->getCalledFunction()->returnDoesNotAlias()) {
3145  cc->addAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::NoAlias);
3146  }
3147 #endif
3148 
3149  AddDebugPos(ci);
3150  return ci;
3151  } else {
3152  // Emit the code for a varying function call, where we have a
3153  // vector of function pointers, one for each program instance. The
3154  // basic strategy is that we go through the function pointers, and
3155  // for the executing program instances, for each unique function
3156  // pointer that's in the vector, call that function with a mask
3157  // equal to the set of active program instances that also have that
3158  // function pointer. When all unique function pointers have been
3159  // called, we're done.
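// Editor's note: in scalar pseudocode, the loop that follows behaves
// roughly like this (illustrative only):
//     remaining = fullMask;
//     while (any(remaining)) {
//         f        = fptrs[firstActiveLane(remaining)];
//         callMask = remaining & (fptrs == f);
//         result   = f(args..., callMask);  // result is masked-stored
//         remaining &= ~callMask;
//     }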
3160 
3161  llvm::BasicBlock *bbTest = CreateBasicBlock("varying_funcall_test");
3162  llvm::BasicBlock *bbCall = CreateBasicBlock("varying_funcall_call");
3163  llvm::BasicBlock *bbDone = CreateBasicBlock("varying_funcall_done");
3164 
3165  // Get the current mask value so we can restore it later
3166  llvm::Value *origMask = GetInternalMask();
3167 
3168  // First allocate memory to accumulate the various program
3169  // instances' return values...
3170  const Type *returnType = funcType->GetReturnType();
3171  llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
3172  llvm::Value *resultPtr = NULL;
3173  if (llvmReturnType->isVoidTy() == false)
3174  resultPtr = AllocaInst(llvmReturnType);
3175 
3176  // The memory pointed to by maskPtr tracks the set of program
3177  // instances for which we still need to call the function they are
3178  // pointing to. It starts out initialized with the mask of
3179  // currently running program instances.
3180  llvm::Value *maskPtr = AllocaInst(LLVMTypes::MaskType);
3181  StoreInst(GetFullMask(), maskPtr);
3182 
3183  // And now we branch to the test to see if there's more work to be
3184  // done.
3185  BranchInst(bbTest);
3186 
3187  // bbTest: are any lanes of the mask still on? If so, jump to
3188  // bbCall
3189  SetCurrentBasicBlock(bbTest);
3190  {
3191  llvm::Value *maskLoad = LoadInst(maskPtr);
3192  llvm::Value *any = Any(maskLoad);
3193  BranchInst(bbCall, bbDone, any);
3194  }
3195 
3196  // bbCall: this is the body of the loop that calls out to one of
3197  // the active function pointer values.
3198  SetCurrentBasicBlock(bbCall);
3199  {
3200  // Figure out the first lane that still needs its function
3201  // pointer to be called.
3202  llvm::Value *currentMask = LoadInst(maskPtr);
3203  llvm::Function *cttz = m->module->getFunction("__count_trailing_zeros_i64");
3204  AssertPos(currentPos, cttz != NULL);
3205  llvm::Value *firstLane64 = CallInst(cttz, NULL, LaneMask(currentMask), "first_lane64");
3206  llvm::Value *firstLane = TruncInst(firstLane64, LLVMTypes::Int32Type, "first_lane32");
3207 
3208  // Get the pointer to the function we're going to call this
3209  // time through: fptr = func[firstLane]
3210  llvm::Value *fptr = llvm::ExtractElementInst::Create(func, firstLane, "extract_fptr", bblock);
3211 
3212  // Smear it out into an array of function pointers
3213  llvm::Value *fptrSmear = SmearUniform(fptr, "func_ptr");
3214 
3215  // fpOverlap = (fpSmearAsVec == fpOrigAsVec). This gives us a
3216  // mask for the set of program instances that have the same
3217  // value for their function pointer.
3218  llvm::Value *fpOverlap = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, fptrSmear, func);
3219  fpOverlap = I1VecToBoolVec(fpOverlap);
3220 
3221  // Figure out the mask to use when calling the function
3222  // pointer: we need to AND the current execution mask to handle
3223  // the case of any non-running program instances that happen to
3224  // have this function pointer value.
3225  // callMask = (currentMask & fpOverlap)
3226  llvm::Value *callMask = BinaryOperator(llvm::Instruction::And, currentMask, fpOverlap, "call_mask");
3227 
3228  // Set the mask
3229  SetInternalMask(callMask);
3230 
3231  // bitcast the i32/64 function pointer to the actual function
3232  // pointer type.
3233  llvm::Type *llvmFuncType = funcType->LLVMFunctionType(g->ctx);
3234  llvm::Type *llvmFPtrType = llvm::PointerType::get(llvmFuncType, 0);
3235  llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType);
3236 
3237  // Call the function: callResult = call fptr(args..., callMask)
3238  llvm::Value *callResult = CallInst(fptrCast, funcType, args, name);
3239 
3240  // Now, do a masked store into the memory allocated to
3241  // accumulate the result using the call mask.
3242  if (callResult != NULL && callResult->getType() != LLVMTypes::VoidType) {
3243  AssertPos(currentPos, resultPtr != NULL);
3244  StoreInst(callResult, resultPtr, callMask, returnType, PointerType::GetUniform(returnType));
3245  } else
3246  AssertPos(currentPos, resultPtr == NULL);
3247 
3248  // Update the mask to turn off the program instances for which
3249  // we just called the function.
3250  // currentMask = currentMask & ~callMask
3251  llvm::Value *notCallMask = BinaryOperator(llvm::Instruction::Xor, callMask, LLVMMaskAllOn, "~callMask");
3252  currentMask = BinaryOperator(llvm::Instruction::And, currentMask, notCallMask, "currentMask&~callMask");
3253  StoreInst(currentMask, maskPtr);
3254 
3255  // And go back to the test to see if we need to do another
3256  // call.
3257  BranchInst(bbTest);
3258  }
3259 
3260  // bbDone: We're all done; clean up and return the result we've
3261  // accumulated in the result memory.
3262  SetCurrentBasicBlock(bbDone);
3263  SetInternalMask(origMask);
3264  return resultPtr ? LoadInst(resultPtr) : NULL;
3265  }
3266 }
3267 
3268 llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg,
3269  const char *name) {
3270  std::vector<llvm::Value *> args;
3271  args.push_back(arg);
3272  return CallInst(func, funcType, args, name);
3273 }
3274 
3275 llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg0,
3276  llvm::Value *arg1, const char *name) {
3277  std::vector<llvm::Value *> args;
3278  args.push_back(arg0);
3279  args.push_back(arg1);
3280  return CallInst(func, funcType, args, name);
3281 }
3282 
3283 llvm::Instruction *FunctionEmitContext::ReturnInst() {
3284  if (launchedTasks)
3285  // Add a sync call at the end of any function that launched tasks
3286  SyncInst();
3287 
3288  llvm::Instruction *rinst = NULL;
3289  if (returnValuePtr != NULL) {
3290  // We have value(s) to return; load them from their storage
3291  // location
3292  llvm::Value *retVal = LoadInst(returnValuePtr, "return_value");
3293  rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock);
3294  } else {
3295  AssertPos(currentPos, function->GetReturnType()->IsVoidType());
3296  rinst = llvm::ReturnInst::Create(*g->ctx, bblock);
3297  }
3298 
3299  AddDebugPos(rinst);
3300  bblock = NULL;
3301  return rinst;
3302 }
3303 
3304 llvm::Value *FunctionEmitContext::LaunchInst(llvm::Value *callee, std::vector<llvm::Value *> &argVals,
3305  llvm::Value *launchCount[3]) {
3306 #ifdef ISPC_NVPTX_ENABLED
3307  if (g->target->getISA() == Target::NVPTX) {
3308  if (callee == NULL) {
3309  AssertPos(currentPos, m->errorCount > 0);
3310  return NULL;
3311  }
3312  launchedTasks = true;
3313 
3314  AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
3315  std::vector<llvm::Type *> argTypes;
3316 
3317  llvm::Function *F = llvm::dyn_cast<llvm::Function>(callee);
3318  const unsigned int nArgs = F->arg_size();
3319  llvm::Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
3320  for (; I != E; ++I)
3321  argTypes.push_back(I->getType());
3322  llvm::Type *st = llvm::StructType::get(*g->ctx, argTypes);
3323  llvm::StructType *argStructType = static_cast<llvm::StructType *>(st);
3324  llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
3325  if (structSize->getType() != LLVMTypes::Int64Type)
3326  structSize = ZExtInst(structSize, LLVMTypes::Int64Type, "struct_size_to_64");
3327 
3328  const int align = 8;
3329  llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
3330  AssertPos(currentPos, falloc != NULL);
3331  std::vector<llvm::Value *> allocArgs;
3332  allocArgs.push_back(launchGroupHandlePtr);
3333  allocArgs.push_back(structSize);
3334  allocArgs.push_back(LLVMInt32(align));
3335  llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
3336  llvm::Value *voidi64 = PtrToIntInst(voidmem, "args_i64");
3337  llvm::BasicBlock *if_true = CreateBasicBlock("if_true");
3338  llvm::BasicBlock *if_false = CreateBasicBlock("if_false");
3339 
3340  /* Check that the pointer returned by ISPCAlloc is not NULL.
3341  * --------------
3342  * This is a workaround for not checking the value of programIndex,
3343  * because ISPCAlloc returns a NULL pointer for all programIndex > 0.
3344  * Of course, if ISPCAlloc fails to get the parameter buffer, the
3345  * pointer for programIndex = 0 will also be NULL.
3346  * This check must be added; the code should also be rewritten to make it less opaque.
3347  */
3348  llvm::Value *cmp1 = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, voidi64, LLVMInt64(0), "cmp1");
3349  BranchInst(if_true, if_false, cmp1);
3350 
3351  /**********************/
3352  bblock = if_true;
3353 
3354  // label_if_then block:
3355  llvm::Type *pt = llvm::PointerType::getUnqual(st);
3356  llvm::Value *argmem = BitCastInst(voidmem, pt);
3357  for (unsigned int i = 0; i < argVals.size(); ++i) {
3358  llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
3359  // don't need to do masked store here, I think
3360  StoreInst(argVals[i], ptr);
3361  }
3362  if (nArgs == argVals.size() + 1) {
3363  // copy in the mask
3364  llvm::Value *mask = GetFullMask();
3365  llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL, "funarg_mask");
3366  StoreInst(mask, ptr);
3367  }
3368  BranchInst(if_false);
3369 
3370  /**********************/
3371  bblock = if_false;
3372 
3373  llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
3374  llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
3375  AssertPos(currentPos, flaunch != NULL);
3376  std::vector<llvm::Value *> args;
3377  args.push_back(launchGroupHandlePtr);
3378  args.push_back(fptr);
3379  args.push_back(voidmem);
3380  args.push_back(launchCount[0]);
3381  args.push_back(launchCount[1]);
3382  args.push_back(launchCount[2]);
3383  llvm::Value *ret = CallInst(flaunch, NULL, args, "");
3384  return ret;
3385  }
3386 #endif /* ISPC_NVPTX_ENABLED */
3387 
3388  if (callee == NULL) {
3389  AssertPos(currentPos, m->errorCount > 0);
3390  return NULL;
3391  }
3392 
3393  launchedTasks = true;
3394 
3395  AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
3396  llvm::Type *argType = (llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
3397  AssertPos(currentPos, llvm::PointerType::classof(argType));
3398  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(argType);
3399  AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
3400  llvm::StructType *argStructType = static_cast<llvm::StructType *>(pt->getElementType());
3401 
3402  llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
3403  AssertPos(currentPos, falloc != NULL);
3404  llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
3405  if (structSize->getType() != LLVMTypes::Int64Type)
3406  // ISPCAlloc expects the size as a uint64_t, but on 32-bit
3407  // targets, SizeOf returns a 32-bit value
3408  structSize = ZExtInst(structSize, LLVMTypes::Int64Type, "struct_size_to_64");
3409  int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
3410 
3411  std::vector<llvm::Value *> allocArgs;
3412  allocArgs.push_back(launchGroupHandlePtr);
3413  allocArgs.push_back(structSize);
3414  allocArgs.push_back(LLVMInt32(align));
3415  llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
3416  llvm::Value *argmem = BitCastInst(voidmem, pt);
3417 
3418  // Copy the values of the parameters into the appropriate place in
3419  // the argument block
3420  for (unsigned int i = 0; i < argVals.size(); ++i) {
3421  llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
3422  // don't need to do masked store here, I think
3423  StoreInst(argVals[i], ptr);
3424  }
3425 
3426  if (argStructType->getNumElements() == argVals.size() + 1) {
3427  // copy in the mask
3428  llvm::Value *mask = GetFullMask();
3429  llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL, "funarg_mask");
3430  StoreInst(mask, ptr);
3431  }
3432 
3433  // And emit the call to the user-supplied task launch function, passing
3434  // a pointer to the task function being called and a pointer to the
3435  // argument block we just filled in
3436  llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
3437  llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
3438  AssertPos(currentPos, flaunch != NULL);
3439  std::vector<llvm::Value *> args;
3440  args.push_back(launchGroupHandlePtr);
3441  args.push_back(fptr);
3442  args.push_back(voidmem);
3443  args.push_back(launchCount[0]);
3444  args.push_back(launchCount[1]);
3445  args.push_back(launchCount[2]);
3446  return CallInst(flaunch, NULL, args, "");
3447 }
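
// Editor's note: the ISPCAlloc/ISPCLaunch/ISPCSync entry points used above
// come from the application's task system, not from the compiler itself. As
// a sketch, their documented shapes are approximately:
//     void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
//     void ISPCLaunch(void **handlePtr, void *f, void *data,
//                     int count0, int count1, int count2);
//     void ISPCSync(void *handle);
// (See the ispc task-system documentation for the authoritative signatures.)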
3448 
3449 void FunctionEmitContext::SyncInst() {
3450 #ifdef ISPC_NVPTX_ENABLED
3451  if (g->target->getISA() == Target::NVPTX) {
3452  llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
3453  llvm::Value *nullPtrValue = llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
3454  llvm::Function *fsync = m->module->getFunction("ISPCSync");
3455  if (fsync == NULL)
3456  FATAL("Couldn't find ISPCSync declaration?!");
3457  CallInst(fsync, NULL, launchGroupHandle, "");
3458  StoreInst(nullPtrValue, launchGroupHandlePtr);
3459  return;
3460  }
3461 #endif /* ISPC_NVPTX_ENABLED */
3462 
3463  llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
3464  llvm::Value *nullPtrValue = llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
3465  llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, launchGroupHandle, nullPtrValue);
3466  llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
3467  llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
3468  BranchInst(bSync, bPostSync, nonNull);
3469 
3470  SetCurrentBasicBlock(bSync);
3471  llvm::Function *fsync = m->module->getFunction("ISPCSync");
3472  if (fsync == NULL)
3473  FATAL("Couldn't find ISPCSync declaration?!");
3474  CallInst(fsync, NULL, launchGroupHandle, "");
3475 
3476  // zero out the handle so that if ISPCLaunch is called again in this
3477  // function, it knows it's starting out from scratch
3478  StoreInst(nullPtrValue, launchGroupHandlePtr);
3479 
3480  BranchInst(bPostSync);
3481 
3482  SetCurrentBasicBlock(bPostSync);
3483 }
3484 
3485 /** When we are gathering from or scattering to a varying atomic type, we
3486  need to add an appropriate offset to the final address for each lane right
3487  before we use it. Given a varying pointer we're about to use and its
3488  type, this function determines whether these offsets are needed and
3489  returns an updated pointer that incorporates these offsets if needed.
3490  */
3491 llvm::Value *FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *ptrType) {
3492  // This should only be called for varying pointers
3493  const PointerType *pt = CastType<PointerType>(ptrType);
3494  AssertPos(currentPos, pt && pt->IsVaryingType());
3495 
3496  const Type *baseType = ptrType->GetBaseType();
3497  if (Type::IsBasicType(baseType) == false)
3498  return ptr;
3499 
3500  if (baseType->IsVaryingType() == false)
3501  return ptr;
3502 
3503  // Find the size of a uniform element of the varying type
3504  llvm::Type *llvmBaseUniformType = baseType->GetAsUniformType()->LLVMType(g->ctx);
3505  llvm::Value *unifSize = g->target->SizeOf(llvmBaseUniformType, bblock);
3506  unifSize = SmearUniform(unifSize);
3507 
3508  // Compute offset = <0, 1, .. > * unifSize
3509  bool is32bits = g->target->is32Bit() || g->opt.force32BitAddressing;
3510  llvm::Value *varyingOffsets = ProgramIndexVector(is32bits);
3511 
3512  llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize, varyingOffsets);
3513 
3514  if (g->opt.force32BitAddressing == true && g->target->is32Bit() == false)
3515  // On 64-bit targets where we're doing 32-bit addressing
3516  // calculations, we need to convert to an i64 vector before adding
3517  // to the pointer
3518  offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
3519 
3520  return BinaryOperator(llvm::Instruction::Add, ptr, offset);
3521 }
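
// Editor's note: a worked example under assumed parameters: for a varying
// float on an 8-wide target, sizeof(uniform float) == 4, so the offsets
// computed above are
//     <0, 1, 2, 3, 4, 5, 6, 7> * 4 == <0, 4, 8, 12, 16, 20, 24, 28>
// bytes; lane i thus addresses the i'th consecutive element past the
// replicated base pointer.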
3522 
3523 CFInfo *FunctionEmitContext::popCFState() {
3524  AssertPos(currentPos, controlFlowInfo.size() > 0);
3525  CFInfo *ci = controlFlowInfo.back();
3526  controlFlowInfo.pop_back();
3527 
3528  if (ci->IsSwitch()) {
3529  breakTarget = ci->savedBreakTarget;
3530  continueTarget = ci->savedContinueTarget;
3531  breakLanesPtr = ci->savedBreakLanesPtr;
3532  continueLanesPtr = ci->savedContinueLanesPtr;
3533  blockEntryMask = ci->savedBlockEntryMask;
3534  switchExpr = ci->savedSwitchExpr;
3535  defaultBlock = ci->savedDefaultBlock;
3536  caseBlocks = ci->savedCaseBlocks;
3537  nextBlocks = ci->savedNextBlocks;
3538  switchConditionWasUniform = ci->savedSwitchConditionWasUniform;
3539  } else if (ci->IsLoop() || ci->IsForeach()) {
3540  breakTarget = ci->savedBreakTarget;
3541  continueTarget = ci->savedContinueTarget;
3542  breakLanesPtr = ci->savedBreakLanesPtr;
3543  continueLanesPtr = ci->savedContinueLanesPtr;
3544  blockEntryMask = ci->savedBlockEntryMask;
3545  } else {
3546  AssertPos(currentPos, ci->IsIf());
3547  // nothing to do
3548  }
3549 
3550  return ci;
3551 }