Intel® Implicit SPMD Program Compiler (Intel® ISPC)  1.13.0
ctx.cpp
1 /*
2  Copyright (c) 2010-2020, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ctx.cpp
35  @brief Implementation of the FunctionEmitContext class
36 */
37 
38 #include "ctx.h"
39 #include "expr.h"
40 #include "func.h"
41 #include "llvmutil.h"
42 #include "module.h"
43 #include "stmt.h"
44 #include "sym.h"
45 #include "type.h"
46 #include "util.h"
47 #include <llvm/BinaryFormat/Dwarf.h>
48 #include <map>
49 
50 #include <llvm/IR/DerivedTypes.h>
51 #include <llvm/IR/Instructions.h>
52 #include <llvm/IR/Metadata.h>
53 #include <llvm/IR/Module.h>
54 
55 /** This is a small utility structure that records information related to one
56  level of nested control flow. It's mostly used in correctly restoring
57  the mask and other state as we exit control flow nesting levels.
58 */
59 struct CFInfo {
60  /** Returns a new instance of the structure that represents entering an
61  'if' statement */
62  static CFInfo *GetIf(bool isUniform, llvm::Value *savedMask);
63 
64  /** Returns a new instance of the structure that represents entering a
65  loop. */
66  static CFInfo *GetLoop(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget,
67  llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
68  llvm::Value *savedBlockEntryMask);
69 
70  static CFInfo *GetForeach(FunctionEmitContext::ForeachType ft, llvm::BasicBlock *breakTarget,
71  llvm::BasicBlock *continueTarget, llvm::Value *savedBreakLanesPtr,
72  llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
73  llvm::Value *savedBlockEntryMask);
74 
75  static CFInfo *GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget,
76  llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr,
77  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask, llvm::Value *switchExpr,
78  llvm::BasicBlock *bbDefault,
79  const std::vector<std::pair<int, llvm::BasicBlock *>> *bbCases,
80  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbNext, bool scUniform);
81 
82  bool IsIf() { return type == If; }
83  bool IsLoop() { return type == Loop; }
84  bool IsForeach() { return (type == ForeachRegular || type == ForeachActive || type == ForeachUnique); }
85  bool IsSwitch() { return type == Switch; }
86  bool IsVarying() { return !isUniform; }
87  bool IsUniform() { return isUniform; }
88 
89  enum CFType { If, Loop, ForeachRegular, ForeachActive, ForeachUnique, Switch };
90  CFType type;
91  bool isUniform;
92  llvm::BasicBlock *savedBreakTarget, *savedContinueTarget;
93  llvm::Value *savedBreakLanesPtr, *savedContinueLanesPtr;
94  llvm::Value *savedMask, *savedBlockEntryMask;
95  llvm::Value *savedSwitchExpr;
96  llvm::BasicBlock *savedDefaultBlock;
97  const std::vector<std::pair<int, llvm::BasicBlock *>> *savedCaseBlocks;
98  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNextBlocks;
99  bool savedSwitchConditionWasUniform;
100 
101  private:
102  CFInfo(CFType t, bool uniformIf, llvm::Value *sm) {
103  Assert(t == If);
104  type = t;
105  isUniform = uniformIf;
106  savedBreakTarget = savedContinueTarget = NULL;
107  savedBreakLanesPtr = savedContinueLanesPtr = NULL;
108  savedMask = savedBlockEntryMask = sm;
109  savedSwitchExpr = NULL;
110  savedDefaultBlock = NULL;
111  savedCaseBlocks = NULL;
112  savedNextBlocks = NULL;
113  savedSwitchConditionWasUniform = false;
114  }
115  CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct, llvm::Value *sb, llvm::Value *sc,
116  llvm::Value *sm, llvm::Value *lm, llvm::Value *sse = NULL, llvm::BasicBlock *bbd = NULL,
117  const std::vector<std::pair<int, llvm::BasicBlock *>> *bbc = NULL,
118  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbn = NULL, bool scu = false) {
119  Assert(t == Loop || t == Switch);
120  type = t;
121  isUniform = iu;
122  savedBreakTarget = bt;
123  savedContinueTarget = ct;
124  savedBreakLanesPtr = sb;
125  savedContinueLanesPtr = sc;
126  savedMask = sm;
127  savedBlockEntryMask = lm;
128  savedSwitchExpr = sse;
129  savedDefaultBlock = bbd;
130  savedCaseBlocks = bbc;
131  savedNextBlocks = bbn;
132  savedSwitchConditionWasUniform = scu;
133  }
134  CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct, llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
135  llvm::Value *lm) {
136  Assert(t == ForeachRegular || t == ForeachActive || t == ForeachUnique);
137  type = t;
138  isUniform = false;
139  savedBreakTarget = bt;
140  savedContinueTarget = ct;
141  savedBreakLanesPtr = sb;
142  savedContinueLanesPtr = sc;
143  savedMask = sm;
144  savedBlockEntryMask = lm;
145  savedSwitchExpr = NULL;
146  savedDefaultBlock = NULL;
147  savedCaseBlocks = NULL;
148  savedNextBlocks = NULL;
149  savedSwitchConditionWasUniform = false;
150  }
151 };
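// A minimal usage sketch (illustrative; see the Start*/End* methods on
// FunctionEmitContext below): each Start call pushes a CFInfo recording the
// state to restore, and the matching End call pops it:
//
//     ctx->StartVaryingIf(oldMask);  // pushes CFInfo::GetIf(false, oldMask)
//     /* ... emit code for the 'if' body ... */
//     ctx->EndIf();                  // pops the CFInfo and restores the mask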
152 
153 CFInfo *CFInfo::GetIf(bool isUniform, llvm::Value *savedMask) { return new CFInfo(If, isUniform, savedMask); }
154 
155 CFInfo *CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget,
156  llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
157  llvm::Value *savedBlockEntryMask) {
158  return new CFInfo(Loop, isUniform, breakTarget, continueTarget, savedBreakLanesPtr, savedContinueLanesPtr,
159  savedMask, savedBlockEntryMask);
160 }
161 
162 CFInfo *CFInfo::GetForeach(FunctionEmitContext::ForeachType ft, llvm::BasicBlock *breakTarget,
163  llvm::BasicBlock *continueTarget, llvm::Value *savedBreakLanesPtr,
164  llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask, llvm::Value *savedForeachMask) {
165  CFType cfType;
166  switch (ft) {
167  case FunctionEmitContext::FOREACH_REGULAR:
168  cfType = ForeachRegular;
169  break;
170  case FunctionEmitContext::FOREACH_ACTIVE:
171  cfType = ForeachActive;
172  break;
173  case FunctionEmitContext::FOREACH_UNIQUE:
174  cfType = ForeachUnique;
175  break;
176  default:
177  FATAL("Unhandled foreach type");
178  return NULL;
179  }
180 
181  return new CFInfo(cfType, breakTarget, continueTarget, savedBreakLanesPtr, savedContinueLanesPtr, savedMask,
182  savedForeachMask);
183 }
184 
185 CFInfo *CFInfo::GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget,
186  llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
187  llvm::Value *savedBlockEntryMask, llvm::Value *savedSwitchExpr,
188  llvm::BasicBlock *savedDefaultBlock,
189  const std::vector<std::pair<int, llvm::BasicBlock *>> *savedCases,
190  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNext,
191  bool savedSwitchConditionUniform) {
192  return new CFInfo(Switch, isUniform, breakTarget, continueTarget, savedBreakLanesPtr, savedContinueLanesPtr,
193  savedMask, savedBlockEntryMask, savedSwitchExpr, savedDefaultBlock, savedCases, savedNext,
194  savedSwitchConditionUniform);
195 }
196 
197 ///////////////////////////////////////////////////////////////////////////
198 
199 FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, llvm::Function *lf, SourcePos firstStmtPos) {
200  function = func;
201  llvmFunction = lf;
202  switchConditionWasUniform = false;
203 
204  /* Create a new basic block to store all of the allocas */
205  allocaBlock = llvm::BasicBlock::Create(*g->ctx, "allocas", llvmFunction, 0);
206  bblock = llvm::BasicBlock::Create(*g->ctx, "entry", llvmFunction, 0);
207  /* But jump from it immediately into the real entry block */
208  llvm::BranchInst::Create(bblock, allocaBlock);
209 
210  funcStartPos = funSym->pos;
211 
212  internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory");
213  StoreInst(LLVMMaskAllOn, internalMaskPointer);
214 
215  functionMaskValue = LLVMMaskAllOn;
216 
217  fullMaskPointer = AllocaInst(LLVMTypes::MaskType, "full_mask_memory");
218  StoreInst(LLVMMaskAllOn, fullMaskPointer);
219 
220  blockEntryMask = NULL;
221  breakLanesPtr = continueLanesPtr = NULL;
222  breakTarget = continueTarget = NULL;
223 
224  switchExpr = NULL;
225  caseBlocks = NULL;
226  defaultBlock = NULL;
227  nextBlocks = NULL;
228 
229  returnedLanesPtr = AllocaInst(LLVMTypes::MaskType, "returned_lanes_memory");
230  StoreInst(LLVMMaskAllOff, returnedLanesPtr);
231 
232  launchedTasks = false;
233  launchGroupHandlePtr = AllocaInst(LLVMTypes::VoidPointerType, "launch_group_handle");
234  StoreInst(llvm::Constant::getNullValue(LLVMTypes::VoidPointerType), launchGroupHandlePtr);
235 
236  disableGSWarningCount = 0;
237 
238  const Type *returnType = function->GetReturnType();
239  if (!returnType || returnType->IsVoidType())
240  returnValuePtr = NULL;
241  else {
242  returnValuePtr = AllocaInst(returnType, "return_value_memory");
243  }
244 
245  if (g->opt.disableMaskAllOnOptimizations == false) {
246  // This is really disgusting. We want to fool the compiler so
247  // that it can't reason that the mask is all on, but we don't
248  // want to pay too much of a price at the start of each function
249  // to do so.
250  //
251  // Therefore: first, we declare a module-static __all_on_mask
252  // variable that will hold an "all on" mask value. At the start of
253  // each function, we'll load its value and call SetInternalMaskAnd
254  // with the result to set the current internal execution mask.
255  // (This is a no-op at runtime.)
256  //
257  // Then, to fool the optimizer that maybe the value of
258  // __all_on_mask can't be guaranteed to be "all on", we emit a
259  // dummy function that sets __all_on_mask to "all off". (That
260  // function is never actually called.)
261  llvm::Value *globalAllOnMaskPtr = m->module->getNamedGlobal("__all_on_mask");
262  if (globalAllOnMaskPtr == NULL) {
263  globalAllOnMaskPtr =
264  new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false, llvm::GlobalValue::InternalLinkage,
265  LLVMMaskAllOn, "__all_on_mask");
266 
267  char buf[256];
268  snprintf(buf, sizeof(buf), "__off_all_on_mask_%s", g->target->GetISAString());
269 
270 #if ISPC_LLVM_VERSION <= ISPC_LLVM_8_0
271  llvm::Constant *offFunc = m->module->getOrInsertFunction(buf, LLVMTypes::VoidType);
272 #else // LLVM 9.0+
273  llvm::FunctionCallee offFuncCallee = m->module->getOrInsertFunction(buf, LLVMTypes::VoidType);
274  llvm::Constant *offFunc = llvm::cast<llvm::Constant>(offFuncCallee.getCallee());
275 #endif
276  AssertPos(currentPos, llvm::isa<llvm::Function>(offFunc));
277  llvm::BasicBlock *offBB = llvm::BasicBlock::Create(*g->ctx, "entry", (llvm::Function *)offFunc, 0);
278  llvm::StoreInst *inst = new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
279  if (g->opt.forceAlignedMemory) {
280 #if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0
281  inst->setAlignment(g->target->getNativeVectorAlignment());
282 #else // LLVM 10.0+
283  inst->setAlignment(llvm::MaybeAlign(g->target->getNativeVectorAlignment()));
284 #endif
285  }
286  llvm::ReturnInst::Create(*g->ctx, offBB);
287  }
288 
289  llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, NULL, "all_on_mask");
290  SetInternalMaskAnd(LLVMMaskAllOn, allOnMask);
291  }
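// For illustration, assuming a 4-wide i32 mask, the entry-block IR emitted
// above looks roughly like:
//
//     %all_on_mask = load <4 x i32>, <4 x i32>* @__all_on_mask
//     %initial_mask = and <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %all_on_mask
//
// This is a no-op at runtime, but because __off_all_on_mask_<isa>() (which
// is never called) stores an all-off value to @__all_on_mask, the optimizer
// can't prove that the loaded value is all-on.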
292 
293  if (m->diBuilder) {
294  currentPos = funSym->pos;
295 
296  /* If debugging is enabled, tell the debug information emission
297  code about this new function */
298  diFile = funcStartPos.GetDIFile();
299  llvm::DIScope *scope = m->diCompileUnit;
300  llvm::DIType *diSubprogramType = NULL;
301 
302  const FunctionType *functionType = function->GetType();
303  if (functionType == NULL)
304  AssertPos(currentPos, m->errorCount > 0);
305  else {
306  diSubprogramType = functionType->GetDIType(scope);
307  /*#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 // 3.2, 3.3, 3.4, 3.5, 3.6
308  AssertPos(currentPos, diSubprogramType.Verify());
309  #else // LLVM 3.7+
310  // coming soon
311  #endif*/
312  }
313  /* LLVM 4.0+ */
314  Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
315  llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
316  llvm::DINode::DIFlags flags = llvm::DINode::FlagPrototyped;
317 
318  std::string mangledName = std::string(llvmFunction->getName());
319  if (mangledName == funSym->name)
320  mangledName = "";
321 
322  bool isStatic = (funSym->storageClass == SC_STATIC);
323  bool isOptimized = (g->opt.level > 0);
324  int firstLine = funcStartPos.first_line;
325 
326 #if ISPC_LLVM_VERSION < ISPC_LLVM_8_0
327  diSubprogram = m->diBuilder->createFunction(diFile /* scope */, funSym->name, mangledName, diFile, firstLine,
328  diSubprogramType_n, isStatic, true, /* is defn */
329  firstLine, flags, isOptimized);
330  llvmFunction->setSubprogram(diSubprogram);
331 #else /* LLVM 8.0+ */
332  /* isDefinition is always set to 'true' */
333  llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagDefinition;
334  if (isOptimized)
335  SPFlags |= llvm::DISubprogram::SPFlagOptimized;
336  if (isStatic)
337  SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
338 
339  diSubprogram = m->diBuilder->createFunction(diFile /* scope */, funSym->name, mangledName, diFile, firstLine,
340  diSubprogramType_n, firstLine, flags, SPFlags);
341  llvmFunction->setSubprogram(diSubprogram);
342 #endif
343 
344  /* And start a scope representing the initial function scope */
345  StartScope();
346  } else {
347  diSubprogram = NULL;
348  diFile = NULL;
349  }
350 }
351 
352 FunctionEmitContext::~FunctionEmitContext() {
353  AssertPos(currentPos, controlFlowInfo.size() == 0);
354  AssertPos(currentPos, debugScopes.size() == (m->diBuilder ? 1 : 0));
355 }
356 
357 const Function *FunctionEmitContext::GetFunction() const { return function; }
358 
359 llvm::BasicBlock *FunctionEmitContext::GetCurrentBasicBlock() { return bblock; }
360 
361 void FunctionEmitContext::SetCurrentBasicBlock(llvm::BasicBlock *bb) { bblock = bb; }
362 
363 llvm::Value *FunctionEmitContext::GetFunctionMask() { return functionMaskValue; }
364 
365 llvm::Value *FunctionEmitContext::GetInternalMask() { return LoadInst(internalMaskPointer, NULL, "load_mask"); }
366 
367 llvm::Value *FunctionEmitContext::GetFullMask() {
368  return BinaryOperator(llvm::Instruction::And, GetInternalMask(), functionMaskValue, "internal_mask&function_mask");
369 }
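// Summary of the mask plumbing: functionMaskValue is the mask the function
// was entered with, internalMaskPointer tracks mask changes made by control
// flow inside the function, and the "full" mask is their AND, i.e. the set
// of lanes actually executing at this point.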
370 
371 llvm::Value *FunctionEmitContext::GetFullMaskPointer() { return fullMaskPointer; }
372 
373 void FunctionEmitContext::SetFunctionMask(llvm::Value *value) {
374  functionMaskValue = value;
375  if (bblock != NULL)
376  StoreInst(GetFullMask(), fullMaskPointer);
377 }
378 
379 void FunctionEmitContext::SetBlockEntryMask(llvm::Value *value) { blockEntryMask = value; }
380 
381 void FunctionEmitContext::SetInternalMask(llvm::Value *value) {
382  StoreInst(value, internalMaskPointer);
383  // kludge so that __mask returns the right value in ispc code.
384  StoreInst(GetFullMask(), fullMaskPointer);
385 }
386 
387 void FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) {
388  llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, test, "oldMask&test");
389  SetInternalMask(mask);
390 }
391 
392 void FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test) {
393  llvm::Value *notTest = BinaryOperator(llvm::Instruction::Xor, test, LLVMMaskAllOn, "~test");
394  llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, notTest, "oldMask&~test");
395  SetInternalMask(mask);
396 }
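// For example, with a 4-wide mask (1 = lane on), oldMask = {1,1,1,0} and
// test = {0,1,0,0}: SetInternalMaskAnd() yields {0,1,0,0}, while
// SetInternalMaskAndNot() yields {1,0,1,0}; the Xor with the all-on mask
// computes the bitwise NOT of 'test'.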
397 
398 void FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
399  AssertPos(currentPos, bblock != NULL);
400  llvm::Value *any = Any(GetFullMask());
401  BranchInst(btrue, bfalse, any);
402  // It's illegal to add any additional instructions to the basic block
403  // now that it's terminated, so set bblock to NULL to be safe
404  bblock = NULL;
405 }
406 
407 void FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
408  AssertPos(currentPos, bblock != NULL);
409  llvm::Value *all = All(GetFullMask());
410  BranchInst(btrue, bfalse, all);
411  // It's illegal to add any additional instructions to the basic block
412  // now that it's terminated, so set bblock to NULL to be safe
413  bblock = NULL;
414 }
415 
416 void FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
417  AssertPos(currentPos, bblock != NULL);
418  // switch sense of true/false bblocks
419  BranchIfMaskAny(bfalse, btrue);
420  // It's illegal to add any additional instructions to the basic block
421  // now that it's terminated, so set bblock to NULL to be safe
422  bblock = NULL;
423 }
424 
425 void FunctionEmitContext::StartUniformIf() { controlFlowInfo.push_back(CFInfo::GetIf(true, GetInternalMask())); }
426 
427 void FunctionEmitContext::StartVaryingIf(llvm::Value *oldMask) {
428  controlFlowInfo.push_back(CFInfo::GetIf(false, oldMask));
429 }
430 
431 void FunctionEmitContext::EndIf() {
432  CFInfo *ci = popCFState();
433  // Make sure we match up with a Start{Uniform,Varying}If().
434  AssertPos(currentPos, ci->IsIf());
435 
436  // 'uniform' ifs don't change the mask so we only need to restore the
437  // mask going into the if for 'varying' if statements
438  if (ci->IsUniform() || bblock == NULL)
439  return;
440 
441  // We can't just restore the mask as it was going into the 'if'
442  // statement. First we have to take into account any program
443  // instances that have executed 'return' statements; the restored
444  // mask must be off for those lanes.
445  restoreMaskGivenReturns(ci->savedMask);
446 
447  // If the 'if' statement is inside a loop with a 'varying'
448  // condition, we also need to account for any break or continue
449  // statements that executed inside the 'if' statement; the lanes
450  // for the program instances that ran those must be left off after
451  // we restore the mask following the 'if'. The code below
452  // ends up being optimized out in the case that there were no break
453  // or continue statements (and breakLanesPtr and continueLanesPtr
454  // have their initial 'all off' values), so we don't need to check
455  // for that here.
456  //
457  // There are three general cases to deal with here:
458  // - Loops: both break and continue are allowed, and thus the corresponding
459  // lane mask pointers are non-NULL
460  // - Foreach: only continueLanesPtr may be non-NULL
461  // - Switch: only breakLanesPtr may be non-NULL
462  if (continueLanesPtr != NULL || breakLanesPtr != NULL) {
463  // We want to compute:
464  // newMask = (oldMask & ~(breakLanes | continueLanes)),
465  // treading breakLanes or continueLanes as "all off" if the
466  // corresponding pointer is NULL.
467  llvm::Value *bcLanes = NULL;
468 
469  if (continueLanesPtr != NULL)
470  bcLanes = LoadInst(continueLanesPtr, NULL, "continue_lanes");
471  else
472  bcLanes = LLVMMaskAllOff;
473 
474  if (breakLanesPtr != NULL) {
475  llvm::Value *breakLanes = LoadInst(breakLanesPtr, NULL, "break_lanes");
476  bcLanes = BinaryOperator(llvm::Instruction::Or, bcLanes, breakLanes, "|break_lanes");
477  }
478 
479  llvm::Value *notBreakOrContinue =
480  BinaryOperator(llvm::Instruction::Xor, bcLanes, LLVMMaskAllOn, "!(break|continue)_lanes");
481  llvm::Value *oldMask = GetInternalMask();
482  llvm::Value *newMask = BinaryOperator(llvm::Instruction::And, oldMask, notBreakOrContinue, "new_mask");
483  SetInternalMask(newMask);
484  }
485 }
486 
487 void FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct, bool uniformCF) {
488  // Store the current values of various loop-related state so that we
489  // can restore it when we exit this loop.
490  llvm::Value *oldMask = GetInternalMask();
491  controlFlowInfo.push_back(CFInfo::GetLoop(uniformCF, breakTarget, continueTarget, breakLanesPtr, continueLanesPtr,
492  oldMask, blockEntryMask));
493  if (uniformCF)
494  // If the loop has a uniform condition, we don't need to track
495  // which lanes 'break' or 'continue'; all of the running ones go
496  // together, so we just jump
497  breakLanesPtr = continueLanesPtr = NULL;
498  else {
499  // For loops with varying conditions, allocate space to store masks
500  // that record which lanes have done these
501  continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "continue_lanes_memory");
502  StoreInst(LLVMMaskAllOff, continueLanesPtr);
503  breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
504  StoreInst(LLVMMaskAllOff, breakLanesPtr);
505  }
506 
507  breakTarget = bt;
508  continueTarget = ct;
509  blockEntryMask = NULL; // this better be set by the loop!
510 }
511 
512 void FunctionEmitContext::EndLoop() {
513  CFInfo *ci = popCFState();
514  AssertPos(currentPos, ci->IsLoop());
515 
516  if (!ci->IsUniform())
517  // If the loop had a 'uniform' test, then it didn't make any
518  // changes to the mask so there's nothing to restore. If it had a
519  // varying test, we need to restore the mask to what it was going
520  // into the loop, but still leaving off any lanes that executed a
521  // 'return' statement.
522  restoreMaskGivenReturns(ci->savedMask);
523 }
524 
525 void FunctionEmitContext::StartForeach(ForeachType ft) {
526  // Issue an error if we're in a nested foreach...
527  if (ft == FOREACH_REGULAR) {
528  for (int i = 0; i < (int)controlFlowInfo.size(); ++i) {
529  if (controlFlowInfo[i]->type == CFInfo::ForeachRegular) {
530  Error(currentPos, "Nested \"foreach\" statements are currently "
531  "illegal.");
532  break;
533  // Don't return here, however, and in turn allow the caller to
534  // do the rest of its codegen and then call EndForeach()
535  // normally--the idea being that this gives a chance to find
536  // any other errors inside the body of the foreach loop...
537  }
538  }
539  }
540 
541  // Store the current values of various loop-related state so that we
542  // can restore it when we exit this loop.
543  llvm::Value *oldMask = GetInternalMask();
544  controlFlowInfo.push_back(
545  CFInfo::GetForeach(ft, breakTarget, continueTarget, breakLanesPtr, continueLanesPtr, oldMask, blockEntryMask));
546  breakLanesPtr = NULL;
547  breakTarget = NULL;
548 
549  continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "foreach_continue_lanes");
550  StoreInst(LLVMMaskAllOff, continueLanesPtr);
551  continueTarget = NULL; // should be set by SetContinueTarget()
552 
553  blockEntryMask = NULL;
554 }
555 
556 void FunctionEmitContext::EndForeach() {
557  CFInfo *ci = popCFState();
558  AssertPos(currentPos, ci->IsForeach());
559 }
560 
561 void FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
562  if (!bblock)
563  return;
564 
565  // Restore the mask to the given old mask, but leave off any lanes that
566  // executed a return statement.
567  // newMask = (oldMask & ~returnedLanes)
568  llvm::Value *returnedLanes = LoadInst(returnedLanesPtr, NULL, "returned_lanes");
569  llvm::Value *notReturned = BinaryOperator(llvm::Instruction::Xor, returnedLanes, LLVMMaskAllOn, "~returned_lanes");
570  llvm::Value *newMask = BinaryOperator(llvm::Instruction::And, oldMask, notReturned, "new_mask");
571  SetInternalMask(newMask);
572 }
573 
574 /** Returns "true" if the first enclosing non-if control flow expression is
575  a "switch" statement.
576 */
577 bool FunctionEmitContext::inSwitchStatement() const {
578  // Go backwards through controlFlowInfo, since we add new nested scopes
579  // to the back.
580  int i = controlFlowInfo.size() - 1;
581  while (i >= 0 && controlFlowInfo[i]->IsIf())
582  --i;
583  // Got to the first non-if (or end of CF info)
584  if (i == -1)
585  return false;
586  return controlFlowInfo[i]->IsSwitch();
587 }
588 
589 void FunctionEmitContext::Break(bool doCoherenceCheck) {
590  if (breakTarget == NULL) {
591  Error(currentPos, "\"break\" statement is illegal outside of "
592  "for/while/do loops and \"switch\" statements.");
593  return;
594  }
595  AssertPos(currentPos, controlFlowInfo.size() > 0);
596 
597  if (bblock == NULL)
598  return;
599 
600  if (inSwitchStatement() == true && switchConditionWasUniform == true && ifsInCFAllUniform(CFInfo::Switch)) {
601  // We know that all program instances are executing the break, so
602  // just jump to the block immediately after the switch.
603  AssertPos(currentPos, breakTarget != NULL);
604  BranchInst(breakTarget);
605  bblock = NULL;
606  return;
607  }
608 
609  // If all of the enclosing 'if' tests in the loop have uniform control
610  // flow or if we can tell that the mask is all on, then we can just
611  // jump to the break location.
612  if (inSwitchStatement() == false && ifsInCFAllUniform(CFInfo::Loop)) {
613  BranchInst(breakTarget);
614  // Set bblock to NULL since the jump has terminated the basic block
615  bblock = NULL;
616  } else {
617  // Varying switch, uniform switch where the 'break' is under
618  // varying control flow, or a loop with varying 'if's above the
619  // break. In these cases, we need to update the mask of the lanes
620  // that have executed a 'break' statement:
621  // breakLanes = breakLanes | mask
622  AssertPos(currentPos, breakLanesPtr != NULL);
623 
624  llvm::Value *mask = GetInternalMask();
625  llvm::Value *breakMask = LoadInst(breakLanesPtr, NULL, "break_mask");
626  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, mask, breakMask, "mask|break_mask");
627  StoreInst(newMask, breakLanesPtr);
628 
629  // Set the current mask to be all off, just in case there are any
630  // statements in the same scope after the 'break'. Most of the time
631  // this will be optimized away since we'll likely end the scope of
632  // an 'if' statement and restore the mask then.
633  SetInternalMask(LLVMMaskAllOff);
634 
635  if (doCoherenceCheck) {
636  if (continueTarget != NULL)
637  // If the user has indicated that this is a 'coherent'
638  // break statement, then check to see if the mask is all
639  // off. If so, we have to conservatively jump to the
640  // continueTarget, not the breakTarget, since part of the
641  // reason the mask is all off may be due to 'continue'
642  // statements that executed in the current loop iteration.
643  jumpIfAllLoopLanesAreDone(continueTarget);
644  else if (breakTarget != NULL)
645  // Similarly handle these for switch statements, where we
646  // only have a break target.
647  jumpIfAllLoopLanesAreDone(breakTarget);
648  }
649  }
650 }
651 
652 static bool lEnclosingLoopIsForeachActive(const std::vector<CFInfo *> &controlFlowInfo) {
653  for (int i = (int)controlFlowInfo.size() - 1; i >= 0; --i) {
654  if (controlFlowInfo[i]->type == CFInfo::ForeachActive)
655  return true;
656  }
657  return false;
658 }
659 
660 void FunctionEmitContext::Continue(bool doCoherenceCheck) {
661  if (!continueTarget) {
662  Error(currentPos, "\"continue\" statement illegal outside of "
663  "for/while/do/foreach loops.");
664  return;
665  }
666  AssertPos(currentPos, controlFlowInfo.size() > 0);
667 
668  if (ifsInCFAllUniform(CFInfo::Loop) || lEnclosingLoopIsForeachActive(controlFlowInfo)) {
669  // Similarly to 'break' statements, we can immediately jump to the
670  // continue target if we're only in 'uniform' control flow within
671  // loop or if we can tell that the mask is all on. Here, we can
672  // also jump if the enclosing loop is a 'foreach_active' loop, in
673  // which case we know that only a single program instance is
674  // executing.
675  AddInstrumentationPoint("continue: uniform CF, jumped");
676  BranchInst(continueTarget);
677  bblock = NULL;
678  } else {
679  // Otherwise update the stored value of which lanes have 'continue'd.
680  // continueLanes = continueLanes | mask
681  AssertPos(currentPos, continueLanesPtr);
682  llvm::Value *mask = GetInternalMask();
683  llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, "continue_mask");
684  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, mask, continueMask, "mask|continueMask");
685  StoreInst(newMask, continueLanesPtr);
686 
687  // And set the current mask to be all off in case there are any
688  // statements in the same scope after the 'continue'
689  SetInternalMask(LLVMMaskAllOff);
690 
691  if (doCoherenceCheck)
692  // If this is a 'coherent continue' statement, then emit the
693  // code to see if all of the lanes are now off due to
694  // breaks/continues and jump to the continue target if so.
695  jumpIfAllLoopLanesAreDone(continueTarget);
696  }
697 }
698 
699 /** This function checks to see if all of the 'if' statements (if any)
700  between the current scope and the first enclosing loop/switch of given
701  control flow type have 'uniform' tests.
702  */
703 bool FunctionEmitContext::ifsInCFAllUniform(int type) const {
704  AssertPos(currentPos, controlFlowInfo.size() > 0);
705  // Go backwards through controlFlowInfo, since we add new nested scopes
706  // to the back. Stop once we come to the first enclosing control flow
707  // structure of the desired type.
708  int i = controlFlowInfo.size() - 1;
709  while (i >= 0 && controlFlowInfo[i]->type != type) {
710  if (controlFlowInfo[i]->isUniform == false)
711  // Found a scope due to an 'if' statement with a varying test
712  return false;
713  --i;
714  }
715  AssertPos(currentPos, i >= 0); // else we didn't find the expected control flow type!
716  return true;
717 }
718 
719 void FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) {
720  llvm::Value *allDone = NULL;
721 
722  if (breakLanesPtr == NULL) {
723  llvm::Value *continued = LoadInst(continueLanesPtr, NULL, "continue_lanes");
724  continued = BinaryOperator(llvm::Instruction::And, continued, GetFunctionMask(), "continued&func");
725  allDone = MasksAllEqual(continued, blockEntryMask);
726  } else {
727  // Check to see if (returned lanes | continued lanes | break lanes) is
728  // equal to the value of mask at the start of the loop iteration. If
729  // so, everyone is done and we can jump to the given target
730  llvm::Value *returned = LoadInst(returnedLanesPtr, NULL, "returned_lanes");
731  llvm::Value *breaked = LoadInst(breakLanesPtr, NULL, "break_lanes");
732  llvm::Value *finishedLanes = BinaryOperator(llvm::Instruction::Or, returned, breaked, "returned|breaked");
733  if (continueLanesPtr != NULL) {
734  // It's NULL for "switch" statements...
735  llvm::Value *continued = LoadInst(continueLanesPtr, NULL, "continue_lanes");
736  finishedLanes =
737  BinaryOperator(llvm::Instruction::Or, finishedLanes, continued, "returned|breaked|continued");
738  }
739 
740  finishedLanes = BinaryOperator(llvm::Instruction::And, finishedLanes, GetFunctionMask(), "finished&func");
741 
742  // Do we match the mask at loop or switch statement entry?
743  allDone = MasksAllEqual(finishedLanes, blockEntryMask);
744  }
745 
746  llvm::BasicBlock *bAll = CreateBasicBlock("all_continued_or_breaked");
747  llvm::BasicBlock *bNotAll = CreateBasicBlock("not_all_continued_or_breaked");
748  BranchInst(bAll, bNotAll, allDone);
749 
750  // If so, have an extra basic block along the way to add
751  // instrumentation, if the user asked for it.
752  bblock = bAll;
753  AddInstrumentationPoint("break/continue: all dynamically went");
754  BranchInst(target);
755 
756  // And set the current basic block to a new one for future instructions
757  // for the path where we weren't able to jump
758  bblock = bNotAll;
759  AddInstrumentationPoint("break/continue: not all went");
760 }
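// A worked example of the check above (4-wide, assuming the function mask
// covers these lanes): with blockEntryMask = {1,1,1,0}, returned = {1,0,0,0},
// breaked = {0,1,0,0}, and continued = {0,0,1,0}, finishedLanes comes out to
// {1,1,1,0}, which equals blockEntryMask, so all running lanes are done and
// control jumps to 'target'.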
761 
762 void FunctionEmitContext::RestoreContinuedLanes() {
763  if (continueLanesPtr == NULL)
764  return;
765 
766  // mask = mask & continueFlags
767  llvm::Value *mask = GetInternalMask();
768  llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, "continue_mask");
769  llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or, mask, continueMask, "mask|continue_mask");
770  SetInternalMask(orMask);
771 
772  // continueLanes = 0
773  StoreInst(LLVMMaskAllOff, continueLanesPtr);
774 }
775 
776 void FunctionEmitContext::ClearBreakLanes() {
777  if (breakLanesPtr == NULL)
778  return;
779 
780  // breakLanes = 0
781  StoreInst(LLVMMaskAllOff, breakLanesPtr);
782 }
783 
784 void FunctionEmitContext::StartSwitch(bool cfIsUniform, llvm::BasicBlock *bbBreak) {
785  llvm::Value *oldMask = GetInternalMask();
786  controlFlowInfo.push_back(CFInfo::GetSwitch(cfIsUniform, breakTarget, continueTarget, breakLanesPtr,
787  continueLanesPtr, oldMask, blockEntryMask, switchExpr, defaultBlock,
788  caseBlocks, nextBlocks, switchConditionWasUniform));
789 
790  breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
791  StoreInst(LLVMMaskAllOff, breakLanesPtr);
792  breakTarget = bbBreak;
793 
794  continueLanesPtr = NULL;
795  continueTarget = NULL;
796  blockEntryMask = NULL;
797 
798  // These will be set by the SwitchInst() method
799  switchExpr = NULL;
800  defaultBlock = NULL;
801  caseBlocks = NULL;
802  nextBlocks = NULL;
803 }
804 
805 void FunctionEmitContext::EndSwitch() {
806  AssertPos(currentPos, bblock != NULL);
807 
808  CFInfo *ci = popCFState();
809  if (ci->IsVarying() && bblock != NULL)
810  restoreMaskGivenReturns(ci->savedMask);
811 }
812 
813 /** Emit code to check for an "all off" mask before the code for a
814  case or default label in a "switch" statement.
815  */
816 void FunctionEmitContext::addSwitchMaskCheck(llvm::Value *mask) {
817  llvm::Value *allOff = None(mask);
818  llvm::BasicBlock *bbSome = CreateBasicBlock("case_default_on");
819 
820  // Find the basic block for the case or default label immediately after
821  // the current one in the switch statement--that's where we want to
822  // jump if the mask is all off at this label.
823  AssertPos(currentPos, nextBlocks->find(bblock) != nextBlocks->end());
824  llvm::BasicBlock *bbNext = nextBlocks->find(bblock)->second;
825 
826  // Jump to the next one if the mask is all off; otherwise jump to the
827  // newly created block that will hold the actual code for this label.
828  BranchInst(bbNext, bbSome, allOff);
829  SetCurrentBasicBlock(bbSome);
830 }
831 
832 /** Returns the execution mask at entry to the first enclosing "switch"
833  statement. */
834 llvm::Value *FunctionEmitContext::getMaskAtSwitchEntry() {
835  AssertPos(currentPos, controlFlowInfo.size() > 0);
836  int i = controlFlowInfo.size() - 1;
837  while (i >= 0 && controlFlowInfo[i]->type != CFInfo::Switch)
838  --i;
839  AssertPos(currentPos, i != -1);
840  return controlFlowInfo[i]->savedMask;
841 }
842 
843 void FunctionEmitContext::EmitDefaultLabel(bool checkMask, SourcePos pos) {
844  if (inSwitchStatement() == false) {
845  Error(pos, "\"default\" label illegal outside of \"switch\" "
846  "statement.");
847  return;
848  }
849 
850  // If there's a default label in the switch, a basic block for it
851  // should have been provided in the previous call to SwitchInst().
852  AssertPos(currentPos, defaultBlock != NULL);
853 
854  if (bblock != NULL)
855  // The previous case in the switch fell through, or we're in a
856  // varying switch; terminate the current block with a jump to the
857  // block for the code for the default label.
858  BranchInst(defaultBlock);
859  SetCurrentBasicBlock(defaultBlock);
860 
861  if (switchConditionWasUniform)
862  // Nothing more to do for this case; return back to the caller,
863  // which will then emit the code for the default case.
864  return;
865 
866  // For a varying switch, we need to update the execution mask.
867  //
868  // First, compute the mask that corresponds to which program instances
869  // should execute the "default" code; this corresponds to the set of
870  // program instances that don't match any of the case statements.
871  // Therefore, we generate code that compares the value of the switch
872  // expression to the value associated with each of the "case"
873  // statements such that the surviving lanes didn't match any of them.
874  llvm::Value *matchesDefault = getMaskAtSwitchEntry();
875  for (int i = 0; i < (int)caseBlocks->size(); ++i) {
876  int value = (*caseBlocks)[i].first;
877  llvm::Value *valueVec =
878  (switchExpr->getType() == LLVMTypes::Int32VectorType) ? LLVMInt32Vector(value) : LLVMInt64Vector(value);
879  // TODO: for AVX2 at least, the following generates better code
880  // than doing ICMP_NE and skipping the NotOperator() below; file an
881  // LLVM bug?
882  llvm::Value *matchesCaseValue =
883  CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr, valueVec, "cmp_case_value");
884  matchesCaseValue = I1VecToBoolVec(matchesCaseValue);
885 
886  llvm::Value *notMatchesCaseValue = NotOperator(matchesCaseValue);
887  matchesDefault =
888  BinaryOperator(llvm::Instruction::And, matchesDefault, notMatchesCaseValue, "default&~case_match");
889  }
890 
891  // The mask may have some lanes on, which corresponds to the previous
892  // label falling through; compute the updated mask by ORing with the
893  // current mask.
894  llvm::Value *oldMask = GetInternalMask();
895  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask, matchesDefault, "old_mask|matches_default");
896  SetInternalMask(newMask);
897 
898  if (checkMask)
899  addSwitchMaskCheck(newMask);
900 }
901 
902 void FunctionEmitContext::EmitCaseLabel(int value, bool checkMask, SourcePos pos) {
903  if (inSwitchStatement() == false) {
904  Error(pos, "\"case\" label illegal outside of \"switch\" statement.");
905  return;
906  }
907 
908  // Find the basic block for this case statement.
909  llvm::BasicBlock *bbCase = NULL;
910  AssertPos(currentPos, caseBlocks != NULL);
911  for (int i = 0; i < (int)caseBlocks->size(); ++i)
912  if ((*caseBlocks)[i].first == value) {
913  bbCase = (*caseBlocks)[i].second;
914  break;
915  }
916  AssertPos(currentPos, bbCase != NULL);
917 
918  if (bblock != NULL)
919  // fall through from the previous case
920  BranchInst(bbCase);
921  SetCurrentBasicBlock(bbCase);
922 
923  if (switchConditionWasUniform)
924  return;
925 
926  // update the mask: first, get a mask that indicates which program
927  // instances have a value for the switch expression that matches this
928  // case statement.
929  llvm::Value *valueVec =
930  (switchExpr->getType() == LLVMTypes::Int32VectorType) ? LLVMInt32Vector(value) : LLVMInt64Vector(value);
931  llvm::Value *matchesCaseValue =
932  CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr, valueVec, "cmp_case_value");
933  matchesCaseValue = I1VecToBoolVec(matchesCaseValue);
934 
935  // If a lane was off going into the switch, we don't care if it has a
936  // value in the switch expression that happens to match this case.
937  llvm::Value *entryMask = getMaskAtSwitchEntry();
938  matchesCaseValue = BinaryOperator(llvm::Instruction::And, entryMask, matchesCaseValue, "entry_mask&case_match");
939 
940  // Take the surviving lanes and turn on the mask for them.
941  llvm::Value *oldMask = GetInternalMask();
942  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask, matchesCaseValue, "mask|case_match");
943  SetInternalMask(newMask);
944 
945  if (checkMask)
946  addSwitchMaskCheck(newMask);
947 }
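// An illustrative example: for "case 2:" with switchExpr = {0,2,2,5} and an
// entry mask of {1,1,0,1}, matchesCaseValue is {0,1,1,0} & {1,1,0,1} =
// {0,1,0,0}; those lanes are ORed into the current mask, so lane 1 newly
// executes this case's code (along with any lanes that fell through).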
948 
949 void FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault,
950  const std::vector<std::pair<int, llvm::BasicBlock *>> &bbCases,
951  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> &bbNext) {
952  // The calling code should have called StartSwitch() before calling
953  // SwitchInst().
954  AssertPos(currentPos, controlFlowInfo.size() && controlFlowInfo.back()->IsSwitch());
955 
956  switchExpr = expr;
957  defaultBlock = bbDefault;
958  caseBlocks = new std::vector<std::pair<int, llvm::BasicBlock *>>(bbCases);
959  nextBlocks = new std::map<llvm::BasicBlock *, llvm::BasicBlock *>(bbNext);
960  switchConditionWasUniform = (llvm::isa<llvm::VectorType>(expr->getType()) == false);
961 
962  if (switchConditionWasUniform == true) {
963  // For a uniform switch condition, just wire things up to the LLVM
964  // switch instruction.
965  llvm::SwitchInst *s = llvm::SwitchInst::Create(expr, bbDefault, bbCases.size(), bblock);
966  for (int i = 0; i < (int)bbCases.size(); ++i) {
967  if (expr->getType() == LLVMTypes::Int32Type)
968  s->addCase(LLVMInt32(bbCases[i].first), bbCases[i].second);
969  else {
970  AssertPos(currentPos, expr->getType() == LLVMTypes::Int64Type);
971  s->addCase(LLVMInt64(bbCases[i].first), bbCases[i].second);
972  }
973  }
974 
975  AddDebugPos(s);
976  // switch is a terminator
977  bblock = NULL;
978  } else {
979  // For a varying switch, we first turn off all lanes of the mask
980  SetInternalMask(LLVMMaskAllOff);
981 
982  if (nextBlocks->size() > 0) {
983  // If there are any labels inside the switch, jump to the first
984  // one; any code before the first label won't be executed by
985  // anyone.
986  std::map<llvm::BasicBlock *, llvm::BasicBlock *>::const_iterator iter;
987  iter = nextBlocks->find(NULL);
988  AssertPos(currentPos, iter != nextBlocks->end());
989  llvm::BasicBlock *bbFirst = iter->second;
990  BranchInst(bbFirst);
991  bblock = NULL;
992  }
993  }
994 }
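// Sketch of the resulting varying-switch control flow: the mask starts all
// off and execution jumps to the first label's block; EmitCaseLabel() and
// EmitDefaultLabel() then OR each label's matching lanes into the mask,
// addSwitchMaskCheck() skips any label whose mask is still all off, and
// Break() accumulates finished lanes into breakLanesPtr.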
995 
996 int FunctionEmitContext::VaryingCFDepth() const {
997  int sum = 0;
998  for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
999  if (controlFlowInfo[i]->IsVarying())
1000  ++sum;
1001  return sum;
1002 }
1003 
1004 bool FunctionEmitContext::InForeachLoop() const {
1005  for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
1006  if (controlFlowInfo[i]->IsForeach())
1007  return true;
1008  return false;
1009 }
1010 
1011 void FunctionEmitContext::DisableGatherScatterWarnings() { ++disableGSWarningCount; }
1012 
1013 void FunctionEmitContext::EnableGatherScatterWarnings() { --disableGSWarningCount; }
1014 
1015 static bool initLabelBBlocks(ASTNode *node, void *data) {
1016  LabeledStmt *ls = llvm::dyn_cast<LabeledStmt>(node);
1017  if (ls == NULL)
1018  return true;
1019 
1020  FunctionEmitContext *ctx = (FunctionEmitContext *)data;
1021 
1022  if (ctx->labelMap.find(ls->name) != ctx->labelMap.end())
1023  Error(ls->pos, "Multiple labels named \"%s\" in function.", ls->name.c_str());
1024  else {
1025  llvm::BasicBlock *bb = ctx->CreateBasicBlock(ls->name.c_str());
1026  ctx->labelMap[ls->name] = bb;
1027  }
1028  return true;
1029 }
1030 
1031 void FunctionEmitContext::InitializeLabelMap(Stmt *code) {
1032  labelMap.erase(labelMap.begin(), labelMap.end());
1033  WalkAST(code, initLabelBBlocks, NULL, this);
1034 }
1035 
1036 llvm::BasicBlock *FunctionEmitContext::GetLabeledBasicBlock(const std::string &label) {
1037  if (labelMap.find(label) != labelMap.end())
1038  return labelMap[label];
1039  else
1040  return NULL;
1041 }
1042 
1043 std::vector<std::string> FunctionEmitContext::GetLabels() {
1044  // Start with an empty vector; push_back below appends each label
1045  std::vector<std::string> labels;
1046 
1047  // Iterate through labelMap and grab only the keys
1048  std::map<std::string, llvm::BasicBlock *>::iterator iter;
1049  for (iter = labelMap.begin(); iter != labelMap.end(); iter++)
1050  labels.push_back(iter->first);
1051 
1052  return labels;
1053 }
1054 
1055 void FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
1056  const Type *returnType = function->GetReturnType();
1057  if (returnType->IsVoidType()) {
1058  if (expr != NULL)
1059  Error(expr->pos, "Can't return non-void type \"%s\" from void function.",
1060  expr->GetType()->GetString().c_str());
1061  } else {
1062  if (expr == NULL) {
1063  Error(funcStartPos, "Must provide return value for return "
1064  "statement for non-void function.");
1065  return;
1066  }
1067 
1068  expr = TypeConvertExpr(expr, returnType, "return statement");
1069  if (expr != NULL) {
1070  llvm::Value *retVal = expr->GetValue(this);
1071  if (retVal != NULL) {
1072  if (returnType->IsUniformType() || CastType<ReferenceType>(returnType) != NULL)
1073  StoreInst(retVal, returnValuePtr, returnType);
1074  else {
1075  // Use a masked store to store the value of the expression
1076  // in the return value memory; this preserves the return
1077  // values from other lanes that may have executed return
1078  // statements previously.
1079  StoreInst(retVal, returnValuePtr, GetInternalMask(), returnType,
1080  PointerType::GetUniform(returnType));
1081  }
1082  }
1083  }
1084  }
1085 
1086  if (VaryingCFDepth() == 0) {
1087  // If there is only uniform control flow between us and the
1088  // function entry, then it's guaranteed that all lanes are running,
1089  // so we can just emit a true return instruction
1090  AddInstrumentationPoint("return: uniform control flow");
1091  ReturnInst();
1092  } else {
1093  // Otherwise we update the returnedLanes value by ANDing it with
1094  // the current lane mask.
1095  llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, NULL, "old_returned_lanes");
1096  llvm::Value *newReturnedLanes =
1097  BinaryOperator(llvm::Instruction::Or, oldReturnedLanes, GetFullMask(), "old_mask|returned_lanes");
1098 
1099  // For 'coherent' return statements, emit code to check if all
1100  // lanes have returned
1101  if (doCoherenceCheck) {
1102  // if newReturnedLanes == functionMaskValue, get out of here!
1103  llvm::Value *cmp = MasksAllEqual(functionMaskValue, newReturnedLanes);
1104  llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return");
1105  llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return");
1106  BranchInst(bDoReturn, bNoReturn, cmp);
1107 
1108  bblock = bDoReturn;
1109  AddInstrumentationPoint("return: all lanes have returned");
1110  ReturnInst();
1111 
1112  bblock = bNoReturn;
1113  }
1114  // Otherwise update returnedLanesPtr and turn off all of the lanes
1115  // in the current mask so that any subsequent statements in the
1116  // same scope after the return have no effect
1117  StoreInst(newReturnedLanes, returnedLanesPtr);
1118  AddInstrumentationPoint("return: some but not all lanes have returned");
1119  SetInternalMask(LLVMMaskAllOff);
1120  }
1121 }
1122 
1123 llvm::Value *FunctionEmitContext::Any(llvm::Value *mask) {
1124  // Call the target-dependent any function to test that the mask is non-zero
1125  std::vector<Symbol *> mm;
1126  m->symbolTable->LookupFunction("__any", &mm);
1127  if (g->target->getMaskBitCount() == 1)
1128  AssertPos(currentPos, mm.size() == 1);
1129  else
1130  // There should be one with signed int signature, one unsigned int.
1131  AssertPos(currentPos, mm.size() == 2);
1132  // We can actually call either one, since both are i32s as far as
1133  // LLVM's type system is concerned...
1134  llvm::Function *fmm = mm[0]->function;
1135  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_any"));
1136 }
1137 
1138 llvm::Value *FunctionEmitContext::All(llvm::Value *mask) {
1139  // Call the target-dependent all function to test whether all lanes
1140  // of the mask are on
1141  std::vector<Symbol *> mm;
1142  m->symbolTable->LookupFunction("__all", &mm);
1143  if (g->target->getMaskBitCount() == 1)
1144  AssertPos(currentPos, mm.size() == 1);
1145  else
1146  // There should be one with signed int signature, one unsigned int.
1147  AssertPos(currentPos, mm.size() == 2);
1148  // We can actually call either one, since both are i32s as far as
1149  // LLVM's type system is concerned...
1150  llvm::Function *fmm = mm[0]->function;
1151  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_all"));
1152 }
1153 
1154 llvm::Value *FunctionEmitContext::None(llvm::Value *mask) {
1155  // Call the target-dependent none function to test whether all lanes
1156  // of the mask are off
1157  std::vector<Symbol *> mm;
1158  m->symbolTable->LookupFunction("__none", &mm);
1159  if (g->target->getMaskBitCount() == 1)
1160  AssertPos(currentPos, mm.size() == 1);
1161  else
1162  // There should be one with signed int signature, one unsigned int.
1163  AssertPos(currentPos, mm.size() == 2);
1164  // We can actually call either one, since both are i32s as far as
1165  // LLVM's type system is concerned...
1166  llvm::Function *fmm = mm[0]->function;
1167  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_none"));
1168 }
1169 
1170 llvm::Value *FunctionEmitContext::LaneMask(llvm::Value *v) {
1171  const char *__movmsk = "__movmsk";
1172 
1173  // Call the target-dependent movmsk function to turn the vector mask
1174  // into an i64 value
1175  std::vector<Symbol *> mm;
1176  m->symbolTable->LookupFunction(__movmsk, &mm);
1177  if (g->target->getMaskBitCount() == 1)
1178  AssertPos(currentPos, mm.size() == 1);
1179  else
1180  // There should be one with signed int signature, one unsigned int.
1181  AssertPos(currentPos, mm.size() == 2);
1182  // We can actually call either one, since both are i32s as far as
1183  // LLVM's type system is concerned...
1184  llvm::Function *fmm = mm[0]->function;
1185  return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
1186 }
1187 
1188 llvm::Value *FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
1189 #if 0
1190  // Compare the two masks to get a vector of i1s
1191  llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
1192  v1, v2, "v1==v2");
1193  // Turn that into a bool vector type (often i32s)
1194  cmp = I1VecToBoolVec(cmp);
1195  // And see if it's all on
1196  return All(cmp);
1197 #else
1198  if (g->target->getArch() == Arch::wasm32) {
1199  llvm::Function *fmm = m->module->getFunction("__wasm_cmp_msk_eq");
1200  return CallInst(fmm, NULL, {v1, v2}, LLVMGetName("wasm_cmp_msk_eq", v1, v2));
1201  }
1202  llvm::Value *mm1 = LaneMask(v1);
1203  llvm::Value *mm2 = LaneMask(v2);
1204  return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2, LLVMGetName("equal", v1, v2));
1205 #endif
1206 }
1207 
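/** Returns a constant vector of the per-lane indices <0, 1, ...,
    vectorWidth-1>, with 32- or 64-bit integer elements depending on
    'is32bits'. */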
1208 llvm::Value *FunctionEmitContext::ProgramIndexVector(bool is32bits) {
1209  llvm::SmallVector<llvm::Constant *, 16> array;
1210  for (int i = 0; i < g->target->getVectorWidth(); ++i) {
1211  llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i);
1212  array.push_back(C);
1213  }
1214 
1215  llvm::Constant *index = llvm::ConstantVector::get(array);
1216 
1217  return index;
1218 }
1219 
1220 llvm::Value *FunctionEmitContext::GetStringPtr(const std::string &str) {
1221  llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
1222  llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
1223  llvm::Value *lstrPtr =
1224  new llvm::GlobalVariable(*m->module, lstr->getType(), true /*isConst*/, linkage, lstr, "__str");
1225  return new llvm::BitCastInst(lstrPtr, LLVMTypes::VoidPointerType, "str_void_ptr", bblock);
1226 }
1227 
1228 llvm::BasicBlock *FunctionEmitContext::CreateBasicBlock(const char *name) {
1229  return llvm::BasicBlock::Create(*g->ctx, name, llvmFunction);
1230 }
1231 
1232 llvm::Value *FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
1233  if (b == NULL) {
1234  AssertPos(currentPos, m->errorCount > 0);
1235  return NULL;
1236  }
1237 
1238  if (g->target->getMaskBitCount() == 1)
1239  return b;
1240 
1241  llvm::ArrayType *at = llvm::dyn_cast<llvm::ArrayType>(b->getType());
1242  if (at) {
1243  // If we're given an array of vectors of i1s, then do the
1244  // conversion for each of the elements
1245  llvm::Type *boolArrayType = llvm::ArrayType::get(LLVMTypes::BoolVectorType, at->getNumElements());
1246  llvm::Value *ret = llvm::UndefValue::get(boolArrayType);
1247 
1248  for (unsigned int i = 0; i < at->getNumElements(); ++i) {
1249  llvm::Value *elt = ExtractInst(b, i);
1250  llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType, LLVMGetName(elt, "_to_boolvec"));
1251  ret = InsertInst(ret, sext, i);
1252  }
1253  return ret;
1254  } else
1255  return SExtInst(b, LLVMTypes::BoolVectorType, LLVMGetName(b, "_to_boolvec"));
1256 }
1257 
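/** Helper that emits the given string as an internal-linkage global
    constant and returns a pointer to its first character. */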
1258 static llvm::Value *lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
1259  llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s, true);
1260  std::string var_name = "_";
1261  var_name = var_name + s;
1262  llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(), true /* const */,
1263  llvm::GlobalValue::InternalLinkage, sConstant, var_name.c_str());
1264  llvm::Value *indices[2] = {LLVMInt32(0), LLVMInt32(0)};
1265  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1266  return llvm::GetElementPtrInst::Create(PTYPE(sPtr), sPtr, arrayRef, "sptr", bblock);
1267 }
1268 
1269 void FunctionEmitContext::AddInstrumentationPoint(const char *note) {
1270  AssertPos(currentPos, note != NULL);
1271  if (!g->emitInstrumentation)
1272  return;
1273 
1274  std::vector<llvm::Value *> args;
1275  // arg 1: filename as string
1276  args.push_back(lGetStringAsValue(bblock, currentPos.name));
1277  // arg 2: provided note
1278  args.push_back(lGetStringAsValue(bblock, note));
1279  // arg 3: line number
1280  args.push_back(LLVMInt32(currentPos.first_line));
1281  // arg 4: current mask, movmsk'ed down to an int64
1282  args.push_back(LaneMask(GetFullMask()));
1283 
1284  llvm::Function *finst = m->module->getFunction("ISPCInstrument");
1285  CallInst(finst, NULL, args, "");
1286 }
1287 
1288 void FunctionEmitContext::SetDebugPos(SourcePos pos) { currentPos = pos; }
1289 
1290 SourcePos FunctionEmitContext::GetDebugPos() const { return currentPos; }
1291 
1292 void FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos, llvm::DIScope *scope) {
1293  llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(value);
1294  if (inst != NULL && m->diBuilder) {
1295  SourcePos p = pos ? *pos : currentPos;
1296  if (p.first_line != 0)
1297  // If first_line == 0, then we're in the middle of setting up
1298  // the standard library or the like; don't add debug positions
1299  // for those functions
1300  inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column, scope ? scope : GetDIScope()));
1301  }
1302 }
1303 
1304 void FunctionEmitContext::StartScope() {
1305  if (m->diBuilder != NULL) {
1306  llvm::DIScope *parentScope;
1307  llvm::DILexicalBlock *lexicalBlock;
1308  if (debugScopes.size() > 0)
1309  parentScope = debugScopes.back();
1310  else
1311  parentScope = diSubprogram;
1312 
1313  lexicalBlock = m->diBuilder->createLexicalBlock(parentScope, diFile, currentPos.first_line,
1314  // Revision 216239 in LLVM removes support of DWARF
1315  // discriminator as the last argument
1316  currentPos.first_column);
1317  debugScopes.push_back(llvm::cast<llvm::DILexicalBlockBase>(lexicalBlock));
1318  }
1319 }
1320 
1321 void FunctionEmitContext::EndScope() {
1322  if (m->diBuilder != NULL) {
1323  AssertPos(currentPos, debugScopes.size() > 0);
1324  debugScopes.pop_back();
1325  }
1326 }
1327 
1328 llvm::DIScope *FunctionEmitContext::GetDIScope() const {
1329  AssertPos(currentPos, debugScopes.size() > 0);
1330  return debugScopes.back();
1331 }
1332 
1333 void FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) {
1334  if (m->diBuilder == NULL)
1335  return;
1336 
1337  llvm::DIScope *scope = GetDIScope();
1338  llvm::DIType *diType = sym->type->GetDIType(scope);
1339  llvm::DILocalVariable *var = m->diBuilder->createAutoVariable(
1340  scope, sym->name, sym->pos.GetDIFile(), sym->pos.first_line, diType, true /* preserve through opts */);
1341 
1342  llvm::Instruction *declareInst =
1343  m->diBuilder->insertDeclare(sym->storagePtr, var, m->diBuilder->createExpression(),
1344  llvm::DebugLoc::get(sym->pos.first_line, sym->pos.first_column, scope), bblock);
1345  AddDebugPos(declareInst, &sym->pos, scope);
1346 }
1347 
1348 void FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) {
1349  if (m->diBuilder == NULL)
1350  return;
1351 
1352  llvm::DINode::DIFlags flags = llvm::DINode::FlagZero;
1353  llvm::DIScope *scope = diSubprogram;
1354  llvm::DIType *diType = sym->type->GetDIType(scope);
1355  llvm::DILocalVariable *var =
1356  m->diBuilder->createParameterVariable(scope, sym->name, argNum + 1, sym->pos.GetDIFile(), sym->pos.first_line,
1357  diType, true /* preserve through opts */, flags);
1358 
1359  llvm::Instruction *declareInst =
1360  m->diBuilder->insertDeclare(sym->storagePtr, var, m->diBuilder->createExpression(),
1361  llvm::DebugLoc::get(sym->pos.first_line, sym->pos.first_column, scope), bblock);
1362  AddDebugPos(declareInst, &sym->pos, scope);
1363 }
1364 
1365 /** If the given type is an array of vector types, then it's the
1366  representation of an ispc VectorType with varying elements. If it is
1367  one of these, return the array size (i.e. the VectorType's size).
1368  Otherwise return zero.
1369  */
1370 static int lArrayVectorWidth(llvm::Type *t) {
1371  llvm::ArrayType *arrayType = llvm::dyn_cast<llvm::ArrayType>(t);
1372  if (arrayType == NULL)
1373  return 0;
1374 
1375  // We shouldn't be seeing arrays of anything but vectors being passed
1376  // to things like FunctionEmitContext::BinaryOperator() as operands.
1377  llvm::VectorType *vectorElementType = llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
1378  Assert((vectorElementType != NULL && (int)vectorElementType->getNumElements() == g->target->getVectorWidth()));
1379 
1380  return (int)arrayType->getNumElements();
1381 }
1382 
1383 llvm::Value *FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst, llvm::Value *v0, llvm::Value *v1,
1384  const char *name) {
1385  if (v0 == NULL || v1 == NULL) {
1386  AssertPos(currentPos, m->errorCount > 0);
1387  return NULL;
1388  }
1389 
1390  AssertPos(currentPos, v0->getType() == v1->getType());
1391  llvm::Type *type = v0->getType();
1392  int arraySize = lArrayVectorWidth(type);
1393  if (arraySize == 0) {
1394  llvm::Instruction *bop = llvm::BinaryOperator::Create(inst, v0, v1, name ? name : "", bblock);
1395  AddDebugPos(bop);
1396  return bop;
1397  } else {
1398  // If this is an ispc VectorType, apply the binary operator to each
1399  // of the elements of the array (which in turn should be either
1400  // scalar types or llvm::VectorTypes).
1401  llvm::Value *ret = llvm::UndefValue::get(type);
1402  for (int i = 0; i < arraySize; ++i) {
1403  llvm::Value *a = ExtractInst(v0, i);
1404  llvm::Value *b = ExtractInst(v1, i);
1405  llvm::Value *op = BinaryOperator(inst, a, b);
1406  ret = InsertInst(ret, op, i);
1407  }
1408  return ret;
1409  }
1410 }
1411 
1412 llvm::Value *FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
1413  if (v == NULL) {
1414  AssertPos(currentPos, m->errorCount > 0);
1415  return NULL;
1416  }
1417 
1418  // Similarly to BinaryOperator, do the operation on all the elements of
1419  // the array if we're given an array type; otherwise just do the
1420  // regular llvm operation.
1421  llvm::Type *type = v->getType();
1422  int arraySize = lArrayVectorWidth(type);
1423  if (arraySize == 0) {
1424  llvm::Instruction *binst = llvm::BinaryOperator::CreateNot(v, name ? name : "not", bblock);
1425  AddDebugPos(binst);
1426  return binst;
1427  } else {
1428  llvm::Value *ret = llvm::UndefValue::get(type);
1429  for (int i = 0; i < arraySize; ++i) {
1430  llvm::Value *a = ExtractInst(v, i);
1431  llvm::Value *op = llvm::BinaryOperator::CreateNot(a, name ? name : "not", bblock);
1432  AddDebugPos(op);
1433  ret = InsertInst(ret, op, i);
1434  }
1435  return ret;
1436  }
1437 }
1438 
1439 // Given the llvm Type that represents an ispc VectorType, return an
1440 // equally-shaped type with boolean elements. (This is the type that will
1441 // be returned from CmpInst with ispc VectorTypes).
1442 static llvm::Type *lGetMatchingBoolVectorType(llvm::Type *type) {
1443  llvm::ArrayType *arrayType = llvm::dyn_cast<llvm::ArrayType>(type);
1444  Assert(arrayType != NULL);
1445 
1446  llvm::VectorType *vectorElementType = llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
1447  Assert(vectorElementType != NULL);
1448  Assert((int)vectorElementType->getNumElements() == g->target->getVectorWidth());
1449 
1450  llvm::Type *base = llvm::VectorType::get(LLVMTypes::BoolType, g->target->getVectorWidth());
1451  return llvm::ArrayType::get(base, arrayType->getNumElements());
1452 }
1453 
1454 llvm::Value *FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, llvm::CmpInst::Predicate pred,
1455  llvm::Value *v0, llvm::Value *v1, const char *name) {
1456  if (v0 == NULL || v1 == NULL) {
1457  AssertPos(currentPos, m->errorCount > 0);
1458  return NULL;
1459  }
1460 
1461  AssertPos(currentPos, v0->getType() == v1->getType());
1462  llvm::Type *type = v0->getType();
1463  int arraySize = lArrayVectorWidth(type);
1464  if (arraySize == 0) {
1465  llvm::Instruction *ci = llvm::CmpInst::Create(inst, pred, v0, v1, name ? name : "cmp", bblock);
1466  AddDebugPos(ci);
1467  return ci;
1468  } else {
1469  llvm::Type *boolType = lGetMatchingBoolVectorType(type);
1470  llvm::Value *ret = llvm::UndefValue::get(boolType);
1471  for (int i = 0; i < arraySize; ++i) {
1472  llvm::Value *a = ExtractInst(v0, i);
1473  llvm::Value *b = ExtractInst(v1, i);
1474  llvm::Value *op = CmpInst(inst, pred, a, b, name);
1475  ret = InsertInst(ret, op, i);
1476  }
1477  return ret;
1478  }
1479 }
1480 
1481 llvm::Value *FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
1482  if (value == NULL) {
1483  AssertPos(currentPos, m->errorCount > 0);
1484  return NULL;
1485  }
1486 
1487  llvm::Value *ret = NULL;
1488  llvm::Type *eltType = value->getType();
1489  llvm::Type *vecType = NULL;
1490 
1491  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(eltType);
1492  if (pt != NULL) {
1493  // Varying pointers are represented as vectors of i32/i64s
1494  vecType = LLVMTypes::VoidPointerVectorType;
1495  value = PtrToIntInst(value);
1496  } else {
1497  // All other varying types are represented as vectors of the
1498  // underlying type.
1499  vecType = llvm::VectorType::get(eltType, g->target->getVectorWidth());
1500  }
1501 
1502  // Check for a constant case.
1503  if (llvm::Constant *const_val = llvm::dyn_cast<llvm::Constant>(value)) {
1504 #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
1505  ret = llvm::ConstantVector::getSplat(g->target->getVectorWidth(), const_val);
1506 #else // LLVM 11.0+
1507  ret =
1508  llvm::ConstantVector::getSplat({static_cast<unsigned int>(g->target->getVectorWidth()), false}, const_val);
1509 #endif
1510  return ret;
1511  }
1512 
1513  ret = BroadcastValue(value, vecType, name);
1514 
1515  return ret;
1516 }
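
/* Usage sketch (illustrative, with hypothetical values): broadcasting a
   uniform value to all program instances.

       // 'uval' is a uniform float, i.e. a scalar llvm::Value *
       llvm::Value *vval = ctx->SmearUniform(uval, "smear");
       // vval has type <W x float>, W == g->target->getVectorWidth();
       // constant inputs instead fold directly via ConstantVector::getSplat()
*/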
1517 
1518 llvm::Value *FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, const char *name) {
1519  if (value == NULL) {
1520  AssertPos(currentPos, m->errorCount > 0);
1521  return NULL;
1522  }
1523 
1524  if (name == NULL)
1525  name = LLVMGetName(value, "_bitcast");
1526 
1527  llvm::Instruction *inst = new llvm::BitCastInst(value, type, name, bblock);
1528  AddDebugPos(inst);
1529  return inst;
1530 }
1531 
1532 llvm::Value *FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
1533  if (value == NULL) {
1534  AssertPos(currentPos, m->errorCount > 0);
1535  return NULL;
1536  }
1537 
1538  if (llvm::isa<llvm::VectorType>(value->getType()))
1539  // no-op for varying pointers; they're already vectors of ints
1540  return value;
1541 
1542  if (name == NULL)
1543  name = LLVMGetName(value, "_ptr2int");
1544  llvm::Type *type = LLVMTypes::PointerIntType;
1545  llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
1546  AddDebugPos(inst);
1547  return inst;
1548 }
1549 
1550 llvm::Value *FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, const char *name) {
1551  if (value == NULL) {
1552  AssertPos(currentPos, m->errorCount > 0);
1553  return NULL;
1554  }
1555 
1556  if (name == NULL)
1557  name = LLVMGetName(value, "_ptr2int");
1558 
1559  llvm::Type *fromType = value->getType();
1560  if (llvm::isa<llvm::VectorType>(fromType)) {
1561  // varying pointer
1562  if (fromType == toType)
1563  // already the right type--done
1564  return value;
1565  else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
1566  return TruncInst(value, toType, name);
1567  else {
1568  AssertPos(currentPos, fromType->getScalarSizeInBits() < toType->getScalarSizeInBits());
1569  return ZExtInst(value, toType, name);
1570  }
1571  }
1572 
1573  llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock);
1574  AddDebugPos(inst);
1575  return inst;
1576 }
1577 
1578 llvm::Value *FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType, const char *name) {
1579  if (value == NULL) {
1580  AssertPos(currentPos, m->errorCount > 0);
1581  return NULL;
1582  }
1583 
1584  if (name == NULL)
1585  name = LLVMGetName(value, "_int2ptr");
1586 
1587  llvm::Type *fromType = value->getType();
1588  if (llvm::isa<llvm::VectorType>(fromType)) {
1589  // varying pointer
1590  if (fromType == toType)
1591  // done
1592  return value;
1593  else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
1594  return TruncInst(value, toType, name);
1595  else {
1596  AssertPos(currentPos, fromType->getScalarSizeInBits() < toType->getScalarSizeInBits());
1597  return ZExtInst(value, toType, name);
1598  }
1599  }
1600 
1601  llvm::Instruction *inst = new llvm::IntToPtrInst(value, toType, name, bblock);
1602  AddDebugPos(inst);
1603  return inst;
1604 }
1605 
1606 llvm::Instruction *FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type, const char *name) {
1607  if (value == NULL) {
1608  AssertPos(currentPos, m->errorCount > 0);
1609  return NULL;
1610  }
1611 
1612  if (name == NULL)
1613  name = LLVMGetName(value, "_trunc");
1614 
1615  // TODO: we should probably handle the array case as in
1616  // e.g. BitCastInst(), but we don't currently need that functionality
1617  llvm::Instruction *inst = new llvm::TruncInst(value, type, name, bblock);
1618  AddDebugPos(inst);
1619  return inst;
1620 }
1621 
1622 llvm::Instruction *FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value, llvm::Type *type,
1623  const char *name) {
1624  if (value == NULL) {
1625  AssertPos(currentPos, m->errorCount > 0);
1626  return NULL;
1627  }
1628 
1629  if (name == NULL)
1630  name = LLVMGetName(value, "_cast");
1631 
1632  // TODO: we should probably handle the array case as in
1633  // e.g. BitCastInst(), but we don't currently need that functionality
1634  llvm::Instruction *inst = llvm::CastInst::Create(op, value, type, name, bblock);
1635  AddDebugPos(inst);
1636  return inst;
1637 }
1638 
1639 llvm::Instruction *FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type, const char *name) {
1640  if (value == NULL) {
1641  AssertPos(currentPos, m->errorCount > 0);
1642  return NULL;
1643  }
1644 
1645  if (name == NULL)
1646  name = LLVMGetName(value, "_cast");
1647 
1648  // TODO: we should probably handle the array case as in
1649  // e.g. BitCastInst(), but we don't currently need that functionality
1650  llvm::Instruction *inst = llvm::CastInst::CreateFPCast(value, type, name, bblock);
1651  AddDebugPos(inst);
1652  return inst;
1653 }
1654 
1655 llvm::Instruction *FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type, const char *name) {
1656  if (value == NULL) {
1657  AssertPos(currentPos, m->errorCount > 0);
1658  return NULL;
1659  }
1660 
1661  if (name == NULL)
1662  name = LLVMGetName(value, "_sext");
1663 
1664  // TODO: we should probably handle the array case as in
1665  // e.g. BitCastInst(), but we don't currently need that functionality
1666  llvm::Instruction *inst = new llvm::SExtInst(value, type, name, bblock);
1667  AddDebugPos(inst);
1668  return inst;
1669 }
1670 
1671 llvm::Instruction *FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type, const char *name) {
1672  if (value == NULL) {
1673  AssertPos(currentPos, m->errorCount > 0);
1674  return NULL;
1675  }
1676 
1677  if (name == NULL)
1678  name = LLVMGetName(value, "_zext");
1679 
1680  // TODO: we should probably handle the array case as in
1681  // e.g. BitCastInst(), but we don't currently need that functionality
1682  llvm::Instruction *inst = new llvm::ZExtInst(value, type, name, bblock);
1683  AddDebugPos(inst);
1684  return inst;
1685 }
1686 
1687 /** Utility routine used by the GetElementPtrInst() methods; given a
1688  pointer to some type (either uniform or varying) and an index (also
1689  either uniform or varying), this returns the new pointer (varying if
1690  appropriate) given by offsetting the base pointer by the index times
1691  the size of the object that the pointer points to.
1692  */
1693 llvm::Value *FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index, const Type *ptrType) {
1694  // Find the scale factor for the index (i.e. the size of the object
1695  // that the pointer(s) point(s) to).
1696  const Type *scaleType = ptrType->GetBaseType();
1697  llvm::Value *scale = g->target->SizeOf(scaleType->LLVMType(g->ctx), bblock);
1698 
1699  bool indexIsVarying = llvm::isa<llvm::VectorType>(index->getType());
1700  llvm::Value *offset = NULL;
1701  if (indexIsVarying == false) {
1702  // Truncate or sign extend the index as appropriate to a 32 or
1703  // 64-bit type.
1704  if ((g->target->is32Bit() || g->opt.force32BitAddressing) && index->getType() == LLVMTypes::Int64Type)
1705  index = TruncInst(index, LLVMTypes::Int32Type);
1706  else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) && index->getType() == LLVMTypes::Int32Type)
1707  index = SExtInst(index, LLVMTypes::Int64Type);
1708 
1709  // do a scalar multiply to get the offset as index * scale and then
1710  // smear the result out to be a vector; this is more efficient than
1711  // first promoting both the scale and the index to vectors and then
1712  // multiplying.
1713  offset = BinaryOperator(llvm::Instruction::Mul, scale, index);
1714  offset = SmearUniform(offset);
1715  } else {
1716  // Similarly, truncate or sign extend the index to be a 32 or 64
1717  // bit vector type
1718  if ((g->target->is32Bit() || g->opt.force32BitAddressing) && index->getType() == LLVMTypes::Int64VectorType)
1719  index = TruncInst(index, LLVMTypes::Int32VectorType);
1720  else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
1721  index->getType() == LLVMTypes::Int32VectorType)
1722  index = SExtInst(index, LLVMTypes::Int64VectorType);
1723 
1724  scale = SmearUniform(scale);
1725  Assert(index != NULL);
1726  // offset = index * scale
1727  offset = BinaryOperator(llvm::Instruction::Mul, scale, index, LLVMGetName("mul", scale, index));
1728  }
1729 
1730  // For 64-bit targets, if we've been doing our offset calculations in
1731  // 32 bits, we still have to convert to a 64-bit value before we
1732  // actually add the offset to the pointer.
1733  if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
1734  offset = SExtInst(offset, LLVMTypes::Int64VectorType, LLVMGetName(offset, "_to_64"));
1735 
1736  // Smear out the pointer to be varying; either the base pointer or the
1737  // index must be varying for this method to be called.
1738  bool baseIsUniform = (llvm::isa<llvm::PointerType>(basePtr->getType()));
1739  AssertPos(currentPos, baseIsUniform == false || indexIsVarying == true);
1740  llvm::Value *varyingPtr = baseIsUniform ? SmearUniform(basePtr) : basePtr;
1741 
1742  // newPtr = ptr + offset
1743  return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset, LLVMGetName(basePtr, "_offset"));
1744 }
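
/* In effect, for each program instance i this computes (illustrative
   pseudo-formula, not code from the original source):

       result[i] = (intptr)basePtr[i] + index[i] * SizeOf(baseType)

   where a uniform index uses the same value in every lane; the scalar
   multiply-then-smear in that case is cheaper than smearing both operands
   and doing a vector multiply. */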
1745 
1746 void FunctionEmitContext::MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1) {
1747  llvm::Type *type0 = (*v0)->getType();
1748  llvm::Type *type1 = (*v1)->getType();
1749 
1750  // First, promote to a vector type if one of the two values is a vector
1751  // type
1752  if (llvm::isa<llvm::VectorType>(type0) && !llvm::isa<llvm::VectorType>(type1)) {
1753  *v1 = SmearUniform(*v1, "smear_v1");
1754  type1 = (*v1)->getType();
1755  }
1756  if (!llvm::isa<llvm::VectorType>(type0) && llvm::isa<llvm::VectorType>(type1)) {
1757  *v0 = SmearUniform(*v0, "smear_v0");
1758  type0 = (*v0)->getType();
1759  }
1760 
1761  // And then update to match bit widths
1762  if (type0 == LLVMTypes::Int32Type && type1 == LLVMTypes::Int64Type)
1763  *v0 = SExtInst(*v0, LLVMTypes::Int64Type);
1764  else if (type1 == LLVMTypes::Int32Type && type0 == LLVMTypes::Int64Type)
1765  *v1 = SExtInst(*v1, LLVMTypes::Int64Type);
1766  else if (type0 == LLVMTypes::Int32VectorType && type1 == LLVMTypes::Int64VectorType)
1767  *v0 = SExtInst(*v0, LLVMTypes::Int64VectorType);
1768  else if (type1 == LLVMTypes::Int32VectorType && type0 == LLVMTypes::Int64VectorType)
1769  *v1 = SExtInst(*v1, LLVMTypes::Int64VectorType);
1770 }
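
/* Usage sketch (hypothetical values): given a uniform i32 'a' and a varying
   i64 'b' on an 8-wide target,

       ctx->MatchIntegerTypes(&a, &b);

   first smears 'a' to <8 x i32> and then sign-extends it to <8 x i64>, so
   both values come out with the common type <8 x i64>. */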
1771 
1772 /** Given an integer index in indexValue that's indexing into an array of
1773  soa<> structures with given soaWidth, compute the two sub-indices we
1774  need to do the actual indexing calculation:
1775 
1776  subIndices[0] = (indexValue >> log(soaWidth))
1777  subIndices[1] = (indexValue & (soaWidth-1))
1778  */
1779 static llvm::Value *lComputeSliceIndex(FunctionEmitContext *ctx, int soaWidth, llvm::Value *indexValue,
1780  llvm::Value *ptrSliceOffset, llvm::Value **newSliceOffset) {
1781  // Compute the log2 of the soaWidth.
1782  Assert(soaWidth > 0);
1783  int logWidth = 0, sw = soaWidth;
1784  while (sw > 1) {
1785  ++logWidth;
1786  sw >>= 1;
1787  }
1788  Assert((1 << logWidth) == soaWidth);
1789 
1790  ctx->MatchIntegerTypes(&indexValue, &ptrSliceOffset);
1791  Assert(indexValue != NULL);
1792  llvm::Type *indexType = indexValue->getType();
1793  llvm::Value *shift = LLVMIntAsType(logWidth, indexType);
1794  llvm::Value *mask = LLVMIntAsType(soaWidth - 1, indexType);
1795 
1796  llvm::Value *indexSum = ctx->BinaryOperator(llvm::Instruction::Add, indexValue, ptrSliceOffset, "index_sum");
1797 
1798  // minor index = (index & (soaWidth - 1))
1799  *newSliceOffset = ctx->BinaryOperator(llvm::Instruction::And, indexSum, mask, "slice_index_minor");
1800  // slice offsets are always 32 bits...
1801  if ((*newSliceOffset)->getType() == LLVMTypes::Int64Type)
1802  *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32Type);
1803  else if ((*newSliceOffset)->getType() == LLVMTypes::Int64VectorType)
1804  *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32VectorType);
1805 
1806  // major index = (index >> logWidth)
1807  return ctx->BinaryOperator(llvm::Instruction::AShr, indexSum, shift, "slice_index_major");
1808 }
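
/* Worked example (illustrative): with soaWidth == 4, ptrSliceOffset == 0, and
   indexValue == 11, the index sum is 11, so

       minor index (new slice offset) = 11 & 3  == 3
       major index (returned)         = 11 >> 2 == 2

   i.e. element 11 lives in lane 3 of the third soa<4> struct. */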
1809 
1810 llvm::Value *FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) {
1811  // Create a small struct where the first element is the type of the
1812  // given pointer and the second element is the type of the offset
1813  // value.
1814  std::vector<llvm::Type *> eltTypes;
1815  eltTypes.push_back(ptr->getType());
1816  eltTypes.push_back(offset->getType());
1817  llvm::StructType *st = llvm::StructType::get(*g->ctx, eltTypes);
1818 
1819  llvm::Value *ret = llvm::UndefValue::get(st);
1820  ret = InsertInst(ret, ptr, 0, LLVMGetName(ret, "_slice_ptr"));
1821  ret = InsertInst(ret, offset, 1, LLVMGetName(ret, "_slice_offset"));
1822  return ret;
1823 }
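
/* The result is an anonymous two-element struct pairing the pointer with its
   lane offset; e.g. a uniform slice pointer might have the LLVM type
   (illustrative):

       { %soa_struct*, i32 }   ; { pointer to soa<> data, offset within it }
*/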
1824 
1825 llvm::Value *FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, const Type *ptrRefType,
1826  const char *name) {
1827  if (basePtr == NULL || index == NULL) {
1828  AssertPos(currentPos, m->errorCount > 0);
1829  return NULL;
1830  }
1831 
1832  // Regularize to a standard pointer type for basePtr's type
1833  const PointerType *ptrType;
1834  if (CastType<ReferenceType>(ptrRefType) != NULL)
1835  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
1836  else {
1837  ptrType = CastType<PointerType>(ptrRefType);
1838  }
1839  AssertPos(currentPos, ptrType != NULL);
1840 
1841  if (ptrType->IsSlice()) {
1842  AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));
1843 
1844  llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
1845  if (ptrType->IsFrozenSlice() == false) {
1846  // For slice pointers that aren't frozen, we compute a new
1847  // index based on the given index plus the offset in the slice
1848  // pointer. This gives us an updated integer slice index for
1849  // the resulting slice pointer and then an index to index into
1850  // the soa<> structs with.
1851  llvm::Value *newSliceOffset;
1852  int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
1853  index = lComputeSliceIndex(this, soaWidth, index, ptrSliceOffset, &newSliceOffset);
1854  ptrSliceOffset = newSliceOffset;
1855  }
1856 
1857  // Handle the indexing into the soa<> structs with the major
1858  // component of the index through a recursive call
1859  llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index, ptrType->GetAsNonSlice(), name);
1860 
1861  // And mash the results together for the return value
1862  return MakeSlicePointer(p, ptrSliceOffset);
1863  }
1864 
1865  // Double-check consistency between the given pointer type and its LLVM
1866  // type.
1867  if (ptrType->IsUniformType())
1868  AssertPos(currentPos, llvm::isa<llvm::PointerType>(basePtr->getType()));
1869  else if (ptrType->IsVaryingType())
1870  AssertPos(currentPos, llvm::isa<llvm::VectorType>(basePtr->getType()));
1871 
1872  bool indexIsVaryingType = llvm::isa<llvm::VectorType>(index->getType());
1873 
1874  if (indexIsVaryingType == false && ptrType->IsUniformType() == true) {
1875  // The easy case: both the base pointer and the indices are
1876  // uniform, so just emit the regular LLVM GEP instruction
1877  llvm::Value *ind[1] = {index};
1878  llvm::ArrayRef<llvm::Value *> arrayRef(&ind[0], &ind[1]);
1879  llvm::Instruction *inst =
1880  llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, name ? name : "gep", bblock);
1881  AddDebugPos(inst);
1882  return inst;
1883  } else
1884  return applyVaryingGEP(basePtr, index, ptrType);
1885 }
1886 
1887 llvm::Value *FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0, llvm::Value *index1,
1888  const Type *ptrRefType, const char *name) {
1889  if (basePtr == NULL || index0 == NULL || index1 == NULL) {
1890  AssertPos(currentPos, m->errorCount > 0);
1891  return NULL;
1892  }
1893 
1894  // Regularize the pointer type for basePtr
1895  const PointerType *ptrType = NULL;
1896  if (CastType<ReferenceType>(ptrRefType) != NULL)
1897  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
1898  else {
1899  ptrType = CastType<PointerType>(ptrRefType);
1900  AssertPos(currentPos, ptrType != NULL);
1901  }
1902 
1903  if (ptrType->IsSlice()) {
1904  // Similar to the 1D GEP implementation above, for non-frozen slice
1905  // pointers we do the two-step indexing calculation and then pass
1906  // the new major index on to a recursive GEP call.
1907  AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));
1908  llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
1909  if (ptrType->IsFrozenSlice() == false) {
1910  llvm::Value *newSliceOffset;
1911  int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
1912  index1 = lComputeSliceIndex(this, soaWidth, index1, ptrSliceOffset, &newSliceOffset);
1913  ptrSliceOffset = newSliceOffset;
1914  }
1915 
1916  llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index0, index1, ptrType->GetAsNonSlice(), name);
1917  return MakeSlicePointer(p, ptrSliceOffset);
1918  }
1919 
1920  bool index0IsVaryingType = llvm::isa<llvm::VectorType>(index0->getType());
1921  bool index1IsVaryingType = llvm::isa<llvm::VectorType>(index1->getType());
1922 
1923  if (index0IsVaryingType == false && index1IsVaryingType == false && ptrType->IsUniformType() == true) {
1924  // The easy case: both the base pointer and the indices are
1925  // uniform, so just emit the regular LLVM GEP instruction
1926  llvm::Value *indices[2] = {index0, index1};
1927  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1928  llvm::Instruction *inst =
1929  llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, name ? name : "gep", bblock);
1930  AddDebugPos(inst);
1931  return inst;
1932  } else {
1933  // Handle the first dimension with index0
1934  llvm::Value *ptr0 = GetElementPtrInst(basePtr, index0, ptrType);
1935 
1936  // Now index into the second dimension with index1. First figure
1937  // out the type of ptr0.
1938  const Type *baseType = ptrType->GetBaseType();
1939  const SequentialType *st = CastType<SequentialType>(baseType);
1940  AssertPos(currentPos, st != NULL);
1941 
1942  bool ptr0IsUniform = llvm::isa<llvm::PointerType>(ptr0->getType());
1943  const Type *ptr0BaseType = st->GetElementType();
1944  const Type *ptr0Type =
1945  ptr0IsUniform ? PointerType::GetUniform(ptr0BaseType) : PointerType::GetVarying(ptr0BaseType);
1946 
1947  return applyVaryingGEP(ptr0, index1, ptr0Type);
1948  }
1949 }
1950 
1951 llvm::Value *FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, const Type *ptrRefType,
1952  const char *name, const PointerType **resultPtrType) {
1953  if (resultPtrType != NULL)
1954  AssertPos(currentPos, ptrRefType != NULL);
1955 
1956  llvm::PointerType *llvmPtrType = llvm::dyn_cast<llvm::PointerType>(fullBasePtr->getType());
1957  if (llvmPtrType != NULL) {
1958  llvm::StructType *llvmStructType = llvm::dyn_cast<llvm::StructType>(llvmPtrType->getElementType());
1959  if (llvmStructType != NULL && llvmStructType->isSized() == false) {
1960  AssertPos(currentPos, m->errorCount > 0);
1961  return NULL;
1962  }
1963  }
1964 
1965  // (Unfortunately) it's not required to pass a non-NULL ptrRefType, but
1966  // if we have one, regularize into a pointer type.
1967  const PointerType *ptrType = NULL;
1968  if (ptrRefType != NULL) {
1969  // Normalize references to uniform pointers
1970  if (CastType<ReferenceType>(ptrRefType) != NULL)
1971  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
1972  else
1973  ptrType = CastType<PointerType>(ptrRefType);
1974  AssertPos(currentPos, ptrType != NULL);
1975  }
1976 
1977  // Similarly, we have to see if the pointer type is a struct to see if
1978  // we have a slice pointer instead of looking at ptrType; this is also
1979  // unfortunate...
1980  llvm::Value *basePtr = fullBasePtr;
1981  bool baseIsSlicePtr = llvm::isa<llvm::StructType>(fullBasePtr->getType());
1982  const PointerType *rpt;
1983  if (baseIsSlicePtr) {
1984  AssertPos(currentPos, ptrType != NULL);
1985  // Update basePtr to just be the part that actually points to the
1986  // start of an soa<> struct for now; the element offset computation
1987  // doesn't change the slice offset, so we'll incorporate that into
1988  // the final value right before this method returns.
1989  basePtr = ExtractInst(fullBasePtr, 0);
1990  if (resultPtrType == NULL)
1991  resultPtrType = &rpt;
1992  }
1993 
1994  // Return the pointer type of the result of this call, for callers that
1995  // want it.
1996  if (resultPtrType != NULL) {
1997  AssertPos(currentPos, ptrType != NULL);
1998  const CollectionType *ct = CastType<CollectionType>(ptrType->GetBaseType());
1999  AssertPos(currentPos, ct != NULL);
2000  *resultPtrType = new PointerType(ct->GetElementType(elementNum), ptrType->GetVariability(),
2001  ptrType->IsConstType(), ptrType->IsSlice());
2002  }
2003 
2004  llvm::Value *resultPtr = NULL;
2005  if (ptrType == NULL || ptrType->IsUniformType()) {
2006  // If the pointer is uniform, we can use the regular LLVM GEP.
2007  llvm::Value *offsets[2] = {LLVMInt32(0), LLVMInt32(elementNum)};
2008  llvm::ArrayRef<llvm::Value *> arrayRef(&offsets[0], &offsets[2]);
2009  resultPtr =
2010  llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, name ? name : "struct_offset", bblock);
2011  } else {
2012  // Otherwise do the math to find the offset and add it to the given
2013  // varying pointers
2014  const StructType *st = CastType<StructType>(ptrType->GetBaseType());
2015  llvm::Value *offset = NULL;
2016  if (st != NULL)
2017  // If the pointer is to a structure, Target::StructOffset() gives
2018  // us the offset in bytes to the given element of the structure
2019  offset = g->target->StructOffset(st->LLVMType(g->ctx), elementNum, bblock);
2020  else {
2021  // Otherwise we should have a vector or array here and the offset
2022  // is given by the element number times the size of the element
2023  // type of the vector.
2024  const SequentialType *st = CastType<SequentialType>(ptrType->GetBaseType());
2025  AssertPos(currentPos, st != NULL);
2026  llvm::Value *size = g->target->SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock);
2027  llvm::Value *scale =
2028  (g->target->is32Bit() || g->opt.force32BitAddressing) ? LLVMInt32(elementNum) : LLVMInt64(elementNum);
2029  offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
2030  }
2031 
2032  offset = SmearUniform(offset, "offset_smear");
2033 
2034  if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
2035  // If we're doing 32 bit addressing with a 64 bit target, although
2036  // we did the math above in 32 bit, we need to go to 64 bit before
2037  // we add the offset to the varying pointers.
2038  offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
2039 
2040  resultPtr = BinaryOperator(llvm::Instruction::Add, basePtr, offset, "struct_ptr_offset");
2041  }
2042 
2043  // Finally, if we had a slice pointer going in, mash it back together with
2044  // the original (unchanged) slice offset.
2045  if (baseIsSlicePtr)
2046  return MakeSlicePointer(resultPtr, ExtractInst(fullBasePtr, 1));
2047  else
2048  return resultPtr;
2049 }
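
/* Usage sketch (hypothetical names): given a uniform pointer 'p' with ispc
   type 'pType' pointing at "struct S { float a; int b; }", selecting member b:

       const PointerType *bPtrType;
       llvm::Value *pb = ctx->AddElementOffset(p, 1, pType, "b_ptr", &bPtrType);
       // uniform case: a plain GEP with indices {0, 1};
       // varying case: explicit byte-offset arithmetic as above
*/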
2050 
2051 llvm::Value *FunctionEmitContext::SwitchBoolSize(llvm::Value *value, llvm::Type *fromType, llvm::Type *toType,
2052  const char *name) {
2053  if ((value == NULL) || (fromType == NULL) || (toType == NULL)) {
2054  AssertPos(currentPos, m->errorCount > 0);
2055  return NULL;
2056  }
2057 
2058  if (name == NULL)
2059  name = LLVMGetName(value, "_switchBool");
2060 
2061  llvm::Value *newBool = value;
2062  if (g->target->getDataLayout()->getTypeSizeInBits(fromType) > g->target->getDataLayout()->getTypeSizeInBits(toType))
2063  newBool = TruncInst(value, toType);
2064  else if (g->target->getDataLayout()->getTypeSizeInBits(fromType) <
2065  g->target->getDataLayout()->getTypeSizeInBits(toType))
2066  newBool = SExtInst(value, toType);
2067 
2068  return newBool;
2069 }
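
/* Usage sketch (hypothetical values): ispc stores bools in memory as i8 but
   computes with the i1-based mask type, so a freshly loaded varying bool is
   widened back before use:

       llvm::Value *b = ctx->SwitchBoolSize(loaded, LLVMTypes::BoolVectorStorageType,
                                            LLVMTypes::BoolVectorType);
       // trunc vs. sext is chosen automatically from the two types' bit sizes
*/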
2070 
2071 llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, const Type *type, const char *name) {
2072  if (ptr == NULL) {
2073  AssertPos(currentPos, m->errorCount > 0);
2074  return NULL;
2075  }
2076 
2077  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(ptr->getType());
2078  AssertPos(currentPos, pt != NULL);
2079 
2080  if (name == NULL)
2081  name = LLVMGetName(ptr, "_load");
2082 
2083  llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock);
2084 
2085  if (g->opt.forceAlignedMemory && llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
2086 #if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0
2087  inst->setAlignment(g->target->getNativeVectorAlignment());
2088 #else // LLVM 10.0+
2089  inst->setAlignment(llvm::MaybeAlign(g->target->getNativeVectorAlignment()));
2090 #endif
2091  }
2092 
2093  AddDebugPos(inst);
2094 
2095  llvm::Value *loadVal = inst;
2096  // bool type is stored as i8. So, it requires some processing.
2097  if ((type != NULL) && (type->IsBoolType())) {
2098  if (CastType<AtomicType>(type) != NULL) {
2099  loadVal = SwitchBoolSize(loadVal, inst->getType(), type->LLVMType(g->ctx));
2100  } else if ((CastType<VectorType>(type) != NULL)) {
2101  const VectorType *vType = CastType<VectorType>(type);
2102  if (CastType<AtomicType>(vType->GetElementType()) != NULL) {
2103  loadVal = SwitchBoolSize(loadVal, inst->getType(), type->LLVMType(g->ctx));
2104  }
2105  }
2106  }
2107  return loadVal;
2108 }
2109 
2110 /** Given a slice pointer to soa'd data that is a basic type (atomic,
2111  pointer, or enum type), use the slice offset to compute pointer(s) to
2112  the appropriate individual data element(s).
2113  */
2114 static llvm::Value *lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr, const PointerType **ptrType) {
2115  Assert(CastType<PointerType>(*ptrType) != NULL);
2116 
2117  llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, LLVMGetName(ptr, "_ptr"));
2118  llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, LLVMGetName(ptr, "_offset"));
2119 
2120  // slicePtr should be a pointer to an soa-width wide array of the
2121  // final atomic/enum/pointer type
2122  const Type *unifBaseType = (*ptrType)->GetBaseType()->GetAsUniformType();
2123  Assert(Type::IsBasicType(unifBaseType));
2124 
2125  // The final pointer type is a uniform or varying pointer to the
2126  // underlying uniform type, depending on whether the given pointer is
2127  // uniform or varying.
2128  *ptrType =
2129  (*ptrType)->IsUniformType() ? PointerType::GetUniform(unifBaseType) : PointerType::GetVarying(unifBaseType);
2130 
2131  // For uniform pointers, bitcast to a pointer to the uniform element
2132  // type, so that the GEP below does the desired indexing
2133  if ((*ptrType)->IsUniformType())
2134  slicePtr = ctx->BitCastInst(slicePtr, (*ptrType)->LLVMType(g->ctx));
2135 
2136  // And finally index based on the slice offset
2137  return ctx->GetElementPtrInst(slicePtr, sliceOffset, *ptrType, LLVMGetName(slicePtr, "_final_gep"));
2138 }
2139 
2140 /** Utility routine that loads from a uniform pointer to soa<> data,
2141  returning a regular uniform (non-SOA) result.
2142  */
2143 llvm::Value *FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask, const PointerType *ptrType,
2144  const char *name) {
2145  const Type *unifType = ptrType->GetBaseType()->GetAsUniformType();
2146 
2147  const CollectionType *ct = CastType<CollectionType>(ptrType->GetBaseType());
2148  if (ct != NULL) {
2149  // If we have a struct/array, we need to decompose it into
2150  // individual element loads to fill in the result structure since
2151  // the SOA slice of values we need isn't contiguous in memory...
2152  llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx);
2153  llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
2154 
2155  for (int i = 0; i < ct->GetElementCount(); ++i) {
2156  const PointerType *eltPtrType;
2157  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType, "elt_offset", &eltPtrType);
2158  llvm::Value *eltValue = LoadInst(eltPtr, mask, eltPtrType, name);
2159  retValue = InsertInst(retValue, eltValue, i, "set_value");
2160  }
2161 
2162  return retValue;
2163  } else {
2164  // Otherwise we've made our way to a slice pointer to a basic type;
2165  // we need to apply the slice offset into this terminal SOA array
2166  // and then perform the final load
2167  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2168  return LoadInst(ptr, mask, ptrType, name);
2169  }
2170 }
2171 
2172 llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, const Type *ptrRefType,
2173  const char *name, bool one_elem) {
2174  if (ptr == NULL) {
2175  AssertPos(currentPos, m->errorCount > 0);
2176  return NULL;
2177  }
2178 
2179  AssertPos(currentPos, ptrRefType != NULL && mask != NULL);
2180 
2181  if (name == NULL)
2182  name = LLVMGetName(ptr, "_load");
2183 
2184  const PointerType *ptrType;
2185  const Type *elType;
2186  if (CastType<ReferenceType>(ptrRefType) != NULL) {
2187  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2188  elType = ptrRefType->GetReferenceTarget();
2189  } else {
2190  ptrType = CastType<PointerType>(ptrRefType);
2191  AssertPos(currentPos, ptrType != NULL);
2192  elType = ptrType->GetBaseType()->GetBaseType();
2193  }
2194 
2195  if (CastType<UndefinedStructType>(ptrType->GetBaseType())) {
2196  Error(currentPos, "Unable to load to undefined struct type \"%s\".",
2197  ptrType->GetBaseType()->GetString().c_str());
2198  return NULL;
2199  }
2200 
2201  if (ptrType->IsUniformType()) {
2202  if (ptrType->IsSlice()) {
2203  return loadUniformFromSOA(ptr, mask, ptrType, name);
2204  } else {
2205  // FIXME: same issue as above load inst regarding alignment...
2206  //
2207  // If the ptr is a straight up regular pointer, then just issue
2208  // a regular load. First figure out the alignment; in general we
2209  // can just assume the natural alignment (0 here), but for varying
2210  // atomic types, we need to make sure that the compiler emits
2211  // unaligned vector loads, so we specify a reduced alignment here.
2212  int align = 0;
2213  const AtomicType *atomicType = CastType<AtomicType>(ptrType->GetBaseType());
2214  if (atomicType != NULL && atomicType->IsVaryingType())
2215  // We actually just want to align to the vector element
2216  // alignment, but can't easily get that here, so just tell LLVM
2217  // it's totally unaligned. (This shouldn't make any difference
2218  // vs the proper alignment in practice.)
2219  align = 1;
2220 #if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0
2221  llvm::Instruction *inst = new llvm::LoadInst(ptr, name, false /* not volatile */, align, bblock);
2222 #else // LLVM 10.0+
2223  llvm::Instruction *inst =
2224  new llvm::LoadInst(ptr, name, false /* not volatile */, llvm::MaybeAlign(align), bblock);
2225 #endif
2226  AddDebugPos(inst);
2227  llvm::Value *loadVal = inst;
2228  // bool type is stored as i8. So, it requires some processing.
2229  if (elType->IsBoolType() && (CastType<AtomicType>(elType) != NULL)) {
2230  loadVal = SwitchBoolSize(loadVal, inst->getType(), elType->LLVMType(g->ctx));
2231  }
2232  return loadVal;
2233  }
2234  } else {
2235  // Otherwise we should have a varying ptr and it's time for a
2236  // gather.
2237  llvm::Value *gather_result = gather(ptr, ptrType, GetFullMask(), name);
2238  if (!one_elem)
2239  return gather_result;
2240 
2241  // This is a kludge. When we dereference a varying pointer to a uniform struct
2242  // with a "bound uniform" member, we should return the first unmasked member.
2243  Warning(currentPos, "Dereferencing varying pointer to uniform struct with 'bound uniform' member,\n"
2244  " only one value will survive. Possible loss of data.");
2245  // Call the target-dependent movmsk function to turn the vector mask
2246  // into an i64 value
2247  std::vector<Symbol *> mm;
2248  m->symbolTable->LookupFunction("__movmsk", &mm);
2249  if (g->target->getMaskBitCount() == 1)
2250  AssertPos(currentPos, mm.size() == 1);
2251  else
2252  // There should be one with signed int signature, one unsigned int.
2253  AssertPos(currentPos, mm.size() == 2);
2254  // We can actually call either one, since both are i32s as far as
2255  // LLVM's type system is concerned...
2256  llvm::Function *fmm = mm[0]->function;
2257  llvm::Value *int_mask = CallInst(fmm, NULL, mask, LLVMGetName(mask, "_movmsk"));
2258  std::vector<Symbol *> lz;
2259  m->symbolTable->LookupFunction("__count_trailing_zeros_i64", &lz);
2260  llvm::Function *flz = lz[0]->function;
2261  llvm::Value *elem_idx = CallInst(flz, NULL, int_mask, LLVMGetName(mask, "_clz"));
2262  llvm::Value *elem = llvm::ExtractElementInst::Create(gather_result, elem_idx,
2263  LLVMGetName(gather_result, "_umasked_elem"), bblock);
2264  return elem;
2265  }
2266 }
2267 
2268 llvm::Value *FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType, llvm::Value *mask,
2269  const char *name) {
2270  // We should have a varying pointer if we get here...
2271  AssertPos(currentPos, ptrType->IsVaryingType());
2272 
2273  const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType();
2274  llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
2275 
2276  const CollectionType *collectionType = CastType<CollectionType>(ptrType->GetBaseType());
2277  if (collectionType != NULL) {
2278  // For collections, recursively gather element wise to find the
2279  // result.
2280  llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
2281 
2282  const CollectionType *returnCollectionType = CastType<CollectionType>(returnType->GetBaseType());
2283 
2284  for (int i = 0; i < collectionType->GetElementCount(); ++i) {
2285  const PointerType *eltPtrType;
2286  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType, "gather_elt_ptr", &eltPtrType);
2287 
2288  eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType);
2289 
2290  // This is a kludge. When we dereference a varying pointer to a uniform struct
2291  // with a "bound uniform" member, we should return the first unmasked member.
2292  int need_one_elem = CastType<StructType>(ptrType->GetBaseType()) &&
2293  returnCollectionType->GetElementType(i)->IsUniformType();
2294  // This in turn will be another gather
2295  llvm::Value *eltValues = LoadInst(eltPtr, mask, eltPtrType, name, need_one_elem);
2296 
2297  retValue = InsertInst(retValue, eltValues, i, "set_value");
2298  }
2299  return retValue;
2300  } else if (ptrType->IsSlice()) {
2301  // If we have a slice pointer, we need to add the final slice
2302  // offset here right before issuing the actual gather
2303  //
2304  // FIXME: would it be better to do the corresponding same thing for
2305  // all of the varying offsets stuff here (and in scatter)?
2306  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2307  }
2308 
2309  // Otherwise we should just have a basic scalar or pointer type and we
2310  // can go and do the actual gather
2311  AddInstrumentationPoint("gather");
2312 
2313  // Figure out which gather function to call based on the size of
2314  // the elements.
2315  const PointerType *pt = CastType<PointerType>(returnType);
2316  const char *funcName = NULL;
2317  if (pt != NULL)
2318  funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" : "__pseudo_gather64_i64";
2319  // bool type is stored as i8.
2320  else if (returnType->IsBoolType())
2321  funcName = g->target->is32Bit() ? "__pseudo_gather32_i8" : "__pseudo_gather64_i8";
2322  else if (llvmReturnType == LLVMTypes::DoubleVectorType)
2323  funcName = g->target->is32Bit() ? "__pseudo_gather32_double" : "__pseudo_gather64_double";
2324  else if (llvmReturnType == LLVMTypes::Int64VectorType)
2325  funcName = g->target->is32Bit() ? "__pseudo_gather32_i64" : "__pseudo_gather64_i64";
2326  else if (llvmReturnType == LLVMTypes::FloatVectorType)
2327  funcName = g->target->is32Bit() ? "__pseudo_gather32_float" : "__pseudo_gather64_float";
2328  else if (llvmReturnType == LLVMTypes::Int32VectorType)
2329  funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" : "__pseudo_gather64_i32";
2330  else if (llvmReturnType == LLVMTypes::Int16VectorType)
2331  funcName = g->target->is32Bit() ? "__pseudo_gather32_i16" : "__pseudo_gather64_i16";
2332  else {
2333  AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType);
2334  funcName = g->target->is32Bit() ? "__pseudo_gather32_i8" : "__pseudo_gather64_i8";
2335  }
2336 
2337  llvm::Function *gatherFunc = m->module->getFunction(funcName);
2338  AssertPos(currentPos, gatherFunc != NULL);
2339 
2340  llvm::Value *gatherCall = CallInst(gatherFunc, NULL, ptr, mask, name);
2341 
2342  // Add metadata about the source file location so that the
2343  // optimization passes can print useful performance warnings if we
2344  // can't optimize out this gather
2345  if (disableGSWarningCount == 0)
2346  addGSMetadata(gatherCall, currentPos);
2347 
2348  // bool type is stored as i8. So, it requires some processing.
2349  if (returnType->IsBoolType()) {
2350  if (g->target->getDataLayout()->getTypeSizeInBits(returnType->LLVMStorageType(g->ctx)) <
2351  g->target->getDataLayout()->getTypeSizeInBits(llvmReturnType)) {
2352  // This is needed when an array of bool is passed in from the C++ side.
2353  // TRUE in clang is '1'. This is zero-extended to i8.
2354  // In ispc, this is uniform * varying which after gather becomes
2355  // varying bool. Varying bool in ispc is '-1'. The most
2356  // significant bit being set to 1 is important for blendv
2357  // operations to work as expected.
2358  if (ptrType->GetBaseType()->IsUniformType()) {
2359  gatherCall = TruncInst(gatherCall, LLVMTypes::Int1VectorType);
2360  gatherCall = SExtInst(gatherCall, llvmReturnType);
2361  } else {
2362  gatherCall = SExtInst(gatherCall, llvmReturnType);
2363  }
2364  } else if (g->target->getDataLayout()->getTypeSizeInBits(returnType->LLVMStorageType(g->ctx)) >
2365  g->target->getDataLayout()->getTypeSizeInBits(llvmReturnType)) {
2366  gatherCall = TruncInst(gatherCall, llvmReturnType);
2367  }
2368  }
2369  return gatherCall;
2370 }
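
/* For example (illustrative): gathering varying float data through a varying
   pointer on a 32-bit target calls the builtin __pseudo_gather32_float with
   the pointer vector and the current mask. The "pseudo" layer exists so that
   later optimization passes can either lower these calls to native gathers,
   scalarize them, or emit the performance warnings mentioned above. */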
2371 
2372 /** Add metadata to the given instruction to encode the current source file
2373  position. This data is used in the lGetSourcePosFromMetadata()
2374  function in opt.cpp.
2375 */
2376 void FunctionEmitContext::addGSMetadata(llvm::Value *v, SourcePos pos) {
2377  llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(v);
2378  if (inst == NULL)
2379  return;
2380  llvm::MDString *str = llvm::MDString::get(*g->ctx, pos.name);
2381  llvm::MDNode *md = llvm::MDNode::get(*g->ctx, str);
2382  inst->setMetadata("filename", md);
2383 
2384  llvm::Metadata *first_line = llvm::ConstantAsMetadata::get(LLVMInt32(pos.first_line));
2385  md = llvm::MDNode::get(*g->ctx, first_line);
2386  inst->setMetadata("first_line", md);
2387 
2388  llvm::Metadata *first_column = llvm::ConstantAsMetadata::get(LLVMInt32(pos.first_column));
2389  md = llvm::MDNode::get(*g->ctx, first_column);
2390  inst->setMetadata("first_column", md);
2391 
2392  llvm::Metadata *last_line = llvm::ConstantAsMetadata::get(LLVMInt32(pos.last_line));
2393  md = llvm::MDNode::get(*g->ctx, last_line);
2394  inst->setMetadata("last_line", md);
2395 
2396  llvm::Metadata *last_column = llvm::ConstantAsMetadata::get(LLVMInt32(pos.last_column));
2397  md = llvm::MDNode::get(*g->ctx, last_column);
2398  inst->setMetadata("last_column", md);
2399 }
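
/* Sketch of the consuming side (illustrative; the actual code lives in
   opt.cpp's lGetSourcePosFromMetadata()):

       if (llvm::MDNode *md = inst->getMetadata("first_line")) {
           llvm::ConstantInt *ci =
               llvm::mdconst::extract<llvm::ConstantInt>(md->getOperand(0));
           int firstLine = (int)ci->getZExtValue();
       }
*/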
2400 
2401 llvm::Value *FunctionEmitContext::AllocaInst(llvm::Type *llvmType, const char *name, int align, bool atEntryBlock) {
2402  if (llvmType == NULL) {
2403  AssertPos(currentPos, m->errorCount > 0);
2404  return NULL;
2405  }
2406 
2407  llvm::AllocaInst *inst = NULL;
2408  if (atEntryBlock) {
2409  // We usually insert it right before the jump instruction at the
2410  // end of allocaBlock
2411  llvm::Instruction *retInst = allocaBlock->getTerminator();
2412  AssertPos(currentPos, retInst);
2413  unsigned AS = llvmFunction->getParent()->getDataLayout().getAllocaAddrSpace();
2414  inst = new llvm::AllocaInst(llvmType, AS, name ? name : "", retInst);
2415  } else {
2416  // Unless the caller overrode the default and wants it in the
2417  // current basic block
2418  unsigned AS = llvmFunction->getParent()->getDataLayout().getAllocaAddrSpace();
2419  inst = new llvm::AllocaInst(llvmType, AS, name ? name : "", bblock);
2420  }
2421 
2422  // If no alignment was specified but we have an array of a uniform
2423  // type, then align it to the native vector alignment; it's quite
2424  // likely that this array will be loaded into varying variables with
2425  // accesses that will be aligned if the uniform -> varying load is done
2426  // in regular chunks.
2427  llvm::ArrayType *arrayType = llvm::dyn_cast<llvm::ArrayType>(llvmType);
2428  if (align == 0 && arrayType != NULL && !llvm::isa<llvm::VectorType>(arrayType->getElementType()))
2429  align = g->target->getNativeVectorAlignment();
2430 
2431  if (align != 0) {
2432 #if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0
2433  inst->setAlignment(align);
2434 #else // LLVM 10.0+
2435  inst->setAlignment(llvm::MaybeAlign(align));
2436 #endif
2437  }
2438  // Don't add debugging info to alloca instructions
2439  return inst;
2440 }
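
/* Usage sketch (hypothetical, assuming the default arguments declared in
   ctx.h): temporaries normally land in the entry/alloca block so that later
   passes can promote them to SSA registers.

       llvm::Value *tmp = ctx->AllocaInst(LLVMTypes::FloatVectorType, "tmp_vec");
       // inserted just before allocaBlock's terminator; pass an explicit
       // 'align' (or rely on the array heuristic above) to control alignment
*/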
2441 
2442 llvm::Value *FunctionEmitContext::AllocaInst(const Type *ptrType, const char *name, int align, bool atEntryBlock) {
2443  if (ptrType == NULL) {
2444  AssertPos(currentPos, m->errorCount > 0);
2445  return NULL;
2446  }
2447 
2448  llvm::Type *llvmStorageType = ptrType->LLVMType(g->ctx);
2449  if ((((CastType<AtomicType>(ptrType) != NULL) || (CastType<VectorType>(ptrType) != NULL)) &&
2450  (ptrType->IsBoolType())) ||
2451  ((CastType<ArrayType>(ptrType) != NULL) && (ptrType->GetBaseType()->IsBoolType()))) {
2452  llvmStorageType = ptrType->LLVMStorageType(g->ctx);
2453  }
2454 
2455  return AllocaInst(llvmStorageType, name, align, atEntryBlock);
2456 }
2457 
2458 /** Code to store the given varying value to the given location, only
2459  storing the elements that correspond to active program instances as
2460  given by the provided storeMask value. Note that the lvalue is only a
2461  single pointer, not a varying lvalue of one pointer per program
2462  instance (that case is handled by scatters).
2463  */
2464 void FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, const Type *ptrType, llvm::Value *mask) {
2465  if (value == NULL || ptr == NULL) {
2466  AssertPos(currentPos, m->errorCount > 0);
2467  return;
2468  }
2469 
2470  AssertPos(currentPos, CastType<PointerType>(ptrType) != NULL);
2471  AssertPos(currentPos, ptrType->IsUniformType());
2472 
2473  const Type *valueType = ptrType->GetBaseType();
2474  const CollectionType *collectionType = CastType<CollectionType>(valueType);
2475  if (collectionType != NULL) {
2476  // Assigning a structure / array / vector. Handle each element
2477  // individually with what turns into a recursive call to
2478  // maskedStore().
2479  for (int i = 0; i < collectionType->GetElementCount(); ++i) {
2480  const Type *eltType = collectionType->GetElementType(i);
2481  if (eltType == NULL) {
2482  Assert(m->errorCount > 0);
2483  continue;
2484  }
2485  llvm::Value *eltValue = ExtractInst(value, i, "value_member");
2486  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType, "struct_ptr_ptr");
2487  const Type *eltPtrType = PointerType::GetUniform(eltType);
2488  StoreInst(eltValue, eltPtr, mask, eltType, eltPtrType);
2489  }
2490  return;
2491  }
2492 
2493  // We must have a regular atomic, enumerator, or pointer type at this
2494  // point.
2495  AssertPos(currentPos, Type::IsBasicType(valueType));
2496  valueType = valueType->GetAsNonConstType();
2497 
2498  // Figure out if we need a 8, 16, 32 or 64-bit masked store.
2499  llvm::Function *maskedStoreFunc = NULL;
2500  llvm::Type *llvmValueType = value->getType();
2501  llvm::Type *llvmValueStorageType = llvmValueType;
2502 
2503  const PointerType *pt = CastType<PointerType>(valueType);
2504  // bool type is stored as i8. So, it requires some processing.
2505  if ((pt == NULL) && (valueType->IsBoolType())) {
2506  llvmValueStorageType = LLVMTypes::BoolVectorStorageType;
2507  }
2508  if (pt != NULL) {
2509  if (pt->IsSlice()) {
2510  // Masked store of (varying) slice pointer.
2511  AssertPos(currentPos, pt->IsVaryingType());
2512 
2513  // First, extract the pointer from the slice struct and masked
2514  // store that.
2515  llvm::Value *v0 = ExtractInst(value, 0);
2516  llvm::Value *p0 = AddElementOffset(ptr, 0, ptrType);
2517  maskedStore(v0, p0, PointerType::GetUniform(pt->GetAsNonSlice()), mask);
2518 
2519  // And then do same for the integer offset
2520  llvm::Value *v1 = ExtractInst(value, 1);
2521  llvm::Value *p1 = AddElementOffset(ptr, 1, ptrType);
2522  const Type *offsetType = AtomicType::VaryingInt32;
2523  maskedStore(v1, p1, PointerType::GetUniform(offsetType), mask);
2524 
2525  return;
2526  }
2527 
2528  if (g->target->is32Bit())
2529  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
2530  else
2531  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
2532  } else if (llvmValueType == LLVMTypes::Int1VectorType) {
2533  llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask, LLVMMaskAllOn, "~mask");
2534  llvm::Value *old = LoadInst(ptr, valueType);
2535  llvm::Value *maskedOld = BinaryOperator(llvm::Instruction::And, old, notMask, "old&~mask");
2536  llvm::Value *maskedNew = BinaryOperator(llvm::Instruction::And, value, mask, "new&mask");
2537  llvm::Value *final = BinaryOperator(llvm::Instruction::Or, maskedOld, maskedNew, "old_new_result");
2538  StoreInst(final, ptr, valueType);
2539  return;
2540  } else if (llvmValueStorageType == LLVMTypes::DoubleVectorType) {
2541  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_double");
2542  } else if (llvmValueStorageType == LLVMTypes::Int64VectorType) {
2543  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
2544  } else if (llvmValueStorageType == LLVMTypes::FloatVectorType) {
2545  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_float");
2546  } else if (llvmValueStorageType == LLVMTypes::Int32VectorType) {
2547  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
2548  } else if (llvmValueStorageType == LLVMTypes::Int16VectorType) {
2549  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i16");
2550  } else if (llvmValueStorageType == LLVMTypes::Int8VectorType) {
2551  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i8");
2552  value = SwitchBoolSize(value, llvmValueType, llvmValueStorageType);
2553  }
2554  AssertPos(currentPos, maskedStoreFunc != NULL);
2555 
2556  std::vector<llvm::Value *> args;
2557  args.push_back(ptr);
2558  args.push_back(value);
2559  args.push_back(mask);
2560  CallInst(maskedStoreFunc, NULL, args);
2561 }
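
/* For i1 mask vectors the store is emulated as a load/blend/store, i.e.
   (illustrative formula):

       *ptr = (old & ~mask) | (value & mask)

   which leaves lanes with an off mask bit untouched. All wider element types
   go through the __pseudo_masked_store_* builtins, which later optimization
   passes lower to native masked stores where the target supports them. */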
2562 
2563 /** Scatter the given varying value to the locations given by the varying
2564  lvalue (which should be an array of pointers with size equal to the
2565  target's vector width). We want to store each rvalue element at the
2566  corresponding pointer's location, *if* the mask for the corresponding
2567  program instance is on. If it's off, don't do anything.
2568 */
2569 void FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, const Type *valueType, const Type *origPt,
2570  llvm::Value *mask) {
2571  const PointerType *ptrType = CastType<PointerType>(origPt);
2572  AssertPos(currentPos, ptrType != NULL);
2573  AssertPos(currentPos, ptrType->IsVaryingType());
2574 
2575  const CollectionType *srcCollectionType = CastType<CollectionType>(valueType);
2576  if (srcCollectionType != NULL) {
2577  // We're scattering a collection type--we need to keep track of the
2578  // source type (the type of the data values to be stored) and the
2579  // destination type (the type of objects in memory that will be
2580  // stored into) separately. This is necessary so that we can get
2581  // all of the addressing calculations right if we're scattering
2582  // from a varying struct to an array of uniform instances of the
2583  // same struct type, versus scattering into an array of varying
2584  // instances of the struct type, etc.
2585  const CollectionType *dstCollectionType = CastType<CollectionType>(ptrType->GetBaseType());
2586  AssertPos(currentPos, dstCollectionType != NULL);
2587 
2588  // Scatter the collection elements individually
2589  for (int i = 0; i < srcCollectionType->GetElementCount(); ++i) {
2590  // First, get the values for the current element out of the
2591  // source.
2592  llvm::Value *eltValue = ExtractInst(value, i);
2593  const Type *srcEltType = srcCollectionType->GetElementType(i);
2594 
2595  // We may be scattering a uniform atomic element; in this case
2596  // we'll smear it out to be varying before making the recursive
2597  // scatter() call below.
2598  if (srcEltType->IsUniformType() && Type::IsBasicType(srcEltType)) {
2599  eltValue = SmearUniform(eltValue, "to_varying");
2600  srcEltType = srcEltType->GetAsVaryingType();
2601  }
2602 
2603  // Get the (varying) pointer to the i'th element of the target
2604  // collection
2605  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType);
2606 
2607  // The destination element type may be uniform (e.g. if we're
2608  // scattering to an array of uniform structs). Thus, we need
2609  // to be careful about passing the correct type to
2610  // addVaryingOffsetsIfNeeded() here.
2611  const Type *dstEltType = dstCollectionType->GetElementType(i);
2612  const PointerType *dstEltPtrType = PointerType::GetVarying(dstEltType);
2613  if (ptrType->IsSlice())
2614  dstEltPtrType = dstEltPtrType->GetAsSlice();
2615 
2616  eltPtr = addVaryingOffsetsIfNeeded(eltPtr, dstEltPtrType);
2617 
2618  // And recursively scatter() until we hit a basic type, at
2619  // which point the actual memory operations can be performed...
2620  scatter(eltValue, eltPtr, srcEltType, dstEltPtrType, mask);
2621  }
2622  return;
2623  } else if (ptrType->IsSlice()) {
2624  // As with gather, we need to add the final slice offset
2625  // once we get to a terminal SOA array of basic types.
2626  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2627  }
2628 
2629  const PointerType *pt = CastType<PointerType>(valueType);
2630 
2631  // And everything should be a pointer or atomic (or enum) from here on out...
2632  AssertPos(currentPos,
2633  pt != NULL || CastType<AtomicType>(valueType) != NULL || CastType<EnumType>(valueType) != NULL);
2634 
2635  llvm::Type *type = value->getType();
2636  llvm::Type *llvmStorageType = type;
2637  // bool type is stored as i8. So, it requires some processing.
2638  if ((pt != NULL) && (valueType->IsBoolType())) {
2639  llvmStorageType = LLVMTypes::BoolVectorStorageType;
2640  value = SwitchBoolSize(value, type, llvmStorageType);
2641  }
2642  const char *funcName = NULL;
2643  if (pt != NULL) {
2644  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" : "__pseudo_scatter64_i64";
2645  } else if (llvmStorageType == LLVMTypes::DoubleVectorType) {
2646  funcName = g->target->is32Bit() ? "__pseudo_scatter32_double" : "__pseudo_scatter64_double";
2647  } else if (llvmStorageType == LLVMTypes::Int64VectorType) {
2648  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i64" : "__pseudo_scatter64_i64";
2649  } else if (llvmStorageType == LLVMTypes::FloatVectorType) {
2650  funcName = g->target->is32Bit() ? "__pseudo_scatter32_float" : "__pseudo_scatter64_float";
2651  } else if (llvmStorageType == LLVMTypes::Int32VectorType) {
2652  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" : "__pseudo_scatter64_i32";
2653  } else if (llvmStorageType == LLVMTypes::Int16VectorType) {
2654  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i16" : "__pseudo_scatter64_i16";
2655  } else if (llvmStorageType == LLVMTypes::Int8VectorType) {
2656  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i8" : "__pseudo_scatter64_i8";
2657  }
2658 
2659  llvm::Function *scatterFunc = m->module->getFunction(funcName);
2660  AssertPos(currentPos, scatterFunc != NULL);
2661 
2662  AddInstrumentationPoint("scatter");
2663 
2664  std::vector<llvm::Value *> args;
2665  args.push_back(ptr);
2666  args.push_back(value);
2667  args.push_back(mask);
2668  llvm::Value *inst = CallInst(scatterFunc, NULL, args);
2669 
2670  if (disableGSWarningCount == 0)
2671  addGSMetadata(inst, currentPos);
2672 }
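
// Illustrative sketch (added for exposition; not part of the original ctx.cpp):
// the pseudo-scatter symbols selected above follow the pattern
// "__pseudo_scatter{32,64}_<elt>", where 32/64 is the target's pointer width
// and <elt> names the element storage type. A hypothetical helper that
// reproduces the naming table (assumes snprintf from <cstdio> is available):
static void lExamplePseudoScatterName(char *buf, size_t bufSize, bool is32BitTarget, const char *eltSuffix) {
    // e.g. (true, "i32") -> "__pseudo_scatter32_i32"
    snprintf(buf, bufSize, "__pseudo_scatter%s_%s", is32BitTarget ? "32" : "64", eltSuffix);
}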
2673 
2674 void FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr, const Type *ptrType) {
2675  if (value == NULL || ptr == NULL) {
2676  // may happen due to error elsewhere
2677  AssertPos(currentPos, m->errorCount > 0);
2678  return;
2679  }
2680 
2681  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(ptr->getType());
2682  AssertPos(currentPos, pt != NULL);
2683 
2684  if ((ptrType != NULL) && (ptrType->IsBoolType())) {
2685  if ((CastType<AtomicType>(ptrType) != NULL)) {
2686  value = SwitchBoolSize(value, value->getType(), ptrType->LLVMStorageType(g->ctx));
2687  } else if (CastType<VectorType>(ptrType) != NULL) {
2688  const VectorType *vType = CastType<VectorType>(ptrType);
2689  if (CastType<AtomicType>(vType->GetElementType()) != NULL) {
2690  value = SwitchBoolSize(value, value->getType(), ptrType->LLVMStorageType(g->ctx));
2691  }
2692  }
2693  }
2694 
2695  llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock);
2696 
2697  if (g->opt.forceAlignedMemory && llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
2698 #if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0
2699  inst->setAlignment(g->target->getNativeVectorAlignment());
2700 #else // LLVM 10.0+
2701  inst->setAlignment(llvm::MaybeAlign(g->target->getNativeVectorAlignment()));
2702 #endif
2703  }
2704 
2705  AddDebugPos(inst);
2706 }
2707 
2708 void FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr, llvm::Value *mask, const Type *valueType,
2709  const Type *ptrRefType) {
2710  if (value == NULL || ptr == NULL) {
2711  // may happen due to error elsewhere
2712  AssertPos(currentPos, m->errorCount > 0);
2713  return;
2714  }
2715 
2716  const PointerType *ptrType;
2717  if (CastType<ReferenceType>(ptrRefType) != NULL)
2718  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2719  else {
2720  ptrType = CastType<PointerType>(ptrRefType);
2721  AssertPos(currentPos, ptrType != NULL);
2722  }
2723 
2724  if (CastType<UndefinedStructType>(ptrType->GetBaseType())) {
2725  Error(currentPos, "Unable to store to undefined struct type \"%s\".",
2726  ptrType->GetBaseType()->GetString().c_str());
2727  return;
2728  }
2729 
2730  // Figure out what kind of store we're doing here
2731  if (ptrType->IsUniformType()) {
2732  if (ptrType->IsSlice())
2733  // storing a uniform value to a single slice of a SOA type
2734  storeUniformToSOA(value, ptr, mask, valueType, ptrType);
2735  else if (ptrType->GetBaseType()->IsUniformType())
2736  // the easy case
2737  StoreInst(value, ptr, valueType);
2738  else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations)
2739  // The mask is known to be all on, so a regular store suffices;
2740  // otherwise fall through to a masked store. (Unclear if this check is actually useful.)
2741  StoreInst(value, ptr, valueType);
2742  else
2743  maskedStore(value, ptr, ptrType, mask);
2744  } else {
2745  AssertPos(currentPos, ptrType->IsVaryingType());
2746  // We have a varying ptr (an array of pointers), so it's time to
2747  // scatter
2748  scatter(value, ptr, valueType, ptrType, GetFullMask());
2749  }
2750 }
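
// Summary of the store dispatch above (illustrative note, not in the
// original source):
//   uniform slice pointer                  -> storeUniformToSOA()
//   uniform pointer to uniform data        -> plain StoreInst()
//   uniform pointer, mask known all-on     -> plain StoreInst()
//   uniform pointer, varying data, masked  -> maskedStore()
//   varying pointer                        -> scatter()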
2751 
2752 /** Store a uniform type to SOA-laid-out memory.
2753  */
2754 void FunctionEmitContext::storeUniformToSOA(llvm::Value *value, llvm::Value *ptr, llvm::Value *mask,
2755  const Type *valueType, const PointerType *ptrType) {
2756  AssertPos(currentPos, Type::EqualIgnoringConst(ptrType->GetBaseType()->GetAsUniformType(), valueType));
2757 
2758  const CollectionType *ct = CastType<CollectionType>(valueType);
2759  if (ct != NULL) {
2760  // Handle collections element wise...
2761  for (int i = 0; i < ct->GetElementCount(); ++i) {
2762  llvm::Value *eltValue = ExtractInst(value, i);
2763  const Type *eltType = ct->GetElementType(i);
2764  const PointerType *dstEltPtrType;
2765  llvm::Value *dstEltPtr = AddElementOffset(ptr, i, ptrType, "slice_offset", &dstEltPtrType);
2766  StoreInst(eltValue, dstEltPtr, mask, eltType, dstEltPtrType);
2767  }
2768  } else {
2769  // We're finally at a leaf SOA array; apply the slice offset and
2770  // then we can do a final regular store
2771  AssertPos(currentPos, Type::IsBasicType(valueType));
2772  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2773  StoreInst(value, ptr, valueType);
2774  }
2775 }
2776 
2777 void FunctionEmitContext::MemcpyInst(llvm::Value *dest, llvm::Value *src, llvm::Value *count, llvm::Value *align) {
2778  dest = BitCastInst(dest, LLVMTypes::VoidPointerType);
2779  src = BitCastInst(src, LLVMTypes::VoidPointerType);
2780  if (count->getType() != LLVMTypes::Int64Type) {
2781  AssertPos(currentPos, count->getType() == LLVMTypes::Int32Type);
2782  count = ZExtInst(count, LLVMTypes::Int64Type, "count_to_64");
2783  }
2784  if (align == NULL)
2785  align = LLVMInt32(1);
2786 #if ISPC_LLVM_VERSION <= ISPC_LLVM_8_0
2787  llvm::Constant *mcFunc =
2788 #if ISPC_LLVM_VERSION == ISPC_LLVM_6_0 // LLVM 6.0
2789  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", LLVMTypes::VoidType, LLVMTypes::VoidPointerType,
2790                                        LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, LLVMTypes::Int32Type,
2791                                        LLVMTypes::BoolType);
2792 #else // LLVM 7.0+
2793  // Now alignment goes as an attribute, not as a parameter.
2794  // See LLVM r322965/r323597 for more details.
2795  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", LLVMTypes::VoidType, LLVMTypes::VoidPointerType,
2796                                        LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, LLVMTypes::BoolType);
2797 #endif
2798 #else // LLVM 9.0+
2799  llvm::FunctionCallee mcFuncCallee =
2800  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", LLVMTypes::VoidType, LLVMTypes::VoidPointerType,
2801                                        LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, LLVMTypes::BoolType);
2802  llvm::Constant *mcFunc = llvm::cast<llvm::Constant>(mcFuncCallee.getCallee());
2803 #endif
2804  AssertPos(currentPos, mcFunc != NULL);
2805  AssertPos(currentPos, llvm::isa<llvm::Function>(mcFunc));
2806 
2807  std::vector<llvm::Value *> args;
2808  args.push_back(dest);
2809  args.push_back(src);
2810  args.push_back(count);
2811 #if ISPC_LLVM_VERSION < ISPC_LLVM_7_0
2812  // Don't bother setting alignment for 7.0+, as this parameter is never really used by ISPC.
2813  args.push_back(align);
2814 #endif
2815  args.push_back(LLVMFalse); /* not volatile */
2816  CallInst(mcFunc, NULL, args, "");
2817 }
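
// For reference (illustrative note, not in the original source): on LLVM 7.0+
// the intrinsic declared above has the form
//   declare void @llvm.memcpy.p0i8.p0i8.i64(i8* dest, i8* src, i64 len, i1 isvolatile)
// with alignment carried as parameter attributes, while LLVM 6.0 and earlier
// took an extra i32 alignment parameter; that's why the 6.0 branch declares
// and passes one more argument.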
2818 
2819 void FunctionEmitContext::BranchInst(llvm::BasicBlock *dest) {
2820  llvm::Instruction *b = llvm::BranchInst::Create(dest, bblock);
2821  AddDebugPos(b);
2822 }
2823 
2824 void FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock, llvm::BasicBlock *falseBlock, llvm::Value *test) {
2825  if (test == NULL) {
2826  AssertPos(currentPos, m->errorCount > 0);
2827  return;
2828  }
2829 
2830  llvm::Instruction *b = llvm::BranchInst::Create(trueBlock, falseBlock, test, bblock);
2831  AddDebugPos(b);
2832 }
2833 
2834 llvm::Value *FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) {
2835  if (v == NULL) {
2836  AssertPos(currentPos, m->errorCount > 0);
2837  return NULL;
2838  }
2839 
2840  if (name == NULL) {
2841  char buf[32];
2842  snprintf(buf, sizeof(buf), "_extract_%d", elt);
2843  name = LLVMGetName(v, buf);
2844  }
2845  llvm::Instruction *ei = NULL;
2846  if (llvm::isa<llvm::VectorType>(v->getType()))
2847  ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), name, bblock);
2848  else
2849  ei = llvm::ExtractValueInst::Create(v, elt, name, bblock);
2850  AddDebugPos(ei);
2851  return ei;
2852 }
2853 
2854 llvm::Value *FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const char *name) {
2855  if (v == NULL || eltVal == NULL) {
2856  AssertPos(currentPos, m->errorCount > 0);
2857  return NULL;
2858  }
2859 
2860  if (name == NULL) {
2861  char buf[32];
2862  snprintf(buf, sizeof(buf), "_insert_%d", elt);
2863  name = LLVMGetName(v, buf);
2864  }
2865 
2866  llvm::Instruction *ii = NULL;
2867  if (llvm::isa<llvm::VectorType>(v->getType()))
2868  ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt), name, bblock);
2869  else
2870  ii = llvm::InsertValueInst::Create(v, eltVal, elt, name, bblock);
2871  AddDebugPos(ii);
2872  return ii;
2873 }
2874 
2875 llvm::Value *FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, const char *name) {
2876  if (v1 == NULL || v2 == NULL || mask == NULL) {
2877  AssertPos(currentPos, m->errorCount > 0);
2878  return NULL;
2879  }
2880 
2881  if (name == NULL) {
2882  char buf[32];
2883  snprintf(buf, sizeof(buf), "_shuffle");
2884  name = LLVMGetName(v1, buf);
2885  }
2886 
2887  llvm::Instruction *ii = new llvm::ShuffleVectorInst(v1, v2, mask, name, bblock);
2888 
2889  AddDebugPos(ii);
2890  return ii;
2891 }
2892 
2893 llvm::Value *FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type *vecType, const char *name) {
2894  if (v == NULL || vecType == NULL) {
2895  AssertPos(currentPos, m->errorCount > 0);
2896  return NULL;
2897  }
2898 
2899  llvm::VectorType *ty = llvm::dyn_cast<llvm::VectorType>(vecType);
2900  Assert(ty && ty->getVectorElementType() == v->getType());
2901 
2902  if (name == NULL) {
2903  char buf[32];
2904  snprintf(buf, sizeof(buf), "_broadcast");
2905  name = LLVMGetName(v, buf);
2906  }
2907 
2908  // Generate the following sequence:
2909  // %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0
2910  // %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef,
2911  // <4 x i32> zeroinitializer
2912 
2913  llvm::Value *undef1 = llvm::UndefValue::get(vecType);
2914  llvm::Value *undef2 = llvm::UndefValue::get(vecType);
2915 
2916  // InsertElement
2917  llvm::Twine tw = llvm::Twine(name) + llvm::Twine("_init");
2918  llvm::Value *insert = InsertInst(undef1, v, 0, tw.str().c_str());
2919 
2920  // ShuffleVector
2921 #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
2922  llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
2923  vecType->getVectorNumElements(), llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
2924 #else // LLVM 11.0+
2925  llvm::Constant *zeroVec =
2926  llvm::ConstantVector::getSplat({static_cast<unsigned int>(vecType->getVectorNumElements()), false},
2927  llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
2928 #endif
2929  llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, name);
2930 
2931  return ret;
2932 }
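
// Usage sketch (hypothetical caller, not in the original source): splatting a
// uniform float value f (an llvm::Value* of float type) across the target's
// float vector type:
//   llvm::Value *splat = ctx->BroadcastValue(f, LLVMTypes::FloatVectorType, "f_splat");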
2933 
2934 llvm::PHINode *FunctionEmitContext::PhiNode(llvm::Type *type, int count, const char *name) {
2935  llvm::PHINode *pn = llvm::PHINode::Create(type, count, name ? name : "phi", bblock);
2936  AddDebugPos(pn);
2937  return pn;
2938 }
2939 
2940 llvm::Instruction *FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Value *val1,
2941  const char *name) {
2942  if (test == NULL || val0 == NULL || val1 == NULL) {
2943  AssertPos(currentPos, m->errorCount > 0);
2944  return NULL;
2945  }
2946 
2947  if (name == NULL)
2948  name = LLVMGetName(test, "_select");
2949 
2950  llvm::Instruction *inst = llvm::SelectInst::Create(test, val0, val1, name, bblock);
2951  AddDebugPos(inst);
2952  return inst;
2953 }
2954 
2955 /** Given a value representing a function to be called, or a possibly-varying
2956  pointer to a function to be called, figure out how many arguments the
2957  function has. */
2958 static unsigned int lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
2959  llvm::FunctionType *ft = llvm::dyn_cast<llvm::FunctionType>(callee->getType());
2960 
2961  if (ft == NULL) {
2962  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(callee->getType());
2963  if (pt == NULL) {
2964  // varying--in this case, it must be the version of the
2965  // function that takes a mask
2966  return funcType->GetNumParameters() + 1;
2967  }
2968  ft = llvm::dyn_cast<llvm::FunctionType>(pt->getElementType());
2969  }
2970 
2971  Assert(ft != NULL);
2972  return ft->getNumParams();
2973 }
2974 
2975 llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
2976  const std::vector<llvm::Value *> &args, const char *name) {
2977  if (func == NULL) {
2978  AssertPos(currentPos, m->errorCount > 0);
2979  return NULL;
2980  }
2981 
2982  std::vector<llvm::Value *> argVals = args;
2983  // Most of the time, the mask is passed as the last argument. This
2984  // isn't the case for things like intrinsics, builtins, and extern "C"
2985  // functions from the application. Add the mask if it's needed.
2986  unsigned int calleeArgCount = lCalleeArgCount(func, funcType);
2987  AssertPos(currentPos, argVals.size() + 1 == calleeArgCount || argVals.size() == calleeArgCount);
2988  if (argVals.size() + 1 == calleeArgCount)
2989  argVals.push_back(GetFullMask());
2990 
2991  if (llvm::isa<llvm::VectorType>(func->getType()) == false) {
2992  // Regular 'uniform' function call--just one function or function
2993  // pointer, so just emit the IR directly.
2994  llvm::Instruction *ci = llvm::CallInst::Create(func, argVals, name ? name : "", bblock);
2995 
2996  // Copy noalias attribute to call instruction, to enable better
2997  // alias analysis.
2998  // TODO: what other attributes need to be copied?
2999  // TODO: do the same for the varying path.
3000  llvm::CallInst *cc = llvm::dyn_cast<llvm::CallInst>(ci);
3001  if (cc && cc->getCalledFunction() && cc->getCalledFunction()->returnDoesNotAlias()) {
3002  cc->addAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::NoAlias);
3003  }
3004 
3005  AddDebugPos(ci);
3006  return ci;
3007  } else {
3008  // Emit the code for a varying function call, where we have a
3009  // vector of function pointers, one for each program instance. The
3010  // basic strategy is that we go through the function pointers, and
3011  // for the executing program instances, for each unique function
3012  // pointer that's in the vector, call that function with a mask
3013  // equal to the set of active program instances that also have that
3014  // function pointer. When all unique function pointers have been
3015  // called, we're done.
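        // In pseudocode (an illustrative restatement of the strategy above,
        // not in the original source):
        //     remaining = currentMask;
        //     while (any(remaining)) {
        //         fptr = func[firstActiveLane(remaining)];
        //         callMask = remaining & (func == fptr);
        //         result = fptr(args..., callMask);   // result stored under callMask
        //         remaining &= ~callMask;
        //     }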
3016 
3017  llvm::BasicBlock *bbTest = CreateBasicBlock("varying_funcall_test");
3018  llvm::BasicBlock *bbCall = CreateBasicBlock("varying_funcall_call");
3019  llvm::BasicBlock *bbDone = CreateBasicBlock("varying_funcall_done");
3020 
3021  // Get the current mask value so we can restore it later
3022  llvm::Value *origMask = GetInternalMask();
3023 
3024  // First allocate memory to accumulate the various program
3025  // instances' return values...
3026  const Type *returnType = funcType->GetReturnType();
3027  llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
3028  llvm::Value *resultPtr = NULL;
3029  if (llvmReturnType->isVoidTy() == false)
3030  resultPtr = AllocaInst(returnType);
3031 
3032  // The memory pointed to by maskPtr tracks the set of program
3033  // instances for which we still need to call the function they are
3034  // pointing to. It starts out initialized with the mask of
3035  // currently running program instances.
3036  llvm::Value *maskPtr = AllocaInst(LLVMTypes::MaskType);
3037  StoreInst(GetFullMask(), maskPtr);
3038 
3039  // And now we branch to the test to see if there's more work to be
3040  // done.
3041  BranchInst(bbTest);
3042 
3043  // bbTest: are any lanes of the mask still on? If so, jump to
3044  // bbCall
3045  SetCurrentBasicBlock(bbTest);
3046  {
3047  llvm::Value *maskLoad = LoadInst(maskPtr);
3048  llvm::Value *any = Any(maskLoad);
3049  BranchInst(bbCall, bbDone, any);
3050  }
3051 
3052  // bbCall: this is the body of the loop that calls out to one of
3053  // the active function pointer values.
3054  SetCurrentBasicBlock(bbCall);
3055  {
3056  // Figure out the first lane that still needs its function
3057  // pointer to be called.
3058  llvm::Value *currentMask = LoadInst(maskPtr);
3059  llvm::Function *cttz = m->module->getFunction("__count_trailing_zeros_i64");
3060  AssertPos(currentPos, cttz != NULL);
3061  llvm::Value *firstLane64 = CallInst(cttz, NULL, LaneMask(currentMask), "first_lane64");
3062  llvm::Value *firstLane = TruncInst(firstLane64, LLVMTypes::Int32Type, "first_lane32");
3063 
3064  // Get the pointer to the function we're going to call this
3065  // time through: fptr = func[firstLane]
3066  llvm::Value *fptr = llvm::ExtractElementInst::Create(func, firstLane, "extract_fptr", bblock);
3067 
3068  // Smear it out into an array of function pointers
3069  llvm::Value *fptrSmear = SmearUniform(fptr, "func_ptr");
3070 
3071  // fpOverlap = (fpSmearAsVec == fpOrigAsVec). This gives us a
3072  // mask for the set of program instances that have the same
3073  // value for their function pointer.
3074  llvm::Value *fpOverlap = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, fptrSmear, func);
3075  fpOverlap = I1VecToBoolVec(fpOverlap);
3076 
3077  // Figure out the mask to use when calling the function
3078  // pointer: we need to AND the current execution mask to handle
3079  // the case of any non-running program instances that happen to
3080  // have this function pointer value.
3081  // callMask = (currentMask & fpOverlap)
3082  llvm::Value *callMask = BinaryOperator(llvm::Instruction::And, currentMask, fpOverlap, "call_mask");
3083 
3084  // Set the mask
3085  SetInternalMask(callMask);
3086 
3087  // bitcast the i32/64 function pointer to the actual function
3088  // pointer type.
3089  llvm::Type *llvmFuncType = funcType->LLVMFunctionType(g->ctx);
3090  llvm::Type *llvmFPtrType = llvm::PointerType::get(llvmFuncType, 0);
3091  llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType);
3092 
3093  // Call the function: callResult = call fptr(args..., callMask)
3094  llvm::Value *callResult = CallInst(fptrCast, funcType, args, name);
3095 
3096  // Now, do a masked store into the memory allocated to
3097  // accumulate the result using the call mask.
3098  if (callResult != NULL && callResult->getType() != LLVMTypes::VoidType) {
3099  AssertPos(currentPos, resultPtr != NULL);
3100  StoreInst(callResult, resultPtr, callMask, returnType, PointerType::GetUniform(returnType));
3101  } else
3102  AssertPos(currentPos, resultPtr == NULL);
3103 
3104  // Update the mask to turn off the program instances for which
3105  // we just called the function.
3106  // currentMask = currentMask & ~callmask
3107  llvm::Value *notCallMask = BinaryOperator(llvm::Instruction::Xor, callMask, LLVMMaskAllOn, "~callMask");
3108  currentMask = BinaryOperator(llvm::Instruction::And, currentMask, notCallMask, "currentMask&~callMask");
3109  StoreInst(currentMask, maskPtr);
3110 
3111  // And go back to the test to see if we need to do another
3112  // call.
3113  BranchInst(bbTest);
3114  }
3115 
3116  // bbDone: We're all done; clean up and return the result we've
3117  // accumulated in the result memory.
3118  SetCurrentBasicBlock(bbDone);
3119  SetInternalMask(origMask);
3120  return resultPtr ? LoadInst(resultPtr, funcType->GetReturnType()) : NULL;
3121  }
3122 }
3123 
3124 llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg,
3125  const char *name) {
3126  std::vector<llvm::Value *> args;
3127  args.push_back(arg);
3128  return CallInst(func, funcType, args, name);
3129 }
3130 
3131 llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg0,
3132  llvm::Value *arg1, const char *name) {
3133  std::vector<llvm::Value *> args;
3134  args.push_back(arg0);
3135  args.push_back(arg1);
3136  return CallInst(func, funcType, args, name);
3137 }
3138 
3139 llvm::Instruction *FunctionEmitContext::ReturnInst() {
3140  if (launchedTasks)
3141  // Add a sync call at the end of any function that launched tasks
3142  SyncInst();
3143 
3144  llvm::Instruction *rinst = NULL;
3145  if (returnValuePtr != NULL) {
3146  // We have value(s) to return; load them from their storage
3147  // location
3148  llvm::Value *retVal = LoadInst(returnValuePtr, function->GetReturnType(), "return_value");
3149  rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock);
3150  } else {
3151  AssertPos(currentPos, function->GetReturnType()->IsVoidType());
3152  rinst = llvm::ReturnInst::Create(*g->ctx, bblock);
3153  }
3154 
3155  AddDebugPos(rinst);
3156  bblock = NULL;
3157  return rinst;
3158 }
3159 
3160 llvm::Value *FunctionEmitContext::LaunchInst(llvm::Value *callee, std::vector<llvm::Value *> &argVals,
3161  llvm::Value *launchCount[3]) {
3162  if (callee == NULL) {
3163  AssertPos(currentPos, m->errorCount > 0);
3164  return NULL;
3165  }
3166 
3167  launchedTasks = true;
3168 
3169  AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
3170  llvm::Type *argType = (llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
3171  AssertPos(currentPos, llvm::PointerType::classof(argType));
3172  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(argType);
3173  AssertPos(currentPos, pt);
3174  AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
3175  llvm::StructType *argStructType = static_cast<llvm::StructType *>(pt->getElementType());
3176 
3177  llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
3178  AssertPos(currentPos, falloc != NULL);
3179  llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
3180  if (structSize->getType() != LLVMTypes::Int64Type)
3181  // ISPCAlloc expects the size as a uint64_t, but on 32-bit
3182  // targets, SizeOf returns a 32-bit value
3183  structSize = ZExtInst(structSize, LLVMTypes::Int64Type, "struct_size_to_64");
3184  int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
3185 
3186  std::vector<llvm::Value *> allocArgs;
3187  allocArgs.push_back(launchGroupHandlePtr);
3188  allocArgs.push_back(structSize);
3189  allocArgs.push_back(LLVMInt32(align));
3190  llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
3191  llvm::Value *argmem = BitCastInst(voidmem, pt);
3192 
3193  // Copy the values of the parameters into the appropriate place in
3194  // the argument block
3195  for (unsigned int i = 0; i < argVals.size(); ++i) {
3196  llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
3197  // no masked store should be needed here, since the argument block is freshly allocated
3198  StoreInst(argVals[i], ptr);
3199  }
3200 
3201  if (argStructType->getNumElements() == argVals.size() + 1) {
3202  // copy in the mask
3203  llvm::Value *mask = GetFullMask();
3204  llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL, "funarg_mask");
3205  StoreInst(mask, ptr);
3206  }
3207 
3208  // And emit the call to the user-supplied task launch function, passing
3209  // a pointer to the task function being called and a pointer to the
3210  // argument block we just filled in
3211  llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
3212  llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
3213  AssertPos(currentPos, flaunch != NULL);
3214  std::vector<llvm::Value *> args;
3215  args.push_back(launchGroupHandlePtr);
3216  args.push_back(fptr);
3217  args.push_back(voidmem);
3218  args.push_back(launchCount[0]);
3219  args.push_back(launchCount[1]);
3220  args.push_back(launchCount[2]);
3221  return CallInst(flaunch, NULL, args, "");
3222 }
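
// For context (illustrative note; these C signatures follow the task-system
// contract described in the ispc user's guide, not this file): the runtime
// linked with the application is expected to provide
//   void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
//   void ISPCLaunch(void **handlePtr, void *f, void *data,
//                   int count0, int count1, int count2);
//   void ISPCSync(void *handle);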
3223 
3224 void FunctionEmitContext::SyncInst() {
3225  llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
3226  llvm::Value *nullPtrValue = llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
3227  llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, launchGroupHandle, nullPtrValue);
3228  llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
3229  llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
3230  BranchInst(bSync, bPostSync, nonNull);
3231 
3232  SetCurrentBasicBlock(bSync);
3233  llvm::Function *fsync = m->module->getFunction("ISPCSync");
3234  if (fsync == NULL)
3235  FATAL("Couldn't find ISPCSync declaration?!");
3236  CallInst(fsync, NULL, launchGroupHandle, "");
3237 
3238  // zero out the handle so that if ISPCLaunch is called again in this
3239  // function, it knows it's starting out from scratch
3240  StoreInst(nullPtrValue, launchGroupHandlePtr);
3241 
3242  BranchInst(bPostSync);
3243 
3244  SetCurrentBasicBlock(bPostSync);
3245 }
3246 
3247 /** When gathering from or scattering to a varying atomic type, we need
3248  to add an appropriate offset to the final address for each lane right
3249  before we use it. Given a varying pointer we're about to use and its
3250  type, this function determines whether these offsets are needed and
3251  returns an updated pointer that incorporates these offsets if needed.
3252  */
3253 llvm::Value *FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *ptrType) {
3254  // This should only be called for varying pointers
3255  const PointerType *pt = CastType<PointerType>(ptrType);
3256  AssertPos(currentPos, pt && pt->IsVaryingType());
3257 
3258  const Type *baseType = ptrType->GetBaseType();
3259  if (Type::IsBasicType(baseType) == false)
3260  return ptr;
3261 
3262  if (baseType->IsVaryingType() == false)
3263  return ptr;
3264 
3265  // Find the size of a uniform element of the varying type
3266  llvm::Type *llvmBaseUniformType = baseType->GetAsUniformType()->LLVMType(g->ctx);
3267  llvm::Value *unifSize = g->target->SizeOf(llvmBaseUniformType, bblock);
3268  unifSize = SmearUniform(unifSize);
3269 
3270  // Compute offset = <0, 1, .. > * unifSize
3271  bool is32bits = g->target->is32Bit() || g->opt.force32BitAddressing;
3272  llvm::Value *varyingOffsets = ProgramIndexVector(is32bits);
3273 
3274  llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize, varyingOffsets);
3275 
3276  if (g->opt.force32BitAddressing == true && g->target->is32Bit() == false)
3277  // On 64-bit targets where we're doing 32-bit addressing
3278  // calculations, we need to convert to an i64 vector before adding
3279  // to the pointer
3280  offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
3281 
3282  return BinaryOperator(llvm::Instruction::Add, ptr, offset);
3283 }
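
// Worked example (illustrative, not in the original source): for a varying
// int32 on a 4-wide target, the code above computes
//   offset = programIndex * sizeof(uniform int32) = <0, 1, 2, 3> * 4 = <0, 4, 8, 12>
// so that lane i addresses element i of the varying value laid out in memory.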
3284 
3285 CFInfo *FunctionEmitContext::popCFState() {
3286  AssertPos(currentPos, controlFlowInfo.size() > 0);
3287  CFInfo *ci = controlFlowInfo.back();
3288  controlFlowInfo.pop_back();
3289 
3290  if (ci->IsSwitch()) {
3291  breakTarget = ci->savedBreakTarget;
3292  continueTarget = ci->savedContinueTarget;
3293  breakLanesPtr = ci->savedBreakLanesPtr;
3294  continueLanesPtr = ci->savedContinueLanesPtr;
3295  blockEntryMask = ci->savedBlockEntryMask;
3296  switchExpr = ci->savedSwitchExpr;
3297  defaultBlock = ci->savedDefaultBlock;
3298  caseBlocks = ci->savedCaseBlocks;
3299  nextBlocks = ci->savedNextBlocks;
3300  switchConditionWasUniform = ci->savedSwitchConditionWasUniform;
3301  } else if (ci->IsLoop() || ci->IsForeach()) {
3302  breakTarget = ci->savedBreakTarget;
3303  continueTarget = ci->savedContinueTarget;
3304  breakLanesPtr = ci->savedBreakLanesPtr;
3305  continueLanesPtr = ci->savedContinueLanesPtr;
3306  blockEntryMask = ci->savedBlockEntryMask;
3307  } else {
3308  AssertPos(currentPos, ci->IsIf());
3309  // nothing to do
3310  }
3311 
3312  return ci;
3313 }
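
// Illustrative pairing (not in the original source): popCFState() is the
// counterpart of the Start*() methods that push CFInfo entries, e.g. a
// hypothetical emission sequence:
//   ctx->StartLoop(bbBreak, bbContinue, /* uniformCF = */ false); // pushes CFInfo
//   ... emit the loop body ...
//   ctx->EndLoop();                                               // pops via popCFState()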