// Intel SPMD Program Compiler (ispc) 1.9.2 — ctx.cpp source listing
1 /*
2  Copyright (c) 2010-2015, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ctx.cpp
35  @brief Implementation of the FunctionEmitContext class
36 */
37 
38 #include "ctx.h"
39 #include "util.h"
40 #include "func.h"
41 #include "llvmutil.h"
42 #include "type.h"
43 #include "stmt.h"
44 #include "expr.h"
45 #include "module.h"
46 #include "sym.h"
47 #include <map>
48 #if ISPC_LLVM_VERSION >= ISPC_LLVM_5_0 // LLVM 5.0+
49  #include <llvm/BinaryFormat/Dwarf.h>
50 #else // LLVM up to 4.x
51  #include <llvm/Support/Dwarf.h>
52 #endif
53 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
54  #include <llvm/Metadata.h>
55  #include <llvm/Module.h>
56  #include <llvm/Instructions.h>
57  #include <llvm/DerivedTypes.h>
58 #else
59  #include <llvm/IR/Metadata.h>
60  #include <llvm/IR/Module.h>
61  #include <llvm/IR/Instructions.h>
62  #include <llvm/IR/DerivedTypes.h>
63 #endif
64 #ifdef ISPC_NVPTX_ENABLED
65 #include <llvm/Support/raw_ostream.h>
66 #include <llvm/Support/FormattedStream.h>
67 #endif /* ISPC_NVPTX_ENABLED */
68 
69 /** This is a small utility structure that records information related to one
70  level of nested control flow. It's mostly used in correctly restoring
71  the mask and other state as we exit control flow nesting levels.
72 */
73 struct CFInfo {
74  /** Returns a new instance of the structure that represents entering an
75  'if' statement */
76  static CFInfo *GetIf(bool isUniform, llvm::Value *savedMask);
77 
78  /** Returns a new instance of the structure that represents entering a
79  loop. */
80  static CFInfo *GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
81  llvm::BasicBlock *continueTarget,
82  llvm::Value *savedBreakLanesPtr,
83  llvm::Value *savedContinueLanesPtr,
84  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask);
85 
87  llvm::BasicBlock *breakTarget,
88  llvm::BasicBlock *continueTarget,
89  llvm::Value *savedBreakLanesPtr,
90  llvm::Value *savedContinueLanesPtr,
91  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask);
92 
93  static CFInfo *GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget,
94  llvm::BasicBlock *continueTarget,
95  llvm::Value *savedBreakLanesPtr,
96  llvm::Value *savedContinueLanesPtr,
97  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask,
98  llvm::Value *switchExpr,
99  llvm::BasicBlock *bbDefault,
100  const std::vector<std::pair<int, llvm::BasicBlock *> > *bbCases,
101  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbNext,
102  bool scUniform);
103 
104  bool IsIf() { return type == If; }
105  bool IsLoop() { return type == Loop; }
106  bool IsForeach() { return (type == ForeachRegular ||
107  type == ForeachActive ||
108  type == ForeachUnique); }
109  bool IsSwitch() { return type == Switch; }
110  bool IsVarying() { return !isUniform; }
111  bool IsUniform() { return isUniform; }
112 
116  bool isUniform;
120  llvm::Value *savedSwitchExpr;
121  llvm::BasicBlock *savedDefaultBlock;
122  const std::vector<std::pair<int, llvm::BasicBlock *> > *savedCaseBlocks;
123  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNextBlocks;
125 
126 private:
127  CFInfo(CFType t, bool uniformIf, llvm::Value *sm) {
128  Assert(t == If);
129  type = t;
130  isUniform = uniformIf;
131  savedBreakTarget = savedContinueTarget = NULL;
132  savedBreakLanesPtr = savedContinueLanesPtr = NULL;
133  savedMask = savedBlockEntryMask = sm;
134  savedSwitchExpr = NULL;
135  savedDefaultBlock = NULL;
136  savedCaseBlocks = NULL;
137  savedNextBlocks = NULL;
138  }
139  CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
140  llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
141  llvm::Value *lm, llvm::Value *sse = NULL, llvm::BasicBlock *bbd = NULL,
142  const std::vector<std::pair<int, llvm::BasicBlock *> > *bbc = NULL,
143  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbn = NULL,
144  bool scu = false) {
145  Assert(t == Loop || t == Switch);
146  type = t;
147  isUniform = iu;
148  savedBreakTarget = bt;
149  savedContinueTarget = ct;
150  savedBreakLanesPtr = sb;
151  savedContinueLanesPtr = sc;
152  savedMask = sm;
153  savedBlockEntryMask = lm;
154  savedSwitchExpr = sse;
155  savedDefaultBlock = bbd;
156  savedCaseBlocks = bbc;
157  savedNextBlocks = bbn;
158  savedSwitchConditionWasUniform = scu;
159  }
160  CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
161  llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
162  llvm::Value *lm) {
163  Assert(t == ForeachRegular || t == ForeachActive || t == ForeachUnique);
164  type = t;
165  isUniform = false;
166  savedBreakTarget = bt;
167  savedContinueTarget = ct;
168  savedBreakLanesPtr = sb;
169  savedContinueLanesPtr = sc;
170  savedMask = sm;
171  savedBlockEntryMask = lm;
172  savedSwitchExpr = NULL;
173  savedDefaultBlock = NULL;
174  savedCaseBlocks = NULL;
175  savedNextBlocks = NULL;
176  }
177 };
178 
179 
180 CFInfo *
181 CFInfo::GetIf(bool isUniform, llvm::Value *savedMask) {
182  return new CFInfo(If, isUniform, savedMask);
183 }
184 
185 
186 CFInfo *
187 CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
188  llvm::BasicBlock *continueTarget,
189  llvm::Value *savedBreakLanesPtr,
190  llvm::Value *savedContinueLanesPtr,
191  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask) {
192  return new CFInfo(Loop, isUniform, breakTarget, continueTarget,
193  savedBreakLanesPtr, savedContinueLanesPtr,
194  savedMask, savedBlockEntryMask);
195 }
196 
197 
198 CFInfo *
200  llvm::BasicBlock *breakTarget,
201  llvm::BasicBlock *continueTarget,
202  llvm::Value *savedBreakLanesPtr,
203  llvm::Value *savedContinueLanesPtr,
204  llvm::Value *savedMask, llvm::Value *savedForeachMask) {
205  CFType cfType;
206  switch (ft) {
208  cfType = ForeachRegular;
209  break;
211  cfType = ForeachActive;
212  break;
214  cfType = ForeachUnique;
215  break;
216  default:
217  FATAL("Unhandled foreach type");
218  return NULL;
219  }
220 
221  return new CFInfo(cfType, breakTarget, continueTarget,
222  savedBreakLanesPtr, savedContinueLanesPtr,
223  savedMask, savedForeachMask);
224 }
225 
226 
227 CFInfo *
228 CFInfo::GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget,
229  llvm::BasicBlock *continueTarget,
230  llvm::Value *savedBreakLanesPtr,
231  llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
232  llvm::Value *savedBlockEntryMask, llvm::Value *savedSwitchExpr,
233  llvm::BasicBlock *savedDefaultBlock,
234  const std::vector<std::pair<int, llvm::BasicBlock *> > *savedCases,
235  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNext,
236  bool savedSwitchConditionUniform) {
237  return new CFInfo(Switch, isUniform, breakTarget, continueTarget,
238  savedBreakLanesPtr, savedContinueLanesPtr,
239  savedMask, savedBlockEntryMask, savedSwitchExpr, savedDefaultBlock,
240  savedCases, savedNext, savedSwitchConditionUniform);
241 }
242 
243 ///////////////////////////////////////////////////////////////////////////
244 
246  llvm::Function *lf,
247  SourcePos firstStmtPos) {
248  function = func;
249  llvmFunction = lf;
250 
251  /* Create a new basic block to store all of the allocas */
252  allocaBlock = llvm::BasicBlock::Create(*g->ctx, "allocas", llvmFunction, 0);
253  bblock = llvm::BasicBlock::Create(*g->ctx, "entry", llvmFunction, 0);
254  /* But jump from it immediately into the real entry block */
255  llvm::BranchInst::Create(bblock, allocaBlock);
256 
257  funcStartPos = funSym->pos;
258 
259  internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory");
260  StoreInst(LLVMMaskAllOn, internalMaskPointer);
261 
262  functionMaskValue = LLVMMaskAllOn;
263 
264  fullMaskPointer = AllocaInst(LLVMTypes::MaskType, "full_mask_memory");
265  StoreInst(LLVMMaskAllOn, fullMaskPointer);
266 
267  blockEntryMask = NULL;
268  breakLanesPtr = continueLanesPtr = NULL;
269  breakTarget = continueTarget = NULL;
270 
271  switchExpr = NULL;
272  caseBlocks = NULL;
273  defaultBlock = NULL;
274  nextBlocks = NULL;
275 
276  returnedLanesPtr = AllocaInst(LLVMTypes::MaskType, "returned_lanes_memory");
277  StoreInst(LLVMMaskAllOff, returnedLanesPtr);
278 
279  launchedTasks = false;
280  launchGroupHandlePtr = AllocaInst(LLVMTypes::VoidPointerType, "launch_group_handle");
281  StoreInst(llvm::Constant::getNullValue(LLVMTypes::VoidPointerType),
282  launchGroupHandlePtr);
283 
284  disableGSWarningCount = 0;
285 
286  const Type *returnType = function->GetReturnType();
287  if (!returnType || returnType->IsVoidType())
288  returnValuePtr = NULL;
289  else {
290  llvm::Type *ftype = returnType->LLVMType(g->ctx);
291  returnValuePtr = AllocaInst(ftype, "return_value_memory");
292  }
293 
295  // This is really disgusting. We want to be able to fool the
296  // compiler to not be able to reason that the mask is all on, but
297  // we don't want to pay too much of a price at the start of each
298  // function to do so.
299  //
300  // Therefore: first, we declare a module-static __all_on_mask
301  // variable that will hold an "all on" mask value. At the start of
302  // each function, we'll load its value and call SetInternalMaskAnd
303  // with the result to set the current internal execution mask.
304  // (This is a no-op at runtime.)
305  //
306  // Then, to fool the optimizer that maybe the value of
307  // __all_on_mask can't be guaranteed to be "all on", we emit a
308  // dummy function that sets __all_on_mask be "all off". (That
309  // function is never actually called.)
310  llvm::Value *globalAllOnMaskPtr =
311  m->module->getNamedGlobal("__all_on_mask");
312  if (globalAllOnMaskPtr == NULL) {
313  globalAllOnMaskPtr =
314  new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false,
315  llvm::GlobalValue::InternalLinkage,
316  LLVMMaskAllOn, "__all_on_mask");
317 
318  char buf[256];
319  sprintf(buf, "__off_all_on_mask_%s", g->target->GetISAString());
320  llvm::Constant *offFunc =
321 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
322  m->module->getOrInsertFunction(buf, LLVMTypes::VoidType,
323  NULL);
324 #else // LLVM 5.0+
325  m->module->getOrInsertFunction(buf, LLVMTypes::VoidType);
326 #endif
327 
328  AssertPos(currentPos, llvm::isa<llvm::Function>(offFunc));
329  llvm::BasicBlock *offBB =
330  llvm::BasicBlock::Create(*g->ctx, "entry",
331  (llvm::Function *)offFunc, 0);
332  llvm::StoreInst *inst =
333  new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
334  if (g->opt.forceAlignedMemory) {
335  inst->setAlignment(g->target->getNativeVectorAlignment());
336  }
337  llvm::ReturnInst::Create(*g->ctx, offBB);
338  }
339 
340  llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, "all_on_mask");
341  SetInternalMaskAnd(LLVMMaskAllOn, allOnMask);
342  }
343 
344  if (m->diBuilder) {
345  currentPos = funSym->pos;
346 
347  /* If debugging is enabled, tell the debug information emission
348  code about this new function */
349 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
350  diFile = funcStartPos.GetDIFile();
351  AssertPos(currentPos, diFile.Verify());
352 #else /* LLVM 3.7+ */
353  diFile = funcStartPos.GetDIFile();
354 #endif
355 
356 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 /* 3.2, 3.3 */
357  llvm::DIScope scope = llvm::DIScope(m->diBuilder->getCU());
358 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.4, 3.5, 3.6 */
359  llvm::DIScope scope = llvm::DIScope(m->diCompileUnit);
360 #else /* LLVM 3.7+ */
361  llvm::DIScope *scope = m->diCompileUnit;
362 #endif
363 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
364  llvm::DIType diSubprogramType;
365  AssertPos(currentPos, scope.Verify());
366 #else /* LLVM 3.7+ */
367  llvm::DIType *diSubprogramType = NULL;
368 #endif
369 
370  const FunctionType *functionType = function->GetType();
371  if (functionType == NULL)
372  AssertPos(currentPos, m->errorCount > 0);
373  else {
374  diSubprogramType = functionType->GetDIType(scope);
375 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
376  AssertPos(currentPos, diSubprogramType.Verify());
377 #else /* LLVM 3.7+ */
378  //comming soon
379 #endif
380  }
381 
382 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 /* 3.2, 3.3 */
383  llvm::DIType diSubprogramType_n = diSubprogramType;
384  int flags = llvm::DIDescriptor::FlagPrototyped;
385 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.4, 3.5, 3.6 */
386  Assert(diSubprogramType.isCompositeType());
387  llvm::DICompositeType diSubprogramType_n =
388  static_cast<llvm::DICompositeType>(diSubprogramType);
389  int flags = llvm::DIDescriptor::FlagPrototyped;
390 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_7 /* LLVM 3.7 */
391  Assert(llvm::isa<llvm::DICompositeTypeBase>(diSubprogramType));
392  llvm::DISubroutineType *diSubprogramType_n =
393  llvm::cast<llvm::DISubroutineType>(getDICompositeType(diSubprogramType));
394  int flags = llvm::DINode::FlagPrototyped;
395 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_8 || ISPC_LLVM_VERSION == ISPC_LLVM_3_9 /* LLVM 3.8, 3.9 */
396  Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
397  llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
398  int flags = llvm::DINode::FlagPrototyped;
399 #else /* LLVM 4.0+ */
400  Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
401  llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
402  llvm::DINode::DIFlags flags = llvm::DINode::FlagPrototyped;
403 
404 #endif
405 
406  std::string mangledName = llvmFunction->getName();
407  if (mangledName == funSym->name)
408  mangledName = "";
409 
410  bool isStatic = (funSym->storageClass == SC_STATIC);
411  bool isOptimized = (g->opt.level > 0);
412  int firstLine = funcStartPos.first_line;
413 
414 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
415  diSubprogram =
416  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
417  mangledName, diFile,
418  firstLine, diSubprogramType_n,
419  isStatic, true, /* is defn */
420  firstLine, flags,
421  isOptimized, llvmFunction);
422  AssertPos(currentPos, diSubprogram.Verify());
423 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_7 /* LLVM 3.7 */
424  diSubprogram =
425  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
426  mangledName, diFile,
427  firstLine, diSubprogramType_n,
428  isStatic, true, /* is defn */
429  firstLine, flags,
430  isOptimized, llvmFunction);
431 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_8 || ISPC_LLVM_VERSION == ISPC_LLVM_3_9 /* LLVM 3.8, 3.9 */
432  diSubprogram =
433  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
434  mangledName, diFile,
435  firstLine, diSubprogramType_n,
436  isStatic, true, /* is defn */
437  firstLine, flags,
438  isOptimized);
439  llvmFunction->setSubprogram(diSubprogram);
440 #else /* LLVM 4.0+ */
441  diSubprogram =
442  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
443  mangledName, diFile,
444  firstLine, diSubprogramType_n,
445  isStatic, true, /* is defn */
446  firstLine, flags,
447  isOptimized);
448  llvmFunction->setSubprogram(diSubprogram);
449 #endif
450 
451  /* And start a scope representing the initial function scope */
452  StartScope();
453  }
454 }
455 
456 
458  AssertPos(currentPos, controlFlowInfo.size() == 0);
459  AssertPos(currentPos, debugScopes.size() == (m->diBuilder ? 1 : 0));
460 }
461 
462 
463 const Function *
465  return function;
466 }
467 
468 
469 llvm::BasicBlock *
471  return bblock;
472 }
473 
474 
475 void
477  bblock = bb;
478 }
479 
480 
481 llvm::Value *
483  return functionMaskValue;
484 }
485 
486 
487 llvm::Value *
489  return LoadInst(internalMaskPointer, "load_mask");
490 }
491 
492 
493 llvm::Value *
495  return BinaryOperator(llvm::Instruction::And, GetInternalMask(),
496  functionMaskValue, "internal_mask&function_mask");
497 }
498 
499 
500 llvm::Value *
502  return fullMaskPointer;
503 }
504 
505 
506 void
508  functionMaskValue = value;
509  if (bblock != NULL)
510  StoreInst(GetFullMask(), fullMaskPointer);
511 }
512 
513 
514 void
516  blockEntryMask = value;
517 }
518 
519 
520 void
522  StoreInst(value, internalMaskPointer);
523  // kludge so that __mask returns the right value in ispc code.
524  StoreInst(GetFullMask(), fullMaskPointer);
525 }
526 
527 
528 void
529 FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) {
530  llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask,
531  test, "oldMask&test");
532  SetInternalMask(mask);
533 }
534 
535 
536 void
537 FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test) {
538  llvm::Value *notTest = BinaryOperator(llvm::Instruction::Xor, test, LLVMMaskAllOn,
539  "~test");
540  llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, notTest,
541  "oldMask&~test");
542  SetInternalMask(mask);
543 }
544 
545 
546 void
547 FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
548  AssertPos(currentPos, bblock != NULL);
549  llvm::Value *any = Any(GetFullMask());
550  BranchInst(btrue, bfalse, any);
551  // It's illegal to add any additional instructions to the basic block
552  // now that it's terminated, so set bblock to NULL to be safe
553  bblock = NULL;
554 }
555 
556 
557 void
558 FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
559  AssertPos(currentPos, bblock != NULL);
560  llvm::Value *all = All(GetFullMask());
561  BranchInst(btrue, bfalse, all);
562  // It's illegal to add any additional instructions to the basic block
563  // now that it's terminated, so set bblock to NULL to be safe
564  bblock = NULL;
565 }
566 
567 
568 void
569 FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
570  AssertPos(currentPos, bblock != NULL);
571  // switch sense of true/false bblocks
572  BranchIfMaskAny(bfalse, btrue);
573  // It's illegal to add any additional instructions to the basic block
574  // now that it's terminated, so set bblock to NULL to be safe
575  bblock = NULL;
576 }
577 
578 
579 void
581  controlFlowInfo.push_back(CFInfo::GetIf(true, GetInternalMask()));
582 }
583 
584 
585 void
586 FunctionEmitContext::StartVaryingIf(llvm::Value *oldMask) {
587  controlFlowInfo.push_back(CFInfo::GetIf(false, oldMask));
588 }
589 
590 
591 void
593  CFInfo *ci = popCFState();
594  // Make sure we match up with a Start{Uniform,Varying}If().
595  AssertPos(currentPos, ci->IsIf());
596 
597  // 'uniform' ifs don't change the mask so we only need to restore the
598  // mask going into the if for 'varying' if statements
599  if (ci->IsUniform() || bblock == NULL)
600  return;
601 
602  // We can't just restore the mask as it was going into the 'if'
603  // statement. First we have to take into account any program
604  // instances that have executed 'return' statements; the restored
605  // mask must be off for those lanes.
606  restoreMaskGivenReturns(ci->savedMask);
607 
608  // If the 'if' statement is inside a loop with a 'varying'
609  // condition, we also need to account for any break or continue
610  // statements that executed inside the 'if' statmeent; we also must
611  // leave the lane masks for the program instances that ran those
612  // off after we restore the mask after the 'if'. The code below
613  // ends up being optimized out in the case that there were no break
614  // or continue statements (and breakLanesPtr and continueLanesPtr
615  // have their initial 'all off' values), so we don't need to check
616  // for that here.
617  //
618  // There are three general cases to deal with here:
619  // - Loops: both break and continue are allowed, and thus the corresponding
620  // lane mask pointers are non-NULL
621  // - Foreach: only continueLanesPtr may be non-NULL
622  // - Switch: only breakLanesPtr may be non-NULL
623  if (continueLanesPtr != NULL || breakLanesPtr != NULL) {
624  // We want to compute:
625  // newMask = (oldMask & ~(breakLanes | continueLanes)),
626  // treading breakLanes or continueLanes as "all off" if the
627  // corresponding pointer is NULL.
628  llvm::Value *bcLanes = NULL;
629 
630  if (continueLanesPtr != NULL)
631  bcLanes = LoadInst(continueLanesPtr, "continue_lanes");
632  else
633  bcLanes = LLVMMaskAllOff;
634 
635  if (breakLanesPtr != NULL) {
636  llvm::Value *breakLanes = LoadInst(breakLanesPtr, "break_lanes");
637  bcLanes = BinaryOperator(llvm::Instruction::Or, bcLanes,
638  breakLanes, "|break_lanes");
639  }
640 
641  llvm::Value *notBreakOrContinue =
642  BinaryOperator(llvm::Instruction::Xor,
643  bcLanes, LLVMMaskAllOn,
644  "!(break|continue)_lanes");
645  llvm::Value *oldMask = GetInternalMask();
646  llvm::Value *newMask =
647  BinaryOperator(llvm::Instruction::And, oldMask,
648  notBreakOrContinue, "new_mask");
649  SetInternalMask(newMask);
650  }
651 }
652 
653 
654 void
655 FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct,
656  bool uniformCF) {
657  // Store the current values of various loop-related state so that we
658  // can restore it when we exit this loop.
659  llvm::Value *oldMask = GetInternalMask();
660  controlFlowInfo.push_back(CFInfo::GetLoop(uniformCF, breakTarget,
661  continueTarget, breakLanesPtr,
662  continueLanesPtr, oldMask, blockEntryMask));
663  if (uniformCF)
664  // If the loop has a uniform condition, we don't need to track
665  // which lanes 'break' or 'continue'; all of the running ones go
666  // together, so we just jump
667  breakLanesPtr = continueLanesPtr = NULL;
668  else {
669  // For loops with varying conditions, allocate space to store masks
670  // that record which lanes have done these
671  continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "continue_lanes_memory");
672  StoreInst(LLVMMaskAllOff, continueLanesPtr);
673  breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
674  StoreInst(LLVMMaskAllOff, breakLanesPtr);
675  }
676 
677  breakTarget = bt;
678  continueTarget = ct;
679  blockEntryMask = NULL; // this better be set by the loop!
680 }
681 
682 
683 void
685  CFInfo *ci = popCFState();
686  AssertPos(currentPos, ci->IsLoop());
687 
688  if (!ci->IsUniform())
689  // If the loop had a 'uniform' test, then it didn't make any
690  // changes to the mask so there's nothing to restore. If it had a
691  // varying test, we need to restore the mask to what it was going
692  // into the loop, but still leaving off any lanes that executed a
693  // 'return' statement.
694  restoreMaskGivenReturns(ci->savedMask);
695 }
696 
697 
698 void
700  // Issue an error if we're in a nested foreach...
701  if (ft == FOREACH_REGULAR) {
702  for (int i = 0; i < (int)controlFlowInfo.size(); ++i) {
703  if (controlFlowInfo[i]->type == CFInfo::ForeachRegular) {
704  Error(currentPos, "Nested \"foreach\" statements are currently "
705  "illegal.");
706  break;
707  // Don't return here, however, and in turn allow the caller to
708  // do the rest of its codegen and then call EndForeach()
709  // normally--the idea being that this gives a chance to find
710  // any other errors inside the body of the foreach loop...
711  }
712  }
713  }
714 
715  // Store the current values of various loop-related state so that we
716  // can restore it when we exit this loop.
717  llvm::Value *oldMask = GetInternalMask();
718  controlFlowInfo.push_back(CFInfo::GetForeach(ft, breakTarget, continueTarget,
719  breakLanesPtr, continueLanesPtr,
720  oldMask, blockEntryMask));
721  breakLanesPtr = NULL;
722  breakTarget = NULL;
723 
724  continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "foreach_continue_lanes");
725  StoreInst(LLVMMaskAllOff, continueLanesPtr);
726  continueTarget = NULL; // should be set by SetContinueTarget()
727 
728  blockEntryMask = NULL;
729 }
730 
731 
732 void
734  CFInfo *ci = popCFState();
735  AssertPos(currentPos, ci->IsForeach());
736 }
737 
738 
739 void
741  if (!bblock)
742  return;
743 
744  // Restore the mask to the given old mask, but leave off any lanes that
745  // executed a return statement.
746  // newMask = (oldMask & ~returnedLanes)
747  llvm::Value *returnedLanes = LoadInst(returnedLanesPtr,
748  "returned_lanes");
749  llvm::Value *notReturned = BinaryOperator(llvm::Instruction::Xor,
750  returnedLanes, LLVMMaskAllOn,
751  "~returned_lanes");
752  llvm::Value *newMask = BinaryOperator(llvm::Instruction::And,
753  oldMask, notReturned, "new_mask");
754  SetInternalMask(newMask);
755 }
756 
757 
758 /** Returns "true" if the first enclosing non-if control flow expression is
759  a "switch" statement.
760 */
761 bool
763  // Go backwards through controlFlowInfo, since we add new nested scopes
764  // to the back.
765  int i = controlFlowInfo.size() - 1;
766  while (i >= 0 && controlFlowInfo[i]->IsIf())
767  --i;
768  // Got to the first non-if (or end of CF info)
769  if (i == -1)
770  return false;
771  return controlFlowInfo[i]->IsSwitch();
772 }
773 
774 
775 void
776 FunctionEmitContext::Break(bool doCoherenceCheck) {
777  if (breakTarget == NULL) {
778  Error(currentPos, "\"break\" statement is illegal outside of "
779  "for/while/do loops and \"switch\" statements.");
780  return;
781  }
782  AssertPos(currentPos, controlFlowInfo.size() > 0);
783 
784  if (bblock == NULL)
785  return;
786 
787  if (inSwitchStatement() == true &&
788  switchConditionWasUniform == true &&
789  ifsInCFAllUniform(CFInfo::Switch)) {
790  // We know that all program instances are executing the break, so
791  // just jump to the block immediately after the switch.
792  AssertPos(currentPos, breakTarget != NULL);
793  BranchInst(breakTarget);
794  bblock = NULL;
795  return;
796  }
797 
798  // If all of the enclosing 'if' tests in the loop have uniform control
799  // flow or if we can tell that the mask is all on, then we can just
800  // jump to the break location.
801  if (inSwitchStatement() == false && ifsInCFAllUniform(CFInfo::Loop)) {
802  BranchInst(breakTarget);
803  // Set bblock to NULL since the jump has terminated the basic block
804  bblock = NULL;
805  }
806  else {
807  // Varying switch, uniform switch where the 'break' is under
808  // varying control flow, or a loop with varying 'if's above the
809  // break. In these cases, we need to update the mask of the lanes
810  // that have executed a 'break' statement:
811  // breakLanes = breakLanes | mask
812  AssertPos(currentPos, breakLanesPtr != NULL);
813 
814  llvm::Value *mask = GetInternalMask();
815  llvm::Value *breakMask = LoadInst(breakLanesPtr,
816  "break_mask");
817  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
818  mask, breakMask, "mask|break_mask");
819  StoreInst(newMask, breakLanesPtr);
820 
821  // Set the current mask to be all off, just in case there are any
822  // statements in the same scope after the 'break'. Most of time
823  // this will be optimized away since we'll likely end the scope of
824  // an 'if' statement and restore the mask then.
825  SetInternalMask(LLVMMaskAllOff);
826 
827  if (doCoherenceCheck) {
828  if (continueTarget != NULL)
829  // If the user has indicated that this is a 'coherent'
830  // break statement, then check to see if the mask is all
831  // off. If so, we have to conservatively jump to the
832  // continueTarget, not the breakTarget, since part of the
833  // reason the mask is all off may be due to 'continue'
834  // statements that executed in the current loop iteration.
835  jumpIfAllLoopLanesAreDone(continueTarget);
836  else if (breakTarget != NULL)
837  // Similarly handle these for switch statements, where we
838  // only have a break target.
839  jumpIfAllLoopLanesAreDone(breakTarget);
840  }
841  }
842 }
843 
844 
845 static bool
846 lEnclosingLoopIsForeachActive(const std::vector<CFInfo *> &controlFlowInfo) {
847  for (int i = (int)controlFlowInfo.size() - 1; i >= 0; --i) {
848  if (controlFlowInfo[i]->type == CFInfo::ForeachActive)
849  return true;
850  }
851  return false;
852 }
853 
854 
855 void
856 FunctionEmitContext::Continue(bool doCoherenceCheck) {
857  if (!continueTarget) {
858  Error(currentPos, "\"continue\" statement illegal outside of "
859  "for/while/do/foreach loops.");
860  return;
861  }
862  AssertPos(currentPos, controlFlowInfo.size() > 0);
863 
864  if (ifsInCFAllUniform(CFInfo::Loop) ||
865  lEnclosingLoopIsForeachActive(controlFlowInfo)) {
866  // Similarly to 'break' statements, we can immediately jump to the
867  // continue target if we're only in 'uniform' control flow within
868  // loop or if we can tell that the mask is all on. Here, we can
869  // also jump if the enclosing loop is a 'foreach_active' loop, in
870  // which case we know that only a single program instance is
871  // executing.
872  AddInstrumentationPoint("continue: uniform CF, jumped");
873  BranchInst(continueTarget);
874  bblock = NULL;
875  }
876  else {
877  // Otherwise update the stored value of which lanes have 'continue'd.
878  // continueLanes = continueLanes | mask
879  AssertPos(currentPos, continueLanesPtr);
880  llvm::Value *mask = GetInternalMask();
881  llvm::Value *continueMask =
882  LoadInst(continueLanesPtr, "continue_mask");
883  llvm::Value *newMask =
884  BinaryOperator(llvm::Instruction::Or, mask, continueMask,
885  "mask|continueMask");
886  StoreInst(newMask, continueLanesPtr);
887 
888  // And set the current mask to be all off in case there are any
889  // statements in the same scope after the 'continue'
890  SetInternalMask(LLVMMaskAllOff);
891 
892  if (doCoherenceCheck)
893  // If this is a 'coherent continue' statement, then emit the
894  // code to see if all of the lanes are now off due to
895  // breaks/continues and jump to the continue target if so.
896  jumpIfAllLoopLanesAreDone(continueTarget);
897  }
898 }
899 
900 
/** This function checks to see if all of the 'if' statements (if any)
    between the current scope and the first enclosing loop/switch of given
    control flow type have 'uniform' tests.
 */
bool
    // NOTE(review): the defining signature line (presumably
    // FunctionEmitContext::ifsInCFAllUniform(int type)) is missing from
    // this extraction -- confirm against the upstream file.
    AssertPos(currentPos, controlFlowInfo.size() > 0);
    // Go backwards through controlFlowInfo, since we add new nested scopes
    // to the back.  Stop once we come to the first enclosing control flow
    // structure of the desired type.
    int i = controlFlowInfo.size() - 1;
    while (i >= 0 && controlFlowInfo[i]->type != type) {
        if (controlFlowInfo[i]->isUniform == false)
            // Found a scope due to an 'if' statement with a varying test
            return false;
        --i;
    }
    // We must have stopped at a record of the requested type; otherwise the
    // caller asked about a construct that doesn't actually enclose us.
    AssertPos(currentPos, i >= 0); // else we didn't find the expected control flow type!
    return true;
}
921 
922 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target).
    //
    // Emits a dynamic check of whether every lane that was executing at
    // entry to the enclosing loop/switch has since returned, broken, or
    // continued; if so, control branches to 'target'.
    llvm::Value *allDone = NULL;

    if (breakLanesPtr == NULL) {
        // No break-lanes storage is being tracked for this construct, so
        // only the continued lanes need to be considered.
        llvm::Value *continued = LoadInst(continueLanesPtr,
                                          "continue_lanes");
        continued = BinaryOperator(llvm::Instruction::And,
                                   continued, GetFunctionMask(),
                                   "continued&func");
        allDone = MasksAllEqual(continued, blockEntryMask);
    }
    else {
        // Check to see if (returned lanes | continued lanes | break lanes) is
        // equal to the value of mask at the start of the loop iteration.  If
        // so, everyone is done and we can jump to the given target
        llvm::Value *returned = LoadInst(returnedLanesPtr,
                                         "returned_lanes");
        llvm::Value *breaked = LoadInst(breakLanesPtr, "break_lanes");
        llvm::Value *finishedLanes = BinaryOperator(llvm::Instruction::Or,
                                                    returned, breaked,
                                                    "returned|breaked");
        if (continueLanesPtr != NULL) {
            // It's NULL for "switch" statements...
            llvm::Value *continued = LoadInst(continueLanesPtr,
                                              "continue_lanes");
            finishedLanes = BinaryOperator(llvm::Instruction::Or, finishedLanes,
                                           continued, "returned|breaked|continued");
        }

        // Only count lanes that are on in the function mask as "finished".
        finishedLanes = BinaryOperator(llvm::Instruction::And,
                                       finishedLanes, GetFunctionMask(),
                                       "finished&func");

        // Do we match the mask at loop or switch statement entry?
        allDone = MasksAllEqual(finishedLanes, blockEntryMask);
    }

    llvm::BasicBlock *bAll = CreateBasicBlock("all_continued_or_breaked");
    llvm::BasicBlock *bNotAll = CreateBasicBlock("not_all_continued_or_breaked");
    BranchInst(bAll, bNotAll, allDone);

    // If so, have an extra basic block along the way to add
    // instrumentation, if the user asked for it.
    bblock = bAll;
    AddInstrumentationPoint("break/continue: all dynamically went");
    BranchInst(target);

    // And set the current basic block to a new one for future instructions
    // for the path where we weren't able to jump
    bblock = bNotAll;
    AddInstrumentationPoint("break/continue: not all went");
}
976 
977 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::RestoreContinuedLanes().
    //
    // Turns any lanes that executed a 'continue' back on in the execution
    // mask (e.g. at the bottom of a loop body) and clears the record of
    // continued lanes.
    if (continueLanesPtr == NULL)
        return;

    // mask = mask | continueLanes
    // (the original comment said "mask & continueFlags", but the code
    // correctly ORs the continued lanes back into the mask)
    llvm::Value *mask = GetInternalMask();
    llvm::Value *continueMask = LoadInst(continueLanesPtr,
                                         "continue_mask");
    llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or,
                                         mask, continueMask, "mask|continue_mask");
    SetInternalMask(orMask);

    // continueLanes = 0
    StoreInst(LLVMMaskAllOff, continueLanesPtr);
}
994 
995 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::ClearBreakLanes().
    //
    // Resets the stored set of lanes that have executed a 'break', if any
    // such set is being tracked for the current construct.
    if (breakLanesPtr == NULL)
        return;

    // breakLanes = 0
    StoreInst(LLVMMaskAllOff, breakLanesPtr);
}
1004 
1005 
void
FunctionEmitContext::StartSwitch(bool cfIsUniform, llvm::BasicBlock *bbBreak) {
    // Entering a "switch" statement: push all of the current switch- and
    // break-related member state onto the control-flow stack (so that
    // popCFState() can restore it when the switch ends), then reinitialize
    // that state for the new statement.
    llvm::Value *oldMask = GetInternalMask();
    controlFlowInfo.push_back(CFInfo::GetSwitch(cfIsUniform, breakTarget,
                                                continueTarget, breakLanesPtr,
                                                continueLanesPtr, oldMask,
                                                blockEntryMask, switchExpr, defaultBlock,
                                                caseBlocks, nextBlocks,
                                                switchConditionWasUniform));

    // Fresh storage to record which lanes execute a 'break' inside this
    // switch; 'break' transfers control to bbBreak.
    breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
    StoreInst(LLVMMaskAllOff, breakLanesPtr);
    breakTarget = bbBreak;

    // 'continue' has no meaning directly inside a switch, so no continue
    // state is tracked here.
    continueLanesPtr = NULL;
    continueTarget = NULL;
    blockEntryMask = NULL;

    // These will be set by the SwitchInst() method
    switchExpr = NULL;
    defaultBlock = NULL;
    caseBlocks = NULL;
    nextBlocks = NULL;
}
1030 
1031 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::EndSwitch().
    AssertPos(currentPos, bblock != NULL);

    // Pop the saved switch state off of the control-flow stack; for a
    // varying switch, also restore the mask that was active at switch
    // entry, accounting for any lanes that returned inside the switch.
    CFInfo *ci = popCFState();
    if (ci->IsVarying() && bblock != NULL)
        restoreMaskGivenReturns(ci->savedMask);
}
1040 
1041 
/** Emit code to check for an "all off" mask before the code for a
    case or default label in a "switch" statement.
 */
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::addSwitchMaskCheck(llvm::Value *mask).
    llvm::Value *allOff = None(mask);
    llvm::BasicBlock *bbSome = CreateBasicBlock("case_default_on");

    // Find the basic block for the case or default label immediately after
    // the current one in the switch statement--that's where we want to
    // jump if the mask is all off at this label.
    AssertPos(currentPos, nextBlocks->find(bblock) != nextBlocks->end());
    llvm::BasicBlock *bbNext = nextBlocks->find(bblock)->second;

    // Jump to the next label if the mask is all off; otherwise jump to the
    // newly created block that will hold the actual code for this label.
    BranchInst(bbNext, bbSome, allOff);
    SetCurrentBasicBlock(bbSome);
}
1061 
1062 
/** Returns the execution mask at entry to the first enclosing "switch"
    statement. */
llvm::Value *
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::getMaskAtSwitchEntry().
    AssertPos(currentPos, controlFlowInfo.size() > 0);
    // Search backwards (innermost scopes are at the back) for the nearest
    // enclosing switch record.
    int i = controlFlowInfo.size() - 1;
    while (i >= 0 && controlFlowInfo[i]->type != CFInfo::Switch)
        --i;
    AssertPos(currentPos, i != -1);  // caller must be inside a switch
    return controlFlowInfo[i]->savedMask;
}
1074 
1075 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::EmitDefaultLabel(bool checkMask, SourcePos pos).
    if (inSwitchStatement() == false) {
        Error(pos, "\"default\" label illegal outside of \"switch\" "
              "statement.");
        return;
    }

    // If there's a default label in the switch, a basic block for it
    // should have been provided in the previous call to SwitchInst().
    AssertPos(currentPos, defaultBlock != NULL);

    if (bblock != NULL)
        // The previous case in the switch fell through, or we're in a
        // varying switch; terminate the current block with a jump to the
        // block for the code for the default label.
        BranchInst(defaultBlock);
    SetCurrentBasicBlock(defaultBlock);

    if (switchConditionWasUniform)
        // Nothing more to do for this case; return back to the caller,
        // which will then emit the code for the default case.
        return;

    // For a varying switch, we need to update the execution mask.
    //
    // First, compute the mask that corresponds to which program instances
    // should execute the "default" code; this corresponds to the set of
    // program instances that don't match any of the case statements.
    // Therefore, we generate code that compares the value of the switch
    // expression to the value associated with each of the "case"
    // statements such that the surviving lanes didn't match any of them.
    llvm::Value *matchesDefault = getMaskAtSwitchEntry();
    for (int i = 0; i < (int)caseBlocks->size(); ++i) {
        int value = (*caseBlocks)[i].first;
        llvm::Value *valueVec = (switchExpr->getType() == LLVMTypes::Int32VectorType) ?
            LLVMInt32Vector(value) : LLVMInt64Vector(value);
        // TODO: for AVX2 at least, the following generates better code
        // than doing ICMP_NE and skipping the NotOperator() below; file a
        // LLVM bug?
        llvm::Value *matchesCaseValue =
            CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr,
                    valueVec, "cmp_case_value");
        matchesCaseValue = I1VecToBoolVec(matchesCaseValue);

        // Knock the lanes that matched this case out of the default set.
        llvm::Value *notMatchesCaseValue = NotOperator(matchesCaseValue);
        matchesDefault = BinaryOperator(llvm::Instruction::And, matchesDefault,
                                        notMatchesCaseValue, "default&~case_match");
    }

    // The mask may have some lanes on, which corresponds to the previous
    // label falling through; compute the updated mask by ORing those with
    // the lanes that match the default.  (The original comment said
    // "ANDing", but the code ORs, which is what fall-through requires.)
    llvm::Value *oldMask = GetInternalMask();
    llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask,
                                          matchesDefault, "old_mask|matches_default");
    SetInternalMask(newMask);

    if (checkMask)
        addSwitchMaskCheck(newMask);
}
1137 
1138 
1139 void
1140 FunctionEmitContext::EmitCaseLabel(int value, bool checkMask, SourcePos pos) {
1141  if (inSwitchStatement() == false) {
1142  Error(pos, "\"case\" label illegal outside of \"switch\" statement.");
1143  return;
1144  }
1145 
1146  // Find the basic block for this case statement.
1147  llvm::BasicBlock *bbCase = NULL;
1148  AssertPos(currentPos, caseBlocks != NULL);
1149  for (int i = 0; i < (int)caseBlocks->size(); ++i)
1150  if ((*caseBlocks)[i].first == value) {
1151  bbCase = (*caseBlocks)[i].second;
1152  break;
1153  }
1154  AssertPos(currentPos, bbCase != NULL);
1155 
1156  if (bblock != NULL)
1157  // fall through from the previous case
1158  BranchInst(bbCase);
1159  SetCurrentBasicBlock(bbCase);
1160 
1161  if (switchConditionWasUniform)
1162  return;
1163 
1164  // update the mask: first, get a mask that indicates which program
1165  // instances have a value for the switch expression that matches this
1166  // case statement.
1167  llvm::Value *valueVec = (switchExpr->getType() == LLVMTypes::Int32VectorType) ?
1168  LLVMInt32Vector(value) : LLVMInt64Vector(value);
1169  llvm::Value *matchesCaseValue =
1170  CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr,
1171  valueVec, "cmp_case_value");
1172  matchesCaseValue = I1VecToBoolVec(matchesCaseValue);
1173 
1174  // If a lane was off going into the switch, we don't care if has a
1175  // value in the switch expression that happens to match this case.
1176  llvm::Value *entryMask = getMaskAtSwitchEntry();
1177  matchesCaseValue = BinaryOperator(llvm::Instruction::And, entryMask,
1178  matchesCaseValue, "entry_mask&case_match");
1179 
1180  // Take the surviving lanes and turn on the mask for them.
1181  llvm::Value *oldMask = GetInternalMask();
1182  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask,
1183  matchesCaseValue, "mask|case_match");
1184  SetInternalMask(newMask);
1185 
1186  if (checkMask)
1187  addSwitchMaskCheck(newMask);
1188 }
1189 
1190 
void
FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault,
                                const std::vector<std::pair<int, llvm::BasicBlock *> > &bbCases,
                                const std::map<llvm::BasicBlock *, llvm::BasicBlock *> &bbNext) {
    // The calling code should have called StartSwitch() before calling
    // SwitchInst().
    AssertPos(currentPos, controlFlowInfo.size() &&
              controlFlowInfo.back()->IsSwitch());

    // Record the switch state for later use by EmitCaseLabel() and
    // EmitDefaultLabel(); the copies made here are freed when the switch's
    // CFInfo is popped.
    switchExpr = expr;
    defaultBlock = bbDefault;
    caseBlocks = new std::vector<std::pair<int, llvm::BasicBlock *> >(bbCases);
    nextBlocks = new std::map<llvm::BasicBlock *, llvm::BasicBlock *>(bbNext);
    // A scalar (non-vector) switch expression implies uniform control flow.
    switchConditionWasUniform =
        (llvm::isa<llvm::VectorType>(expr->getType()) == false);

    if (switchConditionWasUniform == true) {
        // For a uniform switch condition, just wire things up to the LLVM
        // switch instruction.
        llvm::SwitchInst *s = llvm::SwitchInst::Create(expr, bbDefault,
                                                       bbCases.size(), bblock);
        for (int i = 0; i < (int)bbCases.size(); ++i) {
            if (expr->getType() == LLVMTypes::Int32Type)
                s->addCase(LLVMInt32(bbCases[i].first), bbCases[i].second);
            else {
                // Only 32- and 64-bit switch expressions are expected here.
                AssertPos(currentPos, expr->getType() == LLVMTypes::Int64Type);
                s->addCase(LLVMInt64(bbCases[i].first), bbCases[i].second);
            }
        }

        AddDebugPos(s);
        // switch is a terminator
        bblock = NULL;
    }
    else {
        // For a varying switch, we first turn off all lanes of the mask
        SetInternalMask(LLVMMaskAllOff);

        if (nextBlocks->size() > 0) {
            // If there are any labels inside the switch, jump to the first
            // one; any code before the first label won't be executed by
            // anyone.  (The entry keyed by NULL maps to the first label's
            // basic block.)
            std::map<llvm::BasicBlock *, llvm::BasicBlock *>::const_iterator iter;
            iter = nextBlocks->find(NULL);
            AssertPos(currentPos, iter != nextBlocks->end());
            llvm::BasicBlock *bbFirst = iter->second;
            BranchInst(bbFirst);
            bblock = NULL;
        }
    }
}
1242 
1243 
int
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::VaryingCFDepth() const.
    //
    // Returns the number of enclosing control-flow constructs that have
    // varying (per-lane) tests.
    int sum = 0;
    for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
        if (controlFlowInfo[i]->IsVarying())
            ++sum;
    return sum;
}
1252 
1253 
bool
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::InForeachLoop() const.
    //
    // Returns true if any enclosing control-flow construct is a 'foreach'.
    for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
        if (controlFlowInfo[i]->IsForeach())
            return true;
    return false;
}
1261 
1262 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::DisableGatherScatterWarnings().
    // A count (rather than a bool) so that nested disable/enable pairs
    // compose correctly.
    ++disableGSWarningCount;
}
1267 
1268 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::EnableGatherScatterWarnings().
    // Must pair with a preceding DisableGatherScatterWarnings() call.
    --disableGSWarningCount;
}
1273 
1274 
1275 
bool
    // NOTE(review): signature line missing from this extraction; this is
    // presumably the static AST-walk callback
    // initLabelBBlocks(ASTNode *node, void *data).  A declaration of 'ctx'
    // (presumably FunctionEmitContext *ctx = (FunctionEmitContext *)data;)
    // also appears to have been dropped below -- confirm against upstream.
    LabeledStmt *ls = llvm::dyn_cast<LabeledStmt>(node);
    if (ls == NULL)
        // Not a labeled statement; returning true presumably tells
        // WalkAST() to keep traversing.
        return true;

    if (ctx->labelMap.find(ls->name) != ctx->labelMap.end())
        Error(ls->pos, "Multiple labels named \"%s\" in function.",
              ls->name.c_str());
    else {
        // First time this label has been seen: create a basic block for it
        // and remember it by name.
        llvm::BasicBlock *bb = ctx->CreateBasicBlock(ls->name.c_str());
        ctx->labelMap[ls->name] = bb;
    }
    return true;
}
1293 
1294 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::InitializeLabelMap(Stmt *code) -- confirm the
    // exact name against upstream.
    //
    // Rebuilds labelMap from scratch by walking the function body and
    // creating a basic block for each label encountered.
    labelMap.erase(labelMap.begin(), labelMap.end());
    WalkAST(code, initLabelBBlocks, NULL, this);
}
1300 
1301 
llvm::BasicBlock *
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::GetLabeledBasicBlock(const std::string &label).
    //
    // Returns the basic block previously created for the given label name,
    // or NULL if no such label exists in the current function.
    if (labelMap.find(label) != labelMap.end())
        return labelMap[label];
    else
        return NULL;
}
1309 
1310 std::vector<std::string>
1312  // Initialize vector to the right size
1313  std::vector<std::string> labels(labelMap.size());
1314 
1315  // Iterate through labelMap and grab only the keys
1316  std::map<std::string, llvm::BasicBlock*>::iterator iter;
1317  for (iter=labelMap.begin(); iter != labelMap.end(); iter++)
1318  labels.push_back(iter->first);
1319 
1320  return labels;
1321 }
1322 
1323 
void
FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
    // Emits the code for a 'return' statement: stores the return value (if
    // any) into the return-value memory, then either emits an actual return
    // instruction (uniform control flow) or records which lanes returned
    // and turns them off in the execution mask.
    const Type *returnType = function->GetReturnType();
    if (returnType->IsVoidType()) {
        if (expr != NULL)
            Error(expr->pos, "Can't return non-void type \"%s\" from void function.",
                  expr->GetType()->GetString().c_str());
    }
    else {
        if (expr == NULL) {
            Error(funcStartPos, "Must provide return value for return "
                  "statement for non-void function.");
            return;
        }

        expr = TypeConvertExpr(expr, returnType, "return statement");
        if (expr != NULL) {
            llvm::Value *retVal = expr->GetValue(this);
            if (retVal != NULL) {
                if (returnType->IsUniformType() ||
                    CastType<ReferenceType>(returnType) != NULL)
                    // Uniform/reference return values can be stored directly.
                    StoreInst(retVal, returnValuePtr);
                else {
                    // Use a masked store to store the value of the expression
                    // in the return value memory; this preserves the return
                    // values from other lanes that may have executed return
                    // statements previously.
                    StoreInst(retVal, returnValuePtr, GetInternalMask(),
                              returnType, PointerType::GetUniform(returnType));
                }
            }
        }
    }

    if (VaryingCFDepth() == 0) {
        // If there is only uniform control flow between us and the
        // function entry, then it's guaranteed that all lanes are running,
        // so we can just emit a true return instruction
        AddInstrumentationPoint("return: uniform control flow");
        ReturnInst();
    }
    else {
        // Otherwise we update the returnedLanes value by ORing it with
        // the current lane mask.  (The original comment said "ANDing", but
        // the code ORs, which is what accumulating returned lanes needs.)
        llvm::Value *oldReturnedLanes =
            LoadInst(returnedLanesPtr, "old_returned_lanes");
        llvm::Value *newReturnedLanes =
            BinaryOperator(llvm::Instruction::Or, oldReturnedLanes,
                           GetFullMask(), "old_mask|returned_lanes");

        // For 'coherent' return statements, emit code to check if all
        // lanes have returned
        if (doCoherenceCheck) {
            // if newReturnedLanes == functionMaskValue, get out of here!
            llvm::Value *cmp = MasksAllEqual(functionMaskValue,
                                             newReturnedLanes);
            llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return");
            llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return");
            BranchInst(bDoReturn, bNoReturn, cmp);

            bblock = bDoReturn;
            AddInstrumentationPoint("return: all lanes have returned");
            ReturnInst();

            // Fall through to the common path on the not-all-returned side.
            bblock = bNoReturn;
        }
        // Update returnedLanesPtr and turn off all of the lanes
        // in the current mask so that any subsequent statements in the
        // same scope after the return have no effect
        StoreInst(newReturnedLanes, returnedLanesPtr);
        AddInstrumentationPoint("return: some but not all lanes have returned");
        SetInternalMask(LLVMMaskAllOff);
    }
}
1398 
1399 
1400 llvm::Value *
1401 FunctionEmitContext::Any(llvm::Value *mask) {
1402  // Call the target-dependent any function to test that the mask is non-zero
1403  std::vector<Symbol *> mm;
1404  m->symbolTable->LookupFunction("__any", &mm);
1405  if (g->target->getMaskBitCount() == 1)
1406  AssertPos(currentPos, mm.size() == 1);
1407  else
1408  // There should be one with signed int signature, one unsigned int.
1409  AssertPos(currentPos, mm.size() == 2);
1410  // We can actually call either one, since both are i32s as far as
1411  // LLVM's type system is concerned...
1412  llvm::Function *fmm = mm[0]->function;
1413  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_any"));
1414 }
1415 
1416 
1417 llvm::Value *
1418 FunctionEmitContext::All(llvm::Value *mask) {
1419  // Call the target-dependent movmsk function to turn the vector mask
1420  // into an i64 value
1421  std::vector<Symbol *> mm;
1422  m->symbolTable->LookupFunction("__all", &mm);
1423  if (g->target->getMaskBitCount() == 1)
1424  AssertPos(currentPos, mm.size() == 1);
1425  else
1426  // There should be one with signed int signature, one unsigned int.
1427  AssertPos(currentPos, mm.size() == 2);
1428  // We can actually call either one, since both are i32s as far as
1429  // LLVM's type system is concerned...
1430  llvm::Function *fmm = mm[0]->function;
1431  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_all"));
1432 }
1433 
1434 
1435 llvm::Value *
1436 FunctionEmitContext::None(llvm::Value *mask) {
1437  // Call the target-dependent movmsk function to turn the vector mask
1438  // into an i64 value
1439  std::vector<Symbol *> mm;
1440  m->symbolTable->LookupFunction("__none", &mm);
1441  if (g->target->getMaskBitCount() == 1)
1442  AssertPos(currentPos, mm.size() == 1);
1443  else
1444  // There should be one with signed int signature, one unsigned int.
1445  AssertPos(currentPos, mm.size() == 2);
1446  // We can actually call either one, since both are i32s as far as
1447  // LLVM's type system is concerned...
1448  llvm::Function *fmm = mm[0]->function;
1449  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_none"));
1450 }
1451 
1452 
llvm::Value *
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::LaneMask(llvm::Value *v).
#ifdef ISPC_NVPTX_ENABLED
    /* this makes mandelbrot example slower with "nvptx" target.
     * Needs further investigation. */
    const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
#else
    const char *__movmsk = "__movmsk";
#endif
    // Call the target-dependent movmsk function to turn the vector mask
    // into an i64 value
    std::vector<Symbol *> mm;
    m->symbolTable->LookupFunction(__movmsk, &mm);
    if (g->target->getMaskBitCount() == 1)
        AssertPos(currentPos, mm.size() == 1);
    else
        // There should be one with signed int signature, one unsigned int.
        AssertPos(currentPos, mm.size() == 2);
    // We can actually call either one, since both are i32s as far as
    // LLVM's type system is concerned...
    llvm::Function *fmm = mm[0]->function;
    return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
}
1476 
1477 #ifdef ISPC_NVPTX_ENABLED
1478 bool lAppendInsertExtractName(llvm::Value *vector, std::string &funcName)
1479 {
1480  llvm::Type *type = vector->getType();
1481  if (type == LLVMTypes::Int8VectorType)
1482  funcName += "_int8";
1483  else if (type == LLVMTypes::Int16VectorType)
1484  funcName += "_int16";
1485  else if (type == LLVMTypes::Int32VectorType)
1486  funcName += "_int32";
1487  else if (type == LLVMTypes::Int64VectorType)
1488  funcName += "_int64";
1489  else if (type == LLVMTypes::FloatVectorType)
1490  funcName += "_float";
1491  else if (type == LLVMTypes::DoubleVectorType)
1492  funcName += "_double";
1493  else
1494  return false;
1495  return true;
1496 }
1497 
1498 llvm::Value*
1499 FunctionEmitContext::Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar)
1500 {
1501  std::string funcName = "__insert";
1502  assert(lAppendInsertExtractName(vector, funcName));
1503  assert(lane->getType() == LLVMTypes::Int32Type);
1504 
1505  llvm::Function *func = m->module->getFunction(funcName.c_str());
1506  assert(func != NULL);
1507  std::vector<llvm::Value *> args;
1508  args.push_back(vector);
1509  args.push_back(lane);
1510  args.push_back(scalar);
1511  llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
1512  return ret;
1513 }
1514 
1515 llvm::Value*
1516 FunctionEmitContext::Extract(llvm::Value *vector, llvm::Value *lane)
1517 {
1518  std::string funcName = "__extract";
1519  assert(lAppendInsertExtractName(vector, funcName));
1520  assert(lane->getType() == LLVMTypes::Int32Type);
1521 
1522  llvm::Function *func = m->module->getFunction(funcName.c_str());
1523  assert(func != NULL);
1524  std::vector<llvm::Value *> args;
1525  args.push_back(vector);
1526  args.push_back(lane);
1527  llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
1528  return ret;
1529 }
1530 #endif /* ISPC_NVPTX_ENABLED */
1531 
1532 
llvm::Value *
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
    // Emits code that evaluates to true iff the two masks are equal.
#ifdef ISPC_NVPTX_ENABLED
    if (g->target->getISA() == Target::NVPTX)
    {
        // Compare the two masks to get a vector of i1s
        llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                                   v1, v2, "v1==v2");
        return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
    }
#endif /* ISPC_NVPTX_ENABLED */

#if 0
    // Disabled alternate implementation:
    // Compare the two masks to get a vector of i1s
    llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                               v1, v2, "v1==v2");
    // Turn that into a bool vector type (often i32s)
    cmp = I1VecToBoolVec(cmp);
    // And see if it's all on
    return All(cmp);
#else
    // Compare the movmsk'ed (scalar bitmask) forms of the two masks.
    llvm::Value *mm1 = LaneMask(v1);
    llvm::Value *mm2 = LaneMask(v2);
    return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
                   LLVMGetName("equal", v1, v2));
#endif
}
1560 
llvm::Value *
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::ProgramIndexVector(bool is32bits).
    //
    // Builds the constant vector <0, 1, ..., vectorWidth-1> with 32-bit or
    // 64-bit integer elements depending on 'is32bits'.
    llvm::SmallVector<llvm::Constant*, 16> array;
    for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
        llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i);
        array.push_back(C);
    }

    llvm::Constant* index = llvm::ConstantVector::get(array);

    return index;
}
1573 
1574 #ifdef ISPC_NVPTX_ENABLED
llvm::Value *
FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
    // On NVPTX, the per-lane index comes from the __program_index builtin;
    // its result is inserted into element 0 of an otherwise-undef vector.
    llvm::Function *func_program_index = m->module->getFunction("__program_index");
    llvm::Value *__program_index = CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__program_indexS");
    llvm::Value *index = InsertInst(llvm::UndefValue::get(LLVMTypes::Int32VectorType), __program_index, 0, "foreach__program_indexV");
#if 0
    // NOTE(review): dead code -- 'Int64VectandType' looks like a typo for
    // 'Int64VectorType' and would not compile if this block were enabled.
    if (!is32bits)
        index = ZExtInst(index, LLVMTypes::Int64VectandType);
#endif
    // NOTE(review): 'is32bits' is currently unused here (see the disabled
    // code above); the result is always a 32-bit int vector.
    return index;
}
1586 #endif /* ISPC_NVPTX_ENABLED */
1587 
1588 
1589 llvm::Value *
1590 FunctionEmitContext::GetStringPtr(const std::string &str) {
1591  llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
1592  llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
1593  llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(),
1594  true /*isConst*/,
1595  linkage, lstr, "__str");
1596  return new llvm::BitCastInst(lstrPtr, LLVMTypes::VoidPointerType,
1597  "str_void_ptr", bblock);
1598 }
1599 
1600 
llvm::BasicBlock *
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::CreateBasicBlock(const char *name).
    // Creates a new basic block with the given name, appended to the
    // function currently being emitted.
    return llvm::BasicBlock::Create(*g->ctx, name, llvmFunction);
}
1605 
1606 
llvm::Value *
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::I1VecToBoolVec(llvm::Value *b).
    //
    // Converts a vector of i1s (or an array of such vectors) to the
    // target's bool-vector representation via sign extension.  Passes NULL
    // through (only legal if an error has already been reported).
    if (b == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    // With a 1-bit mask the i1 vector is already in the right form.
    if (g->target->getMaskBitCount() == 1)
        return b;

    llvm::ArrayType *at =
        llvm::dyn_cast<llvm::ArrayType>(b->getType());
    if (at) {
        // If we're given an array of vectors of i1s, then do the
        // conversion for each of the elements
        llvm::Type *boolArrayType =
            llvm::ArrayType::get(LLVMTypes::BoolVectorType, at->getNumElements());
        llvm::Value *ret = llvm::UndefValue::get(boolArrayType);

        for (unsigned int i = 0; i < at->getNumElements(); ++i) {
            llvm::Value *elt = ExtractInst(b, i);
            llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType,
                                         LLVMGetName(elt, "_to_boolvec"));
            ret = InsertInst(ret, sext, i);
        }
        return ret;
    }
    else
        return SExtInst(b, LLVMTypes::BoolVectorType, LLVMGetName(b, "_to_boolvec"));
}
1637 
1638 
1639 static llvm::Value *
1640 lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
1641  llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s, true);
1642  std::string var_name = "_";
1643  var_name = var_name + s;
1644  llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(),
1645  true /* const */,
1646  llvm::GlobalValue::InternalLinkage,
1647  sConstant, var_name.c_str());
1648  llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) };
1649  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1650 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1651  return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock);
1652 #else /* LLVM 3.7+ */
1653  return llvm::GetElementPtrInst::Create(PTYPE(sPtr),
1654  sPtr, arrayRef, "sptr", bblock);
1655 #endif
1656 }
1657 
1658 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::AddInstrumentationPoint(const char *note).
    AssertPos(currentPos, note != NULL);
    // Instrumentation calls are only emitted when requested.
    if (!g->emitInstrumentation)
        return;

    std::vector<llvm::Value *> args;
    // arg 1: filename as string
    args.push_back(lGetStringAsValue(bblock, currentPos.name));
    // arg 2: provided note
    args.push_back(lGetStringAsValue(bblock, note));
    // arg 3: line number
    args.push_back(LLVMInt32(currentPos.first_line));
    // arg 4: current mask, movmsk'ed down to an int64
    args.push_back(LaneMask(GetFullMask()));

    llvm::Function *finst = m->module->getFunction("ISPCInstrument");
    CallInst(finst, NULL, args, "");
}
1678 
1679 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::SetDebugPos(SourcePos pos).
    // Records the source position used for subsequently emitted debug info.
    currentPos = pos;
}
1684 
1685 
SourcePos
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::GetDebugPos() const.
    // Returns the current source position for debug-info emission.
    return currentPos;
}
1690 
1691 
void
FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos,
                                 llvm::DIScope *scope) {
    // Attaches a debug location to 'value' if it is an instruction and
    // debug info is enabled, using 'pos'/'scope' when supplied and the
    // current position / innermost scope otherwise.
    llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(value);
    if (inst != NULL && m->diBuilder) {
        SourcePos p = pos ? *pos : currentPos;
        if (p.first_line != 0)
            // If first_line == 0, then we're in the middle of setting up
            // the standard library or the like; don't add debug positions
            // for those functions
            inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column,
                                                  scope ?
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
                                                  *scope
#else /* LLVM 3.7+ */
                                                  scope
#endif
                                                  : GetDIScope()));
    }
}
1712 
1713 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::StartScope().
    //
    // Pushes a new lexical block onto the debug-scope stack, parented to
    // the innermost open scope (or to the function's subprogram when there
    // is none).
    if (m->diBuilder != NULL) {
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
        llvm::DIScope parentScope;
        llvm::DILexicalBlock lexicalBlock;
#else /* LLVM 3.7+ */
        llvm::DIScope *parentScope;
        llvm::DILexicalBlock *lexicalBlock;
#endif
        if (debugScopes.size() > 0)
            parentScope = debugScopes.back();
        else
            parentScope = diSubprogram;

        lexicalBlock =
            m->diBuilder->createLexicalBlock(parentScope, diFile,
                                             currentPos.first_line,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_5
                                             // Revision 202736 in LLVM adds support of DWARF discriminator
                                             // to the last argument and revision 202737 in clang adds 0
                                             // for the last argument by default.
                                             currentPos.first_column, 0);
#else
                                             // Revision 216239 in LLVM removes support of DWARF discriminator
                                             // as the last argument
                                             currentPos.first_column);
#endif // LLVM 3.2, 3.3, 3.4 and 3.6+
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
        AssertPos(currentPos, lexicalBlock.Verify());
        debugScopes.push_back(lexicalBlock);
#else /* LLVM 3.7+ */
        debugScopes.push_back(llvm::cast<llvm::DILexicalBlockBase>(lexicalBlock));
#endif
    }
}
1750 
1751 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::EndScope().
    if (m->diBuilder != NULL) {
        // Must pair with a prior StartScope() call.
        AssertPos(currentPos, debugScopes.size() > 0);
        debugScopes.pop_back();
    }
}
1759 
1760 
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
llvm::DIScope
#else /* LLVM 3.7+ */
llvm::DIScope*
#endif
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::GetDIScope() const.
    // Returns the innermost active debug-info scope.
    AssertPos(currentPos, debugScopes.size() > 0);
    return debugScopes.back();
}
1770 
1771 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym).
    //
    // Emits debug info for a local (automatic) variable: creates the DI
    // variable descriptor and a dbg.declare pointing at its storage.
    // The heavy preprocessor conditionals select the DIBuilder API that
    // matches the LLVM version being built against.
    if (m->diBuilder == NULL)
        return;

#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
    llvm::DIScope scope = GetDIScope();
    llvm::DIType diType = sym->type->GetDIType(scope);
    AssertPos(currentPos, diType.Verify());
    llvm::DIVariable var =
#else /* LLVM 3.7+ */
    llvm::DIScope *scope = GetDIScope();
    llvm::DIType *diType = sym->type->GetDIType(scope);
    llvm::DILocalVariable *var =
#endif

#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7*/
        m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_auto_variable,
                                          scope,
                                          sym->name,
                                          sym->pos.GetDIFile(),
                                          sym->pos.first_line,
                                          diType,
                                          true /* preserve through opts */);
#else /* LLVM 3.8+ */
        m->diBuilder->createAutoVariable(scope,
                                         sym->name,
                                         sym->pos.GetDIFile(),
                                         sym->pos.first_line,
                                         diType,
                                         true /* preserve through opts */);
#endif


#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
    AssertPos(currentPos, var.Verify());
    llvm::Instruction *declareInst =
        m->diBuilder->insertDeclare(sym->storagePtr, var,
                                    #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
                                    m->diBuilder->createExpression(),
                                    #endif
                                    bblock);
    AddDebugPos(declareInst, &sym->pos, &scope);
#else /* LLVM 3.7+ */
    llvm::Instruction *declareInst =
        m->diBuilder->insertDeclare(sym->storagePtr, var,
                                    m->diBuilder->createExpression(),
                                    llvm::DebugLoc::get(sym->pos.first_line,
                                                        sym->pos.first_column, scope),
                                    bblock);
    AddDebugPos(declareInst, &sym->pos, scope);
#endif
}
1825 
1826 
void
    // NOTE(review): signature line missing from this extraction; presumably
    // FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum).
    //
    // Emits debug info for function parameter number 'argNum' (0-based;
    // DWARF argument numbers are 1-based, hence the argNum + 1 below):
    // creates the DI parameter descriptor and a dbg.declare pointing at
    // its storage.  The preprocessor conditionals select the DIBuilder API
    // matching the LLVM version being built against.
    if (m->diBuilder == NULL)
        return;

#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
    int flags = 0;
#else // LLVM 4.0+
    llvm::DINode::DIFlags flags = llvm::DINode::FlagZero;
#endif
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
    llvm::DIScope scope = diSubprogram;
    llvm::DIType diType = sym->type->GetDIType(scope);
    AssertPos(currentPos, diType.Verify());
    llvm::DIVariable var =
#else /* LLVM 3.7+ */
    llvm::DIScope *scope = diSubprogram;
    llvm::DIType *diType = sym->type->GetDIType(scope);
    llvm::DILocalVariable *var =
#endif

#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
        m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_arg_variable,
                                          scope,
                                          sym->name,
                                          sym->pos.GetDIFile(),
                                          sym->pos.first_line,
                                          diType,
                                          true /* preserve through opts */,
                                          flags,
                                          argNum + 1);
#else /* LLVM 3.8+ */
        m->diBuilder->createParameterVariable(scope,
                                              sym->name,
                                              argNum + 1,
                                              sym->pos.GetDIFile(),
                                              sym->pos.first_line,
                                              diType,
                                              true /* preserve through opts */,
                                              flags);
#endif

#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
    AssertPos(currentPos, var.Verify());
    llvm::Instruction *declareInst =
        m->diBuilder->insertDeclare(sym->storagePtr, var,
                                    #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
                                    m->diBuilder->createExpression(),
                                    #endif
                                    bblock);
    AddDebugPos(declareInst, &sym->pos, &scope);
#else /* LLVM 3.7+ */
    llvm::Instruction *declareInst =
        m->diBuilder->insertDeclare(sym->storagePtr, var,
                                    m->diBuilder->createExpression(),
                                    llvm::DebugLoc::get(sym->pos.first_line,
                                                        sym->pos.first_column, scope),
                                    bblock);
    AddDebugPos(declareInst, &sym->pos, scope);
#endif
}
1888 
1889 
1890 /** If the given type is an array of vector types, then it's the
1891  representation of an ispc VectorType with varying elements. If it is
1892  one of these, return the array size (i.e. the VectorType's size).
1893  Otherwise return zero.
1894  */
1895 static int
1896 lArrayVectorWidth(llvm::Type *t) {
1897  llvm::ArrayType *arrayType =
1898  llvm::dyn_cast<llvm::ArrayType>(t);
1899  if (arrayType == NULL)
1900  return 0;
1901 
1902  // We shouldn't be seeing arrays of anything but vectors being passed
1903  // to things like FunctionEmitContext::BinaryOperator() as operands.
1904  llvm::VectorType *vectorElementType =
1905  llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
1906  Assert((vectorElementType != NULL &&
1907  (int)vectorElementType->getNumElements() == g->target->getVectorWidth()));
1908 
1909  return (int)arrayType->getNumElements();
1910 }
1911 
1912 
1913 llvm::Value *
1914 FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst,
1915  llvm::Value *v0, llvm::Value *v1,
1916  const char *name) {
1917  if (v0 == NULL || v1 == NULL) {
1918  AssertPos(currentPos, m->errorCount > 0);
1919  return NULL;
1920  }
1921 
1922  AssertPos(currentPos, v0->getType() == v1->getType());
1923  llvm::Type *type = v0->getType();
1924  int arraySize = lArrayVectorWidth(type);
1925  if (arraySize == 0) {
1926  llvm::Instruction *bop =
1927  llvm::BinaryOperator::Create(inst, v0, v1, name ? name : "", bblock);
1928  AddDebugPos(bop);
1929  return bop;
1930  }
1931  else {
1932  // If this is an ispc VectorType, apply the binary operator to each
1933  // of the elements of the array (which in turn should be either
1934  // scalar types or llvm::VectorTypes.)
1935  llvm::Value *ret = llvm::UndefValue::get(type);
1936  for (int i = 0; i < arraySize; ++i) {
1937  llvm::Value *a = ExtractInst(v0, i);
1938  llvm::Value *b = ExtractInst(v1, i);
1939  llvm::Value *op = BinaryOperator(inst, a, b);
1940  ret = InsertInst(ret, op, i);
1941  }
1942  return ret;
1943  }
1944 }
1945 
1946 
1947 llvm::Value *
1948 FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
1949  if (v == NULL) {
1950  AssertPos(currentPos, m->errorCount > 0);
1951  return NULL;
1952  }
1953 
1954  // Similarly to BinaryOperator, do the operation on all the elements of
1955  // the array if we're given an array type; otherwise just do the
1956  // regular llvm operation.
1957  llvm::Type *type = v->getType();
1958  int arraySize = lArrayVectorWidth(type);
1959  if (arraySize == 0) {
1960  llvm::Instruction *binst =
1961  llvm::BinaryOperator::CreateNot(v, name ? name : "not", bblock);
1962  AddDebugPos(binst);
1963  return binst;
1964  }
1965  else {
1966  llvm::Value *ret = llvm::UndefValue::get(type);
1967  for (int i = 0; i < arraySize; ++i) {
1968  llvm::Value *a = ExtractInst(v, i);
1969  llvm::Value *op =
1970  llvm::BinaryOperator::CreateNot(a, name ? name : "not", bblock);
1971  AddDebugPos(op);
1972  ret = InsertInst(ret, op, i);
1973  }
1974  return ret;
1975  }
1976 }
1977 
1978 
1979 // Given the llvm Type that represents an ispc VectorType, return an
1980 // equally-shaped type with boolean elements. (This is the type that will
1981 // be returned from CmpInst with ispc VectorTypes).
1982 static llvm::Type *
1984  llvm::ArrayType *arrayType =
1985  llvm::dyn_cast<llvm::ArrayType>(type);
1986  Assert(arrayType != NULL);
1987 
1988  llvm::VectorType *vectorElementType =
1989  llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
1990  Assert(vectorElementType != NULL);
1991  Assert((int)vectorElementType->getNumElements() == g->target->getVectorWidth());
1992 
1993  llvm::Type *base =
1994  llvm::VectorType::get(LLVMTypes::BoolType, g->target->getVectorWidth());
1995  return llvm::ArrayType::get(base, arrayType->getNumElements());
1996 }
1997 
1998 
1999 llvm::Value *
2000 FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst,
2001  llvm::CmpInst::Predicate pred,
2002  llvm::Value *v0, llvm::Value *v1,
2003  const char *name) {
2004  if (v0 == NULL || v1 == NULL) {
2005  AssertPos(currentPos, m->errorCount > 0);
2006  return NULL;
2007  }
2008 
2009  AssertPos(currentPos, v0->getType() == v1->getType());
2010  llvm::Type *type = v0->getType();
2011  int arraySize = lArrayVectorWidth(type);
2012  if (arraySize == 0) {
2013  llvm::Instruction *ci =
2014  llvm::CmpInst::Create(inst, pred, v0, v1, name ? name : "cmp",
2015  bblock);
2016  AddDebugPos(ci);
2017  return ci;
2018  }
2019  else {
2020  llvm::Type *boolType = lGetMatchingBoolVectorType(type);
2021  llvm::Value *ret = llvm::UndefValue::get(boolType);
2022  for (int i = 0; i < arraySize; ++i) {
2023  llvm::Value *a = ExtractInst(v0, i);
2024  llvm::Value *b = ExtractInst(v1, i);
2025  llvm::Value *op = CmpInst(inst, pred, a, b, name);
2026  ret = InsertInst(ret, op, i);
2027  }
2028  return ret;
2029  }
2030 }
2031 
2032 
2033 llvm::Value *
2034 FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
2035  if (value == NULL) {
2036  AssertPos(currentPos, m->errorCount > 0);
2037  return NULL;
2038  }
2039 
2040  llvm::Value *ret = NULL;
2041  llvm::Type *eltType = value->getType();
2042  llvm::Type *vecType = NULL;
2043 
2044  llvm::PointerType *pt =
2045  llvm::dyn_cast<llvm::PointerType>(eltType);
2046  if (pt != NULL) {
2047  // Varying pointers are represented as vectors of i32/i64s
2049  value = PtrToIntInst(value);
2050  }
2051  else {
2052  // All other varying types are represented as vectors of the
2053  // underlying type.
2054  vecType = llvm::VectorType::get(eltType, g->target->getVectorWidth());
2055  }
2056 
2057  // Check for a constant case.
2058  if (llvm::Constant *const_val = llvm::dyn_cast<llvm::Constant>(value)) {
2059  ret = llvm::ConstantVector::getSplat(
2060  g->target->getVectorWidth(),
2061  const_val);
2062  return ret;
2063  }
2064 
2065  ret = BroadcastValue(value, vecType, name);
2066 
2067  return ret;
2068 }
2069 
2070 
2071 llvm::Value *
2072 FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type,
2073  const char *name) {
2074  if (value == NULL) {
2075  AssertPos(currentPos, m->errorCount > 0);
2076  return NULL;
2077  }
2078 
2079  if (name == NULL)
2080  name = LLVMGetName(value, "_bitcast");
2081 
2082  llvm::Instruction *inst = new llvm::BitCastInst(value, type, name, bblock);
2083  AddDebugPos(inst);
2084  return inst;
2085 }
2086 
2087 
2088 llvm::Value *
2089 FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
2090  if (value == NULL) {
2091  AssertPos(currentPos, m->errorCount > 0);
2092  return NULL;
2093  }
2094 
2095  if (llvm::isa<llvm::VectorType>(value->getType()))
2096  // no-op for varying pointers; they're already vectors of ints
2097  return value;
2098 
2099  if (name == NULL)
2100  name = LLVMGetName(value, "_ptr2int");
2101  llvm::Type *type = LLVMTypes::PointerIntType;
2102  llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
2103  AddDebugPos(inst);
2104  return inst;
2105 }
2106 
2107 
2108 llvm::Value *
2109 FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType,
2110  const char *name) {
2111  if (value == NULL) {
2112  AssertPos(currentPos, m->errorCount > 0);
2113  return NULL;
2114  }
2115 
2116  if (name == NULL)
2117  name = LLVMGetName(value, "_ptr2int");
2118 
2119  llvm::Type *fromType = value->getType();
2120  if (llvm::isa<llvm::VectorType>(fromType)) {
2121  // varying pointer
2122  if (fromType == toType)
2123  // already the right type--done
2124  return value;
2125  else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
2126  return TruncInst(value, toType, name);
2127  else {
2128  AssertPos(currentPos, fromType->getScalarSizeInBits() <
2129  toType->getScalarSizeInBits());
2130  return ZExtInst(value, toType, name);
2131  }
2132  }
2133 
2134  llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock);
2135  AddDebugPos(inst);
2136  return inst;
2137 }
2138 
2139 
2140 llvm::Value *
2141 FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType,
2142  const char *name) {
2143  if (value == NULL) {
2144  AssertPos(currentPos, m->errorCount > 0);
2145  return NULL;
2146  }
2147 
2148  if (name == NULL)
2149  name = LLVMGetName(value, "_int2ptr");
2150 
2151  llvm::Type *fromType = value->getType();
2152  if (llvm::isa<llvm::VectorType>(fromType)) {
2153  // varying pointer
2154  if (fromType == toType)
2155  // done
2156  return value;
2157  else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
2158  return TruncInst(value, toType, name);
2159  else {
2160  AssertPos(currentPos, fromType->getScalarSizeInBits() <
2161  toType->getScalarSizeInBits());
2162  return ZExtInst(value, toType, name);
2163  }
2164  }
2165 
2166  llvm::Instruction *inst = new llvm::IntToPtrInst(value, toType, name,
2167  bblock);
2168  AddDebugPos(inst);
2169  return inst;
2170 }
2171 
2172 
2173 llvm::Instruction *
2174 FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type,
2175  const char *name) {
2176  if (value == NULL) {
2177  AssertPos(currentPos, m->errorCount > 0);
2178  return NULL;
2179  }
2180 
2181  if (name == NULL)
2182  name = LLVMGetName(value, "_trunc");
2183 
2184  // TODO: we should probably handle the array case as in
2185  // e.g. BitCastInst(), but we don't currently need that functionality
2186  llvm::Instruction *inst = new llvm::TruncInst(value, type, name, bblock);
2187  AddDebugPos(inst);
2188  return inst;
2189 }
2190 
2191 
2192 llvm::Instruction *
2193 FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
2194  llvm::Type *type, const char *name) {
2195  if (value == NULL) {
2196  AssertPos(currentPos, m->errorCount > 0);
2197  return NULL;
2198  }
2199 
2200  if (name == NULL)
2201  name = LLVMGetName(value, "_cast");
2202 
2203  // TODO: we should probably handle the array case as in
2204  // e.g. BitCastInst(), but we don't currently need that functionality
2205  llvm::Instruction *inst = llvm::CastInst::Create(op, value, type, name,
2206  bblock);
2207  AddDebugPos(inst);
2208  return inst;
2209 }
2210 
2211 
2212 llvm::Instruction *
2213 FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type,
2214  const char *name) {
2215  if (value == NULL) {
2216  AssertPos(currentPos, m->errorCount > 0);
2217  return NULL;
2218  }
2219 
2220  if (name == NULL)
2221  name = LLVMGetName(value, "_cast");
2222 
2223  // TODO: we should probably handle the array case as in
2224  // e.g. BitCastInst(), but we don't currently need that functionality
2225  llvm::Instruction *inst = llvm::CastInst::CreateFPCast(value, type, name, bblock);
2226  AddDebugPos(inst);
2227  return inst;
2228 }
2229 
2230 
2231 llvm::Instruction *
2232 FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type,
2233  const char *name) {
2234  if (value == NULL) {
2235  AssertPos(currentPos, m->errorCount > 0);
2236  return NULL;
2237  }
2238 
2239  if (name == NULL)
2240  name = LLVMGetName(value, "_sext");
2241 
2242  // TODO: we should probably handle the array case as in
2243  // e.g. BitCastInst(), but we don't currently need that functionality
2244  llvm::Instruction *inst = new llvm::SExtInst(value, type, name, bblock);
2245  AddDebugPos(inst);
2246  return inst;
2247 }
2248 
2249 
2250 llvm::Instruction *
2251 FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type,
2252  const char *name) {
2253  if (value == NULL) {
2254  AssertPos(currentPos, m->errorCount > 0);
2255  return NULL;
2256  }
2257 
2258  if (name == NULL)
2259  name = LLVMGetName(value, "_zext");
2260 
2261  // TODO: we should probably handle the array case as in
2262  // e.g. BitCastInst(), but we don't currently need that functionality
2263  llvm::Instruction *inst = new llvm::ZExtInst(value, type, name, bblock);
2264  AddDebugPos(inst);
2265  return inst;
2266 }
2267 
2268 
2269 /** Utility routine used by the GetElementPtrInst() methods; given a
2270  pointer to some type (either uniform or varying) and an index (also
2271  either uniform or varying), this returns the new pointer (varying if
2272  appropriate) given by offsetting the base pointer by the index times
2273  the size of the object that the pointer points to.
2274  */
llvm::Value *
FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
                                     const Type *ptrType) {
    // Find the scale factor for the index (i.e. the size of the object
    // that the pointer(s) point(s) to).
    const Type *scaleType = ptrType->GetBaseType();
    llvm::Value *scale = g->target->SizeOf(scaleType->LLVMType(g->ctx), bblock);

    bool indexIsVarying =
        llvm::isa<llvm::VectorType>(index->getType());
    llvm::Value *offset = NULL;
    if (indexIsVarying == false) {
        // Truncate or sign extend the index as appropriate to a 32 or
        // 64-bit type, matching the target/addressing mode.
        if ((g->target->is32Bit() || g->opt.force32BitAddressing) &&
            index->getType() == LLVMTypes::Int64Type)
            index = TruncInst(index, LLVMTypes::Int32Type);
        else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
                 index->getType() == LLVMTypes::Int32Type)
            index = SExtInst(index, LLVMTypes::Int64Type);

        // do a scalar multiply to get the offset as index * scale and then
        // smear the result out to be a vector; this is more efficient than
        // first promoting both the scale and the index to vectors and then
        // multiplying.
        offset = BinaryOperator(llvm::Instruction::Mul, scale, index);
        offset = SmearUniform(offset);
    }
    else {
        // Similarly, truncate or sign extend the index to be a 32 or 64
        // bit vector type
        if ((g->target->is32Bit() || g->opt.force32BitAddressing) &&
            index->getType() == LLVMTypes::Int64VectorType)
            index = TruncInst(index, LLVMTypes::Int32VectorType);
        else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
                 index->getType() == LLVMTypes::Int32VectorType)
            index = SExtInst(index, LLVMTypes::Int64VectorType);

        // The index is already varying, so the scale must be promoted to
        // a vector before the elementwise multiply below.
        scale = SmearUniform(scale);

        // offset = index * scale
        offset = BinaryOperator(llvm::Instruction::Mul, scale, index,
                                LLVMGetName("mul", scale, index));
    }

    // For 64-bit targets, if we've been doing our offset calculations in
    // 32 bits, we still have to convert to a 64-bit value before we
    // actually add the offset to the pointer.
    if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
        offset = SExtInst(offset, LLVMTypes::Int64VectorType,
                          LLVMGetName(offset, "_to_64"));

    // Smear out the pointer to be varying; either the base pointer or the
    // index must be varying for this method to be called.
    bool baseIsUniform =
        (llvm::isa<llvm::PointerType>(basePtr->getType()));
    AssertPos(currentPos, baseIsUniform == false || indexIsVarying == true);
    llvm::Value *varyingPtr = baseIsUniform ? SmearUniform(basePtr) : basePtr;

    // newPtr = ptr + offset; varying pointers are integer vectors, so this
    // is plain integer addition.
    return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset,
                          LLVMGetName(basePtr, "_offset"));
}
2338 
2339 
2340 void
2341 FunctionEmitContext::MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1) {
2342  llvm::Type *type0 = (*v0)->getType();
2343  llvm::Type *type1 = (*v1)->getType();
2344 
2345  // First, promote to a vector type if one of the two values is a vector
2346  // type
2347  if (llvm::isa<llvm::VectorType>(type0) &&
2348  !llvm::isa<llvm::VectorType>(type1)) {
2349  *v1 = SmearUniform(*v1, "smear_v1");
2350  type1 = (*v1)->getType();
2351  }
2352  if (!llvm::isa<llvm::VectorType>(type0) &&
2353  llvm::isa<llvm::VectorType>(type1)) {
2354  *v0 = SmearUniform(*v0, "smear_v0");
2355  type0 = (*v0)->getType();
2356  }
2357 
2358  // And then update to match bit widths
2359  if (type0 == LLVMTypes::Int32Type &&
2360  type1 == LLVMTypes::Int64Type)
2361  *v0 = SExtInst(*v0, LLVMTypes::Int64Type);
2362  else if (type1 == LLVMTypes::Int32Type &&
2363  type0 == LLVMTypes::Int64Type)
2364  *v1 = SExtInst(*v1, LLVMTypes::Int64Type);
2365  else if (type0 == LLVMTypes::Int32VectorType &&
2366  type1 == LLVMTypes::Int64VectorType)
2367  *v0 = SExtInst(*v0, LLVMTypes::Int64VectorType);
2368  else if (type1 == LLVMTypes::Int32VectorType &&
2369  type0 == LLVMTypes::Int64VectorType)
2370  *v1 = SExtInst(*v1, LLVMTypes::Int64VectorType);
2371 }
2372 
2373 
2374 /** Given an integer index in indexValue that's indexing into an array of
2375  soa<> structures with given soaWidth, compute the two sub-indices we
2376  need to do the actual indexing calculation:
2377 
2378  subIndices[0] = (indexValue >> log(soaWidth))
2379  subIndices[1] = (indexValue & (soaWidth-1))
2380  */
2381 static llvm::Value *
2383  llvm::Value *indexValue, llvm::Value *ptrSliceOffset,
2384  llvm::Value **newSliceOffset) {
2385  // Compute the log2 of the soaWidth.
2386  Assert(soaWidth > 0);
2387  int logWidth = 0, sw = soaWidth;
2388  while (sw > 1) {
2389  ++logWidth;
2390  sw >>= 1;
2391  }
2392  Assert((1 << logWidth) == soaWidth);
2393 
2394  ctx->MatchIntegerTypes(&indexValue, &ptrSliceOffset);
2395 
2396  llvm::Type *indexType = indexValue->getType();
2397  llvm::Value *shift = LLVMIntAsType(logWidth, indexType);
2398  llvm::Value *mask = LLVMIntAsType(soaWidth-1, indexType);
2399 
2400  llvm::Value *indexSum =
2401  ctx->BinaryOperator(llvm::Instruction::Add, indexValue, ptrSliceOffset,
2402  "index_sum");
2403 
2404  // minor index = (index & (soaWidth - 1))
2405  *newSliceOffset = ctx->BinaryOperator(llvm::Instruction::And, indexSum,
2406  mask, "slice_index_minor");
2407  // slice offsets are always 32 bits...
2408  if ((*newSliceOffset)->getType() == LLVMTypes::Int64Type)
2409  *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32Type);
2410  else if ((*newSliceOffset)->getType() == LLVMTypes::Int64VectorType)
2411  *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32VectorType);
2412 
2413  // major index = (index >> logWidth)
2414  return ctx->BinaryOperator(llvm::Instruction::AShr, indexSum,
2415  shift, "slice_index_major");
2416 }
2417 
2418 
2419 llvm::Value *
2420 FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) {
2421  // Create a small struct where the first element is the type of the
2422  // given pointer and the second element is the type of the offset
2423  // value.
2424  std::vector<llvm::Type *> eltTypes;
2425  eltTypes.push_back(ptr->getType());
2426  eltTypes.push_back(offset->getType());
2427  llvm::StructType *st =
2428  llvm::StructType::get(*g->ctx, eltTypes);
2429 
2430  llvm::Value *ret = llvm::UndefValue::get(st);
2431  ret = InsertInst(ret, ptr, 0, LLVMGetName(ret, "_slice_ptr"));
2432  ret = InsertInst(ret, offset, 1, LLVMGetName(ret, "_slice_offset"));
2433  return ret;
2434 }
2435 
2436 
/** Emit a GEP-style address computation: offset basePtr by index.
    Handles uniform pointers (plain LLVM GEP), varying pointers (explicit
    integer arithmetic via applyVaryingGEP), and slice pointers into
    soa<> data (recursive call on the pointer part). */
llvm::Value *
FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
                                       const Type *ptrRefType, const char *name) {
    if (basePtr == NULL || index == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    // Regularize to a standard pointer type for basePtr's type
    const PointerType *ptrType;
    if (CastType<ReferenceType>(ptrRefType) != NULL)
        ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
    else {
        ptrType = CastType<PointerType>(ptrRefType);
        AssertPos(currentPos, ptrType != NULL);
    }

    if (ptrType->IsSlice()) {
        // Slice pointers are lowered to a { pointer, offset } struct.
        AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));

        llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
        if (ptrType->IsFrozenSlice() == false) {
            // For slice pointers that aren't frozen, we compute a new
            // index based on the given index plus the offset in the slice
            // pointer.  This gives us an updated integer slice index for
            // the resulting slice pointer and then an index to index into
            // the soa<> structs with.
            llvm::Value *newSliceOffset;
            int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
            index = lComputeSliceIndex(this, soaWidth, index,
                                       ptrSliceOffset, &newSliceOffset);
            ptrSliceOffset = newSliceOffset;
        }

        // Handle the indexing into the soa<> structs with the major
        // component of the index through a recursive call
        llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index,
                                           ptrType->GetAsNonSlice(), name);

        // And mash the results together for the return value
        return MakeSlicePointer(p, ptrSliceOffset);
    }

    // Double-check consistency between the given pointer type and its LLVM
    // type: uniform -> llvm pointer, varying -> vector of integers.
    if (ptrType->IsUniformType())
        AssertPos(currentPos, llvm::isa<llvm::PointerType>(basePtr->getType()));
    else if (ptrType->IsVaryingType())
        AssertPos(currentPos, llvm::isa<llvm::VectorType>(basePtr->getType()));

    bool indexIsVaryingType =
        llvm::isa<llvm::VectorType>(index->getType());

    if (indexIsVaryingType == false && ptrType->IsUniformType() == true) {
        // The easy case: both the base pointer and the indices are
        // uniform, so just emit the regular LLVM GEP instruction
        llvm::Value *ind[1] = { index };
        llvm::ArrayRef<llvm::Value *> arrayRef(&ind[0], &ind[1]);
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                            name ? name : "gep", bblock);
#else /* LLVM 3.7+ */
        // LLVM 3.7 added the explicit pointee-type parameter to Create().
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(PTYPE(basePtr),
                                            basePtr, arrayRef,
                                            name ? name : "gep", bblock);
#endif
        AddDebugPos(inst);
        return inst;
    }
    else
        return applyVaryingGEP(basePtr, index, ptrType);
}
2511 
2512 
/** Two-index variant of GetElementPtrInst(): index0 selects within the
    pointed-to object, index1 within the resulting element.  Handles the
    same uniform/varying/slice pointer cases as the one-index version. */
llvm::Value *
FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0,
                                       llvm::Value *index1, const Type *ptrRefType,
                                       const char *name) {
    if (basePtr == NULL || index0 == NULL || index1 == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    // Regularize the pointer type for basePtr
    const PointerType *ptrType = NULL;
    if (CastType<ReferenceType>(ptrRefType) != NULL)
        ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
    else {
        ptrType = CastType<PointerType>(ptrRefType);
        AssertPos(currentPos, ptrType != NULL);
    }

    if (ptrType->IsSlice()) {
        // Similar to the 1D GEP implementation above, for non-frozen slice
        // pointers we do the two-step indexing calculation and then pass
        // the new major index on to a recursive GEP call.
        AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));
        llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
        if (ptrType->IsFrozenSlice() == false) {
            llvm::Value *newSliceOffset;
            int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
            index1 = lComputeSliceIndex(this, soaWidth, index1,
                                        ptrSliceOffset, &newSliceOffset);
            ptrSliceOffset = newSliceOffset;
        }

        llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index0,
                                           index1, ptrType->GetAsNonSlice(),
                                           name);
        return MakeSlicePointer(p, ptrSliceOffset);
    }

    bool index0IsVaryingType =
        llvm::isa<llvm::VectorType>(index0->getType());
    bool index1IsVaryingType =
        llvm::isa<llvm::VectorType>(index1->getType());

    if (index0IsVaryingType == false && index1IsVaryingType == false &&
        ptrType->IsUniformType() == true) {
        // The easy case: both the base pointer and the indices are
        // uniform, so just emit the regular LLVM GEP instruction
        llvm::Value *indices[2] = { index0, index1 };
        llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                            name ? name : "gep", bblock);
#else /* LLVM 3.7+ */
        // LLVM 3.7 added the explicit pointee-type parameter to Create().
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(PTYPE(basePtr),
                                            basePtr, arrayRef,
                                            name ? name : "gep", bblock);
#endif
        AddDebugPos(inst);
        return inst;
    }
    else {
        // Handle the first dimension with index0
        llvm::Value *ptr0 = GetElementPtrInst(basePtr, index0, ptrType);

        // Now index into the second dimension with index1.  First figure
        // out the type of ptr0.
        const Type *baseType = ptrType->GetBaseType();
        const SequentialType *st = CastType<SequentialType>(baseType);
        AssertPos(currentPos, st != NULL);

        // ptr0's variability depends on whether the first GEP had to go
        // varying; mirror that in its ispc pointer type.
        bool ptr0IsUniform =
            llvm::isa<llvm::PointerType>(ptr0->getType());
        const Type *ptr0BaseType = st->GetElementType();
        const Type *ptr0Type = ptr0IsUniform ?
            PointerType::GetUniform(ptr0BaseType) :
            PointerType::GetVarying(ptr0BaseType);

        return applyVaryingGEP(ptr0, index1, ptr0Type);
    }
}
2595 
2596 
/** Return a pointer to element number elementNum of the collection
    (struct, array, or vector) that fullBasePtr points to.  Handles
    uniform, varying, and slice pointers.  If resultPtrType is non-NULL,
    it receives the ispc pointer type of the returned value (and then
    ptrRefType must be non-NULL too). */
llvm::Value *
FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum,
                                      const Type *ptrRefType, const char *name,
                                      const PointerType **resultPtrType) {
    if (resultPtrType != NULL)
        AssertPos(currentPos, ptrRefType != NULL);

    // Bail out (an error has already been reported) if we're asked to
    // index into a struct type that isn't fully defined/sized yet.
    llvm::PointerType *llvmPtrType =
        llvm::dyn_cast<llvm::PointerType>(fullBasePtr->getType());
    if (llvmPtrType != NULL) {
        llvm::StructType *llvmStructType =
            llvm::dyn_cast<llvm::StructType>(llvmPtrType->getElementType());
        if (llvmStructType != NULL && llvmStructType->isSized() == false) {
            AssertPos(currentPos, m->errorCount > 0);
            return NULL;
        }
    }

    // (Unfortunately) it's not required to pass a non-NULL ptrRefType, but
    // if we have one, regularize into a pointer type.
    const PointerType *ptrType = NULL;
    if (ptrRefType != NULL) {
        // Normalize references to uniform pointers
        if (CastType<ReferenceType>(ptrRefType) != NULL)
            ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
        else
            ptrType = CastType<PointerType>(ptrRefType);
        AssertPos(currentPos, ptrType != NULL);
    }

    // Similarly, we have to see if the pointer type is a struct to see if
    // we have a slice pointer instead of looking at ptrType; this is also
    // unfortunate...
    llvm::Value *basePtr = fullBasePtr;
    bool baseIsSlicePtr =
        llvm::isa<llvm::StructType>(fullBasePtr->getType());
    const PointerType *rpt;
    if (baseIsSlicePtr) {
        AssertPos(currentPos, ptrType != NULL);
        // Update basePtr to just be the part that actually points to the
        // start of an soa<> struct for now; the element offset computation
        // doesn't change the slice offset, so we'll incorporate that into
        // the final value right before this method returns.
        basePtr = ExtractInst(fullBasePtr, 0);
        if (resultPtrType == NULL)
            resultPtrType = &rpt;
    }

    // Return the pointer type of the result of this call, for callers that
    // want it.
    if (resultPtrType != NULL) {
        AssertPos(currentPos, ptrType != NULL);
        const CollectionType *ct =
            CastType<CollectionType>(ptrType->GetBaseType());
        AssertPos(currentPos, ct != NULL);
        *resultPtrType = new PointerType(ct->GetElementType(elementNum),
                                         ptrType->GetVariability(),
                                         ptrType->IsConstType(),
                                         ptrType->IsSlice());
    }

    llvm::Value *resultPtr = NULL;
    if (ptrType == NULL || ptrType->IsUniformType()) {
        // If the pointer is uniform, we can use the regular LLVM GEP.
        llvm::Value *offsets[2] = { LLVMInt32(0), LLVMInt32(elementNum) };
        llvm::ArrayRef<llvm::Value *> arrayRef(&offsets[0], &offsets[2]);
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
        resultPtr =
            llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                            name ? name : "struct_offset", bblock);
#else /* LLVM 3.7+ */
        resultPtr =
            llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef,
                                            name ? name : "struct_offset", bblock);
#endif
    }
    else {
        // Otherwise do the math to find the offset and add it to the given
        // varying pointers
        const StructType *st = CastType<StructType>(ptrType->GetBaseType());
        llvm::Value *offset = NULL;
        if (st != NULL)
            // If the pointer is to a structure, Target::StructOffset() gives
            // us the offset in bytes to the given element of the structure
            offset = g->target->StructOffset(st->LLVMType(g->ctx), elementNum,
                                             bblock);
        else {
            // Otherwise we should have a vector or array here and the offset
            // is given by the element number times the size of the element
            // type of the vector.
            const SequentialType *st =
                CastType<SequentialType>(ptrType->GetBaseType());
            AssertPos(currentPos, st != NULL);
            llvm::Value *size =
                g->target->SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock);
            llvm::Value *scale = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
                LLVMInt32(elementNum) : LLVMInt64(elementNum);
            offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
        }

        offset = SmearUniform(offset, "offset_smear");

        if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
            // If we're doing 32 bit addressing with a 64 bit target, although
            // we did the math above in 32 bit, we need to go to 64 bit before
            // we add the offset to the varying pointers.
            offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");

        resultPtr = BinaryOperator(llvm::Instruction::Add, basePtr, offset,
                                   "struct_ptr_offset");
    }

    // Finally, if had a slice pointer going in, mash back together with
    // the original (unchanged) slice offset.
    if (baseIsSlicePtr)
        return MakeSlicePointer(resultPtr, ExtractInst(fullBasePtr, 1));
    else
        return resultPtr;
}
2716 
2717 
2718 llvm::Value *
2719 FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
2720  if (ptr == NULL) {
2721  AssertPos(currentPos, m->errorCount > 0);
2722  return NULL;
2723  }
2724 
2725  llvm::PointerType *pt =
2726  llvm::dyn_cast<llvm::PointerType>(ptr->getType());
2727  AssertPos(currentPos, pt != NULL);
2728 
2729  if (name == NULL)
2730  name = LLVMGetName(ptr, "_load");
2731 
2732  llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock);
2733 
2734  if (g->opt.forceAlignedMemory &&
2735  llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
2736  inst->setAlignment(g->target->getNativeVectorAlignment());
2737  }
2738 
2739  AddDebugPos(inst);
2740  return inst;
2741 }
2742 
2743 
2744 /** Given a slice pointer to soa'd data that is a basic type (atomic,
2745  pointer, or enum type), use the slice offset to compute pointer(s) to
2746  the appropriate individual data element(s).
2747  */
2748 static llvm::Value *
2750  const PointerType **ptrType) {
2751  Assert(CastType<PointerType>(*ptrType) != NULL);
2752 
2753  llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, LLVMGetName(ptr, "_ptr"));
2754  llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, LLVMGetName(ptr, "_offset"));
2755 
2756  // slicePtr should be a pointer to an soa-width wide array of the
2757  // final atomic/enum/pointer type
2758  const Type *unifBaseType = (*ptrType)->GetBaseType()->GetAsUniformType();
2759  Assert(Type::IsBasicType(unifBaseType));
2760 
2761  // The final pointer type is a uniform or varying pointer to the
2762  // underlying uniform type, depending on whether the given pointer is
2763  // uniform or varying.
2764  *ptrType = (*ptrType)->IsUniformType() ?
2765  PointerType::GetUniform(unifBaseType) :
2766  PointerType::GetVarying(unifBaseType);
2767 
2768  // For uniform pointers, bitcast to a pointer to the uniform element
2769  // type, so that the GEP below does the desired indexing
2770  if ((*ptrType)->IsUniformType())
2771  slicePtr = ctx->BitCastInst(slicePtr, (*ptrType)->LLVMType(g->ctx));
2772 
2773  // And finally index based on the slice offset
2774  return ctx->GetElementPtrInst(slicePtr, sliceOffset, *ptrType,
2775  LLVMGetName(slicePtr, "_final_gep"));
2776 }
2777 
2778 
2779 /** Utility routine that loads from a uniform pointer to soa<> data,
2780  returning a regular uniform (non-SOA result).
2781  */
2782 llvm::Value *
2783 FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask,
2784  const PointerType *ptrType,
2785  const char *name) {
2786  const Type *unifType = ptrType->GetBaseType()->GetAsUniformType();
2787 
2788  const CollectionType *ct = CastType<CollectionType>(ptrType->GetBaseType());
2789  if (ct != NULL) {
2790  // If we have a struct/array, we need to decompose it into
2791  // individual element loads to fill in the result structure since
2792  // the SOA slice of values we need isn't contiguous in memory...
2793  llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx);
2794  llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
2795 
2796  for (int i = 0; i < ct->GetElementCount(); ++i) {
2797  const PointerType *eltPtrType;
2798  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType,
2799  "elt_offset", &eltPtrType);
2800  llvm::Value *eltValue = LoadInst(eltPtr, mask, eltPtrType, name);
2801  retValue = InsertInst(retValue, eltValue, i, "set_value");
2802  }
2803 
2804  return retValue;
2805  }
2806  else {
2807  // Otherwise we've made our way to a slice pointer to a basic type;
2808  // we need to apply the slice offset into this terminal SOA array
2809  // and then perform the final load
2810  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2811  return LoadInst(ptr, mask, ptrType, name);
2812  }
2813 }
2814 
2815 
llvm::Value *
FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask,
                              const Type *ptrRefType, const char *name,
                              bool one_elem) {
    // Mask-aware load.  Dispatches to: an SOA slice load, a plain scalar
    // load, or (for a varying pointer) a gather.  'one_elem' selects the
    // "bound uniform" kludge path that returns a single active lane.
    if (ptr == NULL) {
        // NULL operands only occur after an error was already reported.
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    AssertPos(currentPos, ptrRefType != NULL && mask != NULL);

    if (name == NULL)
        name = LLVMGetName(ptr, "_load");

    // References are treated as uniform pointers to their target type.
    const PointerType *ptrType;
    if (CastType<ReferenceType>(ptrRefType) != NULL)
        ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
    else {
        ptrType = CastType<PointerType>(ptrRefType);
        AssertPos(currentPos, ptrType != NULL);
    }

    if (CastType<UndefinedStructType>(ptrType->GetBaseType())) {
        Error(currentPos, "Unable to load to undefined struct type \"%s\".",
              ptrType->GetBaseType()->GetString().c_str());
        return NULL;
    }

    if (ptrType->IsUniformType()) {
        if (ptrType->IsSlice()) {
            // Uniform slice pointer into SOA data: decompose element-wise.
            return loadUniformFromSOA(ptr, mask, ptrType, name);
        }
        else {
            // FIXME: same issue as above load inst regarding alignment...
            //
            // If the ptr is a straight up regular pointer, then just issue
            // a regular load. First figure out the alignment; in general we
            // can just assume the natural alignment (0 here), but for varying
            // atomic types, we need to make sure that the compiler emits
            // unaligned vector loads, so we specify a reduced alignment here.
            int align = 0;
            const AtomicType *atomicType =
                CastType<AtomicType>(ptrType->GetBaseType());
            if (atomicType != NULL && atomicType->IsVaryingType())
                // We actually just want to align to the vector element
                // alignment, but can't easily get that here, so just tell LLVM
                // it's totally unaligned. (This shouldn't make any difference
                // vs the proper alignment in practice.)
                align = 1;
            llvm::Instruction *inst = new llvm::LoadInst(ptr, name,
                                                         false /* not volatile */,
                                                         align, bblock);
            AddDebugPos(inst);
            return inst;
        }
    }
    else {
        // Otherwise we should have a varying ptr and it's time for a
        // gather.
        // NOTE(review): the gather is issued with GetFullMask() while the
        // movmsk below uses the 'mask' parameter -- confirm this asymmetry
        // is intentional.
        llvm::Value *gather_result = gather(ptr, ptrType, GetFullMask(), name);
        if (!one_elem)
            return gather_result;

        // It is a kludge. When we dereference varying pointer to uniform struct
        // with "bound uniform" member, we should return first unmasked member.
        Warning(currentPos, "Dereferencing varying pointer to uniform struct with 'bound uniform' member,\n"
                " only one value will survive. Possible loss of data.");
        // Call the target-dependent movmsk function to turn the vector mask
        // into an i64 value
        std::vector<Symbol *> mm;
        m->symbolTable->LookupFunction("__movmsk", &mm);
        if (g->target->getMaskBitCount() == 1)
            AssertPos(currentPos, mm.size() == 1);
        else
            // There should be one with signed int signature, one unsigned int.
            AssertPos(currentPos, mm.size() == 2);
        // We can actually call either one, since both are i32s as far as
        // LLVM's type system is concerned...
        llvm::Function *fmm = mm[0]->function;
        llvm::Value *int_mask = CallInst(fmm, NULL, mask, LLVMGetName(mask, "_movmsk"));
        std::vector<Symbol *> lz;
        m->symbolTable->LookupFunction("__count_trailing_zeros_i64", &lz);
        llvm::Function *flz = lz[0]->function;
        // Index of the first active lane; extract that element of the
        // gather result as the single surviving value.
        llvm::Value *elem_idx = CallInst(flz, NULL, int_mask, LLVMGetName(mask, "_clz"));
        llvm::Value *elem = llvm::ExtractElementInst::Create(gather_result, elem_idx, LLVMGetName(gather_result, "_umasked_elem"), bblock);
        return elem;
    }
}
2904 
2905 
llvm::Value *
FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType,
                            llvm::Value *mask, const char *name) {
    // We should have a varying pointer if we get here...
    AssertPos(currentPos, ptrType->IsVaryingType());

    // A gather produces one value per program instance, so the result type
    // is the varying variant of the pointed-to type.
    const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType();
    llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);

    const CollectionType *collectionType =
        CastType<CollectionType>(ptrType->GetBaseType());
    if (collectionType != NULL) {
        // For collections, recursively gather element wise to find the
        // result.
        llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);

        const CollectionType *returnCollectionType =
            CastType<CollectionType>(returnType->GetBaseType());

        for (int i = 0; i < collectionType->GetElementCount(); ++i) {
            const PointerType *eltPtrType;
            llvm::Value *eltPtr =
                AddElementOffset(ptr, i, ptrType, "gather_elt_ptr", &eltPtrType);

            eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType);

            // It is a kludge. When we dereference varying pointer to uniform struct
            // with "bound uniform" member, we should return first unmasked member.
            int need_one_elem = CastType<StructType>(ptrType->GetBaseType()) &&
                returnCollectionType->GetElementType(i)->IsUniformType();
            // This in turn will be another gather
            llvm::Value *eltValues = LoadInst(eltPtr, mask, eltPtrType, name, need_one_elem);

            retValue = InsertInst(retValue, eltValues, i, "set_value");
        }
        return retValue;
    }
    else if (ptrType->IsSlice()) {
        // If we have a slice pointer, we need to add the final slice
        // offset here right before issuing the actual gather
        //
        // FIXME: would it be better to do the corresponding same thing for
        // all of the varying offsets stuff here (and in scatter)?
        ptr = lFinalSliceOffset(this, ptr, &ptrType);
    }

    // Otherwise we should just have a basic scalar or pointer type and we
    // can go and do the actual gather
    AddInstrumentationPoint("gather");

    // Figure out which gather function to call based on the size of
    // the elements.
    const PointerType *pt = CastType<PointerType>(returnType);
    const char *funcName = NULL;
    if (pt != NULL)
        // Pointers gather as pointer-sized integers.
        funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" :
            "__pseudo_gather64_i64";
    else if (llvmReturnType == LLVMTypes::DoubleVectorType)
        funcName = g->target->is32Bit() ? "__pseudo_gather32_double" :
            "__pseudo_gather64_double";
    else if (llvmReturnType == LLVMTypes::Int64VectorType)
        funcName = g->target->is32Bit() ? "__pseudo_gather32_i64" :
            "__pseudo_gather64_i64";
    else if (llvmReturnType == LLVMTypes::FloatVectorType)
        funcName = g->target->is32Bit() ? "__pseudo_gather32_float" :
            "__pseudo_gather64_float";
    else if (llvmReturnType == LLVMTypes::Int32VectorType)
        funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" :
            "__pseudo_gather64_i32";
    else if (llvmReturnType == LLVMTypes::Int16VectorType)
        funcName = g->target->is32Bit() ? "__pseudo_gather32_i16" :
            "__pseudo_gather64_i16";
    else {
        AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType);
        funcName = g->target->is32Bit() ? "__pseudo_gather32_i8" :
            "__pseudo_gather64_i8";
    }

    // The __pseudo_gather* declarations are expected to already be present
    // in the module; later optimization passes lower them to real code.
    llvm::Function *gatherFunc = m->module->getFunction(funcName);
    AssertPos(currentPos, gatherFunc != NULL);

    llvm::Value *gatherCall = CallInst(gatherFunc, NULL, ptr, mask, name);

    // Add metadata about the source file location so that the
    // optimization passes can print useful performance warnings if we
    // can't optimize out this gather
    if (disableGSWarningCount == 0)
        addGSMetadata(gatherCall, currentPos);

    return gatherCall;
}
2997 
2998 
2999 /** Add metadata to the given instruction to encode the current source file
3000  position. This data is used in the lGetSourcePosFromMetadata()
3001  function in opt.cpp.
3002 */
3003 void
3005  llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(v);
3006  if (inst == NULL)
3007  return;
3008 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3009  llvm::Value *str = llvm::MDString::get(*g->ctx, pos.name);
3010 #else /* LLVN 3.6+ */
3011  llvm::MDString *str = llvm::MDString::get(*g->ctx, pos.name);
3012 #endif
3013  llvm::MDNode *md = llvm::MDNode::get(*g->ctx, str);
3014  inst->setMetadata("filename", md);
3015 
3016 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3017  llvm::Value *first_line = LLVMInt32(pos.first_line);
3018 #else /* LLVN 3.6+ */
3019  llvm::Metadata *first_line = llvm::ConstantAsMetadata::get(LLVMInt32(pos.first_line));
3020 #endif
3021  md = llvm::MDNode::get(*g->ctx, first_line);
3022  inst->setMetadata("first_line", md);
3023 
3024 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3025  llvm::Value *first_column = LLVMInt32(pos.first_column);
3026 #else /* LLVN 3.6+ */
3027  llvm::Metadata *first_column = llvm::ConstantAsMetadata::get(LLVMInt32(pos.first_column));
3028 #endif
3029  md = llvm::MDNode::get(*g->ctx, first_column);
3030  inst->setMetadata("first_column", md);
3031 
3032 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3033  llvm::Value *last_line = LLVMInt32(pos.last_line);
3034 #else /* LLVN 3.6+ */
3035  llvm::Metadata *last_line = llvm::ConstantAsMetadata::get(LLVMInt32(pos.last_line));
3036 #endif
3037  md = llvm::MDNode::get(*g->ctx, last_line);
3038  inst->setMetadata("last_line", md);
3039 
3040 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3041  llvm::Value *last_column = LLVMInt32(pos.last_column);
3042 #else /* LLVN 3.6+ */
3043  llvm::Metadata *last_column = llvm::ConstantAsMetadata::get(LLVMInt32(pos.last_column));
3044 #endif
3045  md = llvm::MDNode::get(*g->ctx, last_column);
3046  inst->setMetadata("last_column", md);
3047 }
3048 
3049 
llvm::Value *
FunctionEmitContext::AllocaInst(llvm::Type *llvmType,
                                const char *name, int align,
                                bool atEntryBlock) {
    // Allocate stack storage of the given LLVM type, either in the
    // function's dedicated alloca block (the default) or in the current
    // basic block, and apply an appropriate alignment.
    if (llvmType == NULL) {
        // NULL type only occurs after an earlier error was reported.
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    llvm::AllocaInst *inst = NULL;
    if (atEntryBlock) {
        // We usually insert it right before the jump instruction at the
        // end of allocaBlock
        llvm::Instruction *retInst = allocaBlock->getTerminator();
        AssertPos(currentPos, retInst);
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
        inst = new llvm::AllocaInst(llvmType, name ? name : "", retInst);
#else // LLVM 5.0+
        // LLVM 5.0+ requires the alloca address space from the data layout.
        unsigned AS = llvmFunction->getParent()->getDataLayout().getAllocaAddrSpace();
        inst = new llvm::AllocaInst(llvmType, AS, name ? name : "", retInst);
#endif
    }
    else {
        // Unless the caller overrode the default and wants it in the
        // current basic block
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
        inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock);
#else // LLVM 5.0+
        unsigned AS = llvmFunction->getParent()->getDataLayout().getAllocaAddrSpace();
        inst = new llvm::AllocaInst(llvmType, AS, name ? name : "", bblock);
#endif
    }

    // If no alignment was specified but we have an array of a uniform
    // type, then align it to the native vector alignment; it's not
    // unlikely that this array will be loaded into varying variables with
    // what will be aligned accesses if the uniform -> varying load is done
    // in regular chunks.
    llvm::ArrayType *arrayType =
        llvm::dyn_cast<llvm::ArrayType>(llvmType);
    if (align == 0 && arrayType != NULL &&
        !llvm::isa<llvm::VectorType>(arrayType->getElementType()))
        align = g->target->getNativeVectorAlignment();

    if (align != 0)
        inst->setAlignment(align);
    // Don't add debugging info to alloca instructions
    return inst;
}
3099 
3100 
/** Code to store the given varying value to the given location, only
    storing the elements that correspond to active program instances as
    given by the provided storeMask value. Note that the lvalue is only a
    single pointer, not a varying lvalue of one pointer per program
    instance (that case is handled by scatters).
 */
void
FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
                                 const Type *ptrType, llvm::Value *mask) {
    if (value == NULL || ptr == NULL) {
        // NULL operands only occur after an earlier error was reported.
        AssertPos(currentPos, m->errorCount > 0);
        return;
    }

    AssertPos(currentPos, CastType<PointerType>(ptrType) != NULL);
    AssertPos(currentPos, ptrType->IsUniformType());

    const Type *valueType = ptrType->GetBaseType();
    const CollectionType *collectionType = CastType<CollectionType>(valueType);
    if (collectionType != NULL) {
        // Assigning a structure / array / vector. Handle each element
        // individually with what turns into a recursive call to
        // maskedStore()
        for (int i = 0; i < collectionType->GetElementCount(); ++i) {
            const Type *eltType = collectionType->GetElementType(i);
            if (eltType == NULL) {
                // A NULL element type indicates an earlier type error.
                Assert(m->errorCount > 0);
                continue;
            }
            llvm::Value *eltValue = ExtractInst(value, i, "value_member");
            llvm::Value *eltPtr =
                AddElementOffset(ptr, i, ptrType, "struct_ptr_ptr");
            const Type *eltPtrType = PointerType::GetUniform(eltType);
            StoreInst(eltValue, eltPtr, mask, eltType, eltPtrType);
        }
        return;
    }

    // We must have a regular atomic, enumerator, or pointer type at this
    // point.
    AssertPos(currentPos, Type::IsBasicType(valueType));
    valueType = valueType->GetAsNonConstType();

    // Figure out if we need a 8, 16, 32 or 64-bit masked store.
    llvm::Function *maskedStoreFunc = NULL;
    llvm::Type *llvmValueType = value->getType();

    const PointerType *pt = CastType<PointerType>(valueType);
    if (pt != NULL) {
        if (pt->IsSlice()) {
            // Masked store of (varying) slice pointer.
            AssertPos(currentPos, pt->IsVaryingType());

            // First, extract the pointer from the slice struct and masked
            // store that.
            llvm::Value *v0 = ExtractInst(value, 0);
            llvm::Value *p0 = AddElementOffset(ptr, 0, ptrType);
            maskedStore(v0, p0, PointerType::GetUniform(pt->GetAsNonSlice()),
                        mask);

            // And then do same for the integer offset
            llvm::Value *v1 = ExtractInst(value, 1);
            llvm::Value *p1 = AddElementOffset(ptr, 1, ptrType);
            const Type *offsetType = AtomicType::VaryingInt32;
            maskedStore(v1, p1, PointerType::GetUniform(offsetType), mask);

            return;
        }

        // Non-slice pointers are stored as pointer-sized integers.
        if (g->target->is32Bit())
            maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
        else
            maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
    }
    else if (llvmValueType == LLVMTypes::Int1VectorType) {
        // Bool vectors have no masked-store pseudo function; emulate with
        // an unmasked load, a bitwise blend, and an unmasked store.
        llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask,
                                              LLVMMaskAllOn, "~mask");
        llvm::Value *old = LoadInst(ptr);
        llvm::Value *maskedOld = BinaryOperator(llvm::Instruction::And, old,
                                                notMask, "old&~mask");
        llvm::Value *maskedNew = BinaryOperator(llvm::Instruction::And, value,
                                                mask, "new&mask");
        llvm::Value *final = BinaryOperator(llvm::Instruction::Or, maskedOld,
                                            maskedNew, "old_new_result");
        StoreInst(final, ptr);
        return;
    }
    else if (llvmValueType == LLVMTypes::DoubleVectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_double");
    }
    else if (llvmValueType == LLVMTypes::Int64VectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
    }
    else if (llvmValueType == LLVMTypes::FloatVectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_float");
    }
    else if (llvmValueType == LLVMTypes::Int32VectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
    }
    else if (llvmValueType == LLVMTypes::Int16VectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i16");
    }
    else if (llvmValueType == LLVMTypes::Int8VectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i8");
    }
    AssertPos(currentPos, maskedStoreFunc != NULL);

    // __pseudo_masked_store_*(ptr, value, mask)
    std::vector<llvm::Value *> args;
    args.push_back(ptr);
    args.push_back(value);
    args.push_back(mask);
    CallInst(maskedStoreFunc, NULL, args);
}
3214 
3215 
3216 
/** Scatter the given varying value to the locations given by the varying
    lvalue (which should be an array of pointers with size equal to the
    target's vector width. We want to store each rvalue element at the
    corresponding pointer's location, *if* the mask for the corresponding
    program instance are on. If they're off, don't do anything.
*/
void
FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
                             const Type *valueType, const Type *origPt,
                             llvm::Value *mask) {
    const PointerType *ptrType = CastType<PointerType>(origPt);
    AssertPos(currentPos, ptrType != NULL);
    AssertPos(currentPos, ptrType->IsVaryingType());

    const CollectionType *srcCollectionType =
        CastType<CollectionType>(valueType);
    if (srcCollectionType != NULL) {
        // We're scattering a collection type--we need to keep track of the
        // source type (the type of the data values to be stored) and the
        // destination type (the type of objects in memory that will be
        // stored into) separately. This is necessary so that we can get
        // all of the addressing calculations right if we're scattering
        // from a varying struct to an array of uniform instances of the
        // same struct type, versus scattering into an array of varying
        // instances of the struct type, etc.
        const CollectionType *dstCollectionType =
            CastType<CollectionType>(ptrType->GetBaseType());
        AssertPos(currentPos, dstCollectionType != NULL);

        // Scatter the collection elements individually
        for (int i = 0; i < srcCollectionType->GetElementCount(); ++i) {
            // First, get the values for the current element out of the
            // source.
            llvm::Value *eltValue = ExtractInst(value, i);
            const Type *srcEltType = srcCollectionType->GetElementType(i);

            // We may be scattering a uniform atomic element; in this case
            // we'll smear it out to be varying before making the recursive
            // scatter() call below.
            if (srcEltType->IsUniformType() && Type::IsBasicType(srcEltType)) {
                eltValue = SmearUniform(eltValue, "to_varying");
                srcEltType = srcEltType->GetAsVaryingType();
            }

            // Get the (varying) pointer to the i'th element of the target
            // collection
            llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType);

            // The destination element type may be uniform (e.g. if we're
            // scattering to an array of uniform structs). Thus, we need
            // to be careful about passing the correct type to
            // addVaryingOffsetsIfNeeded() here.
            const Type *dstEltType = dstCollectionType->GetElementType(i);
            const PointerType *dstEltPtrType = PointerType::GetVarying(dstEltType);
            if (ptrType->IsSlice())
                dstEltPtrType = dstEltPtrType->GetAsSlice();

            eltPtr = addVaryingOffsetsIfNeeded(eltPtr, dstEltPtrType);

            // And recursively scatter() until we hit a basic type, at
            // which point the actual memory operations can be performed...
            scatter(eltValue, eltPtr, srcEltType, dstEltPtrType, mask);
        }
        return;
    }
    else if (ptrType->IsSlice()) {
        // As with gather, we need to add the final slice offset finally
        // once we get to a terminal SOA array of basic types..
        ptr = lFinalSliceOffset(this, ptr, &ptrType);
    }

    const PointerType *pt = CastType<PointerType>(valueType);

    // And everything should be a pointer or atomic (or enum) from here on out...
    AssertPos(currentPos,
              pt != NULL
              || CastType<AtomicType>(valueType) != NULL
              || CastType<EnumType>(valueType) != NULL);

    // Select the __pseudo_scatter* function by the LLVM type of the value.
    llvm::Type *type = value->getType();
    const char *funcName = NULL;
    if (pt != NULL) {
        // Pointers scatter as pointer-sized integers.
        funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" :
            "__pseudo_scatter64_i64";
    }
    else if (type == LLVMTypes::DoubleVectorType) {
        funcName = g->target->is32Bit() ? "__pseudo_scatter32_double" :
            "__pseudo_scatter64_double";
    }
    else if (type == LLVMTypes::Int64VectorType) {
        funcName = g->target->is32Bit() ? "__pseudo_scatter32_i64" :
            "__pseudo_scatter64_i64";
    }
    else if (type == LLVMTypes::FloatVectorType) {
        funcName = g->target->is32Bit() ? "__pseudo_scatter32_float" :
            "__pseudo_scatter64_float";
    }
    else if (type == LLVMTypes::Int32VectorType) {
        funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" :
            "__pseudo_scatter64_i32";
    }
    else if (type == LLVMTypes::Int16VectorType) {
        funcName = g->target->is32Bit() ? "__pseudo_scatter32_i16" :
            "__pseudo_scatter64_i16";
    }
    else if (type == LLVMTypes::Int8VectorType) {
        funcName = g->target->is32Bit() ? "__pseudo_scatter32_i8" :
            "__pseudo_scatter64_i8";
    }

    // NOTE(review): unlike gather(), there is no final assert covering the
    // fall-through case, so funcName could still be NULL here if 'type'
    // matched none of the cases above -- confirm that's unreachable.
    llvm::Function *scatterFunc = m->module->getFunction(funcName);
    AssertPos(currentPos, scatterFunc != NULL);

    AddInstrumentationPoint("scatter");

    // __pseudo_scatter*(ptr, value, mask)
    std::vector<llvm::Value *> args;
    args.push_back(ptr);
    args.push_back(value);
    args.push_back(mask);
    llvm::Value *inst = CallInst(scatterFunc, NULL, args);

    // Tag the scatter with source-position metadata so optimization passes
    // can emit useful performance warnings if it can't be optimized away.
    if (disableGSWarningCount == 0)
        addGSMetadata(inst, currentPos);
}
3341 
3342 
3343 void
3344 FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
3345  if (value == NULL || ptr == NULL) {
3346  // may happen due to error elsewhere
3347  AssertPos(currentPos, m->errorCount > 0);
3348  return;
3349  }
3350 
3351  llvm::PointerType *pt =
3352  llvm::dyn_cast<llvm::PointerType>(ptr->getType());
3353  AssertPos(currentPos, pt != NULL);
3354 
3355  llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock);
3356 
3357  if (g->opt.forceAlignedMemory &&
3358  llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
3359  inst->setAlignment(g->target->getNativeVectorAlignment());
3360  }
3361 
3362  AddDebugPos(inst);
3363 }
3364 
3365 
3366 void
3367 FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
3368  llvm::Value *mask, const Type *valueType,
3369  const Type *ptrRefType) {
3370  if (value == NULL || ptr == NULL) {
3371  // may happen due to error elsewhere
3372  AssertPos(currentPos, m->errorCount > 0);
3373  return;
3374  }
3375 
3376  const PointerType *ptrType;
3377  if (CastType<ReferenceType>(ptrRefType) != NULL)
3378  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
3379  else {
3380  ptrType = CastType<PointerType>(ptrRefType);
3381  AssertPos(currentPos, ptrType != NULL);
3382  }
3383 
3384  if (CastType<UndefinedStructType>(ptrType->GetBaseType())) {
3385  Error(currentPos, "Unable to store to undefined struct type \"%s\".",
3386  ptrType->GetBaseType()->GetString().c_str());
3387  return;
3388  }
3389 
3390  // Figure out what kind of store we're doing here
3391  if (ptrType->IsUniformType()) {
3392  if (ptrType->IsSlice())
3393  // storing a uniform value to a single slice of a SOA type
3394  storeUniformToSOA(value, ptr, mask, valueType, ptrType);
3395  else if (ptrType->GetBaseType()->IsUniformType())
3396  // the easy case
3397  StoreInst(value, ptr);
3398  else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations)
3399  // Otherwise it is a masked store unless we can determine that the
3400  // mask is all on... (Unclear if this check is actually useful.)
3401  StoreInst(value, ptr);
3402  else
3403  maskedStore(value, ptr, ptrType, mask);
3404  }
3405  else {
3406  AssertPos(currentPos, ptrType->IsVaryingType());
3407  // We have a varying ptr (an array of pointers), so it's time to
3408  // scatter
3409  scatter(value, ptr, valueType, ptrType, GetFullMask());
3410  }
3411 }
3412 
3413 
3414 /** Store a uniform type to SOA-laid-out memory.
3415  */
3416 void
3417 FunctionEmitContext::storeUniformToSOA(llvm::Value *value, llvm::Value *ptr,
3418  llvm::Value *mask, const Type *valueType,
3419  const PointerType *ptrType) {
3421  valueType));
3422 
3423  const CollectionType *ct = CastType<CollectionType>(valueType);
3424  if (ct != NULL) {
3425  // Handle collections element wise...
3426  for (int i = 0; i < ct->GetElementCount(); ++i) {
3427  llvm::Value *eltValue = ExtractInst(value, i);
3428  const Type *eltType = ct->GetElementType(i);
3429  const PointerType *dstEltPtrType;
3430  llvm::Value *dstEltPtr =
3431  AddElementOffset(ptr, i, ptrType, "slice_offset",
3432  &dstEltPtrType);
3433  StoreInst(eltValue, dstEltPtr, mask, eltType, dstEltPtrType);
3434  }
3435  }
3436  else {
3437  // We're finally at a leaf SOA array; apply the slice offset and
3438  // then we can do a final regular store
3439  AssertPos(currentPos, Type::IsBasicType(valueType));
3440  ptr = lFinalSliceOffset(this, ptr, &ptrType);
3441  StoreInst(value, ptr);
3442  }
3443 }
3444 
3445 
void
FunctionEmitContext::MemcpyInst(llvm::Value *dest, llvm::Value *src,
                                llvm::Value *count, llvm::Value *align) {
    // Emit a call to the llvm.memcpy intrinsic: both pointers are cast to
    // i8* and the byte count is widened to i64 to match its signature.
    dest = BitCastInst(dest, LLVMTypes::VoidPointerType);
    src = BitCastInst(src, LLVMTypes::VoidPointerType);
    if (count->getType() != LLVMTypes::Int64Type) {
        AssertPos(currentPos, count->getType() == LLVMTypes::Int32Type);
        count = ZExtInst(count, LLVMTypes::Int64Type, "count_to_64");
    }
    if (align == NULL)
        // Default to the weakest (1-byte) alignment guarantee.
        align = LLVMInt32(1);

    // NOTE(review): the getOrInsertFunction() argument lists (the
    // intrinsic's parameter-type arguments) appear truncated in this view
    // of the file -- verify against the full source.
    llvm::Constant *mcFunc =
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
        m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64",
#else // LLVM 5.0+
        m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64",
#endif

    AssertPos(currentPos, mcFunc != NULL);
    AssertPos(currentPos, llvm::isa<llvm::Function>(mcFunc));

    // llvm.memcpy(dest, src, count, align, isvolatile)
    std::vector<llvm::Value *> args;
    args.push_back(dest);
    args.push_back(src);
    args.push_back(count);
    args.push_back(align);
    args.push_back(LLVMFalse); /* not volatile */
    CallInst(mcFunc, NULL, args, "");
}
3482 
3483 
3484 void
3485 FunctionEmitContext::BranchInst(llvm::BasicBlock *dest) {
3486  llvm::Instruction *b = llvm::BranchInst::Create(dest, bblock);
3487  AddDebugPos(b);
3488 }
3489 
3490 
3491 void
3492 FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock,
3493  llvm::BasicBlock *falseBlock,
3494  llvm::Value *test) {
3495  if (test == NULL) {
3496  AssertPos(currentPos, m->errorCount > 0);
3497  return;
3498  }
3499 
3500  llvm::Instruction *b =
3501  llvm::BranchInst::Create(trueBlock, falseBlock, test, bblock);
3502  AddDebugPos(b);
3503 }
3504 
3505 
3506 llvm::Value *
3507 FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) {
3508  if (v == NULL) {
3509  AssertPos(currentPos, m->errorCount > 0);
3510  return NULL;
3511  }
3512 
3513  if (name == NULL) {
3514  char buf[32];
3515  sprintf(buf, "_extract_%d", elt);
3516  name = LLVMGetName(v, buf);
3517  }
3518 
3519  llvm::Instruction *ei = NULL;
3520  if (llvm::isa<llvm::VectorType>(v->getType()))
3521  ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), name, bblock);
3522  else
3523  ei = llvm::ExtractValueInst::Create(v, elt, name, bblock);
3524  AddDebugPos(ei);
3525  return ei;
3526 }
3527 
3528 
3529 llvm::Value *
3530 FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
3531  const char *name) {
3532  if (v == NULL || eltVal == NULL) {
3533  AssertPos(currentPos, m->errorCount > 0);
3534  return NULL;
3535  }
3536 
3537  if (name == NULL) {
3538  char buf[32];
3539  sprintf(buf, "_insert_%d", elt);
3540  name = LLVMGetName(v, buf);
3541  }
3542 
3543  llvm::Instruction *ii = NULL;
3544  if (llvm::isa<llvm::VectorType>(v->getType()))
3545  ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt),
3546  name, bblock);
3547  else
3548  ii = llvm::InsertValueInst::Create(v, eltVal, elt, name, bblock);
3549  AddDebugPos(ii);
3550  return ii;
3551 }
3552 
3553 
3554 llvm::Value *
3555 FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask,
3556  const char *name) {
3557  if (v1 == NULL || v2 == NULL || mask == NULL) {
3558  AssertPos(currentPos, m->errorCount > 0);
3559  return NULL;
3560  }
3561 
3562  if (name == NULL) {
3563  char buf[32];
3564  sprintf(buf, "_shuffle");
3565  name = LLVMGetName(v1, buf);
3566  }
3567 
3568  llvm::Instruction *ii = new llvm::ShuffleVectorInst(v1, v2, mask, name, bblock);
3569 
3570  AddDebugPos(ii);
3571  return ii;
3572 }
3573 
3574 
3575 llvm::Value *
3576 FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type* vecType,
3577  const char *name) {
3578  if (v == NULL || vecType == NULL) {
3579  AssertPos(currentPos, m->errorCount > 0);
3580  return NULL;
3581  }
3582 
3583  llvm::VectorType *ty = llvm::dyn_cast<llvm::VectorType>(vecType);
3584  Assert(ty && ty->getVectorElementType() == v->getType());
3585 
3586  if (name == NULL) {
3587  char buf[32];
3588  sprintf(buf, "_broadcast");
3589  name = LLVMGetName(v, buf);
3590  }
3591 
3592  // Generate the following sequence:
3593  // %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0
3594  // %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef,
3595  // <4 x i32> zeroinitializer
3596 
3597  llvm::Value *undef1 = llvm::UndefValue::get(vecType);
3598  llvm::Value *undef2 = llvm::UndefValue::get(vecType);
3599 
3600  // InsertElement
3601  llvm::Twine tw = llvm::Twine(name) + llvm::Twine("_init");
3602  llvm::Value *insert = InsertInst(undef1, v, 0, tw.str().c_str());
3603 
3604  // ShuffleVector
3605  llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
3606  vecType->getVectorNumElements(),
3607  llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
3608  llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, name);
3609 
3610  return ret;
3611 }
3612 
3613 
3614 llvm::PHINode *
3615 FunctionEmitContext::PhiNode(llvm::Type *type, int count,
3616  const char *name) {
3617  llvm::PHINode *pn = llvm::PHINode::Create(type, count,
3618  name ? name : "phi", bblock);
3619  AddDebugPos(pn);
3620  return pn;
3621 }
3622 
3623 
3624 llvm::Instruction *
3625 FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0,
3626  llvm::Value *val1, const char *name) {
3627  if (test == NULL || val0 == NULL || val1 == NULL) {
3628  AssertPos(currentPos, m->errorCount > 0);
3629  return NULL;
3630  }
3631 
3632  if (name == NULL)
3633  name = LLVMGetName(test, "_select");
3634 
3635  llvm::Instruction *inst = llvm::SelectInst::Create(test, val0, val1, name,
3636  bblock);
3637  AddDebugPos(inst);
3638  return inst;
3639 }
3640 
3641 
3642 /** Given a value representing a function to be called or possibly-varying
3643  pointer to a function to be called, figure out how many arguments the
3644  function has. */
3645 static unsigned int
3646 lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
3647  llvm::FunctionType *ft =
3648  llvm::dyn_cast<llvm::FunctionType>(callee->getType());
3649 
3650  if (ft == NULL) {
3651  llvm::PointerType *pt =
3652  llvm::dyn_cast<llvm::PointerType>(callee->getType());
3653  if (pt == NULL) {
3654  // varying--in this case, it must be the version of the
3655  // function that takes a mask
3656  return funcType->GetNumParameters() + 1;
3657  }
3658  ft = llvm::dyn_cast<llvm::FunctionType>(pt->getElementType());
3659  }
3660 
3661  Assert(ft != NULL);
3662  return ft->getNumParams();
3663 }
3664 
3665 
/** Emit a call to \p func with the given arguments.  For a uniform callee
    (a single function or function pointer), a single call instruction is
    emitted directly.  For a varying callee (a vector of function
    pointers, one per program instance), a loop is emitted that repeatedly
    calls each unique function pointer value with the mask of the active
    lanes that share it, accumulating the per-lane results via masked
    stores.  The execution mask is appended as a final argument when the
    callee's signature expects it.

    @param func     Function or vector of function pointers to call.
    @param funcType ispc-level type of the function being called.
    @param args     Argument values (without the trailing mask).
    @param name     Optional name for the call's result.
    @return The call result, or NULL for void calls / after errors.
 */
llvm::Value *
FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
                              const std::vector<llvm::Value *> &args,
                              const char *name) {
    if (func == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    std::vector<llvm::Value *> argVals = args;
    // Most of the time, the mask is passed as the last argument.  this
    // isn't the case for things like intrinsics, builtins, and extern "C"
    // functions from the application.  Add the mask if it's needed.
    unsigned int calleeArgCount = lCalleeArgCount(func, funcType);
    AssertPos(currentPos, argVals.size() + 1 == calleeArgCount ||
              argVals.size() == calleeArgCount);
    if (argVals.size() + 1 == calleeArgCount)
        argVals.push_back(GetFullMask());

    if (llvm::isa<llvm::VectorType>(func->getType()) == false) {
        // Regular 'uniform' function call--just one function or function
        // pointer, so just emit the IR directly.
        llvm::Instruction *ci =
            llvm::CallInst::Create(func, argVals, name ? name : "", bblock);

        // Copy noalias attribute to call instruction, to enable better
        // alias analysis.
        // TODO: what other attributes needs to be copied?
        // TODO: do the same for varing path.
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3 && ISPC_LLVM_VERSION < ISPC_LLVM_5_0 // LLVM 3.3-4.0
        llvm::CallInst *cc = llvm::dyn_cast<llvm::CallInst>(ci);
        if (cc &&
            cc->getCalledFunction() &&
            cc->getCalledFunction()->doesNotAlias(0)) {
            cc->addAttribute(0, llvm::Attribute::NoAlias);
        }
#else // LLVM 5.0+
        // LLVM 5.0 renamed the return-attribute query/index APIs.
        llvm::CallInst *cc = llvm::dyn_cast<llvm::CallInst>(ci);
        if (cc &&
            cc->getCalledFunction() &&
            cc->getCalledFunction()->returnDoesNotAlias()) {
            cc->addAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::NoAlias);
        }
#endif

        AddDebugPos(ci);
        return ci;
    }
    else {
        // Emit the code for a varying function call, where we have an
        // vector of function pointers, one for each program instance.  The
        // basic strategy is that we go through the function pointers, and
        // for the executing program instances, for each unique function
        // pointer that's in the vector, call that function with a mask
        // equal to the set of active program instances that also have that
        // function pointer.  When all unique function pointers have been
        // called, we're done.

        llvm::BasicBlock *bbTest = CreateBasicBlock("varying_funcall_test");
        llvm::BasicBlock *bbCall = CreateBasicBlock("varying_funcall_call");
        llvm::BasicBlock *bbDone = CreateBasicBlock("varying_funcall_done");

        // Get the current mask value so we can restore it later
        llvm::Value *origMask = GetInternalMask();

        // First allocate memory to accumulate the various program
        // instances' return values...
        const Type *returnType = funcType->GetReturnType();
        llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
        llvm::Value *resultPtr = NULL;
        if (llvmReturnType->isVoidTy() == false)
            resultPtr = AllocaInst(llvmReturnType);

        // The memory pointed to by maskPointer tracks the set of program
        // instances for which we still need to call the function they are
        // pointing to.  It starts out initialized with the mask of
        // currently running program instances.
        llvm::Value *maskPtr = AllocaInst(LLVMTypes::MaskType);
        StoreInst(GetFullMask(), maskPtr);

        // And now we branch to the test to see if there's more work to be
        // done.
        BranchInst(bbTest);

        // bbTest: are any lanes of the mask still on?  If so, jump to
        // bbCall
        SetCurrentBasicBlock(bbTest); {
            llvm::Value *maskLoad = LoadInst(maskPtr);
            llvm::Value *any = Any(maskLoad);
            BranchInst(bbCall, bbDone, any);
        }

        // bbCall: this is the body of the loop that calls out to one of
        // the active function pointer values.
        SetCurrentBasicBlock(bbCall); {
            // Figure out the first lane that still needs its function
            // pointer to be called.
            llvm::Value *currentMask = LoadInst(maskPtr);
            llvm::Function *cttz =
                m->module->getFunction("__count_trailing_zeros_i64");
            AssertPos(currentPos, cttz != NULL);
            llvm::Value *firstLane64 = CallInst(cttz, NULL, LaneMask(currentMask),
                                                "first_lane64");
            llvm::Value *firstLane =
                TruncInst(firstLane64, LLVMTypes::Int32Type, "first_lane32");

            // Get the pointer to the function we're going to call this
            // time through: ftpr = func[firstLane]
            llvm::Value *fptr =
                llvm::ExtractElementInst::Create(func, firstLane,
                                                 "extract_fptr", bblock);

            // Smear it out into an array of function pointers
            llvm::Value *fptrSmear = SmearUniform(fptr, "func_ptr");

            // fpOverlap = (fpSmearAsVec == fpOrigAsVec).  This gives us a
            // mask for the set of program instances that have the same
            // value for their function pointer.
            llvm::Value *fpOverlap =
                CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                        fptrSmear, func);
            fpOverlap = I1VecToBoolVec(fpOverlap);

            // Figure out the mask to use when calling the function
            // pointer: we need to AND the current execution mask to handle
            // the case of any non-running program instances that happen to
            // have this function pointer value.
            // callMask = (currentMask & fpOverlap)
            llvm::Value *callMask =
                BinaryOperator(llvm::Instruction::And, currentMask, fpOverlap,
                               "call_mask");

            // Set the mask
            SetInternalMask(callMask);

            // bitcast the i32/64 function pointer to the actual function
            // pointer type.
            llvm::Type *llvmFuncType = funcType->LLVMFunctionType(g->ctx);
            llvm::Type *llvmFPtrType = llvm::PointerType::get(llvmFuncType, 0);
            llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType);

            // Call the function: callResult = call ftpr(args, args, call mask)
            // (recurses into the uniform path above, since fptrCast is a
            // single pointer)
            llvm::Value *callResult = CallInst(fptrCast, funcType, args, name);

            // Now, do a masked store into the memory allocated to
            // accumulate the result using the call mask.
            if (callResult != NULL &&
                callResult->getType() != LLVMTypes::VoidType) {
                AssertPos(currentPos, resultPtr != NULL);
                StoreInst(callResult, resultPtr, callMask, returnType,
                          PointerType::GetUniform(returnType));
            }
            else
                AssertPos(currentPos, resultPtr == NULL);

            // Update the mask to turn off the program instances for which
            // we just called the function.
            // currentMask = currentMask & ~callmask
            llvm::Value *notCallMask =
                BinaryOperator(llvm::Instruction::Xor, callMask, LLVMMaskAllOn,
                               "~callMask");
            currentMask = BinaryOperator(llvm::Instruction::And, currentMask,
                                         notCallMask, "currentMask&~callMask");
            StoreInst(currentMask, maskPtr);

            // And go back to the test to see if we need to do another
            // call.
            BranchInst(bbTest);
        }

        // bbDone: We're all done; clean up and return the result we've
        // accumulated in the result memory.
        SetCurrentBasicBlock(bbDone);
        SetInternalMask(origMask);
        return resultPtr ? LoadInst(resultPtr) : NULL;
    }
}
3843 
3844 
3845 llvm::Value *
3846 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
3847  llvm::Value *arg, const char *name) {
3848  std::vector<llvm::Value *> args;
3849  args.push_back(arg);
3850  return CallInst(func, funcType, args, name);
3851 }
3852 
3853 
3854 llvm::Value *
3855 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
3856  llvm::Value *arg0, llvm::Value *arg1,
3857  const char *name) {
3858  std::vector<llvm::Value *> args;
3859  args.push_back(arg0);
3860  args.push_back(arg1);
3861  return CallInst(func, funcType, args, name);
3862 }
3863 
3864 
/* Emit the function's return: if any tasks were launched in this
   function, first emit an implicit sync; then either load the accumulated
   return value from returnValuePtr and return it, or emit a void return.
   The current basic block is closed (bblock set to NULL) afterwards.
   NOTE(review): the doxygen extraction dropped the signature line here;
   per the cross-reference this is FunctionEmitContext::ReturnInst(). */
llvm::Instruction *
    if (launchedTasks)
        // Add a sync call at the end of any function that launched tasks
        SyncInst();

    llvm::Instruction *rinst = NULL;
    if (returnValuePtr != NULL) {
        // We have value(s) to return; load them from their storage
        // location
        llvm::Value *retVal = LoadInst(returnValuePtr, "return_value");
        rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock);
    }
    else {
        // No return-value storage: the function must be declared void.
        AssertPos(currentPos, function->GetReturnType()->IsVoidType());
        rinst = llvm::ReturnInst::Create(*g->ctx, bblock);
    }

    AddDebugPos(rinst);
    // The block is terminated; no further instructions may be appended.
    bblock = NULL;
    return rinst;
}
3887 
3888 
/* Emit the code for a 'launch' statement: allocate an argument block via
   the user-supplied ISPCAlloc, copy the argument values (and, when the
   task signature expects one, the current execution mask) into it, then
   call ISPCLaunch with the task function pointer, the argument block, and
   the three launch dimensions.  A separate NVPTX path handles the same
   sequence with target-specific NULL-pointer checking of the ISPCAlloc
   result.  NOTE(review): the doxygen extraction dropped the signature
   line here; per the cross-reference this is
   FunctionEmitContext::LaunchInst(callee, argVals, launchCount). */
llvm::Value *
                              std::vector<llvm::Value *> &argVals,
                              llvm::Value *launchCount[3]){
#ifdef ISPC_NVPTX_ENABLED
    if (g->target->getISA() == Target::NVPTX)
    {
        if (callee == NULL) {
            AssertPos(currentPos, m->errorCount > 0);
            return NULL;
        }
        launchedTasks = true;

        // Build a struct type holding one member per formal parameter of
        // the task function; this is the layout of the argument block.
        AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
        std::vector<llvm::Type*> argTypes;

        llvm::Function *F = llvm::dyn_cast<llvm::Function>(callee);
        const unsigned int nArgs = F->arg_size();
        llvm::Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
        for (; I != E; ++I)
            argTypes.push_back(I->getType());
        llvm::Type *st = llvm::StructType::get(*g->ctx, argTypes);
        llvm::StructType *argStructType = static_cast<llvm::StructType *>(st);
        llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
        if (structSize->getType() != LLVMTypes::Int64Type)
            // ISPCAlloc takes a 64-bit size
            structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
                                  "struct_size_to_64");

        const int align = 8;
        llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
        AssertPos(currentPos, falloc != NULL);
        std::vector<llvm::Value *> allocArgs;
        allocArgs.push_back(launchGroupHandlePtr);
        allocArgs.push_back(structSize);
        allocArgs.push_back(LLVMInt32(align));
        llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
        llvm::Value *voidi64 = PtrToIntInst(voidmem, "args_i64");
        llvm::BasicBlock* if_true = CreateBasicBlock("if_true");
        llvm::BasicBlock* if_false = CreateBasicBlock("if_false");

        /* check if the pointer returned by ISPCAlloc is not NULL
         * --------------
         * this is a workaround for not checking the value of programIndex
         * because ISPCAlloc will return NULL pointer for all programIndex > 0
         * of course, if ISPAlloc fails to get parameter buffer, the pointer for programIndex = 0
         * will also be NULL
         * This check must be added, and also rewrite the code to make it less opaque
         */
        llvm::Value* cmp1 = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, voidi64, LLVMInt64(0), "cmp1");
        BranchInst(if_true, if_false, cmp1);

        /**********************/
        bblock = if_true;

        // label_if_then block: fill in the argument block, member by member.
        llvm::Type *pt = llvm::PointerType::getUnqual(st);
        llvm::Value *argmem = BitCastInst(voidmem, pt);
        for (unsigned int i = 0; i < argVals.size(); ++i)
        {
            llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
            // don't need to do masked store here, I think
            StoreInst(argVals[i], ptr);
        }
        if (nArgs == argVals.size() + 1) {
            // copy in the mask
            llvm::Value *mask = GetFullMask();
            llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
                                                "funarg_mask");
            StoreInst(mask, ptr);
        }
        BranchInst(if_false);

        /**********************/
        bblock = if_false;

        llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
        llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
        AssertPos(currentPos, flaunch != NULL);
        std::vector<llvm::Value *> args;
        args.push_back(launchGroupHandlePtr);
        args.push_back(fptr);
        args.push_back(voidmem);
        args.push_back(launchCount[0]);
        args.push_back(launchCount[1]);
        args.push_back(launchCount[2]);
        llvm::Value *ret = CallInst(flaunch, NULL, args, "");
        return ret;
    }
#endif /* ISPC_NVPTX_ENABLED */

    if (callee == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    launchedTasks = true;

    // The task function's single parameter is a pointer to the argument
    // struct; recover that struct type from the callee's signature.
    AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
    llvm::Type *argType =
        (llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
    AssertPos(currentPos, llvm::PointerType::classof(argType));
    llvm::PointerType *pt =
        llvm::dyn_cast<llvm::PointerType>(argType);
    AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
    llvm::StructType *argStructType =
        static_cast<llvm::StructType *>(pt->getElementType());

    llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
    AssertPos(currentPos, falloc != NULL);
    llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
    if (structSize->getType() != LLVMTypes::Int64Type)
        // ISPCAlloc expects the size as an uint64_t, but on 32-bit
        // targets, SizeOf returns a 32-bit value
        structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
                              "struct_size_to_64");
    int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());

    std::vector<llvm::Value *> allocArgs;
    allocArgs.push_back(launchGroupHandlePtr);
    allocArgs.push_back(structSize);
    allocArgs.push_back(LLVMInt32(align));
    llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
    llvm::Value *argmem = BitCastInst(voidmem, pt);

    // Copy the values of the parameters into the appropriate place in
    // the argument block
    for (unsigned int i = 0; i < argVals.size(); ++i) {
        llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
        // don't need to do masked store here, I think
        StoreInst(argVals[i], ptr);
    }

    if (argStructType->getNumElements() == argVals.size() + 1) {
        // copy in the mask
        llvm::Value *mask = GetFullMask();
        llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
                                            "funarg_mask");
        StoreInst(mask, ptr);
    }

    // And emit the call to the user-supplied task launch function, passing
    // a pointer to the task function being called and a pointer to the
    // argument block we just filled in
    llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
    llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
    AssertPos(currentPos, flaunch != NULL);
    std::vector<llvm::Value *> args;
    args.push_back(launchGroupHandlePtr);
    args.push_back(fptr);
    args.push_back(voidmem);
    args.push_back(launchCount[0]);
    args.push_back(launchCount[1]);
    args.push_back(launchCount[2]);
    return CallInst(flaunch, NULL, args, "");
}
4044 
4045 
/* Emit code to wait for all launched tasks in the current launch group to
   finish: if the launch-group handle is non-NULL, call the user-supplied
   ISPCSync with it and then zero the handle.  The NVPTX path calls
   ISPCSync unconditionally, without the NULL check.  NOTE(review): the
   doxygen extraction dropped the signature line here; per the
   cross-reference this is FunctionEmitContext::SyncInst(). */
void
#ifdef ISPC_NVPTX_ENABLED
    if (g->target->getISA() == Target::NVPTX)
    {
        llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
        llvm::Value *nullPtrValue =
            llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
        llvm::Function *fsync = m->module->getFunction("ISPCSync");
        if (fsync == NULL)
            FATAL("Couldn't find ISPCSync declaration?!");
        CallInst(fsync, NULL, launchGroupHandle, "");
        StoreInst(nullPtrValue, launchGroupHandlePtr);
        return;
    }
#endif /* ISPC_NVPTX_ENABLED */

    llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
    llvm::Value *nullPtrValue =
        llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
    // Only call ISPCSync if tasks were actually launched (handle != NULL).
    llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
                                   llvm::CmpInst::ICMP_NE,
                                   launchGroupHandle, nullPtrValue);
    llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
    llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
    BranchInst(bSync, bPostSync, nonNull);

    SetCurrentBasicBlock(bSync);
    llvm::Function *fsync = m->module->getFunction("ISPCSync");
    if (fsync == NULL)
        FATAL("Couldn't find ISPCSync declaration?!");
    CallInst(fsync, NULL, launchGroupHandle, "");

    // zero out the handle so that if ISPCLaunch is called again in this
    // function, it knows it's starting out from scratch
    StoreInst(nullPtrValue, launchGroupHandlePtr);

    BranchInst(bPostSync);

    SetCurrentBasicBlock(bPostSync);
}
4087 
4088 
/** When we gathering from or scattering to a varying atomic type, we need
    to add an appropriate offset to the final address for each lane right
    before we use it.  Given a varying pointer we're about to use and its
    type, this function determines whether these offsets are needed and
    returns an updated pointer that incorporates these offsets if needed.

    The per-lane offset is lane_index * sizeof(uniform element), i.e. the
    pointer for lane i is advanced to element i of the varying value.
    NOTE(review): the doxygen extraction dropped the signature line here;
    per the cross-reference this is
    FunctionEmitContext::addVaryingOffsetsIfNeeded(ptr, ptrType).
 */
llvm::Value *
                                                 const Type *ptrType) {
    // This should only be called for varying pointers
    const PointerType *pt = CastType<PointerType>(ptrType);
    AssertPos(currentPos, pt && pt->IsVaryingType());

    // Offsets are only needed when pointing at a varying basic type;
    // anything else is returned unchanged.
    const Type *baseType = ptrType->GetBaseType();
    if (Type::IsBasicType(baseType) == false)
        return ptr;

    if (baseType->IsVaryingType() == false)
        return ptr;

    // Find the size of a uniform element of the varying type
    llvm::Type *llvmBaseUniformType =
        baseType->GetAsUniformType()->LLVMType(g->ctx);
    llvm::Value *unifSize = g->target->SizeOf(llvmBaseUniformType, bblock);
    unifSize = SmearUniform(unifSize);

    // Compute offset = <0, 1, .. > * unifSize
    bool is32bits = g->target->is32Bit() || g->opt.force32BitAddressing;
    llvm::Value *varyingOffsets = ProgramIndexVector(is32bits);

    llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
                                         varyingOffsets);

    if (g->opt.force32BitAddressing == true && g->target->is32Bit() == false)
        // On 64-bit targets where we're doing 32-bit addressing
        // calculations, we need to convert to an i64 vector before adding
        // to the pointer
        offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");

    return BinaryOperator(llvm::Instruction::Add, ptr, offset);
}
4130 
4131 
/* Pop the innermost entry off the control-flow state stack and restore
   the emitter's saved break/continue/switch bookkeeping from it.  Switch
   entries restore the full switch state (expression, case/default/next
   blocks, uniformity flag) in addition to the loop state; loop/foreach
   entries restore only the break/continue state; plain 'if' entries carry
   no state to restore.  The popped CFInfo is returned to the caller.
   NOTE(review): the doxygen extraction dropped the signature line here;
   per the cross-reference this is FunctionEmitContext::popCFState()
   (ctx.cpp:4133). */
CFInfo *
    AssertPos(currentPos, controlFlowInfo.size() > 0);
    CFInfo *ci = controlFlowInfo.back();
    controlFlowInfo.pop_back();

    if (ci->IsSwitch()) {
        breakTarget = ci->savedBreakTarget;
        continueTarget = ci->savedContinueTarget;
        breakLanesPtr = ci->savedBreakLanesPtr;
        continueLanesPtr = ci->savedContinueLanesPtr;
        blockEntryMask = ci->savedBlockEntryMask;
        switchExpr = ci->savedSwitchExpr;
        defaultBlock = ci->savedDefaultBlock;
        caseBlocks = ci->savedCaseBlocks;
        nextBlocks = ci->savedNextBlocks;
        switchConditionWasUniform = ci->savedSwitchConditionWasUniform;
    }
    else if (ci->IsLoop() || ci->IsForeach()) {
        breakTarget = ci->savedBreakTarget;
        continueTarget = ci->savedContinueTarget;
        breakLanesPtr = ci->savedBreakLanesPtr;
        continueLanesPtr = ci->savedContinueLanesPtr;
        blockEntryMask = ci->savedBlockEntryMask;
    }
    else {
        AssertPos(currentPos, ci->IsIf());
        // nothing to do
    }

    return ci;
}
bool IsVoidType() const
Definition: type.cpp:250
llvm::Value * storagePtr
Definition: sym.h:72
static const AtomicType * VaryingInt32
Definition: type.h:349
llvm::Value * Any(llvm::Value *mask)
Definition: ctx.cpp:1401
bool IsVaryingType() const
Definition: type.h:150
llvm::Constant * LLVMMaskAllOn
Definition: llvmutil.cpp:92
llvm::Value * savedBreakLanesPtr
Definition: ctx.cpp:118
void InitializeLabelMap(Stmt *code)
Definition: ctx.cpp:1296
llvm::Value * PtrToIntInst(llvm::Value *value, const char *name=NULL)
Definition: ctx.cpp:2089
void jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target)
Definition: ctx.cpp:924
Definition: func.h:44
llvm::Value * AddElementOffset(llvm::Value *basePtr, int elementNum, const Type *ptrType, const char *name=NULL, const PointerType **resultPtrType=NULL)
Definition: ctx.cpp:2598
CFInfo * popCFState()
Definition: ctx.cpp:4133
Opt opt
Definition: ispc.h:547
void StartUniformIf()
Definition: ctx.cpp:580
void SwitchInst(llvm::Value *expr, llvm::BasicBlock *defaultBlock, const std::vector< std::pair< int, llvm::BasicBlock * > > &caseBlocks, const std::map< llvm::BasicBlock *, llvm::BasicBlock * > &nextBlocks)
Definition: ctx.cpp:1192
void BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse)
Definition: ctx.cpp:569
int last_column
Definition: ispc.h:144
CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct, llvm::Value *sb, llvm::Value *sc, llvm::Value *sm, llvm::Value *lm, llvm::Value *sse=NULL, llvm::BasicBlock *bbd=NULL, const std::vector< std::pair< int, llvm::BasicBlock * > > *bbc=NULL, const std::map< llvm::BasicBlock *, llvm::BasicBlock * > *bbn=NULL, bool scu=false)
Definition: ctx.cpp:139
void StartSwitch(bool isUniform, llvm::BasicBlock *bbAfterSwitch)
Definition: ctx.cpp:1007
llvm::Value * ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, const char *name=NULL)
Definition: ctx.cpp:3555
llvm::Value * ProgramIndexVector(bool is32bits=true)
Definition: ctx.cpp:1562
void SetInternalMask(llvm::Value *val)
Definition: ctx.cpp:521
llvm::Constant * LLVMInt64Vector(int64_t i)
Definition: llvmutil.cpp:455
void StartLoop(llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, bool uniformControlFlow)
Definition: ctx.cpp:655
llvm::Instruction * FPCastInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2213
Declaration of the FunctionEmitContext class
void EmitVariableDebugInfo(Symbol *sym)
Definition: ctx.cpp:1773
static llvm::Type * lGetMatchingBoolVectorType(llvm::Type *type)
Definition: ctx.cpp:1983
void StartScope()
Definition: ctx.cpp:1715
CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct, llvm::Value *sb, llvm::Value *sc, llvm::Value *sm, llvm::Value *lm)
Definition: ctx.cpp:160
llvm::BasicBlock * savedBreakTarget
Definition: ctx.cpp:117
void SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *val)
Definition: ctx.cpp:529
CFInfo(CFType t, bool uniformIf, llvm::Value *sm)
Definition: ctx.cpp:127
void BranchInst(llvm::BasicBlock *block)
Definition: ctx.cpp:3485
const std::vector< std::pair< int, llvm::BasicBlock * > > * savedCaseBlocks
Definition: ctx.cpp:122
void maskedStore(llvm::Value *value, llvm::Value *ptr, const Type *ptrType, llvm::Value *mask)
Definition: ctx.cpp:3108
bool IsFrozenSlice() const
Definition: type.h:468
llvm::Instruction * ZExtInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2251
int getVectorWidth() const
Definition: ispc.h:285
Module * m
Definition: ispc.cpp:93
llvm::DIType GetDIType(llvm::DIDescriptor scope) const
Definition: type.cpp:3241
Interface class for statements in the ispc language.
Definition: stmt.h:49
FunctionEmitContext(Function *function, Symbol *funSym, llvm::Function *llvmFunction, SourcePos firstStmtPos)
Definition: ctx.cpp:245
llvm::Value * NotOperator(llvm::Value *v, const char *name=NULL)
Definition: ctx.cpp:1948
int first_line
Definition: ispc.h:141
Target * target
Definition: ispc.h:549
llvm::Value * SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1407
llvm::Value * LoadInst(llvm::Value *ptr, llvm::Value *mask, const Type *ptrType, const char *name=NULL, bool one_elem=false)
Definition: ctx.cpp:2817
static llvm::VectorType * VoidPointerVectorType
Definition: llvmutil.h:108
static llvm::VectorType * BoolVectorType
Definition: llvmutil.h:92
void BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse)
Definition: ctx.cpp:558
std::vector< std::string > GetLabels()
Definition: ctx.cpp:1311
virtual const Type * GetElementType(int index) const =0
llvm::Instruction * TruncInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2174
llvm::Value * AllocaInst(llvm::Type *llvmType, const char *name=NULL, int align=0, bool atEntryBlock=true)
Definition: ctx.cpp:3051
Abstract base class for types that represent sequences.
Definition: type.h:531
bool IsVarying()
Definition: ctx.cpp:110
llvm::Value * CmpInst(llvm::Instruction::OtherOps inst, llvm::CmpInst::Predicate pred, llvm::Value *v0, llvm::Value *v1, const char *name=NULL)
Definition: ctx.cpp:2000
void EndSwitch()
Definition: ctx.cpp:1033
void StartVaryingIf(llvm::Value *oldMask)
Definition: ctx.cpp:586
const PointerType * GetAsNonSlice() const
Definition: type.cpp:1124
static llvm::Type * BoolType
Definition: llvmutil.h:74
#define Assert(expr)
Definition: ispc.h:172
void addSwitchMaskCheck(llvm::Value *mask)
Definition: ctx.cpp:1046
void StartForeach(ForeachType ft)
Definition: ctx.cpp:699
llvm::Constant * LLVMInt32Vector(int32_t i)
Definition: llvmutil.cpp:379
llvm::FunctionType * LLVMFunctionType(llvm::LLVMContext *ctx, bool disableMask=false) const
Definition: type.cpp:3319
ASTNode * WalkAST(ASTNode *root, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc, void *data)
Definition: ast.cpp:74
static llvm::VectorType * Int32VectorType
Definition: llvmutil.h:96
bool IsForeach()
Definition: ctx.cpp:106
bool forceAlignedMemory
Definition: ispc.h:471
static PointerType * GetVarying(const Type *t)
Definition: type.cpp:1021
void Continue(bool doCoherenceCheck)
Definition: ctx.cpp:856
llvm::Value * GetFullMask()
Definition: ctx.cpp:494
const char * GetISAString() const
Definition: ispc.cpp:1315
bool isUniform
Definition: ctx.cpp:116
virtual const Type * GetAsUniformType() const =0
CFType
Definition: ctx.cpp:113
void AddInstrumentationPoint(const char *note)
Definition: ctx.cpp:1660
llvm::Value * MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset)
Definition: ctx.cpp:2420
std::string name
Definition: sym.h:71
llvm::Value * gather(llvm::Value *ptr, const PointerType *ptrType, llvm::Value *mask, const char *name)
Definition: ctx.cpp:2907
void restoreMaskGivenReturns(llvm::Value *oldMask)
Definition: ctx.cpp:740
virtual const Type * GetElementType() const =0
Type implementation for pointers to other types.
Definition: type.h:446
int getNativeVectorAlignment() const
Definition: ispc.h:281
void BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse)
Definition: ctx.cpp:547
void RestoreContinuedLanes()
Definition: ctx.cpp:979
llvm::Constant * LLVMFalse
Definition: llvmutil.cpp:91
llvm::Constant * LLVMMaskAllOff
Definition: llvmutil.cpp:93
llvm::Value * loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask, const PointerType *ptrType, const char *name)
Definition: ctx.cpp:2783
virtual std::string GetString() const =0
llvm::BasicBlock * GetCurrentBasicBlock()
Definition: ctx.cpp:470
int GetSOAWidth() const
Definition: type.h:160
static PointerType * GetUniform(const Type *t, bool isSlice=false)
Definition: type.cpp:1015
void Break(bool doCoherenceCheck)
Definition: ctx.cpp:776
static llvm::Value * lComputeSliceIndex(FunctionEmitContext *ctx, int soaWidth, llvm::Value *indexValue, llvm::Value *ptrSliceOffset, llvm::Value **newSliceOffset)
Definition: ctx.cpp:2382
static llvm::VectorType * Int1VectorType
Definition: llvmutil.h:93
llvm::BasicBlock * CreateBasicBlock(const char *name)
Definition: ctx.cpp:1602
llvm::Value * savedSwitchExpr
Definition: ctx.cpp:120
static CFInfo * GetLoop(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask, llvm::Value *savedBlockEntryMask)
Definition: ctx.cpp:187
bool IsIf()
Definition: ctx.cpp:104
header file with declarations for symbol and symbol table classes.
llvm::Value * BroadcastValue(llvm::Value *v, llvm::Type *vecType, const char *name=NULL)
Definition: ctx.cpp:3576
static void addGSMetadata(llvm::Value *inst, SourcePos pos)
Definition: ctx.cpp:3004
static llvm::Value * lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr, const PointerType **ptrType)
Definition: ctx.cpp:2749
bool disableMaskAllOnOptimizations
Definition: ispc.h:476
int level
Definition: ispc.h:437
static llvm::Type * VoidType
Definition: llvmutil.h:71
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:263
void StoreInst(llvm::Value *value, llvm::Value *ptr)
Definition: ctx.cpp:3344
llvm::Module * module
Definition: module.h:158
File with declarations for classes related to statements in the language.
void EmitCaseLabel(int value, bool checkMask, SourcePos pos)
Definition: ctx.cpp:1140
Globals * g
Definition: ispc.cpp:92
llvm::BasicBlock * savedContinueTarget
Definition: ctx.cpp:117
bool IsUniformType() const
Definition: type.h:145
void EndLoop()
Definition: ctx.cpp:684
llvm::Value * GetFunctionMask()
Definition: ctx.cpp:482
int getMaskBitCount() const
Definition: ispc.h:291
static llvm::VectorType * Int8VectorType
Definition: llvmutil.h:94
static CFInfo * GetIf(bool isUniform, llvm::Value *savedMask)
Definition: ctx.cpp:181
void AddDebugPos(llvm::Value *instruction, const SourcePos *pos=NULL, llvm::DIScope *scope=NULL)
Definition: ctx.cpp:1693
Abstract base class for nodes in the abstract syntax tree (AST).
Definition: ast.h:50
llvm::Value * GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, const Type *ptrType, const char *name=NULL)
Definition: ctx.cpp:2438
bool LookupFunction(const char *name, std::vector< Symbol * > *matches=NULL)
Definition: sym.cpp:162
CFType type
Definition: ctx.cpp:115
static unsigned int lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType)
Definition: ctx.cpp:3646
void CurrentLanesReturned(Expr *value, bool doCoherenceCheck)
Definition: ctx.cpp:1325
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:385
bool inSwitchStatement() const
Definition: ctx.cpp:762
llvm::DIScope GetDIScope() const
Definition: ctx.cpp:1766
bool IsUniform()
Definition: ctx.cpp:111
llvm::BasicBlock * savedDefaultBlock
Definition: ctx.cpp:121
SourcePos GetDebugPos() const
Definition: ctx.cpp:1687
static llvm::VectorType * FloatVectorType
Definition: llvmutil.h:98
llvm::Value * LaneMask(llvm::Value *mask)
Definition: ctx.cpp:1454
bool IsLoop()
Definition: ctx.cpp:105
bool IsSlice() const
Definition: type.h:467
static CFInfo * GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask, llvm::Value *savedBlockEntryMask, llvm::Value *switchExpr, llvm::BasicBlock *bbDefault, const std::vector< std::pair< int, llvm::BasicBlock * > > *bbCases, const std::map< llvm::BasicBlock *, llvm::BasicBlock * > *bbNext, bool scUniform)
Definition: ctx.cpp:228
static llvm::Type * Int64Type
Definition: llvmutil.h:79
void MemcpyInst(llvm::Value *dest, llvm::Value *src, llvm::Value *count, llvm::Value *align=NULL)
Definition: ctx.cpp:3447
llvm::PHINode * PhiNode(llvm::Type *type, int count, const char *name=NULL)
Definition: ctx.cpp:3615
Representation of a structure holding a number of members.
Definition: type.h:691
virtual llvm::DIType GetDIType(llvm::DIDescriptor scope) const =0
llvm::Value * addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *ptrType)
Definition: ctx.cpp:4096
static llvm::VectorType * Int64VectorType
Definition: llvmutil.h:97
Header file with declarations for various LLVM utility stuff.
#define AssertPos(pos, expr)
Definition: ispc.h:175
llvm::Value * getMaskAtSwitchEntry()
Definition: ctx.cpp:1066
void MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1)
Definition: ctx.cpp:2341
ISA getISA() const
Definition: ispc.h:269
bool emitInstrumentation
Definition: ispc.h:609
static bool IsBasicType(const Type *type)
Definition: type.cpp:3625
SourcePos pos
Definition: sym.h:70
llvm::Value * CallInst(llvm::Value *func, const FunctionType *funcType, const std::vector< llvm::Value * > &args, const char *name=NULL)
Definition: ctx.cpp:3667
uint32_t RoundUpPow2(uint32_t v)
Definition: util.h:51
bool ifsInCFAllUniform(int cfType) const
Definition: ctx.cpp:906
AtomicType represents basic types like floats, ints, etc.
Definition: type.h:292
void SetBlockEntryMask(llvm::Value *mask)
Definition: ctx.cpp:515
virtual llvm::Value * GetValue(FunctionEmitContext *ctx) const =0
StorageClass storageClass
Definition: sym.h:96
Representation of a range of positions in a source file.
Definition: ispc.h:136
llvm::Value * StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1442
static bool lEnclosingLoopIsForeachActive(const std::vector< CFInfo * > &controlFlowInfo)
Definition: ctx.cpp:846
bool InForeachLoop() const
Definition: ctx.cpp:1255
int VaryingCFDepth() const
Definition: ctx.cpp:1245
void ClearBreakLanes()
Definition: ctx.cpp:997
Abstract base class for types that represent collections of other types.
Definition: type.h:510
const char * LLVMGetName(llvm::Value *v, const char *)
Definition: llvmutil.cpp:1712
llvm::Value * None(llvm::Value *mask)
Definition: ctx.cpp:1436
bool force32BitAddressing
Definition: ispc.h:457
const char * name
Definition: ispc.h:140
llvm::Instruction * SExtInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2232
SourcePos pos
Definition: ast.h:77
static llvm::Type * PointerIntType
Definition: llvmutil.h:73
const PointerType * GetAsSlice() const
Definition: type.cpp:1116
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:410
static llvm::PointerType * VoidPointerType
Definition: llvmutil.h:72
const Type * GetBaseType() const
Definition: type.cpp:1070
int getNativeVectorWidth() const
Definition: ispc.h:279
const Function * GetFunction() const
Definition: ctx.cpp:464
const Type * GetReturnType() const
Definition: type.h:925
llvm::Value * GetStringPtr(const std::string &str)
Definition: ctx.cpp:1590
void storeUniformToSOA(llvm::Value *value, llvm::Value *ptr, llvm::Value *mask, const Type *valueType, const PointerType *ptrType)
Definition: ctx.cpp:3417
#define FATAL(message)
Definition: util.h:113
bool savedSwitchConditionWasUniform
Definition: ctx.cpp:124
virtual llvm::Type * LLVMType(llvm::LLVMContext *ctx) const =0
llvm::Value * InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const char *name=NULL)
Definition: ctx.cpp:3530
void DisableGatherScatterWarnings()
Definition: ctx.cpp:1264
llvm::Value * savedMask
Definition: ctx.cpp:119
static llvm::Type * Int32Type
Definition: llvmutil.h:78
int last_line
Definition: ispc.h:143
void SetDebugPos(SourcePos pos)
Definition: ctx.cpp:1681
#define PTYPE(p)
Definition: llvmutil.h:55
Representation of a function in a source file.
Variability GetVariability() const
Definition: type.cpp:1034
int first_column
Definition: ispc.h:142
Definition: ctx.cpp:73
llvm::DIFile GetDIFile() const
Definition: ispc.cpp:1585
virtual const Type * GetAsVaryingType() const =0
virtual const Type * GetType() const =0
llvm::Value * GetFullMaskPointer()
Definition: ctx.cpp:501
llvm::Value * GetInternalMask()
Definition: ctx.cpp:488
llvm::Value * BitCastInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2072
void SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test)
Definition: ctx.cpp:537
llvm::Value * LaunchInst(llvm::Value *callee, std::vector< llvm::Value * > &argVals, llvm::Value *launchCount[3])
Definition: ctx.cpp:3890
Type representing a function (return type + argument types)
Definition: type.h:884
Representation of a program symbol.
Definition: sym.h:63
llvm::Value * ExtractInst(llvm::Value *v, int elt, const char *name=NULL)
Definition: ctx.cpp:3507
bool IsSwitch()
Definition: ctx.cpp:109
void EndForeach()
Definition: ctx.cpp:733
void EnableGatherScatterWarnings()
Definition: ctx.cpp:1270
Interface class that defines the type abstraction.
Definition: type.h:101
static bool initLabelBBlocks(ASTNode *node, void *data)
Definition: ctx.cpp:1277
Expr abstract base class and expression implementations.
void SetCurrentBasicBlock(llvm::BasicBlock *bblock)
Definition: ctx.cpp:476
static llvm::VectorType * MaskType
Definition: llvmutil.h:90
virtual const Type * GetBaseType() const =0
llvm::Value * savedContinueLanesPtr
Definition: ctx.cpp:118
void EmitDefaultLabel(bool checkMask, SourcePos pos)
Definition: ctx.cpp:1077
llvm::Instruction * SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Value *val1, const char *name=NULL)
Definition: ctx.cpp:3625
static int lArrayVectorWidth(llvm::Type *t)
Definition: ctx.cpp:1896
Expr * TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase)
Definition: expr.cpp:595
void EmitFunctionParameterDebugInfo(Symbol *sym, int parameterNum)
Definition: ctx.cpp:1828
Expr is the abstract base class that defines the interface that all expression types must implement...
Definition: expr.h:48
llvm::Value * IntToPtrInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2141
static llvm::VectorType * DoubleVectorType
Definition: llvmutil.h:99
llvm::Value * MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2)
Definition: ctx.cpp:1534
llvm::Type * LLVMType(llvm::LLVMContext *ctx) const
Definition: type.cpp:2317
llvm::Value * All(llvm::Value *mask)
Definition: ctx.cpp:1418
std::string name
Definition: stmt.h:494
llvm::Constant * LLVMIntAsType(int64_t, llvm::Type *t)
Definition: llvmutil.cpp:548
virtual int GetElementCount() const =0
llvm::Value * SmearUniform(llvm::Value *value, const char *name=NULL)
Definition: ctx.cpp:2034
static llvm::VectorType * Int16VectorType
Definition: llvmutil.h:95
const std::map< llvm::BasicBlock *, llvm::BasicBlock * > * savedNextBlocks
Definition: ctx.cpp:123
llvm::Value * savedBlockEntryMask
Definition: ctx.cpp:119
bool IsConstType() const
Definition: type.cpp:1064
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
llvm::ConstantInt * LLVMInt64(int64_t i)
Definition: llvmutil.cpp:277
int errorCount
Definition: module.h:151
llvm::LLVMContext * ctx
Definition: ispc.h:638
const Type * type
Definition: sym.h:84
llvm::DIBuilder * diBuilder
Definition: module.h:161
static CFInfo * GetForeach(FunctionEmitContext::ForeachType ft, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask, llvm::Value *savedBlockEntryMask)
Definition: ctx.cpp:199
static bool EqualIgnoringConst(const Type *a, const Type *b)
Definition: type.cpp:3741
void scatter(llvm::Value *value, llvm::Value *ptr, const Type *valueType, const Type *ptrType, llvm::Value *mask)
Definition: ctx.cpp:3224
virtual const Type * GetReferenceTarget() const
Definition: type.cpp:3417
llvm::Instruction * CastInst(llvm::Instruction::CastOps op, llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2193
bool is32Bit() const
Definition: ispc.h:275
llvm::Instruction * ReturnInst()
Definition: ctx.cpp:3866
llvm::Value * applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index, const Type *ptrType)
Definition: ctx.cpp:2276
llvm::Value * BinaryOperator(llvm::Instruction::BinaryOps inst, llvm::Value *v0, llvm::Value *v1, const char *name=NULL)
Definition: ctx.cpp:1914
int GetNumParameters() const
Definition: type.h:936
llvm::BasicBlock * GetLabeledBasicBlock(const std::string &label)
Definition: ctx.cpp:1303
void SetFunctionMask(llvm::Value *val)
Definition: ctx.cpp:507
std::map< std::string, llvm::BasicBlock * > labelMap
Definition: ctx.h:726
SymbolTable * symbolTable
Definition: module.h:155
File with declarations for classes related to type representation.
llvm::Value * I1VecToBoolVec(llvm::Value *b)
Definition: ctx.cpp:1608
static llvm::Value * lGetStringAsValue(llvm::BasicBlock *bblock, const char *s)
Definition: ctx.cpp:1640