Intel SPMD Program Compiler  1.9.1
ctx.cpp
1 /*
2  Copyright (c) 2010-2015, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ctx.cpp
35  @brief Implementation of the FunctionEmitContext class
36 */
37 
38 #include "ctx.h"
39 #include "util.h"
40 #include "func.h"
41 #include "llvmutil.h"
42 #include "type.h"
43 #include "stmt.h"
44 #include "expr.h"
45 #include "module.h"
46 #include "sym.h"
47 #include <map>
48 #include <llvm/Support/Dwarf.h>
49 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
50  #include <llvm/Metadata.h>
51  #include <llvm/Module.h>
52  #include <llvm/Instructions.h>
53  #include <llvm/DerivedTypes.h>
54 #else
55  #include <llvm/IR/Metadata.h>
56  #include <llvm/IR/Module.h>
57  #include <llvm/IR/Instructions.h>
58  #include <llvm/IR/DerivedTypes.h>
59 #endif
60 #ifdef ISPC_NVPTX_ENABLED
61 #include <llvm/Support/raw_ostream.h>
62 #include <llvm/Support/FormattedStream.h>
63 #endif /* ISPC_NVPTX_ENABLED */
64 
65 /** This is a small utility structure that records information related to one
66  level of nested control flow. It's mostly used in correctly restoring
67  the mask and other state as we exit control flow nesting levels.
68 */
69 struct CFInfo {
70  /** Returns a new instance of the structure that represents entering an
71  'if' statement */
72  static CFInfo *GetIf(bool isUniform, llvm::Value *savedMask);
73 
74  /** Returns a new instance of the structure that represents entering a
75  loop. */
76  static CFInfo *GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
77  llvm::BasicBlock *continueTarget,
78  llvm::Value *savedBreakLanesPtr,
79  llvm::Value *savedContinueLanesPtr,
80  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask);
81 
82  static CFInfo *GetForeach(FunctionEmitContext::ForeachType ft,
83  llvm::BasicBlock *breakTarget,
84  llvm::BasicBlock *continueTarget,
85  llvm::Value *savedBreakLanesPtr,
86  llvm::Value *savedContinueLanesPtr,
87  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask);
88 
89  static CFInfo *GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget,
90  llvm::BasicBlock *continueTarget,
91  llvm::Value *savedBreakLanesPtr,
92  llvm::Value *savedContinueLanesPtr,
93  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask,
94  llvm::Value *switchExpr,
95  llvm::BasicBlock *bbDefault,
96  const std::vector<std::pair<int, llvm::BasicBlock *> > *bbCases,
97  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbNext,
98  bool scUniform);
99 
100  bool IsIf() { return type == If; }
101  bool IsLoop() { return type == Loop; }
102  bool IsForeach() { return (type == ForeachRegular ||
103  type == ForeachActive ||
104  type == ForeachUnique); }
105  bool IsSwitch() { return type == Switch; }
106  bool IsVarying() { return !isUniform; }
107  bool IsUniform() { return isUniform; }
108 
109  enum CFType { If, Loop, ForeachRegular, ForeachActive, ForeachUnique,
110  Switch };
111  CFType type;
112  bool isUniform;
113  llvm::BasicBlock *savedBreakTarget, *savedContinueTarget;
114  llvm::Value *savedBreakLanesPtr, *savedContinueLanesPtr;
115  llvm::Value *savedMask, *savedBlockEntryMask;
116  llvm::Value *savedSwitchExpr;
117  llvm::BasicBlock *savedDefaultBlock;
118  const std::vector<std::pair<int, llvm::BasicBlock *> > *savedCaseBlocks;
119  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNextBlocks;
120  bool savedSwitchConditionWasUniform;
121 
122 private:
123  CFInfo(CFType t, bool uniformIf, llvm::Value *sm) {
124  Assert(t == If);
125  type = t;
126  isUniform = uniformIf;
127  savedBreakTarget = savedContinueTarget = NULL;
128  savedBreakLanesPtr = savedContinueLanesPtr = NULL;
129  savedMask = savedBlockEntryMask = sm;
130  savedSwitchExpr = NULL;
131  savedDefaultBlock = NULL;
132  savedCaseBlocks = NULL;
133  savedNextBlocks = NULL;
134  }
135  CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
136  llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
137  llvm::Value *lm, llvm::Value *sse = NULL, llvm::BasicBlock *bbd = NULL,
138  const std::vector<std::pair<int, llvm::BasicBlock *> > *bbc = NULL,
139  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbn = NULL,
140  bool scu = false) {
141  Assert(t == Loop || t == Switch);
142  type = t;
143  isUniform = iu;
144  savedBreakTarget = bt;
145  savedContinueTarget = ct;
146  savedBreakLanesPtr = sb;
147  savedContinueLanesPtr = sc;
148  savedMask = sm;
149  savedBlockEntryMask = lm;
150  savedSwitchExpr = sse;
151  savedDefaultBlock = bbd;
152  savedCaseBlocks = bbc;
153  savedNextBlocks = bbn;
154  savedSwitchConditionWasUniform = scu;
155  }
156  CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
157  llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
158  llvm::Value *lm) {
159  Assert(t == ForeachRegular || t == ForeachActive || t == ForeachUnique);
160  type = t;
161  isUniform = false;
162  savedBreakTarget = bt;
163  savedContinueTarget = ct;
164  savedBreakLanesPtr = sb;
165  savedContinueLanesPtr = sc;
166  savedMask = sm;
167  savedBlockEntryMask = lm;
168  savedSwitchExpr = NULL;
169  savedDefaultBlock = NULL;
170  savedCaseBlocks = NULL;
171  savedNextBlocks = NULL;
172  }
173 };
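// Editor's illustration (not part of the original file): for hypothetical ispc
// source like
//
//     for (uniform int i = 0; i < n; ++i) {   // pushes a Loop CFInfo
//         if (v < 0) {                        // varying test: pushes an If CFInfo
//             ...
//         }
//     }
//
// the controlFlowInfo stack inside the 'if' body holds a Loop entry below an
// If entry; popping each entry restores the mask and break/continue state
// that was saved when that nesting level was entered.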
174 
175 
176 CFInfo *
177 CFInfo::GetIf(bool isUniform, llvm::Value *savedMask) {
178  return new CFInfo(If, isUniform, savedMask);
179 }
180 
181 
182 CFInfo *
183 CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
184  llvm::BasicBlock *continueTarget,
185  llvm::Value *savedBreakLanesPtr,
186  llvm::Value *savedContinueLanesPtr,
187  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask) {
188  return new CFInfo(Loop, isUniform, breakTarget, continueTarget,
189  savedBreakLanesPtr, savedContinueLanesPtr,
190  savedMask, savedBlockEntryMask);
191 }
192 
193 
194 CFInfo *
195 CFInfo::GetForeach(FunctionEmitContext::ForeachType ft,
196  llvm::BasicBlock *breakTarget,
197  llvm::BasicBlock *continueTarget,
198  llvm::Value *savedBreakLanesPtr,
199  llvm::Value *savedContinueLanesPtr,
200  llvm::Value *savedMask, llvm::Value *savedForeachMask) {
201  CFType cfType;
202  switch (ft) {
203  case FunctionEmitContext::FOREACH_REGULAR:
204  cfType = ForeachRegular;
205  break;
206  case FunctionEmitContext::FOREACH_ACTIVE:
207  cfType = ForeachActive;
208  break;
209  case FunctionEmitContext::FOREACH_UNIQUE:
210  cfType = ForeachUnique;
211  break;
212  default:
213  FATAL("Unhandled foreach type");
214  return NULL;
215  }
216 
217  return new CFInfo(cfType, breakTarget, continueTarget,
218  savedBreakLanesPtr, savedContinueLanesPtr,
219  savedMask, savedForeachMask);
220 }
221 
222 
223 CFInfo *
224 CFInfo::GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget,
225  llvm::BasicBlock *continueTarget,
226  llvm::Value *savedBreakLanesPtr,
227  llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
228  llvm::Value *savedBlockEntryMask, llvm::Value *savedSwitchExpr,
229  llvm::BasicBlock *savedDefaultBlock,
230  const std::vector<std::pair<int, llvm::BasicBlock *> > *savedCases,
231  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNext,
232  bool savedSwitchConditionUniform) {
233  return new CFInfo(Switch, isUniform, breakTarget, continueTarget,
234  savedBreakLanesPtr, savedContinueLanesPtr,
235  savedMask, savedBlockEntryMask, savedSwitchExpr, savedDefaultBlock,
236  savedCases, savedNext, savedSwitchConditionUniform);
237 }
238 
239 ///////////////////////////////////////////////////////////////////////////
240 
241 FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
242  llvm::Function *lf,
243  SourcePos firstStmtPos) {
244  function = func;
245  llvmFunction = lf;
246 
247  /* Create a new basic block to store all of the allocas */
248  allocaBlock = llvm::BasicBlock::Create(*g->ctx, "allocas", llvmFunction, 0);
249  bblock = llvm::BasicBlock::Create(*g->ctx, "entry", llvmFunction, 0);
250  /* But jump from it immediately into the real entry block */
251  llvm::BranchInst::Create(bblock, allocaBlock);
252 
253  funcStartPos = funSym->pos;
254 
255  internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory");
256  StoreInst(LLVMMaskAllOn, internalMaskPointer);
257 
258  functionMaskValue = LLVMMaskAllOn;
259 
260  fullMaskPointer = AllocaInst(LLVMTypes::MaskType, "full_mask_memory");
261  StoreInst(LLVMMaskAllOn, fullMaskPointer);
262 
263  blockEntryMask = NULL;
264  breakLanesPtr = continueLanesPtr = NULL;
265  breakTarget = continueTarget = NULL;
266 
267  switchExpr = NULL;
268  caseBlocks = NULL;
269  defaultBlock = NULL;
270  nextBlocks = NULL;
271 
272  returnedLanesPtr = AllocaInst(LLVMTypes::MaskType, "returned_lanes_memory");
273  StoreInst(LLVMMaskAllOff, returnedLanesPtr);
274 
275  launchedTasks = false;
276  launchGroupHandlePtr = AllocaInst(LLVMTypes::VoidPointerType, "launch_group_handle");
277  StoreInst(llvm::Constant::getNullValue(LLVMTypes::VoidPointerType),
278  launchGroupHandlePtr);
279 
280  disableGSWarningCount = 0;
281 
282  const Type *returnType = function->GetReturnType();
283  if (!returnType || returnType->IsVoidType())
284  returnValuePtr = NULL;
285  else {
286  llvm::Type *ftype = returnType->LLVMType(g->ctx);
287  returnValuePtr = AllocaInst(ftype, "return_value_memory");
288  }
289 
290  if (g->opt.disableMaskAllOnOptimizations) {
291  // This is really disgusting. We want to be able to fool the
292  // compiler to not be able to reason that the mask is all on, but
293  // we don't want to pay too much of a price at the start of each
294  // function to do so.
295  //
296  // Therefore: first, we declare a module-static __all_on_mask
297  // variable that will hold an "all on" mask value. At the start of
298  // each function, we'll load its value and call SetInternalMaskAnd
299  // with the result to set the current internal execution mask.
300  // (This is a no-op at runtime.)
301  //
302  // Then, to fool the optimizer that maybe the value of
303  // __all_on_mask can't be guaranteed to be "all on", we emit a
304  // dummy function that sets __all_on_mask to "all off". (That
305  // function is never actually called.)
306  llvm::Value *globalAllOnMaskPtr =
307  m->module->getNamedGlobal("__all_on_mask");
308  if (globalAllOnMaskPtr == NULL) {
309  globalAllOnMaskPtr =
310  new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false,
311  llvm::GlobalValue::InternalLinkage,
312  LLVMMaskAllOn, "__all_on_mask");
313 
314  char buf[256];
315  sprintf(buf, "__off_all_on_mask_%s", g->target->GetISAString());
316  llvm::Constant *offFunc =
317  m->module->getOrInsertFunction(buf, LLVMTypes::VoidType,
318  NULL);
319  AssertPos(currentPos, llvm::isa<llvm::Function>(offFunc));
320  llvm::BasicBlock *offBB =
321  llvm::BasicBlock::Create(*g->ctx, "entry",
322  (llvm::Function *)offFunc, 0);
323  llvm::StoreInst *inst =
324  new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
325  if (g->opt.forceAlignedMemory) {
326  inst->setAlignment(g->target->getNativeVectorAlignment());
327  }
328  llvm::ReturnInst::Create(*g->ctx, offBB);
329  }
330 
331  llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, "all_on_mask");
332  SetInternalMaskAnd(LLVMMaskAllOn, allOnMask);
333  }
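 // Roughly, the IR that the block above emits looks like this (editor's
 // sketch; the actual mask type and width are target-dependent):
 //
 //     @__all_on_mask = internal global <W x i32> <all-on>
 //     define void @__off_all_on_mask_<isa>() {
 //     entry:
 //       store <W x i32> <all-off>, <W x i32>* @__all_on_mask
 //       ret void
 //     }
 //     ...
 //     %all_on_mask = load <W x i32>* @__all_on_mask
 //     ; internal mask = all-on & %all_on_mask
 //
 // Because the never-called "off" function might store "all off", the
 // optimizer cannot constant-fold the load and assume the mask is all on.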
334 
335  if (m->diBuilder) {
336  currentPos = funSym->pos;
337 
338  /* If debugging is enabled, tell the debug information emission
339  code about this new function */
340 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
341  diFile = funcStartPos.GetDIFile();
342  AssertPos(currentPos, diFile.Verify());
343 #else /* LLVM 3.7+ */
344  diFile = funcStartPos.GetDIFile();
345 #endif
346 
347 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 /* 3.2, 3.3 */
348  llvm::DIScope scope = llvm::DIScope(m->diBuilder->getCU());
349 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.4, 3.5, 3.6 */
350  llvm::DIScope scope = llvm::DIScope(m->diCompileUnit);
351 #else /* LLVM 3.7+ */
352  llvm::DIScope *scope = m->diCompileUnit;
353  //llvm::MDScope *scope = m->diCompileUnit;
354 #endif
355 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
356  llvm::DIType diSubprogramType;
357  AssertPos(currentPos, scope.Verify());
358 #else /* LLVM 3.7+ */
359  llvm::DIType *diSubprogramType = NULL;
360  //llvm::MDType *diSubprogramType = NULL;
361 #endif
362 
363  const FunctionType *functionType = function->GetType();
364  if (functionType == NULL)
365  AssertPos(currentPos, m->errorCount > 0);
366  else {
367  diSubprogramType = functionType->GetDIType(scope);
368 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
369  AssertPos(currentPos, diSubprogramType.Verify());
370 #else /* LLVM 3.7+ */
371  // coming soon
372 #endif
373  }
374 
375 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 /* 3.2, 3.3 */
376  llvm::DIType diSubprogramType_n = diSubprogramType;
377  int flags = llvm::DIDescriptor::FlagPrototyped;
378 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.4, 3.5, 3.6 */
379  Assert(diSubprogramType.isCompositeType());
380  llvm::DICompositeType diSubprogramType_n =
381  static_cast<llvm::DICompositeType>(diSubprogramType);
382  int flags = llvm::DIDescriptor::FlagPrototyped;
383 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_7 /* LLVM 3.7 */
384  Assert(llvm::isa<llvm::DICompositeTypeBase>(diSubprogramType));
385  llvm::DISubroutineType *diSubprogramType_n =
386  llvm::cast<llvm::DISubroutineType>(getDICompositeType(diSubprogramType));
387  int flags = llvm::DINode::FlagPrototyped;
388 #else /* LLVM 3.8+ */
389  Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
390  llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
391  int flags = llvm::DINode::FlagPrototyped;
392 
393 #endif
394 
395  std::string mangledName = llvmFunction->getName();
396  if (mangledName == funSym->name)
397  mangledName = "";
398 
399  bool isStatic = (funSym->storageClass == SC_STATIC);
400  bool isOptimized = (g->opt.level > 0);
401  int firstLine = funcStartPos.first_line;
402 
403 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
404  diSubprogram =
405  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
406  mangledName, diFile,
407  firstLine, diSubprogramType_n,
408  isStatic, true, /* is defn */
409  firstLine, flags,
410  isOptimized, llvmFunction);
411  AssertPos(currentPos, diSubprogram.Verify());
412 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_7 /* LLVM 3.7 */
413  diSubprogram =
414  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
415  mangledName, diFile,
416  firstLine, diSubprogramType_n,
417  isStatic, true, /* is defn */
418  firstLine, flags,
419  isOptimized, llvmFunction);
420 #else /* LLVM 3.8+ */
421  diSubprogram =
422  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
423  mangledName, diFile,
424  firstLine, diSubprogramType_n,
425  isStatic, true, /* is defn */
426  firstLine, flags,
427  isOptimized);
428  llvmFunction->setSubprogram(diSubprogram);
429 #endif
430 
431  /* And start a scope representing the initial function scope */
432  StartScope();
433  }
434 }
435 
436 
437 FunctionEmitContext::~FunctionEmitContext() {
438  AssertPos(currentPos, controlFlowInfo.size() == 0);
439  AssertPos(currentPos, debugScopes.size() == (m->diBuilder ? 1 : 0));
440 }
441 
442 
443 const Function *
444 FunctionEmitContext::GetFunction() const {
445  return function;
446 }
447 
448 
449 llvm::BasicBlock *
450 FunctionEmitContext::GetCurrentBasicBlock() {
451  return bblock;
452 }
453 
454 
455 void
456 FunctionEmitContext::SetCurrentBasicBlock(llvm::BasicBlock *bb) {
457  bblock = bb;
458 }
459 
460 
461 llvm::Value *
462 FunctionEmitContext::GetFunctionMask() {
463  return functionMaskValue;
464 }
465 
466 
467 llvm::Value *
468 FunctionEmitContext::GetInternalMask() {
469  return LoadInst(internalMaskPointer, "load_mask");
470 }
471 
472 
473 llvm::Value *
474 FunctionEmitContext::GetFullMask() {
475  return BinaryOperator(llvm::Instruction::And, GetInternalMask(),
476  functionMaskValue, "internal_mask&function_mask");
477 }
478 
479 
480 llvm::Value *
481 FunctionEmitContext::GetFullMaskPointer() {
482  return fullMaskPointer;
483 }
484 
485 
486 void
487 FunctionEmitContext::SetFunctionMask(llvm::Value *value) {
488  functionMaskValue = value;
489  if (bblock != NULL)
490  StoreInst(GetFullMask(), fullMaskPointer);
491 }
492 
493 
494 void
495 FunctionEmitContext::SetBlockEntryMask(llvm::Value *value) {
496  blockEntryMask = value;
497 }
498 
499 
500 void
501 FunctionEmitContext::SetInternalMask(llvm::Value *value) {
502  StoreInst(value, internalMaskPointer);
503  // kludge so that __mask returns the right value in ispc code.
504  StoreInst(GetFullMask(), fullMaskPointer);
505 }
506 
507 
508 void
509 FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) {
510  llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask,
511  test, "oldMask&test");
512  SetInternalMask(mask);
513 }
514 
515 
516 void
517 FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test) {
518  llvm::Value *notTest = BinaryOperator(llvm::Instruction::Xor, test, LLVMMaskAllOn,
519  "~test");
520  llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, notTest,
521  "oldMask&~test");
522  SetInternalMask(mask);
523 }
524 
525 
526 void
527 FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
528  AssertPos(currentPos, bblock != NULL);
529  llvm::Value *any = Any(GetFullMask());
530  BranchInst(btrue, bfalse, any);
531  // It's illegal to add any additional instructions to the basic block
532  // now that it's terminated, so set bblock to NULL to be safe
533  bblock = NULL;
534 }
535 
536 
537 void
538 FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
539  AssertPos(currentPos, bblock != NULL);
540  llvm::Value *all = All(GetFullMask());
541  BranchInst(btrue, bfalse, all);
542  // It's illegal to add any additional instructions to the basic block
543  // now that it's terminated, so set bblock to NULL to be safe
544  bblock = NULL;
545 }
546 
547 
548 void
549 FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
550  AssertPos(currentPos, bblock != NULL);
551  // switch sense of true/false bblocks
552  BranchIfMaskAny(bfalse, btrue);
553  // It's illegal to add any additional instructions to the basic block
554  // now that it's terminated, so set bblock to NULL to be safe
555  bblock = NULL;
556 }
557 
558 
559 void
560 FunctionEmitContext::StartUniformIf() {
561  controlFlowInfo.push_back(CFInfo::GetIf(true, GetInternalMask()));
562 }
563 
564 
565 void
566 FunctionEmitContext::StartVaryingIf(llvm::Value *oldMask) {
567  controlFlowInfo.push_back(CFInfo::GetIf(false, oldMask));
568 }
569 
570 
571 void
572 FunctionEmitContext::EndIf() {
573  CFInfo *ci = popCFState();
574  // Make sure we match up with a Start{Uniform,Varying}If().
575  AssertPos(currentPos, ci->IsIf());
576 
577  // 'uniform' ifs don't change the mask so we only need to restore the
578  // mask going into the if for 'varying' if statements
579  if (ci->IsUniform() || bblock == NULL)
580  return;
581 
582  // We can't just restore the mask as it was going into the 'if'
583  // statement. First we have to take into account any program
584  // instances that have executed 'return' statements; the restored
585  // mask must be off for those lanes.
586  restoreMaskGivenReturns(ci->savedMask);
587 
588  // If the 'if' statement is inside a loop with a 'varying'
589  // condition, we also need to account for any break or continue
590  // statements that executed inside the 'if' statement; we must
591  // leave the lane masks for the program instances that ran those
592  // off after we restore the mask after the 'if'. The code below
593  // ends up being optimized out in the case that there were no break
594  // or continue statements (and breakLanesPtr and continueLanesPtr
595  // have their initial 'all off' values), so we don't need to check
596  // for that here.
597  //
598  // There are three general cases to deal with here:
599  // - Loops: both break and continue are allowed, and thus the corresponding
600  // lane mask pointers are non-NULL
601  // - Foreach: only continueLanesPtr may be non-NULL
602  // - Switch: only breakLanesPtr may be non-NULL
603  if (continueLanesPtr != NULL || breakLanesPtr != NULL) {
604  // We want to compute:
605  // newMask = (oldMask & ~(breakLanes | continueLanes)),
606  // treating breakLanes or continueLanes as "all off" if the
607  // corresponding pointer is NULL.
608  llvm::Value *bcLanes = NULL;
609 
610  if (continueLanesPtr != NULL)
611  bcLanes = LoadInst(continueLanesPtr, "continue_lanes");
612  else
613  bcLanes = LLVMMaskAllOff;
614 
615  if (breakLanesPtr != NULL) {
616  llvm::Value *breakLanes = LoadInst(breakLanesPtr, "break_lanes");
617  bcLanes = BinaryOperator(llvm::Instruction::Or, bcLanes,
618  breakLanes, "|break_lanes");
619  }
620 
621  llvm::Value *notBreakOrContinue =
622  BinaryOperator(llvm::Instruction::Xor,
623  bcLanes, LLVMMaskAllOn,
624  "!(break|continue)_lanes");
625  llvm::Value *oldMask = GetInternalMask();
626  llvm::Value *newMask =
627  BinaryOperator(llvm::Instruction::And, oldMask,
628  notBreakOrContinue, "new_mask");
629  SetInternalMask(newMask);
630  }
631 }
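// Worked example (editor's illustration, 4-wide mask, no returns): if the
// mask was 1111 entering a varying 'if' inside a loop, one lane executed
// 'break' (breakLanes = 0100) and one executed 'continue'
// (continueLanes = 0010), then:
//
//     bcLanes            = 0010 | 0100 = 0110
//     notBreakOrContinue = 0110 ^ 1111 = 1001
//     newMask            = 1111 & 1001 = 1001
//
// so the mask after the 'if' re-enables only the lanes that neither broke
// nor continued.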
632 
633 
634 void
635 FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct,
636  bool uniformCF) {
637  // Store the current values of various loop-related state so that we
638  // can restore it when we exit this loop.
639  llvm::Value *oldMask = GetInternalMask();
640  controlFlowInfo.push_back(CFInfo::GetLoop(uniformCF, breakTarget,
641  continueTarget, breakLanesPtr,
642  continueLanesPtr, oldMask, blockEntryMask));
643  if (uniformCF)
644  // If the loop has a uniform condition, we don't need to track
645  // which lanes 'break' or 'continue'; all of the running ones go
646  // together, so we just jump
647  breakLanesPtr = continueLanesPtr = NULL;
648  else {
649  // For loops with varying conditions, allocate space to store masks
650  // that record which lanes have done these
651  continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "continue_lanes_memory");
652  StoreInst(LLVMMaskAllOff, continueLanesPtr);
653  breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
654  StoreInst(LLVMMaskAllOff, breakLanesPtr);
655  }
656 
657  breakTarget = bt;
658  continueTarget = ct;
659  blockEntryMask = NULL; // this better be set by the loop!
660 }
661 
662 
663 void
664 FunctionEmitContext::EndLoop() {
665  CFInfo *ci = popCFState();
666  AssertPos(currentPos, ci->IsLoop());
667 
668  if (!ci->IsUniform())
669  // If the loop had a 'uniform' test, then it didn't make any
670  // changes to the mask so there's nothing to restore. If it had a
671  // varying test, we need to restore the mask to what it was going
672  // into the loop, but still leaving off any lanes that executed a
673  // 'return' statement.
674  restoreMaskGivenReturns(ci->savedMask);
675 }
676 
677 
678 void
679 FunctionEmitContext::StartForeach(ForeachType ft) {
680  // Issue an error if we're in a nested foreach...
681  if (ft == FOREACH_REGULAR) {
682  for (int i = 0; i < (int)controlFlowInfo.size(); ++i) {
683  if (controlFlowInfo[i]->type == CFInfo::ForeachRegular) {
684  Error(currentPos, "Nested \"foreach\" statements are currently "
685  "illegal.");
686  break;
687  // Don't return here, however, and in turn allow the caller to
688  // do the rest of its codegen and then call EndForeach()
689  // normally--the idea being that this gives a chance to find
690  // any other errors inside the body of the foreach loop...
691  }
692  }
693  }
694 
695  // Store the current values of various loop-related state so that we
696  // can restore it when we exit this loop.
697  llvm::Value *oldMask = GetInternalMask();
698  controlFlowInfo.push_back(CFInfo::GetForeach(ft, breakTarget, continueTarget,
699  breakLanesPtr, continueLanesPtr,
700  oldMask, blockEntryMask));
701  breakLanesPtr = NULL;
702  breakTarget = NULL;
703 
704  continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "foreach_continue_lanes");
705  StoreInst(LLVMMaskAllOff, continueLanesPtr);
706  continueTarget = NULL; // should be set by SetContinueTarget()
707 
708  blockEntryMask = NULL;
709 }
710 
711 
712 void
713 FunctionEmitContext::EndForeach() {
714  CFInfo *ci = popCFState();
715  AssertPos(currentPos, ci->IsForeach());
716 }
717 
718 
719 void
720 FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
721  if (!bblock)
722  return;
723 
724  // Restore the mask to the given old mask, but leave off any lanes that
725  // executed a return statement.
726  // newMask = (oldMask & ~returnedLanes)
727  llvm::Value *returnedLanes = LoadInst(returnedLanesPtr,
728  "returned_lanes");
729  llvm::Value *notReturned = BinaryOperator(llvm::Instruction::Xor,
730  returnedLanes, LLVMMaskAllOn,
731  "~returned_lanes");
732  llvm::Value *newMask = BinaryOperator(llvm::Instruction::And,
733  oldMask, notReturned, "new_mask");
734  SetInternalMask(newMask);
735 }
736 
737 
738 /** Returns "true" if the first enclosing non-if control flow expression is
739  a "switch" statement.
740 */
741 bool
742 FunctionEmitContext::inSwitchStatement() const {
743  // Go backwards through controlFlowInfo, since we add new nested scopes
744  // to the back.
745  int i = controlFlowInfo.size() - 1;
746  while (i >= 0 && controlFlowInfo[i]->IsIf())
747  --i;
748  // Got to the first non-if (or end of CF info)
749  if (i == -1)
750  return false;
751  return controlFlowInfo[i]->IsSwitch();
752 }
753 
754 
755 void
756 FunctionEmitContext::Break(bool doCoherenceCheck) {
757  if (breakTarget == NULL) {
758  Error(currentPos, "\"break\" statement is illegal outside of "
759  "for/while/do loops and \"switch\" statements.");
760  return;
761  }
762  AssertPos(currentPos, controlFlowInfo.size() > 0);
763 
764  if (bblock == NULL)
765  return;
766 
767  if (inSwitchStatement() == true &&
768  switchConditionWasUniform == true &&
769  ifsInCFAllUniform(CFInfo::Switch)) {
770  // We know that all program instances are executing the break, so
771  // just jump to the block immediately after the switch.
772  AssertPos(currentPos, breakTarget != NULL);
773  BranchInst(breakTarget);
774  bblock = NULL;
775  return;
776  }
777 
778  // If all of the enclosing 'if' tests in the loop have uniform control
779  // flow or if we can tell that the mask is all on, then we can just
780  // jump to the break location.
781  if (inSwitchStatement() == false && ifsInCFAllUniform(CFInfo::Loop)) {
782  BranchInst(breakTarget);
783  // Set bblock to NULL since the jump has terminated the basic block
784  bblock = NULL;
785  }
786  else {
787  // Varying switch, uniform switch where the 'break' is under
788  // varying control flow, or a loop with varying 'if's above the
789  // break. In these cases, we need to update the mask of the lanes
790  // that have executed a 'break' statement:
791  // breakLanes = breakLanes | mask
792  AssertPos(currentPos, breakLanesPtr != NULL);
793 
794  llvm::Value *mask = GetInternalMask();
795  llvm::Value *breakMask = LoadInst(breakLanesPtr,
796  "break_mask");
797  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
798  mask, breakMask, "mask|break_mask");
799  StoreInst(newMask, breakLanesPtr);
800 
801  // Set the current mask to be all off, just in case there are any
802  // statements in the same scope after the 'break'. Most of the time
803  // this will be optimized away since we'll likely end the scope of
804  // an 'if' statement and restore the mask then.
805  SetInternalMask(LLVMMaskAllOff);
806 
807  if (doCoherenceCheck) {
808  if (continueTarget != NULL)
809  // If the user has indicated that this is a 'coherent'
810  // break statement, then check to see if the mask is all
811  // off. If so, we have to conservatively jump to the
812  // continueTarget, not the breakTarget, since part of the
813  // reason the mask is all off may be due to 'continue'
814  // statements that executed in the current loop iteration.
815  jumpIfAllLoopLanesAreDone(continueTarget);
816  else if (breakTarget != NULL)
817  // Similarly handle these for switch statements, where we
818  // only have a break target.
819  jumpIfAllLoopLanesAreDone(breakTarget);
820  }
821  }
822 }
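// Worked example (editor's illustration, 4-wide): at a varying 'break' with
// internal mask 1100, the code above stores breakLanes |= 1100 and then sets
// the internal mask to 0000, so nothing later in the current scope executes
// for those lanes; they stay recorded in breakLanes until control reaches
// the break target and the enclosing construct restores them.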
823 
824 
825 static bool
826 lEnclosingLoopIsForeachActive(const std::vector<CFInfo *> &controlFlowInfo) {
827  for (int i = (int)controlFlowInfo.size() - 1; i >= 0; --i) {
828  if (controlFlowInfo[i]->type == CFInfo::ForeachActive)
829  return true;
830  }
831  return false;
832 }
833 
834 
835 void
836 FunctionEmitContext::Continue(bool doCoherenceCheck) {
837  if (!continueTarget) {
838  Error(currentPos, "\"continue\" statement illegal outside of "
839  "for/while/do/foreach loops.");
840  return;
841  }
842  AssertPos(currentPos, controlFlowInfo.size() > 0);
843 
844  if (ifsInCFAllUniform(CFInfo::Loop) ||
845  lEnclosingLoopIsForeachActive(controlFlowInfo)) {
846  // Similarly to 'break' statements, we can immediately jump to the
847  // continue target if we're only in 'uniform' control flow within
848  // loop or if we can tell that the mask is all on. Here, we can
849  // also jump if the enclosing loop is a 'foreach_active' loop, in
850  // which case we know that only a single program instance is
851  // executing.
852  AddInstrumentationPoint("continue: uniform CF, jumped");
853  BranchInst(continueTarget);
854  bblock = NULL;
855  }
856  else {
857  // Otherwise update the stored value of which lanes have 'continue'd.
858  // continueLanes = continueLanes | mask
859  AssertPos(currentPos, continueLanesPtr != NULL);
860  llvm::Value *mask = GetInternalMask();
861  llvm::Value *continueMask =
862  LoadInst(continueLanesPtr, "continue_mask");
863  llvm::Value *newMask =
864  BinaryOperator(llvm::Instruction::Or, mask, continueMask,
865  "mask|continueMask");
866  StoreInst(newMask, continueLanesPtr);
867 
868  // And set the current mask to be all off in case there are any
869  // statements in the same scope after the 'continue'
870  SetInternalMask(LLVMMaskAllOff);
871 
872  if (doCoherenceCheck)
873  // If this is a 'coherent continue' statement, then emit the
874  // code to see if all of the lanes are now off due to
875  // breaks/continues and jump to the continue target if so.
876  jumpIfAllLoopLanesAreDone(continueTarget);
877  }
878 }
879 
880 
881 /** This function checks to see if all of the 'if' statements (if any)
882  between the current scope and the first enclosing loop/switch of given
883  control flow type have 'uniform' tests.
884  */
885 bool
886 FunctionEmitContext::ifsInCFAllUniform(int type) const {
887  AssertPos(currentPos, controlFlowInfo.size() > 0);
888  // Go backwards through controlFlowInfo, since we add new nested scopes
889  // to the back. Stop once we come to the first enclosing control flow
890  // structure of the desired type.
891  int i = controlFlowInfo.size() - 1;
892  while (i >= 0 && controlFlowInfo[i]->type != type) {
893  if (controlFlowInfo[i]->isUniform == false)
894  // Found a scope due to an 'if' statement with a varying test
895  return false;
896  --i;
897  }
898  AssertPos(currentPos, i >= 0); // else we didn't find the expected control flow type!
899  return true;
900 }
901 
902 
903 void
904 FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) {
905  llvm::Value *allDone = NULL;
906 
907  if (breakLanesPtr == NULL) {
908  llvm::Value *continued = LoadInst(continueLanesPtr,
909  "continue_lanes");
910  continued = BinaryOperator(llvm::Instruction::And,
911  continued, GetFunctionMask(),
912  "continued&func");
913  allDone = MasksAllEqual(continued, blockEntryMask);
914  }
915  else {
916  // Check to see if (returned lanes | continued lanes | break lanes) is
917  // equal to the value of mask at the start of the loop iteration. If
918  // so, everyone is done and we can jump to the given target
919  llvm::Value *returned = LoadInst(returnedLanesPtr,
920  "returned_lanes");
921  llvm::Value *breaked = LoadInst(breakLanesPtr, "break_lanes");
922  llvm::Value *finishedLanes = BinaryOperator(llvm::Instruction::Or,
923  returned, breaked,
924  "returned|breaked");
925  if (continueLanesPtr != NULL) {
926  // It's NULL for "switch" statements...
927  llvm::Value *continued = LoadInst(continueLanesPtr,
928  "continue_lanes");
929  finishedLanes = BinaryOperator(llvm::Instruction::Or, finishedLanes,
930  continued, "returned|breaked|continued");
931  }
932 
933  finishedLanes = BinaryOperator(llvm::Instruction::And,
934  finishedLanes, GetFunctionMask(),
935  "finished&func");
936 
937  // Do we match the mask at loop or switch statement entry?
938  allDone = MasksAllEqual(finishedLanes, blockEntryMask);
939  }
940 
941  llvm::BasicBlock *bAll = CreateBasicBlock("all_continued_or_breaked");
942  llvm::BasicBlock *bNotAll = CreateBasicBlock("not_all_continued_or_breaked");
943  BranchInst(bAll, bNotAll, allDone);
944 
945  // If so, have an extra basic block along the way to add
946  // instrumentation, if the user asked for it.
947  bblock = bAll;
948  AddInstrumentationPoint("break/continue: all dynamically went");
949  BranchInst(target);
950 
951  // And set the current basic block to a new one for future instructions
952  // for the path where we weren't able to jump
953  bblock = bNotAll;
954  AddInstrumentationPoint("break/continue: not all went");
955 }
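// Worked example (editor's illustration, 4-wide loop, all-on function mask):
// if blockEntryMask was 1110 and by this point returnedLanes = 1000,
// breakLanes = 0100 and continueLanes = 0010, then
// finishedLanes = 1000 | 0100 | 0010 = 1110, which equals the entry mask,
// so allDone is true and the code branches straight to 'target' instead of
// running the rest of the iteration with an all-off mask.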
956 
957 
958 void
959 FunctionEmitContext::RestoreContinuedLanes() {
960  if (continueLanesPtr == NULL)
961  return;
962 
963  // mask = mask & continueFlags
964  llvm::Value *mask = GetInternalMask();
965  llvm::Value *continueMask = LoadInst(continueLanesPtr,
966  "continue_mask");
967  llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or,
968  mask, continueMask, "mask|continue_mask");
969  SetInternalMask(orMask);
970 
971  // continueLanes = 0
972  StoreInst(LLVMMaskAllOff, continueLanesPtr);
973 }
974 
975 
976 void
977 FunctionEmitContext::ClearBreakLanes() {
978  if (breakLanesPtr == NULL)
979  return;
980 
981  // breakLanes = 0
982  StoreInst(LLVMMaskAllOff, breakLanesPtr);
983 }
984 
985 
986 void
987 FunctionEmitContext::StartSwitch(bool cfIsUniform, llvm::BasicBlock *bbBreak) {
988  llvm::Value *oldMask = GetInternalMask();
989  controlFlowInfo.push_back(CFInfo::GetSwitch(cfIsUniform, breakTarget,
990  continueTarget, breakLanesPtr,
991  continueLanesPtr, oldMask,
992  blockEntryMask, switchExpr,
993  defaultBlock, caseBlocks, nextBlocks,
994  switchConditionWasUniform));
995 
996  breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
997  StoreInst(LLVMMaskAllOff, breakLanesPtr);
998  breakTarget = bbBreak;
999 
1000  continueLanesPtr = NULL;
1001  continueTarget = NULL;
1002  blockEntryMask = NULL;
1003 
1004  // These will be set by the SwitchInst() method
1005  switchExpr = NULL;
1006  defaultBlock = NULL;
1007  caseBlocks = NULL;
1008  nextBlocks = NULL;
1009 }
1010 
1011 
1012 void
1013 FunctionEmitContext::EndSwitch() {
1014  AssertPos(currentPos, bblock != NULL);
1015 
1016  CFInfo *ci = popCFState();
1017  if (ci->IsVarying() && bblock != NULL)
1018  restoreMaskGivenReturns(ci->savedMask);
1019 }
1020 
1021 
1022 /** Emit code to check for an "all off" mask before the code for a
1023  case or default label in a "switch" statement.
1024  */
1025 void
1026 FunctionEmitContext::addSwitchMaskCheck(llvm::Value *mask) {
1027  llvm::Value *allOff = None(mask);
1028  llvm::BasicBlock *bbSome = CreateBasicBlock("case_default_on");
1029 
1030  // Find the basic block for the case or default label immediately after
1031  // the current one in the switch statement--that's where we want to
1032  // jump if the mask is all off at this label.
1033  AssertPos(currentPos, nextBlocks->find(bblock) != nextBlocks->end());
1034  llvm::BasicBlock *bbNext = nextBlocks->find(bblock)->second;
1035 
1036  // Jump to the next one if the mask is all off; otherwise jump to the
1037  // newly created block that will hold the actual code for this label.
1038  BranchInst(bbNext, bbSome, allOff);
1039  SetCurrentBasicBlock(bbSome);
1040 }
1041 
1042 
1043 /** Returns the execution mask at entry to the first enclosing "switch"
1044  statement. */
1045 llvm::Value *
1046 FunctionEmitContext::getMaskAtSwitchEntry() {
1047  AssertPos(currentPos, controlFlowInfo.size() > 0);
1048  int i = controlFlowInfo.size() - 1;
1049  while (i >= 0 && controlFlowInfo[i]->type != CFInfo::Switch)
1050  --i;
1051  AssertPos(currentPos, i != -1);
1052  return controlFlowInfo[i]->savedMask;
1053 }
1054 
1055 
1056 void
1057 FunctionEmitContext::EmitDefaultLabel(bool checkMask, SourcePos pos) {
1058  if (inSwitchStatement() == false) {
1059  Error(pos, "\"default\" label illegal outside of \"switch\" "
1060  "statement.");
1061  return;
1062  }
1063 
1064  // If there's a default label in the switch, a basic block for it
1065  // should have been provided in the previous call to SwitchInst().
1066  AssertPos(currentPos, defaultBlock != NULL);
1067 
1068  if (bblock != NULL)
1069  // The previous case in the switch fell through, or we're in a
1070  // varying switch; terminate the current block with a jump to the
1071  // block for the code for the default label.
1072  BranchInst(defaultBlock);
1073  SetCurrentBasicBlock(defaultBlock);
1074 
1075  if (switchConditionWasUniform)
1076  // Nothing more to do for this case; return back to the caller,
1077  // which will then emit the code for the default case.
1078  return;
1079 
1080  // For a varying switch, we need to update the execution mask.
1081  //
1082  // First, compute the mask that corresponds to which program instances
1083  // should execute the "default" code; this corresponds to the set of
1084  // program instances that don't match any of the case statements.
1085  // Therefore, we generate code that compares the value of the switch
1086  // expression to the value associated with each of the "case"
1087  // statements such that the surviving lanes didn't match any of them.
1088  llvm::Value *matchesDefault = getMaskAtSwitchEntry();
1089  for (int i = 0; i < (int)caseBlocks->size(); ++i) {
1090  int value = (*caseBlocks)[i].first;
1091  llvm::Value *valueVec = (switchExpr->getType() == LLVMTypes::Int32VectorType) ?
1092  LLVMInt32Vector(value) : LLVMInt64Vector(value);
1093  // TODO: for AVX2 at least, the following generates better code
1094  // than doing ICMP_NE and skipping the NotOperator() below; file a
1095  // LLVM bug?
1096  llvm::Value *matchesCaseValue =
1097  CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr,
1098  valueVec, "cmp_case_value");
1099  matchesCaseValue = I1VecToBoolVec(matchesCaseValue);
1100 
1101  llvm::Value *notMatchesCaseValue = NotOperator(matchesCaseValue);
1102  matchesDefault = BinaryOperator(llvm::Instruction::And, matchesDefault,
1103  notMatchesCaseValue, "default&~case_match");
1104  }
1105 
1106  // The mask may have some lanes on, which corresponds to the previous
1107  // label falling through; compute the updated mask by ORing the
1108  // matching lanes into the current mask.
1109  llvm::Value *oldMask = GetInternalMask();
1110  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask,
1111  matchesDefault, "old_mask|matches_default");
1112  SetInternalMask(newMask);
1113 
1114  if (checkMask)
1115  addSwitchMaskCheck(newMask);
1116 }
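// Worked example (editor's illustration, 4-wide): for switch lanes with
// values <1, 3, 1, 7>, an all-on mask at switch entry, and a single
// "case 1" label, matchesCaseValue for case 1 is <1,0,1,0>, so
// matchesDefault ends up as 1111 & ~<1,0,1,0> = 0101: exactly the lanes
// whose value matched no case run the default code (plus any lanes still
// on from a fall-through).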
1117 
1118 
1119 void
1120 FunctionEmitContext::EmitCaseLabel(int value, bool checkMask, SourcePos pos) {
1121  if (inSwitchStatement() == false) {
1122  Error(pos, "\"case\" label illegal outside of \"switch\" statement.");
1123  return;
1124  }
1125 
1126  // Find the basic block for this case statement.
1127  llvm::BasicBlock *bbCase = NULL;
1128  AssertPos(currentPos, caseBlocks != NULL);
1129  for (int i = 0; i < (int)caseBlocks->size(); ++i)
1130  if ((*caseBlocks)[i].first == value) {
1131  bbCase = (*caseBlocks)[i].second;
1132  break;
1133  }
1134  AssertPos(currentPos, bbCase != NULL);
1135 
1136  if (bblock != NULL)
1137  // fall through from the previous case
1138  BranchInst(bbCase);
1139  SetCurrentBasicBlock(bbCase);
1140 
1141  if (switchConditionWasUniform)
1142  return;
1143 
1144  // update the mask: first, get a mask that indicates which program
1145  // instances have a value for the switch expression that matches this
1146  // case statement.
1147  llvm::Value *valueVec = (switchExpr->getType() == LLVMTypes::Int32VectorType) ?
1148  LLVMInt32Vector(value) : LLVMInt64Vector(value);
1149  llvm::Value *matchesCaseValue =
1150  CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr,
1151  valueVec, "cmp_case_value");
1152  matchesCaseValue = I1VecToBoolVec(matchesCaseValue);
1153 
1154  // If a lane was off going into the switch, we don't care if it has a
1155  // value in the switch expression that happens to match this case.
1156  llvm::Value *entryMask = getMaskAtSwitchEntry();
1157  matchesCaseValue = BinaryOperator(llvm::Instruction::And, entryMask,
1158  matchesCaseValue, "entry_mask&case_match");
1159 
1160  // Take the surviving lanes and turn on the mask for them.
1161  llvm::Value *oldMask = GetInternalMask();
1162  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask,
1163  matchesCaseValue, "mask|case_match");
1164  SetInternalMask(newMask);
1165 
1166  if (checkMask)
1167  addSwitchMaskCheck(newMask);
1168 }
1169 
1170 
1171 void
1172 FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault,
1173  const std::vector<std::pair<int, llvm::BasicBlock *> > &bbCases,
1174  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> &bbNext) {
1175  // The calling code should have called StartSwitch() before calling
1176  // SwitchInst().
1177  AssertPos(currentPos, controlFlowInfo.size() &&
1178  controlFlowInfo.back()->IsSwitch());
1179 
1180  switchExpr = expr;
1181  defaultBlock = bbDefault;
1182  caseBlocks = new std::vector<std::pair<int, llvm::BasicBlock *> >(bbCases);
1183  nextBlocks = new std::map<llvm::BasicBlock *, llvm::BasicBlock *>(bbNext);
1184  switchConditionWasUniform =
1185  (llvm::isa<llvm::VectorType>(expr->getType()) == false);
1186 
1187  if (switchConditionWasUniform == true) {
1188  // For a uniform switch condition, just wire things up to the LLVM
1189  // switch instruction.
1190  llvm::SwitchInst *s = llvm::SwitchInst::Create(expr, bbDefault,
1191  bbCases.size(), bblock);
1192  for (int i = 0; i < (int)bbCases.size(); ++i) {
1193  if (expr->getType() == LLVMTypes::Int32Type)
1194  s->addCase(LLVMInt32(bbCases[i].first), bbCases[i].second);
1195  else {
1196  AssertPos(currentPos, expr->getType() == LLVMTypes::Int64Type);
1197  s->addCase(LLVMInt64(bbCases[i].first), bbCases[i].second);
1198  }
1199  }
1200 
1201  AddDebugPos(s);
1202  // switch is a terminator
1203  bblock = NULL;
1204  }
1205  else {
1206  // For a varying switch, we first turn off all lanes of the mask
1207  SetInternalMask(LLVMMaskAllOff);
1208 
1209  if (nextBlocks->size() > 0) {
1210  // If there are any labels inside the switch, jump to the first
1211  // one; any code before the first label won't be executed by
1212  // anyone.
1213  std::map<llvm::BasicBlock *, llvm::BasicBlock *>::const_iterator iter;
1214  iter = nextBlocks->find(NULL);
1215  AssertPos(currentPos, iter != nextBlocks->end());
1216  llvm::BasicBlock *bbFirst = iter->second;
1217  BranchInst(bbFirst);
1218  bblock = NULL;
1219  }
1220  }
1221 }
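// Note (editor's illustration): a uniform switch becomes a single LLVM
// 'switch' terminator, e.g.
//
//     switch i32 %x, label %default [ i32 1, label %case1
//                                     i32 2, label %case2 ]
//
// whereas a varying switch emits no data-dependent branch here at all: the
// mask is cleared, control falls through every label in order, and
// EmitCaseLabel()/EmitDefaultLabel() turn lanes on and off as each label
// is reached.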
1222 
1223 
1224 int
1225 FunctionEmitContext::VaryingCFDepth() const {
1226  int sum = 0;
1227  for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
1228  if (controlFlowInfo[i]->IsVarying())
1229  ++sum;
1230  return sum;
1231 }
1232 
1233 
1234 bool
1235 FunctionEmitContext::InForeachLoop() const {
1236  for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
1237  if (controlFlowInfo[i]->IsForeach())
1238  return true;
1239  return false;
1240 }
1241 
1242 
1243 void
1244 FunctionEmitContext::DisableGatherScatterWarnings() {
1245  ++disableGSWarningCount;
1246 }
1247 
1248 
1249 void
1250 FunctionEmitContext::EnableGatherScatterWarnings() {
1251  --disableGSWarningCount;
1252 }
1253 
1254 
1255 
1256 bool
1257 initLabelBBlocks(ASTNode *node, void *data) {
1258  LabeledStmt *ls = llvm::dyn_cast<LabeledStmt>(node);
1259  if (ls == NULL)
1260  return true;
1261 
1262  FunctionEmitContext *ctx = (FunctionEmitContext *)data;
1263 
1264  if (ctx->labelMap.find(ls->name) != ctx->labelMap.end())
1265  Error(ls->pos, "Multiple labels named \"%s\" in function.",
1266  ls->name.c_str());
1267  else {
1268  llvm::BasicBlock *bb = ctx->CreateBasicBlock(ls->name.c_str());
1269  ctx->labelMap[ls->name] = bb;
1270  }
1271  return true;
1272 }
1273 
1274 
1275 void
1276 FunctionEmitContext::InitializeLabelMap(Stmt *code) {
1277  labelMap.erase(labelMap.begin(), labelMap.end());
1278  WalkAST(code, initLabelBBlocks, NULL, this);
1279 }
1280 
1281 
1282 llvm::BasicBlock *
1283 FunctionEmitContext::GetLabeledBasicBlock(const std::string &label) {
1284  if (labelMap.find(label) != labelMap.end())
1285  return labelMap[label];
1286  else
1287  return NULL;
1288 }
1289 
1290 std::vector<std::string>
1291 FunctionEmitContext::GetLabels() {
1292  // Reserve room for the labels (presizing would insert empty strings)
1293  std::vector<std::string> labels; labels.reserve(labelMap.size());
1294 
1295  // Iterate through labelMap and grab only the keys
1296  std::map<std::string, llvm::BasicBlock*>::iterator iter;
1297  for (iter=labelMap.begin(); iter != labelMap.end(); iter++)
1298  labels.push_back(iter->first);
1299 
1300  return labels;
1301 }
1302 
1303 
1304 void
1305 FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
1306  const Type *returnType = function->GetReturnType();
1307  if (returnType->IsVoidType()) {
1308  if (expr != NULL)
1309  Error(expr->pos, "Can't return non-void type \"%s\" from void function.",
1310  expr->GetType()->GetString().c_str());
1311  }
1312  else {
1313  if (expr == NULL) {
1314  Error(funcStartPos, "Must provide return value for return "
1315  "statement for non-void function.");
1316  return;
1317  }
1318 
1319  expr = TypeConvertExpr(expr, returnType, "return statement");
1320  if (expr != NULL) {
1321  llvm::Value *retVal = expr->GetValue(this);
1322  if (retVal != NULL) {
1323  if (returnType->IsUniformType() ||
1324  CastType<ReferenceType>(returnType) != NULL)
1325  StoreInst(retVal, returnValuePtr);
1326  else {
1327  // Use a masked store to store the value of the expression
1328  // in the return value memory; this preserves the return
1329  // values from other lanes that may have executed return
1330  // statements previously.
1331  StoreInst(retVal, returnValuePtr, GetInternalMask(),
1332  returnType, PointerType::GetUniform(returnType));
1333  }
1334  }
1335  }
1336  }
1337 
1338  if (VaryingCFDepth() == 0) {
1339  // If there is only uniform control flow between us and the
1340  // function entry, then it's guaranteed that all lanes are running,
1341  // so we can just emit a true return instruction
1342  AddInstrumentationPoint("return: uniform control flow");
1343  ReturnInst();
1344  }
1345  else {
1346  // Otherwise we update the returnedLanes value by ANDing it with
1347  // the current lane mask.
1348  llvm::Value *oldReturnedLanes =
1349  LoadInst(returnedLanesPtr, "old_returned_lanes");
1350  llvm::Value *newReturnedLanes =
1351  BinaryOperator(llvm::Instruction::Or, oldReturnedLanes,
1352  GetFullMask(), "old_mask|returned_lanes");
1353 
1354  // For 'coherent' return statements, emit code to check if all
1355  // lanes have returned
1356  if (doCoherenceCheck) {
1357  // if newReturnedLanes == functionMaskValue, get out of here!
1358  llvm::Value *cmp = MasksAllEqual(functionMaskValue,
1359  newReturnedLanes);
1360  llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return");
1361  llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return");
1362  BranchInst(bDoReturn, bNoReturn, cmp);
1363 
1364  bblock = bDoReturn;
1365  AddInstrumentationPoint("return: all lanes have returned");
1366  ReturnInst();
1367 
1368  bblock = bNoReturn;
1369  }
1370  // Otherwise update returnedLanesPtr and turn off all of the lanes
1371  // in the current mask so that any subsequent statements in the
1372  // same scope after the return have no effect
1373  StoreInst(newReturnedLanes, returnedLanesPtr);
1374  AddInstrumentationPoint("return: some but not all lanes have returned");
1375  SetInternalMask(LLVMMaskAllOff);
1376  }
1377 }
1378 
1379 
1380 llvm::Value *
1381 FunctionEmitContext::Any(llvm::Value *mask) {
1382  // Call the target-dependent any function to test that the mask is non-zero
1383  std::vector<Symbol *> mm;
1384  m->symbolTable->LookupFunction("__any", &mm);
1385  if (g->target->getMaskBitCount() == 1)
1386  AssertPos(currentPos, mm.size() == 1);
1387  else
1388  // There should be one with signed int signature, one unsigned int.
1389  AssertPos(currentPos, mm.size() == 2);
1390  // We can actually call either one, since both are i32s as far as
1391  // LLVM's type system is concerned...
1392  llvm::Function *fmm = mm[0]->function;
1393  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_any"));
1394 }
1395 
1396 
1397 llvm::Value *
1398 FunctionEmitContext::All(llvm::Value *mask) {
1399  // Call the target-dependent __all function to test whether the mask
1400  // is all on
1401  std::vector<Symbol *> mm;
1402  m->symbolTable->LookupFunction("__all", &mm);
1403  if (g->target->getMaskBitCount() == 1)
1404  AssertPos(currentPos, mm.size() == 1);
1405  else
1406  // There should be one with signed int signature, one unsigned int.
1407  AssertPos(currentPos, mm.size() == 2);
1408  // We can actually call either one, since both are i32s as far as
1409  // LLVM's type system is concerned...
1410  llvm::Function *fmm = mm[0]->function;
1411  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_all"));
1412 }
1413 
1414 
1415 llvm::Value *
1416 FunctionEmitContext::None(llvm::Value *mask) {
1417  // Call the target-dependent __none function to test whether the mask
1418  // is all off
1419  std::vector<Symbol *> mm;
1420  m->symbolTable->LookupFunction("__none", &mm);
1421  if (g->target->getMaskBitCount() == 1)
1422  AssertPos(currentPos, mm.size() == 1);
1423  else
1424  // There should be one with signed int signature, one unsigned int.
1425  AssertPos(currentPos, mm.size() == 2);
1426  // We can actually call either one, since both are i32s as far as
1427  // LLVM's type system is concerned...
1428  llvm::Function *fmm = mm[0]->function;
1429  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_none"));
1430 }
1431 
1432 
1433 llvm::Value *
1434 FunctionEmitContext::LaneMask(llvm::Value *v) {
1435 #ifdef ISPC_NVPTX_ENABLED
1436  /* this makes mandelbrot example slower with "nvptx" target.
1437  * Needs further investigation. */
1438  const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
1439 #else
1440  const char *__movmsk = "__movmsk";
1441 #endif
1442  // Call the target-dependent movmsk function to turn the vector mask
1443  // into an i64 value
1444  std::vector<Symbol *> mm;
1445  m->symbolTable->LookupFunction(__movmsk, &mm);
1446  if (g->target->getMaskBitCount() == 1)
1447  AssertPos(currentPos, mm.size() == 1);
1448  else
1449  // There should be one with signed int signature, one unsigned int.
1450  AssertPos(currentPos, mm.size() == 2);
1451  // We can actually call either one, since both are i32s as far as
1452  // LLVM's type system is concerned...
1453  llvm::Function *fmm = mm[0]->function;
1454  return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
1455 }
1456 
1457 #ifdef ISPC_NVPTX_ENABLED
1458 bool lAppendInsertExtractName(llvm::Value *vector, std::string &funcName)
1459 {
1460  llvm::Type *type = vector->getType();
1461  if (type == LLVMTypes::Int8VectorType)
1462  funcName += "_int8";
1463  else if (type == LLVMTypes::Int16VectorType)
1464  funcName += "_int16";
1465  else if (type == LLVMTypes::Int32VectorType)
1466  funcName += "_int32";
1467  else if (type == LLVMTypes::Int64VectorType)
1468  funcName += "_int64";
1469  else if (type == LLVMTypes::FloatVectorType)
1470  funcName += "_float";
1471  else if (type == LLVMTypes::DoubleVectorType)
1472  funcName += "_double";
1473  else
1474  return false;
1475  return true;
1476 }
1477 
1478 llvm::Value*
1479 FunctionEmitContext::Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar)
1480 {
1481  std::string funcName = "__insert";
1482  bool validType = lAppendInsertExtractName(vector, funcName);
1483  assert(validType && lane->getType() == LLVMTypes::Int32Type);
1484 
1485  llvm::Function *func = m->module->getFunction(funcName.c_str());
1486  assert(func != NULL);
1487  std::vector<llvm::Value *> args;
1488  args.push_back(vector);
1489  args.push_back(lane);
1490  args.push_back(scalar);
1491  llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
1492  return ret;
1493 }
1494 
1495 llvm::Value*
1496 FunctionEmitContext::Extract(llvm::Value *vector, llvm::Value *lane)
1497 {
1498  std::string funcName = "__extract";
1499  bool validType = lAppendInsertExtractName(vector, funcName);
1500  assert(validType && lane->getType() == LLVMTypes::Int32Type);
1501 
1502  llvm::Function *func = m->module->getFunction(funcName.c_str());
1503  assert(func != NULL);
1504  std::vector<llvm::Value *> args;
1505  args.push_back(vector);
1506  args.push_back(lane);
1507  llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
1508  return ret;
1509 }
1510 #endif /* ISPC_NVPTX_ENABLED */
1511 
1512 
1513 llvm::Value *
1514 FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
1515 #ifdef ISPC_NVPTX_ENABLED
1516  if (g->target->getISA() == Target::NVPTX)
1517  {
1518  // Compare the two masks to get a vector of i1s
1519  llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
1520  v1, v2, "v1==v2");
1521  return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
1522  }
1523 #endif /* ISPC_NVPTX_ENABLED */
1524 
1525 #if 0
1526  // Compare the two masks to get a vector of i1s
1527  llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
1528  v1, v2, "v1==v2");
1529  // Turn that into a bool vector type (often i32s)
1530  cmp = I1VecToBoolVec(cmp);
1531  // And see if it's all on
1532  return All(cmp);
1533 #else
1534  llvm::Value *mm1 = LaneMask(v1);
1535  llvm::Value *mm2 = LaneMask(v2);
1536  return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
1537  LLVMGetName("equal", v1, v2));
1538 #endif
1539 }
1540 
1541 llvm::Value *
1542 FunctionEmitContext::ProgramIndexVector(bool is32bits) {
1543  llvm::SmallVector<llvm::Constant*, 16> array;
1544  for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
1545  llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i);
1546  array.push_back(C);
1547  }
1548 
1549  llvm::Constant* index = llvm::ConstantVector::get(array);
1550 
1551  return index;
1552 }
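// Example (editor's illustration): on an 8-wide target,
// ProgramIndexVector(true) yields the constant
// <8 x i32> <0, 1, 2, 3, 4, 5, 6, 7>, i.e. the value of the ispc built-in
// 'programIndex'.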
1553 
1554 #ifdef ISPC_NVPTX_ENABLED
1555 llvm::Value *
1556 FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
1557  llvm::Function *func_program_index = m->module->getFunction("__program_index");
1558  llvm::Value *__program_index = CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__program_indexS");
1559  llvm::Value *index = InsertInst(llvm::UndefValue::get(LLVMTypes::Int32VectorType), __program_index, 0, "foreach__program_indexV");
1560 #if 0
1561  if (!is32bits)
1562  index = ZExtInst(index, LLVMTypes::Int64VectorType);
1563 #endif
1564  return index;
1565 }
1566 #endif /* ISPC_NVPTX_ENABLED */
1567 
1568 
1569 llvm::Value *
1570 FunctionEmitContext::GetStringPtr(const std::string &str) {
1571  llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
1572  llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
1573  llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(),
1574  true /*isConst*/,
1575  linkage, lstr, "__str");
1576  return new llvm::BitCastInst(lstrPtr, LLVMTypes::VoidPointerType,
1577  "str_void_ptr", bblock);
1578 }
1579 
1580 
1581 llvm::BasicBlock *
1582 FunctionEmitContext::CreateBasicBlock(const char *name) {
1583  return llvm::BasicBlock::Create(*g->ctx, name, llvmFunction);
1584 }
1585 
1586 
1587 llvm::Value *
1588 FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
1589  if (b == NULL) {
1590  AssertPos(currentPos, m->errorCount > 0);
1591  return NULL;
1592  }
1593 
1594  if (g->target->getMaskBitCount() == 1)
1595  return b;
1596 
1597  llvm::ArrayType *at =
1598  llvm::dyn_cast<llvm::ArrayType>(b->getType());
1599  if (at) {
1600  // If we're given an array of vectors of i1s, then do the
1601  // conversion for each of the elements
1602  llvm::Type *boolArrayType =
1603  llvm::ArrayType::get(LLVMTypes::BoolVectorType, at->getNumElements());
1604  llvm::Value *ret = llvm::UndefValue::get(boolArrayType);
1605 
1606  for (unsigned int i = 0; i < at->getNumElements(); ++i) {
1607  llvm::Value *elt = ExtractInst(b, i);
1608  llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType,
1609  LLVMGetName(elt, "_to_boolvec"));
1610  ret = InsertInst(ret, sext, i);
1611  }
1612  return ret;
1613  }
1614  else
1615  return SExtInst(b, LLVMTypes::BoolVectorType, LLVMGetName(b, "_to_boolvec"));
1616 }
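// Example (editor's illustration): on a target whose bool vector type is
// <4 x i32>, an i1 vector <1, 0, 1, 1> sign-extends to <-1, 0, -1, -1>,
// matching the "all bits set" representation ispc uses for true lanes.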
1617 
1618 
1619 static llvm::Value *
1620 lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
1621  llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s, true);
1622  std::string var_name = "_";
1623  var_name = var_name + s;
1624  llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(),
1625  true /* const */,
1626  llvm::GlobalValue::InternalLinkage,
1627  sConstant, var_name.c_str());
1628  llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) };
1629  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1630 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1631  return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock);
1632 #else /* LLVM 3.7+ */
1633  return llvm::GetElementPtrInst::Create(PTYPE(sPtr),
1634  sPtr, arrayRef, "sptr", bblock);
1635 #endif
1636 }
1637 
1638 
1639 void
1640 FunctionEmitContext::AddInstrumentationPoint(const char *note) {
1641  AssertPos(currentPos, note != NULL);
1642  if (!g->emitInstrumentation)
1643  return;
1644 
1645  std::vector<llvm::Value *> args;
1646  // arg 1: filename as string
1647  args.push_back(lGetStringAsValue(bblock, currentPos.name));
1648  // arg 2: provided note
1649  args.push_back(lGetStringAsValue(bblock, note));
1650  // arg 3: line number
1651  args.push_back(LLVMInt32(currentPos.first_line));
1652  // arg 4: current mask, movmsk'ed down to an int64
1653  args.push_back(LaneMask(GetFullMask()));
1654 
1655  llvm::Function *finst = m->module->getFunction("ISPCInstrument");
1656  CallInst(finst, NULL, args, "");
1657 }
1658 
1659 
1660 void
1661 FunctionEmitContext::SetDebugPos(SourcePos pos) {
1662  currentPos = pos;
1663 }
1664 
1665 
1666 SourcePos
1667 FunctionEmitContext::GetDebugPos() const {
1668  return currentPos;
1669 }
1670 
1671 
1672 void
1673 FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos,
1674 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1675  llvm::DIScope *scope) {
1676 #else /* LLVM 3.7+ */
1677  llvm::DIScope *scope) {
1678  //llvm::MDScope *scope) {
1679 #endif
1680  llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(value);
1681  if (inst != NULL && m->diBuilder) {
1682  SourcePos p = pos ? *pos : currentPos;
1683  if (p.first_line != 0)
1684  // If first_line == 0, then we're in the middle of setting up
1685  // the standard library or the like; don't add debug positions
1686  // for those functions
1687  inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column,
1688  scope ?
1689 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1690  *scope
1691 #else /* LLVM 3.7+ */
1692  scope
1693 #endif
1694  : GetDIScope()));
1695  }
1696 }
1697 
1698 
1699 void
1700 FunctionEmitContext::StartScope() {
1701  if (m->diBuilder != NULL) {
1702 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1703  llvm::DIScope parentScope;
1704  llvm::DILexicalBlock lexicalBlock;
1705 #else /* LLVM 3.7+ */
1706  llvm::DIScope *parentScope;
1707  llvm::DILexicalBlock *lexicalBlock;
1708  //llvm::MDScope *parentScope;
1709  //llvm::MDLexicalBlock *lexicalBlock;
1710 #endif
1711  if (debugScopes.size() > 0)
1712  parentScope = debugScopes.back();
1713  else
1714  parentScope = diSubprogram;
1715 
1716  lexicalBlock =
1717  m->diBuilder->createLexicalBlock(parentScope, diFile,
1718  currentPos.first_line,
1719 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_5
1720  // Revision 202736 in LLVM adds support of DWARF discriminator
1721  // to the last argument and revision 202737 in clang adds 0
1722  // for the last argument by default.
1723  currentPos.first_column, 0);
1724 #else
1725  // Revision 216239 in LLVM removes support of DWARF discriminator
1726  // as the last argument
1727  currentPos.first_column);
1728 #endif // LLVM 3.2, 3.3, 3.4 and 3.6+
1729 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1730  AssertPos(currentPos, lexicalBlock.Verify());
1731  debugScopes.push_back(lexicalBlock);
1732 #else /* LLVM 3.7+ */
1733  debugScopes.push_back(llvm::cast<llvm::DILexicalBlockBase>(lexicalBlock));
1734  //debugScopes.push_back(llvm::cast<llvm::MDLexicalBlockBase>(lexicalBlock));
1735 #endif
1736  }
1737 }
1738 
1739 
1740 void
1741 FunctionEmitContext::EndScope() {
1742  if (m->diBuilder != NULL) {
1743  AssertPos(currentPos, debugScopes.size() > 0);
1744  debugScopes.pop_back();
1745  }
1746 }
1747 
1748 
1749 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1750 llvm::DIScope
1751 #else /* LLVM 3.7+ */
1752 llvm::DIScope*
1753 //llvm::MDScope*
1754 #endif
1755 FunctionEmitContext::GetDIScope() const {
1756  AssertPos(currentPos, debugScopes.size() > 0);
1757  return debugScopes.back();
1758 }
1759 
1760 
1761 void
1762 FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) {
1763  if (m->diBuilder == NULL)
1764  return;
1765 
1766 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1767  llvm::DIScope scope = GetDIScope();
1768  llvm::DIType diType = sym->type->GetDIType(scope);
1769  AssertPos(currentPos, diType.Verify());
1770  llvm::DIVariable var =
1771 #else /* LLVM 3.7+ */
1772  llvm::DIScope *scope = GetDIScope();
1773  llvm::DIType *diType = sym->type->GetDIType(scope);
1774  llvm::DILocalVariable *var =
1775  //llvm::MDScope *scope = GetDIScope();
1776  //llvm::MDType *diType = sym->type->GetDIType(scope);
1777  //llvm::MDLocalVariable *var =
1778 #endif
1779 
1780 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7*/
1781  m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_auto_variable,
1782  scope,
1783  sym->name,
1784  sym->pos.GetDIFile(),
1785  sym->pos.first_line,
1786  diType,
1787  true /* preserve through opts */);
1788 #else /* LLVM 3.8+ */
1789  m->diBuilder->createAutoVariable(scope,
1790  sym->name,
1791  sym->pos.GetDIFile(),
1792  sym->pos.first_line,
1793  diType,
1794  true /* preserve through opts */);
1795 #endif
1796 
1797 
1798 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1799  AssertPos(currentPos, var.Verify());
1800  llvm::Instruction *declareInst =
1801  m->diBuilder->insertDeclare(sym->storagePtr, var,
1802  #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
1803  m->diBuilder->createExpression(),
1804  #endif
1805  bblock);
1806  AddDebugPos(declareInst, &sym->pos, &scope);
1807 #else /* LLVM 3.7+ */
1808  llvm::Instruction *declareInst =
1809  m->diBuilder->insertDeclare(sym->storagePtr, var,
1810  m->diBuilder->createExpression(),
1811  llvm::DebugLoc::get(sym->pos.first_line,
1812  sym->pos.first_column, scope),
1813  bblock);
1814  AddDebugPos(declareInst, &sym->pos, scope);
1815 #endif
1816 }
1817 
1818 
1819 void
1820 FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) {
1821  if (m->diBuilder == NULL)
1822  return;
1823 
1824  int flags = 0;
1825 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1826  llvm::DIScope scope = diSubprogram;
1827  llvm::DIType diType = sym->type->GetDIType(scope);
1828  AssertPos(currentPos, diType.Verify());
1829  llvm::DIVariable var =
1830 #else /* LLVM 3.7+ */
1831  llvm::DIScope *scope = diSubprogram;
1832  llvm::DIType *diType = sym->type->GetDIType(scope);
1833  llvm::DILocalVariable *var =
1834  //llvm::MDScope *scope = diSubprogram;
1835  //llvm::MDType *diType = sym->type->GetDIType(scope);
1836  //llvm::MDLocalVariable *var =
1837 #endif
1838 
1839 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
1840  m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_arg_variable,
1841  scope,
1842  sym->name,
1843  sym->pos.GetDIFile(),
1844  sym->pos.first_line,
1845  diType,
1846  true /* preserve through opts */,
1847  flags,
1848  argNum + 1);
1849 #else /* LLVM 3.8+ */
1850  m->diBuilder->createParameterVariable(scope,
1851  sym->name,
1852  argNum + 1,
1853  sym->pos.GetDIFile(),
1854  sym->pos.first_line,
1855  diType,
1856  true /* preserve through opts */,
1857  flags);
1858 #endif
1859 
1860 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1861  AssertPos(currentPos, var.Verify());
1862  llvm::Instruction *declareInst =
1863  m->diBuilder->insertDeclare(sym->storagePtr, var,
1864  #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
1865  m->diBuilder->createExpression(),
1866  #endif
1867  bblock);
1868  AddDebugPos(declareInst, &sym->pos, &scope);
1869 #else /* LLVM 3.7+ */
1870  llvm::Instruction *declareInst =
1871  m->diBuilder->insertDeclare(sym->storagePtr, var,
1872  m->diBuilder->createExpression(),
1873  llvm::DebugLoc::get(sym->pos.first_line,
1874  sym->pos.first_column, scope),
1875  bblock);
1876  AddDebugPos(declareInst, &sym->pos, scope);
1877 #endif
1878 }
1879 
1880 
1881 /** If the given type is an array of vector types, then it's the
1882  representation of an ispc VectorType with varying elements. If it is
1883  one of these, return the array size (i.e. the VectorType's size).
1884  Otherwise return zero.
1885  */
1886 static int
1887 lArrayVectorWidth(llvm::Type *t) {
1888  llvm::ArrayType *arrayType =
1889  llvm::dyn_cast<llvm::ArrayType>(t);
1890  if (arrayType == NULL)
1891  return 0;
1892 
1893  // We shouldn't be seeing arrays of anything but vectors being passed
1894  // to things like FunctionEmitContext::BinaryOperator() as operands.
1895  llvm::VectorType *vectorElementType =
1896  llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
1897  Assert((vectorElementType != NULL &&
1898  (int)vectorElementType->getNumElements() == g->target->getVectorWidth()));
1899 
1900  return (int)arrayType->getNumElements();
1901 }
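
// Illustrative note (not part of the original source): on an 8-wide target,
// an ispc 'varying float<3>' is represented as [3 x <8 x float>] -- an LLVM
// array of three vectors -- which is exactly the shape this routine detects:
//
//     llvm::Type *lanes = llvm::VectorType::get(LLVMTypes::FloatType, 8);
//     llvm::Type *vec3 = llvm::ArrayType::get(lanes, 3);
//     // lArrayVectorWidth(vec3) == 3; any non-array type yields 0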
1902 
1903 
1904 llvm::Value *
1905 FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst,
1906  llvm::Value *v0, llvm::Value *v1,
1907  const char *name) {
1908  if (v0 == NULL || v1 == NULL) {
1909  AssertPos(currentPos, m->errorCount > 0);
1910  return NULL;
1911  }
1912 
1913  AssertPos(currentPos, v0->getType() == v1->getType());
1914  llvm::Type *type = v0->getType();
1915  int arraySize = lArrayVectorWidth(type);
1916  if (arraySize == 0) {
1917  llvm::Instruction *bop =
1918  llvm::BinaryOperator::Create(inst, v0, v1, name ? name : "", bblock);
1919  AddDebugPos(bop);
1920  return bop;
1921  }
1922  else {
1923  // If this is an ispc VectorType, apply the binary operator to each
1924  // of the elements of the array (which in turn should be either
1925  // scalar types or llvm::VectorTypes.)
1926  llvm::Value *ret = llvm::UndefValue::get(type);
1927  for (int i = 0; i < arraySize; ++i) {
1928  llvm::Value *a = ExtractInst(v0, i);
1929  llvm::Value *b = ExtractInst(v1, i);
1930  llvm::Value *op = BinaryOperator(inst, a, b);
1931  ret = InsertInst(ret, op, i);
1932  }
1933  return ret;
1934  }
1935 }
1936 
1937 
1938 llvm::Value *
1939 FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
1940  if (v == NULL) {
1941  AssertPos(currentPos, m->errorCount > 0);
1942  return NULL;
1943  }
1944 
1945  // Similarly to BinaryOperator, do the operation on all the elements of
1946  // the array if we're given an array type; otherwise just do the
1947  // regular llvm operation.
1948  llvm::Type *type = v->getType();
1949  int arraySize = lArrayVectorWidth(type);
1950  if (arraySize == 0) {
1951  llvm::Instruction *binst =
1952  llvm::BinaryOperator::CreateNot(v, name ? name : "not", bblock);
1953  AddDebugPos(binst);
1954  return binst;
1955  }
1956  else {
1957  llvm::Value *ret = llvm::UndefValue::get(type);
1958  for (int i = 0; i < arraySize; ++i) {
1959  llvm::Value *a = ExtractInst(v, i);
1960  llvm::Value *op =
1961  llvm::BinaryOperator::CreateNot(a, name ? name : "not", bblock);
1962  AddDebugPos(op);
1963  ret = InsertInst(ret, op, i);
1964  }
1965  return ret;
1966  }
1967 }
1968 
1969 
1970 // Given the llvm Type that represents an ispc VectorType, return an
1971 // equally-shaped type with boolean elements. (This is the type that will
1972 // be returned from CmpInst with ispc VectorTypes).
1973 static llvm::Type *
1974 lGetMatchingBoolVectorType(llvm::Type *type) {
1975  llvm::ArrayType *arrayType =
1976  llvm::dyn_cast<llvm::ArrayType>(type);
1977  Assert(arrayType != NULL);
1978 
1979  llvm::VectorType *vectorElementType =
1980  llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
1981  Assert(vectorElementType != NULL);
1982  Assert((int)vectorElementType->getNumElements() == g->target->getVectorWidth());
1983 
1984  llvm::Type *base =
1985  llvm::VectorType::get(LLVMTypes::BoolType, g->target->getVectorWidth());
1986  return llvm::ArrayType::get(base, arrayType->getNumElements());
1987 }
1988 
1989 
1990 llvm::Value *
1991 FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst,
1992  llvm::CmpInst::Predicate pred,
1993  llvm::Value *v0, llvm::Value *v1,
1994  const char *name) {
1995  if (v0 == NULL || v1 == NULL) {
1996  AssertPos(currentPos, m->errorCount > 0);
1997  return NULL;
1998  }
1999 
2000  AssertPos(currentPos, v0->getType() == v1->getType());
2001  llvm::Type *type = v0->getType();
2002  int arraySize = lArrayVectorWidth(type);
2003  if (arraySize == 0) {
2004  llvm::Instruction *ci =
2005  llvm::CmpInst::Create(inst, pred, v0, v1, name ? name : "cmp",
2006  bblock);
2007  AddDebugPos(ci);
2008  return ci;
2009  }
2010  else {
2011  llvm::Type *boolType = lGetMatchingBoolVectorType(type);
2012  llvm::Value *ret = llvm::UndefValue::get(boolType);
2013  for (int i = 0; i < arraySize; ++i) {
2014  llvm::Value *a = ExtractInst(v0, i);
2015  llvm::Value *b = ExtractInst(v1, i);
2016  llvm::Value *op = CmpInst(inst, pred, a, b, name);
2017  ret = InsertInst(ret, op, i);
2018  }
2019  return ret;
2020  }
2021 }
2022 
2023 
2024 llvm::Value *
2025 FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
2026  if (value == NULL) {
2027  AssertPos(currentPos, m->errorCount > 0);
2028  return NULL;
2029  }
2030 
2031  llvm::Value *ret = NULL;
2032  llvm::Type *eltType = value->getType();
2033  llvm::Type *vecType = NULL;
2034 
2035  llvm::PointerType *pt =
2036  llvm::dyn_cast<llvm::PointerType>(eltType);
2037  if (pt != NULL) {
2038  // Varying pointers are represented as vectors of i32/i64s
2039  vecType = LLVMTypes::VoidPointerVectorType;
2040  value = PtrToIntInst(value);
2041  }
2042  else {
2043  // All other varying types are represented as vectors of the
2044  // underlying type.
2045  vecType = llvm::VectorType::get(eltType, g->target->getVectorWidth());
2046  }
2047 
2048  // Check for a constant case.
2049  if (llvm::Constant *const_val = llvm::dyn_cast<llvm::Constant>(value)) {
2050  ret = llvm::ConstantVector::getSplat(
2051  g->target->getVectorWidth(),
2052  const_val);
2053  return ret;
2054  }
2055 
2056  ret = BroadcastValue(value, vecType, name);
2057 
2058  return ret;
2059 }
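
// Illustrative sketch (not part of the original source): for a non-constant
// uniform value, BroadcastValue() amounts to the canonical LLVM splat idiom,
// e.g. on a 4-wide target:
//
//     %v = insertelement <4 x float> undef, float %x, i32 0
//     %smear = shufflevector <4 x float> %v, <4 x float> undef,
//                            <4 x i32> zeroinitializer
//
// The constant path above instead folds the splat into a ConstantVector.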
2060 
2061 
2062 llvm::Value *
2063 FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type,
2064  const char *name) {
2065  if (value == NULL) {
2066  AssertPos(currentPos, m->errorCount > 0);
2067  return NULL;
2068  }
2069 
2070  if (name == NULL)
2071  name = LLVMGetName(value, "_bitcast");
2072 
2073  llvm::Instruction *inst = new llvm::BitCastInst(value, type, name, bblock);
2074  AddDebugPos(inst);
2075  return inst;
2076 }
2077 
2078 
2079 llvm::Value *
2080 FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
2081  if (value == NULL) {
2082  AssertPos(currentPos, m->errorCount > 0);
2083  return NULL;
2084  }
2085 
2086  if (llvm::isa<llvm::VectorType>(value->getType()))
2087  // no-op for varying pointers; they're already vectors of ints
2088  return value;
2089 
2090  if (name == NULL)
2091  name = LLVMGetName(value, "_ptr2int");
2092  llvm::Type *type = LLVMTypes::PointerIntType;
2093  llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
2094  AddDebugPos(inst);
2095  return inst;
2096 }
2097 
2098 
2099 llvm::Value *
2100 FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType,
2101  const char *name) {
2102  if (value == NULL) {
2103  AssertPos(currentPos, m->errorCount > 0);
2104  return NULL;
2105  }
2106 
2107  if (name == NULL)
2108  name = LLVMGetName(value, "_ptr2int");
2109 
2110  llvm::Type *fromType = value->getType();
2111  if (llvm::isa<llvm::VectorType>(fromType)) {
2112  // varying pointer
2113  if (fromType == toType)
2114  // already the right type--done
2115  return value;
2116  else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
2117  return TruncInst(value, toType, name);
2118  else {
2119  AssertPos(currentPos, fromType->getScalarSizeInBits() <
2120  toType->getScalarSizeInBits());
2121  return ZExtInst(value, toType, name);
2122  }
2123  }
2124 
2125  llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock);
2126  AddDebugPos(inst);
2127  return inst;
2128 }
2129 
2130 
2131 llvm::Value *
2132 FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType,
2133  const char *name) {
2134  if (value == NULL) {
2135  AssertPos(currentPos, m->errorCount > 0);
2136  return NULL;
2137  }
2138 
2139  if (name == NULL)
2140  name = LLVMGetName(value, "_int2ptr");
2141 
2142  llvm::Type *fromType = value->getType();
2143  if (llvm::isa<llvm::VectorType>(fromType)) {
2144  // varying pointer
2145  if (fromType == toType)
2146  // done
2147  return value;
2148  else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
2149  return TruncInst(value, toType, name);
2150  else {
2151  AssertPos(currentPos, fromType->getScalarSizeInBits() <
2152  toType->getScalarSizeInBits());
2153  return ZExtInst(value, toType, name);
2154  }
2155  }
2156 
2157  llvm::Instruction *inst = new llvm::IntToPtrInst(value, toType, name,
2158  bblock);
2159  AddDebugPos(inst);
2160  return inst;
2161 }
2162 
2163 
2164 llvm::Instruction *
2165 FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type,
2166  const char *name) {
2167  if (value == NULL) {
2168  AssertPos(currentPos, m->errorCount > 0);
2169  return NULL;
2170  }
2171 
2172  if (name == NULL)
2173  name = LLVMGetName(value, "_trunc");
2174 
2175  // TODO: we should probably handle the array case as in
2176  // e.g. BitCastInst(), but we don't currently need that functionality
2177  llvm::Instruction *inst = new llvm::TruncInst(value, type, name, bblock);
2178  AddDebugPos(inst);
2179  return inst;
2180 }
2181 
2182 
2183 llvm::Instruction *
2184 FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
2185  llvm::Type *type, const char *name) {
2186  if (value == NULL) {
2187  AssertPos(currentPos, m->errorCount > 0);
2188  return NULL;
2189  }
2190 
2191  if (name == NULL)
2192  name = LLVMGetName(value, "_cast");
2193 
2194  // TODO: we should probably handle the array case as in
2195  // e.g. BitCastInst(), but we don't currently need that functionality
2196  llvm::Instruction *inst = llvm::CastInst::Create(op, value, type, name,
2197  bblock);
2198  AddDebugPos(inst);
2199  return inst;
2200 }
2201 
2202 
2203 llvm::Instruction *
2204 FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type,
2205  const char *name) {
2206  if (value == NULL) {
2207  AssertPos(currentPos, m->errorCount > 0);
2208  return NULL;
2209  }
2210 
2211  if (name == NULL)
2212  name = LLVMGetName(value, "_cast");
2213 
2214  // TODO: we should probably handle the array case as in
2215  // e.g. BitCastInst(), but we don't currently need that functionality
2216  llvm::Instruction *inst = llvm::CastInst::CreateFPCast(value, type, name, bblock);
2217  AddDebugPos(inst);
2218  return inst;
2219 }
2220 
2221 
2222 llvm::Instruction *
2223 FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type,
2224  const char *name) {
2225  if (value == NULL) {
2226  AssertPos(currentPos, m->errorCount > 0);
2227  return NULL;
2228  }
2229 
2230  if (name == NULL)
2231  name = LLVMGetName(value, "_sext");
2232 
2233  // TODO: we should probably handle the array case as in
2234  // e.g. BitCastInst(), but we don't currently need that functionality
2235  llvm::Instruction *inst = new llvm::SExtInst(value, type, name, bblock);
2236  AddDebugPos(inst);
2237  return inst;
2238 }
2239 
2240 
2241 llvm::Instruction *
2242 FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type,
2243  const char *name) {
2244  if (value == NULL) {
2245  AssertPos(currentPos, m->errorCount > 0);
2246  return NULL;
2247  }
2248 
2249  if (name == NULL)
2250  name = LLVMGetName(value, "_zext");
2251 
2252  // TODO: we should probably handle the array case as in
2253  // e.g. BitCastInst(), but we don't currently need that functionality
2254  llvm::Instruction *inst = new llvm::ZExtInst(value, type, name, bblock);
2255  AddDebugPos(inst);
2256  return inst;
2257 }
2258 
2259 
2260 /** Utility routine used by the GetElementPtrInst() methods; given a
2261  pointer to some type (either uniform or varying) and an index (also
2262  either uniform or varying), this returns the new pointer (varying if
2263  appropriate) given by offsetting the base pointer by the index times
2264  the size of the object that the pointer points to.
2265  */
2266 llvm::Value *
2267 FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
2268  const Type *ptrType) {
2269  // Find the scale factor for the index (i.e. the size of the object
2270  // that the pointer(s) point(s) to).
2271  const Type *scaleType = ptrType->GetBaseType();
2272  llvm::Value *scale = g->target->SizeOf(scaleType->LLVMType(g->ctx), bblock);
2273 
2274  bool indexIsVarying =
2275  llvm::isa<llvm::VectorType>(index->getType());
2276  llvm::Value *offset = NULL;
2277  if (indexIsVarying == false) {
2278  // Truncate or sign extend the index as appropriate to a 32 or
2279  // 64-bit type.
2280  if ((g->target->is32Bit() || g->opt.force32BitAddressing) &&
2281  index->getType() == LLVMTypes::Int64Type)
2282  index = TruncInst(index, LLVMTypes::Int32Type);
2283  else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
2284  index->getType() == LLVMTypes::Int32Type)
2285  index = SExtInst(index, LLVMTypes::Int64Type);
2286 
2287  // do a scalar multiply to get the offset as index * scale and then
2288  // smear the result out to be a vector; this is more efficient than
2289  // first promoting both the scale and the index to vectors and then
2290  // multiplying.
2291  offset = BinaryOperator(llvm::Instruction::Mul, scale, index);
2292  offset = SmearUniform(offset);
2293  }
2294  else {
2295  // Similarly, truncate or sign extend the index to be a 32 or 64
2296  // bit vector type
2297  if ((g->target->is32Bit() || g->opt.force32BitAddressing) &&
2298  index->getType() == LLVMTypes::Int64VectorType)
2299  index = TruncInst(index, LLVMTypes::Int32VectorType);
2300  else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
2301  index->getType() == LLVMTypes::Int32VectorType)
2302  index = SExtInst(index, LLVMTypes::Int64VectorType);
2303 
2304  scale = SmearUniform(scale);
2305 
2306  // offset = index * scale
2307  offset = BinaryOperator(llvm::Instruction::Mul, scale, index,
2308  LLVMGetName("mul", scale, index));
2309  }
2310 
2311  // For 64-bit targets, if we've been doing our offset calculations in
2312  // 32 bits, we still have to convert to a 64-bit value before we
2313  // actually add the offset to the pointer.
2314  if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
2315  offset = SExtInst(offset, LLVMTypes::Int64VectorType,
2316  LLVMGetName(offset, "_to_64"));
2317 
2318  // Smear out the pointer to be varying; either the base pointer or the
2319  // index must be varying for this method to be called.
2320  bool baseIsUniform =
2321  (llvm::isa<llvm::PointerType>(basePtr->getType()));
2322  AssertPos(currentPos, baseIsUniform == false || indexIsVarying == true);
2323  llvm::Value *varyingPtr = baseIsUniform ? SmearUniform(basePtr) : basePtr;
2324 
2325  // newPtr = ptr + offset
2326  return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset,
2327  LLVMGetName(basePtr, "_offset"));
2328 }
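
// Scalar model (illustrative sketch, not part of the original source) of the
// per-lane address arithmetic applyVaryingGEP() emits: each lane's result is
// the (possibly smeared) base pointer plus index * sizeof(pointee).
static inline void lVaryingGEPModel(unsigned char **result,
                                    unsigned char *const *base,
                                    const long long *index,
                                    long long pointeeSize, int width) {
    for (int i = 0; i < width; ++i)
        result[i] = base[i] + index[i] * pointeeSize;  // newPtr = ptr + offset
}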
2329 
2330 
2331 void
2332 FunctionEmitContext::MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1) {
2333  llvm::Type *type0 = (*v0)->getType();
2334  llvm::Type *type1 = (*v1)->getType();
2335 
2336  // First, promote to a vector type if one of the two values is a vector
2337  // type
2338  if (llvm::isa<llvm::VectorType>(type0) &&
2339  !llvm::isa<llvm::VectorType>(type1)) {
2340  *v1 = SmearUniform(*v1, "smear_v1");
2341  type1 = (*v1)->getType();
2342  }
2343  if (!llvm::isa<llvm::VectorType>(type0) &&
2344  llvm::isa<llvm::VectorType>(type1)) {
2345  *v0 = SmearUniform(*v0, "smear_v0");
2346  type0 = (*v0)->getType();
2347  }
2348 
2349  // And then update to match bit widths
2350  if (type0 == LLVMTypes::Int32Type &&
2351  type1 == LLVMTypes::Int64Type)
2352  *v0 = SExtInst(*v0, LLVMTypes::Int64Type);
2353  else if (type1 == LLVMTypes::Int32Type &&
2354  type0 == LLVMTypes::Int64Type)
2355  *v1 = SExtInst(*v1, LLVMTypes::Int64Type);
2356  else if (type0 == LLVMTypes::Int32VectorType &&
2357  type1 == LLVMTypes::Int64VectorType)
2358  *v0 = SExtInst(*v0, LLVMTypes::Int64VectorType);
2359  else if (type1 == LLVMTypes::Int32VectorType &&
2360  type0 == LLVMTypes::Int64VectorType)
2361  *v1 = SExtInst(*v1, LLVMTypes::Int64VectorType);
2362 }
2363 
2364 
2365 /** Given an integer index in indexValue that's indexing into an array of
2366  soa<> structures with given soaWidth, compute the two sub-indices we
2367  need to do the actual indexing calculation:
2368 
2369  subIndices[0] = (indexValue >> log(soaWidth))
2370  subIndices[1] = (indexValue & (soaWidth-1))
2371  */
2372 static llvm::Value *
2373 lComputeSliceIndex(FunctionEmitContext *ctx, int soaWidth,
2374  llvm::Value *indexValue, llvm::Value *ptrSliceOffset,
2375  llvm::Value **newSliceOffset) {
2376  // Compute the log2 of the soaWidth.
2377  Assert(soaWidth > 0);
2378  int logWidth = 0, sw = soaWidth;
2379  while (sw > 1) {
2380  ++logWidth;
2381  sw >>= 1;
2382  }
2383  Assert((1 << logWidth) == soaWidth);
2384 
2385  ctx->MatchIntegerTypes(&indexValue, &ptrSliceOffset);
2386 
2387  llvm::Type *indexType = indexValue->getType();
2388  llvm::Value *shift = LLVMIntAsType(logWidth, indexType);
2389  llvm::Value *mask = LLVMIntAsType(soaWidth-1, indexType);
2390 
2391  llvm::Value *indexSum =
2392  ctx->BinaryOperator(llvm::Instruction::Add, indexValue, ptrSliceOffset,
2393  "index_sum");
2394 
2395  // minor index = (index & (soaWidth - 1))
2396  *newSliceOffset = ctx->BinaryOperator(llvm::Instruction::And, indexSum,
2397  mask, "slice_index_minor");
2398  // slice offsets are always 32 bits...
2399  if ((*newSliceOffset)->getType() == LLVMTypes::Int64Type)
2400  *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32Type);
2401  else if ((*newSliceOffset)->getType() == LLVMTypes::Int64VectorType)
2402  *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32VectorType);
2403 
2404  // major index = (index >> logWidth)
2405  return ctx->BinaryOperator(llvm::Instruction::AShr, indexSum,
2406  shift, "slice_index_major");
2407 }
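
// Worked example (illustrative, not part of the original source): with
// soaWidth = 8, index = 19, and slice offset = 2, indexSum is 21, so the
// major index is 21 >> 3 = 2 (the third soa<8> struct) and the minor index
// is 21 & 7 = 5 (the lane within it). A scalar model of the computation:
static inline int lSliceIndexModel(int soaWidth, int index, int sliceOffset,
                                   int *newSliceOffset) {
    int logWidth = 0;
    for (int sw = soaWidth; sw > 1; sw >>= 1)
        ++logWidth;
    int indexSum = index + sliceOffset;
    *newSliceOffset = indexSum & (soaWidth - 1);  // minor: lane in the struct
    return indexSum >> logWidth;                  // major: which soa<> struct
}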
2408 
2409 
2410 llvm::Value *
2411 FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) {
2412  // Create a small struct where the first element is the type of the
2413  // given pointer and the second element is the type of the offset
2414  // value.
2415  std::vector<llvm::Type *> eltTypes;
2416  eltTypes.push_back(ptr->getType());
2417  eltTypes.push_back(offset->getType());
2418  llvm::StructType *st =
2419  llvm::StructType::get(*g->ctx, eltTypes);
2420 
2421  llvm::Value *ret = llvm::UndefValue::get(st);
2422  ret = InsertInst(ret, ptr, 0, LLVMGetName(ret, "_slice_ptr"));
2423  ret = InsertInst(ret, offset, 1, LLVMGetName(ret, "_slice_offset"));
2424  return ret;
2425 }
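
// Illustrative analogy (not part of the original source; the struct name is
// hypothetical): a slice pointer built here is morally the following pair,
// where 'offset' is the minor index selecting a lane within the soa<>
// struct that 'ptr' points at:
//
//     struct SlicePointerModel {
//         void *ptr;  // points at the start of an soa<> struct
//         int offset; // lane within that struct (always 32-bit, see above)
//     };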
2426 
2427 
2428 llvm::Value *
2429 FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
2430  const Type *ptrRefType, const char *name) {
2431  if (basePtr == NULL || index == NULL) {
2432  AssertPos(currentPos, m->errorCount > 0);
2433  return NULL;
2434  }
2435 
2436  // Regularize basePtr's type into a standard pointer type
2437  const PointerType *ptrType;
2438  if (CastType<ReferenceType>(ptrRefType) != NULL)
2439  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2440  else {
2441  ptrType = CastType<PointerType>(ptrRefType);
2442  AssertPos(currentPos, ptrType != NULL);
2443  }
2444 
2445  if (ptrType->IsSlice()) {
2446  AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));
2447 
2448  llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
2449  if (ptrType->IsFrozenSlice() == false) {
2450  // For slice pointers that aren't frozen, we compute a new
2451  // index based on the given index plus the offset in the slice
2452  // pointer. This gives us an updated integer slice index for
2453  // the resulting slice pointer and then an index to index into
2454  // the soa<> structs with.
2455  llvm::Value *newSliceOffset;
2456  int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
2457  index = lComputeSliceIndex(this, soaWidth, index,
2458  ptrSliceOffset, &newSliceOffset);
2459  ptrSliceOffset = newSliceOffset;
2460  }
2461 
2462  // Handle the indexing into the soa<> structs with the major
2463  // component of the index through a recursive call
2464  llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index,
2465  ptrType->GetAsNonSlice(), name);
2466 
2467  // And mash the results together for the return value
2468  return MakeSlicePointer(p, ptrSliceOffset);
2469  }
2470 
2471  // Double-check consistency between the given pointer type and its LLVM
2472  // type.
2473  if (ptrType->IsUniformType())
2474  AssertPos(currentPos, llvm::isa<llvm::PointerType>(basePtr->getType()));
2475  else if (ptrType->IsVaryingType())
2476  AssertPos(currentPos, llvm::isa<llvm::VectorType>(basePtr->getType()));
2477 
2478  bool indexIsVaryingType =
2479  llvm::isa<llvm::VectorType>(index->getType());
2480 
2481  if (indexIsVaryingType == false && ptrType->IsUniformType() == true) {
2482  // The easy case: both the base pointer and the indices are
2483  // uniform, so just emit the regular LLVM GEP instruction
2484  llvm::Value *ind[1] = { index };
2485  llvm::ArrayRef<llvm::Value *> arrayRef(&ind[0], &ind[1]);
2486 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
2487  llvm::Instruction *inst =
2488  llvm::GetElementPtrInst::Create(basePtr, arrayRef,
2489  name ? name : "gep", bblock);
2490 #else /* LLVM 3.7+ */
2491  llvm::Instruction *inst =
2492  llvm::GetElementPtrInst::Create(PTYPE(basePtr),
2493  basePtr, arrayRef,
2494  name ? name : "gep", bblock);
2495 #endif
2496  AddDebugPos(inst);
2497  return inst;
2498  }
2499  else
2500  return applyVaryingGEP(basePtr, index, ptrType);
2501 }
2502 
2503 
2504 llvm::Value *
2505 FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0,
2506  llvm::Value *index1, const Type *ptrRefType,
2507  const char *name) {
2508  if (basePtr == NULL || index0 == NULL || index1 == NULL) {
2509  AssertPos(currentPos, m->errorCount > 0);
2510  return NULL;
2511  }
2512 
2513  // Regularize the pointer type for basePtr
2514  const PointerType *ptrType = NULL;
2515  if (CastType<ReferenceType>(ptrRefType) != NULL)
2516  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2517  else {
2518  ptrType = CastType<PointerType>(ptrRefType);
2519  AssertPos(currentPos, ptrType != NULL);
2520  }
2521 
2522  if (ptrType->IsSlice()) {
2523  // Similar to the 1D GEP implementation above, for non-frozen slice
2524  // pointers we do the two-step indexing calculation and then pass
2525  // the new major index on to a recursive GEP call.
2526  AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));
2527  llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
2528  if (ptrType->IsFrozenSlice() == false) {
2529  llvm::Value *newSliceOffset;
2530  int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
2531  index1 = lComputeSliceIndex(this, soaWidth, index1,
2532  ptrSliceOffset, &newSliceOffset);
2533  ptrSliceOffset = newSliceOffset;
2534  }
2535 
2536  llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index0,
2537  index1, ptrType->GetAsNonSlice(),
2538  name);
2539  return MakeSlicePointer(p, ptrSliceOffset);
2540  }
2541 
2542  bool index0IsVaryingType =
2543  llvm::isa<llvm::VectorType>(index0->getType());
2544  bool index1IsVaryingType =
2545  llvm::isa<llvm::VectorType>(index1->getType());
2546 
2547  if (index0IsVaryingType == false && index1IsVaryingType == false &&
2548  ptrType->IsUniformType() == true) {
2549  // The easy case: both the base pointer and the indices are
2550  // uniform, so just emit the regular LLVM GEP instruction
2551  llvm::Value *indices[2] = { index0, index1 };
2552  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
2553 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
2554  llvm::Instruction *inst =
2555  llvm::GetElementPtrInst::Create(basePtr, arrayRef,
2556  name ? name : "gep", bblock);
2557 #else /* LLVM 3.7+ */
2558  llvm::Instruction *inst =
2559  llvm::GetElementPtrInst::Create(PTYPE(basePtr),
2560  basePtr, arrayRef,
2561  name ? name : "gep", bblock);
2562 #endif
2563  AddDebugPos(inst);
2564  return inst;
2565  }
2566  else {
2567  // Handle the first dimension with index0
2568  llvm::Value *ptr0 = GetElementPtrInst(basePtr, index0, ptrType);
2569 
2570  // Now index into the second dimension with index1. First figure
2571  // out the type of ptr0.
2572  const Type *baseType = ptrType->GetBaseType();
2573  const SequentialType *st = CastType<SequentialType>(baseType);
2574  AssertPos(currentPos, st != NULL);
2575 
2576  bool ptr0IsUniform =
2577  llvm::isa<llvm::PointerType>(ptr0->getType());
2578  const Type *ptr0BaseType = st->GetElementType();
2579  const Type *ptr0Type = ptr0IsUniform ?
2580  PointerType::GetUniform(ptr0BaseType) :
2581  PointerType::GetVarying(ptr0BaseType);
2582 
2583  return applyVaryingGEP(ptr0, index1, ptr0Type);
2584  }
2585 }
2586 
2587 
2588 llvm::Value *
2589 FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum,
2590  const Type *ptrRefType, const char *name,
2591  const PointerType **resultPtrType) {
2592  if (resultPtrType != NULL)
2593  AssertPos(currentPos, ptrRefType != NULL);
2594 
2595  llvm::PointerType *llvmPtrType =
2596  llvm::dyn_cast<llvm::PointerType>(fullBasePtr->getType());
2597  if (llvmPtrType != NULL) {
2598  llvm::StructType *llvmStructType =
2599  llvm::dyn_cast<llvm::StructType>(llvmPtrType->getElementType());
2600  if (llvmStructType != NULL && llvmStructType->isSized() == false) {
2601  AssertPos(currentPos, m->errorCount > 0);
2602  return NULL;
2603  }
2604  }
2605 
2606  // (Unfortunately) it's not required to pass a non-NULL ptrRefType, but
2607  // if we have one, regularize into a pointer type.
2608  const PointerType *ptrType = NULL;
2609  if (ptrRefType != NULL) {
2610  // Normalize references to uniform pointers
2611  if (CastType<ReferenceType>(ptrRefType) != NULL)
2612  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2613  else
2614  ptrType = CastType<PointerType>(ptrRefType);
2615  AssertPos(currentPos, ptrType != NULL);
2616  }
2617 
2618  // Similarly, we have to see if the pointer type is a struct to see if
2619  // we have a slice pointer instead of looking at ptrType; this is also
2620  // unfortunate...
2621  llvm::Value *basePtr = fullBasePtr;
2622  bool baseIsSlicePtr =
2623  llvm::isa<llvm::StructType>(fullBasePtr->getType());
2624  const PointerType *rpt;
2625  if (baseIsSlicePtr) {
2626  AssertPos(currentPos, ptrType != NULL);
2627  // Update basePtr to just be the part that actually points to the
2628  // start of an soa<> struct for now; the element offset computation
2629  // doesn't change the slice offset, so we'll incorporate that into
2630  // the final value right before this method returns.
2631  basePtr = ExtractInst(fullBasePtr, 0);
2632  if (resultPtrType == NULL)
2633  resultPtrType = &rpt;
2634  }
2635 
2636  // Return the pointer type of the result of this call, for callers that
2637  // want it.
2638  if (resultPtrType != NULL) {
2639  AssertPos(currentPos, ptrType != NULL);
2640  const CollectionType *ct =
2641  CastType<CollectionType>(ptrType->GetBaseType());
2642  AssertPos(currentPos, ct != NULL);
2643  *resultPtrType = new PointerType(ct->GetElementType(elementNum),
2644  ptrType->GetVariability(),
2645  ptrType->IsConstType(),
2646  ptrType->IsSlice());
2647  }
2648 
2649  llvm::Value *resultPtr = NULL;
2650  if (ptrType == NULL || ptrType->IsUniformType()) {
2651  // If the pointer is uniform, we can use the regular LLVM GEP.
2652  llvm::Value *offsets[2] = { LLVMInt32(0), LLVMInt32(elementNum) };
2653  llvm::ArrayRef<llvm::Value *> arrayRef(&offsets[0], &offsets[2]);
2654 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
2655  resultPtr =
2656  llvm::GetElementPtrInst::Create(basePtr, arrayRef,
2657  name ? name : "struct_offset", bblock);
2658 #else /* LLVM 3.7+ */
2659  resultPtr =
2660  llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef,
2661  name ? name : "struct_offset", bblock);
2662 #endif
2663  }
2664  else {
2665  // Otherwise do the math to find the offset and add it to the given
2666  // varying pointers
2667  const StructType *st = CastType<StructType>(ptrType->GetBaseType());
2668  llvm::Value *offset = NULL;
2669  if (st != NULL)
2670  // If the pointer is to a structure, Target::StructOffset() gives
2671  // us the offset in bytes to the given element of the structure
2672  offset = g->target->StructOffset(st->LLVMType(g->ctx), elementNum,
2673  bblock);
2674  else {
2675  // Otherwise we should have a vector or array here and the offset
2676  // is given by the element number times the size of the element
2677  // type of the vector.
2678  const SequentialType *st =
2679  CastType<SequentialType>(ptrType->GetBaseType());
2680  AssertPos(currentPos, st != NULL);
2681  llvm::Value *size =
2682  g->target->SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock);
2683  llvm::Value *scale = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
2684  LLVMInt32(elementNum) : LLVMInt64(elementNum);
2685  offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
2686  }
2687 
2688  offset = SmearUniform(offset, "offset_smear");
2689 
2690  if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
2691  // If we're doing 32 bit addressing with a 64 bit target, although
2692  // we did the math above in 32 bit, we need to go to 64 bit before
2693  // we add the offset to the varying pointers.
2694  offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
2695 
2696  resultPtr = BinaryOperator(llvm::Instruction::Add, basePtr, offset,
2697  "struct_ptr_offset");
2698  }
2699 
2700  // Finally, if we had a slice pointer going in, mash the result back
2701  // together with the original (unchanged) slice offset.
2702  if (baseIsSlicePtr)
2703  return MakeSlicePointer(resultPtr, ExtractInst(fullBasePtr, 1));
2704  else
2705  return resultPtr;
2706 }
2707 
2708 
2709 llvm::Value *
2710 FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
2711  if (ptr == NULL) {
2712  AssertPos(currentPos, m->errorCount > 0);
2713  return NULL;
2714  }
2715 
2716  llvm::PointerType *pt =
2717  llvm::dyn_cast<llvm::PointerType>(ptr->getType());
2718  AssertPos(currentPos, pt != NULL);
2719 
2720  if (name == NULL)
2721  name = LLVMGetName(ptr, "_load");
2722 
2723  llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock);
2724 
2725  if (g->opt.forceAlignedMemory &&
2726  llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
2727  inst->setAlignment(g->target->getNativeVectorAlignment());
2728  }
2729 
2730  AddDebugPos(inst);
2731  return inst;
2732 }
2733 
2734 
2735 /** Given a slice pointer to soa'd data that is a basic type (atomic,
2736  pointer, or enum type), use the slice offset to compute pointer(s) to
2737  the appropriate individual data element(s).
2738  */
2739 static llvm::Value *
2740 lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr,
2741  const PointerType **ptrType) {
2742  Assert(CastType<PointerType>(*ptrType) != NULL);
2743 
2744  llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, LLVMGetName(ptr, "_ptr"));
2745  llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, LLVMGetName(ptr, "_offset"));
2746 
2747  // slicePtr should be a pointer to an soa-width wide array of the
2748  // final atomic/enum/pointer type
2749  const Type *unifBaseType = (*ptrType)->GetBaseType()->GetAsUniformType();
2750  Assert(Type::IsBasicType(unifBaseType));
2751 
2752  // The final pointer type is a uniform or varying pointer to the
2753  // underlying uniform type, depending on whether the given pointer is
2754  // uniform or varying.
2755  *ptrType = (*ptrType)->IsUniformType() ?
2756  PointerType::GetUniform(unifBaseType) :
2757  PointerType::GetVarying(unifBaseType);
2758 
2759  // For uniform pointers, bitcast to a pointer to the uniform element
2760  // type, so that the GEP below does the desired indexing
2761  if ((*ptrType)->IsUniformType())
2762  slicePtr = ctx->BitCastInst(slicePtr, (*ptrType)->LLVMType(g->ctx));
2763 
2764  // And finally index based on the slice offset
2765  return ctx->GetElementPtrInst(slicePtr, sliceOffset, *ptrType,
2766  LLVMGetName(slicePtr, "_final_gep"));
2767 }
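
// Scalar model (illustrative, not part of the original source): for a slice
// pointer to an soa<W> array of floats, the final element address is just
// the slice offset applied within the soa-width-wide array:
template <int W>
static inline float *lFinalSliceModel(float (*soaArray)[W], int sliceOffset) {
    return &(*soaArray)[sliceOffset];
}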
2768 
2769 
2770 /** Utility routine that loads from a uniform pointer to soa<> data,
2771  returning a regular uniform (non-SOA result).
2772  */
2773 llvm::Value *
2774 FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask,
2775  const PointerType *ptrType,
2776  const char *name) {
2777  const Type *unifType = ptrType->GetBaseType()->GetAsUniformType();
2778 
2779  const CollectionType *ct = CastType<CollectionType>(ptrType->GetBaseType());
2780  if (ct != NULL) {
2781  // If we have a struct/array, we need to decompose it into
2782  // individual element loads to fill in the result structure since
2783  // the SOA slice of values we need isn't contiguous in memory...
2784  llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx);
2785  llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
2786 
2787  for (int i = 0; i < ct->GetElementCount(); ++i) {
2788  const PointerType *eltPtrType;
2789  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType,
2790  "elt_offset", &eltPtrType);
2791  llvm::Value *eltValue = LoadInst(eltPtr, mask, eltPtrType, name);
2792  retValue = InsertInst(retValue, eltValue, i, "set_value");
2793  }
2794 
2795  return retValue;
2796  }
2797  else {
2798  // Otherwise we've made our way to a slice pointer to a basic type;
2799  // we need to apply the slice offset into this terminal SOA array
2800  // and then perform the final load
2801  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2802  return LoadInst(ptr, mask, ptrType, name);
2803  }
2804 }
2805 
2806 
2807 llvm::Value *
2808 FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask,
2809  const Type *ptrRefType, const char *name,
2810  bool one_elem) {
2811  if (ptr == NULL) {
2812  AssertPos(currentPos, m->errorCount > 0);
2813  return NULL;
2814  }
2815 
2816  AssertPos(currentPos, ptrRefType != NULL && mask != NULL);
2817 
2818  if (name == NULL)
2819  name = LLVMGetName(ptr, "_load");
2820 
2821  const PointerType *ptrType;
2822  if (CastType<ReferenceType>(ptrRefType) != NULL)
2823  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
2824  else {
2825  ptrType = CastType<PointerType>(ptrRefType);
2826  AssertPos(currentPos, ptrType != NULL);
2827  }
2828 
2829  if (CastType<UndefinedStructType>(ptrType->GetBaseType())) {
2830  Error(currentPos, "Unable to load to undefined struct type \"%s\".",
2831  ptrType->GetBaseType()->GetString().c_str());
2832  return NULL;
2833  }
2834 
2835  if (ptrType->IsUniformType()) {
2836  if (ptrType->IsSlice()) {
2837  return loadUniformFromSOA(ptr, mask, ptrType, name);
2838  }
2839  else {
2840  // FIXME: same issue as above load inst regarding alignment...
2841  //
2842  // If the ptr is a straight up regular pointer, then just issue
2843  // a regular load. First figure out the alignment; in general we
2844  // can just assume the natural alignment (0 here), but for varying
2845  // atomic types, we need to make sure that the compiler emits
2846  // unaligned vector loads, so we specify a reduced alignment here.
2847  int align = 0;
2848  const AtomicType *atomicType =
2849  CastType<AtomicType>(ptrType->GetBaseType());
2850  if (atomicType != NULL && atomicType->IsVaryingType())
2851  // We actually just want to align to the vector element
2852  // alignment, but can't easily get that here, so just tell LLVM
2853  // it's totally unaligned. (This shouldn't make any difference
2854  // vs the proper alignment in practice.)
2855  align = 1;
2856  llvm::Instruction *inst = new llvm::LoadInst(ptr, name,
2857  false /* not volatile */,
2858  align, bblock);
2859  AddDebugPos(inst);
2860  return inst;
2861  }
2862  }
2863  else {
2864  // Otherwise we should have a varying ptr and it's time for a
2865  // gather.
2866  llvm::Value *gather_result = gather(ptr, ptrType, GetFullMask(), name);
2867  if (!one_elem)
2868  return gather_result;
2869 
2870  // This is a kludge: when we dereference a varying pointer to a uniform
2871  // struct with a "bound uniform" member, we should return the first unmasked member.
2872  Warning(currentPos, "Dereferencing varying pointer to uniform struct with 'bound uniform' member,\n"
2873  " only one value will survive. Possible loss of data.");
2874  // Call the target-dependent movmsk function to turn the vector mask
2875  // into an i64 value
2876  std::vector<Symbol *> mm;
2877  m->symbolTable->LookupFunction("__movmsk", &mm);
2878  if (g->target->getMaskBitCount() == 1)
2879  AssertPos(currentPos, mm.size() == 1);
2880  else
2881  // There should be one with signed int signature, one unsigned int.
2882  AssertPos(currentPos, mm.size() == 2);
2883  // We can actually call either one, since both are i32s as far as
2884  // LLVM's type system is concerned...
2885  llvm::Function *fmm = mm[0]->function;
2886  llvm::Value *int_mask = CallInst(fmm, NULL, mask, LLVMGetName(mask, "_movmsk"));
2887  std::vector<Symbol *> lz;
2888  m->symbolTable->LookupFunction("__count_trailing_zeros_i64", &lz);
2889  llvm::Function *flz = lz[0]->function;
2890  llvm::Value *elem_idx = CallInst(flz, NULL, int_mask, LLVMGetName(mask, "_clz"));
2891  llvm::Value *elem = llvm::ExtractElementInst::Create(gather_result, elem_idx, LLVMGetName(gather_result, "_umasked_elem"), bblock);
2892  return elem;
2893  }
2894 }
2895 
2896 
2897 llvm::Value *
2898 FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType,
2899  llvm::Value *mask, const char *name) {
2900  // We should have a varying pointer if we get here...
2901  AssertPos(currentPos, ptrType->IsVaryingType());
2902 
2903  const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType();
2904  llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
2905 
2906  const CollectionType *collectionType =
2907  CastType<CollectionType>(ptrType->GetBaseType());
2908  if (collectionType != NULL) {
2909  // For collections, recursively gather element wise to find the
2910  // result.
2911  llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
2912 
2913  const CollectionType *returnCollectionType =
2914  CastType<CollectionType>(returnType->GetBaseType());
2915 
2916  for (int i = 0; i < collectionType->GetElementCount(); ++i) {
2917  const PointerType *eltPtrType;
2918  llvm::Value *eltPtr =
2919  AddElementOffset(ptr, i, ptrType, "gather_elt_ptr", &eltPtrType);
2920 
2921  eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType);
2922 
2923  // This is a kludge: when we dereference a varying pointer to a uniform
2924  // struct with a "bound uniform" member, we should return the first unmasked member.
2925  int need_one_elem = CastType<StructType>(ptrType->GetBaseType()) &&
2926  returnCollectionType->GetElementType(i)->IsUniformType();
2927  // This in turn will be another gather
2928  llvm::Value *eltValues = LoadInst(eltPtr, mask, eltPtrType, name, need_one_elem);
2929 
2930  retValue = InsertInst(retValue, eltValues, i, "set_value");
2931  }
2932  return retValue;
2933  }
2934  else if (ptrType->IsSlice()) {
2935  // If we have a slice pointer, we need to add the final slice
2936  // offset here right before issuing the actual gather
2937  //
2938  // FIXME: would it be better to do the corresponding same thing for
2939  // all of the varying offsets stuff here (and in scatter)?
2940  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2941  }
2942 
2943  // Otherwise we should just have a basic scalar or pointer type and we
2944  // can go and do the actual gather
2945  AddInstrumentationPoint("gather");
2946 
2947  // Figure out which gather function to call based on the size of
2948  // the elements.
2949  const PointerType *pt = CastType<PointerType>(returnType);
2950  const char *funcName = NULL;
2951  if (pt != NULL)
2952  funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" :
2953  "__pseudo_gather64_i64";
2954  else if (llvmReturnType == LLVMTypes::DoubleVectorType)
2955  funcName = g->target->is32Bit() ? "__pseudo_gather32_double" :
2956  "__pseudo_gather64_double";
2957  else if (llvmReturnType == LLVMTypes::Int64VectorType)
2958  funcName = g->target->is32Bit() ? "__pseudo_gather32_i64" :
2959  "__pseudo_gather64_i64";
2960  else if (llvmReturnType == LLVMTypes::FloatVectorType)
2961  funcName = g->target->is32Bit() ? "__pseudo_gather32_float" :
2962  "__pseudo_gather64_float";
2963  else if (llvmReturnType == LLVMTypes::Int32VectorType)
2964  funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" :
2965  "__pseudo_gather64_i32";
2966  else if (llvmReturnType == LLVMTypes::Int16VectorType)
2967  funcName = g->target->is32Bit() ? "__pseudo_gather32_i16" :
2968  "__pseudo_gather64_i16";
2969  else {
2970  AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType);
2971  funcName = g->target->is32Bit() ? "__pseudo_gather32_i8" :
2972  "__pseudo_gather64_i8";
2973  }
2974 
2975  llvm::Function *gatherFunc = m->module->getFunction(funcName);
2976  AssertPos(currentPos, gatherFunc != NULL);
2977 
2978  llvm::Value *gatherCall = CallInst(gatherFunc, NULL, ptr, mask, name);
2979 
2980  // Add metadata about the source file location so that the
2981  // optimization passes can print useful performance warnings if we
2982  // can't optimize out this gather
2983  if (disableGSWarningCount == 0)
2984  addGSMetadata(gatherCall, currentPos);
2985 
2986  return gatherCall;
2987 }
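
// Scalar model (illustrative, not part of the original source) of the
// lane-by-lane semantics of the __pseudo_gather* functions called above:
static inline void lGatherModel(int *result, int *const *lanePtrs,
                                const bool *mask, int width) {
    for (int i = 0; i < width; ++i)
        if (mask[i])
            result[i] = *lanePtrs[i];  // inactive lanes are left undefined
}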
2988 
2989 
2990 /** Add metadata to the given instruction to encode the current source file
2991  position. This data is used in the lGetSourcePosFromMetadata()
2992  function in opt.cpp.
2993 */
2994 void
2995 FunctionEmitContext::addGSMetadata(llvm::Value *v, SourcePos pos) {
2996  llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(v);
2997  if (inst == NULL)
2998  return;
2999 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3000  llvm::Value *str = llvm::MDString::get(*g->ctx, pos.name);
3001 #else /* LLVM 3.6+ */
3002  llvm::MDString *str = llvm::MDString::get(*g->ctx, pos.name);
3003 #endif
3004  llvm::MDNode *md = llvm::MDNode::get(*g->ctx, str);
3005  inst->setMetadata("filename", md);
3006 
3007 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3008  llvm::Value *first_line = LLVMInt32(pos.first_line);
3009 #else /* LLVM 3.6+ */
3010  llvm::Metadata *first_line = llvm::ConstantAsMetadata::get(LLVMInt32(pos.first_line));
3011 #endif
3012  md = llvm::MDNode::get(*g->ctx, first_line);
3013  inst->setMetadata("first_line", md);
3014 
3015 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3016  llvm::Value *first_column = LLVMInt32(pos.first_column);
3017 #else /* LLVM 3.6+ */
3018  llvm::Metadata *first_column = llvm::ConstantAsMetadata::get(LLVMInt32(pos.first_column));
3019 #endif
3020  md = llvm::MDNode::get(*g->ctx, first_column);
3021  inst->setMetadata("first_column", md);
3022 
3023 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3024  llvm::Value *last_line = LLVMInt32(pos.last_line);
3025 #else /* LLVM 3.6+ */
3026  llvm::Metadata *last_line = llvm::ConstantAsMetadata::get(LLVMInt32(pos.last_line));
3027 #endif
3028  md = llvm::MDNode::get(*g->ctx, last_line);
3029  inst->setMetadata("last_line", md);
3030 
3031 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3032  llvm::Value *last_column = LLVMInt32(pos.last_column);
3033 #else /* LLVM 3.6+ */
3034  llvm::Metadata *last_column = llvm::ConstantAsMetadata::get(LLVMInt32(pos.last_column));
3035 #endif
3036  md = llvm::MDNode::get(*g->ctx, last_column);
3037  inst->setMetadata("last_column", md);
3038 }
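
// Illustrative sketch (not part of the original source) of how a consumer
// such as lGetSourcePosFromMetadata() in opt.cpp can recover the filename;
// shown in the LLVM <= 3.5 style, where MDNode operands are Values:
static inline llvm::StringRef lGetFilenameMetadataModel(llvm::Instruction *inst) {
    if (llvm::MDNode *md = inst->getMetadata("filename"))
        if (llvm::MDString *str =
                llvm::dyn_cast<llvm::MDString>(md->getOperand(0)))
            return str->getString();
    return llvm::StringRef();
}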
3039 
3040 
3041 llvm::Value *
3042 FunctionEmitContext::AllocaInst(llvm::Type *llvmType,
3043  const char *name, int align,
3044  bool atEntryBlock) {
3045  if (llvmType == NULL) {
3046  AssertPos(currentPos, m->errorCount > 0);
3047  return NULL;
3048  }
3049 
3050  llvm::AllocaInst *inst = NULL;
3051  if (atEntryBlock) {
3052  // We usually insert it right before the jump instruction at the
3053  // end of allocaBlock
3054  llvm::Instruction *retInst = allocaBlock->getTerminator();
3055  AssertPos(currentPos, retInst);
3056  inst = new llvm::AllocaInst(llvmType, name ? name : "", retInst);
3057  }
3058  else
3059  // Unless the caller overrode the default and wants it in the
3060  // current basic block
3061  inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock);
3062 
3063  // If no alignment was specified but we have an array of a uniform
3064  // type, then align it to the native vector alignment; it's not
3065  // unlikely that this array will be loaded into varying variables with
3066  // what will be aligned accesses if the uniform -> varying load is done
3067  // in regular chunks.
3068  llvm::ArrayType *arrayType =
3069  llvm::dyn_cast<llvm::ArrayType>(llvmType);
3070  if (align == 0 && arrayType != NULL &&
3071  !llvm::isa<llvm::VectorType>(arrayType->getElementType()))
3072  align = g->target->getNativeVectorAlignment();
3073 
3074  if (align != 0)
3075  inst->setAlignment(align);
3076  // Don't add debugging info to alloca instructions
3077  return inst;
3078 }
3079 
3080 
3081 /** Code to store the given varying value to the given location, only
3082  storing the elements that correspond to active program instances as
3083  given by the provided storeMask value. Note that the lvalue is only a
3084  single pointer, not a varying lvalue of one pointer per program
3085  instance (that case is handled by scatters).
3086  */
3087 void
3088 FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
3089  const Type *ptrType, llvm::Value *mask) {
3090  if (value == NULL || ptr == NULL) {
3091  AssertPos(currentPos, m->errorCount > 0);
3092  return;
3093  }
3094 
3095  AssertPos(currentPos, CastType<PointerType>(ptrType) != NULL);
3096  AssertPos(currentPos, ptrType->IsUniformType());
3097 
3098  const Type *valueType = ptrType->GetBaseType();
3099  const CollectionType *collectionType = CastType<CollectionType>(valueType);
3100  if (collectionType != NULL) {
3101  // Assigning a structure / array / vector. Handle each element
3102  // individually with what turns into a recursive call to
3103  // maskedStore()
3104  for (int i = 0; i < collectionType->GetElementCount(); ++i) {
3105  const Type *eltType = collectionType->GetElementType(i);
3106  if (eltType == NULL) {
3107  Assert(m->errorCount > 0);
3108  continue;
3109  }
3110  llvm::Value *eltValue = ExtractInst(value, i, "value_member");
3111  llvm::Value *eltPtr =
3112  AddElementOffset(ptr, i, ptrType, "struct_ptr_ptr");
3113  const Type *eltPtrType = PointerType::GetUniform(eltType);
3114  StoreInst(eltValue, eltPtr, mask, eltType, eltPtrType);
3115  }
3116  return;
3117  }
3118 
3119  // We must have a regular atomic, enumerator, or pointer type at this
3120  // point.
3121  AssertPos(currentPos, Type::IsBasicType(valueType));
3122  valueType = valueType->GetAsNonConstType();
3123 
3124  // Figure out if we need a 8, 16, 32 or 64-bit masked store.
3125  llvm::Function *maskedStoreFunc = NULL;
3126  llvm::Type *llvmValueType = value->getType();
3127 
3128  const PointerType *pt = CastType<PointerType>(valueType);
3129  if (pt != NULL) {
3130  if (pt->IsSlice()) {
3131  // Masked store of (varying) slice pointer.
3132  AssertPos(currentPos, pt->IsVaryingType());
3133 
3134  // First, extract the pointer from the slice struct and masked
3135  // store that.
3136  llvm::Value *v0 = ExtractInst(value, 0);
3137  llvm::Value *p0 = AddElementOffset(ptr, 0, ptrType);
3138  maskedStore(v0, p0, PointerType::GetUniform(pt->GetAsNonSlice()),
3139  mask);
3140 
3141  // And then do same for the integer offset
3142  llvm::Value *v1 = ExtractInst(value, 1);
3143  llvm::Value *p1 = AddElementOffset(ptr, 1, ptrType);
3144  const Type *offsetType = AtomicType::VaryingInt32;
3145  maskedStore(v1, p1, PointerType::GetUniform(offsetType), mask);
3146 
3147  return;
3148  }
3149 
3150  if (g->target->is32Bit())
3151  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
3152  else
3153  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
3154  }
3155  else if (llvmValueType == LLVMTypes::Int1VectorType) {
3156  llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask,
3157  LLVMMaskAllOn, "~mask");
3158  llvm::Value *old = LoadInst(ptr);
3159  llvm::Value *maskedOld = BinaryOperator(llvm::Instruction::And, old,
3160  notMask, "old&~mask");
3161  llvm::Value *maskedNew = BinaryOperator(llvm::Instruction::And, value,
3162  mask, "new&mask");
3163  llvm::Value *final = BinaryOperator(llvm::Instruction::Or, maskedOld,
3164  maskedNew, "old_new_result");
3165  StoreInst(final, ptr);
3166  return;
3167  }
3168  else if (llvmValueType == LLVMTypes::DoubleVectorType) {
3169  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_double");
3170  }
3171  else if (llvmValueType == LLVMTypes::Int64VectorType) {
3172  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
3173  }
3174  else if (llvmValueType == LLVMTypes::FloatVectorType) {
3175  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_float");
3176  }
3177  else if (llvmValueType == LLVMTypes::Int32VectorType) {
3178  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
3179  }
3180  else if (llvmValueType == LLVMTypes::Int16VectorType) {
3181  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i16");
3182  }
3183  else if (llvmValueType == LLVMTypes::Int8VectorType) {
3184  maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i8");
3185  }
3186  AssertPos(currentPos, maskedStoreFunc != NULL);
3187 
3188  std::vector<llvm::Value *> args;
3189  args.push_back(ptr);
3190  args.push_back(value);
3191  args.push_back(mask);
3192  CallInst(maskedStoreFunc, NULL, args);
3193 }
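
// Scalar model (illustrative, not part of the original source) of both the
// i1 blend above and the lane-by-lane behavior of the
// __pseudo_masked_store_* functions: lanes with the mask off keep the old
// memory contents.
static inline void lMaskedStoreModel(int *dst, const int *src,
                                     const bool *mask, int width) {
    for (int i = 0; i < width; ++i)
        if (mask[i])
            dst[i] = src[i];
}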
3194 
3195 
3196 
3197 /** Scatter the given varying value to the locations given by the varying
3198  lvalue (which should be an array of pointers with size equal to the
3199  target's vector width). We want to store each rvalue element at the
3200  corresponding pointer's location, *if* the mask for the corresponding
3201  program instance is on. If it's off, don't do anything.
3202 */
3203 void
3204 FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
3205  const Type *valueType, const Type *origPt,
3206  llvm::Value *mask) {
3207  const PointerType *ptrType = CastType<PointerType>(origPt);
3208  AssertPos(currentPos, ptrType != NULL);
3209  AssertPos(currentPos, ptrType->IsVaryingType());
3210 
3211  const CollectionType *srcCollectionType =
3212  CastType<CollectionType>(valueType);
3213  if (srcCollectionType != NULL) {
3214  // We're scattering a collection type--we need to keep track of the
3215  // source type (the type of the data values to be stored) and the
3216  // destination type (the type of objects in memory that will be
3217  // stored into) separately. This is necessary so that we can get
3218  // all of the addressing calculations right if we're scattering
3219  // from a varying struct to an array of uniform instances of the
3220  // same struct type, versus scattering into an array of varying
3221  // instances of the struct type, etc.
3222  const CollectionType *dstCollectionType =
3223  CastType<CollectionType>(ptrType->GetBaseType());
3224  AssertPos(currentPos, dstCollectionType != NULL);
3225 
3226  // Scatter the collection elements individually
3227  for (int i = 0; i < srcCollectionType->GetElementCount(); ++i) {
3228  // First, get the values for the current element out of the
3229  // source.
3230  llvm::Value *eltValue = ExtractInst(value, i);
3231  const Type *srcEltType = srcCollectionType->GetElementType(i);
3232 
3233  // We may be scattering a uniform atomic element; in this case
3234  // we'll smear it out to be varying before making the recursive
3235  // scatter() call below.
3236  if (srcEltType->IsUniformType() && Type::IsBasicType(srcEltType)) {
3237  eltValue = SmearUniform(eltValue, "to_varying");
3238  srcEltType = srcEltType->GetAsVaryingType();
3239  }
3240 
3241  // Get the (varying) pointer to the i'th element of the target
3242  // collection
3243  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType);
3244 
3245  // The destination element type may be uniform (e.g. if we're
3246  // scattering to an array of uniform structs). Thus, we need
3247  // to be careful about passing the correct type to
3248  // addVaryingOffsetsIfNeeded() here.
3249  const Type *dstEltType = dstCollectionType->GetElementType(i);
3250  const PointerType *dstEltPtrType = PointerType::GetVarying(dstEltType);
3251  if (ptrType->IsSlice())
3252  dstEltPtrType = dstEltPtrType->GetAsSlice();
3253 
3254  eltPtr = addVaryingOffsetsIfNeeded(eltPtr, dstEltPtrType);
3255 
3256  // And recursively scatter() until we hit a basic type, at
3257  // which point the actual memory operations can be performed...
3258  scatter(eltValue, eltPtr, srcEltType, dstEltPtrType, mask);
3259  }
3260  return;
3261  }
3262  else if (ptrType->IsSlice()) {
3263  // As with gather, we need to add the final slice offset once we
3264  // get to a terminal SOA array of basic types.
3265  ptr = lFinalSliceOffset(this, ptr, &ptrType);
3266  }
3267 
3268  const PointerType *pt = CastType<PointerType>(valueType);
3269 
3270  // And everything should be a pointer or atomic (or enum) from here on out...
3271  AssertPos(currentPos,
3272  pt != NULL
3273  || CastType<AtomicType>(valueType) != NULL
3274  || CastType<EnumType>(valueType) != NULL);
3275 
3276  llvm::Type *type = value->getType();
3277  const char *funcName = NULL;
3278  if (pt != NULL) {
3279  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" :
3280  "__pseudo_scatter64_i64";
3281  }
3282  else if (type == LLVMTypes::DoubleVectorType) {
3283  funcName = g->target->is32Bit() ? "__pseudo_scatter32_double" :
3284  "__pseudo_scatter64_double";
3285  }
3286  else if (type == LLVMTypes::Int64VectorType) {
3287  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i64" :
3288  "__pseudo_scatter64_i64";
3289  }
3290  else if (type == LLVMTypes::FloatVectorType) {
3291  funcName = g->target->is32Bit() ? "__pseudo_scatter32_float" :
3292  "__pseudo_scatter64_float";
3293  }
3294  else if (type == LLVMTypes::Int32VectorType) {
3295  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" :
3296  "__pseudo_scatter64_i32";
3297  }
3298  else if (type == LLVMTypes::Int16VectorType) {
3299  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i16" :
3300  "__pseudo_scatter64_i16";
3301  }
3302  else if (type == LLVMTypes::Int8VectorType) {
3303  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i8" :
3304  "__pseudo_scatter64_i8";
3305  }
3306 
3307  llvm::Function *scatterFunc = m->module->getFunction(funcName);
3308  AssertPos(currentPos, scatterFunc != NULL);
3309 
3310  AddInstrumentationPoint("scatter");
3311 
3312  std::vector<llvm::Value *> args;
3313  args.push_back(ptr);
3314  args.push_back(value);
3315  args.push_back(mask);
3316  llvm::Value *inst = CallInst(scatterFunc, NULL, args);
3317 
3318  if (disableGSWarningCount == 0)
3319  addGSMetadata(inst, currentPos);
3320 }
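// By analogy, a scalar sketch (hypothetical, not ispc source) of what a
// __pseudo_scatter* call expresses once scatter() has recursed down to a
// basic type: every active lane stores its element through its own,
// lane-private pointer.
template <typename T>
static void lScatterSketch(T *const ptrs[], const T *value,
                           const bool *mask, int width) {
    for (int i = 0; i < width; ++i)
        if (mask[i])
            *ptrs[i] = value[i];  // per-lane address from the varying lvalue
}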
3321 
3322 
3323 void
3324 FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
3325  if (value == NULL || ptr == NULL) {
3326  // may happen due to error elsewhere
3327  AssertPos(currentPos, m->errorCount > 0);
3328  return;
3329  }
3330 
3331  llvm::PointerType *pt =
3332  llvm::dyn_cast<llvm::PointerType>(ptr->getType());
3333  AssertPos(currentPos, pt != NULL);
3334 
3335  llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock);
3336 
3337  if (g->opt.forceAlignedMemory &&
3338  llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
3339  inst->setAlignment(g->target->getNativeVectorAlignment());
3340  }
3341 
3342  AddDebugPos(inst);
3343 }
3344 
3345 
3346 void
3347 FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
3348  llvm::Value *mask, const Type *valueType,
3349  const Type *ptrRefType) {
3350  if (value == NULL || ptr == NULL) {
3351  // may happen due to error elsewhere
3352  AssertPos(currentPos, m->errorCount > 0);
3353  return;
3354  }
3355 
3356  const PointerType *ptrType;
3357  if (CastType<ReferenceType>(ptrRefType) != NULL)
3358  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
3359  else {
3360  ptrType = CastType<PointerType>(ptrRefType);
3361  AssertPos(currentPos, ptrType != NULL);
3362  }
3363 
3364  if (CastType<UndefinedStructType>(ptrType->GetBaseType())) {
3365  Error(currentPos, "Unable to store to undefined struct type \"%s\".",
3366  ptrType->GetBaseType()->GetString().c_str());
3367  return;
3368  }
3369 
3370  // Figure out what kind of store we're doing here
3371  if (ptrType->IsUniformType()) {
3372  if (ptrType->IsSlice())
3373  // storing a uniform value to a single slice of a SOA type
3374  storeUniformToSOA(value, ptr, mask, valueType, ptrType);
3375  else if (ptrType->GetBaseType()->IsUniformType())
3376  // the easy case
3377  StoreInst(value, ptr);
3378  else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations)
3379  // Otherwise it is a masked store unless we can determine that the
3380  // mask is all on... (Unclear if this check is actually useful.)
3381  StoreInst(value, ptr);
3382  else
3383  maskedStore(value, ptr, ptrType, mask);
3384  }
3385  else {
3386  AssertPos(currentPos, ptrType->IsVaryingType());
3387  // We have a varying ptr (an array of pointers), so it's time to
3388  // scatter
3389  scatter(value, ptr, valueType, ptrType, GetFullMask());
3390  }
3391 }
3392 
3393 
3394 /** Store a uniform type to SOA-laid-out memory.
3395  */
3396 void
3397 FunctionEmitContext::storeUniformToSOA(llvm::Value *value, llvm::Value *ptr,
3398  llvm::Value *mask, const Type *valueType,
3399  const PointerType *ptrType) {
3400  AssertPos(currentPos, Type::EqualIgnoringConst(ptrType->GetBaseType()->GetAsUniformType(),
3401  valueType));
3402 
3403  const CollectionType *ct = CastType<CollectionType>(valueType);
3404  if (ct != NULL) {
3405  // Handle collections element wise...
3406  for (int i = 0; i < ct->GetElementCount(); ++i) {
3407  llvm::Value *eltValue = ExtractInst(value, i);
3408  const Type *eltType = ct->GetElementType(i);
3409  const PointerType *dstEltPtrType;
3410  llvm::Value *dstEltPtr =
3411  AddElementOffset(ptr, i, ptrType, "slice_offset",
3412  &dstEltPtrType);
3413  StoreInst(eltValue, dstEltPtr, mask, eltType, dstEltPtrType);
3414  }
3415  }
3416  else {
3417  // We're finally at a leaf SOA array; apply the slice offset and
3418  // then we can do a final regular store
3419  AssertPos(currentPos, Type::IsBasicType(valueType));
3420  ptr = lFinalSliceOffset(this, ptr, &ptrType);
3421  StoreInst(value, ptr);
3422  }
3423 }
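// Example (assuming a struct type): storing a uniform struct { float a;
// int32 b; } to one slice of a soa<4> array recurses once per member;
// each member is a basic type, so each recursive call falls into the
// else branch, applies lFinalSliceOffset(), and issues a single scalar
// store into that member's [4]-wide SOA array.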
3424 
3425 
3426 void
3427 FunctionEmitContext::MemcpyInst(llvm::Value *dest, llvm::Value *src,
3428  llvm::Value *count, llvm::Value *align) {
3429  dest = BitCastInst(dest, LLVMTypes::VoidPointerType);
3430  src = BitCastInst(src, LLVMTypes::VoidPointerType);
3431  if (count->getType() != LLVMTypes::Int64Type) {
3432  AssertPos(currentPos, count->getType() == LLVMTypes::Int32Type);
3433  count = ZExtInst(count, LLVMTypes::Int64Type, "count_to_64");
3434  }
3435  if (align == NULL)
3436  align = LLVMInt32(1);
3437 
3438  llvm::Constant *mcFunc =
3439  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64",
3440  LLVMTypes::VoidType, LLVMTypes::VoidPointerType,
3441  LLVMTypes::VoidPointerType, LLVMTypes::Int64Type,
3442  LLVMTypes::Int32Type, LLVMTypes::BoolType, NULL);
3443  AssertPos(currentPos, mcFunc != NULL);
3444  AssertPos(currentPos, llvm::isa<llvm::Function>(mcFunc));
3445 
3446  std::vector<llvm::Value *> args;
3447  args.push_back(dest);
3448  args.push_back(src);
3449  args.push_back(count);
3450  args.push_back(align);
3451  args.push_back(LLVMFalse); /* not volatile */
3452  CallInst(mcFunc, NULL, args, "");
3453 }
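// For reference, the call emitted above has the shape of the (pre-LLVM 7)
// five-operand memcpy intrinsic, with the alignment passed as an explicit
// argument and volatility hard-wired to false:
//
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* dest, i8* src,
//                                        i64 count, i32 align, i1 false)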
3454 
3455 
3456 void
3457 FunctionEmitContext::BranchInst(llvm::BasicBlock *dest) {
3458  llvm::Instruction *b = llvm::BranchInst::Create(dest, bblock);
3459  AddDebugPos(b);
3460 }
3461 
3462 
3463 void
3464 FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock,
3465  llvm::BasicBlock *falseBlock,
3466  llvm::Value *test) {
3467  if (test == NULL) {
3468  AssertPos(currentPos, m->errorCount > 0);
3469  return;
3470  }
3471 
3472  llvm::Instruction *b =
3473  llvm::BranchInst::Create(trueBlock, falseBlock, test, bblock);
3474  AddDebugPos(b);
3475 }
3476 
3477 
3478 llvm::Value *
3479 FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) {
3480  if (v == NULL) {
3481  AssertPos(currentPos, m->errorCount > 0);
3482  return NULL;
3483  }
3484 
3485  if (name == NULL) {
3486  char buf[32];
3487  sprintf(buf, "_extract_%d", elt);
3488  name = LLVMGetName(v, buf);
3489  }
3490 
3491  llvm::Instruction *ei = NULL;
3492  if (llvm::isa<llvm::VectorType>(v->getType()))
3493  ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), name, bblock);
3494  else
3495  ei = llvm::ExtractValueInst::Create(v, elt, name, bblock);
3496  AddDebugPos(ei);
3497  return ei;
3498 }
3499 
3500 
3501 llvm::Value *
3502 FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
3503  const char *name) {
3504  if (v == NULL || eltVal == NULL) {
3505  AssertPos(currentPos, m->errorCount > 0);
3506  return NULL;
3507  }
3508 
3509  if (name == NULL) {
3510  char buf[32];
3511  sprintf(buf, "_insert_%d", elt);
3512  name = LLVMGetName(v, buf);
3513  }
3514 
3515  llvm::Instruction *ii = NULL;
3516  if (llvm::isa<llvm::VectorType>(v->getType()))
3517  ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt),
3518  name, bblock);
3519  else
3520  ii = llvm::InsertValueInst::Create(v, eltVal, elt, name, bblock);
3521  AddDebugPos(ii);
3522  return ii;
3523 }
3524 
3525 
3526 llvm::Value *
3527 FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask,
3528  const char *name) {
3529  if (v1 == NULL || v2 == NULL || mask == NULL) {
3530  AssertPos(currentPos, m->errorCount > 0);
3531  return NULL;
3532  }
3533 
3534  if (name == NULL) {
3535  char buf[32];
3536  sprintf(buf, "_shuffle");
3537  name = LLVMGetName(v1, buf);
3538  }
3539 
3540  llvm::Instruction *ii = new llvm::ShuffleVectorInst(v1, v2, mask, name, bblock);
3541 
3542  AddDebugPos(ii);
3543  return ii;
3544 }
3545 
3546 
3547 llvm::Value *
3548 FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type* vecType,
3549  const char *name) {
3550  if (v == NULL || vecType == NULL) {
3551  AssertPos(currentPos, m->errorCount > 0);
3552  return NULL;
3553  }
3554 
3555  llvm::VectorType *ty = llvm::dyn_cast<llvm::VectorType>(vecType);
3556  Assert(ty && ty->getVectorElementType() == v->getType());
3557 
3558  if (name == NULL) {
3559  char buf[32];
3560  sprintf(buf, "_broadcast");
3561  name = LLVMGetName(v, buf);
3562  }
3563 
3564  // Generate the following sequence:
3565  // %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0
3566  // %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef,
3567  // <4 x i32> zeroinitializer
3568 
3569  llvm::Value *undef1 = llvm::UndefValue::get(vecType);
3570  llvm::Value *undef2 = llvm::UndefValue::get(vecType);
3571 
3572  // InsertElement
3573  llvm::Twine tw = llvm::Twine(name) + llvm::Twine("_init");
3574  llvm::Value *insert = InsertInst(undef1, v, 0, tw.str().c_str());
3575 
3576  // ShuffleVector
3577  llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
3578  vecType->getVectorNumElements(),
3579  llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
3580  llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, name);
3581 
3582  return ret;
3583 }
3584 
3585 
3586 llvm::PHINode *
3587 FunctionEmitContext::PhiNode(llvm::Type *type, int count,
3588  const char *name) {
3589  llvm::PHINode *pn = llvm::PHINode::Create(type, count,
3590  name ? name : "phi", bblock);
3591  AddDebugPos(pn);
3592  return pn;
3593 }
3594 
3595 
3596 llvm::Instruction *
3597 FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0,
3598  llvm::Value *val1, const char *name) {
3599  if (test == NULL || val0 == NULL || val1 == NULL) {
3600  AssertPos(currentPos, m->errorCount > 0);
3601  return NULL;
3602  }
3603 
3604  if (name == NULL)
3605  name = LLVMGetName(test, "_select");
3606 
3607  llvm::Instruction *inst = llvm::SelectInst::Create(test, val0, val1, name,
3608  bblock);
3609  AddDebugPos(inst);
3610  return inst;
3611 }
3612 
3613 
3614 /** Given a value representing a function to be called or possibly-varying
3615  pointer to a function to be called, figure out how many arguments the
3616  function has. */
3617 static unsigned int
3618 lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
3619  llvm::FunctionType *ft =
3620  llvm::dyn_cast<llvm::FunctionType>(callee->getType());
3621 
3622  if (ft == NULL) {
3623  llvm::PointerType *pt =
3624  llvm::dyn_cast<llvm::PointerType>(callee->getType());
3625  if (pt == NULL) {
3626  // varying--in this case, it must be the version of the
3627  // function that takes a mask
3628  return funcType->GetNumParameters() + 1;
3629  }
3630  ft = llvm::dyn_cast<llvm::FunctionType>(pt->getElementType());
3631  }
3632 
3633  Assert(ft != NULL);
3634  return ft->getNumParams();
3635 }
3636 
3637 
3638 llvm::Value *
3639 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
3640  const std::vector<llvm::Value *> &args,
3641  const char *name) {
3642  if (func == NULL) {
3643  AssertPos(currentPos, m->errorCount > 0);
3644  return NULL;
3645  }
3646 
3647  std::vector<llvm::Value *> argVals = args;
3648  // Most of the time, the mask is passed as the last argument; this
3649  // isn't the case for things like intrinsics, builtins, and extern "C"
3650  // functions from the application. Add the mask if it's needed.
3651  unsigned int calleeArgCount = lCalleeArgCount(func, funcType);
3652  AssertPos(currentPos, argVals.size() + 1 == calleeArgCount ||
3653  argVals.size() == calleeArgCount);
3654  if (argVals.size() + 1 == calleeArgCount)
3655  argVals.push_back(GetFullMask());
3656 
3657  if (llvm::isa<llvm::VectorType>(func->getType()) == false) {
3658  // Regular 'uniform' function call--just one function or function
3659  // pointer, so just emit the IR directly.
3660  llvm::Instruction *ci =
3661  llvm::CallInst::Create(func, argVals, name ? name : "", bblock);
3662 
3663  // Copy noalias attribute to call instruction, to enable better
3664  // alias analysis.
3665  // TODO: what other attributes need to be copied?
3666  // TODO: do the same for the varying path.
3667 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3 // LLVM 3.3+
3668  llvm::CallInst *cc = llvm::dyn_cast<llvm::CallInst>(ci);
3669  if (cc &&
3670  cc->getCalledFunction() &&
3671  cc->getCalledFunction()->doesNotAlias(0)) {
3672  cc->addAttribute(0, llvm::Attribute::NoAlias);
3673  }
3674 #endif
3675 
3676  AddDebugPos(ci);
3677  return ci;
3678  }
3679  else {
3680  // Emit the code for a varying function call, where we have a
3681  // vector of function pointers, one for each program instance. The
3682  // basic strategy is that we go through the function pointers, and
3683  // for the executing program instances, for each unique function
3684  // pointer that's in the vector, call that function with a mask
3685  // equal to the set of active program instances that also have that
3686  // function pointer. When all unique function pointers have been
3687  // called, we're done.
3688 
3689  llvm::BasicBlock *bbTest = CreateBasicBlock("varying_funcall_test");
3690  llvm::BasicBlock *bbCall = CreateBasicBlock("varying_funcall_call");
3691  llvm::BasicBlock *bbDone = CreateBasicBlock("varying_funcall_done");
3692 
3693  // Get the current mask value so we can restore it later
3694  llvm::Value *origMask = GetInternalMask();
3695 
3696  // First allocate memory to accumulate the various program
3697  // instances' return values...
3698  const Type *returnType = funcType->GetReturnType();
3699  llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
3700  llvm::Value *resultPtr = NULL;
3701  if (llvmReturnType->isVoidTy() == false)
3702  resultPtr = AllocaInst(llvmReturnType);
3703 
3704  // The memory pointed to by maskPointer tracks the set of program
3705  // instances for which we still need to call the function they are
3706  // pointing to. It starts out initialized with the mask of
3707  // currently running program instances.
3708  llvm::Value *maskPtr = AllocaInst(LLVMTypes::MaskType);
3709  StoreInst(GetFullMask(), maskPtr);
3710 
3711  // And now we branch to the test to see if there's more work to be
3712  // done.
3713  BranchInst(bbTest);
3714 
3715  // bbTest: are any lanes of the mask still on? If so, jump to
3716  // bbCall
3717  SetCurrentBasicBlock(bbTest); {
3718  llvm::Value *maskLoad = LoadInst(maskPtr);
3719  llvm::Value *any = Any(maskLoad);
3720  BranchInst(bbCall, bbDone, any);
3721  }
3722 
3723  // bbCall: this is the body of the loop that calls out to one of
3724  // the active function pointer values.
3725  SetCurrentBasicBlock(bbCall); {
3726  // Figure out the first lane that still needs its function
3727  // pointer to be called.
3728  llvm::Value *currentMask = LoadInst(maskPtr);
3729  llvm::Function *cttz =
3730  m->module->getFunction("__count_trailing_zeros_i64");
3731  AssertPos(currentPos, cttz != NULL);
3732  llvm::Value *firstLane64 = CallInst(cttz, NULL, LaneMask(currentMask),
3733  "first_lane64");
3734  llvm::Value *firstLane =
3735  TruncInst(firstLane64, LLVMTypes::Int32Type, "first_lane32");
3736 
3737  // Get the pointer to the function we're going to call this
3738  // time through: fptr = func[firstLane]
3739  llvm::Value *fptr =
3740  llvm::ExtractElementInst::Create(func, firstLane,
3741  "extract_fptr", bblock);
3742 
3743  // Smear it out into an array of function pointers
3744  llvm::Value *fptrSmear = SmearUniform(fptr, "func_ptr");
3745 
3746  // fpOverlap = (fpSmearAsVec == fpOrigAsVec). This gives us a
3747  // mask for the set of program instances that have the same
3748  // value for their function pointer.
3749  llvm::Value *fpOverlap =
3750  CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
3751  fptrSmear, func);
3752  fpOverlap = I1VecToBoolVec(fpOverlap);
3753 
3754  // Figure out the mask to use when calling the function
3755  // pointer: we need to AND the current execution mask to handle
3756  // the case of any non-running program instances that happen to
3757  // have this function pointer value.
3758  // callMask = (currentMask & fpOverlap)
3759  llvm::Value *callMask =
3760  BinaryOperator(llvm::Instruction::And, currentMask, fpOverlap,
3761  "call_mask");
3762 
3763  // Set the mask
3764  SetInternalMask(callMask);
3765 
3766  // bitcast the i32/64 function pointer to the actual function
3767  // pointer type.
3768  llvm::Type *llvmFuncType = funcType->LLVMFunctionType(g->ctx);
3769  llvm::Type *llvmFPtrType = llvm::PointerType::get(llvmFuncType, 0);
3770  llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType);
3771 
3772  // Call the function: callResult = call fptr(args, args, call mask)
3773  llvm::Value *callResult = CallInst(fptrCast, funcType, args, name);
3774 
3775  // Now, do a masked store into the memory allocated to
3776  // accumulate the result using the call mask.
3777  if (callResult != NULL &&
3778  callResult->getType() != LLVMTypes::VoidType) {
3779  AssertPos(currentPos, resultPtr != NULL);
3780  StoreInst(callResult, resultPtr, callMask, returnType,
3781  PointerType::GetUniform(returnType));
3782  }
3783  else
3784  AssertPos(currentPos, resultPtr == NULL);
3785 
3786  // Update the mask to turn off the program instances for which
3787  // we just called the function.
3788  // currentMask = currentMask & ~callmask
3789  llvm::Value *notCallMask =
3790  BinaryOperator(llvm::Instruction::Xor, callMask, LLVMMaskAllOn,
3791  "~callMask");
3792  currentMask = BinaryOperator(llvm::Instruction::And, currentMask,
3793  notCallMask, "currentMask&~callMask");
3794  StoreInst(currentMask, maskPtr);
3795 
3796  // And go back to the test to see if we need to do another
3797  // call.
3798  BranchInst(bbTest);
3799  }
3800 
3801  // bbDone: We're all done; clean up and return the result we've
3802  // accumulated in the result memory.
3803  SetCurrentBasicBlock(bbDone);
3804  SetInternalMask(origMask);
3805  return resultPtr ? LoadInst(resultPtr) : NULL;
3806  }
3807 }
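// A host-side sketch (hypothetical helper, not ispc source) of the
// uniformization loop built above: repeatedly take the first still-active
// lane, gather every lane that shares its function pointer into one call
// mask, make a single call with that mask, and retire those lanes.
typedef void (*LaneFn)(const bool *callMask, int width);
static void lVaryingCallSketch(LaneFn fns[], bool mask[], int width) {
    for (;;) {
        int first = -1;
        for (int i = 0; i < width; ++i)
            if (mask[i]) { first = i; break; }
        if (first == -1)
            break;                      // no active lanes left -> done
        bool callMask[64];              // assumes width <= 64 lanes
        for (int i = 0; i < width; ++i) {
            callMask[i] = mask[i] && (fns[i] == fns[first]);
            if (callMask[i])
                mask[i] = false;        // these lanes are handled now
        }
        fns[first](callMask, width);    // one call per unique pointer
    }
}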
3808 
3809 
3810 llvm::Value *
3811 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
3812  llvm::Value *arg, const char *name) {
3813  std::vector<llvm::Value *> args;
3814  args.push_back(arg);
3815  return CallInst(func, funcType, args, name);
3816 }
3817 
3818 
3819 llvm::Value *
3820 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
3821  llvm::Value *arg0, llvm::Value *arg1,
3822  const char *name) {
3823  std::vector<llvm::Value *> args;
3824  args.push_back(arg0);
3825  args.push_back(arg1);
3826  return CallInst(func, funcType, args, name);
3827 }
3828 
3829 
3830 llvm::Instruction *
3831 FunctionEmitContext::ReturnInst() {
3832  if (launchedTasks)
3833  // Add a sync call at the end of any function that launched tasks
3834  SyncInst();
3835 
3836  llvm::Instruction *rinst = NULL;
3837  if (returnValuePtr != NULL) {
3838  // We have value(s) to return; load them from their storage
3839  // location
3840  llvm::Value *retVal = LoadInst(returnValuePtr, "return_value");
3841  rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock);
3842  }
3843  else {
3844  AssertPos(currentPos, function->GetReturnType()->IsVoidType());
3845  rinst = llvm::ReturnInst::Create(*g->ctx, bblock);
3846  }
3847 
3848  AddDebugPos(rinst);
3849  bblock = NULL;
3850  return rinst;
3851 }
3852 
3853 
3854 llvm::Value *
3855 FunctionEmitContext::LaunchInst(llvm::Value *callee,
3856  std::vector<llvm::Value *> &argVals,
3857  llvm::Value *launchCount[3]){
3858 #ifdef ISPC_NVPTX_ENABLED
3859  if (g->target->getISA() == Target::NVPTX)
3860  {
3861  if (callee == NULL) {
3862  AssertPos(currentPos, m->errorCount > 0);
3863  return NULL;
3864  }
3865  launchedTasks = true;
3866 
3867  AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
3868  std::vector<llvm::Type*> argTypes;
3869 
3870  llvm::Function *F = llvm::dyn_cast<llvm::Function>(callee);
3871  const unsigned int nArgs = F->arg_size();
3872  llvm::Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
3873  for (; I != E; ++I)
3874  argTypes.push_back(I->getType());
3875  llvm::Type *st = llvm::StructType::get(*g->ctx, argTypes);
3876  llvm::StructType *argStructType = static_cast<llvm::StructType *>(st);
3877  llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
3878  if (structSize->getType() != LLVMTypes::Int64Type)
3879  structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
3880  "struct_size_to_64");
3881 
3882  const int align = 8;
3883  llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
3884  AssertPos(currentPos, falloc != NULL);
3885  std::vector<llvm::Value *> allocArgs;
3886  allocArgs.push_back(launchGroupHandlePtr);
3887  allocArgs.push_back(structSize);
3888  allocArgs.push_back(LLVMInt32(align));
3889  llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
3890  llvm::Value *voidi64 = PtrToIntInst(voidmem, "args_i64");
3891  llvm::BasicBlock* if_true = CreateBasicBlock("if_true");
3892  llvm::BasicBlock* if_false = CreateBasicBlock("if_false");
3893 
3894  /* Check that the pointer returned by ISPCAlloc is not NULL.
3895  * --------------
3896  * This is a workaround for not checking the value of programIndex:
3897  * ISPCAlloc returns a NULL pointer for all programIndex > 0.
3898  * Of course, if ISPCAlloc fails to get the parameter buffer, the pointer
3899  * for programIndex = 0 will also be NULL, so this check must stay; the
3900  * code should also be rewritten to make it less opaque.
3901  */
3902  llvm::Value* cmp1 = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, voidi64, LLVMInt64(0), "cmp1");
3903  BranchInst(if_true, if_false, cmp1);
3904 
3905  /**********************/
3906  bblock = if_true;
3907 
3908  // label_if_then block:
3909  llvm::Type *pt = llvm::PointerType::getUnqual(st);
3910  llvm::Value *argmem = BitCastInst(voidmem, pt);
3911  for (unsigned int i = 0; i < argVals.size(); ++i)
3912  {
3913  llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
3914  // don't need to do masked store here, I think
3915  StoreInst(argVals[i], ptr);
3916  }
3917  if (nArgs == argVals.size() + 1) {
3918  // copy in the mask
3919  llvm::Value *mask = GetFullMask();
3920  llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
3921  "funarg_mask");
3922  StoreInst(mask, ptr);
3923  }
3924  BranchInst(if_false);
3925 
3926  /**********************/
3927  bblock = if_false;
3928 
3929  llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
3930  llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
3931  AssertPos(currentPos, flaunch != NULL);
3932  std::vector<llvm::Value *> args;
3933  args.push_back(launchGroupHandlePtr);
3934  args.push_back(fptr);
3935  args.push_back(voidmem);
3936  args.push_back(launchCount[0]);
3937  args.push_back(launchCount[1]);
3938  args.push_back(launchCount[2]);
3939  llvm::Value *ret = CallInst(flaunch, NULL, args, "");
3940  return ret;
3941  }
3942 #endif /* ISPC_NVPTX_ENABLED */
3943 
3944  if (callee == NULL) {
3945  AssertPos(currentPos, m->errorCount > 0);
3946  return NULL;
3947  }
3948 
3949  launchedTasks = true;
3950 
3951  AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
3952  llvm::Type *argType =
3953  (llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
3954  AssertPos(currentPos, llvm::PointerType::classof(argType));
3955  llvm::PointerType *pt =
3956  llvm::dyn_cast<llvm::PointerType>(argType);
3957  AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
3958  llvm::StructType *argStructType =
3959  static_cast<llvm::StructType *>(pt->getElementType());
3960 
3961  llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
3962  AssertPos(currentPos, falloc != NULL);
3963  llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
3964  if (structSize->getType() != LLVMTypes::Int64Type)
3965  // ISPCAlloc expects the size as an uint64_t, but on 32-bit
3966  // targets, SizeOf returns a 32-bit value
3967  structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
3968  "struct_size_to_64");
3969  int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
3970 
3971  std::vector<llvm::Value *> allocArgs;
3972  allocArgs.push_back(launchGroupHandlePtr);
3973  allocArgs.push_back(structSize);
3974  allocArgs.push_back(LLVMInt32(align));
3975  llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
3976  llvm::Value *argmem = BitCastInst(voidmem, pt);
3977 
3978  // Copy the values of the parameters into the appropriate place in
3979  // the argument block
3980  for (unsigned int i = 0; i < argVals.size(); ++i) {
3981  llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
3982  // don't need to do masked store here, I think
3983  StoreInst(argVals[i], ptr);
3984  }
3985 
3986  if (argStructType->getNumElements() == argVals.size() + 1) {
3987  // copy in the mask
3988  llvm::Value *mask = GetFullMask();
3989  llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
3990  "funarg_mask");
3991  StoreInst(mask, ptr);
3992  }
3993 
3994  // And emit the call to the user-supplied task launch function, passing
3995  // a pointer to the task function being called and a pointer to the
3996  // argument block we just filled in
3997  llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
3998  llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
3999  AssertPos(currentPos, flaunch != NULL);
4000  std::vector<llvm::Value *> args;
4001  args.push_back(launchGroupHandlePtr);
4002  args.push_back(fptr);
4003  args.push_back(voidmem);
4004  args.push_back(launchCount[0]);
4005  args.push_back(launchCount[1]);
4006  args.push_back(launchCount[2]);
4007  return CallInst(flaunch, NULL, args, "");
4008 }
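// The calls emitted above target the user-supplied task system; its entry
// points, as documented for ispc's task launch support, have these shapes:
//
//   void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
//   void ISPCLaunch(void **handlePtr, void *f, void *data,
//                   int count0, int count1, int count2);
//   void ISPCSync(void *handle);
//
// LaunchInst() obtains the argument block it packs above via ISPCAlloc,
// then hands the task function pointer, the block, and the three launch
// dimensions to ISPCLaunch.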
4009 
4010 
4011 void
4012 FunctionEmitContext::SyncInst() {
4013 #ifdef ISPC_NVPTX_ENABLED
4014  if (g->target->getISA() == Target::NVPTX)
4015  {
4016  llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
4017  llvm::Value *nullPtrValue =
4018  llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
4019  llvm::Function *fsync = m->module->getFunction("ISPCSync");
4020  if (fsync == NULL)
4021  FATAL("Couldn't find ISPCSync declaration?!");
4022  CallInst(fsync, NULL, launchGroupHandle, "");
4023  StoreInst(nullPtrValue, launchGroupHandlePtr);
4024  return;
4025  }
4026 #endif /* ISPC_NVPTX_ENABLED */
4027 
4028  llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
4029  llvm::Value *nullPtrValue =
4030  llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
4031  llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
4032  llvm::CmpInst::ICMP_NE,
4033  launchGroupHandle, nullPtrValue);
4034  llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
4035  llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
4036  BranchInst(bSync, bPostSync, nonNull);
4037 
4038  SetCurrentBasicBlock(bSync);
4039  llvm::Function *fsync = m->module->getFunction("ISPCSync");
4040  if (fsync == NULL)
4041  FATAL("Couldn't find ISPCSync declaration?!");
4042  CallInst(fsync, NULL, launchGroupHandle, "");
4043 
4044  // zero out the handle so that if ISPCLaunch is called again in this
4045  // function, it knows it's starting out from scratch
4046  StoreInst(nullPtrValue, launchGroupHandlePtr);
4047 
4048  BranchInst(bPostSync);
4049 
4050  SetCurrentBasicBlock(bPostSync);
4051 }
4052 
4053 
4054 /** When we're gathering from or scattering to a varying atomic type, we need
4055  to add an appropriate offset to the final address for each lane right
4056  before we use it. Given a varying pointer we're about to use and its
4057  type, this function determines whether these offsets are needed and
4058  returns an updated pointer that incorporates these offsets if needed.
4059  */
4060 llvm::Value *
4061 FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr,
4062  const Type *ptrType) {
4063  // This should only be called for varying pointers
4064  const PointerType *pt = CastType<PointerType>(ptrType);
4065  AssertPos(currentPos, pt && pt->IsVaryingType());
4066 
4067  const Type *baseType = ptrType->GetBaseType();
4068  if (Type::IsBasicType(baseType) == false)
4069  return ptr;
4070 
4071  if (baseType->IsVaryingType() == false)
4072  return ptr;
4073 
4074  // Find the size of a uniform element of the varying type
4075  llvm::Type *llvmBaseUniformType =
4076  baseType->GetAsUniformType()->LLVMType(g->ctx);
4077  llvm::Value *unifSize = g->target->SizeOf(llvmBaseUniformType, bblock);
4078  unifSize = SmearUniform(unifSize);
4079 
4080  // Compute offset = <0, 1, .. > * unifSize
4081  bool is32bits = g->target->is32Bit() || g->opt.force32BitAddressing;
4082  llvm::Value *varyingOffsets = ProgramIndexVector(is32bits);
4083 
4084  llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
4085  varyingOffsets);
4086 
4087  if (g->opt.force32BitAddressing == true && g->target->is32Bit() == false)
4088  // On 64-bit targets where we're doing 32-bit addressing
4089  // calculations, we need to convert to an i64 vector before adding
4090  // to the pointer
4091  offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
4092 
4093  return BinaryOperator(llvm::Instruction::Add, ptr, offset);
4094 }
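// Worked example (assuming an 8-wide target and a varying float base
// type): sizeof(uniform float) == 4, so the offsets computed above are
// {0,1,2,3,4,5,6,7} * 4 = {0,4,8,12,16,20,24,28} bytes; adding them to
// the common base address makes lane i point at element i of the
// varying value's in-memory layout.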
4095 
4096 
4097 CFInfo *
4098 FunctionEmitContext::popCFState() {
4099  AssertPos(currentPos, controlFlowInfo.size() > 0);
4100  CFInfo *ci = controlFlowInfo.back();
4101  controlFlowInfo.pop_back();
4102 
4103  if (ci->IsSwitch()) {
4104  breakTarget = ci->savedBreakTarget;
4105  continueTarget = ci->savedContinueTarget;
4106  breakLanesPtr = ci->savedBreakLanesPtr;
4107  continueLanesPtr = ci->savedContinueLanesPtr;
4108  blockEntryMask = ci->savedBlockEntryMask;
4109  switchExpr = ci->savedSwitchExpr;
4110  defaultBlock = ci->savedDefaultBlock;
4111  caseBlocks = ci->savedCaseBlocks;
4112  nextBlocks = ci->savedNextBlocks;
4113  switchConditionWasUniform = ci->savedSwitchConditionWasUniform;
4114  }
4115  else if (ci->IsLoop() || ci->IsForeach()) {
4116  breakTarget = ci->savedBreakTarget;
4117  continueTarget = ci->savedContinueTarget;
4118  breakLanesPtr = ci->savedBreakLanesPtr;
4119  continueLanesPtr = ci->savedContinueLanesPtr;
4120  blockEntryMask = ci->savedBlockEntryMask;
4121  }
4122  else {
4123  AssertPos(currentPos, ci->IsIf());
4124  // nothing to do
4125  }
4126 
4127  return ci;
4128 }