Intel SPMD Program Compiler  1.10.0
ctx.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2015, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ctx.cpp
35  @brief Implementation of the FunctionEmitContext class
36 */
37 
38 #include "ctx.h"
39 #include "util.h"
40 #include "func.h"
41 #include "llvmutil.h"
42 #include "type.h"
43 #include "stmt.h"
44 #include "expr.h"
45 #include "module.h"
46 #include "sym.h"
47 #include <map>
48 #if ISPC_LLVM_VERSION >= ISPC_LLVM_5_0 // LLVM 5.0+
49  #include <llvm/BinaryFormat/Dwarf.h>
50 #else // LLVM up to 4.x
51  #include <llvm/Support/Dwarf.h>
52 #endif
53 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
54  #include <llvm/Metadata.h>
55  #include <llvm/Module.h>
56  #include <llvm/Instructions.h>
57  #include <llvm/DerivedTypes.h>
58 #else
59  #include <llvm/IR/Metadata.h>
60  #include <llvm/IR/Module.h>
61  #include <llvm/IR/Instructions.h>
62  #include <llvm/IR/DerivedTypes.h>
63 #endif
64 #ifdef ISPC_NVPTX_ENABLED
65 #include <llvm/Support/raw_ostream.h>
66 #include <llvm/Support/FormattedStream.h>
67 #endif /* ISPC_NVPTX_ENABLED */
68 
69 /** This is a small utility structure that records information related to one
70  level of nested control flow. It's mostly used in correctly restoring
71  the mask and other state as we exit control flow nesting levels.
72 */
73 struct CFInfo {
74  /** Returns a new instance of the structure that represents entering an
75  'if' statement */
76  static CFInfo *GetIf(bool isUniform, llvm::Value *savedMask);
77 
78  /** Returns a new instance of the structure that represents entering a
79  loop. */
80  static CFInfo *GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
81  llvm::BasicBlock *continueTarget,
82  llvm::Value *savedBreakLanesPtr,
83  llvm::Value *savedContinueLanesPtr,
84  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask);
85 
87  llvm::BasicBlock *breakTarget,
88  llvm::BasicBlock *continueTarget,
89  llvm::Value *savedBreakLanesPtr,
90  llvm::Value *savedContinueLanesPtr,
91  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask);
92 
93  static CFInfo *GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget,
94  llvm::BasicBlock *continueTarget,
95  llvm::Value *savedBreakLanesPtr,
96  llvm::Value *savedContinueLanesPtr,
97  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask,
98  llvm::Value *switchExpr,
99  llvm::BasicBlock *bbDefault,
100  const std::vector<std::pair<int, llvm::BasicBlock *> > *bbCases,
101  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbNext,
102  bool scUniform);
103 
104  bool IsIf() { return type == If; }
105  bool IsLoop() { return type == Loop; }
106  bool IsForeach() { return (type == ForeachRegular ||
107  type == ForeachActive ||
108  type == ForeachUnique); }
109  bool IsSwitch() { return type == Switch; }
110  bool IsVarying() { return !isUniform; }
111  bool IsUniform() { return isUniform; }
112 
116  bool isUniform;
120  llvm::Value *savedSwitchExpr;
121  llvm::BasicBlock *savedDefaultBlock;
122  const std::vector<std::pair<int, llvm::BasicBlock *> > *savedCaseBlocks;
123  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNextBlocks;
125 
126 private:
127  CFInfo(CFType t, bool uniformIf, llvm::Value *sm) {
128  Assert(t == If);
129  type = t;
130  isUniform = uniformIf;
131  savedBreakTarget = savedContinueTarget = NULL;
132  savedBreakLanesPtr = savedContinueLanesPtr = NULL;
133  savedMask = savedBlockEntryMask = sm;
134  savedSwitchExpr = NULL;
135  savedDefaultBlock = NULL;
136  savedCaseBlocks = NULL;
137  savedNextBlocks = NULL;
138  }
139  CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
140  llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
141  llvm::Value *lm, llvm::Value *sse = NULL, llvm::BasicBlock *bbd = NULL,
142  const std::vector<std::pair<int, llvm::BasicBlock *> > *bbc = NULL,
143  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *bbn = NULL,
144  bool scu = false) {
145  Assert(t == Loop || t == Switch);
146  type = t;
147  isUniform = iu;
148  savedBreakTarget = bt;
149  savedContinueTarget = ct;
150  savedBreakLanesPtr = sb;
151  savedContinueLanesPtr = sc;
152  savedMask = sm;
153  savedBlockEntryMask = lm;
154  savedSwitchExpr = sse;
155  savedDefaultBlock = bbd;
156  savedCaseBlocks = bbc;
157  savedNextBlocks = bbn;
158  savedSwitchConditionWasUniform = scu;
159  }
160  CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
161  llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
162  llvm::Value *lm) {
163  Assert(t == ForeachRegular || t == ForeachActive || t == ForeachUnique);
164  type = t;
165  isUniform = false;
166  savedBreakTarget = bt;
167  savedContinueTarget = ct;
168  savedBreakLanesPtr = sb;
169  savedContinueLanesPtr = sc;
170  savedMask = sm;
171  savedBlockEntryMask = lm;
172  savedSwitchExpr = NULL;
173  savedDefaultBlock = NULL;
174  savedCaseBlocks = NULL;
175  savedNextBlocks = NULL;
176  }
177 };
178 
179 
180 CFInfo *
181 CFInfo::GetIf(bool isUniform, llvm::Value *savedMask) {
182  return new CFInfo(If, isUniform, savedMask);
183 }
184 
185 
186 CFInfo *
187 CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
188  llvm::BasicBlock *continueTarget,
189  llvm::Value *savedBreakLanesPtr,
190  llvm::Value *savedContinueLanesPtr,
191  llvm::Value *savedMask, llvm::Value *savedBlockEntryMask) {
192  return new CFInfo(Loop, isUniform, breakTarget, continueTarget,
193  savedBreakLanesPtr, savedContinueLanesPtr,
194  savedMask, savedBlockEntryMask);
195 }
196 
197 
198 CFInfo *
200  llvm::BasicBlock *breakTarget,
201  llvm::BasicBlock *continueTarget,
202  llvm::Value *savedBreakLanesPtr,
203  llvm::Value *savedContinueLanesPtr,
204  llvm::Value *savedMask, llvm::Value *savedForeachMask) {
205  CFType cfType;
206  switch (ft) {
208  cfType = ForeachRegular;
209  break;
211  cfType = ForeachActive;
212  break;
214  cfType = ForeachUnique;
215  break;
216  default:
217  FATAL("Unhandled foreach type");
218  return NULL;
219  }
220 
221  return new CFInfo(cfType, breakTarget, continueTarget,
222  savedBreakLanesPtr, savedContinueLanesPtr,
223  savedMask, savedForeachMask);
224 }
225 
226 
227 CFInfo *
228 CFInfo::GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget,
229  llvm::BasicBlock *continueTarget,
230  llvm::Value *savedBreakLanesPtr,
231  llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask,
232  llvm::Value *savedBlockEntryMask, llvm::Value *savedSwitchExpr,
233  llvm::BasicBlock *savedDefaultBlock,
234  const std::vector<std::pair<int, llvm::BasicBlock *> > *savedCases,
235  const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *savedNext,
236  bool savedSwitchConditionUniform) {
237  return new CFInfo(Switch, isUniform, breakTarget, continueTarget,
238  savedBreakLanesPtr, savedContinueLanesPtr,
239  savedMask, savedBlockEntryMask, savedSwitchExpr, savedDefaultBlock,
240  savedCases, savedNext, savedSwitchConditionUniform);
241 }
242 
243 ///////////////////////////////////////////////////////////////////////////
244 
246  llvm::Function *lf,
247  SourcePos firstStmtPos) {
248  function = func;
249  llvmFunction = lf;
250 
251  /* Create a new basic block to store all of the allocas */
252  allocaBlock = llvm::BasicBlock::Create(*g->ctx, "allocas", llvmFunction, 0);
253  bblock = llvm::BasicBlock::Create(*g->ctx, "entry", llvmFunction, 0);
254  /* But jump from it immediately into the real entry block */
255  llvm::BranchInst::Create(bblock, allocaBlock);
256 
257  funcStartPos = funSym->pos;
258 
259  internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory");
260  StoreInst(LLVMMaskAllOn, internalMaskPointer);
261 
262  functionMaskValue = LLVMMaskAllOn;
263 
264  fullMaskPointer = AllocaInst(LLVMTypes::MaskType, "full_mask_memory");
265  StoreInst(LLVMMaskAllOn, fullMaskPointer);
266 
267  blockEntryMask = NULL;
268  breakLanesPtr = continueLanesPtr = NULL;
269  breakTarget = continueTarget = NULL;
270 
271  switchExpr = NULL;
272  caseBlocks = NULL;
273  defaultBlock = NULL;
274  nextBlocks = NULL;
275 
276  returnedLanesPtr = AllocaInst(LLVMTypes::MaskType, "returned_lanes_memory");
277  StoreInst(LLVMMaskAllOff, returnedLanesPtr);
278 
279  launchedTasks = false;
280  launchGroupHandlePtr = AllocaInst(LLVMTypes::VoidPointerType, "launch_group_handle");
281  StoreInst(llvm::Constant::getNullValue(LLVMTypes::VoidPointerType),
282  launchGroupHandlePtr);
283 
284  disableGSWarningCount = 0;
285 
286  const Type *returnType = function->GetReturnType();
287  if (!returnType || returnType->IsVoidType())
288  returnValuePtr = NULL;
289  else {
290  llvm::Type *ftype = returnType->LLVMType(g->ctx);
291  returnValuePtr = AllocaInst(ftype, "return_value_memory");
292  }
293 
295  // This is really disgusting. We want to be able to fool the
296  // compiler to not be able to reason that the mask is all on, but
297  // we don't want to pay too much of a price at the start of each
298  // function to do so.
299  //
300  // Therefore: first, we declare a module-static __all_on_mask
301  // variable that will hold an "all on" mask value. At the start of
302  // each function, we'll load its value and call SetInternalMaskAnd
303  // with the result to set the current internal execution mask.
304  // (This is a no-op at runtime.)
305  //
306  // Then, to fool the optimizer that maybe the value of
307  // __all_on_mask can't be guaranteed to be "all on", we emit a
308  // dummy function that sets __all_on_mask be "all off". (That
309  // function is never actually called.)
310  llvm::Value *globalAllOnMaskPtr =
311  m->module->getNamedGlobal("__all_on_mask");
312  if (globalAllOnMaskPtr == NULL) {
313  globalAllOnMaskPtr =
314  new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false,
315  llvm::GlobalValue::InternalLinkage,
316  LLVMMaskAllOn, "__all_on_mask");
317 
318  char buf[256];
319  sprintf(buf, "__off_all_on_mask_%s", g->target->GetISAString());
320  llvm::Constant *offFunc =
321 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
322  m->module->getOrInsertFunction(buf, LLVMTypes::VoidType,
323  NULL);
324 #else // LLVM 5.0+
325  m->module->getOrInsertFunction(buf, LLVMTypes::VoidType);
326 #endif
327 
328  AssertPos(currentPos, llvm::isa<llvm::Function>(offFunc));
329  llvm::BasicBlock *offBB =
330  llvm::BasicBlock::Create(*g->ctx, "entry",
331  (llvm::Function *)offFunc, 0);
332  llvm::StoreInst *inst =
333  new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
334  if (g->opt.forceAlignedMemory) {
335  inst->setAlignment(g->target->getNativeVectorAlignment());
336  }
337  llvm::ReturnInst::Create(*g->ctx, offBB);
338  }
339 
340  llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, "all_on_mask");
341  SetInternalMaskAnd(LLVMMaskAllOn, allOnMask);
342  }
343 
344  if (m->diBuilder) {
345  currentPos = funSym->pos;
346 
347  /* If debugging is enabled, tell the debug information emission
348  code about this new function */
349 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
350  diFile = funcStartPos.GetDIFile();
351  AssertPos(currentPos, diFile.Verify());
352 #else /* LLVM 3.7+ */
353  diFile = funcStartPos.GetDIFile();
354 #endif
355 
356 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 /* 3.2, 3.3 */
357  llvm::DIScope scope = llvm::DIScope(m->diBuilder->getCU());
358 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.4, 3.5, 3.6 */
359  llvm::DIScope scope = llvm::DIScope(m->diCompileUnit);
360 #else /* LLVM 3.7+ */
361  llvm::DIScope *scope = m->diCompileUnit;
362 #endif
363 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
364  llvm::DIType diSubprogramType;
365  AssertPos(currentPos, scope.Verify());
366 #else /* LLVM 3.7+ */
367  llvm::DIType *diSubprogramType = NULL;
368 #endif
369 
370  const FunctionType *functionType = function->GetType();
371  if (functionType == NULL)
372  AssertPos(currentPos, m->errorCount > 0);
373  else {
374  diSubprogramType = functionType->GetDIType(scope);
375 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
376  AssertPos(currentPos, diSubprogramType.Verify());
377 #else /* LLVM 3.7+ */
378  //comming soon
379 #endif
380  }
381 
382 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 /* 3.2, 3.3 */
383  llvm::DIType diSubprogramType_n = diSubprogramType;
384  int flags = llvm::DIDescriptor::FlagPrototyped;
385 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.4, 3.5, 3.6 */
386  Assert(diSubprogramType.isCompositeType());
387  llvm::DICompositeType diSubprogramType_n =
388  static_cast<llvm::DICompositeType>(diSubprogramType);
389  int flags = llvm::DIDescriptor::FlagPrototyped;
390 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_7 /* LLVM 3.7 */
391  Assert(llvm::isa<llvm::DICompositeTypeBase>(diSubprogramType));
392  llvm::DISubroutineType *diSubprogramType_n =
393  llvm::cast<llvm::DISubroutineType>(getDICompositeType(diSubprogramType));
394  int flags = llvm::DINode::FlagPrototyped;
395 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_8 || ISPC_LLVM_VERSION == ISPC_LLVM_3_9 /* LLVM 3.8, 3.9 */
396  Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
397  llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
398  int flags = llvm::DINode::FlagPrototyped;
399 #else /* LLVM 4.0+ */
400  Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
401  llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
402  llvm::DINode::DIFlags flags = llvm::DINode::FlagPrototyped;
403 
404 #endif
405 
406  std::string mangledName = llvmFunction->getName();
407  if (mangledName == funSym->name)
408  mangledName = "";
409 
410  bool isStatic = (funSym->storageClass == SC_STATIC);
411  bool isOptimized = (g->opt.level > 0);
412  int firstLine = funcStartPos.first_line;
413 
414 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
415  diSubprogram =
416  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
417  mangledName, diFile,
418  firstLine, diSubprogramType_n,
419  isStatic, true, /* is defn */
420  firstLine, flags,
421  isOptimized, llvmFunction);
422  AssertPos(currentPos, diSubprogram.Verify());
423 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_7 /* LLVM 3.7 */
424  diSubprogram =
425  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
426  mangledName, diFile,
427  firstLine, diSubprogramType_n,
428  isStatic, true, /* is defn */
429  firstLine, flags,
430  isOptimized, llvmFunction);
431 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_8 || ISPC_LLVM_VERSION == ISPC_LLVM_3_9 /* LLVM 3.8, 3.9 */
432  diSubprogram =
433  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
434  mangledName, diFile,
435  firstLine, diSubprogramType_n,
436  isStatic, true, /* is defn */
437  firstLine, flags,
438  isOptimized);
439  llvmFunction->setSubprogram(diSubprogram);
440 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_4_0 && ISPC_LLVM_VERSION <= ISPC_LLVM_7_0 /* LLVM 4.0 to 7.0 */
441  diSubprogram =
442  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
443  mangledName, diFile,
444  firstLine, diSubprogramType_n,
445  isStatic, true, /* is defn */
446  firstLine, flags,
447  isOptimized);
448  llvmFunction->setSubprogram(diSubprogram);
449 #else /* LLVM 8.0+ */
450  /* isDefinition is always set to 'true' */
451  llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagDefinition;
452  if (isOptimized)
453  SPFlags |= llvm::DISubprogram::SPFlagOptimized;
454  if (isStatic)
455  SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
456 
457  diSubprogram =
458  m->diBuilder->createFunction(diFile /* scope */, funSym->name,
459  mangledName, diFile,
460  firstLine, diSubprogramType_n,
461  firstLine, flags,
462  SPFlags);
463  llvmFunction->setSubprogram(diSubprogram);
464 #endif
465 
466  /* And start a scope representing the initial function scope */
467  StartScope();
468  }
469 }
470 
471 
473  AssertPos(currentPos, controlFlowInfo.size() == 0);
474  AssertPos(currentPos, debugScopes.size() == (m->diBuilder ? 1 : 0));
475 }
476 
477 
478 const Function *
480  return function;
481 }
482 
483 
484 llvm::BasicBlock *
486  return bblock;
487 }
488 
489 
490 void
492  bblock = bb;
493 }
494 
495 
496 llvm::Value *
498  return functionMaskValue;
499 }
500 
501 
502 llvm::Value *
504  return LoadInst(internalMaskPointer, "load_mask");
505 }
506 
507 
508 llvm::Value *
510  return BinaryOperator(llvm::Instruction::And, GetInternalMask(),
511  functionMaskValue, "internal_mask&function_mask");
512 }
513 
514 
515 llvm::Value *
517  return fullMaskPointer;
518 }
519 
520 
521 void
523  functionMaskValue = value;
524  if (bblock != NULL)
525  StoreInst(GetFullMask(), fullMaskPointer);
526 }
527 
528 
529 void
531  blockEntryMask = value;
532 }
533 
534 
535 void
537  StoreInst(value, internalMaskPointer);
538  // kludge so that __mask returns the right value in ispc code.
539  StoreInst(GetFullMask(), fullMaskPointer);
540 }
541 
542 
543 void
544 FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) {
545  llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask,
546  test, "oldMask&test");
547  SetInternalMask(mask);
548 }
549 
550 
551 void
552 FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test) {
553  llvm::Value *notTest = BinaryOperator(llvm::Instruction::Xor, test, LLVMMaskAllOn,
554  "~test");
555  llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, notTest,
556  "oldMask&~test");
557  SetInternalMask(mask);
558 }
559 
560 
561 void
562 FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
563  AssertPos(currentPos, bblock != NULL);
564  llvm::Value *any = Any(GetFullMask());
565  BranchInst(btrue, bfalse, any);
566  // It's illegal to add any additional instructions to the basic block
567  // now that it's terminated, so set bblock to NULL to be safe
568  bblock = NULL;
569 }
570 
571 
572 void
573 FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
574  AssertPos(currentPos, bblock != NULL);
575  llvm::Value *all = All(GetFullMask());
576  BranchInst(btrue, bfalse, all);
577  // It's illegal to add any additional instructions to the basic block
578  // now that it's terminated, so set bblock to NULL to be safe
579  bblock = NULL;
580 }
581 
582 
583 void
584 FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
585  AssertPos(currentPos, bblock != NULL);
586  // switch sense of true/false bblocks
587  BranchIfMaskAny(bfalse, btrue);
588  // It's illegal to add any additional instructions to the basic block
589  // now that it's terminated, so set bblock to NULL to be safe
590  bblock = NULL;
591 }
592 
593 
594 void
596  controlFlowInfo.push_back(CFInfo::GetIf(true, GetInternalMask()));
597 }
598 
599 
600 void
601 FunctionEmitContext::StartVaryingIf(llvm::Value *oldMask) {
602  controlFlowInfo.push_back(CFInfo::GetIf(false, oldMask));
603 }
604 
605 
606 void
608  CFInfo *ci = popCFState();
609  // Make sure we match up with a Start{Uniform,Varying}If().
610  AssertPos(currentPos, ci->IsIf());
611 
612  // 'uniform' ifs don't change the mask so we only need to restore the
613  // mask going into the if for 'varying' if statements
614  if (ci->IsUniform() || bblock == NULL)
615  return;
616 
617  // We can't just restore the mask as it was going into the 'if'
618  // statement. First we have to take into account any program
619  // instances that have executed 'return' statements; the restored
620  // mask must be off for those lanes.
621  restoreMaskGivenReturns(ci->savedMask);
622 
623  // If the 'if' statement is inside a loop with a 'varying'
624  // condition, we also need to account for any break or continue
625  // statements that executed inside the 'if' statmeent; we also must
626  // leave the lane masks for the program instances that ran those
627  // off after we restore the mask after the 'if'. The code below
628  // ends up being optimized out in the case that there were no break
629  // or continue statements (and breakLanesPtr and continueLanesPtr
630  // have their initial 'all off' values), so we don't need to check
631  // for that here.
632  //
633  // There are three general cases to deal with here:
634  // - Loops: both break and continue are allowed, and thus the corresponding
635  // lane mask pointers are non-NULL
636  // - Foreach: only continueLanesPtr may be non-NULL
637  // - Switch: only breakLanesPtr may be non-NULL
638  if (continueLanesPtr != NULL || breakLanesPtr != NULL) {
639  // We want to compute:
640  // newMask = (oldMask & ~(breakLanes | continueLanes)),
641  // treading breakLanes or continueLanes as "all off" if the
642  // corresponding pointer is NULL.
643  llvm::Value *bcLanes = NULL;
644 
645  if (continueLanesPtr != NULL)
646  bcLanes = LoadInst(continueLanesPtr, "continue_lanes");
647  else
648  bcLanes = LLVMMaskAllOff;
649 
650  if (breakLanesPtr != NULL) {
651  llvm::Value *breakLanes = LoadInst(breakLanesPtr, "break_lanes");
652  bcLanes = BinaryOperator(llvm::Instruction::Or, bcLanes,
653  breakLanes, "|break_lanes");
654  }
655 
656  llvm::Value *notBreakOrContinue =
657  BinaryOperator(llvm::Instruction::Xor,
658  bcLanes, LLVMMaskAllOn,
659  "!(break|continue)_lanes");
660  llvm::Value *oldMask = GetInternalMask();
661  llvm::Value *newMask =
662  BinaryOperator(llvm::Instruction::And, oldMask,
663  notBreakOrContinue, "new_mask");
664  SetInternalMask(newMask);
665  }
666 }
667 
668 
669 void
670 FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct,
671  bool uniformCF) {
672  // Store the current values of various loop-related state so that we
673  // can restore it when we exit this loop.
674  llvm::Value *oldMask = GetInternalMask();
675  controlFlowInfo.push_back(CFInfo::GetLoop(uniformCF, breakTarget,
676  continueTarget, breakLanesPtr,
677  continueLanesPtr, oldMask, blockEntryMask));
678  if (uniformCF)
679  // If the loop has a uniform condition, we don't need to track
680  // which lanes 'break' or 'continue'; all of the running ones go
681  // together, so we just jump
682  breakLanesPtr = continueLanesPtr = NULL;
683  else {
684  // For loops with varying conditions, allocate space to store masks
685  // that record which lanes have done these
686  continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "continue_lanes_memory");
687  StoreInst(LLVMMaskAllOff, continueLanesPtr);
688  breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
689  StoreInst(LLVMMaskAllOff, breakLanesPtr);
690  }
691 
692  breakTarget = bt;
693  continueTarget = ct;
694  blockEntryMask = NULL; // this better be set by the loop!
695 }
696 
697 
698 void
700  CFInfo *ci = popCFState();
701  AssertPos(currentPos, ci->IsLoop());
702 
703  if (!ci->IsUniform())
704  // If the loop had a 'uniform' test, then it didn't make any
705  // changes to the mask so there's nothing to restore. If it had a
706  // varying test, we need to restore the mask to what it was going
707  // into the loop, but still leaving off any lanes that executed a
708  // 'return' statement.
709  restoreMaskGivenReturns(ci->savedMask);
710 }
711 
712 
713 void
715  // Issue an error if we're in a nested foreach...
716  if (ft == FOREACH_REGULAR) {
717  for (int i = 0; i < (int)controlFlowInfo.size(); ++i) {
718  if (controlFlowInfo[i]->type == CFInfo::ForeachRegular) {
719  Error(currentPos, "Nested \"foreach\" statements are currently "
720  "illegal.");
721  break;
722  // Don't return here, however, and in turn allow the caller to
723  // do the rest of its codegen and then call EndForeach()
724  // normally--the idea being that this gives a chance to find
725  // any other errors inside the body of the foreach loop...
726  }
727  }
728  }
729 
730  // Store the current values of various loop-related state so that we
731  // can restore it when we exit this loop.
732  llvm::Value *oldMask = GetInternalMask();
733  controlFlowInfo.push_back(CFInfo::GetForeach(ft, breakTarget, continueTarget,
734  breakLanesPtr, continueLanesPtr,
735  oldMask, blockEntryMask));
736  breakLanesPtr = NULL;
737  breakTarget = NULL;
738 
739  continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "foreach_continue_lanes");
740  StoreInst(LLVMMaskAllOff, continueLanesPtr);
741  continueTarget = NULL; // should be set by SetContinueTarget()
742 
743  blockEntryMask = NULL;
744 }
745 
746 
747 void
749  CFInfo *ci = popCFState();
750  AssertPos(currentPos, ci->IsForeach());
751 }
752 
753 
754 void
756  if (!bblock)
757  return;
758 
759  // Restore the mask to the given old mask, but leave off any lanes that
760  // executed a return statement.
761  // newMask = (oldMask & ~returnedLanes)
762  llvm::Value *returnedLanes = LoadInst(returnedLanesPtr,
763  "returned_lanes");
764  llvm::Value *notReturned = BinaryOperator(llvm::Instruction::Xor,
765  returnedLanes, LLVMMaskAllOn,
766  "~returned_lanes");
767  llvm::Value *newMask = BinaryOperator(llvm::Instruction::And,
768  oldMask, notReturned, "new_mask");
769  SetInternalMask(newMask);
770 }
771 
772 
773 /** Returns "true" if the first enclosing non-if control flow expression is
774  a "switch" statement.
775 */
776 bool
778  // Go backwards through controlFlowInfo, since we add new nested scopes
779  // to the back.
780  int i = controlFlowInfo.size() - 1;
781  while (i >= 0 && controlFlowInfo[i]->IsIf())
782  --i;
783  // Got to the first non-if (or end of CF info)
784  if (i == -1)
785  return false;
786  return controlFlowInfo[i]->IsSwitch();
787 }
788 
789 
790 void
791 FunctionEmitContext::Break(bool doCoherenceCheck) {
792  if (breakTarget == NULL) {
793  Error(currentPos, "\"break\" statement is illegal outside of "
794  "for/while/do loops and \"switch\" statements.");
795  return;
796  }
797  AssertPos(currentPos, controlFlowInfo.size() > 0);
798 
799  if (bblock == NULL)
800  return;
801 
802  if (inSwitchStatement() == true &&
803  switchConditionWasUniform == true &&
804  ifsInCFAllUniform(CFInfo::Switch)) {
805  // We know that all program instances are executing the break, so
806  // just jump to the block immediately after the switch.
807  AssertPos(currentPos, breakTarget != NULL);
808  BranchInst(breakTarget);
809  bblock = NULL;
810  return;
811  }
812 
813  // If all of the enclosing 'if' tests in the loop have uniform control
814  // flow or if we can tell that the mask is all on, then we can just
815  // jump to the break location.
816  if (inSwitchStatement() == false && ifsInCFAllUniform(CFInfo::Loop)) {
817  BranchInst(breakTarget);
818  // Set bblock to NULL since the jump has terminated the basic block
819  bblock = NULL;
820  }
821  else {
822  // Varying switch, uniform switch where the 'break' is under
823  // varying control flow, or a loop with varying 'if's above the
824  // break. In these cases, we need to update the mask of the lanes
825  // that have executed a 'break' statement:
826  // breakLanes = breakLanes | mask
827  AssertPos(currentPos, breakLanesPtr != NULL);
828 
829  llvm::Value *mask = GetInternalMask();
830  llvm::Value *breakMask = LoadInst(breakLanesPtr,
831  "break_mask");
832  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
833  mask, breakMask, "mask|break_mask");
834  StoreInst(newMask, breakLanesPtr);
835 
836  // Set the current mask to be all off, just in case there are any
837  // statements in the same scope after the 'break'. Most of time
838  // this will be optimized away since we'll likely end the scope of
839  // an 'if' statement and restore the mask then.
840  SetInternalMask(LLVMMaskAllOff);
841 
842  if (doCoherenceCheck) {
843  if (continueTarget != NULL)
844  // If the user has indicated that this is a 'coherent'
845  // break statement, then check to see if the mask is all
846  // off. If so, we have to conservatively jump to the
847  // continueTarget, not the breakTarget, since part of the
848  // reason the mask is all off may be due to 'continue'
849  // statements that executed in the current loop iteration.
850  jumpIfAllLoopLanesAreDone(continueTarget);
851  else if (breakTarget != NULL)
852  // Similarly handle these for switch statements, where we
853  // only have a break target.
854  jumpIfAllLoopLanesAreDone(breakTarget);
855  }
856  }
857 }
858 
859 
860 static bool
861 lEnclosingLoopIsForeachActive(const std::vector<CFInfo *> &controlFlowInfo) {
862  for (int i = (int)controlFlowInfo.size() - 1; i >= 0; --i) {
863  if (controlFlowInfo[i]->type == CFInfo::ForeachActive)
864  return true;
865  }
866  return false;
867 }
868 
869 
870 void
871 FunctionEmitContext::Continue(bool doCoherenceCheck) {
872  if (!continueTarget) {
873  Error(currentPos, "\"continue\" statement illegal outside of "
874  "for/while/do/foreach loops.");
875  return;
876  }
877  AssertPos(currentPos, controlFlowInfo.size() > 0);
878 
879  if (ifsInCFAllUniform(CFInfo::Loop) ||
880  lEnclosingLoopIsForeachActive(controlFlowInfo)) {
881  // Similarly to 'break' statements, we can immediately jump to the
882  // continue target if we're only in 'uniform' control flow within
883  // loop or if we can tell that the mask is all on. Here, we can
884  // also jump if the enclosing loop is a 'foreach_active' loop, in
885  // which case we know that only a single program instance is
886  // executing.
887  AddInstrumentationPoint("continue: uniform CF, jumped");
888  BranchInst(continueTarget);
889  bblock = NULL;
890  }
891  else {
892  // Otherwise update the stored value of which lanes have 'continue'd.
893  // continueLanes = continueLanes | mask
894  AssertPos(currentPos, continueLanesPtr);
895  llvm::Value *mask = GetInternalMask();
896  llvm::Value *continueMask =
897  LoadInst(continueLanesPtr, "continue_mask");
898  llvm::Value *newMask =
899  BinaryOperator(llvm::Instruction::Or, mask, continueMask,
900  "mask|continueMask");
901  StoreInst(newMask, continueLanesPtr);
902 
903  // And set the current mask to be all off in case there are any
904  // statements in the same scope after the 'continue'
905  SetInternalMask(LLVMMaskAllOff);
906 
907  if (doCoherenceCheck)
908  // If this is a 'coherent continue' statement, then emit the
909  // code to see if all of the lanes are now off due to
910  // breaks/continues and jump to the continue target if so.
911  jumpIfAllLoopLanesAreDone(continueTarget);
912  }
913 }
914 
915 
/** This function checks to see if all of the 'if' statements (if any)
    between the current scope and the first enclosing loop/switch of given
    control flow type have 'uniform' tests.
 */
bool
    AssertPos(currentPos, controlFlowInfo.size() > 0);
    // Go backwards through controlFlowInfo, since we add new nested scopes
    // to the back.  Stop once we come to the first enclosing control flow
    // structure of the desired type.
    int i = controlFlowInfo.size() - 1;
    while (i >= 0 && controlFlowInfo[i]->type != type) {
        if (controlFlowInfo[i]->isUniform == false)
            // Found a scope due to an 'if' statement with a varying test
            return false;
        --i;
    }
    // If we walked off the front of the stack, the caller asked about a
    // control-flow type we aren't actually inside of.
    AssertPos(currentPos, i >= 0); // else we didn't find the expected control flow type!
    return true;
}
936 
937 
// Emits a dynamic test of whether every program instance in the enclosing
// loop/switch is finished with the current iteration (has returned, broken,
// or continued); if so, control branches to 'target', otherwise execution
// falls through into a fresh basic block.
void
    llvm::Value *allDone = NULL;

    // breakLanesPtr is NULL inside 'foreach'-style constructs, where only
    // continue/return lanes need to be considered (note this branch reads
    // continueLanesPtr unconditionally, so it must be non-NULL here).
    if (breakLanesPtr == NULL) {
        llvm::Value *continued = LoadInst(continueLanesPtr,
                                          "continue_lanes");
        continued = BinaryOperator(llvm::Instruction::And,
                                   continued, GetFunctionMask(),
                                   "continued&func");
        allDone = MasksAllEqual(continued, blockEntryMask);
    }
    else {
        // Check to see if (returned lanes | continued lanes | break lanes) is
        // equal to the value of mask at the start of the loop iteration.  If
        // so, everyone is done and we can jump to the given target
        llvm::Value *returned = LoadInst(returnedLanesPtr,
                                         "returned_lanes");
        llvm::Value *breaked = LoadInst(breakLanesPtr, "break_lanes");
        llvm::Value *finishedLanes = BinaryOperator(llvm::Instruction::Or,
                                                    returned, breaked,
                                                    "returned|breaked");
        if (continueLanesPtr != NULL) {
            // It's NULL for "switch" statements...
            llvm::Value *continued = LoadInst(continueLanesPtr,
                                              "continue_lanes");
            finishedLanes = BinaryOperator(llvm::Instruction::Or, finishedLanes,
                                           continued, "returned|breaked|continued");
        }

        // Only lanes that are live under the function mask count.
        finishedLanes = BinaryOperator(llvm::Instruction::And,
                                       finishedLanes, GetFunctionMask(),
                                       "finished&func");

        // Do we match the mask at loop or switch statement entry?
        allDone = MasksAllEqual(finishedLanes, blockEntryMask);
    }

    llvm::BasicBlock *bAll = CreateBasicBlock("all_continued_or_breaked");
    llvm::BasicBlock *bNotAll = CreateBasicBlock("not_all_continued_or_breaked");
    BranchInst(bAll, bNotAll, allDone);

    // If so, have an extra basic block along the way to add
    // instrumentation, if the user asked for it.
    bblock = bAll;
    AddInstrumentationPoint("break/continue: all dynamically went");
    BranchInst(target);

    // And set the current basic block to a new one for future instructions
    // for the path where we weren't able to jump
    bblock = bNotAll;
    AddInstrumentationPoint("break/continue: not all went");
}
991 
992 
// Re-enables, at the top of the next loop iteration, any lanes that executed
// a 'continue' during this iteration, and resets the continue-lanes storage.
void
    if (continueLanesPtr == NULL)
        return;

    // mask = mask | continueLanes
    llvm::Value *mask = GetInternalMask();
    llvm::Value *continueMask = LoadInst(continueLanesPtr,
                                         "continue_mask");
    llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or,
                                         mask, continueMask, "mask|continue_mask");
    SetInternalMask(orMask);

    // continueLanes = 0
    StoreInst(LLVMMaskAllOff, continueLanesPtr);
}
1009 
1010 
// Resets the record of which lanes have executed a 'break', if any such
// storage exists for the current construct.
void
    if (breakLanesPtr == NULL)
        return;

    // breakLanes = 0
    StoreInst(LLVMMaskAllOff, breakLanesPtr);
}
1019 
1020 
1021 void
1022 FunctionEmitContext::StartSwitch(bool cfIsUniform, llvm::BasicBlock *bbBreak) {
1023  llvm::Value *oldMask = GetInternalMask();
1024  controlFlowInfo.push_back(CFInfo::GetSwitch(cfIsUniform, breakTarget,
1025  continueTarget, breakLanesPtr,
1026  continueLanesPtr, oldMask,
1027  blockEntryMask, switchExpr, defaultBlock,
1028  caseBlocks, nextBlocks,
1029  switchConditionWasUniform));
1030 
1031  breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
1032  StoreInst(LLVMMaskAllOff, breakLanesPtr);
1033  breakTarget = bbBreak;
1034 
1035  continueLanesPtr = NULL;
1036  continueTarget = NULL;
1037  blockEntryMask = NULL;
1038 
1039  // These will be set by the SwitchInst() method
1040  switchExpr = NULL;
1041  defaultBlock = NULL;
1042  caseBlocks = NULL;
1043  nextBlocks = NULL;
1044 }
1045 
1046 
// Finishes a 'switch' statement: pops the saved control-flow state and, for
// a varying switch, restores the mask to account for lanes that returned.
void
    AssertPos(currentPos, bblock != NULL);

    CFInfo *ci = popCFState();
    // NOTE(review): the bblock != NULL re-check is redundant after the
    // assert above in debug builds, but guards release builds.
    if (ci->IsVarying() && bblock != NULL)
        restoreMaskGivenReturns(ci->savedMask);
}
1055 
1056 
/** Emit code to check for an "all off" mask before the code for a
    case or default label in a "switch" statement.
 */
void
    llvm::Value *allOff = None(mask);
    llvm::BasicBlock *bbSome = CreateBasicBlock("case_default_on");

    // Find the basic block for the case or default label immediately after
    // the current one in the switch statement--that's where we want to
    // jump if the mask is all off at this label.
    AssertPos(currentPos, nextBlocks->find(bblock) != nextBlocks->end());
    llvm::BasicBlock *bbNext = nextBlocks->find(bblock)->second;

    // Jump to the next one if the mask is all off; otherwise jump to the
    // newly created block that will hold the actual code for this label.
    BranchInst(bbNext, bbSome, allOff);
    SetCurrentBasicBlock(bbSome);
}
1076 
1077 
/** Returns the execution mask at entry to the first enclosing "switch"
    statement. */
llvm::Value *
    AssertPos(currentPos, controlFlowInfo.size() > 0);
    // Scan back from the innermost scope until we reach the switch.
    int i = controlFlowInfo.size() - 1;
    while (i >= 0 && controlFlowInfo[i]->type != CFInfo::Switch)
        --i;
    // Callers must only invoke this while inside a switch statement.
    AssertPos(currentPos, i != -1);
    return controlFlowInfo[i]->savedMask;
}
1089 
1090 
// Emits the code that precedes a 'default' label in a switch statement.
// For a varying switch, this computes and installs the execution mask for
// the default case: all switch-entry lanes that matched no 'case' value,
// plus any lanes falling through from the previous label.
void
    if (inSwitchStatement() == false) {
        Error(pos, "\"default\" label illegal outside of \"switch\" "
              "statement.");
        return;
    }

    // If there's a default label in the switch, a basic block for it
    // should have been provided in the previous call to SwitchInst().
    AssertPos(currentPos, defaultBlock != NULL);

    if (bblock != NULL)
        // The previous case in the switch fell through, or we're in a
        // varying switch; terminate the current block with a jump to the
        // block for the code for the default label.
        BranchInst(defaultBlock);
    SetCurrentBasicBlock(defaultBlock);

    if (switchConditionWasUniform)
        // Nothing more to do for this case; return back to the caller,
        // which will then emit the code for the default case.
        return;

    // For a varying switch, we need to update the execution mask.
    //
    // First, compute the mask that corresponds to which program instances
    // should execute the "default" code; this corresponds to the set of
    // program instances that don't match any of the case statements.
    // Therefore, we generate code that compares the value of the switch
    // expression to the value associated with each of the "case"
    // statements such that the surviving lanes didn't match any of them.
    llvm::Value *matchesDefault = getMaskAtSwitchEntry();
    for (int i = 0; i < (int)caseBlocks->size(); ++i) {
        int value = (*caseBlocks)[i].first;
        llvm::Value *valueVec = (switchExpr->getType() == LLVMTypes::Int32VectorType) ?
            LLVMInt32Vector(value) : LLVMInt64Vector(value);
        // TODO: for AVX2 at least, the following generates better code
        // than doing ICMP_NE and skipping the NotOperator() below; file a
        // LLVM bug?
        llvm::Value *matchesCaseValue =
            CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr,
                    valueVec, "cmp_case_value");
        matchesCaseValue = I1VecToBoolVec(matchesCaseValue);

        llvm::Value *notMatchesCaseValue = NotOperator(matchesCaseValue);
        matchesDefault = BinaryOperator(llvm::Instruction::And, matchesDefault,
                                        notMatchesCaseValue, "default&~case_match");
    }

    // The mask may have some lanes on, which corresponds to the previous
    // label falling through; compute the updated mask by ORing with the
    // current mask.
    llvm::Value *oldMask = GetInternalMask();
    llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask,
                                          matchesDefault, "old_mask|matches_default");
    SetInternalMask(newMask);

    if (checkMask)
        addSwitchMaskCheck(newMask);
}
1152 
1153 
1154 void
1155 FunctionEmitContext::EmitCaseLabel(int value, bool checkMask, SourcePos pos) {
1156  if (inSwitchStatement() == false) {
1157  Error(pos, "\"case\" label illegal outside of \"switch\" statement.");
1158  return;
1159  }
1160 
1161  // Find the basic block for this case statement.
1162  llvm::BasicBlock *bbCase = NULL;
1163  AssertPos(currentPos, caseBlocks != NULL);
1164  for (int i = 0; i < (int)caseBlocks->size(); ++i)
1165  if ((*caseBlocks)[i].first == value) {
1166  bbCase = (*caseBlocks)[i].second;
1167  break;
1168  }
1169  AssertPos(currentPos, bbCase != NULL);
1170 
1171  if (bblock != NULL)
1172  // fall through from the previous case
1173  BranchInst(bbCase);
1174  SetCurrentBasicBlock(bbCase);
1175 
1176  if (switchConditionWasUniform)
1177  return;
1178 
1179  // update the mask: first, get a mask that indicates which program
1180  // instances have a value for the switch expression that matches this
1181  // case statement.
1182  llvm::Value *valueVec = (switchExpr->getType() == LLVMTypes::Int32VectorType) ?
1183  LLVMInt32Vector(value) : LLVMInt64Vector(value);
1184  llvm::Value *matchesCaseValue =
1185  CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, switchExpr,
1186  valueVec, "cmp_case_value");
1187  matchesCaseValue = I1VecToBoolVec(matchesCaseValue);
1188 
1189  // If a lane was off going into the switch, we don't care if has a
1190  // value in the switch expression that happens to match this case.
1191  llvm::Value *entryMask = getMaskAtSwitchEntry();
1192  matchesCaseValue = BinaryOperator(llvm::Instruction::And, entryMask,
1193  matchesCaseValue, "entry_mask&case_match");
1194 
1195  // Take the surviving lanes and turn on the mask for them.
1196  llvm::Value *oldMask = GetInternalMask();
1197  llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, oldMask,
1198  matchesCaseValue, "mask|case_match");
1199  SetInternalMask(newMask);
1200 
1201  if (checkMask)
1202  addSwitchMaskCheck(newMask);
1203 }
1204 
1205 
// Emits a switch statement.  For a uniform condition this wires up a native
// LLVM 'switch' instruction; for a varying condition it records the case /
// next-block structure (used later by EmitCaseLabel()/EmitDefaultLabel())
// and starts with an all-off mask.
void
FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault,
                                const std::vector<std::pair<int, llvm::BasicBlock *> > &bbCases,
                                const std::map<llvm::BasicBlock *, llvm::BasicBlock *> &bbNext) {
    // The calling code should have called StartSwitch() before calling
    // SwitchInst().
    AssertPos(currentPos, controlFlowInfo.size() &&
              controlFlowInfo.back()->IsSwitch());

    // Stash copies of the case->block and block->next-block maps; they are
    // heap-allocated here and presumably reclaimed when the saved switch
    // state is popped — TODO(review): confirm ownership in popCFState().
    switchExpr = expr;
    defaultBlock = bbDefault;
    caseBlocks = new std::vector<std::pair<int, llvm::BasicBlock *> >(bbCases);
    nextBlocks = new std::map<llvm::BasicBlock *, llvm::BasicBlock *>(bbNext);
    // A non-vector switch expression means the condition is uniform.
    switchConditionWasUniform =
        (llvm::isa<llvm::VectorType>(expr->getType()) == false);

    if (switchConditionWasUniform == true) {
        // For a uniform switch condition, just wire things up to the LLVM
        // switch instruction.
        llvm::SwitchInst *s = llvm::SwitchInst::Create(expr, bbDefault,
                                                       bbCases.size(), bblock);
        for (int i = 0; i < (int)bbCases.size(); ++i) {
            if (expr->getType() == LLVMTypes::Int32Type)
                s->addCase(LLVMInt32(bbCases[i].first), bbCases[i].second);
            else {
                // Only 32- and 64-bit integer switch expressions are expected.
                AssertPos(currentPos, expr->getType() == LLVMTypes::Int64Type);
                s->addCase(LLVMInt64(bbCases[i].first), bbCases[i].second);
            }
        }

        AddDebugPos(s);
        // switch is a terminator
        bblock = NULL;
    }
    else {
        // For a varying switch, we first turn off all lanes of the mask
        SetInternalMask(LLVMMaskAllOff);

        if (nextBlocks->size() > 0) {
            // If there are any labels inside the switch, jump to the first
            // one; any code before the first label won't be executed by
            // anyone.  The entry with a NULL key marks the first label.
            std::map<llvm::BasicBlock *, llvm::BasicBlock *>::const_iterator iter;
            iter = nextBlocks->find(NULL);
            AssertPos(currentPos, iter != nextBlocks->end());
            llvm::BasicBlock *bbFirst = iter->second;
            BranchInst(bbFirst);
            bblock = NULL;
        }
    }
}
1257 
1258 
// Returns the number of enclosing control-flow scopes with varying
// (per-lane) control flow.
int
    int sum = 0;
    for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
        if (controlFlowInfo[i]->IsVarying())
            ++sum;
    return sum;
}
1267 
1268 
// Returns true if any enclosing control-flow scope is a 'foreach' construct.
bool
    for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
        if (controlFlowInfo[i]->IsForeach())
            return true;
    return false;
}
1276 
1277 
// Increments the nesting count that suppresses gather/scatter performance
// warnings (a counter rather than a flag so calls can nest).
void
    ++disableGSWarningCount;
}
1282 
1283 
// Decrements the gather/scatter-warning suppression count; warnings are
// re-enabled once it returns to zero.
void
    --disableGSWarningCount;
}
1288 
1289 
1290 
// AST-walk callback that creates a basic block for each labeled statement
// in the function, recording it in the context's labelMap.  Always returns
// true so that the walk continues.
bool
    LabeledStmt *ls = llvm::dyn_cast<LabeledStmt>(node);
    if (ls == NULL)
        return true;

    // NOTE(review): 'ctx' is presumably the FunctionEmitContext passed as
    // the callback's 'data' argument (InitializeLabelMap passes 'this') —
    // its declaration isn't visible in this listing; confirm upstream.
    if (ctx->labelMap.find(ls->name) != ctx->labelMap.end())
        Error(ls->pos, "Multiple labels named \"%s\" in function.",
              ls->name.c_str());
    else {
        llvm::BasicBlock *bb = ctx->CreateBasicBlock(ls->name.c_str());
        ctx->labelMap[ls->name] = bb;
    }
    return true;
}
1308 
1309 
// Clears any previously-recorded labels and walks the given statement tree
// to create a basic block for every label in the function.
void
    // erase(begin, end) here is equivalent to labelMap.clear().
    labelMap.erase(labelMap.begin(), labelMap.end());
    WalkAST(code, initLabelBBlocks, NULL, this);
}
1315 
1316 
// Returns the basic block previously created for the named label, or NULL
// if no such label exists in this function.
llvm::BasicBlock *
    if (labelMap.find(label) != labelMap.end())
        return labelMap[label];
    else
        return NULL;
}
1324 
1325 std::vector<std::string>
1327  // Initialize vector to the right size
1328  std::vector<std::string> labels(labelMap.size());
1329 
1330  // Iterate through labelMap and grab only the keys
1331  std::map<std::string, llvm::BasicBlock*>::iterator iter;
1332  for (iter=labelMap.begin(); iter != labelMap.end(); iter++)
1333  labels.push_back(iter->first);
1334 
1335  return labels;
1336 }
1337 
1338 
// Emits code for a 'return' statement: stores the return value (masked for
// varying returns), then either emits a real return (uniform control flow)
// or records the returning lanes and disables them for the rest of the
// scope (varying control flow), with an optional coherence check.
void
FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
    const Type *returnType = function->GetReturnType();
    if (returnType->IsVoidType()) {
        // Returning a value from a void function is an error (but we still
        // fall through below to emit the control-flow handling).
        if (expr != NULL)
            Error(expr->pos, "Can't return non-void type \"%s\" from void function.",
                  expr->GetType()->GetString().c_str());
    }
    else {
        if (expr == NULL) {
            Error(funcStartPos, "Must provide return value for return "
                  "statement for non-void function.");
            return;
        }

        // Convert the returned expression to the declared return type.
        expr = TypeConvertExpr(expr, returnType, "return statement");
        if (expr != NULL) {
            llvm::Value *retVal = expr->GetValue(this);
            if (retVal != NULL) {
                if (returnType->IsUniformType() ||
                    CastType<ReferenceType>(returnType) != NULL)
                    StoreInst(retVal, returnValuePtr);
                else {
                    // Use a masked store to store the value of the expression
                    // in the return value memory; this preserves the return
                    // values from other lanes that may have executed return
                    // statements previously.
                    StoreInst(retVal, returnValuePtr, GetInternalMask(),
                              returnType, PointerType::GetUniform(returnType));
                }
            }
        }
    }

    if (VaryingCFDepth() == 0) {
        // If there is only uniform control flow between us and the
        // function entry, then it's guaranteed that all lanes are running,
        // so we can just emit a true return instruction
        AddInstrumentationPoint("return: uniform control flow");
        ReturnInst();
    }
    else {
        // Otherwise we update the returnedLanes value by ORing in
        // the current lane mask.
        llvm::Value *oldReturnedLanes =
            LoadInst(returnedLanesPtr, "old_returned_lanes");
        llvm::Value *newReturnedLanes =
            BinaryOperator(llvm::Instruction::Or, oldReturnedLanes,
                           GetFullMask(), "old_mask|returned_lanes");

        // For 'coherent' return statements, emit code to check if all
        // lanes have returned
        if (doCoherenceCheck) {
            // if newReturnedLanes == functionMaskValue, get out of here!
            llvm::Value *cmp = MasksAllEqual(functionMaskValue,
                                             newReturnedLanes);
            llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return");
            llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return");
            BranchInst(bDoReturn, bNoReturn, cmp);

            bblock = bDoReturn;
            AddInstrumentationPoint("return: all lanes have returned");
            ReturnInst();

            bblock = bNoReturn;
        }
        // Otherwise update returnedLanesPtr and turn off all of the lanes
        // in the current mask so that any subsequent statements in the
        // same scope after the return have no effect
        StoreInst(newReturnedLanes, returnedLanesPtr);
        AddInstrumentationPoint("return: some but not all lanes have returned");
        SetInternalMask(LLVMMaskAllOff);
    }
}
1413 
1414 
1415 llvm::Value *
1416 FunctionEmitContext::Any(llvm::Value *mask) {
1417  // Call the target-dependent any function to test that the mask is non-zero
1418  std::vector<Symbol *> mm;
1419  m->symbolTable->LookupFunction("__any", &mm);
1420  if (g->target->getMaskBitCount() == 1)
1421  AssertPos(currentPos, mm.size() == 1);
1422  else
1423  // There should be one with signed int signature, one unsigned int.
1424  AssertPos(currentPos, mm.size() == 2);
1425  // We can actually call either one, since both are i32s as far as
1426  // LLVM's type system is concerned...
1427  llvm::Function *fmm = mm[0]->function;
1428  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_any"));
1429 }
1430 
1431 
1432 llvm::Value *
1433 FunctionEmitContext::All(llvm::Value *mask) {
1434  // Call the target-dependent movmsk function to turn the vector mask
1435  // into an i64 value
1436  std::vector<Symbol *> mm;
1437  m->symbolTable->LookupFunction("__all", &mm);
1438  if (g->target->getMaskBitCount() == 1)
1439  AssertPos(currentPos, mm.size() == 1);
1440  else
1441  // There should be one with signed int signature, one unsigned int.
1442  AssertPos(currentPos, mm.size() == 2);
1443  // We can actually call either one, since both are i32s as far as
1444  // LLVM's type system is concerned...
1445  llvm::Function *fmm = mm[0]->function;
1446  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_all"));
1447 }
1448 
1449 
1450 llvm::Value *
1451 FunctionEmitContext::None(llvm::Value *mask) {
1452  // Call the target-dependent movmsk function to turn the vector mask
1453  // into an i64 value
1454  std::vector<Symbol *> mm;
1455  m->symbolTable->LookupFunction("__none", &mm);
1456  if (g->target->getMaskBitCount() == 1)
1457  AssertPos(currentPos, mm.size() == 1);
1458  else
1459  // There should be one with signed int signature, one unsigned int.
1460  AssertPos(currentPos, mm.size() == 2);
1461  // We can actually call either one, since both are i32s as far as
1462  // LLVM's type system is concerned...
1463  llvm::Function *fmm = mm[0]->function;
1464  return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_none"));
1465 }
1466 
1467 
// Collapses a vector mask down to a scalar bitmask value by calling the
// target's movmsk routine.
llvm::Value *
#ifdef ISPC_NVPTX_ENABLED
    /* this makes mandelbrot example slower with "nvptx" target.
     * Needs further investigation. */
    const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
#else
    // NOTE(review): '__movmsk' as a local variable name uses a reserved
    // identifier (leading double underscore); consider renaming upstream.
    const char *__movmsk = "__movmsk";
#endif
    // Call the target-dependent movmsk function to turn the vector mask
    // into an i64 value
    std::vector<Symbol *> mm;
    m->symbolTable->LookupFunction(__movmsk, &mm);
    if (g->target->getMaskBitCount() == 1)
        AssertPos(currentPos, mm.size() == 1);
    else
        // There should be one with signed int signature, one unsigned int.
        AssertPos(currentPos, mm.size() == 2);
    // We can actually call either one, since both are i32s as far as
    // LLVM's type system is concerned...
    llvm::Function *fmm = mm[0]->function;
    return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
}
1491 
1492 #ifdef ISPC_NVPTX_ENABLED
1493 bool lAppendInsertExtractName(llvm::Value *vector, std::string &funcName)
1494 {
1495  llvm::Type *type = vector->getType();
1496  if (type == LLVMTypes::Int8VectorType)
1497  funcName += "_int8";
1498  else if (type == LLVMTypes::Int16VectorType)
1499  funcName += "_int16";
1500  else if (type == LLVMTypes::Int32VectorType)
1501  funcName += "_int32";
1502  else if (type == LLVMTypes::Int64VectorType)
1503  funcName += "_int64";
1504  else if (type == LLVMTypes::FloatVectorType)
1505  funcName += "_float";
1506  else if (type == LLVMTypes::DoubleVectorType)
1507  funcName += "_double";
1508  else
1509  return false;
1510  return true;
1511 }
1512 
1513 llvm::Value*
1514 FunctionEmitContext::Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar)
1515 {
1516  std::string funcName = "__insert";
1517  assert(lAppendInsertExtractName(vector, funcName));
1518  assert(lane->getType() == LLVMTypes::Int32Type);
1519 
1520  llvm::Function *func = m->module->getFunction(funcName.c_str());
1521  assert(func != NULL);
1522  std::vector<llvm::Value *> args;
1523  args.push_back(vector);
1524  args.push_back(lane);
1525  args.push_back(scalar);
1526  llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
1527  return ret;
1528 }
1529 
1530 llvm::Value*
1531 FunctionEmitContext::Extract(llvm::Value *vector, llvm::Value *lane)
1532 {
1533  std::string funcName = "__extract";
1534  assert(lAppendInsertExtractName(vector, funcName));
1535  assert(lane->getType() == LLVMTypes::Int32Type);
1536 
1537  llvm::Function *func = m->module->getFunction(funcName.c_str());
1538  assert(func != NULL);
1539  std::vector<llvm::Value *> args;
1540  args.push_back(vector);
1541  args.push_back(lane);
1542  llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
1543  return ret;
1544 }
1545 #endif /* ISPC_NVPTX_ENABLED */
1546 
1547 
// Returns an i1 value that is true when the two masks have exactly the same
// set of lanes enabled; implemented by comparing their movmsk'ed scalar
// bitmask values.
llvm::Value *
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
#ifdef ISPC_NVPTX_ENABLED
    if (g->target->getISA() == Target::NVPTX)
    {
        // Compare the two masks to get a vector of i1s
        llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                                   v1, v2, "v1==v2");
        return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
    }
#endif /* ISPC_NVPTX_ENABLED */

#if 0
    // Alternative (disabled) implementation: lane-wise compare then All().
    // Compare the two masks to get a vector of i1s
    llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                               v1, v2, "v1==v2");
    // Turn that into a bool vector type (often i32s)
    cmp = I1VecToBoolVec(cmp);
    // And see if it's all on
    return All(cmp);
#else
    // Collapse each mask to its scalar bit pattern and compare those.
    llvm::Value *mm1 = LaneMask(v1);
    llvm::Value *mm2 = LaneMask(v2);
    return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
                   LLVMGetName("equal", v1, v2));
#endif
}
1575 
// Builds the constant vector <0, 1, ..., vectorWidth-1> of 32- or 64-bit
// integers, i.e. the per-lane program index values.
llvm::Value *
    llvm::SmallVector<llvm::Constant*, 16> array;
    for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
        // Element i holds the value i, in the requested integer width.
        llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i);
        array.push_back(C);
    }

    llvm::Constant* index = llvm::ConstantVector::get(array);

    return index;
}
1588 
#ifdef ISPC_NVPTX_ENABLED
// PTX variant: obtains the lane's program index by calling the
// __program_index builtin and splatting it into element 0 of an i32 vector.
llvm::Value *
FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
    llvm::Function *func_program_index = m->module->getFunction("__program_index");
    llvm::Value *__program_index = CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__program_indexS");
    llvm::Value *index = InsertInst(llvm::UndefValue::get(LLVMTypes::Int32VectorType), __program_index, 0, "foreach__program_indexV");
#if 0
    // NOTE(review): dead code -- 'Int64VectandType' looks like a typo for
    // 'Int64VectorType'; would not compile if this #if 0 were enabled.
    if (!is32bits)
        index = ZExtInst(index, LLVMTypes::Int64VectandType);
#endif
    return index;
}
#endif /* ISPC_NVPTX_ENABLED */
1602 
1603 
1604 llvm::Value *
1605 FunctionEmitContext::GetStringPtr(const std::string &str) {
1606  llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
1607  llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
1608  llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(),
1609  true /*isConst*/,
1610  linkage, lstr, "__str");
1611  return new llvm::BitCastInst(lstrPtr, LLVMTypes::VoidPointerType,
1612  "str_void_ptr", bblock);
1613 }
1614 
1615 
// Creates a new basic block with the given name, appended to the current
// function being emitted.
llvm::BasicBlock *
    return llvm::BasicBlock::Create(*g->ctx, name, llvmFunction);
}
1620 
1621 
// Converts a vector of i1 values (or an array of such vectors) to the
// target's bool-vector type via sign extension; a no-op for targets whose
// mask elements are already single bits.
llvm::Value *
    if (b == NULL) {
        // A NULL value should only be seen after an earlier error.
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    // 1-bit masks already have the right element type.
    if (g->target->getMaskBitCount() == 1)
        return b;

    llvm::ArrayType *at =
        llvm::dyn_cast<llvm::ArrayType>(b->getType());
    if (at) {
        // If we're given an array of vectors of i1s, then do the
        // conversion for each of the elements
        llvm::Type *boolArrayType =
            llvm::ArrayType::get(LLVMTypes::BoolVectorType, at->getNumElements());
        llvm::Value *ret = llvm::UndefValue::get(boolArrayType);

        for (unsigned int i = 0; i < at->getNumElements(); ++i) {
            llvm::Value *elt = ExtractInst(b, i);
            llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType,
                                         LLVMGetName(elt, "_to_boolvec"));
            ret = InsertInst(ret, sext, i);
        }
        return ret;
    }
    else
        return SExtInst(b, LLVMTypes::BoolVectorType, LLVMGetName(b, "_to_boolvec"));
}
1652 
1653 
1654 static llvm::Value *
1655 lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
1656  llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s, true);
1657  std::string var_name = "_";
1658  var_name = var_name + s;
1659  llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(),
1660  true /* const */,
1661  llvm::GlobalValue::InternalLinkage,
1662  sConstant, var_name.c_str());
1663  llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) };
1664  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1665 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
1666  return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock);
1667 #else /* LLVM 3.7+ */
1668  return llvm::GetElementPtrInst::Create(PTYPE(sPtr),
1669  sPtr, arrayRef, "sptr", bblock);
1670 #endif
1671 }
1672 
1673 
// When instrumentation is enabled, emits a call to ISPCInstrument() with the
// current file name, the given note, the line number, and the active mask.
void
    AssertPos(currentPos, note != NULL);
    if (!g->emitInstrumentation)
        return;

    std::vector<llvm::Value *> args;
    // arg 1: filename as string
    args.push_back(lGetStringAsValue(bblock, currentPos.name));
    // arg 2: provided note
    args.push_back(lGetStringAsValue(bblock, note));
    // arg 3: line number
    args.push_back(LLVMInt32(currentPos.first_line));
    // arg 4: current mask, movmsk'ed down to an int64
    args.push_back(LaneMask(GetFullMask()));

    llvm::Function *finst = m->module->getFunction("ISPCInstrument");
    CallInst(finst, NULL, args, "");
}
1693 
1694 
// Records the given source position as the current one, to be attached to
// subsequently emitted instructions.
void
    currentPos = pos;
}
1699 
1700 
// Returns the source position currently being used for emitted instructions.
SourcePos
    return currentPos;
}
1705 
1706 
// Attaches debug location metadata to 'value' (if it is an instruction and
// debug info is enabled), using the given position/scope or falling back to
// the context's current position and scope.
void
FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos,
                                 llvm::DIScope *scope) {
    llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(value);
    if (inst != NULL && m->diBuilder) {
        SourcePos p = pos ? *pos : currentPos;
        if (p.first_line != 0)
            // If first_line == 0, then we're in the middle of setting up
            // the standard library or the like; don't add debug positions
            // for those functions
            inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column,
                                                  scope ?
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
                                                  *scope
#else /* LLVM 3.7+ */
                                                  scope
#endif
                                                  : GetDIScope()));
    }
}
1727 
1728 
// Opens a new lexical scope for debug info: creates a DILexicalBlock (child
// of the innermost open scope, or of the subprogram itself) and pushes it on
// the debug-scope stack.  No-op when debug info is disabled.
void
    if (m->diBuilder != NULL) {
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
        llvm::DIScope parentScope;
        llvm::DILexicalBlock lexicalBlock;
#else /* LLVM 3.7+ */
        llvm::DIScope *parentScope;
        llvm::DILexicalBlock *lexicalBlock;
#endif
        if (debugScopes.size() > 0)
            parentScope = debugScopes.back();
        else
            parentScope = diSubprogram;

        lexicalBlock =
            m->diBuilder->createLexicalBlock(parentScope, diFile,
                                             currentPos.first_line,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_5
                                             // Revision 202736 in LLVM adds support of DWARF discriminator
                                             // to the last argument and revision 202737 in clang adds 0
                                             // for the last argument by default.
                                             currentPos.first_column, 0);
#else
                                             // Revision 216239 in LLVM removes support of DWARF discriminator
                                             // as the last argument
                                             currentPos.first_column);
#endif // LLVM 3.2, 3.3, 3.4 and 3.6+
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
        AssertPos(currentPos, lexicalBlock.Verify());
        debugScopes.push_back(lexicalBlock);
#else /* LLVM 3.7+ */
        debugScopes.push_back(llvm::cast<llvm::DILexicalBlockBase>(lexicalBlock));
#endif
    }
}
1765 
1766 
// Closes the innermost debug-info lexical scope opened by StartScope().
void
    if (m->diBuilder != NULL) {
        AssertPos(currentPos, debugScopes.size() > 0);
        debugScopes.pop_back();
    }
}
1774 
1775 
// Returns the innermost currently-open debug-info scope.  The return type
// differs by LLVM version: DIScope by value through 3.6, pointer from 3.7.
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
llvm::DIScope
#else /* LLVM 3.7+ */
llvm::DIScope*
#endif
    AssertPos(currentPos, debugScopes.size() > 0);
    return debugScopes.back();
}
1785 
1786 
// Emits debug info for a local (automatic) variable: creates the DI variable
// description in the current scope and inserts an llvm.dbg.declare for its
// storage.  No-op when debug info is disabled.  The preprocessor branches
// track DIBuilder API changes across LLVM versions.
void
    if (m->diBuilder == NULL)
        return;

#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
    llvm::DIScope scope = GetDIScope();
    llvm::DIType diType = sym->type->GetDIType(scope);
    AssertPos(currentPos, diType.Verify());
    llvm::DIVariable var =
#else /* LLVM 3.7+ */
    llvm::DIScope *scope = GetDIScope();
    llvm::DIType *diType = sym->type->GetDIType(scope);
    llvm::DILocalVariable *var =
#endif

#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7*/
    m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_auto_variable,
                                      scope,
                                      sym->name,
                                      sym->pos.GetDIFile(),
                                      sym->pos.first_line,
                                      diType,
                                      true /* preserve through opts */);
#else /* LLVM 3.8+ */
    m->diBuilder->createAutoVariable(scope,
                                     sym->name,
                                     sym->pos.GetDIFile(),
                                     sym->pos.first_line,
                                     diType,
                                     true /* preserve through opts */);
#endif


#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
    AssertPos(currentPos, var.Verify());
    llvm::Instruction *declareInst =
        m->diBuilder->insertDeclare(sym->storagePtr, var,
        #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
                                    m->diBuilder->createExpression(),
        #endif
                                    bblock);
    AddDebugPos(declareInst, &sym->pos, &scope);
#else /* LLVM 3.7+ */
    llvm::Instruction *declareInst =
        m->diBuilder->insertDeclare(sym->storagePtr, var,
                                    m->diBuilder->createExpression(),
                                    llvm::DebugLoc::get(sym->pos.first_line,
                                                        sym->pos.first_column, scope),
                                    bblock);
    AddDebugPos(declareInst, &sym->pos, scope);
#endif
}
1840 
1841 
// Emits debug info for a function parameter: creates the DI parameter
// description (parameter numbers are 1-based, hence argNum + 1) in the
// subprogram scope and inserts an llvm.dbg.declare for its storage.  No-op
// when debug info is disabled.  The preprocessor branches track DIBuilder
// API changes across LLVM versions.
void
    if (m->diBuilder == NULL)
        return;

#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
    int flags = 0;
#else // LLVM 4.0+
    llvm::DINode::DIFlags flags = llvm::DINode::FlagZero;
#endif
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
    llvm::DIScope scope = diSubprogram;
    llvm::DIType diType = sym->type->GetDIType(scope);
    AssertPos(currentPos, diType.Verify());
    llvm::DIVariable var =
#else /* LLVM 3.7+ */
    llvm::DIScope *scope = diSubprogram;
    llvm::DIType *diType = sym->type->GetDIType(scope);
    llvm::DILocalVariable *var =
#endif

#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
    m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_arg_variable,
                                      scope,
                                      sym->name,
                                      sym->pos.GetDIFile(),
                                      sym->pos.first_line,
                                      diType,
                                      true /* preserve through opts */,
                                      flags,
                                      argNum + 1);
#else /* LLVM 3.8+ */
    m->diBuilder->createParameterVariable(scope,
                                          sym->name,
                                          argNum + 1,
                                          sym->pos.GetDIFile(),
                                          sym->pos.first_line,
                                          diType,
                                          true /* preserve through opts */,
                                          flags);
#endif

#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
    AssertPos(currentPos, var.Verify());
    llvm::Instruction *declareInst =
        m->diBuilder->insertDeclare(sym->storagePtr, var,
        #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
                                    m->diBuilder->createExpression(),
        #endif
                                    bblock);
    AddDebugPos(declareInst, &sym->pos, &scope);
#else /* LLVM 3.7+ */
    llvm::Instruction *declareInst =
        m->diBuilder->insertDeclare(sym->storagePtr, var,
                                    m->diBuilder->createExpression(),
                                    llvm::DebugLoc::get(sym->pos.first_line,
                                                        sym->pos.first_column, scope),
                                    bblock);
    AddDebugPos(declareInst, &sym->pos, scope);
#endif
}
1903 
1904 
1905 /** If the given type is an array of vector types, then it's the
1906  representation of an ispc VectorType with varying elements. If it is
1907  one of these, return the array size (i.e. the VectorType's size).
1908  Otherwise return zero.
1909  */
1910 static int
1911 lArrayVectorWidth(llvm::Type *t) {
1912  llvm::ArrayType *arrayType =
1913  llvm::dyn_cast<llvm::ArrayType>(t);
1914  if (arrayType == NULL)
1915  return 0;
1916 
1917  // We shouldn't be seeing arrays of anything but vectors being passed
1918  // to things like FunctionEmitContext::BinaryOperator() as operands.
1919  llvm::VectorType *vectorElementType =
1920  llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
1921  Assert((vectorElementType != NULL &&
1922  (int)vectorElementType->getNumElements() == g->target->getVectorWidth()));
1923 
1924  return (int)arrayType->getNumElements();
1925 }
1926 
1927 
1928 llvm::Value *
1929 FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst,
1930  llvm::Value *v0, llvm::Value *v1,
1931  const char *name) {
1932  if (v0 == NULL || v1 == NULL) {
1933  AssertPos(currentPos, m->errorCount > 0);
1934  return NULL;
1935  }
1936 
1937  AssertPos(currentPos, v0->getType() == v1->getType());
1938  llvm::Type *type = v0->getType();
1939  int arraySize = lArrayVectorWidth(type);
1940  if (arraySize == 0) {
1941  llvm::Instruction *bop =
1942  llvm::BinaryOperator::Create(inst, v0, v1, name ? name : "", bblock);
1943  AddDebugPos(bop);
1944  return bop;
1945  }
1946  else {
1947  // If this is an ispc VectorType, apply the binary operator to each
1948  // of the elements of the array (which in turn should be either
1949  // scalar types or llvm::VectorTypes.)
1950  llvm::Value *ret = llvm::UndefValue::get(type);
1951  for (int i = 0; i < arraySize; ++i) {
1952  llvm::Value *a = ExtractInst(v0, i);
1953  llvm::Value *b = ExtractInst(v1, i);
1954  llvm::Value *op = BinaryOperator(inst, a, b);
1955  ret = InsertInst(ret, op, i);
1956  }
1957  return ret;
1958  }
1959 }
1960 
1961 
1962 llvm::Value *
1963 FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
1964  if (v == NULL) {
1965  AssertPos(currentPos, m->errorCount > 0);
1966  return NULL;
1967  }
1968 
1969  // Similarly to BinaryOperator, do the operation on all the elements of
1970  // the array if we're given an array type; otherwise just do the
1971  // regular llvm operation.
1972  llvm::Type *type = v->getType();
1973  int arraySize = lArrayVectorWidth(type);
1974  if (arraySize == 0) {
1975  llvm::Instruction *binst =
1976  llvm::BinaryOperator::CreateNot(v, name ? name : "not", bblock);
1977  AddDebugPos(binst);
1978  return binst;
1979  }
1980  else {
1981  llvm::Value *ret = llvm::UndefValue::get(type);
1982  for (int i = 0; i < arraySize; ++i) {
1983  llvm::Value *a = ExtractInst(v, i);
1984  llvm::Value *op =
1985  llvm::BinaryOperator::CreateNot(a, name ? name : "not", bblock);
1986  AddDebugPos(op);
1987  ret = InsertInst(ret, op, i);
1988  }
1989  return ret;
1990  }
1991 }
1992 
1993 
1994 // Given the llvm Type that represents an ispc VectorType, return an
1995 // equally-shaped type with boolean elements. (This is the type that will
1996 // be returned from CmpInst with ispc VectorTypes).
1997 static llvm::Type *
1999  llvm::ArrayType *arrayType =
2000  llvm::dyn_cast<llvm::ArrayType>(type);
2001  Assert(arrayType != NULL);
2002 
2003  llvm::VectorType *vectorElementType =
2004  llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
2005  Assert(vectorElementType != NULL);
2006  Assert((int)vectorElementType->getNumElements() == g->target->getVectorWidth());
2007 
2008  llvm::Type *base =
2009  llvm::VectorType::get(LLVMTypes::BoolType, g->target->getVectorWidth());
2010  return llvm::ArrayType::get(base, arrayType->getNumElements());
2011 }
2012 
2013 
2014 llvm::Value *
2015 FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst,
2016  llvm::CmpInst::Predicate pred,
2017  llvm::Value *v0, llvm::Value *v1,
2018  const char *name) {
2019  if (v0 == NULL || v1 == NULL) {
2020  AssertPos(currentPos, m->errorCount > 0);
2021  return NULL;
2022  }
2023 
2024  AssertPos(currentPos, v0->getType() == v1->getType());
2025  llvm::Type *type = v0->getType();
2026  int arraySize = lArrayVectorWidth(type);
2027  if (arraySize == 0) {
2028  llvm::Instruction *ci =
2029  llvm::CmpInst::Create(inst, pred, v0, v1, name ? name : "cmp",
2030  bblock);
2031  AddDebugPos(ci);
2032  return ci;
2033  }
2034  else {
2035  llvm::Type *boolType = lGetMatchingBoolVectorType(type);
2036  llvm::Value *ret = llvm::UndefValue::get(boolType);
2037  for (int i = 0; i < arraySize; ++i) {
2038  llvm::Value *a = ExtractInst(v0, i);
2039  llvm::Value *b = ExtractInst(v1, i);
2040  llvm::Value *op = CmpInst(inst, pred, a, b, name);
2041  ret = InsertInst(ret, op, i);
2042  }
2043  return ret;
2044  }
2045 }
2046 
2047 
2048 llvm::Value *
2049 FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
2050  if (value == NULL) {
2051  AssertPos(currentPos, m->errorCount > 0);
2052  return NULL;
2053  }
2054 
2055  llvm::Value *ret = NULL;
2056  llvm::Type *eltType = value->getType();
2057  llvm::Type *vecType = NULL;
2058 
2059  llvm::PointerType *pt =
2060  llvm::dyn_cast<llvm::PointerType>(eltType);
2061  if (pt != NULL) {
2062  // Varying pointers are represented as vectors of i32/i64s
2064  value = PtrToIntInst(value);
2065  }
2066  else {
2067  // All other varying types are represented as vectors of the
2068  // underlying type.
2069  vecType = llvm::VectorType::get(eltType, g->target->getVectorWidth());
2070  }
2071 
2072  // Check for a constant case.
2073  if (llvm::Constant *const_val = llvm::dyn_cast<llvm::Constant>(value)) {
2074  ret = llvm::ConstantVector::getSplat(
2075  g->target->getVectorWidth(),
2076  const_val);
2077  return ret;
2078  }
2079 
2080  ret = BroadcastValue(value, vecType, name);
2081 
2082  return ret;
2083 }
2084 
2085 
2086 llvm::Value *
2087 FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type,
2088  const char *name) {
2089  if (value == NULL) {
2090  AssertPos(currentPos, m->errorCount > 0);
2091  return NULL;
2092  }
2093 
2094  if (name == NULL)
2095  name = LLVMGetName(value, "_bitcast");
2096 
2097  llvm::Instruction *inst = new llvm::BitCastInst(value, type, name, bblock);
2098  AddDebugPos(inst);
2099  return inst;
2100 }
2101 
2102 
2103 llvm::Value *
2104 FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
2105  if (value == NULL) {
2106  AssertPos(currentPos, m->errorCount > 0);
2107  return NULL;
2108  }
2109 
2110  if (llvm::isa<llvm::VectorType>(value->getType()))
2111  // no-op for varying pointers; they're already vectors of ints
2112  return value;
2113 
2114  if (name == NULL)
2115  name = LLVMGetName(value, "_ptr2int");
2116  llvm::Type *type = LLVMTypes::PointerIntType;
2117  llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
2118  AddDebugPos(inst);
2119  return inst;
2120 }
2121 
2122 
2123 llvm::Value *
2124 FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType,
2125  const char *name) {
2126  if (value == NULL) {
2127  AssertPos(currentPos, m->errorCount > 0);
2128  return NULL;
2129  }
2130 
2131  if (name == NULL)
2132  name = LLVMGetName(value, "_ptr2int");
2133 
2134  llvm::Type *fromType = value->getType();
2135  if (llvm::isa<llvm::VectorType>(fromType)) {
2136  // varying pointer
2137  if (fromType == toType)
2138  // already the right type--done
2139  return value;
2140  else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
2141  return TruncInst(value, toType, name);
2142  else {
2143  AssertPos(currentPos, fromType->getScalarSizeInBits() <
2144  toType->getScalarSizeInBits());
2145  return ZExtInst(value, toType, name);
2146  }
2147  }
2148 
2149  llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock);
2150  AddDebugPos(inst);
2151  return inst;
2152 }
2153 
2154 
2155 llvm::Value *
2156 FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType,
2157  const char *name) {
2158  if (value == NULL) {
2159  AssertPos(currentPos, m->errorCount > 0);
2160  return NULL;
2161  }
2162 
2163  if (name == NULL)
2164  name = LLVMGetName(value, "_int2ptr");
2165 
2166  llvm::Type *fromType = value->getType();
2167  if (llvm::isa<llvm::VectorType>(fromType)) {
2168  // varying pointer
2169  if (fromType == toType)
2170  // done
2171  return value;
2172  else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
2173  return TruncInst(value, toType, name);
2174  else {
2175  AssertPos(currentPos, fromType->getScalarSizeInBits() <
2176  toType->getScalarSizeInBits());
2177  return ZExtInst(value, toType, name);
2178  }
2179  }
2180 
2181  llvm::Instruction *inst = new llvm::IntToPtrInst(value, toType, name,
2182  bblock);
2183  AddDebugPos(inst);
2184  return inst;
2185 }
2186 
2187 
2188 llvm::Instruction *
2189 FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type,
2190  const char *name) {
2191  if (value == NULL) {
2192  AssertPos(currentPos, m->errorCount > 0);
2193  return NULL;
2194  }
2195 
2196  if (name == NULL)
2197  name = LLVMGetName(value, "_trunc");
2198 
2199  // TODO: we should probably handle the array case as in
2200  // e.g. BitCastInst(), but we don't currently need that functionality
2201  llvm::Instruction *inst = new llvm::TruncInst(value, type, name, bblock);
2202  AddDebugPos(inst);
2203  return inst;
2204 }
2205 
2206 
2207 llvm::Instruction *
2208 FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
2209  llvm::Type *type, const char *name) {
2210  if (value == NULL) {
2211  AssertPos(currentPos, m->errorCount > 0);
2212  return NULL;
2213  }
2214 
2215  if (name == NULL)
2216  name = LLVMGetName(value, "_cast");
2217 
2218  // TODO: we should probably handle the array case as in
2219  // e.g. BitCastInst(), but we don't currently need that functionality
2220  llvm::Instruction *inst = llvm::CastInst::Create(op, value, type, name,
2221  bblock);
2222  AddDebugPos(inst);
2223  return inst;
2224 }
2225 
2226 
2227 llvm::Instruction *
2228 FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type,
2229  const char *name) {
2230  if (value == NULL) {
2231  AssertPos(currentPos, m->errorCount > 0);
2232  return NULL;
2233  }
2234 
2235  if (name == NULL)
2236  name = LLVMGetName(value, "_cast");
2237 
2238  // TODO: we should probably handle the array case as in
2239  // e.g. BitCastInst(), but we don't currently need that functionality
2240  llvm::Instruction *inst = llvm::CastInst::CreateFPCast(value, type, name, bblock);
2241  AddDebugPos(inst);
2242  return inst;
2243 }
2244 
2245 
2246 llvm::Instruction *
2247 FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type,
2248  const char *name) {
2249  if (value == NULL) {
2250  AssertPos(currentPos, m->errorCount > 0);
2251  return NULL;
2252  }
2253 
2254  if (name == NULL)
2255  name = LLVMGetName(value, "_sext");
2256 
2257  // TODO: we should probably handle the array case as in
2258  // e.g. BitCastInst(), but we don't currently need that functionality
2259  llvm::Instruction *inst = new llvm::SExtInst(value, type, name, bblock);
2260  AddDebugPos(inst);
2261  return inst;
2262 }
2263 
2264 
2265 llvm::Instruction *
2266 FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type,
2267  const char *name) {
2268  if (value == NULL) {
2269  AssertPos(currentPos, m->errorCount > 0);
2270  return NULL;
2271  }
2272 
2273  if (name == NULL)
2274  name = LLVMGetName(value, "_zext");
2275 
2276  // TODO: we should probably handle the array case as in
2277  // e.g. BitCastInst(), but we don't currently need that functionality
2278  llvm::Instruction *inst = new llvm::ZExtInst(value, type, name, bblock);
2279  AddDebugPos(inst);
2280  return inst;
2281 }
2282 
2283 
/** Utility routine used by the GetElementPtrInst() methods; given a
    pointer to some type (either uniform or varying) and an index (also
    either uniform or varying), this returns the new pointer (varying if
    appropriate) given by offsetting the base pointer by the index times
    the size of the object that the pointer points to.

    @param basePtr Base pointer (uniform llvm pointer or varying vector
                   of integers).
    @param index   Index value (uniform scalar or varying vector).
    @param ptrType ispc type of the pointer being indexed.
    @return Varying pointer value (vector of lane-wise addresses).
 */
llvm::Value *
FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
                                     const Type *ptrType) {
    // Find the scale factor for the index (i.e. the size of the object
    // that the pointer(s) point(s) to.
    const Type *scaleType = ptrType->GetBaseType();
    llvm::Value *scale = g->target->SizeOf(scaleType->LLVMType(g->ctx), bblock);

    bool indexIsVarying =
        llvm::isa<llvm::VectorType>(index->getType());
    llvm::Value *offset = NULL;
    if (indexIsVarying == false) {
        // Truncate or sign extend the index as appropriate to a 32 or
        // 64-bit type, depending on the target and the 32-bit-addressing
        // option.
        if ((g->target->is32Bit() || g->opt.force32BitAddressing) &&
            index->getType() == LLVMTypes::Int64Type)
            index = TruncInst(index, LLVMTypes::Int32Type);
        else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
                 index->getType() == LLVMTypes::Int32Type)
            index = SExtInst(index, LLVMTypes::Int64Type);

        // do a scalar multiply to get the offset as index * scale and then
        // smear the result out to be a vector; this is more efficient than
        // first promoting both the scale and the index to vectors and then
        // multiplying.
        offset = BinaryOperator(llvm::Instruction::Mul, scale, index);
        offset = SmearUniform(offset);
    }
    else {
        // Similarly, truncate or sign extend the index to be a 32 or 64
        // bit vector type
        if ((g->target->is32Bit() || g->opt.force32BitAddressing) &&
            index->getType() == LLVMTypes::Int64VectorType)
            index = TruncInst(index, LLVMTypes::Int32VectorType);
        else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
                 index->getType() == LLVMTypes::Int32VectorType)
            index = SExtInst(index, LLVMTypes::Int64VectorType);

        // Here the index is already varying, so the scale is smeared and
        // the multiply is done vector-wide.
        scale = SmearUniform(scale);

        // offset = index * scale
        offset = BinaryOperator(llvm::Instruction::Mul, scale, index,
                                LLVMGetName("mul", scale, index));
    }

    // For 64-bit targets, if we've been doing our offset calculations in
    // 32 bits, we still have to convert to a 64-bit value before we
    // actually add the offset to the pointer.
    if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
        offset = SExtInst(offset, LLVMTypes::Int64VectorType,
                          LLVMGetName(offset, "_to_64"));

    // Smear out the pointer to be varying; either the base pointer or the
    // index must be varying for this method to be called.
    bool baseIsUniform =
        (llvm::isa<llvm::PointerType>(basePtr->getType()));
    AssertPos(currentPos, baseIsUniform == false || indexIsVarying == true);
    llvm::Value *varyingPtr = baseIsUniform ? SmearUniform(basePtr) : basePtr;

    // newPtr = ptr + offset (pointer arithmetic done as integer math on
    // the vector of lane addresses)
    return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset,
                          LLVMGetName(basePtr, "_offset"));
}
2353 
2354 
2355 void
2356 FunctionEmitContext::MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1) {
2357  llvm::Type *type0 = (*v0)->getType();
2358  llvm::Type *type1 = (*v1)->getType();
2359 
2360  // First, promote to a vector type if one of the two values is a vector
2361  // type
2362  if (llvm::isa<llvm::VectorType>(type0) &&
2363  !llvm::isa<llvm::VectorType>(type1)) {
2364  *v1 = SmearUniform(*v1, "smear_v1");
2365  type1 = (*v1)->getType();
2366  }
2367  if (!llvm::isa<llvm::VectorType>(type0) &&
2368  llvm::isa<llvm::VectorType>(type1)) {
2369  *v0 = SmearUniform(*v0, "smear_v0");
2370  type0 = (*v0)->getType();
2371  }
2372 
2373  // And then update to match bit widths
2374  if (type0 == LLVMTypes::Int32Type &&
2375  type1 == LLVMTypes::Int64Type)
2376  *v0 = SExtInst(*v0, LLVMTypes::Int64Type);
2377  else if (type1 == LLVMTypes::Int32Type &&
2378  type0 == LLVMTypes::Int64Type)
2379  *v1 = SExtInst(*v1, LLVMTypes::Int64Type);
2380  else if (type0 == LLVMTypes::Int32VectorType &&
2381  type1 == LLVMTypes::Int64VectorType)
2382  *v0 = SExtInst(*v0, LLVMTypes::Int64VectorType);
2383  else if (type1 == LLVMTypes::Int32VectorType &&
2384  type0 == LLVMTypes::Int64VectorType)
2385  *v1 = SExtInst(*v1, LLVMTypes::Int64VectorType);
2386 }
2387 
2388 
2389 /** Given an integer index in indexValue that's indexing into an array of
2390  soa<> structures with given soaWidth, compute the two sub-indices we
2391  need to do the actual indexing calculation:
2392 
2393  subIndices[0] = (indexValue >> log(soaWidth))
2394  subIndices[1] = (indexValue & (soaWidth-1))
2395  */
2396 static llvm::Value *
2398  llvm::Value *indexValue, llvm::Value *ptrSliceOffset,
2399  llvm::Value **newSliceOffset) {
2400  // Compute the log2 of the soaWidth.
2401  Assert(soaWidth > 0);
2402  int logWidth = 0, sw = soaWidth;
2403  while (sw > 1) {
2404  ++logWidth;
2405  sw >>= 1;
2406  }
2407  Assert((1 << logWidth) == soaWidth);
2408 
2409  ctx->MatchIntegerTypes(&indexValue, &ptrSliceOffset);
2410 
2411  llvm::Type *indexType = indexValue->getType();
2412  llvm::Value *shift = LLVMIntAsType(logWidth, indexType);
2413  llvm::Value *mask = LLVMIntAsType(soaWidth-1, indexType);
2414 
2415  llvm::Value *indexSum =
2416  ctx->BinaryOperator(llvm::Instruction::Add, indexValue, ptrSliceOffset,
2417  "index_sum");
2418 
2419  // minor index = (index & (soaWidth - 1))
2420  *newSliceOffset = ctx->BinaryOperator(llvm::Instruction::And, indexSum,
2421  mask, "slice_index_minor");
2422  // slice offsets are always 32 bits...
2423  if ((*newSliceOffset)->getType() == LLVMTypes::Int64Type)
2424  *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32Type);
2425  else if ((*newSliceOffset)->getType() == LLVMTypes::Int64VectorType)
2426  *newSliceOffset = ctx->TruncInst(*newSliceOffset, LLVMTypes::Int32VectorType);
2427 
2428  // major index = (index >> logWidth)
2429  return ctx->BinaryOperator(llvm::Instruction::AShr, indexSum,
2430  shift, "slice_index_major");
2431 }
2432 
2433 
2434 llvm::Value *
2435 FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) {
2436  // Create a small struct where the first element is the type of the
2437  // given pointer and the second element is the type of the offset
2438  // value.
2439  std::vector<llvm::Type *> eltTypes;
2440  eltTypes.push_back(ptr->getType());
2441  eltTypes.push_back(offset->getType());
2442  llvm::StructType *st =
2443  llvm::StructType::get(*g->ctx, eltTypes);
2444 
2445  llvm::Value *ret = llvm::UndefValue::get(st);
2446  ret = InsertInst(ret, ptr, 0, LLVMGetName(ret, "_slice_ptr"));
2447  ret = InsertInst(ret, offset, 1, LLVMGetName(ret, "_slice_offset"));
2448  return ret;
2449 }
2450 
2451 
/** Emit a GEP of basePtr by a single index. Handles slice pointers
    (recursing on the underlying pointer), uniform pointer + uniform
    index (plain LLVM GEP), and all varying combinations (via
    applyVaryingGEP()).

    @param basePtr    Base pointer value; a struct value for slice
                      pointers.
    @param index      Index value, uniform or varying.
    @param ptrRefType ispc pointer or reference type of basePtr.
    @param name       Optional name for the result.
 */
llvm::Value *
FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
                                       const Type *ptrRefType, const char *name) {
    if (basePtr == NULL || index == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    // Regularize to a standard pointer type for basePtr's type;
    // references are treated as uniform pointers to their target.
    const PointerType *ptrType;
    if (CastType<ReferenceType>(ptrRefType) != NULL)
        ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
    else {
        ptrType = CastType<PointerType>(ptrRefType);
        AssertPos(currentPos, ptrType != NULL);
    }

    if (ptrType->IsSlice()) {
        // Slice pointers are represented as a {ptr, offset} struct.
        AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));

        llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
        if (ptrType->IsFrozenSlice() == false) {
            // For slice pointers that aren't frozen, we compute a new
            // index based on the given index plus the offset in the slice
            // pointer.  This gives us an updated integer slice index for
            // the resulting slice pointer and then an index to index into
            // the soa<> structs with.
            llvm::Value *newSliceOffset;
            int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
            index = lComputeSliceIndex(this, soaWidth, index,
                                       ptrSliceOffset, &newSliceOffset);
            ptrSliceOffset = newSliceOffset;
        }

        // Handle the indexing into the soa<> structs with the major
        // component of the index through a recursive call
        llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index,
                                           ptrType->GetAsNonSlice(), name);

        // And mash the results together for the return value
        return MakeSlicePointer(p, ptrSliceOffset);
    }

    // Double-check consistency between the given pointer type and its LLVM
    // type.
    if (ptrType->IsUniformType())
        AssertPos(currentPos, llvm::isa<llvm::PointerType>(basePtr->getType()));
    else if (ptrType->IsVaryingType())
        AssertPos(currentPos, llvm::isa<llvm::VectorType>(basePtr->getType()));

    bool indexIsVaryingType =
        llvm::isa<llvm::VectorType>(index->getType());

    if (indexIsVaryingType == false && ptrType->IsUniformType() == true) {
        // The easy case: both the base pointer and the indices are
        // uniform, so just emit the regular LLVM GEP instruction
        llvm::Value *ind[1] = { index };
        llvm::ArrayRef<llvm::Value *> arrayRef(&ind[0], &ind[1]);
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                            name ? name : "gep", bblock);
#else /* LLVM 3.7+ requires the pointee type to be passed explicitly */
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(PTYPE(basePtr),
                                            basePtr, arrayRef,
                                            name ? name : "gep", bblock);
#endif
        AddDebugPos(inst);
        return inst;
    }
    else
        return applyVaryingGEP(basePtr, index, ptrType);
}
2526 
2527 
/** Two-index GEP variant: index0 selects within the outer dimension and
    index1 within the inner (sequential) element type. Slice pointers
    and all-uniform cases are handled directly; anything varying goes
    through applyVaryingGEP() one dimension at a time.
 */
llvm::Value *
FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0,
                                       llvm::Value *index1, const Type *ptrRefType,
                                       const char *name) {
    if (basePtr == NULL || index0 == NULL || index1 == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    // Regaularize the pointer type for basePtr
    const PointerType *ptrType = NULL;
    if (CastType<ReferenceType>(ptrRefType) != NULL)
        ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
    else {
        ptrType = CastType<PointerType>(ptrRefType);
        AssertPos(currentPos, ptrType != NULL);
    }

    if (ptrType->IsSlice()) {
        // Similar to the 1D GEP implementation above, for non-frozen slice
        // pointers we do the two-step indexing calculation and then pass
        // the new major index on to a recursive GEP call.  Note that only
        // index1 (the inner index) participates in the slice math.
        AssertPos(currentPos, llvm::isa<llvm::StructType>(basePtr->getType()));
        llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
        if (ptrType->IsFrozenSlice() == false) {
            llvm::Value *newSliceOffset;
            int soaWidth = ptrType->GetBaseType()->GetSOAWidth();
            index1 = lComputeSliceIndex(this, soaWidth, index1,
                                        ptrSliceOffset, &newSliceOffset);
            ptrSliceOffset = newSliceOffset;
        }

        llvm::Value *p = GetElementPtrInst(ExtractInst(basePtr, 0), index0,
                                           index1, ptrType->GetAsNonSlice(),
                                           name);
        return MakeSlicePointer(p, ptrSliceOffset);
    }

    bool index0IsVaryingType =
        llvm::isa<llvm::VectorType>(index0->getType());
    bool index1IsVaryingType =
        llvm::isa<llvm::VectorType>(index1->getType());

    if (index0IsVaryingType == false && index1IsVaryingType == false &&
        ptrType->IsUniformType() == true) {
        // The easy case: both the base pointer and the indices are
        // uniform, so just emit the regular LLVM GEP instruction
        llvm::Value *indices[2] = { index0, index1 };
        llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                            name ? name : "gep", bblock);
#else /* LLVM 3.7+ requires the pointee type to be passed explicitly */
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(PTYPE(basePtr),
                                            basePtr, arrayRef,
                                            name ? name : "gep", bblock);
#endif
        AddDebugPos(inst);
        return inst;
    }
    else {
        // Handle the first dimension with index0
        llvm::Value *ptr0 = GetElementPtrInst(basePtr, index0, ptrType);

        // Now index into the second dimension with index1.  First figure
        // out the type of ptr0.
        const Type *baseType = ptrType->GetBaseType();
        const SequentialType *st = CastType<SequentialType>(baseType);
        AssertPos(currentPos, st != NULL);

        // ptr0's uniformity depends on what the first GEP produced; pick
        // the matching ispc pointer type for the second step.
        bool ptr0IsUniform =
            llvm::isa<llvm::PointerType>(ptr0->getType());
        const Type *ptr0BaseType = st->GetElementType();
        const Type *ptr0Type = ptr0IsUniform ?
            PointerType::GetUniform(ptr0BaseType) :
            PointerType::GetVarying(ptr0BaseType);

        return applyVaryingGEP(ptr0, index1, ptr0Type);
    }
}
2610 
2611 
/** Return a pointer to the given element (struct member, or array/vector
    element) of the collection pointed to by fullBasePtr. Handles slice
    pointers (the slice offset is preserved unchanged), uniform pointers
    (plain LLVM struct GEP), and varying pointers (explicit byte-offset
    arithmetic).

    @param fullBasePtr   Base pointer; a {ptr, offset} struct for slices.
    @param elementNum    Index of the element to address.
    @param ptrRefType    ispc pointer/reference type of fullBasePtr; may
                         be NULL unless resultPtrType is requested or the
                         base is a slice/varying pointer.
    @param name          Optional name for the result value.
    @param resultPtrType If non-NULL, receives the ispc PointerType of
                         the returned value.
 */
llvm::Value *
FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum,
                                      const Type *ptrRefType, const char *name,
                                      const PointerType **resultPtrType) {
    if (resultPtrType != NULL)
        AssertPos(currentPos, ptrRefType != NULL);

    // Bail out (after a prior error) if we're pointing at a forward-declared
    // struct whose layout isn't known yet.
    llvm::PointerType *llvmPtrType =
        llvm::dyn_cast<llvm::PointerType>(fullBasePtr->getType());
    if (llvmPtrType != NULL) {
        llvm::StructType *llvmStructType =
            llvm::dyn_cast<llvm::StructType>(llvmPtrType->getElementType());
        if (llvmStructType != NULL && llvmStructType->isSized() == false) {
            AssertPos(currentPos, m->errorCount > 0);
            return NULL;
        }
    }

    // (Unfortunately) it's not required to pass a non-NULL ptrRefType, but
    // if we have one, regularize into a pointer type.
    const PointerType *ptrType = NULL;
    if (ptrRefType != NULL) {
        // Normalize references to uniform pointers
        if (CastType<ReferenceType>(ptrRefType) != NULL)
            ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
        else
            ptrType = CastType<PointerType>(ptrRefType);
        AssertPos(currentPos, ptrType != NULL);
    }

    // Similarly, we have to see if the pointer type is a struct to see if
    // we have a slice pointer instead of looking at ptrType; this is also
    // unfortunate...
    llvm::Value *basePtr = fullBasePtr;
    bool baseIsSlicePtr =
        llvm::isa<llvm::StructType>(fullBasePtr->getType());
    const PointerType *rpt;
    if (baseIsSlicePtr) {
        AssertPos(currentPos, ptrType != NULL);
        // Update basePtr to just be the part that actually points to the
        // start of an soa<> struct for now; the element offset computation
        // doesn't change the slice offset, so we'll incorporate that into
        // the final value right before this method returns.
        basePtr = ExtractInst(fullBasePtr, 0);
        if (resultPtrType == NULL)
            resultPtrType = &rpt;
    }

    // Return the pointer type of the result of this call, for callers that
    // want it.
    if (resultPtrType != NULL) {
        AssertPos(currentPos, ptrType != NULL);
        const CollectionType *ct =
            CastType<CollectionType>(ptrType->GetBaseType());
        AssertPos(currentPos, ct != NULL);
        // The result points at the selected element, with the same
        // variability/const/slice-ness as the incoming pointer.
        *resultPtrType = new PointerType(ct->GetElementType(elementNum),
                                         ptrType->GetVariability(),
                                         ptrType->IsConstType(),
                                         ptrType->IsSlice());
    }

    llvm::Value *resultPtr = NULL;
    if (ptrType == NULL || ptrType->IsUniformType()) {
        // If the pointer is uniform, we can use the regular LLVM GEP.
        llvm::Value *offsets[2] = { LLVMInt32(0), LLVMInt32(elementNum) };
        llvm::ArrayRef<llvm::Value *> arrayRef(&offsets[0], &offsets[2]);
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
        resultPtr =
            llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                            name ? name : "struct_offset", bblock);
#else /* LLVM 3.7+ */
        resultPtr =
            llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef,
                                            name ? name : "struct_offset", bblock);
#endif
    }
    else {
        // Otherwise do the math to find the offset and add it to the given
        // varying pointers
        const StructType *st = CastType<StructType>(ptrType->GetBaseType());
        llvm::Value *offset = NULL;
        if (st != NULL)
            // If the pointer is to a structure, Target::StructOffset() gives
            // us the offset in bytes to the given element of the structure
            offset = g->target->StructOffset(st->LLVMType(g->ctx), elementNum,
                                             bblock);
        else {
            // Otherwise we should have a vector or array here and the offset
            // is given by the element number times the size of the element
            // type of the vector.
            const SequentialType *st =
                CastType<SequentialType>(ptrType->GetBaseType());
            AssertPos(currentPos, st != NULL);
            llvm::Value *size =
                g->target->SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock);
            // Do the scale math in 32 bits when 32-bit addressing applies.
            llvm::Value *scale = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
                LLVMInt32(elementNum) : LLVMInt64(elementNum);
            offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
        }

        offset = SmearUniform(offset, "offset_smear");

        if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
            // If we're doing 32 bit addressing with a 64 bit target, although
            // we did the math above in 32 bit, we need to go to 64 bit before
            // we add the offset to the varying pointers.
            offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");

        resultPtr = BinaryOperator(llvm::Instruction::Add, basePtr, offset,
                                   "struct_ptr_offset");
    }

    // Finally, if had a slice pointer going in, mash back together with
    // the original (unchanged) slice offset.
    if (baseIsSlicePtr)
        return MakeSlicePointer(resultPtr, ExtractInst(fullBasePtr, 1));
    else
        return resultPtr;
}
2731 
2732 
2733 llvm::Value *
2734 FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
2735  if (ptr == NULL) {
2736  AssertPos(currentPos, m->errorCount > 0);
2737  return NULL;
2738  }
2739 
2740  llvm::PointerType *pt =
2741  llvm::dyn_cast<llvm::PointerType>(ptr->getType());
2742  AssertPos(currentPos, pt != NULL);
2743 
2744  if (name == NULL)
2745  name = LLVMGetName(ptr, "_load");
2746 
2747  llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock);
2748 
2749  if (g->opt.forceAlignedMemory &&
2750  llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
2751  inst->setAlignment(g->target->getNativeVectorAlignment());
2752  }
2753 
2754  AddDebugPos(inst);
2755  return inst;
2756 }
2757 
2758 
2759 /** Given a slice pointer to soa'd data that is a basic type (atomic,
2760  pointer, or enum type), use the slice offset to compute pointer(s) to
2761  the appropriate individual data element(s).
2762  */
2763 static llvm::Value *
2765  const PointerType **ptrType) {
2766  Assert(CastType<PointerType>(*ptrType) != NULL);
2767 
2768  llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, LLVMGetName(ptr, "_ptr"));
2769  llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, LLVMGetName(ptr, "_offset"));
2770 
2771  // slicePtr should be a pointer to an soa-width wide array of the
2772  // final atomic/enum/pointer type
2773  const Type *unifBaseType = (*ptrType)->GetBaseType()->GetAsUniformType();
2774  Assert(Type::IsBasicType(unifBaseType));
2775 
2776  // The final pointer type is a uniform or varying pointer to the
2777  // underlying uniform type, depending on whether the given pointer is
2778  // uniform or varying.
2779  *ptrType = (*ptrType)->IsUniformType() ?
2780  PointerType::GetUniform(unifBaseType) :
2781  PointerType::GetVarying(unifBaseType);
2782 
2783  // For uniform pointers, bitcast to a pointer to the uniform element
2784  // type, so that the GEP below does the desired indexing
2785  if ((*ptrType)->IsUniformType())
2786  slicePtr = ctx->BitCastInst(slicePtr, (*ptrType)->LLVMType(g->ctx));
2787 
2788  // And finally index based on the slice offset
2789  return ctx->GetElementPtrInst(slicePtr, sliceOffset, *ptrType,
2790  LLVMGetName(slicePtr, "_final_gep"));
2791 }
2792 
2793 
2794 /** Utility routine that loads from a uniform pointer to soa<> data,
2795  returning a regular uniform (non-SOA result).
2796  */
2797 llvm::Value *
2798 FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask,
2799  const PointerType *ptrType,
2800  const char *name) {
2801  const Type *unifType = ptrType->GetBaseType()->GetAsUniformType();
2802 
2803  const CollectionType *ct = CastType<CollectionType>(ptrType->GetBaseType());
2804  if (ct != NULL) {
2805  // If we have a struct/array, we need to decompose it into
2806  // individual element loads to fill in the result structure since
2807  // the SOA slice of values we need isn't contiguous in memory...
2808  llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx);
2809  llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
2810 
2811  for (int i = 0; i < ct->GetElementCount(); ++i) {
2812  const PointerType *eltPtrType;
2813  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType,
2814  "elt_offset", &eltPtrType);
2815  llvm::Value *eltValue = LoadInst(eltPtr, mask, eltPtrType, name);
2816  retValue = InsertInst(retValue, eltValue, i, "set_value");
2817  }
2818 
2819  return retValue;
2820  }
2821  else {
2822  // Otherwise we've made our way to a slice pointer to a basic type;
2823  // we need to apply the slice offset into this terminal SOA array
2824  // and then perform the final load
2825  ptr = lFinalSliceOffset(this, ptr, &ptrType);
2826  return LoadInst(ptr, mask, ptrType, name);
2827  }
2828 }
2829 
2830 
/** General-purpose masked load through a pointer or reference. Dispatches
    to a plain load, an SOA slice load, or a gather depending on the
    uniformity/slice-ness of the pointer type. If one_elem is set and a
    gather is performed, only the first unmasked lane's value is returned
    (see the "bound uniform" kludge below). */
llvm::Value *
FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask,
                              const Type *ptrRefType, const char *name,
                              bool one_elem) {
    if (ptr == NULL) {
        // NULL pointer here means an error was reported earlier.
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    AssertPos(currentPos, ptrRefType != NULL && mask != NULL);

    if (name == NULL)
        name = LLVMGetName(ptr, "_load");

    // References are handled as uniform pointers to the referenced type.
    const PointerType *ptrType;
    if (CastType<ReferenceType>(ptrRefType) != NULL)
        ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
    else {
        ptrType = CastType<PointerType>(ptrRefType);
        AssertPos(currentPos, ptrType != NULL);
    }

    // Forward-declared (undefined) structs have no layout, so a load
    // through them can't be emitted.
    if (CastType<UndefinedStructType>(ptrType->GetBaseType())) {
        Error(currentPos, "Unable to load to undefined struct type \"%s\".",
              ptrType->GetBaseType()->GetString().c_str());
        return NULL;
    }

    if (ptrType->IsUniformType()) {
        if (ptrType->IsSlice()) {
            // Uniform slice pointer into SOA data: element-wise load.
            return loadUniformFromSOA(ptr, mask, ptrType, name);
        }
        else {
            // FIXME: same issue as above load inst regarding alignment...
            //
            // If the ptr is a straight up regular pointer, then just issue
            // a regular load. First figure out the alignment; in general we
            // can just assume the natural alignment (0 here), but for varying
            // atomic types, we need to make sure that the compiler emits
            // unaligned vector loads, so we specify a reduced alignment here.
            int align = 0;
            const AtomicType *atomicType =
                CastType<AtomicType>(ptrType->GetBaseType());
            if (atomicType != NULL && atomicType->IsVaryingType())
                // We actually just want to align to the vector element
                // alignment, but can't easily get that here, so just tell LLVM
                // it's totally unaligned. (This shouldn't make any difference
                // vs the proper alignment in practice.)
                align = 1;
            llvm::Instruction *inst = new llvm::LoadInst(ptr, name,
                                                         false /* not volatile */,
                                                         align, bblock);
            AddDebugPos(inst);
            return inst;
        }
    }
    else {
        // Otherwise we should have a varying ptr and it's time for a
        // gather.
        llvm::Value *gather_result = gather(ptr, ptrType, GetFullMask(), name);
        if (!one_elem)
            return gather_result;

        // It is a kludge. When we dereference varying pointer to uniform struct
        // with "bound uniform" member, we should return first unmasked member.
        Warning(currentPos, "Dereferencing varying pointer to uniform struct with 'bound uniform' member,\n"
                " only one value will survive. Possible loss of data.");
        // Call the target-dependent movmsk function to turn the vector mask
        // into an i64 value
        std::vector<Symbol *> mm;
        m->symbolTable->LookupFunction("__movmsk", &mm);
        if (g->target->getMaskBitCount() == 1)
            AssertPos(currentPos, mm.size() == 1);
        else
            // There should be one with signed int signature, one unsigned int.
            AssertPos(currentPos, mm.size() == 2);
        // We can actually call either one, since both are i32s as far as
        // LLVM's type system is concerned...
        llvm::Function *fmm = mm[0]->function;
        llvm::Value *int_mask = CallInst(fmm, NULL, mask, LLVMGetName(mask, "_movmsk"));
        // Index of the first active lane = count of trailing zeros in the
        // integer mask; use it to pick that lane out of the gather result.
        std::vector<Symbol *> lz;
        m->symbolTable->LookupFunction("__count_trailing_zeros_i64", &lz);
        llvm::Function *flz = lz[0]->function;
        llvm::Value *elem_idx = CallInst(flz, NULL, int_mask, LLVMGetName(mask, "_clz"));
        llvm::Value *elem = llvm::ExtractElementInst::Create(gather_result, elem_idx, LLVMGetName(gather_result, "_umasked_elem"), bblock);
        return elem;
    }
}
2919 
2920 
/** Emit a gather through the given varying pointer: recursively decomposes
    collection types into per-element gathers, resolves slice pointers, and
    for basic types calls the appropriate __pseudo_gather* function (which
    later optimization passes may turn into something better). */
llvm::Value *
FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType,
                            llvm::Value *mask, const char *name) {
    // We should have a varying pointer if we get here...
    AssertPos(currentPos, ptrType->IsVaryingType());

    const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType();
    llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);

    const CollectionType *collectionType =
        CastType<CollectionType>(ptrType->GetBaseType());
    if (collectionType != NULL) {
        // For collections, recursively gather element wise to find the
        // result.
        llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);

        const CollectionType *returnCollectionType =
            CastType<CollectionType>(returnType->GetBaseType());

        for (int i = 0; i < collectionType->GetElementCount(); ++i) {
            const PointerType *eltPtrType;
            llvm::Value *eltPtr =
                AddElementOffset(ptr, i, ptrType, "gather_elt_ptr", &eltPtrType);

            eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType);

            // It is a kludge. When we dereference varying pointer to uniform struct
            // with "bound uniform" member, we should return first unmasked member.
            int need_one_elem = CastType<StructType>(ptrType->GetBaseType()) &&
                returnCollectionType->GetElementType(i)->IsUniformType();
            // This in turn will be another gather
            llvm::Value *eltValues = LoadInst(eltPtr, mask, eltPtrType, name, need_one_elem);

            retValue = InsertInst(retValue, eltValues, i, "set_value");
        }
        return retValue;
    }
    else if (ptrType->IsSlice()) {
        // If we have a slice pointer, we need to add the final slice
        // offset here right before issuing the actual gather
        //
        // FIXME: would it be better to do the corresponding same thing for
        // all of the varying offsets stuff here (and in scatter)?
        ptr = lFinalSliceOffset(this, ptr, &ptrType);
    }

    // Otherwise we should just have a basic scalar or pointer type and we
    // can go and do the actual gather
    AddInstrumentationPoint("gather");

    // Figure out which gather function to call based on the size of
    // the elements.
    const PointerType *pt = CastType<PointerType>(returnType);
    const char *funcName = NULL;
    if (pt != NULL)
        // Pointers gather as i32 on 32-bit targets, i64 otherwise.
        funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" :
            "__pseudo_gather64_i64";
    else if (llvmReturnType == LLVMTypes::DoubleVectorType)
        funcName = g->target->is32Bit() ? "__pseudo_gather32_double" :
            "__pseudo_gather64_double";
    else if (llvmReturnType == LLVMTypes::Int64VectorType)
        funcName = g->target->is32Bit() ? "__pseudo_gather32_i64" :
            "__pseudo_gather64_i64";
    else if (llvmReturnType == LLVMTypes::FloatVectorType)
        funcName = g->target->is32Bit() ? "__pseudo_gather32_float" :
            "__pseudo_gather64_float";
    else if (llvmReturnType == LLVMTypes::Int32VectorType)
        funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" :
            "__pseudo_gather64_i32";
    else if (llvmReturnType == LLVMTypes::Int16VectorType)
        funcName = g->target->is32Bit() ? "__pseudo_gather32_i16" :
            "__pseudo_gather64_i16";
    else {
        AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType);
        funcName = g->target->is32Bit() ? "__pseudo_gather32_i8" :
            "__pseudo_gather64_i8";
    }

    llvm::Function *gatherFunc = m->module->getFunction(funcName);
    AssertPos(currentPos, gatherFunc != NULL);

    llvm::Value *gatherCall = CallInst(gatherFunc, NULL, ptr, mask, name);

    // Add metadata about the source file location so that the
    // optimization passes can print useful performance warnings if we
    // can't optimize out this gather
    if (disableGSWarningCount == 0)
        addGSMetadata(gatherCall, currentPos);

    return gatherCall;
}
3012 
3013 
3014 /** Add metadata to the given instruction to encode the current source file
3015  position. This data is used in the lGetSourcePosFromMetadata()
3016  function in opt.cpp.
3017 */
3018 void
3020  llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(v);
3021  if (inst == NULL)
3022  return;
3023 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3024  llvm::Value *str = llvm::MDString::get(*g->ctx, pos.name);
3025 #else /* LLVN 3.6+ */
3026  llvm::MDString *str = llvm::MDString::get(*g->ctx, pos.name);
3027 #endif
3028  llvm::MDNode *md = llvm::MDNode::get(*g->ctx, str);
3029  inst->setMetadata("filename", md);
3030 
3031 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3032  llvm::Value *first_line = LLVMInt32(pos.first_line);
3033 #else /* LLVN 3.6+ */
3034  llvm::Metadata *first_line = llvm::ConstantAsMetadata::get(LLVMInt32(pos.first_line));
3035 #endif
3036  md = llvm::MDNode::get(*g->ctx, first_line);
3037  inst->setMetadata("first_line", md);
3038 
3039 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3040  llvm::Value *first_column = LLVMInt32(pos.first_column);
3041 #else /* LLVN 3.6+ */
3042  llvm::Metadata *first_column = llvm::ConstantAsMetadata::get(LLVMInt32(pos.first_column));
3043 #endif
3044  md = llvm::MDNode::get(*g->ctx, first_column);
3045  inst->setMetadata("first_column", md);
3046 
3047 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3048  llvm::Value *last_line = LLVMInt32(pos.last_line);
3049 #else /* LLVN 3.6+ */
3050  llvm::Metadata *last_line = llvm::ConstantAsMetadata::get(LLVMInt32(pos.last_line));
3051 #endif
3052  md = llvm::MDNode::get(*g->ctx, last_line);
3053  inst->setMetadata("last_line", md);
3054 
3055 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 /* 3.2, 3.3, 3.4, 3.5 */
3056  llvm::Value *last_column = LLVMInt32(pos.last_column);
3057 #else /* LLVN 3.6+ */
3058  llvm::Metadata *last_column = llvm::ConstantAsMetadata::get(LLVMInt32(pos.last_column));
3059 #endif
3060  md = llvm::MDNode::get(*g->ctx, last_column);
3061  inst->setMetadata("last_column", md);
3062 }
3063 
3064 
/** Emit an alloca of the given LLVM type, by default in the function's
    dedicated alloca block (so stack allocations aren't repeated inside
    loops), optionally with an explicit alignment.
    @param llvmType      Type to allocate; NULL indicates an earlier error.
    @param name          Optional name for the resulting value.
    @param align         Requested alignment in bytes; 0 means "default".
    @param atEntryBlock  If true, insert in allocaBlock; otherwise in the
                         current basic block.
    @return              The new llvm::AllocaInst (or NULL on prior error). */
llvm::Value *
FunctionEmitContext::AllocaInst(llvm::Type *llvmType,
                                const char *name, int align,
                                bool atEntryBlock) {
    if (llvmType == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    llvm::AllocaInst *inst = NULL;
    if (atEntryBlock) {
        // We usually insert it right before the jump instruction at the
        // end of allocaBlock
        llvm::Instruction *retInst = allocaBlock->getTerminator();
        AssertPos(currentPos, retInst);
        // LLVM 5.0 changed the AllocaInst constructor to take an explicit
        // address space, hence the two variants below.
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
        inst = new llvm::AllocaInst(llvmType, name ? name : "", retInst);
#else // LLVM 5.0+
        unsigned AS = llvmFunction->getParent()->getDataLayout().getAllocaAddrSpace();
        inst = new llvm::AllocaInst(llvmType, AS, name ? name : "", retInst);
#endif
    }
    else {
        // Unless the caller overrode the default and wants it in the
        // current basic block
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
        inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock);
#else // LLVM 5.0+
        unsigned AS = llvmFunction->getParent()->getDataLayout().getAllocaAddrSpace();
        inst = new llvm::AllocaInst(llvmType, AS, name ? name : "", bblock);
#endif
    }

    // If no alignment was specified but we have an array of a uniform
    // type, then align it to the native vector alignment; it's not
    // unlikely that this array will be loaded into varying variables with
    // what will be aligned accesses if the uniform -> varying load is done
    // in regular chunks.
    llvm::ArrayType *arrayType =
        llvm::dyn_cast<llvm::ArrayType>(llvmType);
    if (align == 0 && arrayType != NULL &&
        !llvm::isa<llvm::VectorType>(arrayType->getElementType()))
        align = g->target->getNativeVectorAlignment();

    if (align != 0)
        inst->setAlignment(align);
    // Don't add debugging info to alloca instructions
    return inst;
}
3114 
3115 
/** Code to store the given varying value to the given location, only
    storing the elements that correspond to active program instances as
    given by the provided storeMask value. Note that the lvalue is only a
    single pointer, not a varying lvalue of one pointer per program
    instance (that case is handled by scatters).
 */
void
FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
                                 const Type *ptrType, llvm::Value *mask) {
    if (value == NULL || ptr == NULL) {
        // A NULL value/pointer means an error was already reported.
        AssertPos(currentPos, m->errorCount > 0);
        return;
    }

    AssertPos(currentPos, CastType<PointerType>(ptrType) != NULL);
    AssertPos(currentPos, ptrType->IsUniformType());

    const Type *valueType = ptrType->GetBaseType();
    const CollectionType *collectionType = CastType<CollectionType>(valueType);
    if (collectionType != NULL) {
        // Assigning a structure / array / vector. Handle each element
        // individually with what turns into a recursive call to
        // maskedStore()
        for (int i = 0; i < collectionType->GetElementCount(); ++i) {
            const Type *eltType = collectionType->GetElementType(i);
            if (eltType == NULL) {
                Assert(m->errorCount > 0);
                continue;
            }
            llvm::Value *eltValue = ExtractInst(value, i, "value_member");
            llvm::Value *eltPtr =
                AddElementOffset(ptr, i, ptrType, "struct_ptr_ptr");
            const Type *eltPtrType = PointerType::GetUniform(eltType);
            StoreInst(eltValue, eltPtr, mask, eltType, eltPtrType);
        }
        return;
    }

    // We must have a regular atomic, enumerator, or pointer type at this
    // point.
    AssertPos(currentPos, Type::IsBasicType(valueType));
    valueType = valueType->GetAsNonConstType();

    // Figure out if we need a 8, 16, 32 or 64-bit masked store.
    llvm::Function *maskedStoreFunc = NULL;
    llvm::Type *llvmValueType = value->getType();

    const PointerType *pt = CastType<PointerType>(valueType);
    if (pt != NULL) {
        if (pt->IsSlice()) {
            // Masked store of (varying) slice pointer.
            AssertPos(currentPos, pt->IsVaryingType());

            // First, extract the pointer from the slice struct and masked
            // store that.
            llvm::Value *v0 = ExtractInst(value, 0);
            llvm::Value *p0 = AddElementOffset(ptr, 0, ptrType);
            maskedStore(v0, p0, PointerType::GetUniform(pt->GetAsNonSlice()),
                        mask);

            // And then do same for the integer offset
            llvm::Value *v1 = ExtractInst(value, 1);
            llvm::Value *p1 = AddElementOffset(ptr, 1, ptrType);
            const Type *offsetType = AtomicType::VaryingInt32;
            maskedStore(v1, p1, PointerType::GetUniform(offsetType), mask);

            return;
        }

        // Non-slice pointers store as i32 on 32-bit targets, i64 otherwise.
        if (g->target->is32Bit())
            maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
        else
            maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
    }
    else if (llvmValueType == LLVMTypes::Int1VectorType) {
        // There's no masked-store primitive for i1 vectors, so emulate it
        // with a load / bitwise-blend / store sequence.
        llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask,
                                              LLVMMaskAllOn, "~mask");
        llvm::Value *old = LoadInst(ptr);
        llvm::Value *maskedOld = BinaryOperator(llvm::Instruction::And, old,
                                                notMask, "old&~mask");
        llvm::Value *maskedNew = BinaryOperator(llvm::Instruction::And, value,
                                                mask, "new&mask");
        llvm::Value *final = BinaryOperator(llvm::Instruction::Or, maskedOld,
                                            maskedNew, "old_new_result");
        StoreInst(final, ptr);
        return;
    }
    else if (llvmValueType == LLVMTypes::DoubleVectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_double");
    }
    else if (llvmValueType == LLVMTypes::Int64VectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
    }
    else if (llvmValueType == LLVMTypes::FloatVectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_float");
    }
    else if (llvmValueType == LLVMTypes::Int32VectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
    }
    else if (llvmValueType == LLVMTypes::Int16VectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i16");
    }
    else if (llvmValueType == LLVMTypes::Int8VectorType) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i8");
    }
    AssertPos(currentPos, maskedStoreFunc != NULL);

    std::vector<llvm::Value *> args;
    args.push_back(ptr);
    args.push_back(value);
    args.push_back(mask);
    CallInst(maskedStoreFunc, NULL, args);
}
3229 
3230 
3231 
3232 /** Scatter the given varying value to the locations given by the varying
3233  lvalue (which should be an array of pointers with size equal to the
3234  target's vector width. We want to store each rvalue element at the
3235  corresponding pointer's location, *if* the mask for the corresponding
3236  program instance are on. If they're off, don't do anything.
3237 */
3238 void
3239 FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
3240  const Type *valueType, const Type *origPt,
3241  llvm::Value *mask) {
3242  const PointerType *ptrType = CastType<PointerType>(origPt);
3243  AssertPos(currentPos, ptrType != NULL);
3244  AssertPos(currentPos, ptrType->IsVaryingType());
3245 
3246  const CollectionType *srcCollectionType =
3247  CastType<CollectionType>(valueType);
3248  if (srcCollectionType != NULL) {
3249  // We're scattering a collection type--we need to keep track of the
3250  // source type (the type of the data values to be stored) and the
3251  // destination type (the type of objects in memory that will be
3252  // stored into) separately. This is necessary so that we can get
3253  // all of the addressing calculations right if we're scattering
3254  // from a varying struct to an array of uniform instances of the
3255  // same struct type, versus scattering into an array of varying
3256  // instances of the struct type, etc.
3257  const CollectionType *dstCollectionType =
3258  CastType<CollectionType>(ptrType->GetBaseType());
3259  AssertPos(currentPos, dstCollectionType != NULL);
3260 
3261  // Scatter the collection elements individually
3262  for (int i = 0; i < srcCollectionType->GetElementCount(); ++i) {
3263  // First, get the values for the current element out of the
3264  // source.
3265  llvm::Value *eltValue = ExtractInst(value, i);
3266  const Type *srcEltType = srcCollectionType->GetElementType(i);
3267 
3268  // We may be scattering a uniform atomic element; in this case
3269  // we'll smear it out to be varying before making the recursive
3270  // scatter() call below.
3271  if (srcEltType->IsUniformType() && Type::IsBasicType(srcEltType)) {
3272  eltValue = SmearUniform(eltValue, "to_varying");
3273  srcEltType = srcEltType->GetAsVaryingType();
3274  }
3275 
3276  // Get the (varying) pointer to the i'th element of the target
3277  // collection
3278  llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType);
3279 
3280  // The destination element type may be uniform (e.g. if we're
3281  // scattering to an array of uniform structs). Thus, we need
3282  // to be careful about passing the correct type to
3283  // addVaryingOffsetsIfNeeded() here.
3284  const Type *dstEltType = dstCollectionType->GetElementType(i);
3285  const PointerType *dstEltPtrType = PointerType::GetVarying(dstEltType);
3286  if (ptrType->IsSlice())
3287  dstEltPtrType = dstEltPtrType->GetAsSlice();
3288 
3289  eltPtr = addVaryingOffsetsIfNeeded(eltPtr, dstEltPtrType);
3290 
3291  // And recursively scatter() until we hit a basic type, at
3292  // which point the actual memory operations can be performed...
3293  scatter(eltValue, eltPtr, srcEltType, dstEltPtrType, mask);
3294  }
3295  return;
3296  }
3297  else if (ptrType->IsSlice()) {
3298  // As with gather, we need to add the final slice offset finally
3299  // once we get to a terminal SOA array of basic types..
3300  ptr = lFinalSliceOffset(this, ptr, &ptrType);
3301  }
3302 
3303  const PointerType *pt = CastType<PointerType>(valueType);
3304 
3305  // And everything should be a pointer or atomic (or enum) from here on out...
3306  AssertPos(currentPos,
3307  pt != NULL
3308  || CastType<AtomicType>(valueType) != NULL
3309  || CastType<EnumType>(valueType) != NULL);
3310 
3311  llvm::Type *type = value->getType();
3312  const char *funcName = NULL;
3313  if (pt != NULL) {
3314  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" :
3315  "__pseudo_scatter64_i64";
3316  }
3317  else if (type == LLVMTypes::DoubleVectorType) {
3318  funcName = g->target->is32Bit() ? "__pseudo_scatter32_double" :
3319  "__pseudo_scatter64_double";
3320  }
3321  else if (type == LLVMTypes::Int64VectorType) {
3322  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i64" :
3323  "__pseudo_scatter64_i64";
3324  }
3325  else if (type == LLVMTypes::FloatVectorType) {
3326  funcName = g->target->is32Bit() ? "__pseudo_scatter32_float" :
3327  "__pseudo_scatter64_float";
3328  }
3329  else if (type == LLVMTypes::Int32VectorType) {
3330  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" :
3331  "__pseudo_scatter64_i32";
3332  }
3333  else if (type == LLVMTypes::Int16VectorType) {
3334  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i16" :
3335  "__pseudo_scatter64_i16";
3336  }
3337  else if (type == LLVMTypes::Int8VectorType) {
3338  funcName = g->target->is32Bit() ? "__pseudo_scatter32_i8" :
3339  "__pseudo_scatter64_i8";
3340  }
3341 
3342  llvm::Function *scatterFunc = m->module->getFunction(funcName);
3343  AssertPos(currentPos, scatterFunc != NULL);
3344 
3345  AddInstrumentationPoint("scatter");
3346 
3347  std::vector<llvm::Value *> args;
3348  args.push_back(ptr);
3349  args.push_back(value);
3350  args.push_back(mask);
3351  llvm::Value *inst = CallInst(scatterFunc, NULL, args);
3352 
3353  if (disableGSWarningCount == 0)
3354  addGSMetadata(inst, currentPos);
3355 }
3356 
3357 
3358 void
3359 FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
3360  if (value == NULL || ptr == NULL) {
3361  // may happen due to error elsewhere
3362  AssertPos(currentPos, m->errorCount > 0);
3363  return;
3364  }
3365 
3366  llvm::PointerType *pt =
3367  llvm::dyn_cast<llvm::PointerType>(ptr->getType());
3368  AssertPos(currentPos, pt != NULL);
3369 
3370  llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock);
3371 
3372  if (g->opt.forceAlignedMemory &&
3373  llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
3374  inst->setAlignment(g->target->getNativeVectorAlignment());
3375  }
3376 
3377  AddDebugPos(inst);
3378 }
3379 
3380 
3381 void
3382 FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
3383  llvm::Value *mask, const Type *valueType,
3384  const Type *ptrRefType) {
3385  if (value == NULL || ptr == NULL) {
3386  // may happen due to error elsewhere
3387  AssertPos(currentPos, m->errorCount > 0);
3388  return;
3389  }
3390 
3391  const PointerType *ptrType;
3392  if (CastType<ReferenceType>(ptrRefType) != NULL)
3393  ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget());
3394  else {
3395  ptrType = CastType<PointerType>(ptrRefType);
3396  AssertPos(currentPos, ptrType != NULL);
3397  }
3398 
3399  if (CastType<UndefinedStructType>(ptrType->GetBaseType())) {
3400  Error(currentPos, "Unable to store to undefined struct type \"%s\".",
3401  ptrType->GetBaseType()->GetString().c_str());
3402  return;
3403  }
3404 
3405  // Figure out what kind of store we're doing here
3406  if (ptrType->IsUniformType()) {
3407  if (ptrType->IsSlice())
3408  // storing a uniform value to a single slice of a SOA type
3409  storeUniformToSOA(value, ptr, mask, valueType, ptrType);
3410  else if (ptrType->GetBaseType()->IsUniformType())
3411  // the easy case
3412  StoreInst(value, ptr);
3413  else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations)
3414  // Otherwise it is a masked store unless we can determine that the
3415  // mask is all on... (Unclear if this check is actually useful.)
3416  StoreInst(value, ptr);
3417  else
3418  maskedStore(value, ptr, ptrType, mask);
3419  }
3420  else {
3421  AssertPos(currentPos, ptrType->IsVaryingType());
3422  // We have a varying ptr (an array of pointers), so it's time to
3423  // scatter
3424  scatter(value, ptr, valueType, ptrType, GetFullMask());
3425  }
3426 }
3427 
3428 
3429 /** Store a uniform type to SOA-laid-out memory.
3430  */
3431 void
3432 FunctionEmitContext::storeUniformToSOA(llvm::Value *value, llvm::Value *ptr,
3433  llvm::Value *mask, const Type *valueType,
3434  const PointerType *ptrType) {
3436  valueType));
3437 
3438  const CollectionType *ct = CastType<CollectionType>(valueType);
3439  if (ct != NULL) {
3440  // Handle collections element wise...
3441  for (int i = 0; i < ct->GetElementCount(); ++i) {
3442  llvm::Value *eltValue = ExtractInst(value, i);
3443  const Type *eltType = ct->GetElementType(i);
3444  const PointerType *dstEltPtrType;
3445  llvm::Value *dstEltPtr =
3446  AddElementOffset(ptr, i, ptrType, "slice_offset",
3447  &dstEltPtrType);
3448  StoreInst(eltValue, dstEltPtr, mask, eltType, dstEltPtrType);
3449  }
3450  }
3451  else {
3452  // We're finally at a leaf SOA array; apply the slice offset and
3453  // then we can do a final regular store
3454  AssertPos(currentPos, Type::IsBasicType(valueType));
3455  ptr = lFinalSliceOffset(this, ptr, &ptrType);
3456  StoreInst(value, ptr);
3457  }
3458 }
3459 
3460 
3461 void
3462 FunctionEmitContext::MemcpyInst(llvm::Value *dest, llvm::Value *src,
3463  llvm::Value *count, llvm::Value *align) {
3464  dest = BitCastInst(dest, LLVMTypes::VoidPointerType);
3465  src = BitCastInst(src, LLVMTypes::VoidPointerType);
3466  if (count->getType() != LLVMTypes::Int64Type) {
3467  AssertPos(currentPos, count->getType() == LLVMTypes::Int32Type);
3468  count = ZExtInst(count, LLVMTypes::Int64Type, "count_to_64");
3469  }
3470  if (align == NULL)
3471  align = LLVMInt32(1);
3472 
3473  llvm::Constant *mcFunc =
3474 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
3475  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64",
3479 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_6_0 // LLVM 5.0-6.0
3480  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64",
3484 #else // LLVM 7.0+
3485  // Now alignment goes as an attribute, not as a parameter.
3486  // See LLVM r322965/r323597 for more details.
3487  m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64",
3491 #endif
3492 
3493  AssertPos(currentPos, mcFunc != NULL);
3494  AssertPos(currentPos, llvm::isa<llvm::Function>(mcFunc));
3495 
3496  std::vector<llvm::Value *> args;
3497  args.push_back(dest);
3498  args.push_back(src);
3499  args.push_back(count);
3500 #if ISPC_LLVM_VERSION < ISPC_LLVM_7_0
3501  // Don't bother about setting alignment for 7.0+, as this parameter is never really used by ISPC.
3502  args.push_back(align);
3503 #endif
3504  args.push_back(LLVMFalse); /* not volatile */
3505  CallInst(mcFunc, NULL, args, "");
3506 }
3507 
3508 
3509 void
3510 FunctionEmitContext::BranchInst(llvm::BasicBlock *dest) {
3511  llvm::Instruction *b = llvm::BranchInst::Create(dest, bblock);
3512  AddDebugPos(b);
3513 }
3514 
3515 
3516 void
3517 FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock,
3518  llvm::BasicBlock *falseBlock,
3519  llvm::Value *test) {
3520  if (test == NULL) {
3521  AssertPos(currentPos, m->errorCount > 0);
3522  return;
3523  }
3524 
3525  llvm::Instruction *b =
3526  llvm::BranchInst::Create(trueBlock, falseBlock, test, bblock);
3527  AddDebugPos(b);
3528 }
3529 
3530 
3531 llvm::Value *
3532 FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) {
3533  if (v == NULL) {
3534  AssertPos(currentPos, m->errorCount > 0);
3535  return NULL;
3536  }
3537 
3538  if (name == NULL) {
3539  char buf[32];
3540  sprintf(buf, "_extract_%d", elt);
3541  name = LLVMGetName(v, buf);
3542  }
3543 
3544  llvm::Instruction *ei = NULL;
3545  if (llvm::isa<llvm::VectorType>(v->getType()))
3546  ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), name, bblock);
3547  else
3548  ei = llvm::ExtractValueInst::Create(v, elt, name, bblock);
3549  AddDebugPos(ei);
3550  return ei;
3551 }
3552 
3553 
3554 llvm::Value *
3555 FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
3556  const char *name) {
3557  if (v == NULL || eltVal == NULL) {
3558  AssertPos(currentPos, m->errorCount > 0);
3559  return NULL;
3560  }
3561 
3562  if (name == NULL) {
3563  char buf[32];
3564  sprintf(buf, "_insert_%d", elt);
3565  name = LLVMGetName(v, buf);
3566  }
3567 
3568  llvm::Instruction *ii = NULL;
3569  if (llvm::isa<llvm::VectorType>(v->getType()))
3570  ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt),
3571  name, bblock);
3572  else
3573  ii = llvm::InsertValueInst::Create(v, eltVal, elt, name, bblock);
3574  AddDebugPos(ii);
3575  return ii;
3576 }
3577 
3578 
3579 llvm::Value *
3580 FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask,
3581  const char *name) {
3582  if (v1 == NULL || v2 == NULL || mask == NULL) {
3583  AssertPos(currentPos, m->errorCount > 0);
3584  return NULL;
3585  }
3586 
3587  if (name == NULL) {
3588  char buf[32];
3589  sprintf(buf, "_shuffle");
3590  name = LLVMGetName(v1, buf);
3591  }
3592 
3593  llvm::Instruction *ii = new llvm::ShuffleVectorInst(v1, v2, mask, name, bblock);
3594 
3595  AddDebugPos(ii);
3596  return ii;
3597 }
3598 
3599 
3600 llvm::Value *
3601 FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type* vecType,
3602  const char *name) {
3603  if (v == NULL || vecType == NULL) {
3604  AssertPos(currentPos, m->errorCount > 0);
3605  return NULL;
3606  }
3607 
3608  llvm::VectorType *ty = llvm::dyn_cast<llvm::VectorType>(vecType);
3609  Assert(ty && ty->getVectorElementType() == v->getType());
3610 
3611  if (name == NULL) {
3612  char buf[32];
3613  sprintf(buf, "_broadcast");
3614  name = LLVMGetName(v, buf);
3615  }
3616 
3617  // Generate the following sequence:
3618  // %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0
3619  // %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef,
3620  // <4 x i32> zeroinitializer
3621 
3622  llvm::Value *undef1 = llvm::UndefValue::get(vecType);
3623  llvm::Value *undef2 = llvm::UndefValue::get(vecType);
3624 
3625  // InsertElement
3626  llvm::Twine tw = llvm::Twine(name) + llvm::Twine("_init");
3627  llvm::Value *insert = InsertInst(undef1, v, 0, tw.str().c_str());
3628 
3629  // ShuffleVector
3630  llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
3631  vecType->getVectorNumElements(),
3632  llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
3633  llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, name);
3634 
3635  return ret;
3636 }
3637 
3638 
3639 llvm::PHINode *
3640 FunctionEmitContext::PhiNode(llvm::Type *type, int count,
3641  const char *name) {
3642  llvm::PHINode *pn = llvm::PHINode::Create(type, count,
3643  name ? name : "phi", bblock);
3644  AddDebugPos(pn);
3645  return pn;
3646 }
3647 
3648 
3649 llvm::Instruction *
3650 FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0,
3651  llvm::Value *val1, const char *name) {
3652  if (test == NULL || val0 == NULL || val1 == NULL) {
3653  AssertPos(currentPos, m->errorCount > 0);
3654  return NULL;
3655  }
3656 
3657  if (name == NULL)
3658  name = LLVMGetName(test, "_select");
3659 
3660  llvm::Instruction *inst = llvm::SelectInst::Create(test, val0, val1, name,
3661  bblock);
3662  AddDebugPos(inst);
3663  return inst;
3664 }
3665 
3666 
3667 /** Given a value representing a function to be called or possibly-varying
3668  pointer to a function to be called, figure out how many arguments the
3669  function has. */
3670 static unsigned int
3671 lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
3672  llvm::FunctionType *ft =
3673  llvm::dyn_cast<llvm::FunctionType>(callee->getType());
3674 
3675  if (ft == NULL) {
3676  llvm::PointerType *pt =
3677  llvm::dyn_cast<llvm::PointerType>(callee->getType());
3678  if (pt == NULL) {
3679  // varying--in this case, it must be the version of the
3680  // function that takes a mask
3681  return funcType->GetNumParameters() + 1;
3682  }
3683  ft = llvm::dyn_cast<llvm::FunctionType>(pt->getElementType());
3684  }
3685 
3686  Assert(ft != NULL);
3687  return ft->getNumParams();
3688 }
3689 
3690 
/** Emit a call to 'func' with the given arguments.  'func' is either a
    single (uniform) function / function pointer, or a vector of function
    pointers, one per program instance.  If the callee expects one more
    parameter than was supplied, the current full execution mask is
    appended as the final argument.  For the varying case, a loop is
    emitted that calls each unique function pointer held by any active
    lane, with a mask restricted to the lanes holding that pointer.
    Returns the call result (or NULL after an earlier compile error). */
llvm::Value *
FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
                              const std::vector<llvm::Value *> &args,
                              const char *name) {
    if (func == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    std::vector<llvm::Value *> argVals = args;
    // Most of the time, the mask is passed as the last argument. this
    // isn't the case for things like intrinsics, builtins, and extern "C"
    // functions from the application. Add the mask if it's needed.
    unsigned int calleeArgCount = lCalleeArgCount(func, funcType);
    AssertPos(currentPos, argVals.size() + 1 == calleeArgCount ||
              argVals.size() == calleeArgCount);
    if (argVals.size() + 1 == calleeArgCount)
        argVals.push_back(GetFullMask());

    if (llvm::isa<llvm::VectorType>(func->getType()) == false) {
        // Regular 'uniform' function call--just one function or function
        // pointer, so just emit the IR directly.
        llvm::Instruction *ci =
            llvm::CallInst::Create(func, argVals, name ? name : "", bblock);

        // Copy noalias attribute to call instruction, to enable better
        // alias analysis.
        // TODO: what other attributes needs to be copied?
        // TODO: do the same for varing path.
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3 && ISPC_LLVM_VERSION < ISPC_LLVM_5_0 // LLVM 3.3-4.0
        llvm::CallInst *cc = llvm::dyn_cast<llvm::CallInst>(ci);
        if (cc &&
            cc->getCalledFunction() &&
            cc->getCalledFunction()->doesNotAlias(0)) {
            cc->addAttribute(0, llvm::Attribute::NoAlias);
        }
#else // LLVM 5.0+
        // LLVM 5.0 renamed the return-value noalias query/attribute index.
        llvm::CallInst *cc = llvm::dyn_cast<llvm::CallInst>(ci);
        if (cc &&
            cc->getCalledFunction() &&
            cc->getCalledFunction()->returnDoesNotAlias()) {
            cc->addAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::NoAlias);
        }
#endif

        AddDebugPos(ci);
        return ci;
    }
    else {
        // Emit the code for a varying function call, where we have an
        // vector of function pointers, one for each program instance. The
        // basic strategy is that we go through the function pointers, and
        // for the executing program instances, for each unique function
        // pointer that's in the vector, call that function with a mask
        // equal to the set of active program instances that also have that
        // function pointer. When all unique function pointers have been
        // called, we're done.

        llvm::BasicBlock *bbTest = CreateBasicBlock("varying_funcall_test");
        llvm::BasicBlock *bbCall = CreateBasicBlock("varying_funcall_call");
        llvm::BasicBlock *bbDone = CreateBasicBlock("varying_funcall_done");

        // Get the current mask value so we can restore it later
        llvm::Value *origMask = GetInternalMask();

        // First allocate memory to accumulate the various program
        // instances' return values...
        const Type *returnType = funcType->GetReturnType();
        llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
        llvm::Value *resultPtr = NULL;
        if (llvmReturnType->isVoidTy() == false)
            resultPtr = AllocaInst(llvmReturnType);

        // The memory pointed to by maskPointer tracks the set of program
        // instances for which we still need to call the function they are
        // pointing to. It starts out initialized with the mask of
        // currently running program instances.
        llvm::Value *maskPtr = AllocaInst(LLVMTypes::MaskType);
        StoreInst(GetFullMask(), maskPtr);

        // And now we branch to the test to see if there's more work to be
        // done.
        BranchInst(bbTest);

        // bbTest: are any lanes of the mask still on? If so, jump to
        // bbCall
        SetCurrentBasicBlock(bbTest); {
            llvm::Value *maskLoad = LoadInst(maskPtr);
            llvm::Value *any = Any(maskLoad);
            BranchInst(bbCall, bbDone, any);
        }

        // bbCall: this is the body of the loop that calls out to one of
        // the active function pointer values.
        SetCurrentBasicBlock(bbCall); {
            // Figure out the first lane that still needs its function
            // pointer to be called.
            llvm::Value *currentMask = LoadInst(maskPtr);
            llvm::Function *cttz =
                m->module->getFunction("__count_trailing_zeros_i64");
            AssertPos(currentPos, cttz != NULL);
            llvm::Value *firstLane64 = CallInst(cttz, NULL, LaneMask(currentMask),
                                                "first_lane64");
            llvm::Value *firstLane =
                TruncInst(firstLane64, LLVMTypes::Int32Type, "first_lane32");

            // Get the pointer to the function we're going to call this
            // time through: ftpr = func[firstLane]
            llvm::Value *fptr =
                llvm::ExtractElementInst::Create(func, firstLane,
                                                 "extract_fptr", bblock);

            // Smear it out into an array of function pointers
            llvm::Value *fptrSmear = SmearUniform(fptr, "func_ptr");

            // fpOverlap = (fpSmearAsVec == fpOrigAsVec). This gives us a
            // mask for the set of program instances that have the same
            // value for their function pointer.
            llvm::Value *fpOverlap =
                CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                        fptrSmear, func);
            fpOverlap = I1VecToBoolVec(fpOverlap);

            // Figure out the mask to use when calling the function
            // pointer: we need to AND the current execution mask to handle
            // the case of any non-running program instances that happen to
            // have this function pointer value.
            // callMask = (currentMask & fpOverlap)
            llvm::Value *callMask =
                BinaryOperator(llvm::Instruction::And, currentMask, fpOverlap,
                               "call_mask");

            // Set the mask
            SetInternalMask(callMask);

            // bitcast the i32/64 function pointer to the actual function
            // pointer type.
            llvm::Type *llvmFuncType = funcType->LLVMFunctionType(g->ctx);
            llvm::Type *llvmFPtrType = llvm::PointerType::get(llvmFuncType, 0);
            llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType);

            // Call the function: callResult = call ftpr(args, args, call mask)
            llvm::Value *callResult = CallInst(fptrCast, funcType, args, name);

            // Now, do a masked store into the memory allocated to
            // accumulate the result using the call mask.
            if (callResult != NULL &&
                callResult->getType() != LLVMTypes::VoidType) {
                AssertPos(currentPos, resultPtr != NULL);
                StoreInst(callResult, resultPtr, callMask, returnType,
                          PointerType::GetUniform(returnType));
            }
            else
                AssertPos(currentPos, resultPtr == NULL);

            // Update the mask to turn off the program instances for which
            // we just called the function.
            // currentMask = currentMask & ~callmask
            llvm::Value *notCallMask =
                BinaryOperator(llvm::Instruction::Xor, callMask, LLVMMaskAllOn,
                               "~callMask");
            currentMask = BinaryOperator(llvm::Instruction::And, currentMask,
                                         notCallMask, "currentMask&~callMask");
            StoreInst(currentMask, maskPtr);

            // And go back to the test to see if we need to do another
            // call.
            BranchInst(bbTest);
        }

        // bbDone: We're all done; clean up and return the result we've
        // accumulated in the result memory.
        SetCurrentBasicBlock(bbDone);
        SetInternalMask(origMask);
        return resultPtr ? LoadInst(resultPtr) : NULL;
    }
}
3868 
3869 
3870 llvm::Value *
3871 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
3872  llvm::Value *arg, const char *name) {
3873  std::vector<llvm::Value *> args;
3874  args.push_back(arg);
3875  return CallInst(func, funcType, args, name);
3876 }
3877 
3878 
3879 llvm::Value *
3880 FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
3881  llvm::Value *arg0, llvm::Value *arg1,
3882  const char *name) {
3883  std::vector<llvm::Value *> args;
3884  args.push_back(arg0);
3885  args.push_back(arg1);
3886  return CallInst(func, funcType, args, name);
3887 }
3888 
3889 
/** Emit the function's return: if the function launched any tasks, first
    emit a sync; then return the value accumulated in returnValuePtr (or a
    void return for void functions) and clear the current basic block. */
llvm::Instruction *
// NOTE(review): this extract dropped the next original source line (3891),
// which carried the start of the definition -- presumably
// "FunctionEmitContext::ReturnInst() {" -- confirm against ctx.cpp.
    if (launchedTasks)
        // Add a sync call at the end of any function that launched tasks
        SyncInst();

    llvm::Instruction *rinst = NULL;
    if (returnValuePtr != NULL) {
        // We have value(s) to return; load them from their storage
        // location
        llvm::Value *retVal = LoadInst(returnValuePtr, "return_value");
        rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock);
    }
    else {
        // No return-value storage: the function must be declared void.
        AssertPos(currentPos, function->GetReturnType()->IsVoidType());
        rinst = llvm::ReturnInst::Create(*g->ctx, bblock);
    }

    AddDebugPos(rinst);
    // NULL out bblock: no further instructions may be emitted after a return.
    bblock = NULL;
    return rinst;
}
3912 
3913 
/** Emit code to launch a task: allocate an argument block via ISPCAlloc,
    copy the argument values (plus the execution mask, if the task function
    expects one extra parameter) into it, and call ISPCLaunch with the task
    function pointer, the argument block, and the 3-D launch counts.  A
    separate code path handles the NVPTX target. */
llvm::Value *
// NOTE(review): this extract dropped the next original source line (3915),
// which carried the start of the signature -- presumably
// "FunctionEmitContext::LaunchInst(llvm::Value *callee," -- confirm in ctx.cpp.
                              std::vector<llvm::Value *> &argVals,
                              llvm::Value *launchCount[3]){
#ifdef ISPC_NVPTX_ENABLED
    if (g->target->getISA() == Target::NVPTX)
    {
        if (callee == NULL) {
            AssertPos(currentPos, m->errorCount > 0);
            return NULL;
        }
        launchedTasks = true;

        AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
        std::vector<llvm::Type*> argTypes;

        // Build a struct type mirroring the task function's parameter list;
        // the argument block is laid out as one instance of this struct.
        llvm::Function *F = llvm::dyn_cast<llvm::Function>(callee);
        const unsigned int nArgs = F->arg_size();
        llvm::Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
        for (; I != E; ++I)
            argTypes.push_back(I->getType());
        llvm::Type *st = llvm::StructType::get(*g->ctx, argTypes);
        llvm::StructType *argStructType = static_cast<llvm::StructType *>(st);
        llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
        if (structSize->getType() != LLVMTypes::Int64Type)
            // ISPCAlloc takes a 64-bit size; widen if SizeOf gave 32 bits.
            structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
                                  "struct_size_to_64");

        const int align = 8;
        llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
        AssertPos(currentPos, falloc != NULL);
        std::vector<llvm::Value *> allocArgs;
        allocArgs.push_back(launchGroupHandlePtr);
        allocArgs.push_back(structSize);
        allocArgs.push_back(LLVMInt32(align));
        llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
        llvm::Value *voidi64 = PtrToIntInst(voidmem, "args_i64");
        llvm::BasicBlock* if_true = CreateBasicBlock("if_true");
        llvm::BasicBlock* if_false = CreateBasicBlock("if_false");

        /* check if the pointer returned by ISPCAlloc is not NULL
         * --------------
         * this is a workaround for not checking the value of programIndex
         * because ISPCAlloc will return NULL pointer for all programIndex > 0
         * of course, if ISPAlloc fails to get parameter buffer, the pointer for programIndex = 0
         * will also be NULL
         * This check must be added, and also rewrite the code to make it less opaque
         */
        llvm::Value* cmp1 = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, voidi64, LLVMInt64(0), "cmp1");
        BranchInst(if_true, if_false, cmp1);

        /**********************/
        bblock = if_true;

        // label_if_then block: fill in the argument block (only reached
        // when ISPCAlloc returned a non-NULL buffer).
        llvm::Type *pt = llvm::PointerType::getUnqual(st);
        llvm::Value *argmem = BitCastInst(voidmem, pt);
        for (unsigned int i = 0; i < argVals.size(); ++i)
        {
            llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
            // don't need to do masked store here, I think
            StoreInst(argVals[i], ptr);
        }
        if (nArgs == argVals.size() + 1) {
            // copy in the mask
            llvm::Value *mask = GetFullMask();
            llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
                                                "funarg_mask");
            StoreInst(mask, ptr);
        }
        BranchInst(if_false);

        /**********************/
        bblock = if_false;

        llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
        llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
        AssertPos(currentPos, flaunch != NULL);
        std::vector<llvm::Value *> args;
        args.push_back(launchGroupHandlePtr);
        args.push_back(fptr);
        args.push_back(voidmem);
        args.push_back(launchCount[0]);
        args.push_back(launchCount[1]);
        args.push_back(launchCount[2]);
        llvm::Value *ret = CallInst(flaunch, NULL, args, "");
        return ret;
    }
#endif /* ISPC_NVPTX_ENABLED */

    if (callee == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    launchedTasks = true;

    // The task function's first parameter points to the argument struct;
    // recover that struct type to size and fill the argument block.
    AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
    llvm::Type *argType =
        (llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
    AssertPos(currentPos, llvm::PointerType::classof(argType));
    llvm::PointerType *pt =
        llvm::dyn_cast<llvm::PointerType>(argType);
    AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
    llvm::StructType *argStructType =
        static_cast<llvm::StructType *>(pt->getElementType());

    llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
    AssertPos(currentPos, falloc != NULL);
    llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
    if (structSize->getType() != LLVMTypes::Int64Type)
        // ISPCAlloc expects the size as an uint64_t, but on 32-bit
        // targets, SizeOf returns a 32-bit value
        structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
                              "struct_size_to_64");
    int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());

    std::vector<llvm::Value *> allocArgs;
    allocArgs.push_back(launchGroupHandlePtr);
    allocArgs.push_back(structSize);
    allocArgs.push_back(LLVMInt32(align));
    llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
    llvm::Value *argmem = BitCastInst(voidmem, pt);

    // Copy the values of the parameters into the appropriate place in
    // the argument block
    for (unsigned int i = 0; i < argVals.size(); ++i) {
        llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
        // don't need to do masked store here, I think
        StoreInst(argVals[i], ptr);
    }

    if (argStructType->getNumElements() == argVals.size() + 1) {
        // copy in the mask
        llvm::Value *mask = GetFullMask();
        llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
                                            "funarg_mask");
        StoreInst(mask, ptr);
    }

    // And emit the call to the user-supplied task launch function, passing
    // a pointer to the task function being called and a pointer to the
    // argument block we just filled in
    llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
    llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
    AssertPos(currentPos, flaunch != NULL);
    std::vector<llvm::Value *> args;
    args.push_back(launchGroupHandlePtr);
    args.push_back(fptr);
    args.push_back(voidmem);
    args.push_back(launchCount[0]);
    args.push_back(launchCount[1]);
    args.push_back(launchCount[2]);
    return CallInst(flaunch, NULL, args, "");
}
4070 
/** Emit a sync on the current launch group: if the launch-group handle is
    non-NULL, call ISPCSync on it and then zero the handle so a subsequent
    ISPCLaunch in this function starts a fresh group.  The NVPTX path calls
    ISPCSync unconditionally. */
void
// NOTE(review): this extract dropped the next original source line (4072),
// which carried the start of the definition -- presumably
// "FunctionEmitContext::SyncInst() {" -- confirm against ctx.cpp.
#ifdef ISPC_NVPTX_ENABLED
    if (g->target->getISA() == Target::NVPTX)
    {
        llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
        llvm::Value *nullPtrValue =
            llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
        llvm::Function *fsync = m->module->getFunction("ISPCSync");
        if (fsync == NULL)
            FATAL("Couldn't find ISPCSync declaration?!");
        CallInst(fsync, NULL, launchGroupHandle, "");
        StoreInst(nullPtrValue, launchGroupHandlePtr);
        return;
    }
#endif /* ISPC_NVPTX_ENABLED */

    llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
    llvm::Value *nullPtrValue =
        llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
    // Only call ISPCSync if something was actually launched (handle != NULL).
    llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
                                   llvm::CmpInst::ICMP_NE,
                                   launchGroupHandle, nullPtrValue);
    llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
    llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
    BranchInst(bSync, bPostSync, nonNull);

    SetCurrentBasicBlock(bSync);
    llvm::Function *fsync = m->module->getFunction("ISPCSync");
    if (fsync == NULL)
        FATAL("Couldn't find ISPCSync declaration?!");
    CallInst(fsync, NULL, launchGroupHandle, "");

    // zero out the handle so that if ISPCLaunch is called again in this
    // function, it knows it's starting out from scratch
    StoreInst(nullPtrValue, launchGroupHandlePtr);

    BranchInst(bPostSync);

    SetCurrentBasicBlock(bPostSync);
}
4112 
4113 
4114 /** When we gathering from or scattering to a varying atomic type, we need
4115  to add an appropriate offset to the final address for each lane right
4116  before we use it. Given a varying pointer we're about to use and its
4117  type, this function determines whether these offsets are needed and
4118  returns an updated pointer that incorporates these offsets if needed.
4119  */
llvm::Value *
// NOTE(review): this extract dropped the next original source line (4121),
// which carried the start of the signature -- presumably
// "FunctionEmitContext::AddVaryingOffsetsIfNeeded(llvm::Value *ptr," --
// confirm against ctx.cpp / ctx.h.
                                 const Type *ptrType) {
    // This should only be called for varying pointers
    const PointerType *pt = CastType<PointerType>(ptrType);
    AssertPos(currentPos, pt && pt->IsVaryingType());

    // Offsets are only needed when gathering/scattering a varying basic
    // (atomic) type; anything else is returned unmodified.
    const Type *baseType = ptrType->GetBaseType();
    if (Type::IsBasicType(baseType) == false)
        return ptr;

    if (baseType->IsVaryingType() == false)
        return ptr;

    // Find the size of a uniform element of the varying type
    llvm::Type *llvmBaseUniformType =
        baseType->GetAsUniformType()->LLVMType(g->ctx);
    llvm::Value *unifSize = g->target->SizeOf(llvmBaseUniformType, bblock);
    unifSize = SmearUniform(unifSize);

    // Compute offset = <0, 1, .. > * unifSize
    bool is32bits = g->target->is32Bit() || g->opt.force32BitAddressing;
    llvm::Value *varyingOffsets = ProgramIndexVector(is32bits);

    llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
                                         varyingOffsets);

    if (g->opt.force32BitAddressing == true && g->target->is32Bit() == false)
        // On 64-bit targets where we're doing 32-bit addressing
        // calculations, we need to convert to an i64 vector before adding
        // to the pointer
        offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");

    // Per-lane address = original pointer + lane index * element size.
    return BinaryOperator(llvm::Instruction::Add, ptr, offset);
}
4155 
4156 
/** Pop the innermost saved control-flow record and restore the
    break/continue/switch bookkeeping that was saved when it was pushed:
    switches restore all switch-related state, loops and foreach restore
    break/continue state, and ifs restore nothing.  The caller receives
    the popped CFInfo. */
CFInfo *
// NOTE(review): this extract dropped the next original source line (4158);
// per the file's own cross-reference index it is
// "FunctionEmitContext::popCFState() {".
    AssertPos(currentPos, controlFlowInfo.size() > 0);
    CFInfo *ci = controlFlowInfo.back();
    controlFlowInfo.pop_back();

    if (ci->IsSwitch()) {
        breakTarget = ci->savedBreakTarget;
        continueTarget = ci->savedContinueTarget;
        breakLanesPtr = ci->savedBreakLanesPtr;
        continueLanesPtr = ci->savedContinueLanesPtr;
        blockEntryMask = ci->savedBlockEntryMask;
        switchExpr = ci->savedSwitchExpr;
        defaultBlock = ci->savedDefaultBlock;
        caseBlocks = ci->savedCaseBlocks;
        nextBlocks = ci->savedNextBlocks;
        switchConditionWasUniform = ci->savedSwitchConditionWasUniform;
    }
    else if (ci->IsLoop() || ci->IsForeach()) {
        breakTarget = ci->savedBreakTarget;
        continueTarget = ci->savedContinueTarget;
        breakLanesPtr = ci->savedBreakLanesPtr;
        continueLanesPtr = ci->savedContinueLanesPtr;
        blockEntryMask = ci->savedBlockEntryMask;
    }
    else {
        AssertPos(currentPos, ci->IsIf());
        // nothing to do
    }

    return ci;
}
bool IsVoidType() const
Definition: type.cpp:251
llvm::Value * storagePtr
Definition: sym.h:72
static const AtomicType * VaryingInt32
Definition: type.h:349
llvm::Value * Any(llvm::Value *mask)
Definition: ctx.cpp:1416
bool IsVaryingType() const
Definition: type.h:150
llvm::Constant * LLVMMaskAllOn
Definition: llvmutil.cpp:92
llvm::Value * savedBreakLanesPtr
Definition: ctx.cpp:118
void InitializeLabelMap(Stmt *code)
Definition: ctx.cpp:1311
llvm::Value * PtrToIntInst(llvm::Value *value, const char *name=NULL)
Definition: ctx.cpp:2104
void jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target)
Definition: ctx.cpp:939
Definition: func.h:44
llvm::Value * AddElementOffset(llvm::Value *basePtr, int elementNum, const Type *ptrType, const char *name=NULL, const PointerType **resultPtrType=NULL)
Definition: ctx.cpp:2613
CFInfo * popCFState()
Definition: ctx.cpp:4158
Opt opt
Definition: ispc.h:548
void StartUniformIf()
Definition: ctx.cpp:595
void SwitchInst(llvm::Value *expr, llvm::BasicBlock *defaultBlock, const std::vector< std::pair< int, llvm::BasicBlock * > > &caseBlocks, const std::map< llvm::BasicBlock *, llvm::BasicBlock * > &nextBlocks)
Definition: ctx.cpp:1207
void BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse)
Definition: ctx.cpp:584
int last_column
Definition: ispc.h:145
CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct, llvm::Value *sb, llvm::Value *sc, llvm::Value *sm, llvm::Value *lm, llvm::Value *sse=NULL, llvm::BasicBlock *bbd=NULL, const std::vector< std::pair< int, llvm::BasicBlock * > > *bbc=NULL, const std::map< llvm::BasicBlock *, llvm::BasicBlock * > *bbn=NULL, bool scu=false)
Definition: ctx.cpp:139
void StartSwitch(bool isUniform, llvm::BasicBlock *bbAfterSwitch)
Definition: ctx.cpp:1022
llvm::Value * ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, const char *name=NULL)
Definition: ctx.cpp:3580
llvm::Value * ProgramIndexVector(bool is32bits=true)
Definition: ctx.cpp:1577
void SetInternalMask(llvm::Value *val)
Definition: ctx.cpp:536
llvm::Constant * LLVMInt64Vector(int64_t i)
Definition: llvmutil.cpp:455
void StartLoop(llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, bool uniformControlFlow)
Definition: ctx.cpp:670
llvm::Instruction * FPCastInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2228
Declaration of the FunctionEmitContext class
void EmitVariableDebugInfo(Symbol *sym)
Definition: ctx.cpp:1788
static llvm::Type * lGetMatchingBoolVectorType(llvm::Type *type)
Definition: ctx.cpp:1998
void StartScope()
Definition: ctx.cpp:1730
CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct, llvm::Value *sb, llvm::Value *sc, llvm::Value *sm, llvm::Value *lm)
Definition: ctx.cpp:160
llvm::BasicBlock * savedBreakTarget
Definition: ctx.cpp:117
void SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *val)
Definition: ctx.cpp:544
CFInfo(CFType t, bool uniformIf, llvm::Value *sm)
Definition: ctx.cpp:127
void BranchInst(llvm::BasicBlock *block)
Definition: ctx.cpp:3510
const std::vector< std::pair< int, llvm::BasicBlock * > > * savedCaseBlocks
Definition: ctx.cpp:122
void maskedStore(llvm::Value *value, llvm::Value *ptr, const Type *ptrType, llvm::Value *mask)
Definition: ctx.cpp:3123
bool IsFrozenSlice() const
Definition: type.h:468
llvm::Instruction * ZExtInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2266
int getVectorWidth() const
Definition: ispc.h:286
Module * m
Definition: ispc.cpp:101
llvm::DIType GetDIType(llvm::DIDescriptor scope) const
Definition: type.cpp:3258
Interface class for statements in the ispc language.
Definition: stmt.h:49
FunctionEmitContext(Function *function, Symbol *funSym, llvm::Function *llvmFunction, SourcePos firstStmtPos)
Definition: ctx.cpp:245
llvm::Value * NotOperator(llvm::Value *v, const char *name=NULL)
Definition: ctx.cpp:1963
int first_line
Definition: ispc.h:142
Target * target
Definition: ispc.h:550
llvm::Value * SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1424
llvm::Value * LoadInst(llvm::Value *ptr, llvm::Value *mask, const Type *ptrType, const char *name=NULL, bool one_elem=false)
Definition: ctx.cpp:2832
static llvm::VectorType * VoidPointerVectorType
Definition: llvmutil.h:108
static llvm::VectorType * BoolVectorType
Definition: llvmutil.h:92
void BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse)
Definition: ctx.cpp:573
std::vector< std::string > GetLabels()
Definition: ctx.cpp:1326
virtual const Type * GetElementType(int index) const =0
llvm::Instruction * TruncInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2189
llvm::Value * AllocaInst(llvm::Type *llvmType, const char *name=NULL, int align=0, bool atEntryBlock=true)
Definition: ctx.cpp:3066
Abstract base class for types that represent sequences.
Definition: type.h:531
bool IsVarying()
Definition: ctx.cpp:110
llvm::Value * CmpInst(llvm::Instruction::OtherOps inst, llvm::CmpInst::Predicate pred, llvm::Value *v0, llvm::Value *v1, const char *name=NULL)
Definition: ctx.cpp:2015
void EndSwitch()
Definition: ctx.cpp:1048
void StartVaryingIf(llvm::Value *oldMask)
Definition: ctx.cpp:601
const PointerType * GetAsNonSlice() const
Definition: type.cpp:1125
static llvm::Type * BoolType
Definition: llvmutil.h:74
#define Assert(expr)
Definition: ispc.h:173
void addSwitchMaskCheck(llvm::Value *mask)
Definition: ctx.cpp:1061
void StartForeach(ForeachType ft)
Definition: ctx.cpp:714
llvm::Constant * LLVMInt32Vector(int32_t i)
Definition: llvmutil.cpp:379
llvm::FunctionType * LLVMFunctionType(llvm::LLVMContext *ctx, bool disableMask=false) const
Definition: type.cpp:3336
ASTNode * WalkAST(ASTNode *root, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc, void *data)
Definition: ast.cpp:74
static llvm::VectorType * Int32VectorType
Definition: llvmutil.h:96
bool IsForeach()
Definition: ctx.cpp:106
bool forceAlignedMemory
Definition: ispc.h:472
static PointerType * GetVarying(const Type *t)
Definition: type.cpp:1022
void Continue(bool doCoherenceCheck)
Definition: ctx.cpp:871
llvm::Value * GetFullMask()
Definition: ctx.cpp:509
const char * GetISAString() const
Definition: ispc.cpp:1332
bool isUniform
Definition: ctx.cpp:116
virtual const Type * GetAsUniformType() const =0
CFType
Definition: ctx.cpp:113
void AddInstrumentationPoint(const char *note)
Definition: ctx.cpp:1675
llvm::Value * MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset)
Definition: ctx.cpp:2435
std::string name
Definition: sym.h:71
llvm::Value * gather(llvm::Value *ptr, const PointerType *ptrType, llvm::Value *mask, const char *name)
Definition: ctx.cpp:2922
void restoreMaskGivenReturns(llvm::Value *oldMask)
Definition: ctx.cpp:755
virtual const Type * GetElementType() const =0
Type implementation for pointers to other types.
Definition: type.h:446
int getNativeVectorAlignment() const
Definition: ispc.h:282
void BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse)
Definition: ctx.cpp:562
void RestoreContinuedLanes()
Definition: ctx.cpp:994
llvm::Constant * LLVMFalse
Definition: llvmutil.cpp:91
llvm::Constant * LLVMMaskAllOff
Definition: llvmutil.cpp:93
llvm::Value * loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask, const PointerType *ptrType, const char *name)
Definition: ctx.cpp:2798
virtual std::string GetString() const =0
llvm::BasicBlock * GetCurrentBasicBlock()
Definition: ctx.cpp:485
int GetSOAWidth() const
Definition: type.h:160
static PointerType * GetUniform(const Type *t, bool isSlice=false)
Definition: type.cpp:1016
void Break(bool doCoherenceCheck)
Definition: ctx.cpp:791
static llvm::Value * lComputeSliceIndex(FunctionEmitContext *ctx, int soaWidth, llvm::Value *indexValue, llvm::Value *ptrSliceOffset, llvm::Value **newSliceOffset)
Definition: ctx.cpp:2397
static llvm::VectorType * Int1VectorType
Definition: llvmutil.h:93
llvm::BasicBlock * CreateBasicBlock(const char *name)
Definition: ctx.cpp:1617
llvm::Value * savedSwitchExpr
Definition: ctx.cpp:120
static CFInfo * GetLoop(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask, llvm::Value *savedBlockEntryMask)
Definition: ctx.cpp:187
bool IsIf()
Definition: ctx.cpp:104
header file with declarations for symbol and symbol table classes.
llvm::Value * BroadcastValue(llvm::Value *v, llvm::Type *vecType, const char *name=NULL)
Definition: ctx.cpp:3601
static void addGSMetadata(llvm::Value *inst, SourcePos pos)
Definition: ctx.cpp:3019
static llvm::Value * lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr, const PointerType **ptrType)
Definition: ctx.cpp:2764
bool disableMaskAllOnOptimizations
Definition: ispc.h:477
int level
Definition: ispc.h:438
static llvm::Type * VoidType
Definition: llvmutil.h:71
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:263
void StoreInst(llvm::Value *value, llvm::Value *ptr)
Definition: ctx.cpp:3359
llvm::Module * module
Definition: module.h:166
File with declarations for classes related to statements in the language.
void EmitCaseLabel(int value, bool checkMask, SourcePos pos)
Definition: ctx.cpp:1155
Globals * g
Definition: ispc.cpp:100
llvm::BasicBlock * savedContinueTarget
Definition: ctx.cpp:117
bool IsUniformType() const
Definition: type.h:145
void EndLoop()
Definition: ctx.cpp:699
llvm::Value * GetFunctionMask()
Definition: ctx.cpp:497
int getMaskBitCount() const
Definition: ispc.h:292
static llvm::VectorType * Int8VectorType
Definition: llvmutil.h:94
static CFInfo * GetIf(bool isUniform, llvm::Value *savedMask)
Definition: ctx.cpp:181
void AddDebugPos(llvm::Value *instruction, const SourcePos *pos=NULL, llvm::DIScope *scope=NULL)
Definition: ctx.cpp:1708
Abstract base class for nodes in the abstract syntax tree (AST).
Definition: ast.h:50
llvm::Value * GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, const Type *ptrType, const char *name=NULL)
Definition: ctx.cpp:2453
bool LookupFunction(const char *name, std::vector< Symbol * > *matches=NULL)
Definition: sym.cpp:162
CFType type
Definition: ctx.cpp:115
static unsigned int lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType)
Definition: ctx.cpp:3671
void CurrentLanesReturned(Expr *value, bool doCoherenceCheck)
Definition: ctx.cpp:1340
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:385
bool inSwitchStatement() const
Definition: ctx.cpp:777
llvm::DIScope GetDIScope() const
Definition: ctx.cpp:1781
bool IsUniform()
Definition: ctx.cpp:111
llvm::BasicBlock * savedDefaultBlock
Definition: ctx.cpp:121
SourcePos GetDebugPos() const
Definition: ctx.cpp:1702
static llvm::VectorType * FloatVectorType
Definition: llvmutil.h:98
llvm::Value * LaneMask(llvm::Value *mask)
Definition: ctx.cpp:1469
bool IsLoop()
Definition: ctx.cpp:105
bool IsSlice() const
Definition: type.h:467
static CFInfo * GetSwitch(bool isUniform, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask, llvm::Value *savedBlockEntryMask, llvm::Value *switchExpr, llvm::BasicBlock *bbDefault, const std::vector< std::pair< int, llvm::BasicBlock * > > *bbCases, const std::map< llvm::BasicBlock *, llvm::BasicBlock * > *bbNext, bool scUniform)
Definition: ctx.cpp:228
static llvm::Type * Int64Type
Definition: llvmutil.h:79
void MemcpyInst(llvm::Value *dest, llvm::Value *src, llvm::Value *count, llvm::Value *align=NULL)
Definition: ctx.cpp:3462
llvm::PHINode * PhiNode(llvm::Type *type, int count, const char *name=NULL)
Definition: ctx.cpp:3640
Representation of a structure holding a number of members.
Definition: type.h:692
virtual llvm::DIType GetDIType(llvm::DIDescriptor scope) const =0
llvm::Value * addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *ptrType)
Definition: ctx.cpp:4121
static llvm::VectorType * Int64VectorType
Definition: llvmutil.h:97
Header file with declarations for various LLVM utility stuff.
#define AssertPos(pos, expr)
Definition: ispc.h:176
llvm::Value * getMaskAtSwitchEntry()
Definition: ctx.cpp:1081
void MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1)
Definition: ctx.cpp:2356
ISA getISA() const
Definition: ispc.h:270
bool emitInstrumentation
Definition: ispc.h:610
static bool IsBasicType(const Type *type)
Definition: type.cpp:3642
SourcePos pos
Definition: sym.h:70
llvm::Value * CallInst(llvm::Value *func, const FunctionType *funcType, const std::vector< llvm::Value * > &args, const char *name=NULL)
Definition: ctx.cpp:3692
uint32_t RoundUpPow2(uint32_t v)
Definition: util.h:51
bool ifsInCFAllUniform(int cfType) const
Definition: ctx.cpp:921
AtomicType represents basic types like floats, ints, etc.
Definition: type.h:292
void SetBlockEntryMask(llvm::Value *mask)
Definition: ctx.cpp:530
virtual llvm::Value * GetValue(FunctionEmitContext *ctx) const =0
StorageClass storageClass
Definition: sym.h:96
Representation of a range of positions in a source file.
Definition: ispc.h:137
llvm::Value * StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1459
static bool lEnclosingLoopIsForeachActive(const std::vector< CFInfo * > &controlFlowInfo)
Definition: ctx.cpp:861
bool InForeachLoop() const
Definition: ctx.cpp:1270
int VaryingCFDepth() const
Definition: ctx.cpp:1260
void ClearBreakLanes()
Definition: ctx.cpp:1012
Abstract base class for types that represent collections of other types.
Definition: type.h:510
const char * LLVMGetName(llvm::Value *v, const char *)
Definition: llvmutil.cpp:1746
llvm::Value * None(llvm::Value *mask)
Definition: ctx.cpp:1451
bool force32BitAddressing
Definition: ispc.h:458
const char * name
Definition: ispc.h:141
llvm::Instruction * SExtInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2247
SourcePos pos
Definition: ast.h:77
static llvm::Type * PointerIntType
Definition: llvmutil.h:73
const PointerType * GetAsSlice() const
Definition: type.cpp:1117
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:410
static llvm::PointerType * VoidPointerType
Definition: llvmutil.h:72
const Type * GetBaseType() const
Definition: type.cpp:1071
int getNativeVectorWidth() const
Definition: ispc.h:280
const Function * GetFunction() const
Definition: ctx.cpp:479
const Type * GetReturnType() const
Definition: type.h:926
llvm::Value * GetStringPtr(const std::string &str)
Definition: ctx.cpp:1605
void storeUniformToSOA(llvm::Value *value, llvm::Value *ptr, llvm::Value *mask, const Type *valueType, const PointerType *ptrType)
Definition: ctx.cpp:3432
#define FATAL(message)
Definition: util.h:113
bool savedSwitchConditionWasUniform
Definition: ctx.cpp:124
virtual llvm::Type * LLVMType(llvm::LLVMContext *ctx) const =0
llvm::Value * InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const char *name=NULL)
Definition: ctx.cpp:3555
void DisableGatherScatterWarnings()
Definition: ctx.cpp:1279
llvm::Value * savedMask
Definition: ctx.cpp:119
static llvm::Type * Int32Type
Definition: llvmutil.h:78
int last_line
Definition: ispc.h:144
void SetDebugPos(SourcePos pos)
Definition: ctx.cpp:1696
#define PTYPE(p)
Definition: llvmutil.h:55
Representation of a function in a source file.
Variability GetVariability() const
Definition: type.cpp:1035
int first_column
Definition: ispc.h:143
Definition: ctx.cpp:73
llvm::DIFile GetDIFile() const
Definition: ispc.cpp:1602
virtual const Type * GetAsVaryingType() const =0
virtual const Type * GetType() const =0
llvm::Value * GetFullMaskPointer()
Definition: ctx.cpp:516
llvm::Value * GetInternalMask()
Definition: ctx.cpp:503
llvm::Value * BitCastInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2087
void SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test)
Definition: ctx.cpp:552
llvm::Value * LaunchInst(llvm::Value *callee, std::vector< llvm::Value * > &argVals, llvm::Value *launchCount[3])
Definition: ctx.cpp:3915
Type representing a function (return type + argument types)
Definition: type.h:885
Representation of a program symbol.
Definition: sym.h:63
llvm::Value * ExtractInst(llvm::Value *v, int elt, const char *name=NULL)
Definition: ctx.cpp:3532
bool IsSwitch()
Definition: ctx.cpp:109
void EndForeach()
Definition: ctx.cpp:748
void EnableGatherScatterWarnings()
Definition: ctx.cpp:1285
Interface class that defines the type abstraction.
Definition: type.h:101
static bool initLabelBBlocks(ASTNode *node, void *data)
Definition: ctx.cpp:1292
Expr abstract base class and expression implementations.
void SetCurrentBasicBlock(llvm::BasicBlock *bblock)
Definition: ctx.cpp:491
static llvm::VectorType * MaskType
Definition: llvmutil.h:90
virtual const Type * GetBaseType() const =0
llvm::Value * savedContinueLanesPtr
Definition: ctx.cpp:118
void EmitDefaultLabel(bool checkMask, SourcePos pos)
Definition: ctx.cpp:1092
llvm::Instruction * SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Value *val1, const char *name=NULL)
Definition: ctx.cpp:3650
static int lArrayVectorWidth(llvm::Type *t)
Definition: ctx.cpp:1911
Expr * TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase)
Definition: expr.cpp:602
void EmitFunctionParameterDebugInfo(Symbol *sym, int parameterNum)
Definition: ctx.cpp:1843
Expr is the abstract base class that defines the interface that all expression types must implement...
Definition: expr.h:48
llvm::Value * IntToPtrInst(llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2156
static llvm::VectorType * DoubleVectorType
Definition: llvmutil.h:99
llvm::Value * MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2)
Definition: ctx.cpp:1549
llvm::Type * LLVMType(llvm::LLVMContext *ctx) const
Definition: type.cpp:2334
llvm::Value * All(llvm::Value *mask)
Definition: ctx.cpp:1433
std::string name
Definition: stmt.h:494
llvm::Constant * LLVMIntAsType(int64_t, llvm::Type *t)
Definition: llvmutil.cpp:548
virtual int GetElementCount() const =0
llvm::Value * SmearUniform(llvm::Value *value, const char *name=NULL)
Definition: ctx.cpp:2049
static llvm::VectorType * Int16VectorType
Definition: llvmutil.h:95
const std::map< llvm::BasicBlock *, llvm::BasicBlock * > * savedNextBlocks
Definition: ctx.cpp:123
llvm::Value * savedBlockEntryMask
Definition: ctx.cpp:119
bool IsConstType() const
Definition: type.cpp:1065
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
llvm::ConstantInt * LLVMInt64(int64_t i)
Definition: llvmutil.cpp:277
int errorCount
Definition: module.h:159
llvm::LLVMContext * ctx
Definition: ispc.h:639
const Type * type
Definition: sym.h:84
llvm::DIBuilder * diBuilder
Definition: module.h:169
static CFInfo * GetForeach(FunctionEmitContext::ForeachType ft, llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, llvm::Value *savedBreakLanesPtr, llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask, llvm::Value *savedBlockEntryMask)
Definition: ctx.cpp:199
static bool EqualIgnoringConst(const Type *a, const Type *b)
Definition: type.cpp:3758
void scatter(llvm::Value *value, llvm::Value *ptr, const Type *valueType, const Type *ptrType, llvm::Value *mask)
Definition: ctx.cpp:3239
virtual const Type * GetReferenceTarget() const
Definition: type.cpp:3434
llvm::Instruction * CastInst(llvm::Instruction::CastOps op, llvm::Value *value, llvm::Type *type, const char *name=NULL)
Definition: ctx.cpp:2208
bool is32Bit() const
Definition: ispc.h:276
llvm::Instruction * ReturnInst()
Definition: ctx.cpp:3891
llvm::Value * applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index, const Type *ptrType)
Definition: ctx.cpp:2291
llvm::Value * BinaryOperator(llvm::Instruction::BinaryOps inst, llvm::Value *v0, llvm::Value *v1, const char *name=NULL)
Definition: ctx.cpp:1929
int GetNumParameters() const
Definition: type.h:937
llvm::BasicBlock * GetLabeledBasicBlock(const std::string &label)
Definition: ctx.cpp:1318
void SetFunctionMask(llvm::Value *val)
Definition: ctx.cpp:522
std::map< std::string, llvm::BasicBlock * > labelMap
Definition: ctx.h:726
SymbolTable * symbolTable
Definition: module.h:163
File with declarations for classes related to type representation.
llvm::Value * I1VecToBoolVec(llvm::Value *b)
Definition: ctx.cpp:1623
static llvm::Value * lGetStringAsValue(llvm::BasicBlock *bblock, const char *s)
Definition: ctx.cpp:1655