Intel SPMD Program Compiler  1.12.0
cbackend.cpp
Go to the documentation of this file.
1 //===-- CBackend.cpp - Library for converting LLVM code to C --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This library converts LLVM code to C code, compilable by GCC and other C
11 // compilers.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ispc.h"
16 #include "module.h"
17 
18 #include <math.h>
19 #include <sstream>
20 #include <stdio.h>
21 #include <string.h>
22 
23 #ifndef _MSC_VER
24 #include <inttypes.h>
25 #define HAVE_PRINTF_A 1
26 #define ENABLE_CBE_PRINTF_A 1
27 #endif
28 
29 #ifndef PRIx64
30 #define PRIx64 "llx"
31 #endif
32 
33 #include "llvmutil.h"
34 
35 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
36 #include "llvm/CallingConv.h"
37 #include "llvm/Constants.h"
38 #include "llvm/DerivedTypes.h"
39 #include "llvm/InlineAsm.h"
40 #include "llvm/Instructions.h"
41 #include "llvm/IntrinsicInst.h"
42 #include "llvm/Intrinsics.h"
43 #include "llvm/Module.h"
44 #else // LLVM 3.3+
45 #include "llvm/IR/CallingConv.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/InlineAsm.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/IntrinsicInst.h"
51 #include "llvm/IR/Intrinsics.h"
52 #include "llvm/IR/Module.h"
53 #endif
54 #include "llvm/Pass.h"
55 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 // <= 3.6
56 #include "llvm/PassManager.h"
57 #else // LLVM 3.7+
58 #include "llvm/IR/LegacyPassManager.h"
59 #endif
60 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
61 #include "llvm/TypeFinder.h"
62 #else // LLVM_3_3+
63 #include "llvm/IR/TypeFinder.h"
64 #endif
65 #include "llvm/ADT/STLExtras.h"
66 #include "llvm/ADT/SmallString.h"
67 #include "llvm/ADT/StringExtras.h"
68 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_4 // 3.2, 3.3, 3.4
69 #include "llvm/Support/InstIterator.h"
70 #else // 3.5+
71 #include "llvm/IR/InstIterator.h"
72 #endif
73 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5
74 #include "llvm/Analysis/FindUsedTypes.h"
75 #endif
76 #include "llvm/Analysis/LoopInfo.h"
77 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
78 #include "llvm/IR/CFG.h"
79 #include "llvm/IR/CallSite.h"
80 #include "llvm/IR/GetElementPtrTypeIterator.h"
81 #include "llvm/IR/Verifier.h"
82 #include "llvm/Support/FileSystem.h"
83 #include <llvm/IR/IRPrintingPasses.h>
84 #else
85 #include "llvm/Analysis/Verifier.h"
86 #include "llvm/Support/CFG.h"
87 #include "llvm/Support/CallSite.h"
88 #include "llvm/Support/GetElementPtrTypeIterator.h"
89 #include <llvm/Assembly/PrintModulePass.h>
90 #endif
91 #include "llvm/Analysis/ValueTracking.h"
92 #include "llvm/CodeGen/IntrinsicLowering.h"
93 #include "llvm/CodeGen/Passes.h"
94 //#include "llvm/Target/Mangler.h"
95 #include "llvm/Transforms/Scalar.h"
96 #if ISPC_LLVM_VERSION >= ISPC_LLVM_7_0
97 #include "llvm/Transforms/Utils.h"
98 #endif
99 #include "llvm/MC/MCAsmInfo.h"
100 #include "llvm/MC/MCContext.h"
101 #include "llvm/MC/MCInstrInfo.h"
102 #include "llvm/MC/MCObjectFileInfo.h"
103 #include "llvm/MC/MCRegisterInfo.h"
104 #include "llvm/MC/MCSubtargetInfo.h"
105 #include "llvm/MC/MCSymbol.h"
106 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2 // 3.2
107 #include "llvm/DataLayout.h"
108 #else // LLVM 3.3+
109 #include "llvm/IR/DataLayout.h"
110 #endif
111 #include "llvm/Support/ErrorHandling.h"
112 #include "llvm/Support/FormattedStream.h"
113 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2 // 3.2
114 #include "llvm/Support/InstVisitor.h"
115 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_4 // 3.3, 3.4
116 #include "llvm/InstVisitor.h"
117 #else // LLVM 3.5+
118 #include "llvm/IR/InstVisitor.h"
119 #endif
120 #include "llvm/Support/Host.h"
121 #include "llvm/Support/MathExtras.h"
122 #include "llvm/Support/TargetRegistry.h"
123 #include "llvm/Target/TargetMachine.h"
124 
125 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_4 // 3.2, 3.3, 3.4
126 #include "llvm/Config/config.h"
127 #endif
128 
129 #include <llvm/Support/ToolOutputFile.h>
130 #include <llvm/Transforms/IPO.h>
131 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
132 #if ISPC_LLVM_VERSION > ISPC_LLVM_7_0
133 #include "llvm/IR/PatternMatch.h"
134 #endif
135 #include <algorithm>
136 // Some ms header decided to define setjmp as _setjmp, undo this for this file.
137 #ifdef _MSC_VER
138 #undef setjmp
139 #define snprintf _snprintf
140 #endif
141 ///////////////////////////////////////////////////////////////////////////////
142 // This part of code was in LLVM's ConstantsScanner.h,
143 // but it was removed in revision #232397
144 namespace constant_scanner {
145 class constant_iterator : public std::iterator<std::forward_iterator_tag, const llvm::Constant, ptrdiff_t> {
146  llvm::const_inst_iterator InstI; // Method instruction iterator
147  unsigned OpIdx; // Operand index
148 
149  bool isAtConstant() const {
150  assert(!InstI.atEnd() && OpIdx < InstI->getNumOperands() && "isAtConstant called with invalid arguments!");
151  return llvm::isa<llvm::Constant>(InstI->getOperand(OpIdx));
152  }
153 
154  public:
155  constant_iterator(const llvm::Function *F) : InstI(llvm::inst_begin(F)), OpIdx(0) {
156  // Advance to first constant... if we are not already at constant or end
157  if (InstI != llvm::inst_end(F) && // InstI is valid?
158  (InstI->getNumOperands() == 0 || !isAtConstant())) // Not at constant?
159  operator++();
160  }
161 
162  constant_iterator(const llvm::Function *F, bool) // end ctor
163  : InstI(llvm::inst_end(F)), OpIdx(0) {}
164 
165  bool operator==(const constant_iterator &x) const { return OpIdx == x.OpIdx && InstI == x.InstI; }
166  bool operator!=(const constant_iterator &x) const { return !(*this == x); }
167 
168  pointer operator*() const {
169  assert(isAtConstant() && "Dereferenced an iterator at the end!");
170  return llvm::cast<llvm::Constant>(InstI->getOperand(OpIdx));
171  }
172 
173  constant_iterator &operator++() { // Preincrement implementation
174  ++OpIdx;
175  do {
176  unsigned NumOperands = InstI->getNumOperands();
177  while (OpIdx < NumOperands && !isAtConstant()) {
178  ++OpIdx;
179  }
180 
181  if (OpIdx < NumOperands)
182  return *this; // Found a constant!
183  ++InstI;
184  OpIdx = 0;
185  } while (!InstI.atEnd());
186 
187  return *this; // At the end of the method
188  }
189 };
190 
191 inline constant_iterator constant_begin(const llvm::Function *F) { return constant_iterator(F); }
192 
193 inline constant_iterator constant_end(const llvm::Function *F) { return constant_iterator(F, true); }
194 
195 } // namespace constant_scanner
196 
197 ///////////////////////////////////////////////////////////////////////////////
198 // FIXME:
199 namespace {
200 /// TypeFinder - Walk over a module, identifying all of the types that are
201 /// used by the module.
202 class TypeFinder {
203  // To avoid walking constant expressions multiple times and other IR
204  // objects, we keep several helper maps.
205  llvm::DenseSet<const llvm::Value *> VisitedConstants;
206 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
207  llvm::DenseSet<const llvm::Metadata *> VisitedMDNodes;
208 #endif
209  llvm::DenseSet<llvm::Type *> VisitedTypes;
210  std::vector<llvm::ArrayType *> &ArrayTypes;
211  std::vector<llvm::IntegerType *> &IntegerTypes;
212  std::vector<bool> &IsVolatile;
213  std::vector<int> &Alignment;
214 
215  public:
216  TypeFinder(std::vector<llvm::ArrayType *> &t, std::vector<llvm::IntegerType *> &i, std::vector<bool> &v,
217  std::vector<int> &a)
218  : ArrayTypes(t), IntegerTypes(i), IsVolatile(v), Alignment(a) {}
219 
220  void run(const llvm::Module &M) {
221  // Get types from global variables.
222  for (llvm::Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) {
223  incorporateType(I->getType());
224  if (I->hasInitializer())
225  incorporateValue(I->getInitializer());
226  }
227 
228  // Get types from aliases.
229  for (llvm::Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) {
230  incorporateType(I->getType());
231  if (const llvm::Value *Aliasee = I->getAliasee())
232  incorporateValue(Aliasee);
233  }
234 
235  llvm::SmallVector<std::pair<unsigned, llvm::MDNode *>, 4> MDForInst;
236 
237  // Get types from functions.
238  for (llvm::Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
239  incorporateType(FI->getType());
240 
241  for (llvm::Function::const_iterator BB = FI->begin(), E = FI->end(); BB != E; ++BB)
242  for (llvm::BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
243  const llvm::Instruction &I = *II;
244 
245  // Operands of SwitchInsts changed format after 3.1
246  // Seems like there ought to be better way to do what we
247  // want here. For now, punt on SwitchInsts.
248  if (llvm::isa<llvm::SwitchInst>(&I))
249  continue;
250 
251  // Incorporate the type of the instruction and all its operands.
252  incorporateType(I.getType());
253  if (llvm::isa<llvm::StoreInst>(&I))
254  if (llvm::IntegerType *ITy = llvm::dyn_cast<llvm::IntegerType>(I.getType())) {
255  IntegerTypes.push_back(ITy);
256  const llvm::StoreInst *St = llvm::dyn_cast<llvm::StoreInst>(&I);
257  IsVolatile.push_back(St->isVolatile());
258  Alignment.push_back(St->getAlignment());
259  }
260 
261  if (llvm::isa<llvm::LoadInst>(&I))
262  if (llvm::IntegerType *ITy = llvm::dyn_cast<llvm::IntegerType>(I.getType())) {
263  IntegerTypes.push_back(ITy);
264  const llvm::LoadInst *St = llvm::dyn_cast<llvm::LoadInst>(&I);
265  IsVolatile.push_back(St->isVolatile());
266  Alignment.push_back(St->getAlignment());
267  }
268 
269  for (llvm::User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
270  incorporateValue(*OI);
271 
272  // Incorporate types hiding in metadata.
273  I.getAllMetadataOtherThanDebugLoc(MDForInst);
274  for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
275  incorporateMDNode(MDForInst[i].second);
276 
277  MDForInst.clear();
278  }
279  }
280 
281  for (llvm::Module::const_named_metadata_iterator I = M.named_metadata_begin(), E = M.named_metadata_end();
282  I != E; ++I) {
283 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
284  const llvm::NamedMDNode *NMD = I;
285 #else /* LLVM 3.8+ */
286  const llvm::NamedMDNode *NMD = &*I;
287 #endif
288  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
289  incorporateMDNode(NMD->getOperand(i));
290  }
291  }
292 
293  private:
294  void incorporateType(llvm::Type *Ty) {
295  // Check to see if we're already visited this type.
296  if (!VisitedTypes.insert(Ty).second)
297  return;
298 
299  if (llvm::ArrayType *ATy = llvm::dyn_cast<llvm::ArrayType>(Ty))
300  ArrayTypes.push_back(ATy);
301 
302  // Recursively walk all contained types.
303  for (llvm::Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); I != E; ++I)
304  incorporateType(*I);
305  }
306 
307  /// incorporateValue - This method is used to walk operand lists finding
308  /// types hiding in constant expressions and other operands that won't be
309  /// walked in other ways. GlobalValues, basic blocks, instructions, and
310  /// inst operands are all explicitly enumerated.
311  void incorporateValue(const llvm::Value *V) {
312 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // 3.2, 3.3, 3.4, 3.5
313  if (const llvm::MDNode *M = llvm::dyn_cast<llvm::MDNode>(V)) {
314  incorporateMDNode(M);
315  return;
316  }
317 #else /* LLVN 3.6+ */
318  if (const llvm::MetadataAsValue *MV = llvm::dyn_cast<llvm::MetadataAsValue>(V)) {
319  incorporateMDNode(MV->getMetadata());
320  return;
321  }
322 #endif
323  if (!llvm::isa<llvm::Constant>(V) || llvm::isa<llvm::GlobalValue>(V))
324  return;
325 
326  // Already visited?
327  if (!VisitedConstants.insert(V).second)
328  return;
329 
330  // Check this type.
331  incorporateType(V->getType());
332 
333  // Look in operands for types.
334  const llvm::User *U = llvm::cast<llvm::User>(V);
335  for (llvm::Constant::const_op_iterator I = U->op_begin(), E = U->op_end(); I != E; ++I)
336  incorporateValue(*I);
337  }
338 
339 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // 3.2, 3.3, 3.4, 3.5
340  void incorporateMDNode(const llvm::MDNode *V) {
341 
342  // Already visited?
343  if (!VisitedConstants.insert(V).second)
344  return;
345 
346  // Look in operands for types.
347  for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
348  if (llvm::Value *Op = V->getOperand(i))
349  incorporateValue(Op);
350  }
351 #else // LLVM 3.6+
352  void incorporateMDNode(const llvm::Metadata *M) {
353 
354  // Already visited?
355  if (!VisitedMDNodes.insert(M).second)
356  return;
357 
358  if (const llvm::MDNode *N = llvm::dyn_cast<llvm::MDNode>(M)) {
359  // Look in operands for types.
360  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
361  if (const llvm::Metadata *O = N->getOperand(i))
362  incorporateMDNode(O);
363  } else if (llvm::isa<llvm::MDString>(M)) {
364  // Nothing to do with MDString.
365  } else if (const llvm::ValueAsMetadata *V = llvm::dyn_cast<llvm::ValueAsMetadata>(M)) {
366  incorporateValue(V->getValue());
367  } else {
368  // Some unknown Metadata subclass - has LLVM introduced something new?
369  llvm_unreachable("Unknown Metadata subclass");
370  }
371  }
372 #endif
373 };
374 } // end anonymous namespace
375 
376 static void findUsedArrayAndLongIntTypes(const llvm::Module *m, std::vector<llvm::ArrayType *> &t,
377  std::vector<llvm::IntegerType *> &i, std::vector<bool> &IsVolatile,
378  std::vector<int> &Alignment) {
379  TypeFinder(t, i, IsVolatile, Alignment).run(*m);
380 }
381 
382 static bool is_vec16_i64_ty(llvm::Type *Ty) {
383  llvm::VectorType *VTy = llvm::dyn_cast<llvm::VectorType>(Ty);
384  if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
385  VTy->getElementType()->getPrimitiveSizeInBits() == 64)
386  return true;
387  return false;
388 }
389 
390 namespace {
391 class CBEMCAsmInfo : public llvm::MCAsmInfo {
392  public:
393  CBEMCAsmInfo() {
394 
395 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_4 // 3.2, 3.3, 3.4
396  GlobalPrefix = "";
397 #endif
398  PrivateGlobalPrefix = "";
399  }
400 };
401 
402 /// CWriter - This class is the main chunk of code that converts an LLVM
403 /// module to a C translation unit.
404 class CWriter : public llvm::FunctionPass, public llvm::InstVisitor<CWriter> {
405  llvm::formatted_raw_ostream &Out;
406  llvm::IntrinsicLowering *IL;
407  // llvm::Mangler *Mang;
408  llvm::LoopInfo *LI;
409  const llvm::Module *TheModule;
410  const llvm::MCAsmInfo *TAsm;
411  const llvm::MCRegisterInfo *MRI;
412  const llvm::MCObjectFileInfo *MOFI;
413  llvm::MCContext *TCtx;
414 
415  // FIXME: it's ugly to have the name be "TD" here, but it saves us
416  // lots of ifdefs in the below since the new DataLayout and the old
417  // TargetData have generally similar interfaces...
418  const llvm::DataLayout *TD;
419 
420  std::map<const llvm::ConstantFP *, unsigned> FPConstantMap;
421  std::map<const llvm::ConstantDataVector *, unsigned> VectorConstantMap;
422  unsigned VectorConstantIndex;
423  std::set<llvm::Function *> intrinsicPrototypesAlreadyGenerated;
424  std::set<const llvm::Argument *> ByValParams;
425  unsigned FPCounter;
426  unsigned OpaqueCounter;
427  llvm::DenseMap<const llvm::Value *, unsigned> AnonValueNumbers;
428  unsigned NextAnonValueNumber;
429 
430  std::string includeName;
431  int vectorWidth;
432 
433  /// UnnamedStructIDs - This contains a unique ID for each struct that is
434  /// either anonymous or has no name.
435  llvm::DenseMap<llvm::StructType *, unsigned> UnnamedStructIDs;
436  llvm::DenseMap<llvm::ArrayType *, unsigned> ArrayIDs;
437 
438  public:
439  static char ID;
440  explicit CWriter(llvm::formatted_raw_ostream &o, const char *incname, int vecwidth)
441  : FunctionPass(ID), Out(o), IL(0), /* Mang(0), */ LI(0), TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0),
442  OpaqueCounter(0), NextAnonValueNumber(0), includeName(incname ? incname : "generic_defs.h"),
443  vectorWidth(vecwidth) {
444 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 // <= 3.6
445  initializeLoopInfoPass(*llvm::PassRegistry::getPassRegistry());
446 #else // LLVM 3.7+
447  initializeLoopInfoWrapperPassPass(*llvm::PassRegistry::getPassRegistry());
448 #endif
449  FPCounter = 0;
450  VectorConstantIndex = 0;
451  }
452 
453 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
454  virtual const char *getPassName() const { return "C backend"; }
455 #else // LLVM 4.0+
456  virtual llvm::StringRef getPassName() const { return "C backend"; }
457 #endif
458 
459  void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
460 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 // <= 3.6
461  AU.addRequired<llvm::LoopInfo>();
462 #else // LLVM 3.7+
463  AU.addRequired<llvm::LoopInfoWrapperPass>();
464 #endif
465  AU.setPreservesAll();
466  }
467 
468  virtual bool doInitialization(llvm::Module &M);
469 
470  bool runOnFunction(llvm::Function &F) {
471  // Do not codegen any 'available_externally' functions at all, they have
472  // definitions outside the translation unit.
473  if (F.hasAvailableExternallyLinkage())
474  return false;
475 
476 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 // <= 3.6
477  LI = &getAnalysis<llvm::LoopInfo>();
478 #else // LLVM 3.7+
479  LI = &getAnalysis<llvm::LoopInfoWrapperPass>().getLoopInfo();
480 #endif
481 
482  // Get rid of intrinsics we can't handle.
483  lowerIntrinsics(F);
484 
485  // Output all floating point constants that cannot be printed accurately.
486  printFloatingPointConstants(F);
487 
488  // Output all vector constants so they can be accessed with single
489  // vector loads
490  printVectorConstants(F);
491 
492  printFunction(F);
493  return false;
494  }
495 
496  virtual bool doFinalization(llvm::Module &M) {
497  // Free memory...
498  delete IL;
499  delete TD;
500  // delete Mang;
501  delete TCtx;
502  delete TAsm;
503  delete MRI;
504  delete MOFI;
505  FPConstantMap.clear();
506  VectorConstantMap.clear();
507  ByValParams.clear();
508  intrinsicPrototypesAlreadyGenerated.clear();
509  UnnamedStructIDs.clear();
510  ArrayIDs.clear();
511  return false;
512  }
513 
514  llvm::raw_ostream &printType(llvm::raw_ostream &Out, llvm::Type *Ty, bool isSigned = false,
515  const std::string &VariableName = "", bool IgnoreName = false,
516 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
517  const llvm::AttrListPtr &PAL = llvm::AttrListPtr()
518 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
519  const llvm::AttributeSet &PAL = llvm::AttributeSet()
520 #else // LLVM 5.0+
521  const llvm::AttributeList &PAL = llvm::AttributeList()
522 #endif
523  );
524  llvm::raw_ostream &printSimpleType(llvm::raw_ostream &Out, llvm::Type *Ty, bool isSigned,
525  const std::string &NameSoFar = "");
526 
527  void printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
528 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
529  const llvm::AttrListPtr &PAL,
530 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
531  const llvm::AttributeSet &PAL,
532 #else // LLVM 5.0+
533  const llvm::AttributeList &PAL,
534 #endif
535  llvm::PointerType *Ty);
536 
537  std::string getStructName(llvm::StructType *ST);
538  std::string getArrayName(llvm::ArrayType *AT);
539 
540  /// writeOperandDeref - Print the result of dereferencing the specified
541  /// operand with '*'. This is equivalent to printing '*' then using
542  /// writeOperand, but avoids excess syntax in some cases.
543  void writeOperandDeref(llvm::Value *Operand) {
544  if (isAddressExposed(Operand)) {
545  // Already something with an address exposed.
546  writeOperandInternal(Operand);
547  } else {
548  Out << "*(";
549  writeOperand(Operand);
550  Out << ")";
551  }
552  }
553 
554  void writeOperand(llvm::Value *Operand, bool Static = false);
555  void writeInstComputationInline(llvm::Instruction &I);
556  void writeOperandInternal(llvm::Value *Operand, bool Static = false);
557  void writeOperandWithCast(llvm::Value *Operand, unsigned Opcode);
558  void writeOperandWithCast(llvm::Value *Operand, const llvm::ICmpInst &I);
559  bool writeInstructionCast(const llvm::Instruction &I);
560 
561  void writeMemoryAccess(llvm::Value *Operand, llvm::Type *OperandType, bool IsVolatile, unsigned Alignment);
562 
563  private:
564  void lowerIntrinsics(llvm::Function &F);
565  /// Prints the definition of the intrinsic function F. Supports the
566  /// intrinsics which need to be explicitly defined in the CBackend.
567  void printIntrinsicDefinition(const llvm::Function &F, llvm::raw_ostream &Out);
568 
569  void printModuleTypes();
570  void printContainedStructs(llvm::Type *Ty, llvm::SmallPtrSet<llvm::Type *, 16> &);
571  void printContainedArrays(llvm::ArrayType *ATy, llvm::SmallPtrSet<llvm::Type *, 16> &);
572  void printFloatingPointConstants(llvm::Function &F);
573  void printFloatingPointConstants(const llvm::Constant *C);
574  void printVectorConstants(llvm::Function &F);
575  void printFunctionSignature(const llvm::Function *F, bool Prototype);
576 
577  void printFunction(llvm::Function &);
578  void printBasicBlock(llvm::BasicBlock *BB);
579  void printLoop(llvm::Loop *L);
580 
581  bool printCast(unsigned opcode, llvm::Type *SrcTy, llvm::Type *DstTy);
582  void printConstant(llvm::Constant *CPV, bool Static);
583  void printConstantWithCast(llvm::Constant *CPV, unsigned Opcode);
584  bool printConstExprCast(const llvm::ConstantExpr *CE, bool Static);
585  void printConstantArray(llvm::ConstantArray *CPA, bool Static);
586  void printConstantVector(llvm::ConstantVector *CV, bool Static);
587  void printConstantDataSequential(llvm::ConstantDataSequential *CDS, bool Static);
588 
589  /// isAddressExposed - Return true if the specified value's name needs to
590  /// have its address taken in order to get a C value of the correct type.
591  /// This happens for global variables, byval parameters, and direct allocas.
592  bool isAddressExposed(const llvm::Value *V) const {
593  if (const llvm::Argument *A = llvm::dyn_cast<llvm::Argument>(V))
594  return ByValParams.count(A);
595  return llvm::isa<llvm::GlobalVariable>(V) || isDirectAlloca(V);
596  }
597 
598  // isInlinableInst - Attempt to inline instructions into their uses to build
599  // trees as much as possible. To do this, we have to consistently decide
600  // what is acceptable to inline, so that variable declarations don't get
601  // printed and an extra copy of the expr is not emitted.
602  //
603  static bool isInlinableInst(const llvm::Instruction &I) {
604  // Always inline cmp instructions, even if they are shared by multiple
605  // expressions. GCC generates horrible code if we don't.
606  if (llvm::isa<llvm::CmpInst>(I) && llvm::isa<llvm::VectorType>(I.getType()) == false)
607  return true;
608 
609 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // 3.5+
610  // This instruction returns a struct on LLVM older than 3.4, and can not be inlined
611  if (llvm::isa<llvm::AtomicCmpXchgInst>(I))
612  return false;
613 #endif
614 
615  // Must be an expression, must be used exactly once. If it is dead, we
616  // emit it inline where it would go.
617  if (I.getType() == llvm::Type::getVoidTy(I.getContext()) || !I.hasOneUse() ||
618 #if ISPC_LLVM_VERSION > ISPC_LLVM_7_0 // 8.0+
619  I.isTerminator()
620 #else
621  llvm::isa<llvm::TerminatorInst>(I)
622 #endif
623  || llvm::isa<llvm::CallInst>(I) || llvm::isa<llvm::PHINode>(I) || llvm::isa<llvm::LoadInst>(I) ||
624  llvm::isa<llvm::VAArgInst>(I) || llvm::isa<llvm::InsertElementInst>(I) ||
625  llvm::isa<llvm::InsertValueInst>(I) || llvm::isa<llvm::ExtractValueInst>(I) ||
626  llvm::isa<llvm::SelectInst>(I))
627  // Don't inline a load across a store or other bad things!
628  return false;
629 
630  // Must not be used in inline asm, extractelement, or shufflevector.
631  if (I.hasOneUse()) {
632 
633 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // 3.5+
634  const llvm::Instruction &User = llvm::cast<llvm::Instruction>(*I.user_back());
635 #else
636  const llvm::Instruction &User = llvm::cast<llvm::Instruction>(*I.use_back());
637 #endif
638  if (isInlineAsm(User) || llvm::isa<llvm::ExtractElementInst>(User) ||
639  llvm::isa<llvm::ShuffleVectorInst>(User) || llvm::isa<llvm::AtomicRMWInst>(User) ||
640  llvm::isa<llvm::AtomicCmpXchgInst>(User))
641  return false;
642  }
643 
644  // Only inline instruction it if it's use is in the same BB as the inst.
645 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // 3.5+
646  return I.getParent() == llvm::cast<llvm::Instruction>(I.user_back())->getParent();
647 #else
648  return I.getParent() == llvm::cast<llvm::Instruction>(I.use_back())->getParent();
649 #endif
650  }
651 
652  // isDirectAlloca - Define fixed sized allocas in the entry block as direct
653  // variables which are accessed with the & operator. This causes GCC to
654  // generate significantly better code than to emit alloca calls directly.
655  //
656  static const llvm::AllocaInst *isDirectAlloca(const llvm::Value *V) {
657  const llvm::AllocaInst *AI = llvm::dyn_cast<llvm::AllocaInst>(V);
658  if (!AI)
659  return 0;
660  if (AI->isArrayAllocation())
661  return 0; // FIXME: we can also inline fixed size array allocas!
662  if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
663  return 0;
664  return AI;
665  }
666 
667  // isInlineAsm - Check if the instruction is a call to an inline asm chunk.
668  static bool isInlineAsm(const llvm::Instruction &I) {
669  if (const llvm::CallInst *CI = llvm::dyn_cast<llvm::CallInst>(&I))
670  return llvm::isa<llvm::InlineAsm>(CI->getCalledValue());
671  return false;
672  }
673 
674  // Instruction visitation functions
675  friend class llvm::InstVisitor<CWriter>;
676 
677  void visitReturnInst(llvm::ReturnInst &I);
678  void visitBranchInst(llvm::BranchInst &I);
679  void visitSwitchInst(llvm::SwitchInst &I);
680  void visitIndirectBrInst(llvm::IndirectBrInst &I);
681  void visitInvokeInst(llvm::InvokeInst &I) { llvm_unreachable("Lowerinvoke pass didn't work!"); }
682  void visitResumeInst(llvm::ResumeInst &I) { llvm_unreachable("DwarfEHPrepare pass didn't work!"); }
683  void visitUnreachableInst(llvm::UnreachableInst &I);
684 
685  void visitPHINode(llvm::PHINode &I);
686  void visitBinaryOperator(llvm::Instruction &I);
687  void visitICmpInst(llvm::ICmpInst &I);
688  void visitFCmpInst(llvm::FCmpInst &I);
689 
690  void visitCastInst(llvm::CastInst &I);
691  void visitSelectInst(llvm::SelectInst &I);
692  void visitCallInst(llvm::CallInst &I);
693  void visitInlineAsm(llvm::CallInst &I);
694  bool visitBuiltinCall(llvm::CallInst &I, llvm::Intrinsic::ID ID, bool &WroteCallee);
695 
696  void visitAllocaInst(llvm::AllocaInst &I);
697  void visitLoadInst(llvm::LoadInst &I);
698  void visitStoreInst(llvm::StoreInst &I);
699  void visitGetElementPtrInst(llvm::GetElementPtrInst &I);
700  void visitVAArgInst(llvm::VAArgInst &I);
701 
702  void visitInsertElementInst(llvm::InsertElementInst &I);
703  void visitExtractElementInst(llvm::ExtractElementInst &I);
704  void visitShuffleVectorInst(llvm::ShuffleVectorInst &SVI);
705 
706  void visitInsertValueInst(llvm::InsertValueInst &I);
707  void visitExtractValueInst(llvm::ExtractValueInst &I);
708 
709  void visitAtomicRMWInst(llvm::AtomicRMWInst &I);
710  void visitAtomicCmpXchgInst(llvm::AtomicCmpXchgInst &I);
711 
712  void visitInstruction(llvm::Instruction &I) {
713 #ifndef NDEBUG
714  llvm::errs() << "C Writer does not know about " << I;
715 #endif
716  llvm_unreachable(0);
717  }
718 
719  void outputLValue(llvm::Instruction *I) { Out << " " << GetValueName(I) << " = "; }
720 
721  bool isGotoCodeNecessary(llvm::BasicBlock *From, llvm::BasicBlock *To);
722  void printPHICopiesForSuccessor(llvm::BasicBlock *CurBlock, llvm::BasicBlock *Successor, unsigned Indent);
723  void printBranchToBlock(llvm::BasicBlock *CurBlock, llvm::BasicBlock *SuccBlock, unsigned Indent);
724  void printGEPExpression(llvm::Value *Ptr, llvm::gep_type_iterator I, llvm::gep_type_iterator E, bool Static);
725 
726  std::string GetValueName(const llvm::Value *Operand);
727 };
728 } // namespace
729 
730 char CWriter::ID = 0;
731 
/// Appends the escape sequence for one character to Result: an underscore,
/// the low nibble as a letter 'A'..'P', the high nibble as a letter
/// 'A'..'P', and a closing underscore.
static void CBEAppendEscapedChar(std::string &Result, char C) {
    // Work on the unsigned value so bytes >= 0x80 are handled without
    // relying on the signedness of plain char.
    const unsigned char UC = static_cast<unsigned char>(C);
    Result += '_';
    Result += static_cast<char>('A' + (UC & 15));
    Result += static_cast<char>('A' + ((UC >> 4) & 15));
    Result += '_';
}

/// CBEMangle - Turns an arbitrary name into one usable in the generated
/// code.
///
/// Alphanumeric characters, '_', '<' and '>' are copied through unchanged.
/// A doubled "<<" or ">>" is replaced by a single escape of that character
/// (both input characters are consumed).  Every other character is replaced
/// by its escape sequence (see CBEAppendEscapedChar).
///
/// @param S  the name to mangle
/// @return   the mangled name
static std::string CBEMangle(const std::string &S) {
    std::string Result;

    const std::string::size_type Len = S.size();
    for (std::string::size_type i = 0; i != Len; ++i) {
        const char C = S[i];
        const bool IsDoubledAngle = (i + 1 != Len) && (C == '>' || C == '<') && S[i + 1] == C;

        if (IsDoubledAngle) {
            CBEAppendEscapedChar(Result, C);
            ++i; // skip the second character of the pair
        } else if (isalnum(static_cast<unsigned char>(C)) || C == '_' || C == '<' || C == '>') {
            // The cast matters: passing a negative char to isalnum is
            // undefined behaviour.
            Result += C;
        } else {
            CBEAppendEscapedChar(Result, C);
        }
    }
    return Result;
}
753 
754 std::string CWriter::getStructName(llvm::StructType *ST) {
755  if (!ST->isLiteral() && !ST->getName().empty())
756  return CBEMangle("l_" + ST->getName().str());
757 
758  return "l_unnamed_" + llvm::utostr(UnnamedStructIDs[ST]);
759 }
760 
761 std::string CWriter::getArrayName(llvm::ArrayType *AT) { return "l_array_" + llvm::utostr(ArrayIDs[AT]); }
762 
763 /// printStructReturnPointerFunctionType - This is like printType for a struct
764 /// return type, except, instead of printing the type as void (*)(Struct*, ...)
765 /// print it as "Struct (*)(...)", for struct return functions.
766 void CWriter::printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
767 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
768  const llvm::AttrListPtr &PAL,
769 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
770  const llvm::AttributeSet &PAL,
771 #else // LLVM 5.0+
772  const llvm::AttributeList &PAL,
773 #endif
774  llvm::PointerType *TheTy) {
775  llvm::FunctionType *FTy = llvm::cast<llvm::FunctionType>(TheTy->getElementType());
776  std::string tstr;
777  llvm::raw_string_ostream FunctionInnards(tstr);
778  FunctionInnards << " (*) (";
779  bool PrintedType = false;
780 
781  llvm::FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
782  llvm::Type *RetTy = llvm::cast<llvm::PointerType>(*I)->getElementType();
783  unsigned Idx = 1;
784  for (++I, ++Idx; I != E; ++I, ++Idx) {
785  if (PrintedType)
786  FunctionInnards << ", ";
787  llvm::Type *ArgTy = *I;
788 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
789  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
790 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
791  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
792 #else // LLVM 5.0+
793  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attribute::ByVal)) {
794 #endif
795  assert(ArgTy->isPointerTy());
796  ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
797  }
798  printType(FunctionInnards, ArgTy,
799 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
800  PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt),
801 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
802  PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt),
803 #else // LLVM 5.0+
804  PAL.getParamAttributes(Idx).hasAttribute(llvm::Attribute::SExt),
805 #endif
806  "");
807  PrintedType = true;
808  }
809  if (FTy->isVarArg()) {
810  if (!PrintedType)
811  FunctionInnards << " int"; // dummy argument for empty vararg functs
812  FunctionInnards << ", ...";
813  } else if (!PrintedType) {
814  FunctionInnards << "void";
815  }
816  FunctionInnards << ')';
817  printType(Out, RetTy,
818 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
819  PAL.getParamAttributes(0).hasAttribute(llvm::Attributes::SExt),
820 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
821  PAL.getParamAttributes(0).hasAttribute(llvm::AttributeSet::ReturnIndex, llvm::Attribute::SExt),
822 #else // LLVM 5.0+
823  PAL.getParamAttributes(0).hasAttribute(llvm::Attribute::SExt),
824 #endif
825  FunctionInnards.str());
826 }
827 
828 llvm::raw_ostream &CWriter::printSimpleType(llvm::raw_ostream &Out, llvm::Type *Ty, bool isSigned,
829  const std::string &NameSoFar) {
830  assert((Ty->isFloatingPointTy() || Ty->isX86_MMXTy() || Ty->isIntegerTy() || Ty->isVectorTy() || Ty->isVoidTy()) &&
831  "Invalid type for printSimpleType");
832  switch (Ty->getTypeID()) {
833  case llvm::Type::VoidTyID:
834  return Out << "void " << NameSoFar;
835  case llvm::Type::IntegerTyID: {
836  unsigned NumBits = llvm::cast<llvm::IntegerType>(Ty)->getBitWidth();
837  if (NumBits == 1)
838  return Out << "bool " << NameSoFar;
839  else if (NumBits <= 8)
840  return Out << (isSigned ? "" : "u") << "int8_t " << NameSoFar;
841  else if (NumBits <= 16)
842  return Out << (isSigned ? "" : "u") << "int16_t " << NameSoFar;
843  else if (NumBits <= 32)
844  return Out << (isSigned ? "" : "u") << "int32_t " << NameSoFar;
845  else if (NumBits <= 64)
846  return Out << (isSigned ? "" : "u") << "int64_t " << NameSoFar;
847  else
848  return Out << "iN<" << NumBits << "> " << NameSoFar;
849  }
850  case llvm::Type::FloatTyID:
851  return Out << "float " << NameSoFar;
852  case llvm::Type::DoubleTyID:
853  return Out << "double " << NameSoFar;
854  // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
855  // present matches host 'long double'.
856  case llvm::Type::X86_FP80TyID:
857  case llvm::Type::PPC_FP128TyID:
858  case llvm::Type::FP128TyID:
859  return Out << "long double " << NameSoFar;
860 
861  case llvm::Type::X86_MMXTyID:
862  return printSimpleType(Out, llvm::Type::getInt32Ty(Ty->getContext()), isSigned,
863  " __attribute__((vector_size(64))) " + NameSoFar);
864 
865  case llvm::Type::VectorTyID: {
866  llvm::VectorType *VTy = llvm::cast<llvm::VectorType>(Ty);
867 #if 1
868  const char *suffix = NULL;
869  const llvm::Type *eltTy = VTy->getElementType();
870  if (eltTy->isFloatTy())
871  suffix = "f";
872  else if (eltTy->isDoubleTy())
873  suffix = "d";
874  else {
875  assert(eltTy->isIntegerTy());
876  switch (eltTy->getPrimitiveSizeInBits()) {
877  case 1:
878  suffix = "i1";
879  break;
880  case 8:
881  suffix = "i8";
882  break;
883  case 16:
884  suffix = "i16";
885  break;
886  case 32:
887  suffix = "i32";
888  break;
889  case 64:
890  suffix = "i64";
891  break;
892  default:
893  suffix = "iN";
894  break;
895  }
896  }
897 
898  return Out << "__vec" << VTy->getNumElements() << "_" << suffix << " " << NameSoFar;
899 #else
900  return printSimpleType(Out, VTy->getElementType(), isSigned,
901  " __attribute__((vector_size(" + utostr(TD->getTypeAllocSize(VTy)) + " ))) " +
902  NameSoFar);
903 #endif
904  }
905 
906  default:
907 #ifndef NDEBUG
908  llvm::errs() << "Unknown primitive type: " << *Ty << "\n";
909 #endif
910  llvm_unreachable(0);
911  }
912  return Out << "";
913 }
914 
915 // Pass the Type* and the variable name and this prints out the variable
916 // declaration.
917 //
918 llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty, bool isSigned,
919  const std::string &NameSoFar, bool IgnoreName,
920 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
921  const llvm::AttrListPtr &PAL
922 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
923  const llvm::AttributeSet &PAL
924 #else // LLVM 5.0+
925  const llvm::AttributeList &PAL
926 #endif
927 ) {
928 
929  if (Ty->isFloatingPointTy() || Ty->isX86_MMXTy() || Ty->isIntegerTy() || Ty->isVectorTy() || Ty->isVoidTy()) {
930  printSimpleType(Out, Ty, isSigned, NameSoFar);
931  return Out;
932  }
933 
934  switch (Ty->getTypeID()) {
935  case llvm::Type::FunctionTyID: {
936  llvm::FunctionType *FTy = llvm::cast<llvm::FunctionType>(Ty);
937  std::string tstr;
938  llvm::raw_string_ostream FunctionInnards(tstr);
939  FunctionInnards << " (" << NameSoFar << ") (";
940  unsigned Idx = 1;
941  for (llvm::FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I) {
942  llvm::Type *ArgTy = *I;
943 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
944  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
945 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
946  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
947 #else // LLVM 5.0+
948  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attribute::ByVal)) {
949 #endif
950  assert(ArgTy->isPointerTy());
951  ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
952  }
953  if (I != FTy->param_begin())
954  FunctionInnards << ", ";
955  printType(FunctionInnards, ArgTy,
956 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
957  PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt),
958 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
959  PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex,
960  llvm::Attribute::SExt),
961 #else // LLVM 5.0+
962  PAL.getParamAttributes(Idx).hasAttribute(llvm::Attribute::SExt),
963 #endif
964  "");
965  ++Idx;
966  }
967  if (FTy->isVarArg()) {
968  if (!FTy->getNumParams())
969  FunctionInnards << " int"; // dummy argument for empty vaarg functs
970  FunctionInnards << ", ...";
971  } else if (!FTy->getNumParams()) {
972  FunctionInnards << "void";
973  }
974  FunctionInnards << ')';
975  printType(Out, FTy->getReturnType(),
976 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
977  PAL.getParamAttributes(0).hasAttribute(llvm::Attributes::SExt),
978 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
979  PAL.getParamAttributes(0).hasAttribute(llvm::AttributeSet::ReturnIndex, llvm::Attribute::SExt),
980 #else // LLVM 5.0+
981  PAL.getParamAttributes(0).hasAttribute(llvm::Attribute::SExt),
982 #endif
983  FunctionInnards.str());
984  return Out;
985  }
986  case llvm::Type::StructTyID: {
987  llvm::StructType *STy = llvm::cast<llvm::StructType>(Ty);
988 
989  // Check to see if the type is named.
990  if (!IgnoreName)
991  return Out << getStructName(STy) << ' ' << NameSoFar;
992 
993  Out << "struct " << NameSoFar << " {\n";
994 
995  // print initialization func
996  if (STy->getNumElements() > 0) {
997  Out << " static " << NameSoFar << " init(";
998  unsigned Idx = 0;
999  for (llvm::StructType::element_iterator I = STy->element_begin(), E = STy->element_end(); I != E;
1000  ++I, ++Idx) {
1001  char buf[64];
1002  snprintf(buf, sizeof(buf), "v%d", Idx);
1003  printType(Out, *I, false, buf);
1004  if (Idx + 1 < STy->getNumElements())
1005  Out << ", ";
1006  }
1007  Out << ") {\n";
1008  Out << " " << NameSoFar << " ret;\n";
1009  for (Idx = 0; Idx < STy->getNumElements(); ++Idx)
1010  Out << " ret.field" << Idx << " = v" << Idx << ";\n";
1011  Out << " return ret;\n";
1012  Out << " }\n";
1013  }
1014 
1015  unsigned Idx = 0;
1016  for (llvm::StructType::element_iterator I = STy->element_begin(), E = STy->element_end(); I != E; ++I) {
1017  Out << " ";
1018  printType(Out, *I, false, "field" + llvm::utostr(Idx++));
1019  Out << ";\n";
1020  }
1021  Out << '}';
1022  if (STy->isPacked())
1023  Out << " __attribute__ ((packed))";
1024  return Out;
1025  }
1026 
1027  case llvm::Type::PointerTyID: {
1028  llvm::PointerType *PTy = llvm::cast<llvm::PointerType>(Ty);
1029  std::string ptrName = "*" + NameSoFar;
1030 
1031  if (PTy->getElementType()->isArrayTy() || PTy->getElementType()->isVectorTy())
1032  ptrName = "(" + ptrName + ")";
1033 
1034  if (!PAL.isEmpty())
1035  // Must be a function ptr cast!
1036  return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
1037  return printType(Out, PTy->getElementType(), false, ptrName);
1038  }
1039 
1040  case llvm::Type::ArrayTyID: {
1041  llvm::ArrayType *ATy = llvm::cast<llvm::ArrayType>(Ty);
1042 
1043  // Check to see if the type is named.
1044  if (!IgnoreName)
1045  return Out << getArrayName(ATy) << ' ' << NameSoFar;
1046 
1047  unsigned NumElements = (unsigned)ATy->getNumElements();
1048  if (NumElements == 0)
1049  NumElements = 1;
1050  // Arrays are wrapped in structs to allow them to have normal
1051  // value semantics (avoiding the array "decay").
1052  Out << "struct " << NameSoFar << " {\n";
1053  // init func
1054  Out << " static " << NameSoFar << " init(";
1055  for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
1056  char buf[64];
1057  snprintf(buf, sizeof(buf), "v%d", Idx);
1058  printType(Out, ATy->getElementType(), false, buf);
1059  if (Idx + 1 < NumElements)
1060  Out << ", ";
1061  }
1062  Out << ") {\n";
1063  Out << " " << NameSoFar << " ret;\n";
1064  for (unsigned Idx = 0; Idx < NumElements; ++Idx)
1065  Out << " ret.array[" << Idx << "] = v" << Idx << ";\n";
1066  Out << " return ret;\n";
1067  Out << " }\n ";
1068 
1069  // if it's an array of i8s, also provide a version that takes a const
1070  // char *
1071  if (ATy->getElementType() == LLVMTypes::Int8Type) {
1072  Out << " static " << NameSoFar << " init(const char *p) {\n";
1073  Out << " " << NameSoFar << " ret;\n";
1074  Out << " memcpy((uint8_t *)ret.array, (uint8_t *)p, " << NumElements << ");\n";
1075  Out << " return ret;\n";
1076  Out << " }\n";
1077  }
1078 
1079  printType(Out, ATy->getElementType(), false, "array[" + llvm::utostr(NumElements) + "]");
1080  return Out << ";\n} ";
1081  }
1082 
1083  default:
1084  llvm_unreachable("Unhandled case in getTypeProps!");
1085  }
1086  return Out << "";
1087 }
1088 
1089 void CWriter::printConstantArray(llvm::ConstantArray *CPA, bool Static) {
1090  // vec16_i64 should be handled separately
1091 
1092  if (is_vec16_i64_ty(CPA->getOperand(0)->getType())) {
1093  Out << "/* vec16_i64 should be loaded carefully on knc */";
1094  Out << "\n#if defined(KNC)\n";
1095  Out << "hilo2zmm";
1096  Out << "\n#endif\n";
1097  }
1098  Out << "(";
1099  printConstant(llvm::cast<llvm::Constant>(CPA->getOperand(0)), Static);
1100  Out << ")";
1101 
1102  for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
1103  Out << ", ";
1104 
1105  if (is_vec16_i64_ty(CPA->getOperand(i)->getType())) {
1106  Out << "/* vec16_i64 should be loaded carefully on knc */";
1107  Out << "\n#if defined(KNC) \n";
1108  Out << "hilo2zmm";
1109  Out << "\n#endif \n";
1110  }
1111  Out << "(";
1112  printConstant(llvm::cast<llvm::Constant>(CPA->getOperand(i)), Static);
1113  Out << ")";
1114  }
1115 }
1116 
1117 void CWriter::printConstantVector(llvm::ConstantVector *CP, bool Static) {
1118  printConstant(llvm::cast<llvm::Constant>(CP->getOperand(0)), Static);
1119  for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
1120  Out << ", ";
1121  printConstant(llvm::cast<llvm::Constant>(CP->getOperand(i)), Static);
1122  }
1123 }
1124 
1125 void CWriter::printConstantDataSequential(llvm::ConstantDataSequential *CDS, bool Static) {
1126  // As a special case, print the array as a string if it is an array of
1127  // ubytes or an array of sbytes with positive values.
1128  //
1129  if (CDS->isCString()) {
1130  Out << '\"';
1131  // Keep track of whether the last number was a hexadecimal escape.
1132  bool LastWasHex = false;
1133 
1134  llvm::StringRef Bytes = CDS->getAsCString();
1135 
1136  // Do not include the last character, which we know is null
1137  for (unsigned i = 0, e = Bytes.size(); i != e; ++i) {
1138  unsigned char C = Bytes[i];
1139 
1140  // Print it out literally if it is a printable character. The only thing
1141  // to be careful about is when the last letter output was a hex escape
1142  // code, in which case we have to be careful not to print out hex digits
1143  // explicitly (the C compiler thinks it is a continuation of the previous
1144  // character, sheesh...)
1145  //
1146  if (isprint(C) && (!LastWasHex || !isxdigit(C))) {
1147  LastWasHex = false;
1148  if (C == '"' || C == '\\')
1149  Out << "\\" << (char)C;
1150  else
1151  Out << (char)C;
1152  } else {
1153  LastWasHex = false;
1154  switch (C) {
1155  case '\n':
1156  Out << "\\n";
1157  break;
1158  case '\t':
1159  Out << "\\t";
1160  break;
1161  case '\r':
1162  Out << "\\r";
1163  break;
1164  case '\v':
1165  Out << "\\v";
1166  break;
1167  case '\a':
1168  Out << "\\a";
1169  break;
1170  case '\"':
1171  Out << "\\\"";
1172  break;
1173  case '\'':
1174  Out << "\\\'";
1175  break;
1176  default:
1177  Out << "\\x";
1178  Out << (char)((C / 16 < 10) ? (C / 16 + '0') : (C / 16 - 10 + 'A'));
1179  Out << (char)(((C & 15) < 10) ? ((C & 15) + '0') : ((C & 15) - 10 + 'A'));
1180  LastWasHex = true;
1181  break;
1182  }
1183  }
1184  }
1185  Out << '\"';
1186  } else {
1187  printConstant(CDS->getElementAsConstant(0), Static);
1188  for (unsigned i = 1, e = CDS->getNumElements(); i != e; ++i) {
1189  Out << ", ";
1190  printConstant(CDS->getElementAsConstant(i), Static);
1191  }
1192  }
1193 }
1194 
1195 static inline std::string ftostr(const llvm::APFloat &V) {
1196  std::string Buf;
1197 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
1198  if (&V.getSemantics() == &llvm::APFloat::IEEEdouble) {
1199  llvm::raw_string_ostream(Buf) << V.convertToDouble();
1200  return Buf;
1201  } else if (&V.getSemantics() == &llvm::APFloat::IEEEsingle) {
1202  llvm::raw_string_ostream(Buf) << (double)V.convertToFloat();
1203  return Buf;
1204  }
1205 #else // LLVM 4.0+
1206  if (&V.getSemantics() == &llvm::APFloat::IEEEdouble()) {
1207  llvm::raw_string_ostream(Buf) << V.convertToDouble();
1208  return Buf;
1209  } else if (&V.getSemantics() == &llvm::APFloat::IEEEsingle()) {
1210  llvm::raw_string_ostream(Buf) << (double)V.convertToFloat();
1211  return Buf;
1212  }
1213 #endif
1214  return "<unknown format in ftostr>"; // error
1215 }
1216 
1217 // isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
1218 // textually as a double (rather than as a reference to a stack-allocated
1219 // variable). We decide this by converting CFP to a string and back into a
1220 // double, and then checking whether the conversion results in a bit-equal
1221 // double to the original value of CFP. This depends on us and the target C
1222 // compiler agreeing on the conversion process (which is pretty likely since we
1223 // only deal in IEEE FP).
1224 //
1225 static bool isFPCSafeToPrint(const llvm::ConstantFP *CFP) {
1226  bool ignored;
1227  // Do long doubles in hex for now.
1228  if (CFP->getType() != llvm::Type::getFloatTy(CFP->getContext()) &&
1229  CFP->getType() != llvm::Type::getDoubleTy(CFP->getContext()))
1230  return false;
1231  llvm::APFloat APF = llvm::APFloat(CFP->getValueAPF()); // copy
1232  if (CFP->getType() == llvm::Type::getFloatTy(CFP->getContext()))
1233 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
1234  APF.convert(llvm::APFloat::IEEEdouble, llvm::APFloat::rmNearestTiesToEven, &ignored);
1235 #else // LLVM 4.0+
1236  APF.convert(llvm::APFloat::IEEEdouble(), llvm::APFloat::rmNearestTiesToEven, &ignored);
1237 #endif
1238 #if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
1239  char Buffer[100];
1240  snprintf(Buffer, sizeof(Buffer), "%a", APF.convertToDouble());
1241  if (!strncmp(Buffer, "0x", 2) || !strncmp(Buffer, "-0x", 3) || !strncmp(Buffer, "+0x", 3))
1242  return APF.bitwiseIsEqual(llvm::APFloat(atof(Buffer)));
1243  return false;
1244 #else
1245  std::string StrVal = ftostr(APF);
1246 
1247  while (StrVal[0] == ' ')
1248  StrVal.erase(StrVal.begin());
1249 
1250  // Check to make sure that the stringized number is not some string like "Inf"
1251  // or NaN. Check that the string matches the "[-+]?[0-9]" regex.
1252  if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
1253  ((StrVal[0] == '-' || StrVal[0] == '+') && (StrVal[1] >= '0' && StrVal[1] <= '9')))
1254  // Reparse stringized version!
1255  return APF.bitwiseIsEqual(llvm::APFloat(atof(StrVal.c_str())));
1256  return false;
1257 #endif
1258 }
1259 
1260 /// Print out the casting for a cast operation. This does the double casting
1261 /// necessary for conversion to the destination type, if necessary.
1262 /// Return value indicates whether a closing paren is needed.
1263 /// @brief Print a cast
1264 bool CWriter::printCast(unsigned opc, llvm::Type *SrcTy, llvm::Type *DstTy) {
1265  if (llvm::isa<const llvm::VectorType>(DstTy)) {
1266  assert(llvm::isa<const llvm::VectorType>(SrcTy));
1267  switch (opc) {
1268  case llvm::Instruction::UIToFP:
1269  Out << "__cast_uitofp(";
1270  break;
1271  case llvm::Instruction::SIToFP:
1272  Out << "__cast_sitofp(";
1273  break;
1274  case llvm::Instruction::IntToPtr:
1275  llvm_unreachable("Invalid vector cast");
1276  case llvm::Instruction::Trunc:
1277  Out << "__cast_trunc(";
1278  break;
1279  case llvm::Instruction::BitCast:
1280  Out << "__cast_bits(";
1281  break;
1282  case llvm::Instruction::FPExt:
1283  Out << "__cast_fpext(";
1284  break;
1285  case llvm::Instruction::FPTrunc:
1286  Out << "__cast_fptrunc(";
1287  break;
1288  case llvm::Instruction::ZExt:
1289  Out << "__cast_zext(";
1290  break;
1291  case llvm::Instruction::PtrToInt:
1292  llvm_unreachable("Invalid vector cast");
1293  case llvm::Instruction::FPToUI:
1294  Out << "__cast_fptoui(";
1295  break;
1296  case llvm::Instruction::SExt:
1297  Out << "__cast_sext(";
1298  break;
1299  case llvm::Instruction::FPToSI:
1300  Out << "__cast_fptosi(";
1301  break;
1302  default:
1303  llvm_unreachable("Invalid cast opcode");
1304  }
1305 
1306  // print a call to the constructor for the destination type for the
1307  // first arg; this bogus first parameter is only used to convey the
1308  // desired return type to the callee.
1309  printType(Out, DstTy);
1310  Out << "(), ";
1311 
1312  return true;
1313  }
1314 
1315  // Print the destination type cast
1316  switch (opc) {
1317  case llvm::Instruction::BitCast: {
1318  if (DstTy->isPointerTy()) {
1319  Out << '(';
1320  printType(Out, DstTy);
1321  Out << ')';
1322  break;
1323  } else {
1324  Out << "__cast_bits((";
1325  printType(Out, DstTy);
1326  Out << ")0, ";
1327  return true;
1328  }
1329  }
1330  case llvm::Instruction::UIToFP:
1331  case llvm::Instruction::SIToFP:
1332  case llvm::Instruction::IntToPtr:
1333  case llvm::Instruction::Trunc:
1334  case llvm::Instruction::FPExt:
1335  case llvm::Instruction::FPTrunc: // For these the DstTy sign doesn't matter
1336  Out << '(';
1337  printType(Out, DstTy);
1338  Out << ')';
1339  break;
1340  case llvm::Instruction::ZExt:
1341  case llvm::Instruction::PtrToInt:
1342  case llvm::Instruction::FPToUI: // For these, make sure we get an unsigned dest
1343  Out << '(';
1344  printSimpleType(Out, DstTy, false);
1345  Out << ')';
1346  break;
1347  case llvm::Instruction::SExt:
1348  case llvm::Instruction::FPToSI: // For these, make sure we get a signed dest
1349  Out << '(';
1350  printSimpleType(Out, DstTy, true);
1351  Out << ')';
1352  break;
1353  default:
1354  llvm_unreachable("Invalid cast opcode");
1355  }
1356 
1357  // Print the source type cast
1358  switch (opc) {
1359  case llvm::Instruction::UIToFP:
1360  case llvm::Instruction::ZExt:
1361  Out << '(';
1362  printSimpleType(Out, SrcTy, false);
1363  Out << ')';
1364  break;
1365  case llvm::Instruction::SIToFP:
1366  case llvm::Instruction::SExt:
1367  Out << '(';
1368  printSimpleType(Out, SrcTy, true);
1369  Out << ')';
1370  break;
1371  case llvm::Instruction::IntToPtr:
1372  case llvm::Instruction::PtrToInt:
1373  // Avoid "cast to pointer from integer of different size" warnings
1374  Out << "(unsigned long)";
1375  break;
1376  case llvm::Instruction::Trunc:
1377  case llvm::Instruction::BitCast:
1378  case llvm::Instruction::FPExt:
1379  case llvm::Instruction::FPTrunc:
1380  case llvm::Instruction::FPToSI:
1381  case llvm::Instruction::FPToUI:
1382  break; // These don't need a source cast.
1383  default:
1384  llvm_unreachable("Invalid cast opcode");
1385  break;
1386  }
1387  return false;
1388 }
1389 
1390 /** Construct the name of a function with the given base and returning a
1391  vector of a given type, of the specified idth. For example, if base
1392  is "foo" and matchType is i32 and width is 16, this will return the
1393  string "__foo_i32<__vec16_i32>".
1394  */
1395 static const char *lGetTypedFunc(const char *base, llvm::Type *matchType, int width) {
1396  static const char *ty_desc_str[] = {"f", "d", "i1", "i8", "i16", "i32", "i64"};
1397  static const char *fn_desc_str[] = {"float", "double", "i1", "i8", "i16", "i32", "i64"};
1398  enum { DESC_FLOAT, DESC_DOUBLE, DESC_I1, DESC_I8, DESC_I16, DESC_I32, DESC_I64 } desc;
1399 
1400  switch (matchType->getTypeID()) {
1401  case llvm::Type::FloatTyID:
1402  desc = DESC_FLOAT;
1403  break;
1404  case llvm::Type::DoubleTyID:
1405  desc = DESC_DOUBLE;
1406  break;
1407  case llvm::Type::IntegerTyID: {
1408  switch (llvm::cast<llvm::IntegerType>(matchType)->getBitWidth()) {
1409  case 1:
1410  desc = DESC_I1;
1411  break;
1412  case 8:
1413  desc = DESC_I8;
1414  break;
1415  case 16:
1416  desc = DESC_I16;
1417  break;
1418  case 32:
1419  desc = DESC_I32;
1420  break;
1421  case 64:
1422  desc = DESC_I64;
1423  break;
1424  default:
1425  return NULL;
1426  }
1427  break;
1428  }
1429  default:
1430  return NULL;
1431  }
1432 
1433  char buf[64];
1434  snprintf(buf, 64, "__%s_%s<__vec%d_%s>", base, fn_desc_str[desc], width, ty_desc_str[desc]);
1435  return strdup(buf);
1436 }
1437 
1438 // printConstant - The LLVM Constant to C Constant converter.
1439 void CWriter::printConstant(llvm::Constant *CPV, bool Static) {
1440  if (const llvm::ConstantExpr *CE = llvm::dyn_cast<llvm::ConstantExpr>(CPV)) {
1441  if (llvm::isa<llvm::VectorType>(CPV->getType())) {
1442  assert(CE->getOpcode() == llvm::Instruction::BitCast);
1443  llvm::ConstantExpr *Op = llvm::dyn_cast<llvm::ConstantExpr>(CE->getOperand(0));
1444  assert(Op && Op->getOpcode() == llvm::Instruction::BitCast);
1445  assert(llvm::isa<llvm::VectorType>(Op->getOperand(0)->getType()));
1446 
1447  Out << "(__cast_bits(";
1448  printType(Out, CE->getType());
1449  Out << "(), ";
1450  printConstant(Op->getOperand(0), Static);
1451  Out << "))";
1452  return;
1453  }
1454  switch (CE->getOpcode()) {
1455  case llvm::Instruction::Trunc:
1456  case llvm::Instruction::ZExt:
1457  case llvm::Instruction::SExt:
1458  case llvm::Instruction::FPTrunc:
1459  case llvm::Instruction::FPExt:
1460  case llvm::Instruction::UIToFP:
1461  case llvm::Instruction::SIToFP:
1462  case llvm::Instruction::FPToUI:
1463  case llvm::Instruction::FPToSI:
1464  case llvm::Instruction::PtrToInt:
1465  case llvm::Instruction::IntToPtr:
1466  case llvm::Instruction::BitCast: {
1467  if (CE->getOpcode() == llvm::Instruction::BitCast && CE->getType()->isPointerTy() == false) {
1468  Out << "__cast_bits((";
1469  printType(Out, CE->getType());
1470  Out << ")0, ";
1471  printConstant(CE->getOperand(0), Static);
1472  Out << ")";
1473  return;
1474  }
1475 
1476  Out << "(";
1477  bool closeParen = printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
1478  if (CE->getOpcode() == llvm::Instruction::SExt &&
1479  CE->getOperand(0)->getType() == llvm::Type::getInt1Ty(CPV->getContext())) {
1480  // Make sure we really sext from bool here by subtracting from 0
1481  Out << "0-";
1482  }
1483  printConstant(CE->getOperand(0), Static);
1484  if (CE->getType() == llvm::Type::getInt1Ty(CPV->getContext()) &&
1485  (CE->getOpcode() == llvm::Instruction::Trunc || CE->getOpcode() == llvm::Instruction::FPToUI ||
1486  CE->getOpcode() == llvm::Instruction::FPToSI || CE->getOpcode() == llvm::Instruction::PtrToInt)) {
1487  // Make sure we really truncate to bool here by anding with 1
1488  Out << "&1u";
1489  }
1490  Out << ')';
1491  if (closeParen)
1492  Out << ')';
1493  return;
1494  }
1495  case llvm::Instruction::GetElementPtr:
1496  assert(!llvm::isa<llvm::VectorType>(CPV->getType()));
1497  Out << "(";
1498  printGEPExpression(CE->getOperand(0), gep_type_begin(CPV), gep_type_end(CPV), Static);
1499  Out << ")";
1500  return;
1501  case llvm::Instruction::Select:
1502  assert(!llvm::isa<llvm::VectorType>(CPV->getType()));
1503  Out << '(';
1504  printConstant(CE->getOperand(0), Static);
1505  Out << '?';
1506  printConstant(CE->getOperand(1), Static);
1507  Out << ':';
1508  printConstant(CE->getOperand(2), Static);
1509  Out << ')';
1510  return;
1511  case llvm::Instruction::Add:
1512  case llvm::Instruction::FAdd:
1513  case llvm::Instruction::Sub:
1514  case llvm::Instruction::FSub:
1515  case llvm::Instruction::Mul:
1516  case llvm::Instruction::FMul:
1517  case llvm::Instruction::SDiv:
1518  case llvm::Instruction::UDiv:
1519  case llvm::Instruction::FDiv:
1520  case llvm::Instruction::URem:
1521  case llvm::Instruction::SRem:
1522  case llvm::Instruction::FRem:
1523  case llvm::Instruction::And:
1524  case llvm::Instruction::Or:
1525  case llvm::Instruction::Xor:
1526  case llvm::Instruction::ICmp:
1527  case llvm::Instruction::Shl:
1528  case llvm::Instruction::LShr:
1529  case llvm::Instruction::AShr: {
1530  assert(!llvm::isa<llvm::VectorType>(CPV->getType()));
1531  Out << '(';
1532  bool NeedsClosingParens = printConstExprCast(CE, Static);
1533  printConstantWithCast(CE->getOperand(0), CE->getOpcode());
1534  switch (CE->getOpcode()) {
1535  case llvm::Instruction::Add:
1536  case llvm::Instruction::FAdd:
1537  Out << " + ";
1538  break;
1539  case llvm::Instruction::Sub:
1540  case llvm::Instruction::FSub:
1541  Out << " - ";
1542  break;
1543  case llvm::Instruction::Mul:
1544  case llvm::Instruction::FMul:
1545  Out << " * ";
1546  break;
1547  case llvm::Instruction::URem:
1548  case llvm::Instruction::SRem:
1549  case llvm::Instruction::FRem:
1550  Out << " % ";
1551  break;
1552  case llvm::Instruction::UDiv:
1553  case llvm::Instruction::SDiv:
1554  case llvm::Instruction::FDiv:
1555  Out << " / ";
1556  break;
1557  case llvm::Instruction::And:
1558  Out << " & ";
1559  break;
1560  case llvm::Instruction::Or:
1561  Out << " | ";
1562  break;
1563  case llvm::Instruction::Xor:
1564  Out << " ^ ";
1565  break;
1566  case llvm::Instruction::Shl:
1567  Out << " << ";
1568  break;
1569  case llvm::Instruction::LShr:
1570  case llvm::Instruction::AShr:
1571  Out << " >> ";
1572  break;
1573  case llvm::Instruction::ICmp:
1574  switch (CE->getPredicate()) {
1575  case llvm::ICmpInst::ICMP_EQ:
1576  Out << " == ";
1577  break;
1578  case llvm::ICmpInst::ICMP_NE:
1579  Out << " != ";
1580  break;
1581  case llvm::ICmpInst::ICMP_SLT:
1582  case llvm::ICmpInst::ICMP_ULT:
1583  Out << " < ";
1584  break;
1585  case llvm::ICmpInst::ICMP_SLE:
1586  case llvm::ICmpInst::ICMP_ULE:
1587  Out << " <= ";
1588  break;
1589  case llvm::ICmpInst::ICMP_SGT:
1590  case llvm::ICmpInst::ICMP_UGT:
1591  Out << " > ";
1592  break;
1593  case llvm::ICmpInst::ICMP_SGE:
1594  case llvm::ICmpInst::ICMP_UGE:
1595  Out << " >= ";
1596  break;
1597  default:
1598  llvm_unreachable("Illegal ICmp predicate");
1599  }
1600  break;
1601  default:
1602  llvm_unreachable("Illegal opcode here!");
1603  }
1604  printConstantWithCast(CE->getOperand(1), CE->getOpcode());
1605  if (NeedsClosingParens)
1606  Out << "))";
1607  Out << ')';
1608  return;
1609  }
1610  case llvm::Instruction::FCmp: {
1611  assert(!llvm::isa<llvm::VectorType>(CPV->getType()));
1612  Out << '(';
1613  bool NeedsClosingParens = printConstExprCast(CE, Static);
1614  if (CE->getPredicate() == llvm::FCmpInst::FCMP_FALSE)
1615  Out << "0";
1616  else if (CE->getPredicate() == llvm::FCmpInst::FCMP_TRUE)
1617  Out << "1";
1618  else {
1619  const char *op = 0;
1620  switch (CE->getPredicate()) {
1621  default:
1622  llvm_unreachable("Illegal FCmp predicate");
1623  case llvm::FCmpInst::FCMP_ORD:
1624  op = "ord";
1625  break;
1626  case llvm::FCmpInst::FCMP_UNO:
1627  op = "uno";
1628  break;
1629  case llvm::FCmpInst::FCMP_UEQ:
1630  op = "ueq";
1631  break;
1632  case llvm::FCmpInst::FCMP_UNE:
1633  op = "une";
1634  break;
1635  case llvm::FCmpInst::FCMP_ULT:
1636  op = "ult";
1637  break;
1638  case llvm::FCmpInst::FCMP_ULE:
1639  op = "ule";
1640  break;
1641  case llvm::FCmpInst::FCMP_UGT:
1642  op = "ugt";
1643  break;
1644  case llvm::FCmpInst::FCMP_UGE:
1645  op = "uge";
1646  break;
1647  case llvm::FCmpInst::FCMP_OEQ:
1648  op = "oeq";
1649  break;
1650  case llvm::FCmpInst::FCMP_ONE:
1651  op = "one";
1652  break;
1653  case llvm::FCmpInst::FCMP_OLT:
1654  op = "olt";
1655  break;
1656  case llvm::FCmpInst::FCMP_OLE:
1657  op = "ole";
1658  break;
1659  case llvm::FCmpInst::FCMP_OGT:
1660  op = "ogt";
1661  break;
1662  case llvm::FCmpInst::FCMP_OGE:
1663  op = "oge";
1664  break;
1665  }
1666  Out << "llvm_fcmp_" << op << "(";
1667  printConstantWithCast(CE->getOperand(0), CE->getOpcode());
1668  Out << ", ";
1669  printConstantWithCast(CE->getOperand(1), CE->getOpcode());
1670  Out << ")";
1671  }
1672  if (NeedsClosingParens)
1673  Out << "))";
1674  Out << ')';
1675  return;
1676  }
1677  default:
1678 #ifndef NDEBUG
1679  llvm::errs() << "CWriter Error: Unhandled constant expression: " << *CE << "\n";
1680 #endif
1681  llvm_unreachable(0);
1682  }
1683  } else if (llvm::isa<llvm::UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
1684  if (CPV->getType()->isVectorTy()) {
1685  printType(Out, CPV->getType());
1686  Out << "( /* UNDEF */)";
1687  return;
1688  }
1689 
1690  Out << "((";
1691  printType(Out, CPV->getType()); // sign doesn't matter
1692  Out << ")/*UNDEF*/";
1693  Out << "0)";
1694  return;
1695  }
1696 
1697  if (llvm::ConstantInt *CI = llvm::dyn_cast<llvm::ConstantInt>(CPV)) {
1698  llvm::Type *Ty = CI->getType();
1699  if (Ty == llvm::Type::getInt1Ty(CPV->getContext()))
1700  Out << (CI->getZExtValue() ? '1' : '0');
1701  else if (Ty == llvm::Type::getInt32Ty(CPV->getContext()))
1702  Out << CI->getZExtValue() << 'u';
1703  else if (Ty == llvm::Type::getInt64Ty(CPV->getContext()))
1704  Out << CI->getZExtValue() << "ull";
1705  else if (Ty->getPrimitiveSizeInBits() > 64) {
1706  Out << "\"";
1707  // const uint64_t *Ptr64 = CPV->getUniqueInteger().getRawData();
1708  const uint64_t *Ptr64 = CI->getValue().getRawData();
1709  for (unsigned i = 0; i < Ty->getPrimitiveSizeInBits(); i++) {
1710  Out << ((Ptr64[i / (sizeof(uint64_t) * 8)] >> (i % (sizeof(uint64_t) * 8))) & 1);
1711  }
1712  Out << "\"";
1713  } else {
1714  Out << "((";
1715  printSimpleType(Out, Ty, false) << ')';
1716  if (CI->isMinValue(true))
1717  Out << CI->getZExtValue() << 'u';
1718  else
1719  Out << CI->getSExtValue();
1720  Out << ')';
1721  }
1722  return;
1723  }
1724 
1725  switch (CPV->getType()->getTypeID()) {
1726  case llvm::Type::FloatTyID:
1727  case llvm::Type::DoubleTyID:
1728  case llvm::Type::X86_FP80TyID:
1729  case llvm::Type::PPC_FP128TyID:
1730  case llvm::Type::FP128TyID: {
1731  llvm::ConstantFP *FPC = llvm::cast<llvm::ConstantFP>(CPV);
1732  std::map<const llvm::ConstantFP *, unsigned>::iterator I = FPConstantMap.find(FPC);
1733  if (I != FPConstantMap.end()) {
1734  // Because of FP precision problems we must load from a stack allocated
1735  // value that holds the value in hex.
1736  Out << "(*("
1737  << (FPC->getType() == llvm::Type::getFloatTy(CPV->getContext())
1738  ? "float"
1739  : FPC->getType() == llvm::Type::getDoubleTy(CPV->getContext()) ? "double" : "long double")
1740  << "*)&FPConstant" << I->second << ')';
1741  } else {
1742  double V;
1743  if (FPC->getType() == llvm::Type::getFloatTy(CPV->getContext()))
1744  V = FPC->getValueAPF().convertToFloat();
1745  else if (FPC->getType() == llvm::Type::getDoubleTy(CPV->getContext()))
1746  V = FPC->getValueAPF().convertToDouble();
1747  else {
1748  // Long double. Convert the number to double, discarding precision.
1749  // This is not awesome, but it at least makes the CBE output somewhat
1750  // useful.
1751  llvm::APFloat Tmp = FPC->getValueAPF();
1752  bool LosesInfo;
1753 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
1754  Tmp.convert(llvm::APFloat::IEEEdouble, llvm::APFloat::rmTowardZero, &LosesInfo);
1755 #else // LLVM 4.0+
1756  Tmp.convert(llvm::APFloat::IEEEdouble(), llvm::APFloat::rmTowardZero, &LosesInfo);
1757 #endif
1758  V = Tmp.convertToDouble();
1759  }
1760 
1761  if (std::isnan(V)) {
1762  // The value is NaN
1763 
1764  // FIXME the actual NaN bits should be emitted.
1765  // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
1766  // it's 0x7ff4.
1767  const unsigned long QuietNaN = 0x7ff8UL;
1768  // const unsigned long SignalNaN = 0x7ff4UL;
1769 
1770  // We need to grab the first part of the FP #
1771  char Buffer[100];
1772 
1773  uint64_t ll = llvm::DoubleToBits(V);
1774  snprintf(Buffer, sizeof(Buffer), "0x%" PRIx64, ll);
1775 
1776  std::string Num(&Buffer[0], &Buffer[6]);
1777  unsigned long Val = strtoul(Num.c_str(), 0, 16);
1778 
1779  if (FPC->getType() == llvm::Type::getFloatTy(FPC->getContext()))
1780  Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\"" << Buffer << "\") /*nan*/ ";
1781  else
1782  Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\"" << Buffer << "\") /*nan*/ ";
1783  } else if (std::isinf(V)) {
1784  // The value is Inf
1785  if (V < 0)
1786  Out << '-';
1787  Out << "LLVM_INF" << (FPC->getType() == llvm::Type::getFloatTy(FPC->getContext()) ? "F" : "")
1788  << " /*inf*/ ";
1789  } else {
1790  std::string Num;
1791 #if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
1792  // Print out the constant as a floating point number.
1793  char Buffer[100];
1794  snprintf(Buffer, sizeof(Buffer), "%a", V);
1795  Num = Buffer;
1796 #else
1797  Num = ftostr(FPC->getValueAPF());
1798 #endif
1799  Out << Num;
1800  }
1801  }
1802  break;
1803  }
1804 
1805  case llvm::Type::ArrayTyID: {
1806  llvm::ArrayType *AT = llvm::cast<llvm::ArrayType>(CPV->getType());
1807  if (Static)
1808  // arrays are wrapped in structs...
1809  Out << "{ ";
1810  else {
1811  // call init func of the struct it's wrapped in...
1812  printType(Out, CPV->getType());
1813  Out << "::init (";
1814  }
1815  if (llvm::ConstantArray *CA = llvm::dyn_cast<llvm::ConstantArray>(CPV)) {
1816  printConstantArray(CA, Static);
1817  } else if (llvm::ConstantDataSequential *CDS = llvm::dyn_cast<llvm::ConstantDataSequential>(CPV)) {
1818  printConstantDataSequential(CDS, Static);
1819  } else {
1820  assert(llvm::isa<llvm::ConstantAggregateZero>(CPV) || llvm::isa<llvm::UndefValue>(CPV));
1821  if (AT->getNumElements()) {
1822  Out << ' ';
1823  llvm::Constant *CZ = llvm::Constant::getNullValue(AT->getElementType());
1824  printConstant(CZ, Static);
1825  for (unsigned i = 1, e = (unsigned)AT->getNumElements(); i != e; ++i) {
1826  Out << ", ";
1827  printConstant(CZ, Static);
1828  }
1829  }
1830  }
1831  if (Static)
1832  Out << " }";
1833  else
1834  Out << ")";
1835  break;
1836  }
1837  case llvm::Type::VectorTyID: {
1838  llvm::VectorType *VT = llvm::dyn_cast<llvm::VectorType>(CPV->getType());
1839 
1840  if (llvm::isa<llvm::ConstantAggregateZero>(CPV)) {
1841  // All zeros; call the __setzero_* function.
1842  const char *setZeroFunc = lGetTypedFunc("setzero", VT->getElementType(), vectorWidth);
1843  assert(setZeroFunc != NULL);
1844  Out << setZeroFunc << "()";
1845  } else if (llvm::isa<llvm::UndefValue>(CPV)) {
1846  // Undefined value; call __undef_* so that we can potentially pass
1847  // this information along..
1848  const char *undefFunc = lGetTypedFunc("undef", VT->getElementType(), vectorWidth);
1849  assert(undefFunc != NULL);
1850  Out << undefFunc << "()";
1851  } else {
1852  const char *smearFunc = lGetTypedFunc("smear", VT->getElementType(), vectorWidth);
1853 
1854  if (llvm::ConstantVector *CV = llvm::dyn_cast<llvm::ConstantVector>(CPV)) {
1855  llvm::Constant *splatValue = CV->getSplatValue();
1856  if (splatValue != NULL && smearFunc != NULL) {
1857  // If it's a basic type and has a __smear_* function, then
1858  // call that.
1859  Out << smearFunc << "(";
1860  printConstant(splatValue, Static);
1861  Out << ")";
1862  } else {
1863  // Otherwise call the constructor for the type
1864  printType(Out, CPV->getType());
1865  Out << "(";
1866  printConstantVector(CV, Static);
1867  Out << ")";
1868  }
1869  } else if (llvm::ConstantDataVector *CDV = llvm::dyn_cast<llvm::ConstantDataVector>(CPV)) {
1870  llvm::Constant *splatValue = CDV->getSplatValue();
1871  if (splatValue != NULL && smearFunc != NULL) {
1872  Out << smearFunc << "(";
1873  printConstant(splatValue, Static);
1874  Out << ")";
1875  } else if (VectorConstantMap.find(CDV) != VectorConstantMap.end()) {
1876  // If we have emitted an static const array with the
1877  // vector's values, just load from it.
1878  unsigned index = VectorConstantMap[CDV];
1879  int alignment = 4 * std::min(vectorWidth, 16);
1880 
1881  Out << "__load<" << alignment << ">(";
1882 
1883  // Cast the pointer to the array of element values to a
1884  // pointer to the vector type.
1885  Out << "(const ";
1886  printSimpleType(Out, CDV->getType(), true, "");
1887  Out << " *)";
1888 
1889  Out << "(VectorConstant" << index << "))";
1890  } else {
1891  printType(Out, CPV->getType());
1892  Out << "(";
1893  printConstantDataSequential(CDV, Static);
1894  Out << ")";
1895  }
1896  } else {
1897  llvm::report_fatal_error("Unexpected vector type");
1898  }
1899  }
1900 
1901  break;
1902  }
1903  case llvm::Type::StructTyID:
1904  if (!Static) {
1905  // call init func...
1906  printType(Out, CPV->getType());
1907  Out << "::init";
1908  }
1909  if (llvm::isa<llvm::ConstantAggregateZero>(CPV) || llvm::isa<llvm::UndefValue>(CPV)) {
1910  llvm::StructType *ST = llvm::cast<llvm::StructType>(CPV->getType());
1911  Out << '(';
1912  if (ST->getNumElements()) {
1913  Out << ' ';
1914  printConstant(llvm::Constant::getNullValue(ST->getElementType(0)), Static);
1915  for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) {
1916  Out << ", ";
1917  printConstant(llvm::Constant::getNullValue(ST->getElementType(i)), Static);
1918  }
1919  }
1920  Out << ')';
1921  } else {
1922  Out << '(';
1923  if (CPV->getNumOperands()) {
1924  // It is a kludge. It is needed because we cannot support short vectors
1925  // when generating code for knl-generic in multitarget mode.
1926  // Short vectors are mapped to "native" vectors and cause AVX-512 code
1927  // generation in static block initialization (__vec16_* in ::init function).
1928  Out << ' ';
1929  printConstant(llvm::cast<llvm::Constant>(CPV->getOperand(0)), Static);
1930  for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
1931  Out << ", ";
1932  printConstant(llvm::cast<llvm::Constant>(CPV->getOperand(i)), Static);
1933  }
1934  }
1935  Out << ')';
1936  }
1937  break;
1938 
1939  case llvm::Type::PointerTyID:
1940  if (llvm::isa<llvm::ConstantPointerNull>(CPV)) {
1941  Out << "((";
1942  printType(Out, CPV->getType()); // sign doesn't matter
1943  Out << ")/*NULL*/0)";
1944  break;
1945  } else if (llvm::GlobalValue *GV = llvm::dyn_cast<llvm::GlobalValue>(CPV)) {
1946  writeOperand(GV, Static);
1947  break;
1948  }
1949  // FALL THROUGH
1950  default:
1951 #ifndef NDEBUG
1952  llvm::errs() << "Unknown constant type: " << *CPV << "\n";
1953 #endif
1954  llvm_unreachable(0);
1955  }
1956 }
1957 
1958 // Some constant expressions need to be casted back to the original types
1959 // because their operands were casted to the expected type. This function takes
1960 // care of detecting that case and printing the cast for the ConstantExpr.
1961 bool CWriter::printConstExprCast(const llvm::ConstantExpr *CE, bool Static) {
1962  bool NeedsExplicitCast = false;
1963  llvm::Type *Ty = CE->getOperand(0)->getType();
1964  bool TypeIsSigned = false;
1965  switch (CE->getOpcode()) {
1966  case llvm::Instruction::Add:
1967  case llvm::Instruction::Sub:
1968  case llvm::Instruction::Mul:
1969  // We need to cast integer arithmetic so that it is always performed
1970  // as unsigned, to avoid undefined behavior on overflow.
1971  case llvm::Instruction::LShr:
1972  case llvm::Instruction::URem:
1973  case llvm::Instruction::UDiv:
1974  NeedsExplicitCast = true;
1975  break;
1976  case llvm::Instruction::AShr:
1977  case llvm::Instruction::SRem:
1978  case llvm::Instruction::SDiv:
1979  NeedsExplicitCast = true;
1980  TypeIsSigned = true;
1981  break;
1982  case llvm::Instruction::SExt:
1983  Ty = CE->getType();
1984  NeedsExplicitCast = true;
1985  TypeIsSigned = true;
1986  break;
1987  case llvm::Instruction::ZExt:
1988  case llvm::Instruction::Trunc:
1989  case llvm::Instruction::FPTrunc:
1990  case llvm::Instruction::FPExt:
1991  case llvm::Instruction::UIToFP:
1992  case llvm::Instruction::SIToFP:
1993  case llvm::Instruction::FPToUI:
1994  case llvm::Instruction::FPToSI:
1995  case llvm::Instruction::PtrToInt:
1996  case llvm::Instruction::IntToPtr:
1997  case llvm::Instruction::BitCast:
1998  Ty = CE->getType();
1999  NeedsExplicitCast = true;
2000  break;
2001  default:
2002  break;
2003  }
2004  if (NeedsExplicitCast) {
2005  Out << "((";
2006  if (Ty->isIntegerTy() && Ty != llvm::Type::getInt1Ty(Ty->getContext()))
2007  printSimpleType(Out, Ty, TypeIsSigned);
2008  else
2009  printType(Out, Ty); // not integer, sign doesn't matter
2010  Out << ")(";
2011  }
2012  return NeedsExplicitCast;
2013 }
2014 
2015 // Print a constant assuming that it is the operand for a given Opcode. The
2016 // opcodes that care about sign need to cast their operands to the expected
2017 // type before the operation proceeds. This function does the casting.
2018 void CWriter::printConstantWithCast(llvm::Constant *CPV, unsigned Opcode) {
2019 
2020  // Extract the operand's type, we'll need it.
2021  llvm::Type *OpTy = CPV->getType();
2022 
2023  // Indicate whether to do the cast or not.
2024  bool shouldCast = false;
2025  bool typeIsSigned = false;
2026 
2027  // Based on the Opcode for which this Constant is being written, determine
2028  // the new type to which the operand should be casted by setting the value
2029  // of OpTy. If we change OpTy, also set shouldCast to true so it gets
2030  // casted below.
2031  switch (Opcode) {
2032  default:
2033  // for most instructions, it doesn't matter
2034  break;
2035  case llvm::Instruction::Add:
2036  case llvm::Instruction::Sub:
2037  case llvm::Instruction::Mul:
2038  // We need to cast integer arithmetic so that it is always performed
2039  // as unsigned, to avoid undefined behavior on overflow.
2040  case llvm::Instruction::LShr:
2041  case llvm::Instruction::UDiv:
2042  case llvm::Instruction::URem:
2043  shouldCast = true;
2044  break;
2045  case llvm::Instruction::AShr:
2046  case llvm::Instruction::SDiv:
2047  case llvm::Instruction::SRem:
2048  shouldCast = true;
2049  typeIsSigned = true;
2050  break;
2051  }
2052 
2053  // Write out the casted constant if we should, otherwise just write the
2054  // operand.
2055  if (shouldCast) {
2056  Out << "((";
2057  printSimpleType(Out, OpTy, typeIsSigned);
2058  Out << ")";
2059  printConstant(CPV, false);
2060  Out << ")";
2061  } else
2062  printConstant(CPV, false);
2063 }
2064 
2065 std::string CWriter::GetValueName(const llvm::Value *Operand) {
2066 
2067  // Resolve potential alias.
2068  if (const llvm::GlobalAlias *GA = llvm::dyn_cast<llvm::GlobalAlias>(Operand)) {
2069 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 /* LLVM 3.5+ */
2070  if (const llvm::Value *V = GA->getAliasee())
2071 #else /* <= LLVM 3.4 */
2072  if (const llvm::Value *V = GA->resolveAliasedGlobal(false))
2073 #endif
2074  Operand = V;
2075  }
2076 
2077  // Mangle globals with the standard mangler interface for LLC compatibility.
2078  if (const llvm::GlobalValue *GV = llvm::dyn_cast<llvm::GlobalValue>(Operand)) {
2079  (void)GV;
2080  // llvm::SmallString<128> Str;
2081  // Mang->getNameWithPrefix(Str, GV, false);
2082  // return CBEMangle(Str.str().str());
2083  return CBEMangle(Operand->getName().str().c_str());
2084  }
2085 
2086  std::string Name = Operand->getName();
2087 
2088  if (Name.empty()) { // Assign unique names to local temporaries.
2089  unsigned &No = AnonValueNumbers[Operand];
2090  if (No == 0)
2091  No = ++NextAnonValueNumber;
2092  Name = "tmp__" + llvm::utostr(No);
2093  }
2094 
2095  std::string VarName;
2096  VarName.reserve(Name.capacity());
2097 
2098  for (std::string::iterator I = Name.begin(), E = Name.end(); I != E; ++I) {
2099  char ch = *I;
2100 
2101  if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_')) {
2102  char buffer[5];
2103  snprintf(buffer, sizeof(buffer), "_%x_", ch);
2104  VarName += buffer;
2105  } else
2106  VarName += ch;
2107  }
2108 
2109  if (llvm::isa<llvm::BasicBlock>(Operand))
2110  VarName += "_label";
2111  else
2112  VarName += "_";
2113 
2114  return VarName;
2115 }
2116 
2117 /// writeInstComputationInline - Emit the computation for the specified
2118 /// instruction inline, with no destination provided.
2119 void CWriter::writeInstComputationInline(llvm::Instruction &I) {
2120  // If this is a non-trivial bool computation, make sure to truncate down to
2121  // a 1 bit value. This is important because we want "add i1 x, y" to return
2122  // "0" when x and y are true, not "2" for example.
2123  bool NeedBoolTrunc = false;
2124  if (I.getType() == llvm::Type::getInt1Ty(I.getContext()) && !llvm::isa<llvm::ICmpInst>(I) &&
2125  !llvm::isa<llvm::FCmpInst>(I))
2126  NeedBoolTrunc = true;
2127 
2128  if (NeedBoolTrunc)
2129  Out << "((";
2130 
2131  visit(I);
2132 
2133  if (NeedBoolTrunc)
2134  Out << ")&1)";
2135 }
2136 
2137 void CWriter::writeOperandInternal(llvm::Value *Operand, bool Static) {
2138  if (llvm::Instruction *I = llvm::dyn_cast<llvm::Instruction>(Operand))
2139  // Should we inline this instruction to build a tree?
2140  if (isInlinableInst(*I) && !isDirectAlloca(I)) {
2141  Out << '(';
2142  writeInstComputationInline(*I);
2143  Out << ')';
2144  return;
2145  }
2146 
2147  llvm::Constant *CPV = llvm::dyn_cast<llvm::Constant>(Operand);
2148 
2149  if (CPV && !llvm::isa<llvm::GlobalValue>(CPV))
2150  printConstant(CPV, Static);
2151  else
2152  Out << GetValueName(Operand);
2153 }
2154 
2155 void CWriter::writeOperand(llvm::Value *Operand, bool Static) {
2156  bool isAddressImplicit = isAddressExposed(Operand);
2157  if (isAddressImplicit)
2158  Out << "(&"; // Global variables are referenced as their addresses by llvm
2159 
2160  writeOperandInternal(Operand, Static);
2161 
2162  if (isAddressImplicit)
2163  Out << ')';
2164 }
2165 
2166 // Some instructions need to have their result value casted back to the
2167 // original types because their operands were casted to the expected type.
2168 // This function takes care of detecting that case and printing the cast
2169 // for the Instruction.
2170 bool CWriter::writeInstructionCast(const llvm::Instruction &I) {
2171  llvm::Type *Ty = I.getOperand(0)->getType();
2172  switch (I.getOpcode()) {
2173  case llvm::Instruction::Add:
2174  case llvm::Instruction::Sub:
2175  case llvm::Instruction::Mul:
2176  // We need to cast integer arithmetic so that it is always performed
2177  // as unsigned, to avoid undefined behavior on overflow.
2178  case llvm::Instruction::LShr:
2179  case llvm::Instruction::URem:
2180  case llvm::Instruction::UDiv:
2181  Out << "((";
2182  printSimpleType(Out, Ty, false);
2183  Out << ")(";
2184  return true;
2185  case llvm::Instruction::AShr:
2186  case llvm::Instruction::SRem:
2187  case llvm::Instruction::SDiv:
2188  Out << "((";
2189  printSimpleType(Out, Ty, true);
2190  Out << ")(";
2191  return true;
2192  default:
2193  break;
2194  }
2195  return false;
2196 }
2197 
2198 // Write the operand with a cast to another type based on the Opcode being used.
2199 // This will be used in cases where an instruction has specific type
2200 // requirements (usually signedness) for its operands.
2201 void CWriter::writeOperandWithCast(llvm::Value *Operand, unsigned Opcode) {
2202 
2203  // Extract the operand's type, we'll need it.
2204  llvm::Type *OpTy = Operand->getType();
2205 
2206  // Indicate whether to do the cast or not.
2207  bool shouldCast = false;
2208 
2209  // Indicate whether the cast should be to a signed type or not.
2210  bool castIsSigned = false;
2211 
2212  // Based on the Opcode for which this Operand is being written, determine
2213  // the new type to which the operand should be casted by setting the value
2214  // of OpTy. If we change OpTy, also set shouldCast to true.
2215  switch (Opcode) {
2216  default:
2217  // for most instructions, it doesn't matter
2218  break;
2219  case llvm::Instruction::Add:
2220  case llvm::Instruction::Sub:
2221  case llvm::Instruction::Mul:
2222  // We need to cast integer arithmetic so that it is always performed
2223  // as unsigned, to avoid undefined behavior on overflow.
2224  case llvm::Instruction::LShr:
2225  case llvm::Instruction::UDiv:
2226  case llvm::Instruction::URem: // Cast to unsigned first
2227  shouldCast = true;
2228  castIsSigned = false;
2229  break;
2230  case llvm::Instruction::GetElementPtr:
2231  case llvm::Instruction::AShr:
2232  case llvm::Instruction::SDiv:
2233  case llvm::Instruction::SRem: // Cast to signed first
2234  shouldCast = true;
2235  castIsSigned = true;
2236  break;
2237  }
2238 
2239  // Write out the casted operand if we should, otherwise just write the
2240  // operand.
2241  if (shouldCast) {
2242  Out << "((";
2243  printSimpleType(Out, OpTy, castIsSigned);
2244  Out << ")";
2245  writeOperand(Operand);
2246  Out << ")";
2247  } else
2248  writeOperand(Operand);
2249 }
2250 
2251 // Write the operand with a cast to another type based on the icmp predicate
2252 // being used.
2253 void CWriter::writeOperandWithCast(llvm::Value *Operand, const llvm::ICmpInst &Cmp) {
2254  // This has to do a cast to ensure the operand has the right signedness.
2255  // Also, if the operand is a pointer, we make sure to cast to an integer when
2256  // doing the comparison both for signedness and so that the C compiler doesn't
2257  // optimize things like "p < NULL" to false (p may contain an integer value
2258  // f.e.).
2259  bool shouldCast = Cmp.isRelational();
2260 
2261  // Write out the casted operand if we should, otherwise just write the
2262  // operand.
2263  if (!shouldCast) {
2264  writeOperand(Operand);
2265  return;
2266  }
2267 
2268  // Should this be a signed comparison? If so, convert to signed.
2269  bool castIsSigned = Cmp.isSigned();
2270 
2271  // If the operand was a pointer, convert to a large integer type.
2272  llvm::Type *OpTy = Operand->getType();
2273  if (OpTy->isPointerTy())
2274  OpTy = TD->getIntPtrType(Operand->getContext());
2275 
2276  Out << "((";
2277  printSimpleType(Out, OpTy, castIsSigned);
2278  Out << ")";
2279  writeOperand(Operand);
2280  Out << ")";
2281 }
2282 
2283 // generateCompilerSpecificCode - This is where we add conditional compilation
2284 // directives to cater to specific compilers as need be.
2285 //
2286 static void generateCompilerSpecificCode(llvm::formatted_raw_ostream &Out, const llvm::DataLayout *TD) {
2287  // We output GCC specific attributes to preserve 'linkonce'ness on globals.
2288  // If we aren't being compiled with GCC, just drop these attributes.
2289  Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n"
2290  << "#define __attribute__(X)\n"
2291  << "#endif\n\n";
2292 
2293  // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))".
2294  Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
2295  << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
2296  << "#elif defined(__GNUC__)\n"
2297  << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
2298  << "#else\n"
2299  << "#define __EXTERNAL_WEAK__\n"
2300  << "#endif\n\n";
2301 
2302  // For now, turn off the weak linkage attribute on Mac OS X. (See above.)
2303  Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
2304  << "#define __ATTRIBUTE_WEAK__\n"
2305  << "#elif defined(__GNUC__)\n"
2306  << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
2307  << "#else\n"
2308  << "#define __ATTRIBUTE_WEAK__\n"
2309  << "#endif\n\n";
2310 
2311  // Add hidden visibility support. FIXME: APPLE_CC?
2312  Out << "#if defined(__GNUC__)\n"
2313  << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
2314  << "#endif\n\n";
2315 
2316  // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
2317  // From the GCC documentation:
2318  //
2319  // double __builtin_nan (const char *str)
2320  //
2321  // This is an implementation of the ISO C99 function nan.
2322  //
2323  // Since ISO C99 defines this function in terms of strtod, which we do
2324  // not implement, a description of the parsing is in order. The string is
2325  // parsed as by strtol; that is, the base is recognized by leading 0 or
2326  // 0x prefixes. The number parsed is placed in the significand such that
2327  // the least significant bit of the number is at the least significant
2328  // bit of the significand. The number is truncated to fit the significand
2329  // field provided. The significand is forced to be a quiet NaN.
2330  //
2331  // This function, if given a string literal, is evaluated early enough
2332  // that it is considered a compile-time constant.
2333  //
2334  // float __builtin_nanf (const char *str)
2335  //
2336  // Similar to __builtin_nan, except the return type is float.
2337  //
2338  // double __builtin_inf (void)
2339  //
2340  // Similar to __builtin_huge_val, except a warning is generated if the
2341  // target floating-point format does not support infinities. This
2342  // function is suitable for implementing the ISO C99 macro INFINITY.
2343  //
2344  // float __builtin_inff (void)
2345  //
2346  // Similar to __builtin_inf, except the return type is float.
2347  Out << "#if (defined(__GNUC__) || defined(__clang__)) && !defined(__INTEL_COMPILER)\n"
2348  << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n"
2349  << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n"
2350  << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n"
2351  << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
2352  << "#define LLVM_INF __builtin_inf() /* Double */\n"
2353  << "#define LLVM_INFF __builtin_inff() /* Float */\n"
2354  << "//#define LLVM_PREFETCH(addr,rw,locality) "
2355  "__builtin_prefetch(addr,rw,locality)\n"
2356  << "//#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
2357  << "//#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
2358  << "#elif defined(_MSC_VER) || defined(__INTEL_COMPILER)\n"
2359  << "#include <limits>\n"
2360  << "#define LLVM_NAN(NanStr) std::numeric_limits<double>::quiet_NaN()\n"
2361  << "#define LLVM_NANF(NanStr) std::numeric_limits<float>::quiet_NaN()\n"
2362  << "#define LLVM_NANS(NanStr) std::numeric_limits<double>::signaling_NaN()\n"
2363  << "#define LLVM_NANSF(NanStr) std::numeric_limits<float>::signaling_NaN()\n"
2364  << "#define LLVM_INF std::numeric_limits<double>::infinity()\n"
2365  << "#define LLVM_INFF std::numeric_limits<float>::infinity()\n"
2366  << "//#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n"
2367  << "//#define __ATTRIBUTE_CTOR__\n"
2368  << "//#define __ATTRIBUTE_DTOR__\n"
2369  << "#else\n"
2370  << "#error \"Not MSVC, clang, or g++?\"\n"
2371  << "#endif\n\n";
2372 
2373  // LLVM_ASM() is used to define mapping of the symbol to a different name,
2374  // this is expected to be MacOS-only feature. So defining it only for
2375  // gcc and clang (Intel Compiler on Linux/MacOS is also ok).
2376  // For example, this feature is required to translate symbols described in
2377  // "Symbol Variants Release Notes" document (on Apple website).
2378  Out << "#if (defined(__GNUC__) || defined(__clang__))\n"
2379  << "#define LLVM_ASM(X) __asm(X)\n"
2380  << "#endif\n\n";
2381 
2382  Out << "#if defined(__clang__) || defined(__INTEL_COMPILER) || "
2383  "(__GNUC__ < 4) /* Old GCCs, or compilers not GCC */ \n"
2384  << "#define __builtin_stack_save() 0 /* not implemented */\n"
2385  << "#define __builtin_stack_restore(X) /* noop */\n"
2386  << "#endif\n\n";
2387 
2388 #if 0
2389  // Output typedefs for 128-bit integers. If these are needed with a
2390  // 32-bit target or with a C compiler that doesn't support mode(TI),
2391  // more drastic measures will be needed.
2392  Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n"
2393  << "typedef int __attribute__((mode(TI))) llvmInt128;\n"
2394  << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n"
2395  << "#endif\n\n";
2396 #endif
2397 
2398  // Output target-specific code that should be inserted into main.
2399  Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n";
2400 }
2401 
2402 /// FindStaticTors - Given a static ctor/dtor list, unpack its contents into
2403 /// the StaticTors set.
2404 static void FindStaticTors(llvm::GlobalVariable *GV, std::set<llvm::Function *> &StaticTors) {
2405  llvm::ConstantArray *InitList = llvm::dyn_cast<llvm::ConstantArray>(GV->getInitializer());
2406  if (!InitList)
2407  return;
2408 
2409  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
2410  if (llvm::ConstantStruct *CS = llvm::dyn_cast<llvm::ConstantStruct>(InitList->getOperand(i))) {
2411  if (CS->getNumOperands() != 2)
2412  return; // Not array of 2-element structs.
2413 
2414  if (CS->getOperand(1)->isNullValue())
2415  return; // Found a null terminator, exit printing.
2416  llvm::Constant *FP = CS->getOperand(1);
2417  if (llvm::ConstantExpr *CE = llvm::dyn_cast<llvm::ConstantExpr>(FP))
2418  if (CE->isCast())
2419  FP = CE->getOperand(0);
2420  if (llvm::Function *F = llvm::dyn_cast<llvm::Function>(FP))
2421  StaticTors.insert(F);
2422  }
2423 }
2424 
2426 
2427 /// getGlobalVariableClass - If this is a global that is specially recognized
2428 /// by LLVM, return a code that indicates how we should handle it.
2429 static SpecialGlobalClass getGlobalVariableClass(const llvm::GlobalVariable *GV) {
2430  // If this is a global ctors/dtors list, handle it now.
2431  if (GV->hasAppendingLinkage() && GV->use_empty()) {
2432  if (GV->getName() == "llvm.global_ctors")
2433  return GlobalCtors;
2434  else if (GV->getName() == "llvm.global_dtors")
2435  return GlobalDtors;
2436  }
2437 
2438  // Otherwise, if it is other metadata, don't print it. This catches things
2439  // like debug information.
2440 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_8 /* LLVM 3.5-3.8 */
2441  // Here we compare char *
2442  if (!strcmp(GV->getSection(), "llvm.metadata"))
2443 #else
2444  // Here we compare strings
2445  if (GV->getSection() == "llvm.metadata")
2446 #endif
2447  return NotPrinted;
2448 
2449  return NotSpecial;
2450 }
2451 
2452 // PrintEscapedString - Print each character of the specified string, escaping
2453 // it if it is not printable or if it is an escape char.
2454 static void PrintEscapedString(const char *Str, unsigned Length, llvm::raw_ostream &Out) {
2455  for (unsigned i = 0; i != Length; ++i) {
2456  unsigned char C = Str[i];
2457  if (isprint(C) && C != '\\' && C != '"')
2458  Out << C;
2459  else if (C == '\\')
2460  Out << "\\\\";
2461  else if (C == '\"')
2462  Out << "\\\"";
2463  else if (C == '\t')
2464  Out << "\\t";
2465  else
2466  Out << "\\x" << llvm::hexdigit(C >> 4) << llvm::hexdigit(C & 0x0F);
2467  }
2468 }
2469 
2470 // PrintEscapedString - Print each character of the specified string, escaping
2471 // it if it is not printable or if it is an escape char.
2472 static void PrintEscapedString(const std::string &Str, llvm::raw_ostream &Out) {
2473  PrintEscapedString(Str.c_str(), Str.size(), Out);
2474 }
2475 
2476 bool CWriter::doInitialization(llvm::Module &M) {
2477  llvm::FunctionPass::doInitialization(M);
2478 
2479  // Initialize
2480  TheModule = &M;
2481 
2482  TD = new llvm::DataLayout(&M);
2483  IL = new llvm::IntrinsicLowering(*TD);
2484  // AddPrototypes was removed from LLVM 9.0.
2485  // It looks like that usage of this method does not affect ISPC functionality
2486  // so it is safe to just remove it for LLVM 9.0+ versions.
2487 #if ISPC_LLVM_VERSION <= ISPC_LLVM_8_0
2488  IL->AddPrototypes(M);
2489 #endif
2490 
2491 #if 0
2492  std::string Triple = TheModule->getTargetTriple();
2493  if (Triple.empty())
2494  Triple = llvm::sys::getDefaultTargetTriple();
2495 
2496  std::string E;
2497  if (const llvm::Target *Match = llvm::TargetRegistry::lookupTarget(Triple, E))
2498  TAsm = Match->createMCAsmInfo(Triple);
2499 #endif
2500  TAsm = new CBEMCAsmInfo();
2501  MRI = new llvm::MCRegisterInfo();
2502 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
2503  TCtx = new llvm::MCContext(TAsm, MRI, NULL);
2504 #else
2505  TCtx = new llvm::MCContext(*TAsm, *MRI, NULL);
2506 #endif
2507  // Mang = new llvm::Mangler(*TCtx, *TD);
2508 
2509  // Keep track of which functions are static ctors/dtors so they can have
2510  // an attribute added to their prototypes.
2511  std::set<llvm::Function *> StaticCtors, StaticDtors;
2512  for (llvm::Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) {
2513 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
2514  switch (getGlobalVariableClass(I)) {
2515 #else /* LLVM 3.8+ */
2516  switch (getGlobalVariableClass(&*I)) {
2517 #endif
2518  default:
2519  break;
2520  case GlobalCtors:
2521  FindStaticTors(&*I, StaticCtors);
2522  break;
2523  case GlobalDtors:
2524  FindStaticTors(&*I, StaticDtors);
2525  break;
2526  }
2527  }
2528 
2529  Out << "/*******************************************************************\n";
2530  Out << " This file has been automatically generated by ispc\n";
2531  Out << " DO NOT EDIT THIS FILE DIRECTLY\n";
2532  Out << " *******************************************************************/\n\n";
2533 
2534  Out << "/* Provide Declarations */\n";
2535  Out << "#include <stdarg.h>\n"; // Varargs support
2536  Out << "#include <setjmp.h>\n"; // Unwind support
2537  Out << "#include <limits.h>\n"; // With overflow intrinsics support.
2538  Out << "#include <stdlib.h>\n";
2539  Out << "#ifdef _MSC_VER\n";
2540  Out << " #define NOMINMAX\n";
2541  Out << " #include <windows.h>\n";
2542  Out << "#endif // _MSC_VER\n";
2543  Out << "#include <stdlib.h>\n";
2544  Out << "#include <stdint.h>\n";
2545  Out << "/* get a declaration for alloca */\n";
2546  Out << "#ifdef _MSC_VER\n";
2547  Out << " #include <malloc.h>\n";
2548  Out << " #define alloca _alloca\n";
2549  Out << "#else\n";
2550  Out << " #include <alloca.h>\n";
2551  Out << "#endif\n\n";
2552 
2553  if (g->opt.fastMath) {
2554  Out << "#define ISPC_FAST_MATH 1\n";
2555  } else {
2556  Out << "#undef ISPC_FAST_MATH\n";
2557  }
2558 
2559  if (g->opt.forceAlignedMemory) {
2560  Out << "#define ISPC_FORCE_ALIGNED_MEMORY\n";
2561  }
2562 
2563  Out << "#include \"" << includeName << "\"\n";
2564 
2565  Out << "\n/* Basic Library Function Declarations */\n";
2566  Out << "extern \"C\" {\n";
2567  Out << "int puts(unsigned char *);\n";
2568  Out << "unsigned int putchar(unsigned int);\n";
2569  Out << "int fflush(void *);\n";
2570  Out << "int printf(const unsigned char *, ...);\n";
2571  Out << "uint8_t *memcpy(uint8_t *, uint8_t *, uint64_t );\n";
2572  Out << "uint8_t *memset(uint8_t *, uint8_t, uint64_t );\n";
2573  Out << "void memset_pattern16(void *, const void *, uint64_t );\n";
2574  Out << "}\n\n";
2575 
2577 
2578  // Provide a definition for `bool' if not compiling with a C++ compiler.
2579  Out << "\n"
2580  << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"
2581 
2582  << "\n\n/* Support for floating point constants */\n"
2583  << "typedef uint64_t ConstantDoubleTy;\n"
2584  << "typedef uint32_t ConstantFloatTy;\n"
2585  << "typedef struct { unsigned long long f1; unsigned short f2; "
2586  "unsigned short pad[3]; } ConstantFP80Ty;\n"
2587  // This is used for both kinds of 128-bit long double; meaning differs.
2588  << "typedef struct { uint64_t f1, f2; } ConstantFP128Ty;\n"
2589  << "\n\n/* Global Declarations */\n\n";
2590 
2591  // First output all the declarations for the program, because C requires
2592  // Functions & globals to be declared before they are used.
2593  //
2594  if (!M.getModuleInlineAsm().empty()) {
2595  Out << "/* Module asm statements */\n"
2596  << "asm(";
2597 
2598  // Split the string into lines, to make it easier to read the .ll file.
2599  std::string Asm = M.getModuleInlineAsm();
2600  size_t CurPos = 0;
2601  size_t NewLine = Asm.find_first_of('\n', CurPos);
2602  while (NewLine != std::string::npos) {
2603  // We found a newline, print the portion of the asm string from the
2604  // last newline up to this newline.
2605  Out << "\"";
2606  PrintEscapedString(std::string(Asm.begin() + CurPos, Asm.begin() + NewLine), Out);
2607  Out << "\\n\"\n";
2608  CurPos = NewLine + 1;
2609  NewLine = Asm.find_first_of('\n', CurPos);
2610  }
2611  Out << "\"";
2612  PrintEscapedString(std::string(Asm.begin() + CurPos, Asm.end()), Out);
2613  Out << "\");\n"
2614  << "/* End Module asm statements */\n";
2615  }
2616 
2617  // Loop over the symbol table, emitting all named constants.
2618  printModuleTypes();
2619 
2620  // Global variable declarations...
2621  if (!M.global_empty()) {
2622  Out << "\n/* External Global Variable Declarations */\n";
2623  for (llvm::Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) {
2624 
2625  if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() || I->hasCommonLinkage())
2626  Out << "extern ";
2627 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
2628  else if (I->hasDLLImportStorageClass())
2629 #else
2630  else if (I->hasDLLImportLinkage())
2631 #endif
2632  Out << "__declspec(dllimport) ";
2633  else
2634  continue; // Internal Global
2635 
2636  // Thread Local Storage
2637  if (I->isThreadLocal())
2638  Out << "__thread ";
2639 
2640  printType(Out, I->getType()->getElementType(), false, GetValueName(&*I));
2641 
2642  if (I->hasExternalWeakLinkage())
2643  Out << " __EXTERNAL_WEAK__";
2644  Out << ";\n";
2645  }
2646  }
2647 
2648  // Output the global variable declarations
2649  if (!M.global_empty()) {
2650  Out << "\n\n/* Global Variable Declarations */\n";
2651  for (llvm::Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I)
2652  if (!I->isDeclaration()) {
2653  // Ignore special globals, such as debug info.
2654  if (getGlobalVariableClass(&*I))
2655  continue;
2656 
2657  if (I->hasLocalLinkage())
2658  continue;
2659  else
2660  Out << "extern ";
2661 
2662  // Thread Local Storage
2663  if (I->isThreadLocal())
2664  Out << "__thread ";
2665 
2666  printType(Out, I->getType()->getElementType(), false, GetValueName(&*I));
2667 
2668  if (I->hasLinkOnceLinkage())
2669  Out << " __attribute__((common))";
2670  else if (I->hasCommonLinkage()) // FIXME is this right?
2671  Out << " __ATTRIBUTE_WEAK__";
2672  else if (I->hasWeakLinkage())
2673  Out << " __ATTRIBUTE_WEAK__";
2674  else if (I->hasExternalWeakLinkage())
2675  Out << " __EXTERNAL_WEAK__";
2676  if (I->hasHiddenVisibility())
2677  Out << " __HIDDEN__";
2678  Out << ";\n";
2679  }
2680  }
2681 
2682  // Function declarations
2683  Out << "\n/* Function Declarations */\n";
2684  Out << "extern \"C\" {\n";
2685 
2686  // Store the intrinsics which will be declared/defined below.
2687  llvm::SmallVector<const llvm::Function *, 8> intrinsicsToDefine;
2688 
2689  for (llvm::Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
2690  // Don't print declarations for intrinsic functions.
2691  // Store the used intrinsics, which need to be explicitly defined.
2692  if (I->isIntrinsic()) {
2693  switch (I->getIntrinsicID()) {
2694  default:
2695  break;
2696  case llvm::Intrinsic::uadd_with_overflow:
2697  case llvm::Intrinsic::sadd_with_overflow:
2698  case llvm::Intrinsic::umul_with_overflow:
2699 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
2700  intrinsicsToDefine.push_back(I);
2701 #else /* LLVM 3.8+ */
2702  intrinsicsToDefine.push_back(&*I);
2703 #endif
2704  break;
2705  }
2706  continue;
2707  }
2708 
2709  if (I->getName() == "setjmp" || I->getName() == "abort" || I->getName() == "longjmp" ||
2710  I->getName() == "_setjmp" || I->getName() == "memset" || I->getName() == "memset_pattern16" ||
2711  I->getName() == "puts" || I->getName() == "printf" || I->getName() == "putchar" ||
2712  I->getName() == "fflush" ||
2713  // Memory allocation
2714  I->getName() == "malloc" || I->getName() == "posix_memalign" || I->getName() == "free" ||
2715  I->getName() == "_aligned_malloc" || I->getName() == "_aligned_free")
2716  continue;
2717 
2718  // Don't redeclare ispc's own intrinsics
2719  std::string name = I->getName();
2720  if (name.size() > 2 && name[0] == '_' && name[1] == '_')
2721  continue;
2722 
2723  if (I->hasExternalWeakLinkage())
2724  Out << "extern ";
2725 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
2726  printFunctionSignature(I, true);
2727 #else /* LLVM 3.8+ */
2728  printFunctionSignature(&*I, true);
2729 #endif
2730  if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
2731  Out << " __ATTRIBUTE_WEAK__";
2732  if (I->hasExternalWeakLinkage())
2733  Out << " __EXTERNAL_WEAK__";
2734 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
2735  if (StaticCtors.count(I))
2736  Out << " __ATTRIBUTE_CTOR__";
2737  if (StaticDtors.count(I))
2738 #else /* LLVM 3.8+ */
2739  if (StaticCtors.count(&*I))
2740  Out << " __ATTRIBUTE_CTOR__";
2741  if (StaticDtors.count(&*I))
2742 #endif
2743  Out << " __ATTRIBUTE_DTOR__";
2744  if (I->hasHiddenVisibility())
2745  Out << " __HIDDEN__";
2746 
2747  // This is MacOS specific feature, this should not appear on other platforms.
2748  if (I->hasName() && I->getName()[0] == 1)
2749  Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
2750 
2751  Out << ";\n";
2752  }
2753  Out << "}\n\n";
2754 
2755  if (!M.empty())
2756  Out << "\n\n/* Function Bodies */\n";
2757 
2758  // Emit some helper functions for dealing with FCMP instruction's
2759  // predicates
2760  Out << "template <typename A, typename B> static inline int llvm_fcmp_ord(A X, B Y) { ";
2761  Out << "return X == X && Y == Y; }\n";
2762  Out << "template <typename A, typename B> static inline int llvm_fcmp_uno(A X, B Y) { ";
2763  Out << "return X != X || Y != Y; }\n";
2764  Out << "template <typename A, typename B> static inline int llvm_fcmp_ueq(A X, B Y) { ";
2765  Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
2766  Out << "template <typename A, typename B> static inline int llvm_fcmp_une(A X, B Y) { ";
2767  Out << "return X != Y; }\n";
2768  Out << "template <typename A, typename B> static inline int llvm_fcmp_ult(A X, B Y) { ";
2769  Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
2770  Out << "template <typename A, typename B> static inline int llvm_fcmp_ugt(A X, B Y) { ";
2771  Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
2772  Out << "template <typename A, typename B> static inline int llvm_fcmp_ule(A X, B Y) { ";
2773  Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
2774  Out << "template <typename A, typename B> static inline int llvm_fcmp_uge(A X, B Y) { ";
2775  Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
2776  Out << "template <typename A, typename B> static inline int llvm_fcmp_oeq(A X, B Y) { ";
2777  Out << "return X == Y ; }\n";
2778  Out << "template <typename A, typename B> static inline int llvm_fcmp_one(A X, B Y) { ";
2779  Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
2780  Out << "template <typename A, typename B> static inline int llvm_fcmp_olt(A X, B Y) { ";
2781  Out << "return X < Y ; }\n";
2782  Out << "template <typename A, typename B> static inline int llvm_fcmp_ogt(A X, B Y) { ";
2783  Out << "return X > Y ; }\n";
2784  Out << "template <typename A, typename B> static inline int llvm_fcmp_ole(A X, B Y) { ";
2785  Out << "return X <= Y ; }\n";
2786  Out << "template <typename A, typename B> static inline int llvm_fcmp_oge(A X, B Y) { ";
2787  Out << "return X >= Y ; }\n";
2788  Out << "template <typename A> A *Memset(A *ptr, int count, size_t len) { ";
2789  Out << "return (A *)memset(ptr, count, len); }\n";
2790 
2791  // Emit definitions of the intrinsics.
2792  for (llvm::SmallVector<const llvm::Function *, 8>::const_iterator I = intrinsicsToDefine.begin(),
2793  E = intrinsicsToDefine.end();
2794  I != E; ++I) {
2795  printIntrinsicDefinition(**I, Out);
2796  }
2797 
2798  // Output the global variable definitions and contents...
2799  if (!M.global_empty()) {
2800  Out << "\n\n/* Global Variable Definitions and Initialization */\n";
2801  for (llvm::Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I)
2802  if (!I->isDeclaration()) {
2803  // Ignore special globals, such as debug info.
2804 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
2805  if (getGlobalVariableClass(I))
2806 #else /* LLVM 3.8+ */
2807  if (getGlobalVariableClass(&*I))
2808 #endif
2809  continue;
2810 
2811  if (I->hasLocalLinkage())
2812  Out << "static ";
2813 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
2814  else if (I->hasDLLImportStorageClass())
2815  Out << "__declspec(dllimport) ";
2816  else if (I->hasDLLExportStorageClass())
2817  Out << "__declspec(dllexport) ";
2818 #else
2819  else if (I->hasDLLImportLinkage())
2820  Out << "__declspec(dllimport) ";
2821  else if (I->hasDLLExportLinkage())
2822  Out << "__declspec(dllexport) ";
2823 #endif
2824  // Thread Local Storage
2825  if (I->isThreadLocal())
2826  Out << "__thread ";
2827 
2828  printType(Out, I->getType()->getElementType(), false,
2829 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
2830  GetValueName(I));
2831 #else /* LLVM 3.8+ */
2832  GetValueName(&*I));
2833 #endif
2834 
2835  if (I->hasLinkOnceLinkage())
2836  Out << " __attribute__((common))";
2837  else if (I->hasWeakLinkage())
2838  Out << " __ATTRIBUTE_WEAK__";
2839  else if (I->hasCommonLinkage())
2840  Out << " __ATTRIBUTE_WEAK__";
2841 
2842  if (I->hasHiddenVisibility())
2843  Out << " __HIDDEN__";
2844 
2845  // If the initializer is not null, emit the initializer. If it is null,
2846  // we try to avoid emitting large amounts of zeros. The problem with
2847  // this, however, occurs when the variable has weak linkage. In this
2848  // case, the assembler will complain about the variable being both weak
2849  // and common, so we disable this optimization.
2850  // FIXME common linkage should avoid this problem.
2851  if (!I->getInitializer()->isNullValue()) {
2852  Out << " = ";
2853 
2854  // vec16_i64 should be handled separately
2855  if (is_vec16_i64_ty(I->getType()->getElementType())) {
2856  Out << "/* vec16_i64 should be loaded carefully on knc */\n";
2857  Out << "\n#if defined(KNC) \n";
2858  Out << "hilo2zmm";
2859  Out << "\n#endif \n";
2860  }
2861 
2862  Out << "(";
2863  writeOperand(I->getInitializer(), false);
2864  Out << ")";
2865  } else if (I->hasWeakLinkage()) {
2866  // We have to specify an initializer, but it doesn't have to be
2867  // complete. If the value is an aggregate, print out { 0 }, and let
2868  // the compiler figure out the rest of the zeros.
2869  Out << " = ";
2870  if (I->getInitializer()->getType()->isStructTy() || I->getInitializer()->getType()->isVectorTy()) {
2871  Out << "{ 0 }";
2872  } else if (I->getInitializer()->getType()->isArrayTy()) {
2873  // As with structs and vectors, but with an extra set of braces
2874  // because arrays are wrapped in structs.
2875  Out << "{ { 0 } }";
2876  } else {
2877  // Just print it out normally.
2878  writeOperand(I->getInitializer(), false);
2879  }
2880  }
2881  Out << ";\n";
2882  }
2883  }
2884 
2885  return false;
2886 }
2887 
2888 /// Output all floating point constants that cannot be printed accurately...
2889 void CWriter::printFloatingPointConstants(llvm::Function &F) {
2890  // Scan the module for floating point constants. If any FP constant is used
2891  // in the function, we want to redirect it here so that we do not depend on
2892  // the precision of the printed form, unless the printed form preserves
2893  // precision.
2894  //
2897  I != E; ++I)
2898  printFloatingPointConstants(*I);
2899 
2900  Out << '\n';
2901 }
2902 
2903 void CWriter::printFloatingPointConstants(const llvm::Constant *C) {
2904  // If this is a constant expression, recursively check for constant fp values.
2905  if (const llvm::ConstantExpr *CE = llvm::dyn_cast<llvm::ConstantExpr>(C)) {
2906  for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
2907  printFloatingPointConstants(CE->getOperand(i));
2908  return;
2909  }
2910 
2911  // Otherwise, check for a FP constant that we need to print.
2912  const llvm::ConstantFP *FPC = llvm::dyn_cast<llvm::ConstantFP>(C);
2913  if (FPC == 0 ||
2914  // Do not put in FPConstantMap if safe.
2915  isFPCSafeToPrint(FPC) ||
2916  // Already printed this constant?
2917  FPConstantMap.count(FPC))
2918  return;
2919 
2920  FPConstantMap[FPC] = FPCounter; // Number the FP constants
2921 
2922  if (FPC->getType() == llvm::Type::getDoubleTy(FPC->getContext())) {
2923  double Val = FPC->getValueAPF().convertToDouble();
2924  uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2925  Out << "static const ConstantDoubleTy FPConstant" << FPCounter++ << " = 0x" << llvm::utohexstr(i)
2926  << "ULL; /* " << Val << " */\n";
2927  } else if (FPC->getType() == llvm::Type::getFloatTy(FPC->getContext())) {
2928  float Val = FPC->getValueAPF().convertToFloat();
2929  uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2930  Out << "static const ConstantFloatTy FPConstant" << FPCounter++ << " = 0x" << llvm::utohexstr(i) << "U; /* "
2931  << Val << " */\n";
2932  } else if (FPC->getType() == llvm::Type::getX86_FP80Ty(FPC->getContext())) {
2933  // api needed to prevent premature destruction
2934  llvm::APInt api = FPC->getValueAPF().bitcastToAPInt();
2935  const uint64_t *p = api.getRawData();
2936  Out << "static const ConstantFP80Ty FPConstant" << FPCounter++ << " = { 0x" << llvm::utohexstr(p[0])
2937  << "ULL, 0x" << llvm::utohexstr((uint16_t)p[1]) << ",{0,0,0}"
2938  << "}; /* Long double constant */\n";
2939  } else if (FPC->getType() == llvm::Type::getPPC_FP128Ty(FPC->getContext()) ||
2940  FPC->getType() == llvm::Type::getFP128Ty(FPC->getContext())) {
2941  llvm::APInt api = FPC->getValueAPF().bitcastToAPInt();
2942  const uint64_t *p = api.getRawData();
2943  Out << "static const ConstantFP128Ty FPConstant" << FPCounter++ << " = { 0x" << llvm::utohexstr(p[0]) << ", 0x"
2944  << llvm::utohexstr(p[1]) << "}; /* Long double constant */\n";
2945 
2946  } else {
2947  llvm_unreachable("Unknown float type!");
2948  }
2949 }
2950 
2951 // For any vector constants, generate code to declare static const arrays
2952 // with their element values. Doing so allows us to emit aligned vector
2953 // loads to get their values, rather than tediously inserting the
2954 // individual values into the vector.
2955 void CWriter::printVectorConstants(llvm::Function &F) {
2958  I != E; ++I) {
2959  const llvm::ConstantDataVector *CDV = llvm::dyn_cast<llvm::ConstantDataVector>(*I);
2960  if (CDV == NULL)
2961  continue;
2962 
2963  // Don't bother if this is a splat of the same value; a (more
2964  // efficient?) __splat_* call will be generated for these.
2965  if (CDV->getSplatValue() != NULL)
2966  continue;
2967 
2968  // Don't align to anything more than 64 bytes
2969  int alignment = 4 * std::min(vectorWidth, 16);
2970 
2971  Out << "static const ";
2972  printSimpleType(Out, CDV->getElementType(), true, "");
2973  Out << "__attribute__ ((aligned(" << alignment << "))) ";
2974  Out << "VectorConstant" << VectorConstantIndex << "[] = { ";
2975  for (int i = 0; i < (int)CDV->getNumElements(); ++i) {
2976  printConstant(CDV->getElementAsConstant(i), false);
2977  Out << ", ";
2978  }
2979  Out << " };\n";
2980 
2981  VectorConstantMap[CDV] = VectorConstantIndex++;
2982  }
2983  Out << "\n";
2984 }
2985 
2986 /// printSymbolTable - Run through symbol table looking for type names. If a
2987 /// type name is found, emit its declaration...
2988 ///
2989 void CWriter::printModuleTypes() {
2990  Out << "\n/* Helper union for bitcasts */\n";
2991  Out << "typedef union {\n";
2992  Out << " unsigned int Int32;\n";
2993  Out << " unsigned long long Int64;\n";
2994  Out << " float Float;\n";
2995  Out << " double Double;\n";
2996  Out << "} llvmBitCastUnion;\n";
2997  Out << "\n/* This is special class, designed for operations with long int.*/ \n";
2998  Out << "namespace { \n";
2999  Out << "template <int num_bits> \n";
3000  Out << "struct iN { \n";
3001  Out << " int num[num_bits / (sizeof (int) * 8)]; \n";
3002  Out << " \n";
3003  Out << " iN () {} \n";
3004  Out << " \n";
3005  Out << " iN (const char *val) { \n";
3006  Out << " if (val == NULL) \n";
3007  Out << " return; \n";
3008  Out << " int length = num_bits / (sizeof (int) * 8); \n";
3009  Out << " int val_len = 0; \n";
3010  Out << " for (val_len = 0; val[val_len]; (val_len)++); \n";
3011  Out << " for (int i = 0; (i < val_len && i < num_bits); i++) \n";
3012  Out << " num[i / (sizeof (int) * 8)] = (num[i / (sizeof (int) * 8)] << 1) | (val[i] - '0'); \n";
3013  Out << " } \n";
3014  Out << " \n";
3015  Out << " ~iN () {} \n";
3016  Out << " \n";
3017  Out << " iN operator >> (const iN rhs) { \n";
3018  Out << " iN res; \n";
3019  Out << " int length = num_bits / (sizeof (int) * 8); \n";
3020  Out << " int cells_shift = rhs.num[0] / (sizeof(int) * 8); \n";
3021  Out << " int small_shift = rhs.num[0] % (sizeof(int) * 8); \n";
3022  Out << " for (int i = 0; i < (length - cells_shift); i++) \n";
3023  Out << " res.num[i] = this->num[cells_shift + i]; \n";
3024  Out << " for (int i = 0; i < length - 1; i++) { \n";
3025  Out << " res.num[i] = this->num[i] >> small_shift; \n";
3026  Out << " res.num[i] = ((this->num[i + 1] << ((sizeof(int) * 8) - small_shift))) | res.num[i];\n";
3027  Out << " } \n";
3028  Out << " res.num[length - 1] = res.num[length - 1] >> small_shift; \n";
3029  Out << " return res; \n";
3030  Out << " } \n";
3031  Out << " \n";
3032  Out << " iN operator & (iN rhs) { \n";
3033  Out << " iN res; \n";
3034  Out << " int length = num_bits / (sizeof (int) * 8); \n";
3035  Out << " for (int i = 0; i < length; i++) \n";
3036  Out << " res.num[i] = (this->num[i]) & (rhs.num[i]); \n";
3037  Out << " return res; \n";
3038  Out << " } \n";
3039  Out << " \n";
3040  Out << " operator uint32_t() { return this->num[0]; } \n";
3041  Out << " \n";
3042  Out << " template <class T> \n";
3043  Out << " friend iN<num_bits> __cast_bits(iN<num_bits> to, T from) { \n";
3044  Out << " for (int i = 0; i <" << vectorWidth << "; i++) \n";
3045  Out << " to.num[i] = ((int*)(&from))[i]; \n";
3046  Out << " return to; \n";
3047  Out << " } \n";
3048  Out << " \n";
3049  Out << " template <class T> \n";
3050  Out << " friend T __cast_bits(T to, iN<num_bits> from) { \n";
3051  Out << " for (int i = 0; i <" << vectorWidth << "; i++) \n";
3052  Out << " ((int*)(&to))[i] = from.num[i]; \n";
3053  Out << " return to; \n";
3054  Out << " } \n";
3055  Out << " \n";
3056  Out << " template <int ALIGN, class T> \n";
3057  Out << " friend void __store(T *p, iN<num_bits> val) { \n";
3058  Out << " for (int i = 0; i <" << vectorWidth << "; i++) \n";
3059  Out << " ((int*)p)[i] = val.num[i]; \n";
3060  Out << " } \n";
3061  Out << "}; \n";
3062  Out << "};\n";
3063  Out << "\n";
3064 
3065  // Get all of the struct types used in the module.
3066  std::vector<llvm::StructType *> StructTypes;
3067  llvm::TypeFinder typeFinder;
3068  typeFinder.run(*TheModule, false);
3069  for (llvm::TypeFinder::iterator iter = typeFinder.begin(); iter != typeFinder.end(); ++iter)
3070  StructTypes.push_back(*iter);
3071 
3072  // Get all of the array types used in the module
3073  std::vector<llvm::ArrayType *> ArrayTypes;
3074  std::vector<llvm::IntegerType *> IntegerTypes;
3075  std::vector<bool> IsVolatile;
3076  std::vector<int> Alignment;
3077 
3078  findUsedArrayAndLongIntTypes(TheModule, ArrayTypes, IntegerTypes, IsVolatile, Alignment);
3079 
3080  if (StructTypes.empty() && ArrayTypes.empty())
3081  return;
3082 
3083  Out << "/* Structure and array forward declarations */\n";
3084 
3085  unsigned NextTypeID = 0;
3086 
3087  // If any of them are missing names, add a unique ID to UnnamedStructIDs.
3088  // Print out forward declarations for structure types.
3089  for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
3090  llvm::StructType *ST = StructTypes[i];
3091 
3092  if (ST->isLiteral() || ST->getName().empty())
3093  UnnamedStructIDs[ST] = NextTypeID++;
3094 
3095  std::string Name = getStructName(ST);
3096 
3097  Out << "struct " << Name << ";\n";
3098  }
3099 
3100  Out << "namespace {\n";
3101  for (unsigned i = 0, e = ArrayTypes.size(); i != e; ++i) {
3102  llvm::ArrayType *AT = ArrayTypes[i];
3103  ArrayIDs[AT] = NextTypeID++;
3104  std::string Name = getArrayName(AT);
3105  Out << " struct " << Name << ";\n";
3106  }
3107  Out << "};\n";
3108 
3109  for (unsigned i = 0, e = IntegerTypes.size(); i != e; ++i) {
3110  llvm::IntegerType *IT = IntegerTypes[i];
3111  if (IT->getIntegerBitWidth() <= 64 || Alignment[i] == 0)
3112  continue;
3113 
3114  Out << "typedef struct __attribute__ ((packed, aligned(" << Alignment[i] << "))) {\n ";
3115  IsVolatile[i] ? Out << " volatile " : Out << " ";
3116  printType(Out, IT, false, "data");
3117  Out << ";\n";
3118  Out << "} iN_" << IT->getIntegerBitWidth() << "_align_" << Alignment[i] << ";\n";
3119  }
3120 
3121  Out << '\n';
3122 
3123  // Keep track of which types have been printed so far.
3124  llvm::SmallPtrSet<llvm::Type *, 16> StructArrayPrinted;
3125 
3126  // Loop over all structures then push them into the stack so they are
3127  // printed in the correct order.
3128  //
3129  Out << "/* Structure and array contents */\n";
3130  for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
3131  if (StructTypes[i]->isStructTy())
3132  // Only print out used types!
3133  printContainedStructs(StructTypes[i], StructArrayPrinted);
3134  }
3135 
3136  Out << "namespace {\n";
3137  for (unsigned i = 0, e = ArrayTypes.size(); i != e; ++i)
3138  printContainedArrays(ArrayTypes[i], StructArrayPrinted);
3139 
3140  Out << "};\n";
3141  Out << '\n';
3142 }
3143 
3144 // Push the struct onto the stack and recursively push all structs
3145 // this one depends on.
3146 //
3147 // TODO: Make this work properly with vector types
3148 //
3149 void CWriter::printContainedStructs(llvm::Type *Ty, llvm::SmallPtrSet<llvm::Type *, 16> &Printed) {
3150  // Don't walk through pointers.
3151  if (!(Ty->isStructTy() || Ty->isArrayTy()))
3152  return;
3153 
3154  // Print all contained types first.
3155  for (llvm::Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); I != E; ++I)
3156  printContainedStructs(*I, Printed);
3157 
3158  if (llvm::StructType *ST = llvm::dyn_cast<llvm::StructType>(Ty)) {
3159  // Check to see if we have already printed this struct.
3160 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
3161  if (!Printed.insert(Ty).second)
3162  return;
3163 #else
3164  if (!Printed.insert(Ty))
3165  return;
3166 #endif
3167 
3168  // Print structure type out.
3169  printType(Out, ST, false, getStructName(ST), true);
3170  Out << ";\n\n";
3171  }
3172  if (llvm::ArrayType *AT = llvm::dyn_cast<llvm::ArrayType>(Ty)) {
3173 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
3174  if (!Printed.insert(Ty).second)
3175  return;
3176 #else
3177  if (!Printed.insert(Ty))
3178  return;
3179 #endif
3180 
3181  Out << "namespace {\n";
3182  printType(Out, AT, false, getArrayName(AT), true);
3183  Out << ";\n}\n\n";
3184  }
3185 }
3186 
3187 void CWriter::printContainedArrays(llvm::ArrayType *ATy, llvm::SmallPtrSet<llvm::Type *, 16> &Printed) {
3188 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
3189  if (!Printed.insert(ATy).second)
3190  return;
3191 #else
3192  if (!Printed.insert(ATy))
3193  return;
3194 #endif
3195 
3196  llvm::ArrayType *ChildTy = llvm::dyn_cast<llvm::ArrayType>(ATy->getElementType());
3197  if (ChildTy != NULL)
3198  printContainedArrays(ChildTy, Printed);
3199 
3200  printType(Out, ATy, false, getArrayName(ATy), true);
3201  Out << ";\n\n";
3202 }
3203 
3204 void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
3205  /// isStructReturn - Should this function actually return a struct by-value?
3206  bool isStructReturn = F->hasStructRetAttr();
3207 
3208  if (F->hasLocalLinkage())
3209  Out << "static ";
3210 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
3211  if (F->hasDLLImportStorageClass())
3212  Out << "__declspec(dllimport) ";
3213  if (F->hasDLLExportStorageClass())
3214  Out << "__declspec(dllexport) ";
3215 #else
3216  if (F->hasDLLImportLinkage())
3217  Out << "__declspec(dllimport) ";
3218  if (F->hasDLLExportLinkage())
3219  Out << "__declspec(dllexport) ";
3220 #endif
3221  switch (F->getCallingConv()) {
3222  case llvm::CallingConv::X86_StdCall:
3223  Out << "__attribute__((stdcall)) ";
3224  break;
3225  case llvm::CallingConv::X86_FastCall:
3226  Out << "__attribute__((fastcall)) ";
3227  break;
3228  case llvm::CallingConv::X86_ThisCall:
3229  Out << "__attribute__((thiscall)) ";
3230  break;
3231  default:
3232  break;
3233  }
3234 
3235  // Loop over the arguments, printing them...
3236  llvm::FunctionType *FT = llvm::cast<llvm::FunctionType>(F->getFunctionType());
3237 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
3238  const llvm::AttrListPtr &PAL = F->getAttributes();
3239 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
3240  const llvm::AttributeSet &PAL = F->getAttributes();
3241 #else // LLVM 5.0+
3242  const llvm::AttributeList &PAL = F->getAttributes();
3243 #endif
3244 
3245  std::string tstr;
3246  llvm::raw_string_ostream FunctionInnards(tstr);
3247 
3248  // Print out the name...
3249  FunctionInnards << GetValueName(F) << '(';
3250 
3251  bool PrintedArg = false;
3252  if (!F->isDeclaration()) {
3253  if (!F->arg_empty()) {
3254  llvm::Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
3255  unsigned Idx = 1;
3256 
3257  // If this is a struct-return function, don't print the hidden
3258  // struct-return argument.
3259  if (isStructReturn) {
3260  assert(I != E && "Invalid struct return function!");
3261  ++I;
3262  ++Idx;
3263  }
3264 
3265  std::string ArgName;
3266  for (; I != E; ++I) {
3267  if (PrintedArg)
3268  FunctionInnards << ", ";
3269  if (I->hasName() || !Prototype)
3270 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
3271  ArgName = GetValueName(I);
3272 #else /* LLVM 3.8+ */
3273  ArgName = GetValueName(&*I);
3274 #endif
3275  else
3276  ArgName = "";
3277  llvm::Type *ArgTy = I->getType();
3278 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
3279  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
3280 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
3281  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex,
3282  llvm::Attribute::ByVal)) {
3283 #else // LLVM 5.0+
3284  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attribute::ByVal)) {
3285 #endif
3286  ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
3287 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
3288  ByValParams.insert(I);
3289 #else /* LLVM 3.8+ */
3290  ByValParams.insert(&*I);
3291 #endif
3292  }
3293  printType(FunctionInnards, ArgTy,
3294 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
3295  PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt),
3296 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
3297  PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex,
3298  llvm::Attribute::SExt),
3299 #else // LLVM 5.0+
3300  PAL.getParamAttributes(Idx).hasAttribute(llvm::Attribute::SExt),
3301 #endif
3302  ArgName);
3303  PrintedArg = true;
3304  ++Idx;
3305  }
3306  }
3307  } else {
3308  // Loop over the arguments, printing them.
3309  llvm::FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
3310  unsigned Idx = 1;
3311 
3312  // If this is a struct-return function, don't print the hidden
3313  // struct-return argument.
3314  if (isStructReturn) {
3315  assert(I != E && "Invalid struct return function!");
3316  ++I;
3317  ++Idx;
3318  }
3319 
3320  for (; I != E; ++I) {
3321  if (PrintedArg)
3322  FunctionInnards << ", ";
3323  llvm::Type *ArgTy = *I;
3324 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
3325  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
3326 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
3327  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
3328 #else // LLVM 5.0+
3329  if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attribute::ByVal)) {
3330 #endif
3331  assert(ArgTy->isPointerTy());
3332  ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
3333  }
3334  printType(FunctionInnards, ArgTy,
3335 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
3336  PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt)
3337 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
3338  PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt)
3339 #else // LLVM 5.0+
3340  PAL.getParamAttributes(Idx).hasAttribute(llvm::Attribute::SExt)
3341 #endif
3342  );
3343  PrintedArg = true;
3344  ++Idx;
3345  }
3346  }
3347 
3348  if (!PrintedArg && FT->isVarArg()) {
3349  FunctionInnards << "int vararg_dummy_arg";
3350  PrintedArg = true;
3351  }
3352 
3353  // Finish printing arguments... if this is a vararg function, print the ...,
3354  // unless there are no known types, in which case, we just emit ().
3355  //
3356  if (FT->isVarArg() && PrintedArg) {
3357  FunctionInnards << ",..."; // Output varargs portion of signature!
3358  } else if (!FT->isVarArg() && !PrintedArg) {
3359  FunctionInnards << "void"; // ret() -> ret(void) in C.
3360  }
3361  FunctionInnards << ')';
3362 
3363  // Get the return tpe for the function.
3364  llvm::Type *RetTy;
3365  if (!isStructReturn)
3366  RetTy = F->getReturnType();
3367  else {
3368  // If this is a struct-return function, print the struct-return type.
3369  RetTy = llvm::cast<llvm::PointerType>(FT->getParamType(0))->getElementType();
3370  }
3371 
3372  // Print out the return type and the signature built above.
3373  printType(Out, RetTy,
3374 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
3375  PAL.getParamAttributes(0).hasAttribute(llvm::Attributes::SExt),
3376 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
3377  PAL.getParamAttributes(0).hasAttribute(llvm::AttributeSet::ReturnIndex, llvm::Attribute::SExt),
3378 #else // LLVM 5.0+
3379  PAL.getParamAttributes(0).hasAttribute(llvm::Attribute::SExt),
3380 #endif
3381  FunctionInnards.str());
3382 }
3383 
3384 static inline bool isFPIntBitCast(const llvm::Instruction &I) {
3385  if (!llvm::isa<llvm::BitCastInst>(I))
3386  return false;
3387  llvm::Type *SrcTy = I.getOperand(0)->getType();
3388  llvm::Type *DstTy = I.getType();
3389  return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
3390 }
3391 
3392 void CWriter::printFunction(llvm::Function &F) {
3393  /// isStructReturn - Should this function actually return a struct by-value?
3394  bool isStructReturn = F.hasStructRetAttr();
3395 
3396  printFunctionSignature(&F, false);
3397  Out << " {\n";
3398 
3399  // If this is a struct return function, handle the result with magic.
3400  if (isStructReturn) {
3401  llvm::Type *StructTy = llvm::cast<llvm::PointerType>(F.arg_begin()->getType())->getElementType();
3402  Out << " ";
3403  printType(Out, StructTy, false, "StructReturn");
3404  Out << "; /* Struct return temporary */\n";
3405 
3406  Out << " ";
3407  printType(Out, F.arg_begin()->getType(), false, GetValueName(&*(F.arg_begin())));
3408  Out << " = &StructReturn;\n";
3409  }
3410 
3411  bool PrintedVar = false;
3412 
3413  // print local variable information for the function
3414  for (llvm::inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
3415  if (const llvm::AllocaInst *AI = isDirectAlloca(&*I)) {
3416  Out << " ";
3417  printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
3418  Out << "; /* Address-exposed local */\n";
3419  PrintedVar = true;
3420  } else if (I->getType() != llvm::Type::getVoidTy(F.getContext()) && !isInlinableInst(*I)) {
3421  Out << " ";
3422  printType(Out, I->getType(), false, GetValueName(&*I));
3423  Out << ";\n";
3424 
3425  if (llvm::isa<llvm::PHINode>(*I)) { // Print out PHI node temporaries as well...
3426  Out << " ";
3427  printType(Out, I->getType(), false, GetValueName(&*I) + "__PHI");
3428  Out << ";\n";
3429  }
3430  PrintedVar = true;
3431  }
3432  // We need a temporary for the BitCast to use so it can pluck a value out
3433  // of a union to do the BitCast. This is separate from the need for a
3434  // variable to hold the result of the BitCast.
3435  if (isFPIntBitCast(*I)) {
3436  Out << " llvmBitCastUnion " << GetValueName(&*I) << "__BITCAST_TEMPORARY;\n";
3437  PrintedVar = true;
3438  }
3439  }
3440 
3441  if (PrintedVar)
3442  Out << '\n';
3443 
3444  if (F.hasExternalLinkage() && F.getName() == "main")
3445  Out << " CODE_FOR_MAIN();\n";
3446 
3447  // print the basic blocks
3448  for (llvm::Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
3449  if (llvm::Loop *L = LI->getLoopFor(&*BB)) {
3450 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_9 // LLVM 3.9+
3451  if (L->getHeader()->getIterator() == BB && L->getParentLoop() == 0)
3452 #else
3453  if (L->getHeader() == BB && L->getParentLoop() == 0)
3454 #endif
3455  printLoop(L);
3456  } else {
3457  printBasicBlock(&*BB);
3458  }
3459  }
3460 
3461  Out << "}\n\n";
3462 }
3463 
3464 void CWriter::printLoop(llvm::Loop *L) {
3465  Out << " do { /* Syntactic loop '" << L->getHeader()->getName() << "' to make GCC happy */\n";
3466  for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
3467  llvm::BasicBlock *BB = L->getBlocks()[i];
3468  llvm::Loop *BBLoop = LI->getLoopFor(BB);
3469  if (BBLoop == L)
3470  printBasicBlock(BB);
3471  else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
3472  printLoop(BBLoop);
3473  }
3474  Out << " } while (1); /* end of syntactic loop '" << L->getHeader()->getName() << "' */\n";
3475 }
3476 
3477 void CWriter::printBasicBlock(llvm::BasicBlock *BB) {
3478 
3479  // Don't print the label for the basic block if there are no uses, or if
3480  // the only terminator use is the predecessor basic block's terminator.
3481  // We have to scan the use list because PHI nodes use basic blocks too but
3482  // do not require a label to be generated.
3483  //
3484  bool NeedsLabel = false;
3485  for (llvm::pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
3486  if (isGotoCodeNecessary(*PI, BB)) {
3487  NeedsLabel = true;
3488  break;
3489  }
3490 
3491  if (NeedsLabel)
3492  Out << GetValueName(BB) << ": {\n";
3493 
3494  // Output all of the instructions in the basic block...
3495  for (llvm::BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E; ++II) {
3496  if (!isInlinableInst(*II) && !isDirectAlloca(&*II)) {
3497  if (II->getType() != llvm::Type::getVoidTy(BB->getContext()) && !isInlineAsm(*II))
3498  outputLValue(&*II);
3499  else
3500  Out << " ";
3501  writeInstComputationInline(*II);
3502  Out << ";\n";
3503  }
3504  }
3505 
3506  // Don't emit prefix or suffix for the terminator.
3507  visit(*BB->getTerminator());
3508  if (NeedsLabel)
3509  Out << "}\n"; // workaround g++ bug
3510 }
3511 
3512 // Specific Instruction type classes... note that all of the casts are
3513 // necessary because we use the instruction classes as opaque types...
3514 //
3515 void CWriter::visitReturnInst(llvm::ReturnInst &I) {
3516  // If this is a struct return function, return the temporary struct.
3517  bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
3518 
3519  if (isStructReturn) {
3520  Out << " return StructReturn;\n";
3521  return;
3522  }
3523 
3524  // Don't output a void return if this is the last basic block in the function
3525  if (I.getNumOperands() == 0 && &*--I.getParent()->getParent()->end() == I.getParent() &&
3526  (!I.getParent()->size()) == 1) {
3527  return;
3528  }
3529 
3530  Out << " return";
3531  if (I.getNumOperands()) {
3532  Out << ' ';
3533  writeOperand(I.getOperand(0));
3534  }
3535  Out << ";\n";
3536 }
3537 
3538 void CWriter::visitSwitchInst(llvm::SwitchInst &SI) {
3539 
3540  llvm::Value *Cond = SI.getCondition();
3541 
3542  Out << " switch (";
3543  writeOperand(Cond);
3544  Out << ") {\n default:\n";
3545  printPHICopiesForSuccessor(SI.getParent(), SI.getDefaultDest(), 2);
3546  printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
3547  Out << ";\n";
3548 
3549  for (llvm::SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
3550 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
3551  llvm::ConstantInt *CaseVal = i.getCaseValue();
3552  llvm::BasicBlock *Succ = i.getCaseSuccessor();
3553 #else // LLVM 5.0+
3554  llvm::ConstantInt *CaseVal = i->getCaseValue();
3555  llvm::BasicBlock *Succ = i->getCaseSuccessor();
3556 #endif
3557  Out << " case ";
3558  writeOperand(CaseVal);
3559  Out << ":\n";
3560  printPHICopiesForSuccessor(SI.getParent(), Succ, 2);
3561  printBranchToBlock(SI.getParent(), Succ, 2);
3562 
3563 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
3564  if (llvm::Function::iterator(Succ) == std::next(llvm::Function::iterator(SI.getParent())))
3565 #else
3566  if (llvm::Function::iterator(Succ) == llvm::next(llvm::Function::iterator(SI.getParent())))
3567 #endif
3568  Out << " break;\n";
3569  }
3570 
3571  Out << " }\n";
3572 }
3573 
3574 void CWriter::visitIndirectBrInst(llvm::IndirectBrInst &IBI) {
3575  Out << " goto *(void*)(";
3576  writeOperand(IBI.getOperand(0));
3577  Out << ");\n";
3578 }
3579 
3580 void CWriter::visitUnreachableInst(llvm::UnreachableInst &I) { Out << " /*UNREACHABLE*/;\n"; }
3581 
3582 bool CWriter::isGotoCodeNecessary(llvm::BasicBlock *From, llvm::BasicBlock *To) {
3583  /// FIXME: This should be reenabled, but loop reordering safe!!
3584  return true;
3585 
3586 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
3587  if (std::next(llvm::Function::iterator(From)) != llvm::Function::iterator(To))
3588 #else
3589  if (llvm::next(llvm::Function::iterator(From)) != llvm::Function::iterator(To))
3590 #endif
3591  return true; // Not the direct successor, we need a goto.
3592 
3593  // llvm::isa<llvm::SwitchInst>(From->getTerminator())
3594 
3595  if (LI->getLoopFor(From) != LI->getLoopFor(To))
3596  return true;
3597  return false;
3598 }
3599 
3600 void CWriter::printPHICopiesForSuccessor(llvm::BasicBlock *CurBlock, llvm::BasicBlock *Successor, unsigned Indent) {
3601  for (llvm::BasicBlock::iterator I = Successor->begin(); llvm::isa<llvm::PHINode>(I); ++I) {
3602 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
3603  llvm::PHINode *PN = llvm::cast<llvm::PHINode>(I);
3604 #else /* LLVM 3.8+ */
3605  llvm::PHINode *PN = llvm::cast<llvm::PHINode>(&*I);
3606 #endif
3607  // Now we have to do the printing.
3608  llvm::Value *IV = PN->getIncomingValueForBlock(CurBlock);
3609  if (!llvm::isa<llvm::UndefValue>(IV)) {
3610  Out << std::string(Indent, ' ');
3611 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
3612  Out << " " << GetValueName(I) << "__PHI = ";
3613 #else /* LLVM 3.8+ */
3614  Out << " " << GetValueName(&*I) << "__PHI = ";
3615 #endif
3616  writeOperand(IV);
3617  Out << "; /* for PHI node */\n";
3618  }
3619  }
3620 }
3621 
3622 void CWriter::printBranchToBlock(llvm::BasicBlock *CurBB, llvm::BasicBlock *Succ, unsigned Indent) {
3623  if (isGotoCodeNecessary(CurBB, Succ)) {
3624  Out << std::string(Indent, ' ') << " goto ";
3625  writeOperand(Succ);
3626  Out << ";\n";
3627  }
3628 }
3629 
3630 // Branch instruction printing - Avoid printing out a branch to a basic block
3631 // that immediately succeeds the current one.
3632 //
3633 void CWriter::visitBranchInst(llvm::BranchInst &I) {
3634 
3635  if (I.isConditional()) {
3636  if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
3637  Out << " if (";
3638  writeOperand(I.getCondition());
3639  Out << ") {\n";
3640 
3641  printPHICopiesForSuccessor(I.getParent(), I.getSuccessor(0), 2);
3642  printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
3643 
3644  if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
3645  Out << " } else {\n";
3646  printPHICopiesForSuccessor(I.getParent(), I.getSuccessor(1), 2);
3647  printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
3648  }
3649  } else {
3650  // First goto not necessary, assume second one is...
3651  Out << " if (!";
3652  writeOperand(I.getCondition());
3653  Out << ") {\n";
3654 
3655  printPHICopiesForSuccessor(I.getParent(), I.getSuccessor(1), 2);
3656  printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
3657  }
3658 
3659  Out << " }\n";
3660  } else {
3661  printPHICopiesForSuccessor(I.getParent(), I.getSuccessor(0), 0);
3662  printBranchToBlock(I.getParent(), I.getSuccessor(0), 0);
3663  }
3664  Out << "\n";
3665 }
3666 
3667 // PHI nodes get copied into temporary values at the end of predecessor basic
3668 // blocks. We now need to copy these temporary values into the REAL value for
3669 // the PHI.
3670 void CWriter::visitPHINode(llvm::PHINode &I) {
3671  writeOperand(&I);
3672  Out << "__PHI";
3673 }
3674 
3675 void CWriter::visitBinaryOperator(llvm::Instruction &I) {
3676  // binary instructions, shift instructions, setCond instructions.
3677  assert(!I.getType()->isPointerTy());
3678 
3679  if (llvm::isa<const llvm::VectorType>(I.getOperand(0)->getType())) {
3680  const char *intrinsic = NULL;
3681  switch (I.getOpcode()) {
3682  case llvm::Instruction::Add:
3683  intrinsic = "__add";
3684  break;
3685  case llvm::Instruction::FAdd:
3686  intrinsic = "__add";
3687  break;
3688  case llvm::Instruction::Sub:
3689  intrinsic = "__sub";
3690  break;
3691  case llvm::Instruction::FSub:
3692  intrinsic = "__sub";
3693  break;
3694  case llvm::Instruction::Mul:
3695  intrinsic = "__mul";
3696  break;
3697  case llvm::Instruction::FMul:
3698  intrinsic = "__mul";
3699  break;
3700  case llvm::Instruction::URem:
3701  intrinsic = "__urem";
3702  break;
3703  case llvm::Instruction::SRem:
3704  intrinsic = "__srem";
3705  break;
3706  case llvm::Instruction::FRem:
3707  intrinsic = "__frem";
3708  break;
3709  case llvm::Instruction::UDiv:
3710  intrinsic = "__udiv";
3711  break;
3712  case llvm::Instruction::SDiv:
3713  intrinsic = "__sdiv";
3714  break;
3715  case llvm::Instruction::FDiv:
3716  intrinsic = "__div";
3717  break;
3718  case llvm::Instruction::And:
3719  intrinsic = "__and";
3720  break;
3721  case llvm::Instruction::Or:
3722  intrinsic = "__or";
3723  break;
3724  case llvm::Instruction::Xor:
3725  intrinsic = "__xor";
3726  break;
3727  case llvm::Instruction::Shl:
3728  intrinsic = "__shl";
3729  break;
3730  case llvm::Instruction::LShr:
3731  intrinsic = "__lshr";
3732  break;
3733  case llvm::Instruction::AShr:
3734  intrinsic = "__ashr";
3735  break;
3736  default:
3737 #ifndef NDEBUG
3738  llvm::errs() << "Invalid operator type!" << I;
3739 #endif
3740  llvm_unreachable(0);
3741  }
3742  Out << intrinsic;
3743  Out << "(";
3744  writeOperand(I.getOperand(0));
3745  Out << ", ";
3746  if ((I.getOpcode() == llvm::Instruction::Shl || I.getOpcode() == llvm::Instruction::LShr ||
3747  I.getOpcode() == llvm::Instruction::AShr)) {
3748  llvm::Value *splat = NULL;
3749  if (LLVMVectorValuesAllEqual(I.getOperand(1), &splat)) {
3750  if (splat) {
3751  // Avoid __extract_element(splat(value), 0), if possible.
3752  writeOperand(splat);
3753  } else {
3754  Out << "__extract_element(";
3755  writeOperand(I.getOperand(1));
3756  Out << ", 0) ";
3757  }
3758  } else
3759  writeOperand(I.getOperand(1));
3760  } else
3761  writeOperand(I.getOperand(1));
3762  Out << ")";
3763  return;
3764  }
3765 
3766  // We must cast the results of binary operations which might be promoted.
3767  bool needsCast = false;
3768  if ((I.getType() == llvm::Type::getInt8Ty(I.getContext())) ||
3769  (I.getType() == llvm::Type::getInt16Ty(I.getContext())) ||
3770  (I.getType() == llvm::Type::getFloatTy(I.getContext()))) {
3771  needsCast = true;
3772  Out << "((";
3773  printType(Out, I.getType(), false);
3774  Out << ")(";
3775  }
3776 
3777  // If this is a negation operation, print it out as such. For FP, we don't
3778  // want to print "-0.0 - X".
3779 #if ISPC_LLVM_VERSION > ISPC_LLVM_7_0 // LLVM 8.0+
3780  llvm::Value *X;
3781  if (match(&I, m_Neg(llvm::PatternMatch::m_Value(X)))) {
3782  Out << "-(";
3783  writeOperand(X);
3784  Out << ")";
3785  } else if (match(&I, m_FNeg(llvm::PatternMatch::m_Value(X)))) {
3786  Out << "-(";
3787  writeOperand(X);
3788  Out << ")";
3789  }
3790 #else
3791  if (llvm::BinaryOperator::isNeg(&I)) {
3792  Out << "-(";
3793  writeOperand(llvm::BinaryOperator::getNegArgument(llvm::cast<llvm::BinaryOperator>(&I)));
3794  Out << ")";
3795  } else if (llvm::BinaryOperator::isFNeg(&I)) {
3796  Out << "-(";
3797  writeOperand(llvm::BinaryOperator::getFNegArgument(llvm::cast<llvm::BinaryOperator>(&I)));
3798  Out << ")";
3799  }
3800 #endif
3801  else if (I.getOpcode() == llvm::Instruction::FRem) {
3802  // Output a call to fmod/fmodf instead of emitting a%b
3803  if (I.getType() == llvm::Type::getFloatTy(I.getContext()))
3804  Out << "fmodf(";
3805  else if (I.getType() == llvm::Type::getDoubleTy(I.getContext()))
3806  Out << "fmod(";
3807  else // all 3 flavors of long double
3808  Out << "fmodl(";
3809  writeOperand(I.getOperand(0));
3810  Out << ", ";
3811  writeOperand(I.getOperand(1));
3812  Out << ")";
3813  } else {
3814 
3815  // Write out the cast of the instruction's value back to the proper type
3816  // if necessary.
3817  bool NeedsClosingParens = writeInstructionCast(I);
3818 
3819  // Certain instructions require the operand to be forced to a specific type
3820  // so we use writeOperandWithCast here instead of writeOperand. Similarly
3821  // below for operand 1
3822  writeOperandWithCast(I.getOperand(0), I.getOpcode());
3823 
3824  switch (I.getOpcode()) {
3825  case llvm::Instruction::Add:
3826  case llvm::Instruction::FAdd:
3827  Out << " + ";
3828  break;
3829  case llvm::Instruction::Sub:
3830  case llvm::Instruction::FSub:
3831  Out << " - ";
3832  break;
3833  case llvm::Instruction::Mul:
3834  case llvm::Instruction::FMul:
3835  Out << " * ";
3836  break;
3837  case llvm::Instruction::URem:
3838  case llvm::Instruction::SRem:
3839  case llvm::Instruction::FRem:
3840  Out << " % ";
3841  break;
3842  case llvm::Instruction::UDiv:
3843  case llvm::Instruction::SDiv:
3844  case llvm::Instruction::FDiv:
3845  Out << " / ";
3846  break;
3847  case llvm::Instruction::And:
3848  Out << " & ";
3849  break;
3850  case llvm::Instruction::Or:
3851  Out << " | ";
3852  break;
3853  case llvm::Instruction::Xor:
3854  Out << " ^ ";
3855  break;
3856  case llvm::Instruction::Shl:
3857  Out << " << ";
3858  break;
3859  case llvm::Instruction::LShr:
3860  case llvm::Instruction::AShr:
3861  Out << " >> ";
3862  break;
3863  default:
3864 #ifndef NDEBUG
3865  llvm::errs() << "Invalid operator type!" << I;
3866 #endif
3867  llvm_unreachable(0);
3868  }
3869 
3870  writeOperandWithCast(I.getOperand(1), I.getOpcode());
3871  if (NeedsClosingParens)
3872  Out << "))";
3873  }
3874 
3875  if (needsCast) {
3876  Out << "))";
3877  }
3878 }
3879 
3880 static const char *lPredicateToString(llvm::CmpInst::Predicate p) {
3881  switch (p) {
3882  case llvm::ICmpInst::ICMP_EQ:
3883  return "__equal";
3884  case llvm::ICmpInst::ICMP_NE:
3885  return "__not_equal";
3886  case llvm::ICmpInst::ICMP_ULE:
3887  return "__unsigned_less_equal";
3888  case llvm::ICmpInst::ICMP_SLE:
3889  return "__signed_less_equal";
3890  case llvm::ICmpInst::ICMP_UGE:
3891  return "__unsigned_greater_equal";
3892  case llvm::ICmpInst::ICMP_SGE:
3893  return "__signed_greater_equal";
3894  case llvm::ICmpInst::ICMP_ULT:
3895  return "__unsigned_less_than";
3896  case llvm::ICmpInst::ICMP_SLT:
3897  return "__signed_less_than";
3898  case llvm::ICmpInst::ICMP_UGT:
3899  return "__unsigned_greater_than";
3900  case llvm::ICmpInst::ICMP_SGT:
3901  return "__signed_greater_than";
3902 
3903  case llvm::FCmpInst::FCMP_ORD:
3904  return "__ordered";
3905  case llvm::FCmpInst::FCMP_UNO:
3906  return "__unordered";
3907  case llvm::FCmpInst::FCMP_UEQ:
3908  return "__equal";
3909  case llvm::FCmpInst::FCMP_UNE:
3910  return "__not_equal";
3911  case llvm::FCmpInst::FCMP_ULT:
3912  return "__less_than";
3913  case llvm::FCmpInst::FCMP_ULE:
3914  return "__less_equal";
3915  case llvm::FCmpInst::FCMP_UGT:
3916  return "__greater_than";
3917  case llvm::FCmpInst::FCMP_UGE:
3918  return "__greater_equal";
3919  case llvm::FCmpInst::FCMP_OEQ:
3920  return "__equal";
3921  case llvm::FCmpInst::FCMP_ONE:
3922  return "__not_equal";
3923  case llvm::FCmpInst::FCMP_OLT:
3924  return "__less_than";
3925  case llvm::FCmpInst::FCMP_OLE:
3926  return "__less_equal";
3927  case llvm::FCmpInst::FCMP_OGT:
3928  return "__greater_than";
3929  case llvm::FCmpInst::FCMP_OGE:
3930  return "__greater_equal";
3931 
3932  default:
3933  llvm_unreachable(0);
3934  return NULL;
3935  }
3936 }
3937 
3938 static const char *lTypeToSuffix(llvm::Type *t) {
3939  llvm::VectorType *vt = llvm::dyn_cast<llvm::VectorType>(t);
3940  Assert(vt != NULL);
3941  t = vt->getElementType();
3942 
3943  switch (t->getTypeID()) {
3944  case llvm::Type::FloatTyID:
3945  return "float";
3946  case llvm::Type::DoubleTyID:
3947  return "double";
3948  case llvm::Type::IntegerTyID: {
3949  switch (llvm::cast<llvm::IntegerType>(t)->getBitWidth()) {
3950  case 1:
3951  return "i1";
3952  case 8:
3953  return "i8";
3954  case 16:
3955  return "i16";
3956  case 32:
3957  return "i32";
3958  case 64:
3959  return "i64";
3960  }
3961  }
3962  default:
3963  llvm_unreachable(0);
3964  return NULL;
3965  }
3966  return NULL;
3967 }
3968 
3969 void CWriter::visitICmpInst(llvm::ICmpInst &I) {
3970  bool isVector = llvm::isa<llvm::VectorType>(I.getOperand(0)->getType());
3971 
3972  if (isVector) {
3973  Out << lPredicateToString(I.getPredicate());
3974  Out << "_";
3975  Out << lTypeToSuffix(I.getOperand(0)->getType());
3976  Out << "(";
3977  writeOperand(I.getOperand(0));
3978  Out << ", ";
3979  writeOperand(I.getOperand(1));
3980  Out << ")";
3981  return;
3982  }
3983 
3984  // Write out the cast of the instruction's value back to the proper type
3985  // if necessary.
3986  bool NeedsClosingParens = writeInstructionCast(I);
3987 
3988  // Certain icmp predicate require the operand to be forced to a specific type
3989  // so we use writeOperandWithCast here instead of writeOperand. Similarly
3990  // below for operand 1
3991  writeOperandWithCast(I.getOperand(0), I);
3992 
3993  switch (I.getPredicate()) {
3994  case llvm::ICmpInst::ICMP_EQ:
3995  Out << " == ";
3996  break;
3997  case llvm::ICmpInst::ICMP_NE:
3998  Out << " != ";
3999  break;
4000  case llvm::ICmpInst::ICMP_ULE:
4001  case llvm::ICmpInst::ICMP_SLE:
4002  Out << " <= ";
4003  break;
4004  case llvm::ICmpInst::ICMP_UGE:
4005  case llvm::ICmpInst::ICMP_SGE:
4006  Out << " >= ";
4007  break;
4008  case llvm::ICmpInst::ICMP_ULT:
4009  case llvm::ICmpInst::ICMP_SLT:
4010  Out << " < ";
4011  break;
4012  case llvm::ICmpInst::ICMP_UGT:
4013  case llvm::ICmpInst::ICMP_SGT:
4014  Out << " > ";
4015  break;
4016  default:
4017 #ifndef NDEBUG
4018  llvm::errs() << "Invalid icmp predicate!" << I;
4019 #endif
4020  llvm_unreachable(0);
4021  }
4022 
4023  writeOperandWithCast(I.getOperand(1), I);
4024  if (NeedsClosingParens)
4025  Out << "))";
4026 }
4027 
4028 void CWriter::visitFCmpInst(llvm::FCmpInst &I) {
4029  bool isVector = llvm::isa<llvm::VectorType>(I.getOperand(0)->getType());
4030 
4031  if (I.getPredicate() == llvm::FCmpInst::FCMP_FALSE) {
4032  if (isVector)
4033  llvm::report_fatal_error("FIXME: vector FCMP_FALSE");
4034  else
4035  Out << "0";
4036  return;
4037  }
4038  if (I.getPredicate() == llvm::FCmpInst::FCMP_TRUE) {
4039  if (isVector)
4040  llvm::report_fatal_error("FIXME: vector FCMP_TRUE");
4041  else
4042  Out << "1";
4043  return;
4044  }
4045 
4046  if (isVector) {
4047  Out << lPredicateToString(I.getPredicate());
4048  Out << "_";
4049  Out << lTypeToSuffix(I.getOperand(0)->getType());
4050  Out << "(";
4051  } else {
4052  const char *op = 0;
4053  switch (I.getPredicate()) {
4054  default:
4055  llvm_unreachable("Illegal FCmp predicate");
4056  case llvm::FCmpInst::FCMP_ORD:
4057  op = "ord";
4058  break;
4059  case llvm::FCmpInst::FCMP_UNO:
4060  op = "uno";
4061  break;
4062 
4063  case llvm::FCmpInst::FCMP_UEQ:
4064  op = "ueq";
4065  break;
4066  case llvm::FCmpInst::FCMP_UNE:
4067  op = "une";
4068  break;
4069  case llvm::FCmpInst::FCMP_ULT:
4070  op = "ult";
4071  break;
4072  case llvm::FCmpInst::FCMP_ULE:
4073  op = "ule";
4074  break;
4075  case llvm::FCmpInst::FCMP_UGT:
4076  op = "ugt";
4077  break;
4078  case llvm::FCmpInst::FCMP_UGE:
4079  op = "uge";
4080  break;
4081 
4082  case llvm::FCmpInst::FCMP_OEQ:
4083  op = "oeq";
4084  break;
4085  case llvm::FCmpInst::FCMP_ONE:
4086  op = "one";
4087  break;
4088  case llvm::FCmpInst::FCMP_OLT:
4089  op = "olt";
4090  break;
4091  case llvm::FCmpInst::FCMP_OLE:
4092  op = "ole";
4093  break;
4094  case llvm::FCmpInst::FCMP_OGT:
4095  op = "ogt";
4096  break;
4097  case llvm::FCmpInst::FCMP_OGE:
4098  op = "oge";
4099  break;
4100  }
4101 
4102  Out << "llvm_fcmp_" << op << "(";
4103  }
4104 
4105  // Write the first operand
4106  writeOperand(I.getOperand(0));
4107  Out << ", ";
4108  // Write the second operand
4109  writeOperand(I.getOperand(1));
4110  Out << ")";
4111 }
4112 
4113 static const char *getFloatBitCastField(llvm::Type *Ty) {
4114  switch (Ty->getTypeID()) {
4115  default:
4116  llvm_unreachable("Invalid Type");
4117  case llvm::Type::FloatTyID:
4118  return "Float";
4119  case llvm::Type::DoubleTyID:
4120  return "Double";
4121  case llvm::Type::IntegerTyID: {
4122  unsigned NumBits = llvm::cast<llvm::IntegerType>(Ty)->getBitWidth();
4123  if (NumBits <= 32)
4124  return "Int32";
4125  else
4126  return "Int64";
4127  }
4128  }
4129 }
4130 
4131 void CWriter::visitCastInst(llvm::CastInst &I) {
4132  llvm::Type *DstTy = I.getType();
4133  llvm::Type *SrcTy = I.getOperand(0)->getType();
4134  if (isFPIntBitCast(I)) {
4135  Out << '(';
4136  // These int<->float and long<->double casts need to be handled specially
4137  Out << GetValueName(&I) << "__BITCAST_TEMPORARY." << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
4138  writeOperand(I.getOperand(0));
4139  Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY." << getFloatBitCastField(I.getType());
4140  Out << ')';
4141  return;
4142  }
4143 
4144  if ((llvm::isa<llvm::VectorType>(DstTy)) && (!llvm::isa<llvm::VectorType>(SrcTy))) {
4145  writeOperand(I.getOperand(0));
4146  return;
4147  }
4148 
4149  Out << '(';
4150  bool closeParen = printCast(I.getOpcode(), SrcTy, DstTy);
4151 
4152  // Make a sext from i1 work by subtracting the i1 from 0 (an int).
4153  if (SrcTy == llvm::Type::getInt1Ty(I.getContext()) && I.getOpcode() == llvm::Instruction::SExt)
4154  Out << "0-";
4155 
4156  writeOperand(I.getOperand(0));
4157 
4158  if (DstTy == llvm::Type::getInt1Ty(I.getContext()) &&
4159  (I.getOpcode() == llvm::Instruction::Trunc || I.getOpcode() == llvm::Instruction::FPToUI ||
4160  I.getOpcode() == llvm::Instruction::FPToSI || I.getOpcode() == llvm::Instruction::PtrToInt)) {
4161  // Make sure we really get a trunc to bool by anding the operand with 1
4162  Out << "&1u";
4163  }
4164  Out << ')';
4165  if (closeParen)
4166  Out << ')';
4167 }
4168 
4169 void CWriter::visitSelectInst(llvm::SelectInst &I) {
4170  if (llvm::isa<llvm::VectorType>(I.getType())) {
4171  Out << "__select(";
4172  writeOperand(I.getCondition());
4173  Out << ", ";
4174  writeOperand(I.getTrueValue());
4175  Out << ", ";
4176  writeOperand(I.getFalseValue());
4177  Out << ")";
4178  return;
4179  }
4180 
4181  Out << "((";
4182  writeOperand(I.getCondition());
4183  Out << ") ? (";
4184  writeOperand(I.getTrueValue());
4185  Out << ") : (";
4186  writeOperand(I.getFalseValue());
4187  Out << "))";
4188 }
4189 
4190 // Returns the macro name or value of the max or min of an integer type
4191 // (as defined in limits.h).
4192 static void printLimitValue(llvm::IntegerType &Ty, bool isSigned, bool isMax, llvm::raw_ostream &Out) {
4193  const char *type = "";
4194  const char *sprefix = "";
4195 
4196  unsigned NumBits = Ty.getBitWidth();
4197  if (NumBits <= 8) {
4198  type = "CHAR";
4199  sprefix = "S";
4200  } else if (NumBits <= 16) {
4201  type = "SHRT";
4202  } else if (NumBits <= 32) {
4203  type = "INT";
4204  } else if (NumBits <= 64) {
4205  type = "LLONG";
4206  } else {
4207  llvm_unreachable("Bit widths > 64 not implemented yet");
4208  }
4209 
4210  if (isSigned)
4211  Out << sprefix << type << (isMax ? "_MAX" : "_MIN");
4212  else
4213  Out << "U" << type << (isMax ? "_MAX" : "0");
4214 }
4215 
4216 #ifndef NDEBUG
4217 static bool isSupportedIntegerSize(llvm::IntegerType &T) {
4218  return T.getBitWidth() == 8 || T.getBitWidth() == 16 || T.getBitWidth() == 32 || T.getBitWidth() == 64;
4219 }
4220 #endif
4221 
4222 void CWriter::printIntrinsicDefinition(const llvm::Function &F, llvm::raw_ostream &Out) {
4223  llvm::FunctionType *funT = F.getFunctionType();
4224  llvm::Type *retT = F.getReturnType();
4225  llvm::IntegerType *elemT = llvm::cast<llvm::IntegerType>(funT->getParamType(1));
4226 
4227  assert(isSupportedIntegerSize(*elemT) && "CBackend does not support arbitrary size integers.");
4228  assert(llvm::cast<llvm::StructType>(retT)->getElementType(0) == elemT && elemT == funT->getParamType(0) &&
4229  funT->getNumParams() == 2);
4230 
4231  switch (F.getIntrinsicID()) {
4232  default:
4233  llvm_unreachable("Unsupported Intrinsic.");
4234  case llvm::Intrinsic::uadd_with_overflow:
4235  // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) {
4236  // Rty r;
4237  // r.field0 = a + b;
4238  // r.field1 = (r.field0 < a);
4239  // return r;
4240  // }
4241  Out << "static inline ";
4242  printType(Out, retT);
4243  Out << GetValueName(&F);
4244  Out << "(";
4245  printSimpleType(Out, elemT, false);
4246  Out << "a,";
4247  printSimpleType(Out, elemT, false);
4248  Out << "b) {\n ";
4249  printType(Out, retT);
4250  Out << "r;\n";
4251  Out << " r.field0 = a + b;\n";
4252  Out << " r.field1 = (r.field0 < a);\n";
4253  Out << " return r;\n}\n";
4254  break;
4255 
4256  case llvm::Intrinsic::sadd_with_overflow:
4257  // static inline Rty sadd_ixx(ixx a, ixx b) {
4258  // Rty r;
4259  // r.field1 = (b > 0 && a > XX_MAX - b) ||
4260  // (b < 0 && a < XX_MIN - b);
4261  // r.field0 = r.field1 ? 0 : a + b;
4262  // return r;
4263  // }
4264  Out << "static ";
4265  printType(Out, retT);
4266  Out << GetValueName(&F);
4267  Out << "(";
4268  printSimpleType(Out, elemT, true);
4269  Out << "a,";
4270  printSimpleType(Out, elemT, true);
4271  Out << "b) {\n ";
4272  printType(Out, retT);
4273  Out << "r;\n";
4274  Out << " r.field1 = (b > 0 && a > ";
4275  printLimitValue(*elemT, true, true, Out);
4276  Out << " - b) || (b < 0 && a < ";
4277  printLimitValue(*elemT, true, false, Out);
4278  Out << " - b);\n";
4279  Out << " r.field0 = r.field1 ? 0 : a + b;\n";
4280  Out << " return r;\n}\n";
4281  break;
4282 
4283  case llvm::Intrinsic::umul_with_overflow:
4284  Out << "static inline ";
4285  printType(Out, retT);
4286  Out << GetValueName(&F);
4287  Out << "(";
4288  printSimpleType(Out, elemT, false);
4289  Out << "a,";
4290  printSimpleType(Out, elemT, false);
4291  Out << "b) {\n ";
4292 
4293  printType(Out, retT);
4294  Out << "r;\n";
4295 
4296  unsigned NumBits = llvm::cast<llvm::IntegerType>(elemT)->getBitWidth();
4297  std::stringstream str_type;
4298  if (NumBits <= 32)
4299  str_type << "uint" << 2 * NumBits << "_t";
4300  else {
4301  assert(NumBits <= 64 && "Bit widths > 128 not implemented yet");
4302  str_type << "llvmUInt128";
4303  }
4304 
4305  Out << " " << str_type.str() << " result = (" << str_type.str() << ") a * (" << str_type.str() << ") b;\n";
4306  Out << " r.field0 = result;\n";
4307  Out << " r.field1 = result >> " << NumBits << ";\n";
4308  Out << " return r;\n}\n";
4309  break;
4310  }
4311 }
4312 
4313 void CWriter::lowerIntrinsics(llvm::Function &F) {
4314  // This is used to keep track of intrinsics that get generated to a lowered
4315  // function. We must generate the prototypes before the function body which
4316  // will only be expanded on first use (by the loop below).
4317  std::vector<llvm::Function *> prototypesToGen;
4318 
4319  // Examine all the instructions in this function to find the intrinsics that
4320  // need to be lowered.
4321  for (llvm::Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB)
4322  for (llvm::BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;)
4323  if (llvm::CallInst *CI = llvm::dyn_cast<llvm::CallInst>(I++))
4324  if (llvm::Function *F = CI->getCalledFunction())
4325  switch (F->getIntrinsicID()) {
4326  // We directly implement these intrinsics
4327  case llvm::Intrinsic::not_intrinsic:
4328  case llvm::Intrinsic::vastart:
4329  case llvm::Intrinsic::vacopy:
4330  case llvm::Intrinsic::vaend:
4331  case llvm::Intrinsic::returnaddress:
4332  case llvm::Intrinsic::frameaddress:
4333  case llvm::Intrinsic::setjmp:
4334  case llvm::Intrinsic::longjmp:
4335  case llvm::Intrinsic::memset:
4336  case llvm::Intrinsic::prefetch:
4337  case llvm::Intrinsic::powi:
4338  case llvm::Intrinsic::fabs:
4339  case llvm::Intrinsic::x86_sse_cmp_ss:
4340  case llvm::Intrinsic::x86_sse_cmp_ps:
4341  case llvm::Intrinsic::x86_sse2_cmp_sd:
4342  case llvm::Intrinsic::x86_sse2_cmp_pd:
4343  case llvm::Intrinsic::ppc_altivec_lvsl:
4344  case llvm::Intrinsic::uadd_with_overflow:
4345  case llvm::Intrinsic::sadd_with_overflow:
4346  case llvm::Intrinsic::trap:
4347  case llvm::Intrinsic::objectsize:
4348  case llvm::Intrinsic::readcyclecounter:
4349  case llvm::Intrinsic::umul_with_overflow:
4350  // Or we just ignore them because of their uselessness in C++ source
4351  case llvm::Intrinsic::dbg_value:
4352  case llvm::Intrinsic::dbg_declare:
4353  break;
4354  default:
4355  // If this is an intrinsic that directly corresponds to a GCC
4356  // builtin, we handle it.
4357  const char *BuiltinName = "";
4358 #define GET_GCC_BUILTIN_NAME
4359 #define Intrinsic llvm::Intrinsic
4360 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
4361 #include "llvm/Intrinsics.gen"
4362 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_6_0 /* LLVM 3.3-6.0 */
4363 #include "llvm/IR/Intrinsics.gen"
4364 #else /* LLVM 7.0+ */
4365 // This looks completely broken, even in 3.2, need to figure out what's going on here
4366 // and how to fix it (if needed).
4367 // #include "llvm/IR/Intrinsics.inc"
4368 #endif
4369 #undef Intrinsic
4370 #undef GET_GCC_BUILTIN_NAME
4371  // If we handle it, don't lower it.
4372  if (BuiltinName[0])
4373  break;
4374 
4375  // All other intrinsic calls we must lower.
4376  llvm::Instruction *Before = 0;
4377  if (CI != &BB->front())
4378 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
4379  Before = &*std::prev(llvm::BasicBlock::iterator(CI));
4380 #else
4381  Before = prior(llvm::BasicBlock::iterator(CI));
4382 #endif
4383 
4384  IL->LowerIntrinsicCall(CI);
4385  if (Before) { // Move iterator to instruction after call
4386 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
4387  I = Before;
4388  ++I;
4389 #else /* LLVM 3.8+ */
4390  I = Before->getIterator();
4391  ++I;
4392 #endif
4393  } else {
4394  I = BB->begin();
4395  }
4396  // If the intrinsic got lowered to another call, and that call has
4397  // a definition then we need to make sure its prototype is emitted
4398  // before any calls to it.
4399  if (llvm::CallInst *Call = llvm::dyn_cast<llvm::CallInst>(I))
4400  if (llvm::Function *NewF = Call->getCalledFunction())
4401  if (!NewF->isDeclaration())
4402  prototypesToGen.push_back(NewF);
4403 
4404  break;
4405  }
4406 
4407  // We may have collected some prototypes to emit in the loop above.
4408  // Emit them now, before the function that uses them is emitted. But,
4409  // be careful not to emit them twice.
4410  std::vector<llvm::Function *>::iterator I = prototypesToGen.begin();
4411  std::vector<llvm::Function *>::iterator E = prototypesToGen.end();
4412  for (; I != E; ++I) {
4413  if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) {
4414  Out << '\n';
4415  printFunctionSignature(*I, true);
4416  Out << ";\n";
4417  }
4418  }
4419 }
4420 
4421 void CWriter::visitCallInst(llvm::CallInst &I) {
4422  if (llvm::isa<llvm::InlineAsm>(I.getCalledValue()))
4423  return visitInlineAsm(I);
4424 
4425  bool WroteCallee = false;
4426 
4427  // Handle intrinsic function calls first...
4428  if (llvm::Function *F = I.getCalledFunction())
4429  if (llvm::Intrinsic::ID ID = (llvm::Intrinsic::ID)F->getIntrinsicID())
4430  if (visitBuiltinCall(I, ID, WroteCallee))
4431  return;
4432 
4433  llvm::Value *Callee = I.getCalledValue();
4434 
4435  llvm::PointerType *PTy = llvm::cast<llvm::PointerType>(Callee->getType());
4436  llvm::FunctionType *FTy = llvm::cast<llvm::FunctionType>(PTy->getElementType());
4437 
4438  // If this is a call to a struct-return function, assign to the first
4439  // parameter instead of passing it to the call.
4440 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
4441  const llvm::AttrListPtr &PAL = I.getAttributes();
4442 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
4443  const llvm::AttributeSet &PAL = I.getAttributes();
4444 #else // LLVM 5.0+
4445  const llvm::AttributeList &PAL = I.getAttributes();
4446 #endif
4447 
4448  bool hasByVal = I.hasByValArgument();
4449  bool isStructRet = (I.getNumArgOperands() > 0) && I.hasStructRetAttr();
4450  if (isStructRet) {
4451  writeOperandDeref(I.getArgOperand(0));
4452  Out << " = ";
4453  }
4454 
4455  if (I.isTailCall())
4456  Out << " /*tail*/ ";
4457 
4458  if (!WroteCallee) {
4459  // If this is an indirect call to a struct return function, we need to cast
4460  // the pointer. Ditto for indirect calls with byval arguments.
4461  bool NeedsCast = (hasByVal || isStructRet) && !llvm::isa<llvm::Function>(Callee);
4462 
4463  // GCC is a real PITA. It does not permit codegening casts of functions to
4464  // function pointers if they are in a call (it generates a trap instruction
4465  // instead!). We work around this by inserting a cast to void* in between
4466  // the function and the function pointer cast. Unfortunately, we can't just
4467  // form the constant expression here, because the folder will immediately
4468  // nuke it.
4469  //
4470  // Note finally, that this is completely unsafe. ANSI C does not guarantee
4471  // that void* and function pointers have the same size. :( To deal with this
4472  // in the common case, we handle casts where the number of arguments passed
4473  // match exactly.
4474  //
4475  if (llvm::ConstantExpr *CE = llvm::dyn_cast<llvm::ConstantExpr>(Callee))
4476  if (CE->isCast())
4477  if (llvm::Function *RF = llvm::dyn_cast<llvm::Function>(CE->getOperand(0))) {
4478  NeedsCast = true;
4479  Callee = RF;
4480  }
4481 
4482  if (Callee->getName() == "malloc" || Callee->getName() == "_aligned_malloc")
4483  Out << "(uint8_t *)";
4484 
4485  // This 'if' will fix 'soa-18.ispc' test (fails with optimizations off)
4486  // Yet the way the case is fixed is quite dirty and leads to many other fails
4487 
4488  // if (Callee->getName() == "__masked_store_i64") {
4489  // llvm::CallSite CS(&I);
4490  // llvm::CallSite::arg_iterator AI = CS.arg_begin();
4491  // if (is_vec16_i64_ty(llvm::cast<llvm::PointerType>((*AI)->getType())->getElementType())) {
4492  // Out << "/* Replacing store of vec16_i64 val into &vec16_i64 pointer with a simple copy */\n";
4493  // // If we are trying to get a pointer to from a vec16_i64 var
4494  // // It would be better to replace this instruction with a masked copy
4495  // if (llvm::isa<llvm::GetElementPtrInst>(*AI)) {
4496  // writeOperandDeref(*AI);
4497  // Out << " = __select(";
4498  // writeOperand(*(AI+2));
4499  // Out << ", ";
4500  // writeOperand(*(AI+1));
4501  // Out << ", ";
4502  // writeOperandDeref(*AI);
4503  // Out << ")";
4504  // return;
4505  // }
4506  // }
4507  //}
4508 
4509  if (NeedsCast) {
4510  // Ok, just cast the pointer type.
4511  Out << "((";
4512  if (isStructRet)
4513  printStructReturnPointerFunctionType(Out, PAL,
4514  llvm::cast<llvm::PointerType>(I.getCalledValue()->getType()));
4515  else if (hasByVal)
4516  printType(Out, I.getCalledValue()->getType(), false, "", true, PAL);
4517  else
4518  printType(Out, I.getCalledValue()->getType());
4519  Out << ")(void*)";
4520  }
4521  writeOperand(Callee);
4522  if (NeedsCast)
4523  Out << ')';
4524  }
4525 
4526  Out << '(';
4527 
4528  bool PrintedArg = false;
4529  if (FTy->isVarArg() && !FTy->getNumParams()) {
4530  Out << "0 /*dummy arg*/";
4531  PrintedArg = true;
4532  }
4533 
4534  unsigned NumDeclaredParams = FTy->getNumParams();
4535  llvm::CallSite CS(&I);
4536  llvm::CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
4537  unsigned ArgNo = 0;
4538  if (isStructRet) { // Skip struct return argument.
4539  ++AI;
4540  ++ArgNo;
4541  }
4542 
4543  for (; AI != AE; ++AI, ++ArgNo) {
4544  if (PrintedArg)
4545  Out << ", ";
4546  if (ArgNo == 0 && Callee->getName() == "posix_memalign") {
4547  // uint8_t** is incompatible with void** without explicit cast.
4548  // Should be do this any other functions?
4549  Out << "(void **)";
4550  } else if (ArgNo < NumDeclaredParams && (*AI)->getType() != FTy->getParamType(ArgNo)) {
4551  Out << '(';
4552  printType(Out, FTy->getParamType(ArgNo),
4553 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
4554  PAL.getParamAttributes(ArgNo + 1).hasAttribute(llvm::Attributes::SExt)
4555 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
4556  PAL.getParamAttributes(ArgNo + 1).hasAttribute(llvm::AttributeSet::FunctionIndex,
4557  llvm::Attribute::SExt)
4558 #else // LLVM 5.0+
4559  PAL.getParamAttributes(ArgNo + 1).hasAttribute(llvm::Attribute::SExt)
4560 #endif
4561  );
4562  Out << ')';
4563  }
4564  // Check if the argument is expected to be passed by value.
4565 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
4566  if (I.paramHasAttr(ArgNo + 1,
4567 #else // LLVM 5.0+
4568  if (I.paramHasAttr(ArgNo,
4569 #endif
4570 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
4571  llvm::Attributes::ByVal
4572 #else /* LLVM 3.3+ */
4573  llvm::Attribute::ByVal
4574 #endif
4575  )) {
4576  writeOperandDeref(*AI);
4577  } else {
4578  writeOperand(*AI);
4579  }
4580  PrintedArg = true;
4581  }
4582  Out << ')';
4583 }
4584 
4585 /// visitBuiltinCall - Handle the call to the specified builtin. Returns true
4586 /// if the entire call is handled, return false if it wasn't handled, and
4587 /// optionally set 'WroteCallee' if the callee has already been printed out.
4588 bool CWriter::visitBuiltinCall(llvm::CallInst &I, llvm::Intrinsic::ID ID, bool &WroteCallee) {
4589  switch (ID) {
4590  default: {
4591  // If this is an intrinsic that directly corresponds to a GCC
4592  // builtin, we emit it here.
4593  const char *BuiltinName = "";
4594 #define GET_GCC_BUILTIN_NAME
4595 #define Intrinsic llvm::Intrinsic
4596 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
4597 #include "llvm/Intrinsics.gen"
4598 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_6_0 /* LLVM 3.3-6.0 */
4599 #include "llvm/IR/Intrinsics.gen"
4600 #else /* LLVM 7.0+ */
4601 // This looks completely broken, even in 3.2, need to figure out what's going on here
4602 // and how to fix it (if needed).
4603 // #include "llvm/IR/Intrinsics.inc"
4604 #endif
4605 #undef Intrinsic
4606 #undef GET_GCC_BUILTIN_NAME
4607  assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
4608 
4609  Out << BuiltinName;
4610  WroteCallee = true;
4611  return false;
4612  }
4613  // Ignoring debug intrinsics
4614  case llvm::Intrinsic::dbg_value:
4615  case llvm::Intrinsic::dbg_declare:
4616  return true;
4617  case llvm::Intrinsic::vastart:
4618  Out << "0; ";
4619 
4620  Out << "va_start(*(va_list*)";
4621  writeOperand(I.getArgOperand(0));
4622  Out << ", ";
4623  // Output the last argument to the enclosing function.
4624  if (I.getParent()->getParent()->arg_empty())
4625  Out << "vararg_dummy_arg";
4626  else
4627  writeOperand(&*(std::prev(I.getParent()->getParent()->arg_end())));
4628  Out << ')';
4629  return true;
4630  case llvm::Intrinsic::vaend:
4631  if (!llvm::isa<llvm::ConstantPointerNull>(I.getArgOperand(0))) {
4632  Out << "0; va_end(*(va_list*)";
4633  writeOperand(I.getArgOperand(0));
4634  Out << ')';
4635  } else {
4636  Out << "va_end(*(va_list*)0)";
4637  }
4638  return true;
4639  case llvm::Intrinsic::vacopy:
4640  Out << "0; ";
4641  Out << "va_copy(*(va_list*)";
4642  writeOperand(I.getArgOperand(0));
4643  Out << ", *(va_list*)";
4644  writeOperand(I.getArgOperand(1));
4645  Out << ')';
4646  return true;
4647  case llvm::Intrinsic::returnaddress:
4648  Out << "__builtin_return_address(";
4649  writeOperand(I.getArgOperand(0));
4650  Out << ')';
4651  return true;
4652  case llvm::Intrinsic::frameaddress:
4653  Out << "__builtin_frame_address(";
4654  writeOperand(I.getArgOperand(0));
4655  Out << ')';
4656  return true;
4657  case llvm::Intrinsic::powi:
4658  Out << "__builtin_powi(";
4659  writeOperand(I.getArgOperand(0));
4660  Out << ", ";
4661  writeOperand(I.getArgOperand(1));
4662  Out << ')';
4663  return true;
4664  case llvm::Intrinsic::fabs:
4665  Out << "__builtin_fabs(";
4666  writeOperand(I.getArgOperand(0));
4667  Out << ')';
4668  return true;
4669  case llvm::Intrinsic::setjmp:
4670  Out << "setjmp(*(jmp_buf*)";
4671  writeOperand(I.getArgOperand(0));
4672  Out << ')';
4673  return true;
4674  case llvm::Intrinsic::longjmp:
4675  Out << "longjmp(*(jmp_buf*)";
4676  writeOperand(I.getArgOperand(0));
4677  Out << ", ";
4678  writeOperand(I.getArgOperand(1));
4679  Out << ')';
4680  return true;
4681  case llvm::Intrinsic::memset:
4682  Out << "Memset(";
4683  writeOperand(I.getArgOperand(0));
4684  Out << ", ";
4685  writeOperand(I.getArgOperand(1));
4686  Out << ", ";
4687  writeOperand(I.getArgOperand(2));
4688  Out << ')';
4689  return true;
4690  case llvm::Intrinsic::prefetch:
4691  Out << "LLVM_PREFETCH((const void *)";
4692  writeOperand(I.getArgOperand(0));
4693  Out << ", ";
4694  writeOperand(I.getArgOperand(1));
4695  Out << ", ";
4696  writeOperand(I.getArgOperand(2));
4697  Out << ")";
4698  return true;
4699  case llvm::Intrinsic::stacksave:
4700  // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save()
4701  // to work around GCC bugs (see PR1809).
4702  Out << "0; *((void**)&" << GetValueName(&I) << ") = __builtin_stack_save()";
4703  return true;
4704  case llvm::Intrinsic::x86_sse_cmp_ss:
4705  case llvm::Intrinsic::x86_sse_cmp_ps:
4706  case llvm::Intrinsic::x86_sse2_cmp_sd:
4707  case llvm::Intrinsic::x86_sse2_cmp_pd:
4708  Out << '(';
4709  printType(Out, I.getType());
4710  Out << ')';
4711  // Multiple GCC builtins multiplex onto this intrinsic.
4712  switch (llvm::cast<llvm::ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
4713  default:
4714  llvm_unreachable("Invalid llvm.x86.sse.cmp!");
4715  case 0:
4716  Out << "__builtin_ia32_cmpeq";
4717  break;
4718  case 1:
4719  Out << "__builtin_ia32_cmplt";
4720  break;
4721  case 2:
4722  Out << "__builtin_ia32_cmple";
4723  break;
4724  case 3:
4725  Out << "__builtin_ia32_cmpunord";
4726  break;
4727  case 4:
4728  Out << "__builtin_ia32_cmpneq";
4729  break;
4730  case 5:
4731  Out << "__builtin_ia32_cmpnlt";
4732  break;
4733  case 6:
4734  Out << "__builtin_ia32_cmpnle";
4735  break;
4736  case 7:
4737  Out << "__builtin_ia32_cmpord";
4738  break;
4739  }
4740  if (ID == llvm::Intrinsic::x86_sse_cmp_ps || ID == llvm::Intrinsic::x86_sse2_cmp_pd)
4741  Out << 'p';
4742  else
4743  Out << 's';
4744  if (ID == llvm::Intrinsic::x86_sse_cmp_ss || ID == llvm::Intrinsic::x86_sse_cmp_ps)
4745  Out << 's';
4746  else
4747  Out << 'd';
4748 
4749  Out << "(";
4750  writeOperand(I.getArgOperand(0));
4751  Out << ", ";
4752  writeOperand(I.getArgOperand(1));
4753  Out << ")";
4754  return true;
4755  case llvm::Intrinsic::ppc_altivec_lvsl:
4756  Out << '(';
4757  printType(Out, I.getType());
4758  Out << ')';
4759  Out << "__builtin_altivec_lvsl(0, (void*)";
4760  writeOperand(I.getArgOperand(0));
4761  Out << ")";
4762  return true;
4763  case llvm::Intrinsic::uadd_with_overflow:
4764  case llvm::Intrinsic::sadd_with_overflow:
4765  case llvm::Intrinsic::umul_with_overflow:
4766  Out << GetValueName(I.getCalledFunction()) << "(";
4767  writeOperand(I.getArgOperand(0));
4768  Out << ", ";
4769  writeOperand(I.getArgOperand(1));
4770  Out << ")";
4771  return true;
4772  case llvm::Intrinsic::trap:
4773  Out << "abort()";
4774  return true;
4775  case llvm::Intrinsic::objectsize:
4776  return true;
4777  case llvm::Intrinsic::readcyclecounter:
4778  Out << "__clock()";
4779  return true;
4780  }
4781 }
4782 
4783 // TODO: assumptions about what consume arguments from the call are likely wrong
4784 // handle communitivity
4785 void CWriter::visitInlineAsm(llvm::CallInst &CI) { assert(!"Inline assembly not supported"); }
4786 
4787 void CWriter::visitAllocaInst(llvm::AllocaInst &I) {
4788  Out << '(';
4789  printType(Out, I.getType());
4790  Out << ") alloca(sizeof(";
4791  printType(Out, I.getType()->getElementType());
4792  Out << ')';
4793  if (I.isArrayAllocation()) {
4794  Out << " * ";
4795  writeOperand(I.getOperand(0));
4796  }
4797  Out << ')';
4798 }
4799 
4800 void CWriter::printGEPExpression(llvm::Value *Ptr, llvm::gep_type_iterator I, llvm::gep_type_iterator E, bool Static) {
4801 
4802  // If there are no indices, just print out the pointer.
4803  if (I == E) {
4804  writeOperand(Ptr);
4805  return;
4806  }
4807 
4808  // Find out if the last index is into a vector. If so, we have to print this
4809  // specially. Since vectors can't have elements of indexable type, only the
4810  // last index could possibly be of a vector element.
4811  llvm::VectorType *LastIndexIsVector = 0;
4812  {
4813  for (llvm::gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
4814 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
4815  LastIndexIsVector = llvm::dyn_cast<llvm::VectorType>(*TmpI);
4816 #else // LLVM 4.0+
4817  LastIndexIsVector = llvm::dyn_cast<llvm::VectorType>(TmpI.getIndexedType());
4818 #endif
4819  }
4820 
4821  Out << "(";
4822 
4823  // If the last index is into a vector, we can't print it as &a[i][j] because
4824  // we can't index into a vector with j in GCC. Instead, emit this as
4825  // (((float*)&a[i])+j)
4826  if (LastIndexIsVector) {
4827  Out << "((";
4828  printType(Out, llvm::PointerType::getUnqual(LastIndexIsVector->getElementType()));
4829  Out << ")(";
4830  }
4831 
4832  Out << '&';
4833 
4834  llvm::Type *ParentTy = Ptr->getType();
4835 
4836  // If the first index is 0 (very typical) we can do a number of
4837  // simplifications to clean up the code.
4838  llvm::Value *FirstOp = I.getOperand();
4839  if (!llvm::isa<llvm::Constant>(FirstOp) || !llvm::cast<llvm::Constant>(FirstOp)->isNullValue()) {
4840  // First index isn't simple, print it the hard way.
4841  writeOperand(Ptr);
4842  } else {
4843  ParentTy = I.getIndexedType(); // Skip the zero index.
4844  ++I;
4845 
4846  // Okay, emit the first operand. If Ptr is something that is already address
4847  // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
4848  if (isAddressExposed(Ptr)) {
4849  writeOperandInternal(Ptr, Static);
4850 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
4851  } else if (I != E && (*I)->isStructTy()) {
4852 #else // LLVM 4.0+
4853  } else if (I != E && I.isStruct()) {
4854 #endif
4855  // If we didn't already emit the first operand, see if we can print it as
4856  // P->f instead of "P[0].f"
4857  writeOperand(Ptr);
4858  Out << "->field" << llvm::cast<llvm::ConstantInt>(I.getOperand())->getZExtValue();
4859  ParentTy = I.getIndexedType();
4860  ++I; // eat the struct index as well.
4861  } else {
4862  // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic.
4863  Out << "(*";
4864  writeOperand(Ptr);
4865  Out << ")";
4866  }
4867  }
4868 
4869  for (; I != E; ++I) {
4870  if (ParentTy->isStructTy()) {
4871  Out << ".field" << llvm::cast<llvm::ConstantInt>(I.getOperand())->getZExtValue();
4872  } else if (ParentTy->isArrayTy()) {
4873  Out << ".array[";
4874  writeOperandWithCast(I.getOperand(), llvm::Instruction::GetElementPtr);
4875  Out << ']';
4876  } else if (!ParentTy->isVectorTy()) {
4877  Out << '[';
4878  writeOperandWithCast(I.getOperand(), llvm::Instruction::GetElementPtr);
4879  Out << ']';
4880  } else {
4881  // If the last index is into a vector, then print it out as "+j)". This
4882  // works with the 'LastIndexIsVector' code above.
4883  if (llvm::isa<llvm::Constant>(I.getOperand()) &&
4884  llvm::cast<llvm::Constant>(I.getOperand())->isNullValue()) {
4885  Out << "))"; // avoid "+0".
4886  } else {
4887  Out << ")+(";
4888  writeOperandWithCast(I.getOperand(), llvm::Instruction::GetElementPtr);
4889  Out << "))";
4890  }
4891  }
4892  ParentTy = I.getIndexedType();
4893  }
4894  Out << ")";
4895 }
4896 
4897 void CWriter::writeMemoryAccess(llvm::Value *Operand, llvm::Type *OperandType, bool IsVolatile, unsigned Alignment) {
4898  assert(!llvm::isa<llvm::VectorType>(OperandType));
4899  bool IsUnaligned = Alignment && Alignment < TD->getABITypeAlignment(OperandType);
4900 
4901  llvm::IntegerType *ITy = llvm::dyn_cast<llvm::IntegerType>(OperandType);
4902  if (!IsUnaligned)
4903  Out << '*';
4904  if (IsVolatile || IsUnaligned) {
4905  Out << "((";
4906  if (IsUnaligned && ITy && (ITy->getBitWidth() > 64))
4907  Out << "iN_" << ITy->getBitWidth() << "_align_" << Alignment << " *)";
4908  else {
4909  if (IsUnaligned)
4910  Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {";
4911  printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*");
4912  if (IsUnaligned) {
4913  Out << "; } ";
4914  if (IsVolatile)
4915  Out << "volatile ";
4916  Out << "*";
4917  }
4918  Out << ")";
4919  }
4920  }
4921 
4922  writeOperand(Operand);
4923 
4924  if (IsVolatile || IsUnaligned) {
4925  Out << ')';
4926  if (IsUnaligned)
4927  Out << "->data";
4928  }
4929 }
4930 
4931 void CWriter::visitLoadInst(llvm::LoadInst &I) {
4932  llvm::VectorType *VT = llvm::dyn_cast<llvm::VectorType>(I.getType());
4933  if (VT != NULL) {
4934  Out << "__load<" << I.getAlignment() << ">(";
4935  writeOperand(I.getOperand(0));
4936  Out << ")";
4937  return;
4938  }
4939 
4940  writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(), I.getAlignment());
4941 }
4942 
4943 void CWriter::visitStoreInst(llvm::StoreInst &I) {
4944  llvm::VectorType *VT = llvm::dyn_cast<llvm::VectorType>(I.getOperand(0)->getType());
4945  if (VT != NULL) {
4946  Out << "__store<" << I.getAlignment() << ">(";
4947  writeOperand(I.getOperand(1));
4948  Out << ", ";
4949  writeOperand(I.getOperand(0));
4950  Out << ")";
4951  return;
4952  }
4953 
4954  writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(), I.isVolatile(), I.getAlignment());
4955  Out << " = ";
4956  llvm::Value *Operand = I.getOperand(0);
4957  llvm::Constant *BitMask = 0;
4958  if (llvm::IntegerType *ITy = llvm::dyn_cast<llvm::IntegerType>(Operand->getType()))
4959  if (!ITy->isPowerOf2ByteWidth())
4960  // We have a bit width that doesn't match an even power-of-2 byte
4961  // size. Consequently we must & the value with the type's bit mask
4962  BitMask = llvm::ConstantInt::get(ITy, ITy->getBitMask());
4963  if (BitMask)
4964  Out << "((";
4965  writeOperand(Operand);
4966  if (BitMask) {
4967  Out << ") & ";
4968  printConstant(BitMask, false);
4969  Out << ")";
4970  }
4971 }
4972 
4973 void CWriter::visitGetElementPtrInst(llvm::GetElementPtrInst &I) {
4974  printGEPExpression(I.getPointerOperand(), gep_type_begin(I), gep_type_end(I), false);
4975 }
4976 
4977 void CWriter::visitVAArgInst(llvm::VAArgInst &I) {
4978  Out << "va_arg(*(va_list*)";
4979  writeOperand(I.getOperand(0));
4980  Out << ", ";
4981  printType(Out, I.getType());
4982  Out << ");\n ";
4983 }
4984 
4985 void CWriter::visitInsertElementInst(llvm::InsertElementInst &I) {
4986 #if 0
4987  Type *EltTy = I.getType()->getElementType();
4988  writeOperand(I.getOperand(0));
4989  Out << ";\n ";
4990  Out << "((";
4991  printType(Out, llvm::PointerType::getUnqual(EltTy));
4992  Out << ")(&" << GetValueName(&I) << "))[";
4993  writeOperand(I.getOperand(2));
4994  Out << "] = (";
4995  writeOperand(I.getOperand(1));
4996  Out << ")";
4997 #else
4998  writeOperand(I.getOperand(0));
4999  Out << ";\n ";
5000  Out << "__insert_element(&" << GetValueName(&I) << ", ";
5001  writeOperand(I.getOperand(2));
5002  Out << ", ";
5003  writeOperand(I.getOperand(1));
5004  Out << ")";
5005 #endif
5006 }
5007 
5008 void CWriter::visitExtractElementInst(llvm::ExtractElementInst &I) {
5009  // We know that our operand is not inlined.
5010 #if 0
5011  Out << "((";
5012  Type *EltTy =
5013  llvm::cast<llvm::VectorType>(I.getOperand(0)->getType())->getElementType();
5014  printType(Out, llvm::PointerType::getUnqual(EltTy));
5015  Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
5016  writeOperand(I.getOperand(1));
5017  Out << "]";
5018 #else
5019  Out << "(__extract_element(";
5020  writeOperand(I.getOperand(0));
5021  Out << ", ";
5022  writeOperand(I.getOperand(1));
5023  Out << "))";
5024 #endif
5025 }
5026 
5027 void CWriter::visitShuffleVectorInst(llvm::ShuffleVectorInst &SVI) {
5028  printType(Out, SVI.getType());
5029  Out << "(";
5030  llvm::VectorType *VT = SVI.getType();
5031  unsigned NumElts = VT->getNumElements();
5032  llvm::Type *EltTy = VT->getElementType();
5033  llvm::VectorType *OpTy = llvm::dyn_cast<llvm::VectorType>(SVI.getOperand(0)->getType());
5034  unsigned OpElts = OpTy->getNumElements();
5035 
5036  for (unsigned i = 0; i != NumElts; ++i) {
5037  if (i)
5038  Out << ", ";
5039  int SrcVal = SVI.getMaskValue(i);
5040  if ((unsigned)SrcVal >= 2 * OpElts) {
5041  Out << " 0/*undef*/ ";
5042  } else {
5043  llvm::Value *Op = SVI.getOperand((unsigned)SrcVal >= OpElts);
5044  SrcVal &= OpElts - 1;
5045 
5046  if (llvm::isa<llvm::ConstantVector>(Op)) {
5047  printConstant(llvm::cast<llvm::ConstantVector>(Op)->getOperand(SrcVal), false);
5048  } else if (llvm::isa<llvm::ConstantAggregateZero>(Op) || llvm::isa<llvm::UndefValue>(Op)) {
5049  Out << "0";
5050  } else {
5051  // Do an extractelement of this value from the appropriate input.
5052  Out << " \n#if defined(KNC) \n";
5053  if (OpElts != 1) { // all __vec16_* have overloaded operator []
5054  Out << "(" << GetValueName(Op) << ")[" << SrcVal << "]";
5055  } else { // but __vec1_* don't have it
5056  Out << "((";
5057  printType(Out, llvm::PointerType::getUnqual(EltTy));
5058  Out << ")(&" << GetValueName(Op) << "))[" << SrcVal << "]";
5059  }
5060  Out << " \n#else \n";
5061  Out << "((";
5062  printType(Out, llvm::PointerType::getUnqual(EltTy));
5063  Out << ")(&" << GetValueName(Op) << "))[" << SrcVal << "]";
5064  Out << " \n#endif \n";
5065  }
5066  }
5067  }
5068  Out << ")";
5069 }
5070 
5071 void CWriter::visitInsertValueInst(llvm::InsertValueInst &IVI) {
5072  // Start by copying the entire aggregate value into the result variable.
5073  writeOperand(IVI.getOperand(0));
5074  Out << ";\n ";
5075 
5076  // Then do the insert to update the field.
5077  Out << GetValueName(&IVI);
5078  for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end(); i != e; ++i) {
5079  llvm::Type *IndexedTy =
5080  (b == i) ? IVI.getOperand(0)->getType()
5081  : llvm::ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), llvm::makeArrayRef(b, i));
5082  if (IndexedTy->isArrayTy())
5083  Out << ".array[" << *i << "]";
5084  else
5085  Out << ".field" << *i;
5086  }
5087  Out << " = ";
5088  writeOperand(IVI.getOperand(1));
5089 }
5090 
5091 void CWriter::visitExtractValueInst(llvm::ExtractValueInst &EVI) {
5092  Out << "(";
5093  if (llvm::isa<llvm::UndefValue>(EVI.getOperand(0))) {
5094  // FIXME: need to handle these--a 0 initializer won't do...
5095  assert(!llvm::isa<llvm::VectorType>(EVI.getType()));
5096  Out << "(";
5097  printType(Out, EVI.getType());
5098  Out << ") 0/*UNDEF*/";
5099  } else {
5100  Out << GetValueName(EVI.getOperand(0));
5101  for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end(); i != e; ++i) {
5102  llvm::Type *IndexedTy = (b == i) ? EVI.getOperand(0)->getType()
5103  : llvm::ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
5104  llvm::makeArrayRef(b, i));
5105  if (IndexedTy->isArrayTy())
5106  Out << ".array[" << *i << "]";
5107  else
5108  Out << ".field" << *i;
5109  }
5110  }
5111  Out << ")";
5112 }
5113 
5114 void CWriter::visitAtomicRMWInst(llvm::AtomicRMWInst &AI) {
5115  Out << "(";
5116  Out << "__atomic_";
5117  switch (AI.getOperation()) {
5118  default:
5119  llvm_unreachable("Unhandled case in visitAtomicRMWInst!");
5120  case llvm::AtomicRMWInst::Add:
5121  Out << "add";
5122  break;
5123  case llvm::AtomicRMWInst::Sub:
5124  Out << "sub";
5125  break;
5126  case llvm::AtomicRMWInst::Xchg:
5127  Out << "xchg";
5128  break;
5129  case llvm::AtomicRMWInst::And:
5130  Out << "and";
5131  break;
5132  case llvm::AtomicRMWInst::Nand:
5133  Out << "nand";
5134  break;
5135  case llvm::AtomicRMWInst::Or:
5136  Out << "or";
5137  break;
5138  case llvm::AtomicRMWInst::Xor:
5139  Out << "xor";
5140  break;
5141  case llvm::AtomicRMWInst::Min:
5142  Out << "min";
5143  break;
5144  case llvm::AtomicRMWInst::Max:
5145  Out << "max";
5146  break;
5147  case llvm::AtomicRMWInst::UMin:
5148  Out << "umin";
5149  break;
5150  case llvm::AtomicRMWInst::UMax:
5151  Out << "umax";
5152  break;
5153  }
5154  Out << "(";
5155  writeOperand(AI.getOperand(0));
5156  Out << ", ";
5157  writeOperand(AI.getOperand(1));
5158  Out << "))";
5159 }
5160 
5161 void CWriter::visitAtomicCmpXchgInst(llvm::AtomicCmpXchgInst &ACXI) {
5162  Out << "(";
5163 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
5164  printType(Out, ACXI.getType(), false);
5165  Out << "::init("; // LLVM cmpxchg returns a struct, so we need make an assighment properly
5166 #endif
5167  Out << "__atomic_cmpxchg(";
5168  writeOperand(ACXI.getPointerOperand());
5169  Out << ", ";
5170  writeOperand(ACXI.getCompareOperand());
5171  Out << ", ";
5172  writeOperand(ACXI.getNewValOperand());
5173  Out << ")";
5174 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
5175  Out << ", true /* There is no way to learn the value of this bit inside ISPC, so making it constant */)";
5176 #endif
5177  Out << ")";
5178 }
5179 
5180 ///////////////////////////////////////////////////////////////////////////
5181 // SmearCleanupPass
5182 
5183 class SmearCleanupPass : public llvm::BasicBlockPass {
5184  public:
5185  SmearCleanupPass(llvm::Module *m, int width) : BasicBlockPass(ID) {
5186  module = m;
5187  vectorWidth = width;
5188  }
5189 
5190 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
5191  const char *getPassName() const { return "Smear Cleanup Pass"; }
5192 #else // LLVM 4.0+
5193  llvm::StringRef getPassName() const { return "Smear Cleanup Pass"; }
5194 #endif
5195  bool runOnBasicBlock(llvm::BasicBlock &BB);
5196 
5197  static char ID;
5198  llvm::Module *module;
5199  unsigned int vectorWidth;
5200 
5201  private:
5202  unsigned int ChainLength(llvm::InsertElementInst *inst) const;
5203  llvm::Value *getInsertChainSmearValue(llvm::Instruction *inst) const;
5204  llvm::Value *getShuffleSmearValue(llvm::Instruction *inst) const;
5205 };
5206 
5207 char SmearCleanupPass::ID = 0;
5208 
5209 unsigned int SmearCleanupPass::ChainLength(llvm::InsertElementInst *inst) const {
5210  unsigned int length = 0;
5211  while (inst != NULL) {
5212  ++length;
5213  inst = llvm::dyn_cast<llvm::InsertElementInst>(inst->getOperand(0));
5214  }
5215  return length;
5216 }
5217 
5218 llvm::Value *SmearCleanupPass::getInsertChainSmearValue(llvm::Instruction *inst) const {
5219  // TODO: we don't check indexes where we do insertion, so we may trigger
5220  // transformation for a wrong chain.
5221  // This way of doing broadcast is obsolete and should be probably removed
5222  // some day.
5223 
5224  llvm::InsertElementInst *insertInst = llvm::dyn_cast<llvm::InsertElementInst>(inst);
5225  if (!insertInst) {
5226  return NULL;
5227  }
5228 
5229  // We consider only chians of vectorWidth length.
5230  if (ChainLength(insertInst) != vectorWidth) {
5231  return NULL;
5232  }
5233 
5234  // FIXME: we only want to do this to vectors with width equal to
5235  // the target vector width. But we can't easily get that here, so
5236  // for now we at least avoid one case where we definitely don't
5237  // want to do this.
5238  llvm::VectorType *vt = llvm::dyn_cast<llvm::VectorType>(insertInst->getType());
5239  if (vt->getNumElements() == 1) {
5240  return NULL;
5241  }
5242 
5243  llvm::Value *smearValue = NULL;
5244  while (insertInst != NULL) {
5245  // operand 1 is inserted value
5246  llvm::Value *insertValue = insertInst->getOperand(1);
5247  if (smearValue == NULL) {
5248  smearValue = insertValue;
5249  } else if (smearValue != insertValue) {
5250  return NULL;
5251  }
5252 
5253  // operand 0 is a vector to insert into.
5254  insertInst = llvm::dyn_cast<llvm::InsertElementInst>(insertInst->getOperand(0));
5255  }
5256  assert(smearValue != NULL);
5257 
5258  return smearValue;
5259 }
5260 
5261 llvm::Value *SmearCleanupPass::getShuffleSmearValue(llvm::Instruction *inst) const {
5262  llvm::ShuffleVectorInst *shuffleInst = llvm::dyn_cast<llvm::ShuffleVectorInst>(inst);
5263  if (!shuffleInst) {
5264  return NULL;
5265  }
5266 
5267  llvm::Constant *mask = llvm::dyn_cast<llvm::Constant>(shuffleInst->getOperand(2));
5268 
5269  // Check that the shuffle is a broadcast of the element of the first vector,
5270  // i.e. mask vector is vector with equal elements of expected size.
5271  if (!(mask &&
5272 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
5273  (mask->isNullValue() ||
5274  (shuffleInst->getMask()->getType()->isVectorTy() &&
5275  llvm::dyn_cast<llvm::ConstantVector>(shuffleInst->getMask())->getSplatValue() != 0)) &&
5276 #else
5277  (mask->isNullValue() || (shuffleInst->getMask()->getSplatValue() != 0)) &&
5278 #endif
5279  llvm::dyn_cast<llvm::VectorType>(mask->getType())->getNumElements() == vectorWidth)) {
5280  return NULL;
5281  }
5282 
5283  llvm::InsertElementInst *insertInst = llvm::dyn_cast<llvm::InsertElementInst>(shuffleInst->getOperand(0));
5284 
5285  // Check that it's an InsertElementInst that inserts a value to first element.
5286  if (!(insertInst && llvm::isa<llvm::Constant>(insertInst->getOperand(2)) &&
5287  llvm::dyn_cast<llvm::Constant>(insertInst->getOperand(2))->isNullValue())) {
5288 
5289  // We can't extract element from vec1
5290  llvm::VectorType *operandVec = llvm::dyn_cast<llvm::VectorType>(shuffleInst->getOperand(0)->getType());
5291  if (operandVec && operandVec->getNumElements() == 1)
5292  return NULL;
5293 
5294  // Insert ExtractElementInstr to get value for smear
5295 
5296  llvm::Function *extractFunc = module->getFunction("__extract_element");
5297 
5298  if (extractFunc == NULL) {
5299  // Declare the __extract_element function if needed; it takes a vector and
5300  // a scalar parameter and returns a scalar of the vector parameter type.
5301 #if ISPC_LLVM_VERSION <= ISPC_LLVM_8_0
5302  llvm::Constant *ef =
5303 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
5304  module->getOrInsertFunction(
5305  "__extract_element", shuffleInst->getOperand(0)->getType()->getVectorElementType(),
5306  shuffleInst->getOperand(0)->getType(), llvm::IntegerType::get(module->getContext(), 32), NULL);
5307 #else // LLVM 5.0+
5308  module->getOrInsertFunction(
5309  "__extract_element", shuffleInst->getOperand(0)->getType()->getVectorElementType(),
5310  shuffleInst->getOperand(0)->getType(), llvm::IntegerType::get(module->getContext(), 32));
5311 #endif
5312  extractFunc = llvm::dyn_cast<llvm::Function>(ef);
5313 #else // LLVM 9.0+
5314  llvm::FunctionCallee ef = module->getOrInsertFunction(
5315  "__extract_element", shuffleInst->getOperand(0)->getType()->getVectorElementType(),
5316  shuffleInst->getOperand(0)->getType(), llvm::IntegerType::get(module->getContext(), 32));
5317  extractFunc = llvm::dyn_cast<llvm::Function>(ef.getCallee());
5318 #endif
5319  assert(extractFunc != NULL);
5320  extractFunc->setDoesNotThrow();
5321  extractFunc->setOnlyReadsMemory();
5322  }
5323 
5324  if (extractFunc == NULL) {
5325  return NULL;
5326  }
5327  llvm::Instruction *extractCall =
5328  llvm::ExtractElementInst::Create(shuffleInst->getOperand(0),
5329 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
5330  // mask is of VectorType
5331  llvm::dyn_cast<llvm::ConstantVector>(mask)->getSplatValue(),
5332 #else
5333  mask->getSplatValue(),
5334 #endif
5335  "__extract_element", inst);
5336  return extractCall;
5337  }
5338 
5339  llvm::Value *result = insertInst->getOperand(1);
5340 
5341  return result;
5342 }
5343 
5344 bool SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
5345  bool modifiedAny = false;
5346 
5347 restart:
5348  for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
5349  llvm::Value *smearValue = NULL;
5350 
5351  if (!(smearValue = getInsertChainSmearValue(&*iter)) && !(smearValue = getShuffleSmearValue(&*iter))) {
5352  continue;
5353  }
5354 
5355  llvm::Type *smearType = smearValue->getType();
5356  const char *smearFuncName = lGetTypedFunc("smear", smearType, vectorWidth);
5357  if (smearFuncName != NULL) {
5358  llvm::Function *smearFunc = module->getFunction(smearFuncName);
5359  if (smearFunc == NULL) {
5360  // Declare the smear function if needed; it takes a single
5361  // scalar parameter and returns a vector of the same
5362  // parameter type.
5363 #if ISPC_LLVM_VERSION <= ISPC_LLVM_8_0
5364  llvm::Constant *sf =
5365 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
5366  module->getOrInsertFunction(smearFuncName, iter->getType(), smearType, NULL);
5367 #else // LLVM 5.0+
5368  module->getOrInsertFunction(smearFuncName, iter->getType(), smearType);
5369 #endif // LLVM 9.0+
5370  smearFunc = llvm::dyn_cast<llvm::Function>(sf);
5371 #else
5372  llvm::FunctionCallee sf = module->getOrInsertFunction(smearFuncName, iter->getType(), smearType);
5373  smearFunc = llvm::dyn_cast<llvm::Function>(sf.getCallee());
5374 #endif
5375  assert(smearFunc != NULL);
5376  smearFunc->setDoesNotThrow();
5377  smearFunc->setDoesNotAccessMemory();
5378  }
5379 
5380  assert(smearFunc != NULL);
5381  llvm::Value *args[1] = {smearValue};
5382  llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[1]);
5383  llvm::Instruction *smearCall = llvm::CallInst::Create(
5384  smearFunc, argArray, LLVMGetName(smearValue, "_smear"), (llvm::Instruction *)NULL);
5385 
5386  ReplaceInstWithInst(&*iter, smearCall);
5387 
5388  modifiedAny = true;
5389  goto restart;
5390  }
5391  }
5392 
5393  return modifiedAny;
5394 }
5395 
5396 ///////////////////////////////////////////////////////////////////////////
5397 // AndCmpCleanupPass
5398 
5399 class AndCmpCleanupPass : public llvm::BasicBlockPass {
5400  public:
5401  AndCmpCleanupPass() : BasicBlockPass(ID) {}
5402 
5403 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
5404  const char *getPassName() const { return "AndCmp Cleanup Pass"; }
5405 #else // LLVM 4.0+
5406  llvm::StringRef getPassName() const { return "AndCmp Cleanup Pass"; }
5407 #endif
5408  bool runOnBasicBlock(llvm::BasicBlock &BB);
5409 
5410  static char ID;
5411 };
5412 
5413 char AndCmpCleanupPass::ID = 0;
5414 
5415 // Look for ANDs of masks where one of the operands is a vector compare; we
5416 // can turn these into specialized calls to masked vector compares and
5417 // thence eliminate the AND. For example, rather than emitting
5418 // __and(__less(a, b), c), we will emit __less_and_mask(a, b, c).
5419 bool AndCmpCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
5420  bool modifiedAny = false;
5421 
5422 restart:
5423  for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
5424  // See if we have an AND instruction
5425  llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(&*iter);
5426  if (bop == NULL || bop->getOpcode() != llvm::Instruction::And)
5427  continue;
5428 
5429  // Make sure it's a vector AND
5430  if (llvm::isa<llvm::VectorType>(bop->getType()) == false)
5431  continue;
5432 
5433  // We only care about ANDs of the mask type, not, e.g. ANDs of
5434  // int32s vectors.
5435  if (bop->getType() != LLVMTypes::MaskType)
5436  continue;
5437 
5438  // Now see if either of the operands to the AND is a comparison
5439  for (int i = 0; i < 2; ++i) {
5440  llvm::Value *op = bop->getOperand(i);
5441  llvm::CmpInst *opCmp = llvm::dyn_cast<llvm::CmpInst>(op);
5442  if (opCmp == NULL)
5443  continue;
5444 
5445  // We have a comparison. However, we also need to make sure
5446  // that it's not comparing two mask values; those can't be
5447  // simplified to something simpler.
5448  if (opCmp->getOperand(0)->getType() == LLVMTypes::MaskType)
5449  break;
5450 
5451  // Success! Go ahead and replace the AND with a call to the
5452  // "__and_mask" variant of the comparison function for this
5453  // operand.
5454  std::string funcName = lPredicateToString(opCmp->getPredicate());
5455  funcName += "_";
5456  funcName += lTypeToSuffix(opCmp->getOperand(0)->getType());
5457  funcName += "_and_mask";
5458 
5459  llvm::Function *andCmpFunc = m->module->getFunction(funcName);
5460  if (andCmpFunc == NULL) {
5461  // Declare the function if needed; the first two arguments
5462  // are the same as the two arguments to the compare we're
5463  // replacing and the third argument is the mask type.
5464  llvm::Type *cmpOpType = opCmp->getOperand(0)->getType();
5465 #if ISPC_LLVM_VERSION <= ISPC_LLVM_8_0
5466  llvm::Constant *acf =
5467 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
5468  m->module->getOrInsertFunction(funcName, LLVMTypes::MaskType, cmpOpType, cmpOpType,
5469  LLVMTypes::MaskType, NULL);
5470 #else // LLVM 5.0+
5471  m->module->getOrInsertFunction(funcName, LLVMTypes::MaskType, cmpOpType, cmpOpType,
5472  LLVMTypes::MaskType);
5473 #endif
5474  andCmpFunc = llvm::dyn_cast<llvm::Function>(acf);
5475 #else
5476  llvm::FunctionCallee acf = m->module->getOrInsertFunction(funcName, LLVMTypes::MaskType, cmpOpType,
5477  cmpOpType, LLVMTypes::MaskType);
5478  andCmpFunc = llvm::dyn_cast<llvm::Function>(acf.getCallee());
5479 #endif
5480  Assert(andCmpFunc != NULL);
5481  andCmpFunc->setDoesNotThrow();
5482  andCmpFunc->setDoesNotAccessMemory();
5483  }
5484 
5485  // Set up the function call to the *_and_mask function; the
5486  // mask value passed in is the other operand to the AND.
5487  llvm::Value *args[3] = {opCmp->getOperand(0), opCmp->getOperand(1), bop->getOperand(i ^ 1)};
5488  llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[3]);
5489  llvm::Instruction *cmpCall =
5490  llvm::CallInst::Create(andCmpFunc, argArray, LLVMGetName(bop, "_and_mask"), (llvm::Instruction *)NULL);
5491 
5492  // And replace the original AND instruction with it.
5493  llvm::ReplaceInstWithInst(&*iter, cmpCall);
5494 
5495  modifiedAny = true;
5496  goto restart;
5497  }
5498  }
5499 
5500  return modifiedAny;
5501 }
5502 
5503 ///////////////////////////////////////////////////////////////////////////
5504 // MaskOpsCleanupPass
5505 
5506 /** This pass does various peephole improvements to mask modification
5507  operations. In particular, it converts mask XORs with "all true" to
5508  calls to __not() and replaces operations like and(not(a), b) to
5509  __and_not1(a, b) (and similarly if the second operand has not applied
5510  to it...)
5511  */
5512 class MaskOpsCleanupPass : public llvm::BasicBlockPass {
5513  public:
5514  MaskOpsCleanupPass(llvm::Module *m) : BasicBlockPass(ID) {
5515  llvm::Type *mt = LLVMTypes::MaskType;
5516 
5517  // Declare the __not, __and_not1, and __and_not2 functions that we
5518  // expect the target to end up providing.
5519  notFunc =
5520 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
5521  llvm::dyn_cast<llvm::Function>(m->getOrInsertFunction("__not", mt, mt, NULL));
5522 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_8_0 // LLVM 5.0-LLVM 8.0
5523  llvm::dyn_cast<llvm::Function>(m->getOrInsertFunction("__not", mt, mt));
5524 #else // LLVM 9.0+
5525  llvm::dyn_cast<llvm::Function>(m->getOrInsertFunction("__not", mt, mt).getCallee());
5526 #endif
5527  assert(notFunc != NULL);
5528 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
5529  notFunc->addFnAttr(llvm::Attributes::NoUnwind);
5530  notFunc->addFnAttr(llvm::Attributes::ReadNone);
5531 #else /* LLVM 3.3+ */
5532  notFunc->addFnAttr(llvm::Attribute::NoUnwind);
5533  notFunc->addFnAttr(llvm::Attribute::ReadNone);
5534 #endif
5535 
5536  andNotFuncs[0] =
5537 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
5538  llvm::dyn_cast<llvm::Function>(m->getOrInsertFunction("__and_not1", mt, mt, mt, NULL));
5539 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_8_0 // LLVM 5.0-LLVM 8.0
5540  llvm::dyn_cast<llvm::Function>(m->getOrInsertFunction("__and_not1", mt, mt, mt));
5541 #else // LLVM 9.0+
5542  llvm::dyn_cast<llvm::Function>(m->getOrInsertFunction("__and_not1", mt, mt, mt).getCallee());
5543 #endif
5544  assert(andNotFuncs[0] != NULL);
5545 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
5546  andNotFuncs[0]->addFnAttr(llvm::Attributes::NoUnwind);
5547  andNotFuncs[0]->addFnAttr(llvm::Attributes::ReadNone);
5548 #else /* LLVM 3.3+ */
5549  andNotFuncs[0]->addFnAttr(llvm::Attribute::NoUnwind);
5550  andNotFuncs[0]->addFnAttr(llvm::Attribute::ReadNone);
5551 #endif
5552  andNotFuncs[1] =
5553 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
5554  llvm::dyn_cast<llvm::Function>(m->getOrInsertFunction("__and_not2", mt, mt, mt, NULL));
5555 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_8_0 // LLVM 5.0-LLVM 8.0
5556  llvm::dyn_cast<llvm::Function>(m->getOrInsertFunction("__and_not2", mt, mt, mt));
5557 #else // LLVM 9.0+
5558  llvm::dyn_cast<llvm::Function>(m->getOrInsertFunction("__and_not2", mt, mt, mt).getCallee());
5559 #endif
5560  assert(andNotFuncs[1] != NULL);
5561 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
5562  andNotFuncs[1]->addFnAttr(llvm::Attributes::NoUnwind);
5563  andNotFuncs[1]->addFnAttr(llvm::Attributes::ReadNone);
5564 #else /* LLVM 3.3+ */
5565  andNotFuncs[1]->addFnAttr(llvm::Attribute::NoUnwind);
5566  andNotFuncs[1]->addFnAttr(llvm::Attribute::ReadNone);
5567 #endif
5568  }
5569 
5570 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
5571  const char *getPassName() const { return "MaskOps Cleanup Pass"; }
5572 #else // LLVM 4.0+
5573  llvm::StringRef getPassName() const { return "MaskOps Cleanup Pass"; }
5574 #endif
5575  bool runOnBasicBlock(llvm::BasicBlock &BB);
5576 
5577  private:
5578  llvm::Value *lGetNotOperand(llvm::Value *v) const;
5579 
5580  llvm::Function *notFunc, *andNotFuncs[2];
5581 
5582  static char ID;
5583 };
5584 
5585 char MaskOpsCleanupPass::ID = 0;
5586 
5587 /** Returns true if the given value is a compile-time constant vector of
5588  i1s with all elements 'true'.
5589 */
5590 static bool lIsAllTrue(llvm::Value *v) {
5591  if (llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v)) {
5592  llvm::ConstantInt *ci;
5593  return (cv->getSplatValue() != NULL && (ci = llvm::dyn_cast<llvm::ConstantInt>(cv->getSplatValue())) != NULL &&
5594  ci->isOne());
5595  }
5596 
5597  if (llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v)) {
5598  llvm::ConstantInt *ci;
5599  return (cdv->getSplatValue() != NULL &&
5600  (ci = llvm::dyn_cast<llvm::ConstantInt>(cdv->getSplatValue())) != NULL && ci->isOne());
5601  }
5602 
5603  return false;
5604 }
5605 
5606 /** Checks to see if the given value is the NOT of some other value. If
5607  so, it returns the operand of the NOT; otherwise returns NULL.
5608  */
5609 llvm::Value *MaskOpsCleanupPass::lGetNotOperand(llvm::Value *v) const {
5610  if (llvm::CallInst *ci = llvm::dyn_cast<llvm::CallInst>(v))
5611  if (ci->getCalledFunction() == notFunc)
5612  // Direct call to __not()
5613  return ci->getArgOperand(0);
5614 
5615  if (llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v))
5616  if (bop->getOpcode() == llvm::Instruction::Xor && lIsAllTrue(bop->getOperand(1)))
5617  // XOR of all-true vector.
5618  return bop->getOperand(0);
5619 
5620  return NULL;
5621 }
5622 
5623 bool MaskOpsCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
5624  bool modifiedAny = false;
5625 
5626 restart:
5627  for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
5628  llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(&*iter);
5629  if (bop == NULL)
5630  continue;
5631 
5632  if (bop->getType() != LLVMTypes::MaskType)
5633  continue;
5634 
5635  if (bop->getOpcode() == llvm::Instruction::Xor) {
5636  // Check for XOR with all-true values
5637  if (lIsAllTrue(bop->getOperand(1))) {
5638  llvm::Value *val = bop->getOperand(0);
5639  // Note that ArrayRef takes reference to an object, which must live
5640  // long enough, so passing return value of getOperand directly is
5641  // incorrect and it actually causes crashes with gcc 4.7 and later.
5642  llvm::ArrayRef<llvm::Value *> arg(val);
5643  llvm::CallInst *notCall = llvm::CallInst::Create(notFunc, arg, bop->getName());
5644  ReplaceInstWithInst(&*iter, notCall);
5645  modifiedAny = true;
5646  goto restart;
5647  }
5648  } else if (bop->getOpcode() == llvm::Instruction::And) {
5649  // Check each of the operands to see if they have NOT applied
5650  // to them.
5651  for (int i = 0; i < 2; ++i) {
5652  if (llvm::Value *notOp = lGetNotOperand(bop->getOperand(i))) {
5653  // In notOp we have the target of the NOT operation;
5654  // put it in its appropriate spot in the operand array.
5655  // Copy in the other operand directly.
5656  llvm::Value *args[2];
5657  args[i] = notOp;
5658  args[i ^ 1] = bop->getOperand(i ^ 1);
5659  llvm::ArrayRef<llvm::Value *> argsRef(&args[0], 2);
5660 
5661  // Call the appropriate __and_not* function.
5662  llvm::CallInst *andNotCall = llvm::CallInst::Create(andNotFuncs[i], argsRef, bop->getName());
5663 
5664  ReplaceInstWithInst(&*iter, andNotCall);
5665  modifiedAny = true;
5666  goto restart;
5667  }
5668  }
5669  }
5670  }
5671 
5672  return modifiedAny;
5673 }
5674 
5675 //===----------------------------------------------------------------------===//
5676 // External Interface declaration
5677 //===----------------------------------------------------------------------===//
5678 
5679 bool WriteCXXFile(llvm::Module *module, const char *fn, int vectorWidth, const char *includeName) {
5680 
5681 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 // 3.2, 3.3, 3.4, 3.5, 3.6
5682  llvm::PassManager pm;
5683 #else // LLVM 3.7+
5684  llvm::legacy::PassManager pm;
5685 #endif
5686 #if 0
5687  if (const llvm::TargetData *td = targetMachine->getTargetData())
5688  pm.add(new llvm::TargetData(*td));
5689  else
5690  pm.add(new llvm::TargetData(module));
5691 #endif
5692 
5693 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 // 3.2, 3.3
5694  int flags = 0;
5695 #else // LLVM 3.4+
5696  llvm::sys::fs::OpenFlags flags = llvm::sys::fs::F_None;
5697 #endif
5698 
5699 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // 3.2, 3.3, 3.4, 3.5
5700  std::string error;
5701 #else // LLVM 3.6+
5702  std::error_code error;
5703 #endif
5704 
5705 #if ISPC_LLVM_VERSION <= ISPC_LLVM_5_0
5706  llvm::tool_output_file *of = new llvm::tool_output_file(fn, error, flags);
5707 #else // LLVM 6.0+
5708  llvm::ToolOutputFile *of = new llvm::ToolOutputFile(fn, error, flags);
5709 #endif
5710 
5711 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // 3.2, 3.3, 3.4, 3.5
5712  if (error.size()) {
5713 #else // LLVM 3.6+
5714  if (error) {
5715 #endif
5716  fprintf(stderr, "Error opening output file \"%s\".\n", fn);
5717  return false;
5718  }
5719 
5720  llvm::formatted_raw_ostream fos(of->os());
5721 
5722  pm.add(llvm::createGCLoweringPass());
5723  pm.add(llvm::createLowerInvokePass());
5724  pm.add(llvm::createCFGSimplificationPass()); // clean up after lower invoke.
5725  pm.add(new SmearCleanupPass(module, vectorWidth));
5726  pm.add(new AndCmpCleanupPass());
5727  pm.add(new MaskOpsCleanupPass(module));
5728  pm.add(llvm::createDeadCodeEliminationPass()); // clean up after smear pass
5729  // CO pm.add(llvm::createPrintModulePass(&fos));
5730  pm.add(new CWriter(fos, includeName, vectorWidth));
5731 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
5732  // This interface is depricated for 3.3+
5733  pm.add(llvm::createGCInfoDeleter());
5734 #endif
5735  // CO pm.add(llvm::createVerifierPass());
5736 
5737  pm.run(*module);
5738 
5739  return true;
5740 }
static const char * lPredicateToString(llvm::CmpInst::Predicate p)
Definition: cbackend.cpp:3880
Opt opt
Definition: ispc.h:542
Definition: ispc.h:72
static bool isFPIntBitCast(const llvm::Instruction &I)
Definition: cbackend.cpp:3384
Module * m
Definition: ispc.cpp:102
static void findUsedArrayAndLongIntTypes(const llvm::Module *m, std::vector< llvm::ArrayType *> &t, std::vector< llvm::IntegerType *> &i, std::vector< bool > &IsVolatile, std::vector< int > &Alignment)
Definition: cbackend.cpp:376
constant_iterator constant_begin(const llvm::Function *F)
Definition: cbackend.cpp:191
static SpecialGlobalClass getGlobalVariableClass(const llvm::GlobalVariable *GV)
Definition: cbackend.cpp:2429
static void PrintEscapedString(const char *Str, unsigned Length, llvm::raw_ostream &Out)
Definition: cbackend.cpp:2454
bool forceAlignedMemory
Definition: ispc.h:466
bool operator==(const constant_iterator &x) const
Definition: cbackend.cpp:165
static const char * lGetTypedFunc(const char *base, llvm::Type *matchType, int width)
Definition: cbackend.cpp:1395
static bool is_vec16_i64_ty(llvm::Type *Ty)
Definition: cbackend.cpp:382
#define Assert(expr)
Definition: ispc.h:163
static void FindStaticTors(llvm::GlobalVariable *GV, std::set< llvm::Function *> &StaticTors)
Definition: cbackend.cpp:2404
Globals * g
Definition: ispc.cpp:101
bool LLVMVectorValuesAllEqual(llvm::Value *v, llvm::Value **splat=NULL)
Definition: llvmutil.cpp:1061
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:351
llvm::const_inst_iterator InstI
Definition: cbackend.cpp:146
bool operator!=(const constant_iterator &x) const
Definition: cbackend.cpp:166
static llvm::Type * Int8Type
Definition: llvmutil.h:75
Header file with declarations for various LLVM utility stuff.
constant_iterator constant_end(const llvm::Function *F)
Definition: cbackend.cpp:193
bool fastMath
Definition: ispc.h:436
static const char * getFloatBitCastField(llvm::Type *Ty)
Definition: cbackend.cpp:4113
SpecialGlobalClass
Definition: cbackend.cpp:2425
constant_iterator(const llvm::Function *F, bool)
Definition: cbackend.cpp:162
static void generateCompilerSpecificCode(llvm::formatted_raw_ostream &Out, const llvm::DataLayout *TD)
Definition: cbackend.cpp:2286
static bool isFPCSafeToPrint(const llvm::ConstantFP *CFP)
Definition: cbackend.cpp:1225
static std::string CBEMangle(const std::string &S)
Definition: cbackend.cpp:732
static void printLimitValue(llvm::IntegerType &Ty, bool isSigned, bool isMax, llvm::raw_ostream &Out)
Definition: cbackend.cpp:4192
static const char * lTypeToSuffix(llvm::Type *t)
Definition: cbackend.cpp:3938
static std::string ftostr(const llvm::APFloat &V)
Definition: cbackend.cpp:1195
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
constant_iterator & operator++()
Definition: cbackend.cpp:173
Main ispc.header file. Defines Target, Globals and Opt classes.
constant_iterator(const llvm::Function *F)
Definition: cbackend.cpp:155
static bool isSupportedIntegerSize(llvm::IntegerType &T)
Definition: cbackend.cpp:4217