Intel SPMD Program Compiler  1.10.0
ispc.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2016, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ispc.cpp
35  @brief ispc global definitions
36 */
37 
38 #include "ispc.h"
39 #include "module.h"
40 #include "util.h"
41 #include "llvmutil.h"
42 #include <stdio.h>
43 #include <sstream>
44 #include <stdarg.h> /* va_list, va_start, va_arg, va_end */
45 #ifdef ISPC_IS_WINDOWS
46  #include <windows.h>
47  #include <direct.h>
48  #define strcasecmp stricmp
49  #if ISPC_LLVM_VERSION >= ISPC_LLVM_7_0
50  #include <intrin.h>
51  #endif
52 #else
53  #include <sys/types.h>
54  #include <unistd.h>
55 #endif
56 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
57  #include <llvm/LLVMContext.h>
58  #include <llvm/Module.h>
59  #include <llvm/Instructions.h>
60 #else /* 3.3+ */
61  #include <llvm/IR/LLVMContext.h>
62  #include <llvm/IR/Module.h>
63  #include <llvm/IR/Instructions.h>
64 #endif
65 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
66  #if ISPC_LLVM_VERSION >= ISPC_LLVM_6_0
67  #include <llvm/CodeGen/TargetSubtargetInfo.h>
68  #include <llvm/CodeGen/TargetLowering.h>
69  #else
70  #include <llvm/Target/TargetSubtargetInfo.h>
71  #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
72  #include <llvm/Target/TargetLowering.h>
73  #endif
74  #endif
75 #endif
76 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
77  #include <llvm/IR/DebugInfo.h>
78  #include <llvm/IR/DIBuilder.h>
79 #else // LLVM 3.2, 3.3, 3.4
80  #include <llvm/DebugInfo.h>
81  #include <llvm/DIBuilder.h>
82 #endif
83 #if ISPC_LLVM_VERSION >= ISPC_LLVM_5_0 // LLVM 5.0+
84  #include <llvm/BinaryFormat/Dwarf.h>
85 #else // LLVM up to 4.x
86  #include <llvm/Support/Dwarf.h>
87 #endif
88 #include <llvm/Target/TargetMachine.h>
89 #include <llvm/Target/TargetOptions.h>
90 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
91  #include <llvm/DataLayout.h>
92 #else // LLVM 3.3+
93  #include <llvm/IR/DataLayout.h>
94  #include <llvm/IR/Attributes.h>
95 #endif
96 #include <llvm/Support/TargetRegistry.h>
97 #include <llvm/Support/TargetSelect.h>
98 #include <llvm/Support/Host.h>
99 
102 
103 ///////////////////////////////////////////////////////////////////////////
104 // Target
105 
106 #if !defined(ISPC_IS_WINDOWS) && !defined(__arm__)
/** Execute the CPUID instruction for the leaf given by @p infoType.

    @param info      Receives the resulting EAX, EBX, ECX and EDX register
                     values in info[0], info[1], info[2] and info[3].
    @param infoType  CPUID leaf number, loaded into EAX.

    ECX is explicitly loaded with 0 before the instruction is issued, so
    leaves that take a sub-leaf index in ECX always query sub-leaf 0
    instead of whatever value happened to be in the register.
 */
static void __cpuid(int info[4], int infoType) {
    __asm__ __volatile__ ("cpuid"
                          : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
                          : "0" (infoType), "2" (0));
}
112 
/** Execute the CPUID instruction for leaf @p level and sub-leaf @p count.

    @param info   Receives the resulting EAX, EBX, ECX and EDX register
                  values in info[0], info[1], info[2] and info[3].
    @param level  CPUID leaf number, loaded into EAX.
    @param count  CPUID sub-leaf number, loaded into ECX.

    EBX may be in use as the PIC register, so it is never named as an
    output.  Its value is swapped into a scratch register around the
    instruction and swapped back afterwards.
 */
static void __cpuidex(int info[4], int level, int count) {
#if defined(__x86_64__)
    // In 64-bit mode the whole of RBX has to be swapped: a 32-bit xchg
    // zero-extends into RBX, so the upper half of the caller's value
    // would be lost and never restored.
    __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1\n\t"
                          "cpuid\n\t"
                          "xchg{q}\t{%%}rbx, %q1\n\t"
                          : "=a" (info[0]), "=&r" (info[1]), "=c" (info[2]), "=d" (info[3])
                          : "0" (level), "2" (count));
#else
    __asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"
                          "cpuid\n\t"
                          "xchg{l}\t{%%}ebx, %1\n\t"
                          : "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
                          : "0" (level), "2" (count));
#endif
}
121 #endif // !ISPC_IS_WINDOWS && !__ARM__
122 
123 #if !defined(__arm__)
/** Ask the processor whether the operating system will save the YMM
    registers.

    @return true when both bits of the 0x6 mask are set in the low word
            of the value read with xgetbv (ECX = 0).
 */
static bool __os_has_avx_support() {
    const unsigned int requiredStateBits = 0x6;
#if defined(ISPC_IS_WINDOWS)
    // The compiler intrinsic hands back the whole 64-bit register value.
    const unsigned long long enabledState = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledState & requiredStateBits) == requiredStateBits;
#else // !defined(ISPC_IS_WINDOWS)
    // xgetbv is emitted as raw opcode bytes: older assemblers do not know
    // the mnemonic and there is no easy way to conditionally compile
    // based on the assembler in use.
    int lowWord, highWord;
    __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0"
                          : "=a" (lowWord), "=d" (highWord)
                          : "c" (0));
    // Only the low word (EAX) carries the bits of interest.
    return (lowWord & requiredStateBits) == requiredStateBits;
#endif // !defined(ISPC_IS_WINDOWS)
}
138 
/** Ask the processor whether the operating system saves the XMM, YMM and
    ZMM registers, i.e. whether it supports AVX2 and AVX512.
    See section 2.1 of
    software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf

    @return true when every bit of the 0xE6 mask is set in the low word
            of the value read with xgetbv (ECX = 0).
 */
static bool __os_has_avx512_support() {
    const unsigned int requiredStateBits = 0xE6;
#if defined(ISPC_IS_WINDOWS)
    // The compiler intrinsic hands back the whole 64-bit register value.
    const unsigned long long enabledState = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledState & requiredStateBits) == requiredStateBits;
#else // !defined(ISPC_IS_WINDOWS)
    // xgetbv is emitted as raw opcode bytes: older assemblers do not know
    // the mnemonic and there is no easy way to conditionally compile
    // based on the assembler in use.
    int lowWord, highWord;
    __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0"
                          : "=a" (lowWord), "=d" (highWord)
                          : "c" (0));
    // Only the low word (EAX) carries the bits of interest.
    return (lowWord & requiredStateBits) == requiredStateBits;
#endif // !defined(ISPC_IS_WINDOWS)
}
154 #endif // !__arm__
155 
156 static const char *
158 #ifdef __arm__
159  return "neon-i32x4";
160 #else
161  int info[4];
162  __cpuid(info, 1);
163 
164  int info2[4];
165  // Call cpuid with eax=7, ecx=0
166  __cpuidex(info2, 7, 0);
167 
168  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
169  (info2[1] & (1 << 5)) != 0 && // AVX2
170  (info2[1] & (1 << 16)) != 0 && // AVX512 F
172  // We need to verify that AVX2 is also available,
173  // as well as AVX512, because our targets are supposed
174  // to use both.
175 
176  if ((info2[1] & (1 << 17)) != 0 && // AVX512 DQ
177  (info2[1] & (1 << 28)) != 0 && // AVX512 CDI
178  (info2[1] & (1 << 30)) != 0 && // AVX512 BW
179  (info2[1] & (1 << 31)) != 0) { // AVX512 VL
180  return "avx512skx-i32x16";
181  }
182  else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
183  (info2[1] & (1 << 27)) != 0 && // AVX512 ER
184  (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
185  return "avx512knl-i32x16";
186  }
187  // If it's unknown AVX512 target, fall through and use AVX2
188  // or whatever is available in the machine.
189  }
190 
191  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
192  (info[2] & (1 << 28)) != 0 &&
193  __os_has_avx_support()) { // AVX
194  // AVX1 for sure....
195  // Ivy Bridge?
196  if ((info[2] & (1 << 29)) != 0 && // F16C
197  (info[2] & (1 << 30)) != 0) { // RDRAND
198  // So far, so good. AVX2?
199  if ((info2[1] & (1 << 5)) != 0)
200  return "avx2-i32x8";
201  else
202  return "avx1.1-i32x8";
203  }
204  // Regular AVX
205  return "avx1-i32x8";
206  }
207  else if ((info[2] & (1 << 19)) != 0)
208  return "sse4-i32x4";
209  else if ((info[3] & (1 << 26)) != 0)
210  return "sse2-i32x4";
211  else {
212  Error(SourcePos(), "Unable to detect supported SSE/AVX ISA. Exiting.");
213  exit(1);
214  }
215 #endif
216 }
217 
218 
/** Identifiers for the CPU models the compiler knows about.

    The values are used directly as indices into the per-CPU tables of
    AllCPUs, so they must stay contiguous, start at zero and end with
    sizeofCPUtype.
 */
typedef enum {
    // Special value, indicates that no CPU is present.
    CPU_None = 0,

    // 'Generic' CPU without any hardware SIMD capabilities.
    CPU_Generic,

    // A generic 64-bit specific x86 processor model which tries to be good
    // for modern chips without enabling instruction set encodings past the
    // basic SSE2 and 64-bit ones
    CPU_x86_64,

    // Early Atom CPU. Supports SSSE3.
    CPU_Bonnell,

    // Generic Core2-like. Supports SSSE3. Isn't quite compatible with Bonnell,
    // but for ISPC the difference is negligible; ISPC doesn't make use of it.
    CPU_Core2,

    // Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2).
    CPU_Penryn,

    // Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT.
    CPU_Nehalem,

    // Sandy Bridge. Supports AVX 1.
    CPU_SandyBridge,

    // Ivy Bridge. Supports AVX 1 + RDRAND.
    CPU_IvyBridge,

    // Haswell. Supports AVX 2.
    CPU_Haswell,

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
    // Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP.
    CPU_Broadwell,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    // Knights Landing - Xeon Phi.
    // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
    //          AVX-512CDI: Conflict Detection;
    //          AVX-512ERI & PRI: 28-bit precision RCP, RSQRT and EXP transcendentals,
    //                            new prefetch instructions.
    CPU_KNL,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
    // Skylake Xeon.
    // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
    //          AVX-512CDI: Conflict Detection;
    //          AVX-512VL: Vector Length Orthogonality;
    //          AVX-512DQ: New HPC ISA (vs AVX512F);
    //          AVX-512BW: Byte and Word Support.
    CPU_SKX,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
    // Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
    CPU_Silvermont,
#endif

    // FIXME: LLVM supports a ton of different ARM CPU variants--not just
    // cortex-a9 and a15.  We should be able to handle any of them that also
    // have NEON support.
#ifdef ISPC_ARM_ENABLED
    // ARM Cortex A15. Supports NEON VFPv4.
    CPU_CortexA15,

    // ARM Cortex A9. Supports NEON VFPv3.
    CPU_CortexA9,
#endif

#ifdef ISPC_NVPTX_ENABLED
    // NVidia CUDA-compatible SM-35 architecture.
    CPU_SM35,
#endif

    // Number of entries above; not a CPU.  Must stay last.
    sizeofCPUtype
} CPUtype;
300 
301 
302 class AllCPUs {
303 private:
304  std::vector<std::vector<std::string> > names;
305  std::vector<std::set<CPUtype> > compat;
306 
307  std::set<CPUtype> Set(int type, ...) {
308  std::set<CPUtype> retn;
309  va_list args;
310 
311  retn.insert((CPUtype)type);
312  va_start(args, type);
313  while ((type = va_arg(args, int)) != CPU_None)
314  retn.insert((CPUtype)type);
315  va_end(args);
316 
317  return retn;
318  }
319 
320 public:
322  names = std::vector<std::vector<std::string> >(sizeofCPUtype);
323  compat = std::vector<std::set<CPUtype> >(sizeofCPUtype);
324 
325  names[CPU_None].push_back("");
326 
327  names[CPU_Generic].push_back("generic");
328 
329  names[CPU_x86_64].push_back("x86-64");
330 
331  names[CPU_Bonnell].push_back("atom");
332  names[CPU_Bonnell].push_back("bonnell");
333 
334  names[CPU_Core2].push_back("core2");
335 
336  names[CPU_Penryn].push_back("penryn");
337 
338 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
339  names[CPU_Silvermont].push_back("slm");
340  names[CPU_Silvermont].push_back("silvermont");
341 #endif
342 
343  names[CPU_Nehalem].push_back("corei7");
344  names[CPU_Nehalem].push_back("nehalem");
345 
346  names[CPU_SandyBridge].push_back("corei7-avx");
347  names[CPU_SandyBridge].push_back("sandybridge");
348 
349  names[CPU_IvyBridge].push_back("core-avx-i");
350  names[CPU_IvyBridge].push_back("ivybridge");
351 
352  names[CPU_Haswell].push_back("core-avx2");
353  names[CPU_Haswell].push_back("haswell");
354 
355 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
356  names[CPU_Broadwell].push_back("broadwell");
357 #endif
358 
359 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
360  names[CPU_KNL].push_back("knl");
361 #endif
362 
363 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
364  names[CPU_SKX].push_back("skx");
365 #endif
366 
367 #ifdef ISPC_ARM_ENABLED
368  names[CPU_CortexA15].push_back("cortex-a15");
369 
370  names[CPU_CortexA9].push_back("cortex-a9");
371 #endif
372 
373 #ifdef ISPC_NVPTX_ENABLED
374  names[CPU_SM35].push_back("sm_35");
375 #endif
376 
377 
378 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 // LLVM 3.2 or 3.3
379  #define CPU_Silvermont CPU_Nehalem
380 #else /* LLVM 3.4+ */
383  CPU_None);
384 #endif
385 
386 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
387  compat[CPU_KNL] = Set(CPU_KNL, CPU_Generic, CPU_x86_64, CPU_Bonnell, CPU_Penryn,
391 #endif
392 
393 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
394  compat[CPU_SKX] = Set(CPU_SKX, CPU_x86_64, CPU_Bonnell, CPU_Penryn,
398 #endif
399 
400 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
401  #define CPU_Broadwell CPU_Haswell
402 #else /* LLVM 3.6+ */
407 #endif
415  CPU_None);
421  CPU_None);
424  CPU_None);
426  CPU_None);
428  CPU_None);
429  compat[CPU_Generic] = Set(CPU_Generic, CPU_None);
430 
432 
433 #ifdef ISPC_ARM_ENABLED
434  compat[CPU_CortexA15] = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15,
435  CPU_None);
436  compat[CPU_CortexA9] = Set(CPU_Generic, CPU_CortexA9, CPU_None);
437 #endif
438 
439 #ifdef ISPC_NVPTX_ENABLED
440  compat[CPU_SM35] = Set(CPU_Generic, CPU_SM35, CPU_None);
441 #endif
442  }
443 
444  std::string HumanReadableListOfNames() {
445  std::stringstream CPUs;
446  for (int i = CPU_Generic; i < sizeofCPUtype; i++) {
447  CPUs << names[i][0];
448  if (names[i].size() > 1) {
449  CPUs << " (synonyms: " << names[i][1];
450  for (int j = 2, je = names[i].size(); j < je; j++)
451  CPUs << ", " << names[i][j];
452  CPUs << ")";
453  }
454  if (i < sizeofCPUtype - 1)
455  CPUs << ", ";
456  }
457  return CPUs.str();
458  }
459 
460  std::string &GetDefaultNameFromType(CPUtype type) {
461  Assert((type >= CPU_None) && (type < sizeofCPUtype));
462  return names[type][0];
463  }
464 
465  CPUtype GetTypeFromName(std::string name) {
466  CPUtype retn = CPU_None;
467 
468  for (int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++)
469  for (int j = 0, je = names[i].size();
470  (retn == CPU_None) && (j < je); j++)
471  if (!name.compare(names[i][j]))
472  retn = (CPUtype)i;
473  return retn;
474  }
475 
476  bool BackwardCompatible(CPUtype what, CPUtype with) {
477  Assert((what > CPU_None) && (what < sizeofCPUtype));
478  Assert((with > CPU_None) && (with < sizeofCPUtype));
479  return compat[what].find(with) != compat[what].end();
480  }
481 };
482 
483 
484 Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genericAsSmth) :
485  m_target(NULL),
486  m_targetMachine(NULL),
487  m_dataLayout(NULL),
488  m_valid(false),
489  m_isa(SSE2),
490  m_treatGenericAsSmth(genericAsSmth),
491  m_arch(""),
492  m_is32Bit(true),
493  m_cpu(""),
494  m_attributes(""),
495 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
496  m_tf_attributes(NULL),
497 #endif
498  m_nativeVectorWidth(-1),
499  m_nativeVectorAlignment(-1),
500  m_dataTypeWidth(-1),
501  m_vectorWidth(-1),
502  m_generatePIC(pic),
503  m_maskingIsFree(false),
504  m_maskBitCount(-1),
505  m_hasHalf(false),
506  m_hasRand(false),
507  m_hasGather(false),
508  m_hasScatter(false),
509  m_hasTranscendentals(false),
510  m_hasTrigonometry(false),
511  m_hasRsqrtd(false),
512  m_hasRcpd(false),
513  m_hasVecPrefetch(false)
514 {
515  CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
516  AllCPUs a;
517 
518  if (cpu) {
519  CPUID = a.GetTypeFromName(cpu);
520  if (CPUID == CPU_None) {
521  Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported"
522  " CPUs: %s.", cpu, a.HumanReadableListOfNames().c_str());
523  return;
524  }
525  }
526 
527  if (isa == NULL) {
528  // If a CPU was specified explicitly, try to pick the best
529  // possible ISA based on that.
530  switch (CPUID) {
531  case CPU_None:
532  // No CPU and no ISA, so use system info to figure out
533  // what this CPU supports.
534  isa = lGetSystemISA();
535  Warning(SourcePos(), "No --target specified on command-line."
536  " Using default system target \"%s\".", isa);
537  break;
538 
539  case CPU_Generic:
540  isa = "generic-1";
541  break;
542 
543 #ifdef ISPC_NVPTX_ENABLED
544  case CPU_SM35:
545  isa = "nvptx";
546  break;
547 #endif
548 
549 #ifdef ISPC_ARM_ENABLED
550  case CPU_CortexA9:
551  case CPU_CortexA15:
552  isa = "neon-i32x4";
553  break;
554 #endif
555 
556 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
557  case CPU_KNL:
558  isa = "avx512knl-i32x16";
559  break;
560 #endif
561 
562 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
563  case CPU_SKX:
564  isa = "avx512skx-i32x16";
565  break;
566 #endif
567 
568 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
569  case CPU_Broadwell:
570 #endif
571  case CPU_Haswell:
572  isa = "avx2-i32x8";
573  break;
574 
575  case CPU_IvyBridge:
576  isa = "avx1.1-i32x8";
577  break;
578 
579  case CPU_SandyBridge:
580  isa = "avx1-i32x8";
581  break;
582 
583  // Penryn is here because ISPC does not use SSE 4.2
584  case CPU_Penryn:
585  case CPU_Nehalem:
586 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4
587  case CPU_Silvermont:
588 #endif
589  isa = "sse4-i32x4";
590  break;
591 
592  default:
593  isa = "sse2-i32x4";
594  break;
595  }
596  if (CPUID != CPU_None)
597  Warning(SourcePos(), "No --target specified on command-line."
598  " Using ISA \"%s\" based on specified CPU \"%s\".",
599  isa, cpu);
600  }
601 
602  if (!strcasecmp(isa, "host")) {
603  isa = lGetSystemISA();
604  }
605 
606  if (arch == NULL) {
607 #ifdef ISPC_ARM_ENABLED
608  if (!strncmp(isa, "neon", 4))
609  arch = "arm";
610  else
611 #endif
612 #ifdef ISPC_NVPTX_ENABLED
613  if(!strncmp(isa, "nvptx", 5))
614  arch = "nvptx64";
615  else
616 #endif /* ISPC_NVPTX_ENABLED */
617  arch = "x86-64";
618  }
619 
620  // Define arch alias
621  if (std::string(arch) == "x86_64")
622  arch = "x86-64";
623 
624  bool error = false;
625 
626  // Make sure the target architecture is a known one; print an error
627  // with the valid ones otherwise.
628 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
629  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::targets().begin();
630  iter != llvm::TargetRegistry::targets().end(); ++iter) {
631 #else
632  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin();
633  iter != llvm::TargetRegistry::end(); ++iter) {
634 #endif
635  if (std::string(arch) == iter->getName()) {
636  this->m_target = &*iter;
637  break;
638  }
639  }
640  if (this->m_target == NULL) {
641  fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
642  llvm::TargetRegistry::iterator iter;
643 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
644  for (iter = llvm::TargetRegistry::targets().begin();
645  iter != llvm::TargetRegistry::targets().end(); ++iter)
646 #else
647  for (iter = llvm::TargetRegistry::begin();
648  iter != llvm::TargetRegistry::end(); ++iter)
649 #endif
650  fprintf(stderr, "%s ", iter->getName());
651  fprintf(stderr, "\n");
652  error = true;
653  }
654  else {
655  this->m_arch = arch;
656  }
657 
658  // Check default LLVM generated targets
659  if (!strcasecmp(isa, "sse2") ||
660  !strcasecmp(isa, "sse2-i32x4")) {
661  this->m_isa = Target::SSE2;
662  this->m_nativeVectorWidth = 4;
663  this->m_nativeVectorAlignment = 16;
664  this->m_dataTypeWidth = 32;
665  this->m_vectorWidth = 4;
666  this->m_maskingIsFree = false;
667  this->m_maskBitCount = 32;
668  CPUfromISA = CPU_x86_64;
669  }
670  else if (!strcasecmp(isa, "sse2-x2") ||
671  !strcasecmp(isa, "sse2-i32x8")) {
672  this->m_isa = Target::SSE2;
673  this->m_nativeVectorWidth = 4;
674  this->m_nativeVectorAlignment = 16;
675  this->m_dataTypeWidth = 32;
676  this->m_vectorWidth = 8;
677  this->m_maskingIsFree = false;
678  this->m_maskBitCount = 32;
679  CPUfromISA = CPU_Core2;
680  }
681  else if (!strcasecmp(isa, "sse4") ||
682  !strcasecmp(isa, "sse4-i32x4")) {
683  this->m_isa = Target::SSE4;
684  this->m_nativeVectorWidth = 4;
685  this->m_nativeVectorAlignment = 16;
686  this->m_dataTypeWidth = 32;
687  this->m_vectorWidth = 4;
688  this->m_maskingIsFree = false;
689  this->m_maskBitCount = 32;
690  CPUfromISA = CPU_Nehalem;
691  }
692  else if (!strcasecmp(isa, "sse4x2") ||
693  !strcasecmp(isa, "sse4-x2") ||
694  !strcasecmp(isa, "sse4-i32x8")) {
695  this->m_isa = Target::SSE4;
696  this->m_nativeVectorWidth = 4;
697  this->m_nativeVectorAlignment = 16;
698  this->m_dataTypeWidth = 32;
699  this->m_vectorWidth = 8;
700  this->m_maskingIsFree = false;
701  this->m_maskBitCount = 32;
702  CPUfromISA = CPU_Nehalem;
703  }
704  else if (!strcasecmp(isa, "sse4-i8x16")) {
705  this->m_isa = Target::SSE4;
706  this->m_nativeVectorWidth = 16;
707  this->m_nativeVectorAlignment = 16;
708  this->m_dataTypeWidth = 8;
709  this->m_vectorWidth = 16;
710  this->m_maskingIsFree = false;
711  this->m_maskBitCount = 8;
712  CPUfromISA = CPU_Nehalem;
713  }
714  else if (!strcasecmp(isa, "sse4-i16x8")) {
715  this->m_isa = Target::SSE4;
716  this->m_nativeVectorWidth = 8;
717  this->m_nativeVectorAlignment = 16;
718  this->m_dataTypeWidth = 16;
719  this->m_vectorWidth = 8;
720  this->m_maskingIsFree = false;
721  this->m_maskBitCount = 16;
722  CPUfromISA = CPU_Nehalem;
723  }
724  else if (!strcasecmp(isa, "generic-4") ||
725  !strcasecmp(isa, "generic-x4")) {
726  this->m_isa = Target::GENERIC;
727  this->m_nativeVectorWidth = 4;
728  this->m_nativeVectorAlignment = 16;
729  this->m_vectorWidth = 4;
730  this->m_maskingIsFree = true;
731  this->m_maskBitCount = 1;
732  this->m_hasHalf = true;
733  this->m_hasTranscendentals = true;
734  this->m_hasTrigonometry = true;
735  this->m_hasGather = this->m_hasScatter = true;
736  this->m_hasRsqrtd = this->m_hasRcpd = true;
737  CPUfromISA = CPU_Generic;
738  }
739  else if (!strcasecmp(isa, "generic-8") ||
740  !strcasecmp(isa, "generic-x8")) {
741  this->m_isa = Target::GENERIC;
742  this->m_nativeVectorWidth = 8;
743  this->m_nativeVectorAlignment = 32;
744  this->m_vectorWidth = 8;
745  this->m_maskingIsFree = true;
746  this->m_maskBitCount = 1;
747  this->m_hasHalf = true;
748  this->m_hasTranscendentals = true;
749  this->m_hasTrigonometry = true;
750  this->m_hasGather = this->m_hasScatter = true;
751  this->m_hasRsqrtd = this->m_hasRcpd = true;
752  CPUfromISA = CPU_Generic;
753  }
754  else if (!strcasecmp(isa, "generic-16") ||
755  !strcasecmp(isa, "generic-x16") ||
756  // We treat *-generic-16 as generic-16, but with special name mangling
757  strstr(isa, "-generic-16") ||
758  strstr(isa, "-generic-x16")) {
759  this->m_isa = Target::GENERIC;
760  if (strstr(isa, "-generic-16") ||
761  strstr(isa, "-generic-x16")) {
762  // It is used for appropriate name mangling and dispatch function during multitarget compilation
763  this->m_treatGenericAsSmth = isa;
764  // We need to create appropriate name for mangling.
765  // Remove "-x16" or "-16" and replace "-" with "_".
766  this->m_treatGenericAsSmth = this->m_treatGenericAsSmth.substr(0, this->m_treatGenericAsSmth.find_last_of("-"));
767  std::replace(this->m_treatGenericAsSmth.begin(), this->m_treatGenericAsSmth.end(), '-', '_');
768  }
769  this->m_nativeVectorWidth = 16;
770  this->m_nativeVectorAlignment = 64;
771  this->m_vectorWidth = 16;
772  this->m_maskingIsFree = true;
773  this->m_maskBitCount = 1;
774  this->m_hasHalf = true;
775  this->m_hasTranscendentals = true;
776  // It's set to false, because stdlib implementation of math functions
777  // is faster on MIC, than "native" implementation provided by the
778  // icc compiler.
779  this->m_hasTrigonometry = false;
780  this->m_hasGather = this->m_hasScatter = true;
781  this->m_hasRsqrtd = this->m_hasRcpd = true;
782  // It's set to true, because MIC has hardware vector prefetch instruction
783  this->m_hasVecPrefetch = true;
784  CPUfromISA = CPU_Generic;
785  }
786  else if (!strcasecmp(isa, "generic-32") ||
787  !strcasecmp(isa, "generic-x32")) {
788  this->m_isa = Target::GENERIC;
789  this->m_nativeVectorWidth = 32;
790  this->m_nativeVectorAlignment = 64;
791  this->m_vectorWidth = 32;
792  this->m_maskingIsFree = true;
793  this->m_maskBitCount = 1;
794  this->m_hasHalf = true;
795  this->m_hasTranscendentals = true;
796  this->m_hasTrigonometry = true;
797  this->m_hasGather = this->m_hasScatter = true;
798  this->m_hasRsqrtd = this->m_hasRcpd = true;
799  CPUfromISA = CPU_Generic;
800  }
801  else if (!strcasecmp(isa, "generic-64") ||
802  !strcasecmp(isa, "generic-x64")) {
803  this->m_isa = Target::GENERIC;
804  this->m_nativeVectorWidth = 64;
805  this->m_nativeVectorAlignment = 64;
806  this->m_vectorWidth = 64;
807  this->m_maskingIsFree = true;
808  this->m_maskBitCount = 1;
809  this->m_hasHalf = true;
810  this->m_hasTranscendentals = true;
811  this->m_hasTrigonometry = true;
812  this->m_hasGather = this->m_hasScatter = true;
813  this->m_hasRsqrtd = this->m_hasRcpd = true;
814  CPUfromISA = CPU_Generic;
815  }
816  else if (!strcasecmp(isa, "generic-1") ||
817  !strcasecmp(isa, "generic-x1")) {
818  this->m_isa = Target::GENERIC;
819  this->m_nativeVectorWidth = 1;
820  this->m_nativeVectorAlignment = 16;
821  this->m_vectorWidth = 1;
822  this->m_maskingIsFree = false;
823  this->m_maskBitCount = 32;
824  CPUfromISA = CPU_Generic;
825  }
826  else if (!strcasecmp(isa, "avx1-i32x4")) {
827  this->m_isa = Target::AVX;
828  this->m_nativeVectorWidth = 8;
829  this->m_nativeVectorAlignment = 32;
830  this->m_dataTypeWidth = 32;
831  this->m_vectorWidth = 4;
832  this->m_maskingIsFree = false;
833  this->m_maskBitCount = 32;
834  CPUfromISA = CPU_SandyBridge;
835  }
836  else if (!strcasecmp(isa, "avx") ||
837  !strcasecmp(isa, "avx1") ||
838  !strcasecmp(isa, "avx1-i32x8")) {
839  this->m_isa = Target::AVX;
840  this->m_nativeVectorWidth = 8;
841  this->m_nativeVectorAlignment = 32;
842  this->m_dataTypeWidth = 32;
843  this->m_vectorWidth = 8;
844  this->m_maskingIsFree = false;
845  this->m_maskBitCount = 32;
846  CPUfromISA = CPU_SandyBridge;
847  }
848  else if (!strcasecmp(isa, "avx-i64x4") ||
849  !strcasecmp(isa, "avx1-i64x4")) {
850  this->m_isa = Target::AVX;
851  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
852  this->m_nativeVectorAlignment = 32;
853  this->m_dataTypeWidth = 64;
854  this->m_vectorWidth = 4;
855  this->m_maskingIsFree = false;
856  this->m_maskBitCount = 64;
857  CPUfromISA = CPU_SandyBridge;
858  }
859  else if (!strcasecmp(isa, "avx-x2") ||
860  !strcasecmp(isa, "avx1-x2") ||
861  !strcasecmp(isa, "avx1-i32x16")) {
862  this->m_isa = Target::AVX;
863  this->m_nativeVectorWidth = 8;
864  this->m_nativeVectorAlignment = 32;
865  this->m_dataTypeWidth = 32;
866  this->m_vectorWidth = 16;
867  this->m_maskingIsFree = false;
868  this->m_maskBitCount = 32;
869  CPUfromISA = CPU_SandyBridge;
870  }
871  else if (!strcasecmp(isa, "avx1.1") ||
872  !strcasecmp(isa, "avx1.1-i32x8")) {
873  this->m_isa = Target::AVX11;
874  this->m_nativeVectorWidth = 8;
875  this->m_nativeVectorAlignment = 32;
876  this->m_dataTypeWidth = 32;
877  this->m_vectorWidth = 8;
878  this->m_maskingIsFree = false;
879  this->m_maskBitCount = 32;
880  this->m_hasHalf = true;
881  this->m_hasRand = true;
882  CPUfromISA = CPU_IvyBridge;
883  }
884  else if (!strcasecmp(isa, "avx1.1-x2") ||
885  !strcasecmp(isa, "avx1.1-i32x16")) {
886  this->m_isa = Target::AVX11;
887  this->m_nativeVectorWidth = 8;
888  this->m_nativeVectorAlignment = 32;
889  this->m_dataTypeWidth = 32;
890  this->m_vectorWidth = 16;
891  this->m_maskingIsFree = false;
892  this->m_maskBitCount = 32;
893  this->m_hasHalf = true;
894  this->m_hasRand = true;
895  CPUfromISA = CPU_IvyBridge;
896  }
897  else if (!strcasecmp(isa, "avx1.1-i64x4")) {
898  this->m_isa = Target::AVX11;
899  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
900  this->m_nativeVectorAlignment = 32;
901  this->m_dataTypeWidth = 64;
902  this->m_vectorWidth = 4;
903  this->m_maskingIsFree = false;
904  this->m_maskBitCount = 64;
905  this->m_hasHalf = true;
906  this->m_hasRand = true;
907  CPUfromISA = CPU_IvyBridge;
908  }
909  else if (!strcasecmp(isa, "avx2") ||
910  !strcasecmp(isa, "avx2-i32x8")) {
911  this->m_isa = Target::AVX2;
912  this->m_nativeVectorWidth = 8;
913  this->m_nativeVectorAlignment = 32;
914  this->m_dataTypeWidth = 32;
915  this->m_vectorWidth = 8;
916  this->m_maskingIsFree = false;
917  this->m_maskBitCount = 32;
918  this->m_hasHalf = true;
919  this->m_hasRand = true;
920  this->m_hasGather = true;
921  CPUfromISA = CPU_Haswell;
922  }
923  else if (!strcasecmp(isa, "avx2-x2") ||
924  !strcasecmp(isa, "avx2-i32x16")) {
925  this->m_isa = Target::AVX2;
926  this->m_nativeVectorWidth = 16;
927  this->m_nativeVectorAlignment = 32;
928  this->m_dataTypeWidth = 32;
929  this->m_vectorWidth = 16;
930  this->m_maskingIsFree = false;
931  this->m_maskBitCount = 32;
932  this->m_hasHalf = true;
933  this->m_hasRand = true;
934  this->m_hasGather = true;
935  CPUfromISA = CPU_Haswell;
936  }
937  else if (!strcasecmp(isa, "avx2-i64x4")) {
938  this->m_isa = Target::AVX2;
939  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
940  this->m_nativeVectorAlignment = 32;
941  this->m_dataTypeWidth = 64;
942  this->m_vectorWidth = 4;
943  this->m_maskingIsFree = false;
944  this->m_maskBitCount = 64;
945  this->m_hasHalf = true;
946  this->m_hasRand = true;
947  this->m_hasGather = true;
948  CPUfromISA = CPU_Haswell;
949  }
950 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
951  else if (!strcasecmp(isa, "avx512knl-i32x16")) {
952  this->m_isa = Target::KNL_AVX512;
953  this->m_nativeVectorWidth = 16;
954  this->m_nativeVectorAlignment = 64;
955  this->m_dataTypeWidth = 32;
956  this->m_vectorWidth = 16;
957  this->m_maskingIsFree = true;
958  this->m_maskBitCount = 8;
959  this->m_hasHalf = true;
960  this->m_hasRand = true;
961  this->m_hasGather = this->m_hasScatter = true;
962  this->m_hasTranscendentals = false;
963  // For MIC it is set to true due to performance reasons. The option should be tested.
964  this->m_hasTrigonometry = false;
965  this->m_hasRsqrtd = this->m_hasRcpd = false;
966  this->m_hasVecPrefetch = false;
967  CPUfromISA = CPU_KNL;
968  }
969 #endif
970 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
971  else if (!strcasecmp(isa, "avx512skx-i32x16")) {
972  this->m_isa = Target::SKX_AVX512;
973  this->m_nativeVectorWidth = 16;
974  this->m_nativeVectorAlignment = 64;
975  this->m_dataTypeWidth = 32;
976  this->m_vectorWidth = 16;
977  this->m_maskingIsFree = true;
978  this->m_maskBitCount = 8;
979  this->m_hasHalf = true;
980  this->m_hasRand = true;
981  this->m_hasGather = this->m_hasScatter = true;
982  this->m_hasTranscendentals = false;
983  // For MIC it is set to true due to performance reasons. The option should be tested.
984  this->m_hasTrigonometry = false;
985  this->m_hasRsqrtd = this->m_hasRcpd = false;
986  this->m_hasVecPrefetch = false;
987  CPUfromISA = CPU_SKX;
988  }
989 #endif
990 #ifdef ISPC_ARM_ENABLED
991  else if (!strcasecmp(isa, "neon-i8x16")) {
992  this->m_isa = Target::NEON8;
993  this->m_nativeVectorWidth = 16;
994  this->m_nativeVectorAlignment = 16;
995  this->m_dataTypeWidth = 8;
996  this->m_vectorWidth = 16;
997  this->m_attributes = "+neon,+fp16";
998  this->m_hasHalf = true; // ??
999  this->m_maskingIsFree = false;
1000  this->m_maskBitCount = 8;
1001  }
1002  else if (!strcasecmp(isa, "neon-i16x8")) {
1003  this->m_isa = Target::NEON16;
1004  this->m_nativeVectorWidth = 8;
1005  this->m_nativeVectorAlignment = 16;
1006  this->m_dataTypeWidth = 16;
1007  this->m_vectorWidth = 8;
1008  this->m_attributes = "+neon,+fp16";
1009  this->m_hasHalf = true; // ??
1010  this->m_maskingIsFree = false;
1011  this->m_maskBitCount = 16;
1012  }
1013  else if (!strcasecmp(isa, "neon") ||
1014  !strcasecmp(isa, "neon-i32x4")) {
1015  this->m_isa = Target::NEON32;
1016  this->m_nativeVectorWidth = 4;
1017  this->m_nativeVectorAlignment = 16;
1018  this->m_dataTypeWidth = 32;
1019  this->m_vectorWidth = 4;
1020  this->m_attributes = "+neon,+fp16";
1021  this->m_hasHalf = true; // ??
1022  this->m_maskingIsFree = false;
1023  this->m_maskBitCount = 32;
1024  }
1025 #endif
1026 #ifdef ISPC_NVPTX_ENABLED
1027  else if (!strcasecmp(isa, "nvptx")) {
1028  this->m_isa = Target::NVPTX;
1029  this->m_cpu = "sm_35";
1030  this->m_nativeVectorWidth = 32;
1031  this->m_nativeVectorAlignment = 32;
1032  this->m_vectorWidth = 1;
1033  this->m_hasHalf = true;
1034  this->m_maskingIsFree = true;
1035  this->m_maskBitCount = 1;
1036  this->m_hasTranscendentals = true;
1037  this->m_hasTrigonometry = true;
1038  this->m_hasGather = this->m_hasScatter = false;
1039  CPUfromISA = CPU_SM35;
1040  }
1041 #endif /* ISPC_NVPTX_ENABLED */
1042  else {
1043  Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
1044  isa, SupportedTargets());
1045  error = true;
1046  }
1047 
1048 #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
1049  if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4))
1050  CPUID = CPU_CortexA9;
1051 #endif
1052 
1053  if (CPUID == CPU_None) {
1054 #ifndef ISPC_ARM_ENABLED
1055  if (isa == NULL) {
1056 #endif
1057  std::string hostCPU = llvm::sys::getHostCPUName();
1058  if (hostCPU.size() > 0)
1059  cpu = strdup(hostCPU.c_str());
1060  else {
1061  Warning(SourcePos(), "Unable to determine host CPU!\n");
1062  cpu = a.GetDefaultNameFromType(CPU_Generic).c_str();
1063  }
1064 #ifndef ISPC_ARM_ENABLED
1065  }
1066  else {
1067  cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
1068  }
1069 #endif
1070  }
1071  else {
1072  if ((CPUfromISA != CPU_None) &&
1073  !a.BackwardCompatible(CPUID, CPUfromISA)) {
1074  Error(SourcePos(), "The requested CPU is incompatible"
1075  " with the CPU %s needs: %s vs. %s!\n",
1076  isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str());
1077  return;
1078  }
1079  cpu = a.GetDefaultNameFromType(CPUID).c_str();
1080  }
1081  this->m_cpu = cpu;
1082 
1083  if (!error) {
1084  // Create TargetMachine
1085  std::string triple = GetTripleString();
1086 
1087 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_8
1088  llvm::Reloc::Model relocModel = m_generatePIC ? llvm::Reloc::PIC_ :
1089  llvm::Reloc::Default;
1090 #else
1091  llvm::Optional<llvm::Reloc::Model> relocModel;
1092  if (m_generatePIC) {
1093  relocModel = llvm::Reloc::PIC_;
1094  }
1095 #endif
1096  std::string featuresString = m_attributes;
1097  llvm::TargetOptions options;
1098 #ifdef ISPC_ARM_ENABLED
1099  if (m_isa == Target::NEON8 || m_isa == Target::NEON16 ||
1100  m_isa == Target::NEON32)
1101  options.FloatABIType = llvm::FloatABI::Hard;
1102 #endif
1103  if (g->opt.disableFMA == false)
1104  options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
1105 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1106  if (g->NoOmitFramePointer)
1107  options.NoFramePointerElim = true;
1108 #ifdef ISPC_IS_WINDOWS
1109  if (strcmp("x86", arch) == 0) {
1110  // Workaround for issue #503 (LLVM issue 14646).
1111  // It's Win32 specific.
1112  options.NoFramePointerElim = true;
1113  }
1114 #endif
1115 #endif
1116  m_targetMachine =
1117  m_target->createTargetMachine(triple, m_cpu, featuresString, options,
1118  relocModel);
1119  Assert(m_targetMachine != NULL);
1120 
1121 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1122  m_targetMachine->setAsmVerbosityDefault(true);
1123 #else /* LLVM 3.7+ */
1124  m_targetMachine->Options.MCOptions.AsmVerbose = true;
1125 #endif
1126 
1127 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
1128  // Change default version of generated DWARF.
1129  if (g->generateDWARFVersion != 0) {
1130  m_targetMachine->Options.MCOptions.DwarfVersion = g->generateDWARFVersion;
1131  }
1132 #endif
1133 
1134  // Initialize TargetData/DataLayout in 3 steps.
1135  // 1. Get default data layout first
1136  std::string dl_string;
1137 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
1138  dl_string = m_targetMachine->getSubtargetImpl()->getDataLayout()->getStringRepresentation();
1139 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1140  dl_string = m_targetMachine->createDataLayout().getStringRepresentation();
1141 #else // LLVM 3.5- or LLVM 3.7
1142  dl_string = m_targetMachine->getDataLayout()->getStringRepresentation();
1143 #endif
1144  // 2. Adjust for generic
1145  if (m_isa == Target::GENERIC) {
1146  // <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
1147  // that to the regular datalayout string for IA..
1148  // For generic-4 target we need to treat <4 x i1> as 128 bit value
1149  // in terms of required memory storage and alignment, as this is
1150  // translated to __m128 type.
1151  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
1152  "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
1153  "f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
1154  }
1155 #ifdef ISPC_NVPTX_ENABLED
1156  else if (m_isa == Target::NVPTX)
1157  {
1158  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
1159  }
1160 #endif
1161 
1162  // 3. Finally set member data
1163  m_dataLayout = new llvm::DataLayout(dl_string);
1164 
1165  // Set is32Bit
1166  // This indicates if we are compiling for 32 bit platform
1167  // and can assume 32 bit runtime.
1168  // FIXME: all generic targets are handled as 64 bit, which is incorrect.
1169 
1170  this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
1171 
1172 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
1173  // This is LLVM 3.3+ feature.
1174  // Initialize target-specific "target-feature" attribute.
1175  if (!m_attributes.empty()) {
1176  llvm::AttrBuilder attrBuilder;
1177 #ifdef ISPC_NVPTX_ENABLED
1178  if (m_isa != Target::NVPTX)
1179 #endif
1180  attrBuilder.addAttribute("target-cpu", this->m_cpu);
1181  attrBuilder.addAttribute("target-features", this->m_attributes);
1182 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
1183  this->m_tf_attributes = new llvm::AttributeSet(
1184  llvm::AttributeSet::get(
1185  *g->ctx,
1186  llvm::AttributeSet::FunctionIndex,
1187  attrBuilder));
1188 #else // LLVM 5.0+
1189  this->m_tf_attributes = new llvm::AttrBuilder(attrBuilder);
1190 #endif
1191  }
1192 #endif
1193 
1195  }
1196 
1197  m_valid = !error;
1198 
1199  if (printTarget) {
1200  printf("Target Triple: %s\n", m_targetMachine->getTargetTriple().str().c_str());
1201  printf("Target CPU: %s\n", m_targetMachine->getTargetCPU().str().c_str());
1202  printf("Target Feature String: %s\n", m_targetMachine->getTargetFeatureString().str().c_str());
1203  }
1204 
1205  return;
1206 }
1207 
1208 
1209 std::string
1211  AllCPUs a;
1212  return a.HumanReadableListOfNames();
1213 }
1214 
1215 
1216 const char *
1218  return
1219 #ifdef ISPC_ARM_ENABLED
1220  "arm, "
1221 #endif
1222  "x86, x86-64";
1223 }
1224 
1225 
1226 const char *
1228  return
1229  "host, sse2-i32x4, sse2-i32x8, "
1230  "sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
1231  "avx1-i32x4, "
1232  "avx1-i32x8, avx1-i32x16, avx1-i64x4, "
1233  "avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4, "
1234  "avx2-i32x8, avx2-i32x16, avx2-i64x4, "
1235 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1236  "avx512knl-i32x16, "
1237 #endif
1238 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1239  "avx512skx-i32x16, "
1240 #endif
1241  "generic-x1, generic-x4, generic-x8, generic-x16, "
1242  "generic-x32, generic-x64, *-generic-x16"
1243 #ifdef ISPC_ARM_ENABLED
1244  ", neon-i8x16, neon-i16x8, neon-i32x4"
1245 #endif
1246 #ifdef ISPC_NVPTX_ENABLED
1247  ", nvptx"
1248 #endif
1249 ;
1250 
1251 }
1252 
1253 
1254 std::string
1256  llvm::Triple triple;
1257 #ifdef ISPC_ARM_ENABLED
1258  if (m_arch == "arm") {
1259  triple.setTriple("armv7-eabi");
1260  }
1261  else
1262 #endif
1263  {
1264  // Start with the host triple as the default
1265  triple.setTriple(llvm::sys::getDefaultTargetTriple());
1266 
1267  // And override the arch in the host triple based on what the user
1268  // specified. Here we need to deal with the fact that LLVM uses one
1269  // naming convention for targets TargetRegistry, but wants some
1270  // slightly different ones for the triple. TODO: is there a way to
1271  // have it do this remapping, which would presumably be a bit less
1272  // error prone?
1273  if (m_arch == "x86")
1274  triple.setArchName("i386");
1275  else if (m_arch == "x86-64")
1276  triple.setArchName("x86_64");
1277 #ifdef ISPC_NVPTX_ENABLED
1278  else if (m_arch == "nvptx64")
1279  triple = llvm::Triple("nvptx64", "nvidia", "cuda");
1280 #endif /* ISPC_NVPTX_ENABLED */
1281  else
1282  triple.setArchName(m_arch);
1283  }
1284  return triple.str();
1285 }
1286 
1287 // This function returns string representation of ISA for the purpose of
1288 // mangling. And may return any unique string, preferably short, like
1289 // sse4, avx and etc.
1290 const char *
1292  switch (isa) {
1293 #ifdef ISPC_ARM_ENABLED
1294  case Target::NEON8:
1295  return "neon-8";
1296  case Target::NEON16:
1297  return "neon-16";
1298  case Target::NEON32:
1299  return "neon-32";
1300 #endif
1301  case Target::SSE2:
1302  return "sse2";
1303  case Target::SSE4:
1304  return "sse4";
1305  case Target::AVX:
1306  return "avx";
1307  case Target::AVX11:
1308  return "avx11";
1309  case Target::AVX2:
1310  return "avx2";
1311 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1312  case Target::KNL_AVX512:
1313  return "avx512knl";
1314 #endif
1315 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1316  case Target::SKX_AVX512:
1317  return "avx512skx";
1318 #endif
1319  case Target::GENERIC:
1320  return "generic";
1321 #ifdef ISPC_NVPTX_ENABLED
1322  case Target::NVPTX:
1323  return "nvptx";
1324 #endif /* ISPC_NVPTX_ENABLED */
1325  default:
1326  FATAL("Unhandled target in ISAToString()");
1327  }
1328  return "";
1329 }
1330 
1331 const char *
1333  return ISAToString(m_isa);
1334 }
1335 
1336 
1337 // This function returns string representation of default target corresponding
1338 // to ISA. I.e. for SSE4 it's sse4-i32x4, for AVX11 it's avx1.1-i32x8. This
1339 // string may be used to initialize Target.
1340 const char *
1342  switch (isa) {
1343 #ifdef ISPC_ARM_ENABLED
1344  case Target::NEON8:
1345  return "neon-8";
1346  case Target::NEON16:
1347  return "neon-16";
1348  case Target::NEON32:
1349  return "neon-32";
1350 #endif
1351  case Target::SSE2:
1352  return "sse2-i32x4";
1353  case Target::SSE4:
1354  return "sse4-i32x4";
1355  case Target::AVX:
1356  return "avx1-i32x8";
1357  case Target::AVX11:
1358  return "avx1.1-i32x8";
1359  case Target::AVX2:
1360  return "avx2-i32x8";
1361 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1362  case Target::KNL_AVX512:
1363  return "avx512knl-i32x16";
1364 #endif
1365 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1366  case Target::SKX_AVX512:
1367  return "avx512skx-i32x16";
1368 #endif
1369  case Target::GENERIC:
1370  return "generic-4";
1371 #ifdef ISPC_NVPTX_ENABLED
1372  case Target::NVPTX:
1373  return "nvptx";
1374 #endif /* ISPC_NVPTX_ENABLED */
1375  default:
1376  FATAL("Unhandled target in ISAToTargetString()");
1377  }
1378  return "";
1379 }
1380 
1381 
1382 const char *
1384  return ISAToString(m_isa);
1385 }
1386 
1387 
1388 static bool
1390  if (type->isFloatingPointTy() || type->isX86_MMXTy() || type->isVoidTy() ||
1391  type->isIntegerTy() || type->isLabelTy() || type->isMetadataTy())
1392  return false;
1393 
1394  if (type == LLVMTypes::BoolVectorType ||
1395  type == LLVMTypes::MaskType ||
1396  type == LLVMTypes::Int1VectorType)
1397  return true;
1398 
1399  llvm::ArrayType *at =
1400  llvm::dyn_cast<llvm::ArrayType>(type);
1401  if (at != NULL)
1402  return lGenericTypeLayoutIndeterminate(at->getElementType());
1403 
1404  llvm::PointerType *pt =
1405  llvm::dyn_cast<llvm::PointerType>(type);
1406  if (pt != NULL)
1407  return false;
1408 
1409  llvm::StructType *st =
1410  llvm::dyn_cast<llvm::StructType>(type);
1411  if (st != NULL) {
1412  for (int i = 0; i < (int)st->getNumElements(); ++i)
1413  if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
1414  return true;
1415  return false;
1416  }
1417 
1418  Assert(llvm::isa<llvm::VectorType>(type));
1419  return true;
1420 }
1421 
1422 
/** Builds an llvm::Value holding the size of `type`.

    In the GENERIC-ISA branch the size is expressed as IR: a GEP to
    element 1 off a null pointer of type `type*`, converted to an integer
    with ptrtoint.  Both instructions are created with `insertAtEnd` as
    their insertion block.  Otherwise the size comes from
    DataLayout::getTypeStoreSize() and is returned as a constant.

    In both paths the result is either 32 or 64 bits wide.

    NOTE(review): this listing skips original lines 1427, 1442 and 1451.
    The rest of the first `if` condition and the conditions that select the
    32-bit result ahead of each `else` are not visible here; restore them
    from the upstream file before building.

    @param type         Type whose size is wanted.
    @param insertAtEnd  Basic block handed to the GEP / ptrtoint
                        constructors in the GENERIC branch.
    @return The size as an llvm::Value. */
1423 llvm::Value *
1424 Target::SizeOf(llvm::Type *type,
1425  llvm::BasicBlock *insertAtEnd) {
1426  if (m_isa == Target::GENERIC &&
// Index 1 from a null `type*`: the resulting address equals the size of one element.
1428  llvm::Value *index[1] = { LLVMInt32(1) };
1429  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1430  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1431  llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
// The GEP factory takes an explicit pointee type from LLVM 3.7 on.
1432 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1433  llvm::Instruction *gep =
1434  llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
1435  insertAtEnd);
1436 #else /* LLVM 3.7+ */
1437  llvm::Instruction *gep =
1438  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr,
1439  arrayRef, "sizeof_gep",
1440  insertAtEnd);
1441 #endif
// Convert the computed address to an integer of the selected width.
1443  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
1444  "sizeof_int", insertAtEnd);
1445  else
1446  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
1447  "sizeof_int", insertAtEnd);
1448  }
1449 
// Layout is known: take the store size from the data layout.
1450  uint64_t byteSize = getDataLayout()->getTypeStoreSize(type);
// The 32-bit result narrows byteSize with an explicit cast.
1452  return LLVMInt32((int32_t)byteSize);
1453  else
1454  return LLVMInt64(byteSize);
1455 }
1456 
1457 
/** Builds an llvm::Value holding the byte offset of member `element`
    within the struct type `type`.

    When the ISA is GENERIC and lGenericTypeLayoutIndeterminate(type) is
    true, the offset is expressed as IR: a GEP with indices {0, element}
    off a null pointer of type `type*`, converted to an integer with
    ptrtoint.  Both instructions are created with `insertAtEnd` as their
    insertion block.  Otherwise the offset is read from the data layout's
    StructLayout and returned as a constant.

    In both paths the result is either 32 or 64 bits wide.

    NOTE(review): this listing skips original lines 1477 and 1496.  The
    conditions that select the 32-bit result ahead of each `else` are not
    visible here; restore them from the upstream file before building.

    @param type         Expected to be a sized struct type.
    @param element      Index of the member whose offset is wanted.
    @param insertAtEnd  Basic block handed to the GEP / ptrtoint
                        constructors in the GENERIC branch.
    @return The offset as an llvm::Value, or NULL when `type` is not a
            sized struct (an earlier error is asserted in that case). */
1458 llvm::Value *
1459 Target::StructOffset(llvm::Type *type, int element,
1460  llvm::BasicBlock *insertAtEnd) {
1461  if (m_isa == Target::GENERIC &&
1462  lGenericTypeLayoutIndeterminate(type) == true) {
// Indices {0, element} from a null `type*`: the resulting address equals the member's offset.
1463  llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
1464  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1465  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1466  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
// The GEP factory takes an explicit pointee type from LLVM 3.7 on.
1467 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1468  llvm::Instruction *gep =
1469  llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
1470  insertAtEnd);
1471 #else /* LLVM 3.7+ */
1472  llvm::Instruction *gep =
1473  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr,
1474  arrayRef, "offset_gep",
1475  insertAtEnd);
1476 #endif
// Convert the computed address to an integer of the selected width.
1478  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
1479  "offset_int", insertAtEnd);
1480  else
1481  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
1482  "offset_int", insertAtEnd);
1483  }
1484 
1485  llvm::StructType *structType =
1486  llvm::dyn_cast<llvm::StructType>(type);
// Not a sized struct: an error must already have been counted on the module.
1487  if (structType == NULL || structType->isSized() == false) {
1488  Assert(m->errorCount > 0);
1489  return NULL;
1490  }
1491 
1492  const llvm::StructLayout *sl = getDataLayout()->getStructLayout(structType);
1493  Assert(sl != NULL);
1494 
1495  uint64_t offset = sl->getElementOffset(element);
// The 32-bit result narrows offset with an explicit cast.
1497  return LLVMInt32((int32_t)offset);
1498  else
1499  return LLVMInt64(offset);
1500 }
1501 
1502 void Target::markFuncWithTargetAttr(llvm::Function* func) {
1503 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
1504  if (m_tf_attributes) {
1505 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
1506  func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
1507 #else // LLVM 5.0+
1508  func->addAttributes(llvm::AttributeList::FunctionIndex, *m_tf_attributes);
1509 #endif
1510  }
1511 #endif
1512 }
1513 
1514 
1515 ///////////////////////////////////////////////////////////////////////////
1516 // Opt
1517 
1519  level = 1;
1520  fastMath = false;
1521  fastMaskedVload = false;
1522  force32BitAddressing = true;
1523  unrollLoops = true;
1524  disableAsserts = false;
1525  disableFMA = false;
1526  forceAlignedMemory = false;
1527  disableMaskAllOnOptimizations = false;
1528  disableHandlePseudoMemoryOps = false;
1529  disableBlendedMaskedStores = false;
1530  disableCoherentControlFlow = false;
1531  disableUniformControlFlow = false;
1532  disableGatherScatterOptimizations = false;
1533  disableMaskedStoreToStore = false;
1534  disableGatherScatterFlattening = false;
1535  disableUniformMemoryOptimizations = false;
1536  disableCoalescing = false;
1537 }
1538 
1539 ///////////////////////////////////////////////////////////////////////////
1540 // Globals
1541 
1543  mathLib = Globals::Math_ISPC;
1544 
1545  includeStdlib = true;
1546  runCPP = true;
1547  debugPrint = false;
1548  printTarget = false;
1549  NoOmitFramePointer = false;
1550  debugIR = -1;
1551  disableWarnings = false;
1552  warningsAsErrors = false;
1553  quiet = false;
1554  forceColoredOutput = false;
1555  disableLineWrap = false;
1556  emitPerfWarnings = true;
1557  emitInstrumentation = false;
1558  generateDebuggingSymbols = false;
1559 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
1560  generateDWARFVersion = 3;
1561 #endif
1562  enableFuzzTest = false;
1563  fuzzTestSeed = -1;
1564  mangleFunctionsWithTarget = false;
1565 
1566  ctx = new llvm::LLVMContext;
1567 
1568 #ifdef ISPC_IS_WINDOWS
1569  _getcwd(currentDirectory, sizeof(currentDirectory));
1570 #else
1571  if (getcwd(currentDirectory, sizeof(currentDirectory)) == NULL)
1572  FATAL("Current directory path too long!");
1573 #endif
1574  forceAlignment = -1;
1575  dllExport = false;
1576 }
1577 
1578 ///////////////////////////////////////////////////////////////////////////
1579 // SourcePos
1580 
1581 SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
1582  name = n;
1583  if (name == NULL) {
1584  if (m != NULL)
1585  name = m->module->getModuleIdentifier().c_str();
1586  else
1587  name = "(unknown)";
1588  }
1589  first_line = fl;
1590  first_column = fc;
1591  last_line = ll != 0 ? ll : fl;
1592  last_column = lc != 0 ? lc : fc;
1593 }
1594 
1595 
1596 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1597 llvm::DIFile
1598 #else /* LLVM 3.7+ */
1599 llvm::DIFile*
1600 //llvm::MDFile*
1601 #endif
1603  std::string directory, filename;
1604  GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
1605 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1606  llvm::DIFile ret = m->diBuilder->createFile(filename, directory);
1607  Assert(ret.Verify());
1608 #else /* LLVM 3.7+ */
1609  llvm::DIFile *ret = m->diBuilder->createFile(filename, directory);
1610 #endif
1611  return ret;
1612 }
1613 
1614 
1615 void
1617  printf(" @ [%s:%d.%d - %d.%d] ", name, first_line, first_column,
1618  last_line, last_column);
1619 }
1620 
1621 
1622 bool
1624  return (!strcmp(name, p2.name) &&
1625  first_line == p2.first_line &&
1626  first_column == p2.first_column &&
1627  last_line == p2.last_line &&
1628  last_column == p2.last_column);
1629 }
1630 
1631 
1632 SourcePos
1633 Union(const SourcePos &p1, const SourcePos &p2) {
1634  if (strcmp(p1.name, p2.name) != 0)
1635  return p1;
1636 
1637  SourcePos ret;
1638  ret.name = p1.name;
1639  ret.first_line = std::min(p1.first_line, p2.first_line);
1640  ret.first_column = std::min(p1.first_column, p2.first_column);
1641  ret.last_line = std::max(p1.last_line, p2.last_line);
1642  ret.last_column = std::max(p1.last_column, p2.last_column);
1643  return ret;
1644 }
bool disableFMA
Definition: ispc.h:466
#define CPU_Broadwell
bool m_hasTranscendentals
Definition: ispc.h:411
#define CPU_Silvermont
Globals()
Definition: ispc.cpp:1542
Opt opt
Definition: ispc.h:548
int last_column
Definition: ispc.h:145
const llvm::Target * m_target
Definition: ispc.h:315
static bool __os_has_avx_support()
Definition: ispc.cpp:124
This structure collects together a number of global variables.
Definition: ispc.h:544
const char * GetISATargetString() const
Definition: ispc.cpp:1383
int m_nativeVectorAlignment
Definition: ispc.h:371
AllCPUs()
Definition: ispc.cpp:321
SourcePos Union(const SourcePos &p1, const SourcePos &p2)
Definition: ispc.cpp:1633
int first_line
Definition: ispc.h:142
llvm::Value * SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1424
SourcePos(const char *n=NULL, int fl=0, int fc=0, int ll=0, int lc=0)
Definition: ispc.cpp:1581
static llvm::VectorType * BoolVectorType
Definition: llvmutil.h:92
std::string m_cpu
Definition: ispc.h:345
Opt()
Definition: ispc.cpp:1518
std::string m_arch
Definition: ispc.h:339
static const char * lGetSystemISA()
Definition: ispc.cpp:157
bool NoOmitFramePointer
Definition: ispc.h:573
std::string HumanReadableListOfNames()
Definition: ispc.cpp:444
#define Assert(expr)
Definition: ispc.h:173
bool BackwardCompatible(CPUtype what, CPUtype with)
Definition: ispc.cpp:476
static bool lGenericTypeLayoutIndeterminate(llvm::Type *type)
Definition: ispc.cpp:1389
const char * GetISAString() const
Definition: ispc.cpp:1332
int m_nativeVectorWidth
Definition: ispc.h:364
Module * m
Definition: ispc.cpp:101
static const char * ISAToString(Target::ISA isa)
Definition: ispc.cpp:1291
bool m_generatePIC
Definition: ispc.h:383
static const char * SupportedArchs()
Definition: ispc.cpp:1217
std::set< CPUtype > Set(int type,...)
Definition: ispc.cpp:307
bool m_maskingIsFree
Definition: ispc.h:389
static llvm::VectorType * Int1VectorType
Definition: llvmutil.h:93
void GetDirectoryAndFileName(const std::string &currentDir, const std::string &relativeName, std::string *directory, std::string *filename)
Definition: util.cpp:565
Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genenricAsSmth="")
Definition: ispc.cpp:484
static void __cpuidex(int info[4], int level, int count)
Definition: ispc.cpp:114
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:263
std::string m_attributes
Definition: ispc.h:348
llvm::Module * module
Definition: module.h:166
static std::string SupportedCPUs()
Definition: ispc.cpp:1210
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:385
Definition: module.h:57
static llvm::Type * Int64Type
Definition: llvmutil.h:79
char currentDirectory[1024]
Definition: ispc.h:643
std::string & GetDefaultNameFromType(CPUtype type)
Definition: ispc.cpp:460
Header file with declarations for various LLVM utility stuff.
bool m_is32Bit
Definition: ispc.h:342
bool m_hasRand
Definition: ispc.h:401
bool m_hasRcpd
Definition: ispc.h:420
int m_maskBitCount
Definition: ispc.h:394
static void __cpuid(int info[4], int infoType)
Definition: ispc.cpp:107
Representation of a range of positions in a source file.
Definition: ispc.h:137
llvm::Value * StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1459
int m_vectorWidth
Definition: ispc.h:380
llvm::TargetMachine * m_targetMachine
Definition: ispc.h:325
bool force32BitAddressing
Definition: ispc.h:458
static bool __os_has_avx512_support()
Definition: ispc.cpp:139
const char * name
Definition: ispc.h:141
void markFuncWithTargetAttr(llvm::Function *func)
Definition: ispc.cpp:1502
bool operator==(const SourcePos &p2) const
Definition: ispc.cpp:1623
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:410
#define FATAL(message)
Definition: util.h:113
int m_dataTypeWidth
Definition: ispc.h:375
static llvm::Type * Int32Type
Definition: llvmutil.h:78
int last_line
Definition: ispc.h:144
#define PTYPE(p)
Definition: llvmutil.h:55
#define ISPC_MAX_NVEC
Definition: ispc.h:69
std::vector< std::vector< std::string > > names
Definition: ispc.cpp:304
bool m_hasGather
Definition: ispc.h:404
int first_column
Definition: ispc.h:143
llvm::DataLayout * m_dataLayout
Definition: ispc.h:326
void Print() const
Definition: ispc.cpp:1616
bool m_hasScatter
Definition: ispc.h:407
ISA
Definition: ispc.h:192
llvm::DIFile GetDIFile() const
Definition: ispc.cpp:1602
bool m_valid
Definition: ispc.h:330
Globals * g
Definition: ispc.cpp:100
static llvm::VectorType * MaskType
Definition: llvmutil.h:90
std::vector< std::set< CPUtype > > compat
Definition: ispc.cpp:305
bool m_hasTrigonometry
Definition: ispc.h:414
std::string m_treatGenericAsSmth
Definition: ispc.h:336
bool m_hasHalf
Definition: ispc.h:398
const llvm::DataLayout * getDataLayout() const
Definition: ispc.h:265
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
llvm::ConstantInt * LLVMInt64(int64_t i)
Definition: llvmutil.cpp:277
int errorCount
Definition: module.h:159
llvm::LLVMContext * ctx
Definition: ispc.h:639
static const char * SupportedTargets()
Definition: ispc.cpp:1227
ISA m_isa
Definition: ispc.h:333
bool m_hasVecPrefetch
Definition: ispc.h:423
llvm::DIBuilder * diBuilder
Definition: module.h:169
Main ispc.header file. Defines Target, Globals and Opt classes.
CPUtype GetTypeFromName(std::string name)
Definition: ispc.cpp:465
std::string GetTripleString() const
Definition: ispc.cpp:1255
static const char * ISAToTargetString(Target::ISA isa)
Definition: ispc.cpp:1341
bool m_hasRsqrtd
Definition: ispc.h:417
CPUtype
Definition: ispc.cpp:219