Intel SPMD Program Compiler  1.9.1
ispc.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2016, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ispc.cpp
35  @brief ispc global definitions
36 */
37 
38 #include "ispc.h"
39 #include "module.h"
40 #include "util.h"
41 #include "llvmutil.h"
42 #include <stdio.h>
43 #include <sstream>
44 #include <stdarg.h> /* va_list, va_start, va_arg, va_end */
45 #ifdef ISPC_IS_WINDOWS
46  #include <windows.h>
47  #include <direct.h>
48  #define strcasecmp stricmp
49 #else
50  #include <sys/types.h>
51  #include <unistd.h>
52 #endif
53 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
54  #include <llvm/LLVMContext.h>
55  #include <llvm/Module.h>
56  #include <llvm/Instructions.h>
57 #else /* 3.3+ */
58  #include <llvm/IR/LLVMContext.h>
59  #include <llvm/IR/Module.h>
60  #include <llvm/IR/Instructions.h>
61 #endif
62 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
63  #include <llvm/Target/TargetSubtargetInfo.h>
64  #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
65  #include <llvm/Target/TargetLowering.h>
66  #endif
67 #endif
68 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
69  #include <llvm/IR/DebugInfo.h>
70  #include <llvm/IR/DIBuilder.h>
71 #else // LLVM 3.2, 3.3, 3.4
72  #include <llvm/DebugInfo.h>
73  #include <llvm/DIBuilder.h>
74 #endif
75 #include <llvm/Support/Dwarf.h>
76 #include <llvm/Target/TargetMachine.h>
77 #include <llvm/Target/TargetOptions.h>
78 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
79  #include <llvm/DataLayout.h>
80 #else // LLVM 3.3+
81  #include <llvm/IR/DataLayout.h>
82  #include <llvm/IR/Attributes.h>
83 #endif
84 #include <llvm/Support/TargetRegistry.h>
85 #include <llvm/Support/TargetSelect.h>
86 #include <llvm/Support/Host.h>
87 
90 
91 ///////////////////////////////////////////////////////////////////////////
92 // Target
93 
94 #if !defined(ISPC_IS_WINDOWS) && !defined(__arm__)
/** Non-Windows stand-in for the MSVC __cpuid intrinsic.

    Executes the CPUID instruction with EAX = infoType and stores the
    resulting EAX, EBX, ECX and EDX values in info[0], info[1], info[2]
    and info[3] respectively.

    ECX is explicitly set to zero before the instruction is issued.  CPUID
    leaves that take a sub-leaf index read it from ECX, so leaving the
    register uninitialised would make the result for those leaves depend
    on whatever the compiler happened to keep there.  Zeroing ECX also
    matches the documented behaviour of the MSVC intrinsic of the same name.

    @param info      Receives the four output registers (EAX, EBX, ECX, EDX).
    @param infoType  CPUID leaf to query (loaded into EAX).
 */
static void __cpuid(int info[4], int infoType) {
    __asm__ __volatile__ ("cpuid"
                          : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
                          : "0" (infoType), "2" (0));
}
100 
/** Non-Windows stand-in for the MSVC __cpuidex intrinsic.

    Executes the CPUID instruction with EAX = level and ECX = count and
    stores the resulting EAX, EBX, ECX and EDX values in info[0..3].

    The base register is saved around the instruction in case it is the
    PIC register: it is swapped with a scratch register before CPUID and
    swapped back afterwards, which also moves the EBX result into the
    scratch register.

    On x86-64 the swap must be a 64-bit exchange.  A 32-bit "xchgl" writes
    %ebx, which zero-extends into %rbx and would silently destroy the upper
    half of a callee-saved register that the compiler believes is untouched.
    The scratch operand is marked early-clobber because it is written before
    the inputs have been consumed by CPUID.

    @param info   Receives the four output registers (EAX, EBX, ECX, EDX).
    @param level  CPUID leaf to query (loaded into EAX).
    @param count  CPUID sub-leaf to query (loaded into ECX).
 */
static void __cpuidex(int info[4], int level, int count) {
#if defined(__x86_64__)
    __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1\n\t"
                          "cpuid\n\t"
                          "xchg{q}\t{%%}rbx, %q1\n\t"
                          : "=a" (info[0]), "=&r" (info[1]), "=c" (info[2]), "=d" (info[3])
                          : "0" (level), "2" (count));
#else
    __asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"
                          "cpuid\n\t"
                          "xchg{l}\t{%%}ebx, %1\n\t"
                          : "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
                          : "0" (level), "2" (count));
#endif
}
109 #endif // !ISPC_IS_WINDOWS && !__arm__
110 
111 #if !defined(__arm__)
/** Reports whether the operating system will save the YMM registers.

    Reads the extended-feature-enabled mask (XGETBV with ECX = 0) and
    returns true only when both bits covered by the mask 0x6 are set.
    The caller is expected to have checked that XGETBV is usable (OSXSAVE)
    before calling this.
 */
static bool __os_has_avx_support() {
    const unsigned long long kRequiredBits = 6;
    unsigned long long enabledMask;
#if defined(ISPC_IS_WINDOWS)
    enabledMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#else // !defined(ISPC_IS_WINDOWS)
    // XGETBV is emitted as raw bytes rather than by mnemonic: older
    // assemblers do not know the instruction, and there is no easy way to
    // compile conditionally on which assembler is in use.
    unsigned int lowWord, highWord;
    __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0" : "=a" (lowWord), "=d" (highWord) : "c" (0));
    // Only the low 32 bits are inspected; EDX is listed as an output
    // because the instruction overwrites it.
    enabledMask = lowWord;
#endif // !defined(ISPC_IS_WINDOWS)
    return (enabledMask & kRequiredBits) == kRequiredBits;
}
126 
/** Reports whether the operating system saves the XMM, YMM and ZMM
    registers, i.e. whether it supports AVX2 and AVX512.

    Reads the extended-feature-enabled mask (XGETBV with ECX = 0) and
    returns true only when every bit of the mask 0xE6 is set.
    See section 2.1 of
    software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf
 */
static bool __os_has_avx512_support() {
    const unsigned long long kRequiredBits = 0xE6;
    unsigned long long enabledMask;
#if defined(ISPC_IS_WINDOWS)
    enabledMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#else // !defined(ISPC_IS_WINDOWS)
    // XGETBV is emitted as raw bytes rather than by mnemonic: older
    // assemblers do not know the instruction, and there is no easy way to
    // compile conditionally on which assembler is in use.
    unsigned int lowWord, highWord;
    __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0" : "=a" (lowWord), "=d" (highWord) : "c" (0));
    // Only the low 32 bits are inspected; EDX is listed as an output
    // because the instruction overwrites it.
    enabledMask = lowWord;
#endif // !defined(ISPC_IS_WINDOWS)
    return (enabledMask & kRequiredBits) == kRequiredBits;
}
142 #endif // !__arm__
143 
/** Chooses the default ISPC target name for the machine the compiler is
    running on and returns it as a string literal.

    When built for __arm__ the answer is always "neon-i32x4".  Otherwise
    CPUID leaf 1 (info) and leaf 7 / sub-leaf 0 (info2) are queried and the
    checks below are tried from the most capable ISA to the least capable:
    the two AVX512 targets, then AVX2 / AVX1.1 / AVX1, then SSE4 and SSE2.
    If none of them matches, an error is reported and the process exits
    with status 1.

    NOTE(review): this listing appears to have lost the line carrying the
    function name and opening brace (presumably "lGetSystemISA() {", going
    by the calls that assign its result to the ISA string), and the final
    operand / opening brace of the first "if" below.  Restore both from the
    upstream source before compiling.
 */
144 static const char *
146 #ifdef __arm__
147  return "neon-i32x4";
148 #else
149  int info[4];
150  __cpuid(info, 1);
151 
152  int info2[4];
153  // Call cpuid with eax=7, ecx=0
154  __cpuidex(info2, 7, 0);
155 
// AVX512 candidates: only considered when OSXSAVE, AVX2 and AVX512 F are
// all reported.
156  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
157  (info2[1] & (1 << 5)) != 0 && // AVX2
158  (info2[1] & (1 << 16)) != 0 && // AVX512 F
160  // We need to verify that AVX2 is also available,
161  // as well as AVX512, because our targets are supposed
162  // to use both.
163 
164  if ((info2[1] & (1 << 17)) != 0 && // AVX512 DQ
165  (info2[1] & (1 << 28)) != 0 && // AVX512 CDI
166  (info2[1] & (1 << 30)) != 0 && // AVX512 BW
167  (info2[1] & (1 << 31)) != 0) { // AVX512 VL
168  return "avx512skx-i32x16";
169  }
170  else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
171  (info2[1] & (1 << 27)) != 0 && // AVX512 ER
172  (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
173  return "avx512knl-i32x16";
174  }
175  // If it's unknown AVX512 target, fall through and use AVX2
176  // or whatever is available in the machine.
177  }
178 
// AVX family: needs the CPU feature bits and OS support for saving the
// YMM registers.
179  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
180  (info[2] & (1 << 28)) != 0 &&
181  __os_has_avx_support()) { // AVX
182  // AVX1 for sure....
183  // Ivy Bridge?
184  if ((info[2] & (1 << 29)) != 0 && // F16C
185  (info[2] & (1 << 30)) != 0) { // RDRAND
186  // So far, so good. AVX2?
187  if ((info2[1] & (1 << 5)) != 0)
188  return "avx2-i32x8";
189  else
190  return "avx1.1-i32x8";
191  }
192  // Regular AVX
193  return "avx1-i32x8";
194  }
// SSE fallbacks, selected by a single feature bit each.
195  else if ((info[2] & (1 << 19)) != 0)
196  return "sse4-i32x4";
197  else if ((info[3] & (1 << 26)) != 0)
198  return "sse2-i32x4";
199  else {
// Nothing usable was detected: report the problem and terminate.
200  Error(SourcePos(), "Unable to detect supported SSE/AVX ISA. Exiting.");
201  exit(1);
202  }
203 #endif
204 }
205 
206 
// Identifiers for the CPU models known to the compiler.  The values are
// used as indices into the per-CPU tables of AllCPUs, and CPU_None (0)
// doubles as the terminator of the variadic lists passed to AllCPUs::Set.
//
// NOTE(review): in this listing most enumerator lines are absent; only the
// describing comments survive for several entries (the code below refers
// to names such as CPU_Generic, CPU_Bonnell, CPU_Core2, CPU_Penryn,
// CPU_Nehalem, CPU_SandyBridge, CPU_IvyBridge, CPU_Haswell, CPU_Broadwell,
// CPU_Silvermont and sizeofCPUtype).  Restore them from the upstream
// source before compiling.
207 typedef enum {
208  // Special value, indicates that no CPU is present.
209  CPU_None = 0,
210 
211  // 'Generic' CPU without any hardware SIMD capabilities.
213 
214  // Early Atom CPU. Supports SSSE3.
216 
217  // Generic Core2-like. Supports SSSE3. Isn't quite compatible with Bonnell,
218  // but for ISPC the difference is negligible; ISPC doesn't make use of it.
220 
221  // Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2).
223 
224  // Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT.
226 
227  // Sandy Bridge. Supports AVX 1.
229 
230  // Ivy Bridge. Supports AVX 1 + RDRAND.
232 
233  // Haswell. Supports AVX 2.
235 
236 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
237  // Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP.
239 #endif
240 
241 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
242  // Knights Landing - Xeon Phi.
243  // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
244  // AVX-512CDI: Conflict Detection;
245  // AVX-512ERI & PRI: 28-bit precision RCP, RSQRT and EXP transcendentals,
246  // new prefetch instructions.
247  CPU_KNL,
248 #endif
249 
250 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
251  // Skylake Xeon.
252  // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
253  // AVX-512CDI: Conflict Detection;
254  // AVX-512VL: Vector Length Orthogonality;
255  // AVX-512DQ: New HPC ISA (vs AVX512F);
256  // AVX-512BW: Byte and Word Support.
257  CPU_SKX,
258 #endif
259 
260 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
261  // Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
263 #endif
264 
265  // FIXME: LLVM supports a ton of different ARM CPU variants--not just
266  // cortex-a9 and a15. We should be able to handle any of them that also
267  // have NEON support.
268 #ifdef ISPC_ARM_ENABLED
269  // ARM Cortex A15. Supports NEON VFPv4.
270  CPU_CortexA15,
271 
272  // ARM Cortex A9. Supports NEON VFPv3.
273  CPU_CortexA9,
274 #endif
275 
276 #ifdef ISPC_NVPTX_ENABLED
277  // NVidia CUDA-compatible SM-35 architecture.
278  CPU_SM35,
279 #endif
280 
282 } CPUtype;
283 
284 
285 class AllCPUs {
286 private:
287  std::vector<std::vector<std::string> > names;
288  std::vector<std::set<CPUtype> > compat;
289 
290  std::set<CPUtype> Set(int type, ...) {
291  std::set<CPUtype> retn;
292  va_list args;
293 
294  retn.insert((CPUtype)type);
295  va_start(args, type);
296  while ((type = va_arg(args, int)) != CPU_None)
297  retn.insert((CPUtype)type);
298  va_end(args);
299 
300  return retn;
301  }
302 
303 public:
// Body of the table-building code of AllCPUs: sizes both tables to
// sizeofCPUtype, registers the accepted spellings of every CPU in "names"
// (the first entry pushed for a CPU is the one GetDefaultNameFromType
// returns) and fills "compat" with the set of CPUs each CPU is considered
// backward compatible with.
//
// NOTE(review): in this listing the line that opens this body (presumably
// the "AllCPUs() {" constructor header) is absent, and so are most of the
// "compat[...] = Set(...)" statements -- only their trailing "CPU_None);"
// lines remain.  Restore them from the upstream source before compiling.
305  names = std::vector<std::vector<std::string> >(sizeofCPUtype);
306  compat = std::vector<std::set<CPUtype> >(sizeofCPUtype);
307 
308  names[CPU_None].push_back("");
309 
310  names[CPU_Generic].push_back("generic");
311 
312  names[CPU_Bonnell].push_back("atom");
313  names[CPU_Bonnell].push_back("bonnell");
314 
315  names[CPU_Core2].push_back("core2");
316 
317  names[CPU_Penryn].push_back("penryn");
318 
319 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
320  names[CPU_Silvermont].push_back("slm");
321  names[CPU_Silvermont].push_back("silvermont");
322 #endif
323 
324  names[CPU_Nehalem].push_back("corei7");
325  names[CPU_Nehalem].push_back("nehalem");
326 
327  names[CPU_SandyBridge].push_back("corei7-avx");
328  names[CPU_SandyBridge].push_back("sandybridge");
329 
330  names[CPU_IvyBridge].push_back("core-avx-i");
331  names[CPU_IvyBridge].push_back("ivybridge");
332 
333  names[CPU_Haswell].push_back("core-avx2");
334  names[CPU_Haswell].push_back("haswell");
335 
336 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
337  names[CPU_Broadwell].push_back("broadwell");
338 #endif
339 
340 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
341  names[CPU_KNL].push_back("knl");
342 #endif
343 
344 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
345  names[CPU_SKX].push_back("skx");
346 #endif
347 
348 #ifdef ISPC_ARM_ENABLED
349  names[CPU_CortexA15].push_back("cortex-a15");
350 
351  names[CPU_CortexA9].push_back("cortex-a9");
352 #endif
353 
354 #ifdef ISPC_NVPTX_ENABLED
355  names[CPU_SM35].push_back("sm_35");
356 #endif
357 
358 
// With LLVM 3.2/3.3 the name CPU_Silvermont is turned into a macro alias
// for CPU_Nehalem, so the compatibility lists that follow can mention it
// unconditionally.
359 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 // LLVM 3.2 or 3.3
360  #define CPU_Silvermont CPU_Nehalem
361 #else /* LLVM 3.4+ */
364  CPU_None);
365 #endif
366 
367 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
368  compat[CPU_KNL] = Set(CPU_KNL, CPU_Generic, CPU_Bonnell, CPU_Penryn,
372 #endif
373 
374 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
375  compat[CPU_SKX] = Set(CPU_SKX, CPU_Bonnell, CPU_Penryn,
379 #endif
380 
// Same trick for LLVM 3.5 and older: CPU_Broadwell becomes a macro alias
// for CPU_Haswell.
381 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
382  #define CPU_Broadwell CPU_Haswell
383 #else /* LLVM 3.6+ */
388 #endif
396  CPU_None);
402  CPU_None);
405  CPU_None);
407  CPU_None);
409  CPU_None);
410  compat[CPU_Generic] = Set(CPU_Generic, CPU_None);
411 
412 #ifdef ISPC_ARM_ENABLED
413  compat[CPU_CortexA15] = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15,
414  CPU_None);
415  compat[CPU_CortexA9] = Set(CPU_Generic, CPU_CortexA9, CPU_None);
416 #endif
417 
418 #ifdef ISPC_NVPTX_ENABLED
419  compat[CPU_SM35] = Set(CPU_Generic, CPU_SM35, CPU_None);
420 #endif
421  }
422 
423  std::string HumanReadableListOfNames() {
424  std::stringstream CPUs;
425  for (int i = CPU_Generic; i < sizeofCPUtype; i++) {
426  CPUs << names[i][0];
427  if (names[i].size() > 1) {
428  CPUs << " (synonyms: " << names[i][1];
429  for (int j = 2, je = names[i].size(); j < je; j++)
430  CPUs << ", " << names[i][j];
431  CPUs << ")";
432  }
433  if (i < sizeofCPUtype - 1)
434  CPUs << ", ";
435  }
436  return CPUs.str();
437  }
438 
439  std::string &GetDefaultNameFromType(CPUtype type) {
440  Assert((type >= CPU_None) && (type < sizeofCPUtype));
441  return names[type][0];
442  }
443 
444  CPUtype GetTypeFromName(std::string name) {
445  CPUtype retn = CPU_None;
446 
447  for (int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++)
448  for (int j = 0, je = names[i].size();
449  (retn == CPU_None) && (j < je); j++)
450  if (!name.compare(names[i][j]))
451  retn = (CPUtype)i;
452  return retn;
453  }
454 
455  bool BackwardCompatible(CPUtype what, CPUtype with) {
456  Assert((what > CPU_None) && (what < sizeofCPUtype));
457  Assert((with > CPU_None) && (with < sizeofCPUtype));
458  return compat[what].find(with) != compat[what].end();
459  }
460 };
461 
462 
463 Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genericAsSmth) :
464  m_target(NULL),
465  m_targetMachine(NULL),
466  m_dataLayout(NULL),
467  m_valid(false),
468  m_isa(SSE2),
469  m_treatGenericAsSmth(genericAsSmth),
470  m_arch(""),
471  m_is32Bit(true),
472  m_cpu(""),
473  m_attributes(""),
474 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
475  m_tf_attributes(NULL),
476 #endif
477  m_nativeVectorWidth(-1),
478  m_nativeVectorAlignment(-1),
479  m_dataTypeWidth(-1),
480  m_vectorWidth(-1),
481  m_generatePIC(pic),
482  m_maskingIsFree(false),
483  m_maskBitCount(-1),
484  m_hasHalf(false),
485  m_hasRand(false),
486  m_hasGather(false),
487  m_hasScatter(false),
488  m_hasTranscendentals(false),
489  m_hasTrigonometry(false),
490  m_hasRsqrtd(false),
491  m_hasRcpd(false),
492  m_hasVecPrefetch(false)
493 {
494  CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
495  AllCPUs a;
496 
497  if (cpu) {
498  CPUID = a.GetTypeFromName(cpu);
499  if (CPUID == CPU_None) {
500  Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported"
501  " CPUs: %s.", cpu, a.HumanReadableListOfNames().c_str());
502  return;
503  }
504  }
505 
506  if (isa == NULL) {
507  // If a CPU was specified explicitly, try to pick the best
508  // possible ISA based on that.
509  switch (CPUID) {
510  case CPU_None:
511  // No CPU and no ISA, so use system info to figure out
512  // what this CPU supports.
513  isa = lGetSystemISA();
514  Warning(SourcePos(), "No --target specified on command-line."
515  " Using default system target \"%s\".", isa);
516  break;
517 
518  case CPU_Generic:
519  isa = "generic-1";
520  break;
521 
522 #ifdef ISPC_NVPTX_ENABLED
523  case CPU_SM35:
524  isa = "nvptx";
525  break;
526 #endif
527 
528 #ifdef ISPC_ARM_ENABLED
529  case CPU_CortexA9:
530  case CPU_CortexA15:
531  isa = "neon-i32x4";
532  break;
533 #endif
534 
535 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
536  case CPU_KNL:
537  isa = "avx512knl-i32x16";
538  break;
539 #endif
540 
541 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
542  case CPU_SKX:
543  isa = "avx512skx-i32x16";
544  break;
545 #endif
546 
547 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
548  case CPU_Broadwell:
549 #endif
550  case CPU_Haswell:
551  isa = "avx2-i32x8";
552  break;
553 
554  case CPU_IvyBridge:
555  isa = "avx1.1-i32x8";
556  break;
557 
558  case CPU_SandyBridge:
559  isa = "avx1-i32x8";
560  break;
561 
562  // Penryn is here because ISPC does not use SSE 4.2
563  case CPU_Penryn:
564  case CPU_Nehalem:
565 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4
566  case CPU_Silvermont:
567 #endif
568  isa = "sse4-i32x4";
569  break;
570 
571  default:
572  isa = "sse2-i32x4";
573  break;
574  }
575  if (CPUID != CPU_None)
576  Warning(SourcePos(), "No --target specified on command-line."
577  " Using ISA \"%s\" based on specified CPU \"%s\".",
578  isa, cpu);
579  }
580 
581  if (!strcasecmp(isa, "host")) {
582  isa = lGetSystemISA();
583  }
584 
585  if (arch == NULL) {
586 #ifdef ISPC_ARM_ENABLED
587  if (!strncmp(isa, "neon", 4))
588  arch = "arm";
589  else
590 #endif
591 #ifdef ISPC_NVPTX_ENABLED
592  if(!strncmp(isa, "nvptx", 5))
593  arch = "nvptx64";
594  else
595 #endif /* ISPC_NVPTX_ENABLED */
596  arch = "x86-64";
597  }
598 
599  // Define arch alias
600  if (std::string(arch) == "x86_64")
601  arch = "x86-64";
602 
603  bool error = false;
604 
605  // Make sure the target architecture is a known one; print an error
606  // with the valid ones otherwise.
607 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
608  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::targets().begin();
609  iter != llvm::TargetRegistry::targets().end(); ++iter) {
610 #else
611  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin();
612  iter != llvm::TargetRegistry::end(); ++iter) {
613 #endif
614  if (std::string(arch) == iter->getName()) {
615  this->m_target = &*iter;
616  break;
617  }
618  }
619  if (this->m_target == NULL) {
620  fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
621  llvm::TargetRegistry::iterator iter;
622 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
623  for (iter = llvm::TargetRegistry::targets().begin();
624  iter != llvm::TargetRegistry::targets().end(); ++iter)
625 #else
626  for (iter = llvm::TargetRegistry::begin();
627  iter != llvm::TargetRegistry::end(); ++iter)
628 #endif
629  fprintf(stderr, "%s ", iter->getName());
630  fprintf(stderr, "\n");
631  error = true;
632  }
633  else {
634  this->m_arch = arch;
635  }
636 
637  // Check default LLVM generated targets
638  if (!strcasecmp(isa, "sse2") ||
639  !strcasecmp(isa, "sse2-i32x4")) {
640  this->m_isa = Target::SSE2;
641  this->m_nativeVectorWidth = 4;
642  this->m_nativeVectorAlignment = 16;
643  this->m_dataTypeWidth = 32;
644  this->m_vectorWidth = 4;
645  this->m_maskingIsFree = false;
646  this->m_maskBitCount = 32;
647  CPUfromISA = CPU_Core2;
648  }
649  else if (!strcasecmp(isa, "sse2-x2") ||
650  !strcasecmp(isa, "sse2-i32x8")) {
651  this->m_isa = Target::SSE2;
652  this->m_nativeVectorWidth = 4;
653  this->m_nativeVectorAlignment = 16;
654  this->m_dataTypeWidth = 32;
655  this->m_vectorWidth = 8;
656  this->m_maskingIsFree = false;
657  this->m_maskBitCount = 32;
658  CPUfromISA = CPU_Core2;
659  }
660  else if (!strcasecmp(isa, "sse4") ||
661  !strcasecmp(isa, "sse4-i32x4")) {
662  this->m_isa = Target::SSE4;
663  this->m_nativeVectorWidth = 4;
664  this->m_nativeVectorAlignment = 16;
665  this->m_dataTypeWidth = 32;
666  this->m_vectorWidth = 4;
667  this->m_maskingIsFree = false;
668  this->m_maskBitCount = 32;
669  CPUfromISA = CPU_Nehalem;
670  }
671  else if (!strcasecmp(isa, "sse4x2") ||
672  !strcasecmp(isa, "sse4-x2") ||
673  !strcasecmp(isa, "sse4-i32x8")) {
674  this->m_isa = Target::SSE4;
675  this->m_nativeVectorWidth = 4;
676  this->m_nativeVectorAlignment = 16;
677  this->m_dataTypeWidth = 32;
678  this->m_vectorWidth = 8;
679  this->m_maskingIsFree = false;
680  this->m_maskBitCount = 32;
681  CPUfromISA = CPU_Nehalem;
682  }
683  else if (!strcasecmp(isa, "sse4-i8x16")) {
684  this->m_isa = Target::SSE4;
685  this->m_nativeVectorWidth = 16;
686  this->m_nativeVectorAlignment = 16;
687  this->m_dataTypeWidth = 8;
688  this->m_vectorWidth = 16;
689  this->m_maskingIsFree = false;
690  this->m_maskBitCount = 8;
691  CPUfromISA = CPU_Nehalem;
692  }
693  else if (!strcasecmp(isa, "sse4-i16x8")) {
694  this->m_isa = Target::SSE4;
695  this->m_nativeVectorWidth = 8;
696  this->m_nativeVectorAlignment = 16;
697  this->m_dataTypeWidth = 16;
698  this->m_vectorWidth = 8;
699  this->m_maskingIsFree = false;
700  this->m_maskBitCount = 16;
701  CPUfromISA = CPU_Nehalem;
702  }
703  else if (!strcasecmp(isa, "generic-4") ||
704  !strcasecmp(isa, "generic-x4")) {
705  this->m_isa = Target::GENERIC;
706  this->m_nativeVectorWidth = 4;
707  this->m_nativeVectorAlignment = 16;
708  this->m_vectorWidth = 4;
709  this->m_maskingIsFree = true;
710  this->m_maskBitCount = 1;
711  this->m_hasHalf = true;
712  this->m_hasTranscendentals = true;
713  this->m_hasTrigonometry = true;
714  this->m_hasGather = this->m_hasScatter = true;
715  this->m_hasRsqrtd = this->m_hasRcpd = true;
716  CPUfromISA = CPU_Generic;
717  }
718  else if (!strcasecmp(isa, "generic-8") ||
719  !strcasecmp(isa, "generic-x8")) {
720  this->m_isa = Target::GENERIC;
721  this->m_nativeVectorWidth = 8;
722  this->m_nativeVectorAlignment = 32;
723  this->m_vectorWidth = 8;
724  this->m_maskingIsFree = true;
725  this->m_maskBitCount = 1;
726  this->m_hasHalf = true;
727  this->m_hasTranscendentals = true;
728  this->m_hasTrigonometry = true;
729  this->m_hasGather = this->m_hasScatter = true;
730  this->m_hasRsqrtd = this->m_hasRcpd = true;
731  CPUfromISA = CPU_Generic;
732  }
733  else if (!strcasecmp(isa, "generic-16") ||
734  !strcasecmp(isa, "generic-x16") ||
735  // We treat *-generic-16 as generic-16, but with special name mangling
736  strstr(isa, "-generic-16") ||
737  strstr(isa, "-generic-x16")) {
738  this->m_isa = Target::GENERIC;
739  if (strstr(isa, "-generic-16") ||
740  strstr(isa, "-generic-x16")) {
741  // It is used for appropriate name mangling and dispatch function during multitarget compilation
742  this->m_treatGenericAsSmth = isa;
743  // We need to create appropriate name for mangling.
744  // Remove "-x16" or "-16" and replace "-" with "_".
745  this->m_treatGenericAsSmth = this->m_treatGenericAsSmth.substr(0, this->m_treatGenericAsSmth.find_last_of("-"));
746  std::replace(this->m_treatGenericAsSmth.begin(), this->m_treatGenericAsSmth.end(), '-', '_');
747  }
748  this->m_nativeVectorWidth = 16;
749  this->m_nativeVectorAlignment = 64;
750  this->m_vectorWidth = 16;
751  this->m_maskingIsFree = true;
752  this->m_maskBitCount = 1;
753  this->m_hasHalf = true;
754  this->m_hasTranscendentals = true;
755  // It's set to false, because stdlib implementation of math functions
756  // is faster on MIC, than "native" implementation provided by the
757  // icc compiler.
758  this->m_hasTrigonometry = false;
759  this->m_hasGather = this->m_hasScatter = true;
760  this->m_hasRsqrtd = this->m_hasRcpd = true;
761  // It's set to true, because MIC has hardware vector prefetch instruction
762  this->m_hasVecPrefetch = true;
763  CPUfromISA = CPU_Generic;
764  }
765  else if (!strcasecmp(isa, "generic-32") ||
766  !strcasecmp(isa, "generic-x32")) {
767  this->m_isa = Target::GENERIC;
768  this->m_nativeVectorWidth = 32;
769  this->m_nativeVectorAlignment = 64;
770  this->m_vectorWidth = 32;
771  this->m_maskingIsFree = true;
772  this->m_maskBitCount = 1;
773  this->m_hasHalf = true;
774  this->m_hasTranscendentals = true;
775  this->m_hasTrigonometry = true;
776  this->m_hasGather = this->m_hasScatter = true;
777  this->m_hasRsqrtd = this->m_hasRcpd = true;
778  CPUfromISA = CPU_Generic;
779  }
780  else if (!strcasecmp(isa, "generic-64") ||
781  !strcasecmp(isa, "generic-x64")) {
782  this->m_isa = Target::GENERIC;
783  this->m_nativeVectorWidth = 64;
784  this->m_nativeVectorAlignment = 64;
785  this->m_vectorWidth = 64;
786  this->m_maskingIsFree = true;
787  this->m_maskBitCount = 1;
788  this->m_hasHalf = true;
789  this->m_hasTranscendentals = true;
790  this->m_hasTrigonometry = true;
791  this->m_hasGather = this->m_hasScatter = true;
792  this->m_hasRsqrtd = this->m_hasRcpd = true;
793  CPUfromISA = CPU_Generic;
794  }
795  else if (!strcasecmp(isa, "generic-1") ||
796  !strcasecmp(isa, "generic-x1")) {
797  this->m_isa = Target::GENERIC;
798  this->m_nativeVectorWidth = 1;
799  this->m_nativeVectorAlignment = 16;
800  this->m_vectorWidth = 1;
801  this->m_maskingIsFree = false;
802  this->m_maskBitCount = 32;
803  CPUfromISA = CPU_Generic;
804  }
805  else if (!strcasecmp(isa, "avx1-i32x4")) {
806  this->m_isa = Target::AVX;
807  this->m_nativeVectorWidth = 8;
808  this->m_nativeVectorAlignment = 32;
809  this->m_dataTypeWidth = 32;
810  this->m_vectorWidth = 4;
811  this->m_maskingIsFree = false;
812  this->m_maskBitCount = 32;
813  CPUfromISA = CPU_SandyBridge;
814  }
815  else if (!strcasecmp(isa, "avx") ||
816  !strcasecmp(isa, "avx1") ||
817  !strcasecmp(isa, "avx1-i32x8")) {
818  this->m_isa = Target::AVX;
819  this->m_nativeVectorWidth = 8;
820  this->m_nativeVectorAlignment = 32;
821  this->m_dataTypeWidth = 32;
822  this->m_vectorWidth = 8;
823  this->m_maskingIsFree = false;
824  this->m_maskBitCount = 32;
825  CPUfromISA = CPU_SandyBridge;
826  }
827  else if (!strcasecmp(isa, "avx-i64x4") ||
828  !strcasecmp(isa, "avx1-i64x4")) {
829  this->m_isa = Target::AVX;
830  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
831  this->m_nativeVectorAlignment = 32;
832  this->m_dataTypeWidth = 64;
833  this->m_vectorWidth = 4;
834  this->m_maskingIsFree = false;
835  this->m_maskBitCount = 64;
836  CPUfromISA = CPU_SandyBridge;
837  }
838  else if (!strcasecmp(isa, "avx-x2") ||
839  !strcasecmp(isa, "avx1-x2") ||
840  !strcasecmp(isa, "avx1-i32x16")) {
841  this->m_isa = Target::AVX;
842  this->m_nativeVectorWidth = 8;
843  this->m_nativeVectorAlignment = 32;
844  this->m_dataTypeWidth = 32;
845  this->m_vectorWidth = 16;
846  this->m_maskingIsFree = false;
847  this->m_maskBitCount = 32;
848  CPUfromISA = CPU_SandyBridge;
849  }
850  else if (!strcasecmp(isa, "avx1.1") ||
851  !strcasecmp(isa, "avx1.1-i32x8")) {
852  this->m_isa = Target::AVX11;
853  this->m_nativeVectorWidth = 8;
854  this->m_nativeVectorAlignment = 32;
855  this->m_dataTypeWidth = 32;
856  this->m_vectorWidth = 8;
857  this->m_maskingIsFree = false;
858  this->m_maskBitCount = 32;
859  this->m_hasHalf = true;
860  this->m_hasRand = true;
861  CPUfromISA = CPU_IvyBridge;
862  }
863  else if (!strcasecmp(isa, "avx1.1-x2") ||
864  !strcasecmp(isa, "avx1.1-i32x16")) {
865  this->m_isa = Target::AVX11;
866  this->m_nativeVectorWidth = 8;
867  this->m_nativeVectorAlignment = 32;
868  this->m_dataTypeWidth = 32;
869  this->m_vectorWidth = 16;
870  this->m_maskingIsFree = false;
871  this->m_maskBitCount = 32;
872  this->m_hasHalf = true;
873  this->m_hasRand = true;
874  CPUfromISA = CPU_IvyBridge;
875  }
876  else if (!strcasecmp(isa, "avx1.1-i64x4")) {
877  this->m_isa = Target::AVX11;
878  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
879  this->m_nativeVectorAlignment = 32;
880  this->m_dataTypeWidth = 64;
881  this->m_vectorWidth = 4;
882  this->m_maskingIsFree = false;
883  this->m_maskBitCount = 64;
884  this->m_hasHalf = true;
885  this->m_hasRand = true;
886  CPUfromISA = CPU_IvyBridge;
887  }
888  else if (!strcasecmp(isa, "avx2") ||
889  !strcasecmp(isa, "avx2-i32x8")) {
890  this->m_isa = Target::AVX2;
891  this->m_nativeVectorWidth = 8;
892  this->m_nativeVectorAlignment = 32;
893  this->m_dataTypeWidth = 32;
894  this->m_vectorWidth = 8;
895  this->m_maskingIsFree = false;
896  this->m_maskBitCount = 32;
897  this->m_hasHalf = true;
898  this->m_hasRand = true;
899  this->m_hasGather = true;
900  CPUfromISA = CPU_Haswell;
901  }
902  else if (!strcasecmp(isa, "avx2-x2") ||
903  !strcasecmp(isa, "avx2-i32x16")) {
904  this->m_isa = Target::AVX2;
905  this->m_nativeVectorWidth = 16;
906  this->m_nativeVectorAlignment = 32;
907  this->m_dataTypeWidth = 32;
908  this->m_vectorWidth = 16;
909  this->m_maskingIsFree = false;
910  this->m_maskBitCount = 32;
911  this->m_hasHalf = true;
912  this->m_hasRand = true;
913  this->m_hasGather = true;
914  CPUfromISA = CPU_Haswell;
915  }
916  else if (!strcasecmp(isa, "avx2-i64x4")) {
917  this->m_isa = Target::AVX2;
918  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
919  this->m_nativeVectorAlignment = 32;
920  this->m_dataTypeWidth = 64;
921  this->m_vectorWidth = 4;
922  this->m_maskingIsFree = false;
923  this->m_maskBitCount = 64;
924  this->m_hasHalf = true;
925  this->m_hasRand = true;
926  this->m_hasGather = true;
927  CPUfromISA = CPU_Haswell;
928  }
929 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
930  else if (!strcasecmp(isa, "avx512knl-i32x16")) {
931  this->m_isa = Target::KNL_AVX512;
932  this->m_nativeVectorWidth = 16;
933  this->m_nativeVectorAlignment = 64;
934  // ?? this->m_dataTypeWidth = 32;
935  this->m_vectorWidth = 16;
936  this->m_maskingIsFree = true;
937  this->m_maskBitCount = 8;
938  this->m_hasHalf = true;
939  this->m_hasRand = true;
940  this->m_hasGather = this->m_hasScatter = true;
941  this->m_hasTranscendentals = false;
942  // For MIC it is set to true due to performance reasons. The option should be tested.
943  this->m_hasTrigonometry = false;
944  this->m_hasRsqrtd = this->m_hasRcpd = false;
945  this->m_hasVecPrefetch = false;
946  CPUfromISA = CPU_KNL;
947  }
948 #endif
949 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
950  else if (!strcasecmp(isa, "avx512skx-i32x16")) {
951  this->m_isa = Target::SKX_AVX512;
952  this->m_nativeVectorWidth = 16;
953  this->m_nativeVectorAlignment = 64;
954  // ?? this->m_dataTypeWidth = 32;
955  this->m_vectorWidth = 16;
956  this->m_maskingIsFree = true;
957  this->m_maskBitCount = 8;
958  this->m_hasHalf = true;
959  this->m_hasRand = true;
960  this->m_hasGather = this->m_hasScatter = true;
961  this->m_hasTranscendentals = false;
962  // For MIC it is set to true due to performance reasons. The option should be tested.
963  this->m_hasTrigonometry = false;
964  this->m_hasRsqrtd = this->m_hasRcpd = false;
965  this->m_hasVecPrefetch = false;
966  CPUfromISA = CPU_SKX;
967  }
968 #endif
969 #ifdef ISPC_ARM_ENABLED
970  else if (!strcasecmp(isa, "neon-i8x16")) {
971  this->m_isa = Target::NEON8;
972  this->m_nativeVectorWidth = 16;
973  this->m_nativeVectorAlignment = 16;
974  this->m_dataTypeWidth = 8;
975  this->m_vectorWidth = 16;
976  this->m_attributes = "+neon,+fp16";
977  this->m_hasHalf = true; // ??
978  this->m_maskingIsFree = false;
979  this->m_maskBitCount = 8;
980  }
981  else if (!strcasecmp(isa, "neon-i16x8")) {
982  this->m_isa = Target::NEON16;
983  this->m_nativeVectorWidth = 8;
984  this->m_nativeVectorAlignment = 16;
985  this->m_dataTypeWidth = 16;
986  this->m_vectorWidth = 8;
987  this->m_attributes = "+neon,+fp16";
988  this->m_hasHalf = true; // ??
989  this->m_maskingIsFree = false;
990  this->m_maskBitCount = 16;
991  }
992  else if (!strcasecmp(isa, "neon") ||
993  !strcasecmp(isa, "neon-i32x4")) {
994  this->m_isa = Target::NEON32;
995  this->m_nativeVectorWidth = 4;
996  this->m_nativeVectorAlignment = 16;
997  this->m_dataTypeWidth = 32;
998  this->m_vectorWidth = 4;
999  this->m_attributes = "+neon,+fp16";
1000  this->m_hasHalf = true; // ??
1001  this->m_maskingIsFree = false;
1002  this->m_maskBitCount = 32;
1003  }
1004 #endif
1005 #ifdef ISPC_NVPTX_ENABLED
1006  else if (!strcasecmp(isa, "nvptx")) {
1007  this->m_isa = Target::NVPTX;
1008  this->m_cpu = "sm_35";
1009  this->m_nativeVectorWidth = 32;
1010  this->m_nativeVectorAlignment = 32;
1011  this->m_vectorWidth = 1;
1012  this->m_hasHalf = true;
1013  this->m_maskingIsFree = true;
1014  this->m_maskBitCount = 1;
1015  this->m_hasTranscendentals = true;
1016  this->m_hasTrigonometry = true;
1017  this->m_hasGather = this->m_hasScatter = false;
1018  CPUfromISA = CPU_SM35;
1019  }
1020 #endif /* ISPC_NVPTX_ENABLED */
1021  else {
1022  Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
1023  isa, SupportedTargets());
1024  error = true;
1025  }
1026 
1027 #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
1028  if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4))
1029  CPUID = CPU_CortexA9;
1030 #endif
1031 
1032  if (CPUID == CPU_None) {
1033 #ifndef ISPC_ARM_ENABLED
1034  if (isa == NULL) {
1035 #endif
1036  std::string hostCPU = llvm::sys::getHostCPUName();
1037  if (hostCPU.size() > 0)
1038  cpu = strdup(hostCPU.c_str());
1039  else {
1040  Warning(SourcePos(), "Unable to determine host CPU!\n");
1041  cpu = a.GetDefaultNameFromType(CPU_Generic).c_str();
1042  }
1043 #ifndef ISPC_ARM_ENABLED
1044  }
1045  else {
1046  cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
1047  }
1048 #endif
1049  }
1050  else {
1051  if ((CPUfromISA != CPU_None) &&
1052  !a.BackwardCompatible(CPUID, CPUfromISA)) {
1053  Error(SourcePos(), "The requested CPU is incompatible"
1054  " with the CPU %s needs: %s vs. %s!\n",
1055  isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str());
1056  return;
1057  }
1058  cpu = a.GetDefaultNameFromType(CPUID).c_str();
1059  }
1060  this->m_cpu = cpu;
1061 
1062  if (!error) {
1063  // Create TargetMachine
1064  std::string triple = GetTripleString();
1065 
1066 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_8
1067  llvm::Reloc::Model relocModel = m_generatePIC ? llvm::Reloc::PIC_ :
1068  llvm::Reloc::Default;
1069 #else
1070  llvm::Optional<llvm::Reloc::Model> relocModel;
1071  if (m_generatePIC) {
1072  relocModel = llvm::Reloc::PIC_;
1073  }
1074 #endif
1075  std::string featuresString = m_attributes;
1076  llvm::TargetOptions options;
1077 #ifdef ISPC_ARM_ENABLED
1078  if (m_isa == Target::NEON8 || m_isa == Target::NEON16 ||
1079  m_isa == Target::NEON32)
1080  options.FloatABIType = llvm::FloatABI::Hard;
1081 #endif
1082  if (g->opt.disableFMA == false)
1083  options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
1084 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1085  if (g->NoOmitFramePointer)
1086  options.NoFramePointerElim = true;
1087 #ifdef ISPC_IS_WINDOWS
1088  if (strcmp("x86", arch) == 0) {
1089  // Workaround for issue #503 (LLVM issue 14646).
1090  // It's Win32 specific.
1091  options.NoFramePointerElim = true;
1092  }
1093 #endif
1094 #endif
1095  m_targetMachine =
1096  m_target->createTargetMachine(triple, m_cpu, featuresString, options,
1097  relocModel);
1098  Assert(m_targetMachine != NULL);
1099 
1100 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1101  m_targetMachine->setAsmVerbosityDefault(true);
1102 #else /* LLVM 3.7+ */
1103  m_targetMachine->Options.MCOptions.AsmVerbose = true;
1104 #endif
1105 
1106 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
1107  // Change default version of generated DWARF.
1108  if (g->generateDWARFVersion != 0) {
1109  m_targetMachine->Options.MCOptions.DwarfVersion = g->generateDWARFVersion;
1110  }
1111 #endif
1112 
1113  // Initialize TargetData/DataLayout in 3 steps.
1114  // 1. Get default data layout first
1115  std::string dl_string;
1116 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
1117  dl_string = m_targetMachine->getSubtargetImpl()->getDataLayout()->getStringRepresentation();
1118 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1119  dl_string = m_targetMachine->createDataLayout().getStringRepresentation();
1120 #else // LLVM 3.5- or LLVM 3.7
1121  dl_string = m_targetMachine->getDataLayout()->getStringRepresentation();
1122 #endif
1123  // 2. Adjust for generic
1124  if (m_isa == Target::GENERIC) {
1125  // <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
1126  // that to the regular datalayout string for IA..
1127  // For generic-4 target we need to treat <4 x i1> as 128 bit value
1128  // in terms of required memory storage and alignment, as this is
1129  // translated to __m128 type.
1130  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
1131  "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
1132  "f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
1133  }
1134 #ifdef ISPC_NVPTX_ENABLED
1135  else if (m_isa == Target::NVPTX)
1136  {
1137  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
1138  }
1139 #endif
1140 
1141  // 3. Finally set member data
1142  m_dataLayout = new llvm::DataLayout(dl_string);
1143 
1144  // Set is32Bit
1145  // This indicates if we are compiling for 32 bit platform
1146  // and can assume 32 bit runtime.
1147  // FIXME: all generic targets are handled as 64 bit, which is incorrect.
1148 
1149  this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
1150 
1151 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
1152  // This is LLVM 3.3+ feature.
1153  // Initialize target-specific "target-feature" attribute.
1154  if (!m_attributes.empty()) {
1155  llvm::AttrBuilder attrBuilder;
1156 #ifdef ISPC_NVPTX_ENABLED
1157  if (m_isa != Target::NVPTX)
1158 #endif
1159  attrBuilder.addAttribute("target-cpu", this->m_cpu);
1160  attrBuilder.addAttribute("target-features", this->m_attributes);
1161  this->m_tf_attributes = new llvm::AttributeSet(
1162  llvm::AttributeSet::get(
1163  *g->ctx,
1164  llvm::AttributeSet::FunctionIndex,
1165  attrBuilder));
1166  }
1167 #endif
1168 
1170  }
1171 
1172  m_valid = !error;
1173 
1174  if (printTarget) {
1175  printf("Target Triple: %s\n", m_targetMachine->getTargetTriple().str().c_str());
1176  printf("Target CPU: %s\n", m_targetMachine->getTargetCPU().str().c_str());
1177  printf("Target Feature String: %s\n", m_targetMachine->getTargetFeatureString().str().c_str());
1178  }
1179 
1180  return;
1181 }
1182 
1183 
1184 std::string
1186  AllCPUs a;
1187  return a.HumanReadableListOfNames();
1188 }
1189 
1190 
1191 const char *
1193  return
1194 #ifdef ISPC_ARM_ENABLED
1195  "arm, "
1196 #endif
1197  "x86, x86-64";
1198 }
1199 
1200 
1201 const char *
1203  return
1204  "host, sse2-i32x4, sse2-i32x8, "
1205  "sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
1206  "avx1-i32x4, "
1207  "avx1-i32x8, avx1-i32x16, avx1-i64x4, "
1208  "avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4, "
1209  "avx2-i32x8, avx2-i32x16, avx2-i64x4, "
1210 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1211  "avx512knl-i32x16, "
1212 #endif
1213 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1214  "avx512skx-i32x16, "
1215 #endif
1216  "generic-x1, generic-x4, generic-x8, generic-x16, "
1217  "generic-x32, generic-x64, *-generic-x16"
1218 #ifdef ISPC_ARM_ENABLED
1219  ", neon-i8x16, neon-i16x8, neon-i32x4"
1220 #endif
1221 #ifdef ISPC_NVPTX_ENABLED
1222  ", nvptx"
1223 #endif
1224 ;
1225 
1226 }
1227 
1228 
1229 std::string
1231  llvm::Triple triple;
1232 #ifdef ISPC_ARM_ENABLED
1233  if (m_arch == "arm") {
1234  triple.setTriple("armv7-eabi");
1235  }
1236  else
1237 #endif
1238  {
1239  // Start with the host triple as the default
1240  triple.setTriple(llvm::sys::getDefaultTargetTriple());
1241 
1242  // And override the arch in the host triple based on what the user
1243  // specified. Here we need to deal with the fact that LLVM uses one
1244  // naming convention for targets TargetRegistry, but wants some
1245  // slightly different ones for the triple. TODO: is there a way to
1246  // have it do this remapping, which would presumably be a bit less
1247  // error prone?
1248  if (m_arch == "x86")
1249  triple.setArchName("i386");
1250  else if (m_arch == "x86-64")
1251  triple.setArchName("x86_64");
1252 #ifdef ISPC_NVPTX_ENABLED
1253  else if (m_arch == "nvptx64")
1254  triple = llvm::Triple("nvptx64", "nvidia", "cuda");
1255 #endif /* ISPC_NVPTX_ENABLED */
1256  else
1257  triple.setArchName(m_arch);
1258  }
1259  return triple.str();
1260 }
1261 
1262 // This function returns string representation of ISA for the purpose of
1263 // mangling. And may return any unique string, preferably short, like
1264 // sse4, avx and etc.
1265 const char *
1267  switch (isa) {
1268 #ifdef ISPC_ARM_ENABLED
1269  case Target::NEON8:
1270  return "neon-8";
1271  case Target::NEON16:
1272  return "neon-16";
1273  case Target::NEON32:
1274  return "neon-32";
1275 #endif
1276  case Target::SSE2:
1277  return "sse2";
1278  case Target::SSE4:
1279  return "sse4";
1280  case Target::AVX:
1281  return "avx";
1282  case Target::AVX11:
1283  return "avx11";
1284  case Target::AVX2:
1285  return "avx2";
1286 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1287  case Target::KNL_AVX512:
1288  return "avx512knl";
1289 #endif
1290 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1291  case Target::SKX_AVX512:
1292  return "avx512skx";
1293 #endif
1294  case Target::GENERIC:
1295  return "generic";
1296 #ifdef ISPC_NVPTX_ENABLED
1297  case Target::NVPTX:
1298  return "nvptx";
1299 #endif /* ISPC_NVPTX_ENABLED */
1300  default:
1301  FATAL("Unhandled target in ISAToString()");
1302  }
1303  return "";
1304 }
1305 
1306 const char *
1308  return ISAToString(m_isa);
1309 }
1310 
1311 
1312 // This function returns string representation of default target corresponding
1313 // to ISA. I.e. for SSE4 it's sse4-i32x4, for AVX11 it's avx1.1-i32x8. This
1314 // string may be used to initialize Target.
1315 const char *
1317  switch (isa) {
1318 #ifdef ISPC_ARM_ENABLED
1319  case Target::NEON8:
1320  return "neon-8";
1321  case Target::NEON16:
1322  return "neon-16";
1323  case Target::NEON32:
1324  return "neon-32";
1325 #endif
1326  case Target::SSE2:
1327  return "sse2-i32x4";
1328  case Target::SSE4:
1329  return "sse4-i32x4";
1330  case Target::AVX:
1331  return "avx1-i32x8";
1332  case Target::AVX11:
1333  return "avx1.1-i32x8";
1334  case Target::AVX2:
1335  return "avx2-i32x8";
1336 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1337  case Target::KNL_AVX512:
1338  return "avx512knl-i32x16";
1339 #endif
1340 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1341  case Target::SKX_AVX512:
1342  return "avx512skx-i32x16";
1343 #endif
1344  case Target::GENERIC:
1345  return "generic-4";
1346 #ifdef ISPC_NVPTX_ENABLED
1347  case Target::NVPTX:
1348  return "nvptx";
1349 #endif /* ISPC_NVPTX_ENABLED */
1350  default:
1351  FATAL("Unhandled target in ISAToTargetString()");
1352  }
1353  return "";
1354 }
1355 
1356 
1357 const char *
1359  return ISAToString(m_isa);
1360 }
1361 
1362 
1363 static bool
1365  if (type->isFloatingPointTy() || type->isX86_MMXTy() || type->isVoidTy() ||
1366  type->isIntegerTy() || type->isLabelTy() || type->isMetadataTy())
1367  return false;
1368 
1369  if (type == LLVMTypes::BoolVectorType ||
1370  type == LLVMTypes::MaskType ||
1371  type == LLVMTypes::Int1VectorType)
1372  return true;
1373 
1374  llvm::ArrayType *at =
1375  llvm::dyn_cast<llvm::ArrayType>(type);
1376  if (at != NULL)
1377  return lGenericTypeLayoutIndeterminate(at->getElementType());
1378 
1379  llvm::PointerType *pt =
1380  llvm::dyn_cast<llvm::PointerType>(type);
1381  if (pt != NULL)
1382  return false;
1383 
1384  llvm::StructType *st =
1385  llvm::dyn_cast<llvm::StructType>(type);
1386  if (st != NULL) {
1387  for (int i = 0; i < (int)st->getNumElements(); ++i)
1388  if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
1389  return true;
1390  return false;
1391  }
1392 
1393  Assert(llvm::isa<llvm::VectorType>(type));
1394  return true;
1395 }
1396 
1397 
1398 llvm::Value *
1399 Target::SizeOf(llvm::Type *type,
1400  llvm::BasicBlock *insertAtEnd) {
1401  if (m_isa == Target::GENERIC &&
1403  llvm::Value *index[1] = { LLVMInt32(1) };
1404  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1405  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1406  llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
1407 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1408  llvm::Instruction *gep =
1409  llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
1410  insertAtEnd);
1411 #else /* LLVM 3.7+ */
1412  llvm::Instruction *gep =
1413  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr,
1414  arrayRef, "sizeof_gep",
1415  insertAtEnd);
1416 #endif
1418  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
1419  "sizeof_int", insertAtEnd);
1420  else
1421  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
1422  "sizeof_int", insertAtEnd);
1423  }
1424 
1425  uint64_t byteSize = getDataLayout()->getTypeStoreSize(type);
1427  return LLVMInt32((int32_t)byteSize);
1428  else
1429  return LLVMInt64(byteSize);
1430 }
1431 
1432 
1433 llvm::Value *
1434 Target::StructOffset(llvm::Type *type, int element,
1435  llvm::BasicBlock *insertAtEnd) {
1436  if (m_isa == Target::GENERIC &&
1437  lGenericTypeLayoutIndeterminate(type) == true) {
1438  llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
1439  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1440  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1441  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1442 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1443  llvm::Instruction *gep =
1444  llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
1445  insertAtEnd);
1446 #else /* LLVM 3.7+ */
1447  llvm::Instruction *gep =
1448  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr,
1449  arrayRef, "offset_gep",
1450  insertAtEnd);
1451 #endif
1453  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
1454  "offset_int", insertAtEnd);
1455  else
1456  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
1457  "offset_int", insertAtEnd);
1458  }
1459 
1460  llvm::StructType *structType =
1461  llvm::dyn_cast<llvm::StructType>(type);
1462  if (structType == NULL || structType->isSized() == false) {
1463  Assert(m->errorCount > 0);
1464  return NULL;
1465  }
1466 
1467  const llvm::StructLayout *sl = getDataLayout()->getStructLayout(structType);
1468  Assert(sl != NULL);
1469 
1470  uint64_t offset = sl->getElementOffset(element);
1472  return LLVMInt32((int32_t)offset);
1473  else
1474  return LLVMInt64(offset);
1475 }
1476 
1477 void Target::markFuncWithTargetAttr(llvm::Function* func) {
1478 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
1479  if (m_tf_attributes) {
1480  func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
1481  }
1482 #endif
1483 }
1484 
1485 
1486 ///////////////////////////////////////////////////////////////////////////
1487 // Opt
1488 
1490  level = 1;
1491  fastMath = false;
1492  fastMaskedVload = false;
1493  force32BitAddressing = true;
1494  unrollLoops = true;
1495  disableAsserts = false;
1496  disableFMA = false;
1497  forceAlignedMemory = false;
1498  disableMaskAllOnOptimizations = false;
1499  disableHandlePseudoMemoryOps = false;
1500  disableBlendedMaskedStores = false;
1501  disableCoherentControlFlow = false;
1502  disableUniformControlFlow = false;
1503  disableGatherScatterOptimizations = false;
1504  disableMaskedStoreToStore = false;
1505  disableGatherScatterFlattening = false;
1506  disableUniformMemoryOptimizations = false;
1507  disableCoalescing = false;
1508 }
1509 
1510 ///////////////////////////////////////////////////////////////////////////
1511 // Globals
1512 
1514  mathLib = Globals::Math_ISPC;
1515 
1516  includeStdlib = true;
1517  runCPP = true;
1518  debugPrint = false;
1519  printTarget = false;
1520  NoOmitFramePointer = false;
1521  debugIR = -1;
1522  disableWarnings = false;
1523  warningsAsErrors = false;
1524  quiet = false;
1525  forceColoredOutput = false;
1526  disableLineWrap = false;
1527  emitPerfWarnings = true;
1528  emitInstrumentation = false;
1529  generateDebuggingSymbols = false;
1530 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
1531  generateDWARFVersion = 0;
1532 #endif
1533  enableFuzzTest = false;
1534  fuzzTestSeed = -1;
1535  mangleFunctionsWithTarget = false;
1536 
1537  ctx = new llvm::LLVMContext;
1538 
1539 #ifdef ISPC_IS_WINDOWS
1540  _getcwd(currentDirectory, sizeof(currentDirectory));
1541 #else
1542  if (getcwd(currentDirectory, sizeof(currentDirectory)) == NULL)
1543  FATAL("Current directory path too long!");
1544 #endif
1545  forceAlignment = -1;
1546  dllExport = false;
1547 }
1548 
1549 ///////////////////////////////////////////////////////////////////////////
1550 // SourcePos
1551 
1552 SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
1553  name = n;
1554  if (name == NULL) {
1555  if (m != NULL)
1556  name = m->module->getModuleIdentifier().c_str();
1557  else
1558  name = "(unknown)";
1559  }
1560  first_line = fl;
1561  first_column = fc;
1562  last_line = ll != 0 ? ll : fl;
1563  last_column = lc != 0 ? lc : fc;
1564 }
1565 
1566 
1567 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1568 llvm::DIFile
1569 #else /* LLVM 3.7+ */
1570 llvm::DIFile*
1571 //llvm::MDFile*
1572 #endif
1574  std::string directory, filename;
1575  GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
1576 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1577  llvm::DIFile ret = m->diBuilder->createFile(filename, directory);
1578  Assert(ret.Verify());
1579 #else /* LLVM 3.7+ */
1580  llvm::DIFile *ret = m->diBuilder->createFile(filename, directory);
1581  //llvm::MDFile *ret = m->diBuilder->createFile(filename, directory);
1582 #endif
1583  return ret;
1584 }
1585 
1586 
1587 void
1589  printf(" @ [%s:%d.%d - %d.%d] ", name, first_line, first_column,
1590  last_line, last_column);
1591 }
1592 
1593 
1594 bool
1596  return (!strcmp(name, p2.name) &&
1597  first_line == p2.first_line &&
1598  first_column == p2.first_column &&
1599  last_line == p2.last_line &&
1600  last_column == p2.last_column);
1601 }
1602 
1603 
1604 SourcePos
1605 Union(const SourcePos &p1, const SourcePos &p2) {
1606  if (strcmp(p1.name, p2.name) != 0)
1607  return p1;
1608 
1609  SourcePos ret;
1610  ret.name = p1.name;
1611  ret.first_line = std::min(p1.first_line, p2.first_line);
1612  ret.first_column = std::min(p1.first_column, p2.first_column);
1613  ret.last_line = std::max(p1.last_line, p2.last_line);
1614  ret.last_column = std::max(p1.last_column, p2.last_column);
1615  return ret;
1616 }
bool disableFMA
Definition: ispc.h:459
#define CPU_Broadwell
bool m_hasTranscendentals
Definition: ispc.h:404
#define CPU_Silvermont
Globals()
Definition: ispc.cpp:1513
Opt opt
Definition: ispc.h:541
int last_column
Definition: ispc.h:142
const llvm::Target * m_target
Definition: ispc.h:312
static bool __os_has_avx_support()
Definition: ispc.cpp:112
This structure collects together a number of global variables.
Definition: ispc.h:537
const char * GetISATargetString() const
Definition: ispc.cpp:1358
int m_nativeVectorAlignment
Definition: ispc.h:364
AllCPUs()
Definition: ispc.cpp:304
SourcePos Union(const SourcePos &p1, const SourcePos &p2)
Definition: ispc.cpp:1605
int first_line
Definition: ispc.h:139
llvm::Value * SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1399
SourcePos(const char *n=NULL, int fl=0, int fc=0, int ll=0, int lc=0)
Definition: ispc.cpp:1552
static llvm::VectorType * BoolVectorType
Definition: llvmutil.h:88
std::string m_cpu
Definition: ispc.h:342
Opt()
Definition: ispc.cpp:1489
std::string m_arch
Definition: ispc.h:336
static const char * lGetSystemISA()
Definition: ispc.cpp:145
bool NoOmitFramePointer
Definition: ispc.h:566
std::string HumanReadableListOfNames()
Definition: ispc.cpp:423
#define Assert(expr)
Definition: ispc.h:170
bool BackwardCompatible(CPUtype what, CPUtype with)
Definition: ispc.cpp:455
static bool lGenericTypeLayoutIndeterminate(llvm::Type *type)
Definition: ispc.cpp:1364
const char * GetISAString() const
Definition: ispc.cpp:1307
int m_nativeVectorWidth
Definition: ispc.h:357
Module * m
Definition: ispc.cpp:89
static const char * ISAToString(Target::ISA isa)
Definition: ispc.cpp:1266
bool m_generatePIC
Definition: ispc.h:376
static const char * SupportedArchs()
Definition: ispc.cpp:1192
std::set< CPUtype > Set(int type,...)
Definition: ispc.cpp:290
bool m_maskingIsFree
Definition: ispc.h:382
static llvm::VectorType * Int1VectorType
Definition: llvmutil.h:89
void GetDirectoryAndFileName(const std::string &currentDir, const std::string &relativeName, std::string *directory, std::string *filename)
Definition: util.cpp:547
Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genenricAsSmth="")
Definition: ispc.cpp:463
static void __cpuidex(int info[4], int level, int count)
Definition: ispc.cpp:102
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:263
std::string m_attributes
Definition: ispc.h:345
llvm::Module * module
Definition: module.h:158
static std::string SupportedCPUs()
Definition: ispc.cpp:1185
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:385
Definition: module.h:57
static llvm::Type * Int64Type
Definition: llvmutil.h:75
char currentDirectory[1024]
Definition: ispc.h:636
std::string & GetDefaultNameFromType(CPUtype type)
Definition: ispc.cpp:439
Header file with declarations for various LLVM utility stuff.
bool m_is32Bit
Definition: ispc.h:339
bool m_hasRand
Definition: ispc.h:394
bool m_hasRcpd
Definition: ispc.h:413
int m_maskBitCount
Definition: ispc.h:387
static void __cpuid(int info[4], int infoType)
Definition: ispc.cpp:95
Representation of a range of positions in a source file.
Definition: ispc.h:134
llvm::Value * StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1434
int m_vectorWidth
Definition: ispc.h:373
llvm::TargetMachine * m_targetMachine
Definition: ispc.h:322
bool force32BitAddressing
Definition: ispc.h:451
static bool __os_has_avx512_support()
Definition: ispc.cpp:127
const char * name
Definition: ispc.h:138
void markFuncWithTargetAttr(llvm::Function *func)
Definition: ispc.cpp:1477
bool operator==(const SourcePos &p2) const
Definition: ispc.cpp:1595
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:410
#define FATAL(message)
Definition: util.h:113
int m_dataTypeWidth
Definition: ispc.h:368
static llvm::Type * Int32Type
Definition: llvmutil.h:74
int last_line
Definition: ispc.h:141
#define PTYPE(p)
Definition: llvmutil.h:54
#define ISPC_MAX_NVEC
Definition: ispc.h:68
std::vector< std::vector< std::string > > names
Definition: ispc.cpp:287
bool m_hasGather
Definition: ispc.h:397
int first_column
Definition: ispc.h:140
llvm::DataLayout * m_dataLayout
Definition: ispc.h:323
void Print() const
Definition: ispc.cpp:1588
bool m_hasScatter
Definition: ispc.h:400
ISA
Definition: ispc.h:189
llvm::DIFile GetDIFile() const
Definition: ispc.cpp:1573
bool m_valid
Definition: ispc.h:327
Globals * g
Definition: ispc.cpp:88
static llvm::VectorType * MaskType
Definition: llvmutil.h:86
std::vector< std::set< CPUtype > > compat
Definition: ispc.cpp:288
bool m_hasTrigonometry
Definition: ispc.h:407
std::string m_treatGenericAsSmth
Definition: ispc.h:333
bool m_hasHalf
Definition: ispc.h:391
const llvm::DataLayout * getDataLayout() const
Definition: ispc.h:262
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
llvm::ConstantInt * LLVMInt64(int64_t i)
Definition: llvmutil.cpp:277
int errorCount
Definition: module.h:151
llvm::LLVMContext * ctx
Definition: ispc.h:632
static const char * SupportedTargets()
Definition: ispc.cpp:1202
ISA m_isa
Definition: ispc.h:330
bool m_hasVecPrefetch
Definition: ispc.h:416
llvm::DIBuilder * diBuilder
Definition: module.h:161
Main ispc header file. Defines the Target, Globals, and Opt classes.
CPUtype GetTypeFromName(std::string name)
Definition: ispc.cpp:444
std::string GetTripleString() const
Definition: ispc.cpp:1230
static const char * ISAToTargetString(Target::ISA isa)
Definition: ispc.cpp:1316
bool m_hasRsqrtd
Definition: ispc.h:410
CPUtype
Definition: ispc.cpp:207