Intel SPMD Program Compiler  1.9.2
ispc.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2016, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ispc.cpp
35  @brief ispc global definitions
36 */
37 
38 #include "ispc.h"
39 #include "module.h"
40 #include "util.h"
41 #include "llvmutil.h"
42 #include <stdio.h>
43 #include <sstream>
44 #include <stdarg.h> /* va_list, va_start, va_arg, va_end */
45 #ifdef ISPC_IS_WINDOWS
46  #include <windows.h>
47  #include <direct.h>
48  #define strcasecmp stricmp
49 #else
50  #include <sys/types.h>
51  #include <unistd.h>
52 #endif
53 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
54  #include <llvm/LLVMContext.h>
55  #include <llvm/Module.h>
56  #include <llvm/Instructions.h>
57 #else /* 3.3+ */
58  #include <llvm/IR/LLVMContext.h>
59  #include <llvm/IR/Module.h>
60  #include <llvm/IR/Instructions.h>
61 #endif
62 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
63  #include <llvm/Target/TargetSubtargetInfo.h>
64  #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
65  #include <llvm/Target/TargetLowering.h>
66  #endif
67 #endif
68 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
69  #include <llvm/IR/DebugInfo.h>
70  #include <llvm/IR/DIBuilder.h>
71 #else // LLVM 3.2, 3.3, 3.4
72  #include <llvm/DebugInfo.h>
73  #include <llvm/DIBuilder.h>
74 #endif
75 #if ISPC_LLVM_VERSION >= ISPC_LLVM_5_0 // LLVM 5.0+
76  #include <llvm/BinaryFormat/Dwarf.h>
77 #else // LLVM up to 4.x
78  #include <llvm/Support/Dwarf.h>
79 #endif
80 #include <llvm/Target/TargetMachine.h>
81 #include <llvm/Target/TargetOptions.h>
82 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
83  #include <llvm/DataLayout.h>
84 #else // LLVM 3.3+
85  #include <llvm/IR/DataLayout.h>
86  #include <llvm/IR/Attributes.h>
87 #endif
88 #include <llvm/Support/TargetRegistry.h>
89 #include <llvm/Support/TargetSelect.h>
90 #include <llvm/Support/Host.h>
91 
94 
95 ///////////////////////////////////////////////////////////////////////////
96 // Target
97 
98 #if !defined(ISPC_IS_WINDOWS) && !defined(__arm__)
/** Non-Windows stand-in for the MSVC __cpuid intrinsic.

    Executes the CPUID instruction for leaf @p infoType and stores the
    resulting registers in @p info in the order EAX, EBX, ECX, EDX
    (info[0] .. info[3]).

    ECX is explicitly set to zero before the instruction runs.  The MSVC
    intrinsic this function emulates does the same, and without it leaves
    that take a sub-leaf index in ECX would be queried with whatever value
    happened to be in the register.

    @param info     Receives EAX, EBX, ECX and EDX, in that order.
    @param infoType CPUID leaf to query (loaded into EAX).
 */
static void __cpuid(int info[4], int infoType) {
    __asm__ __volatile__ ("cpuid"
                          : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
                          : "0" (infoType), "2" (0));
}
104 
/** Non-Windows stand-in for the MSVC __cpuidex intrinsic.

    Executes CPUID for leaf @p level / sub-leaf @p count and stores EAX,
    EBX, ECX and EDX in info[0] .. info[3].

    The EBX result is routed through a scratch register and the caller's
    value is swapped back afterwards, in case %ebx is the PIC register.

    On x86-64 the swap must be done on the full 64-bit %rbx: any 32-bit
    write to %ebx (the xchgl itself, and the cpuid result) zero-extends
    into %rbx, so a 32-bit swap would silently clear the upper half of a
    callee-saved register the compiler believes is untouched.  The 64-bit
    form below is the one GCC's <cpuid.h> uses for the same purpose.

    @param info  Receives EAX, EBX, ECX and EDX, in that order.
    @param level CPUID leaf to query (loaded into EAX).
    @param count CPUID sub-leaf to query (loaded into ECX).
 */
static void __cpuidex(int info[4], int level, int count) {
#if defined(__x86_64__)
    // Preserve all 64 bits of %rbx; only the low 32 bits of the scratch
    // register are stored to info[1] afterwards.
    __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1\n\t"
                          "cpuid\n\t"
                          "xchg{q}\t{%%}rbx, %q1\n\t"
                          : "=a" (info[0]), "=&r" (info[1]), "=c" (info[2]), "=d" (info[3])
                          : "0" (level), "2" (count));
#else
    /* Save %ebx in case it's the PIC register */
    __asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"
                          "cpuid\n\t"
                          "xchg{l}\t{%%}ebx, %1\n\t"
                          : "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
                          : "0" (level), "2" (count));
#endif
}
113 #endif // !ISPC_IS_WINDOWS && !__arm__
114 
115 #if !defined(__arm__)
/** Reports whether the OS will save the YMM registers.

    Reads extended control register 0 and checks that bits 1 and 2
    (mask 0x6) are both set.

    @return true when both state bits are enabled, false otherwise.
 */
static bool __os_has_avx_support() {
    const unsigned long long requiredState = 6;
#if defined(ISPC_IS_WINDOWS)
    const unsigned long long enabledState = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#else // !defined(ISPC_IS_WINDOWS)
    // xgetbv is emitted as raw bytes: older assemblers do not know the
    // mnemonic, and there is no easy way to conditionally compile based on
    // the assembler in use.  ECX = 0 selects the register to read.
    int xcrLow, xcrHigh;
    __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0" : "=a" (xcrLow), "=d" (xcrHigh) : "c" (0));
    // Only the low 32 bits (EAX) carry the bits of interest.
    const unsigned long long enabledState = (unsigned int)xcrLow;
#endif // !defined(ISPC_IS_WINDOWS)
    return (enabledState & requiredState) == requiredState;
}
130 
/** Reports whether the OS saves the XMM, YMM and ZMM registers, i.e. it
    supports AVX2 and AVX512.

    Reads extended control register 0 and checks that every bit of the
    mask 0xE6 is set.
    See section 2.1 of software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf

    @return true when all required state bits are enabled, false otherwise.
 */
static bool __os_has_avx512_support() {
    const unsigned long long requiredState = 0xE6;
#if defined(ISPC_IS_WINDOWS)
    const unsigned long long enabledState = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#else // !defined(ISPC_IS_WINDOWS)
    // xgetbv is emitted as raw bytes: older assemblers do not know the
    // mnemonic, and there is no easy way to conditionally compile based on
    // the assembler in use.  ECX = 0 selects the register to read.
    int xcrLow, xcrHigh;
    __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0" : "=a" (xcrLow), "=d" (xcrHigh) : "c" (0));
    // Only the low 32 bits (EAX) carry the bits of interest.
    const unsigned long long enabledState = (unsigned int)xcrLow;
#endif // !defined(ISPC_IS_WINDOWS)
    return (enabledState & requiredState) == requiredState;
}
146 #endif // !__arm__
147 
// Picks the ISA target string that best matches the machine the compiler is
// running on.  It is called from the Target constructor as lGetSystemISA()
// when no target was given or the target is "host".
//
// Returns one of the string literals below.  On __arm__ builds this is always
// "neon-i32x4"; otherwise the choice is driven by CPUID leaf 1 (info) and
// leaf 7 / sub-leaf 0 (info2), where index 1 is EBX, 2 is ECX and 3 is EDX.
// If no supported SSE/AVX level is found, an error is reported and the
// process exits with status 1.
//
// NOTE(review): this listing's embedded numbering skips 149 and 163, so the
// line carrying the function name / opening brace and the last line of the
// first AVX-512 condition are not visible here -- confirm against the real
// source before editing the logic.
148 static const char *
150 #ifdef __arm__
151  return "neon-i32x4";
152 #else
153  int info[4];
154  __cpuid(info, 1);
155 
156  int info2[4];
157  // Call cpuid with eax=7, ecx=0
158  __cpuidex(info2, 7, 0);
159 
// AVX-512 candidates: the visible part of the condition requires OSXSAVE,
// AVX2 and AVX512 F.
160  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
161  (info2[1] & (1 << 5)) != 0 && // AVX2
162  (info2[1] & (1 << 16)) != 0 && // AVX512 F
164  // We need to verify that AVX2 is also available,
165  // as well as AVX512, because our targets are supposed
166  // to use both.
167 
// DQ + CDI + BW + VL selects the SKX target.
168  if ((info2[1] & (1 << 17)) != 0 && // AVX512 DQ
169  (info2[1] & (1 << 28)) != 0 && // AVX512 CDI
170  (info2[1] & (1 << 30)) != 0 && // AVX512 BW
171  (info2[1] & (1 << 31)) != 0) { // AVX512 VL
172  return "avx512skx-i32x16";
173  }
// PF + ER + CDI selects the KNL target.
174  else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
175  (info2[1] & (1 << 27)) != 0 && // AVX512 ER
176  (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
177  return "avx512knl-i32x16";
178  }
179  // If it's unknown AVX512 target, fall through and use AVX2
180  // or whatever is available in the machine.
181  }
182 
// AVX family: needs OSXSAVE, the AVX feature bit, and the OS-level check
// performed by __os_has_avx_support().
183  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
184  (info[2] & (1 << 28)) != 0 &&
185  __os_has_avx_support()) { // AVX
186  // AVX1 for sure....
187  // Ivy Bridge?
188  if ((info[2] & (1 << 29)) != 0 && // F16C
189  (info[2] & (1 << 30)) != 0) { // RDRAND
190  // So far, so good. AVX2?
191  if ((info2[1] & (1 << 5)) != 0)
192  return "avx2-i32x8";
193  else
194  return "avx1.1-i32x8";
195  }
196  // Regular AVX
197  return "avx1-i32x8";
198  }
// No usable AVX: fall back on leaf-1 ECX bit 19, then leaf-1 EDX bit 26.
199  else if ((info[2] & (1 << 19)) != 0)
200  return "sse4-i32x4";
201  else if ((info[3] & (1 << 26)) != 0)
202  return "sse2-i32x4";
203  else {
204  Error(SourcePos(), "Unable to detect supported SSE/AVX ISA. Exiting.");
205  exit(1);
206  }
207 #endif
208 }
209 
210 
// Identifiers for every CPU the compiler knows about.  The values are used
// as indices into the per-CPU tables of AllCPUs (names / compat), and
// CPU_None (0) also serves as the terminator of the variadic list taken by
// AllCPUs::Set().
//
// NOTE(review): this listing has dropped most enumerator lines (its embedded
// numbering skips 216, 219, 223, 226, 229, 232, 235, 238, 242, 266 and 285).
// Enumerators used elsewhere in this file but not visible here include
// CPU_Generic, CPU_Bonnell, CPU_Core2, CPU_Penryn, CPU_Nehalem,
// CPU_SandyBridge, CPU_IvyBridge, CPU_Haswell, CPU_Broadwell,
// CPU_Silvermont and sizeofCPUtype -- confirm their order against the real
// source.
211 typedef enum {
212  // Special value, indicates that no CPU is present.
213  CPU_None = 0,
214 
215  // 'Generic' CPU without any hardware SIMD capabilities.
217 
218  // Early Atom CPU. Supports SSSE3.
220 
221  // Generic Core2-like. Supports SSSE3. Isn't quite compatible with Bonnell,
222  // but for ISPC the difference is negligible; ISPC doesn't make use of it.
224 
225  // Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2).
227 
228  // Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT.
230 
231  // Sandy Bridge. Supports AVX 1.
233 
234  // Ivy Bridge. Supports AVX 1 + RDRAND.
236 
237  // Haswell. Supports AVX 2.
239 
240 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
241  // Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP.
243 #endif
244 
245 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
246  // Knights Landing - Xeon Phi.
247  // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
248  // AVX-512CDI: Conflict Detection;
249  // AVX-512ERI & PRI: 28-bit precision RCP, RSQRT and EXP transcendentals,
250  // new prefetch instructions.
251  CPU_KNL,
252 #endif
253 
254 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
255  // Skylake Xeon.
256  // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
257  // AVX-512CDI: Conflict Detection;
258  // AVX-512VL: Vector Length Orthogonality;
259  // AVX-512DQ: New HPC ISA (vs AVX512F);
260  // AVX-512BW: Byte and Word Support.
261  CPU_SKX,
262 #endif
263 
264 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
265  // Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
267 #endif
268 
269  // FIXME: LLVM supports a ton of different ARM CPU variants--not just
270  // cortex-a9 and a15. We should be able to handle any of them that also
271  // have NEON support.
272 #ifdef ISPC_ARM_ENABLED
273  // ARM Cortex A15. Supports NEON VFPv4.
274  CPU_CortexA15,
275 
276  // ARM Cortex A9. Supports NEON VFPv3.
277  CPU_CortexA9,
278 #endif
279 
280 #ifdef ISPC_NVPTX_ENABLED
281  // NVidia CUDA-compatible SM-35 architecture.
282  CPU_SM35,
283 #endif
284 
286 } CPUtype;
287 
288 
289 class AllCPUs {
290 private:
291  std::vector<std::vector<std::string> > names;
292  std::vector<std::set<CPUtype> > compat;
293 
294  std::set<CPUtype> Set(int type, ...) {
295  std::set<CPUtype> retn;
296  va_list args;
297 
298  retn.insert((CPUtype)type);
299  va_start(args, type);
300  while ((type = va_arg(args, int)) != CPU_None)
301  retn.insert((CPUtype)type);
302  va_end(args);
303 
304  return retn;
305  }
306 
307 public:
// Body of the AllCPUs constructor: fills the two per-CPU tables.
//
//  * names[cpu]  - every spelling accepted for that CPU.  The first entry
//                  pushed is the default name (what GetDefaultNameFromType()
//                  returns); the rest are listed as synonyms by
//                  HumanReadableListOfNames().
//  * compat[cpu] - the set of CPU types that cpu is recorded as compatible
//                  with, queried by BackwardCompatible().  Each set is built
//                  with Set(), whose argument list ends with CPU_None.
//
// NOTE(review): this listing has dropped lines here.  The constructor's
// header line (embedded number 308) is missing, and so are the opening lines
// of most compat[...] assignments (366-367, 373-375, 380-382, 388-391,
// 393-399, 401-405, 407-408, 410, 412), leaving only their trailing
// "CPU_None);" lines.  Confirm against the real source before changing
// anything.
309  names = std::vector<std::vector<std::string> >(sizeofCPUtype);
310  compat = std::vector<std::set<CPUtype> >(sizeofCPUtype);
311 
// CPU_None gets an empty name.
312  names[CPU_None].push_back("");
313 
314  names[CPU_Generic].push_back("generic");
315 
316  names[CPU_Bonnell].push_back("atom");
317  names[CPU_Bonnell].push_back("bonnell");
318 
319  names[CPU_Core2].push_back("core2");
320 
321  names[CPU_Penryn].push_back("penryn");
322 
323 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
324  names[CPU_Silvermont].push_back("slm");
325  names[CPU_Silvermont].push_back("silvermont");
326 #endif
327 
328  names[CPU_Nehalem].push_back("corei7");
329  names[CPU_Nehalem].push_back("nehalem");
330 
331  names[CPU_SandyBridge].push_back("corei7-avx");
332  names[CPU_SandyBridge].push_back("sandybridge");
333 
334  names[CPU_IvyBridge].push_back("core-avx-i");
335  names[CPU_IvyBridge].push_back("ivybridge");
336 
337  names[CPU_Haswell].push_back("core-avx2");
338  names[CPU_Haswell].push_back("haswell");
339 
340 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
341  names[CPU_Broadwell].push_back("broadwell");
342 #endif
343 
344 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
345  names[CPU_KNL].push_back("knl");
346 #endif
347 
348 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
349  names[CPU_SKX].push_back("skx");
350 #endif
351 
352 #ifdef ISPC_ARM_ENABLED
353  names[CPU_CortexA15].push_back("cortex-a15");
354 
355  names[CPU_CortexA9].push_back("cortex-a9");
356 #endif
357 
358 #ifdef ISPC_NVPTX_ENABLED
359  names[CPU_SM35].push_back("sm_35");
360 #endif
361 
362 
// With LLVM 3.2/3.3, CPU_Silvermont becomes a macro alias for CPU_Nehalem.
// Being a #define, the alias stays in effect for the rest of the file.
363 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 // LLVM 3.2 or 3.3
364  #define CPU_Silvermont CPU_Nehalem
365 #else /* LLVM 3.4+ */
368  CPU_None);
369 #endif
370 
371 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
372  compat[CPU_KNL] = Set(CPU_KNL, CPU_Generic, CPU_Bonnell, CPU_Penryn,
376 #endif
377 
378 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
379  compat[CPU_SKX] = Set(CPU_SKX, CPU_Bonnell, CPU_Penryn,
383 #endif
384 
// Same aliasing trick for Broadwell: with LLVM 3.5 and older, CPU_Broadwell
// becomes a macro alias for CPU_Haswell.
385 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
386  #define CPU_Broadwell CPU_Haswell
387 #else /* LLVM 3.6+ */
392 #endif
400  CPU_None);
406  CPU_None);
409  CPU_None);
411  CPU_None);
413  CPU_None);
// The generic CPU is compatible only with itself.
414  compat[CPU_Generic] = Set(CPU_Generic, CPU_None);
415 
416 #ifdef ISPC_ARM_ENABLED
417  compat[CPU_CortexA15] = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15,
418  CPU_None);
419  compat[CPU_CortexA9] = Set(CPU_Generic, CPU_CortexA9, CPU_None);
420 #endif
421 
422 #ifdef ISPC_NVPTX_ENABLED
423  compat[CPU_SM35] = Set(CPU_Generic, CPU_SM35, CPU_None);
424 #endif
425  }
426 
427  std::string HumanReadableListOfNames() {
428  std::stringstream CPUs;
429  for (int i = CPU_Generic; i < sizeofCPUtype; i++) {
430  CPUs << names[i][0];
431  if (names[i].size() > 1) {
432  CPUs << " (synonyms: " << names[i][1];
433  for (int j = 2, je = names[i].size(); j < je; j++)
434  CPUs << ", " << names[i][j];
435  CPUs << ")";
436  }
437  if (i < sizeofCPUtype - 1)
438  CPUs << ", ";
439  }
440  return CPUs.str();
441  }
442 
443  std::string &GetDefaultNameFromType(CPUtype type) {
444  Assert((type >= CPU_None) && (type < sizeofCPUtype));
445  return names[type][0];
446  }
447 
448  CPUtype GetTypeFromName(std::string name) {
449  CPUtype retn = CPU_None;
450 
451  for (int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++)
452  for (int j = 0, je = names[i].size();
453  (retn == CPU_None) && (j < je); j++)
454  if (!name.compare(names[i][j]))
455  retn = (CPUtype)i;
456  return retn;
457  }
458 
459  bool BackwardCompatible(CPUtype what, CPUtype with) {
460  Assert((what > CPU_None) && (what < sizeofCPUtype));
461  Assert((with > CPU_None) && (with < sizeofCPUtype));
462  return compat[what].find(with) != compat[what].end();
463  }
464 };
465 
466 
467 Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genericAsSmth) :
468  m_target(NULL),
469  m_targetMachine(NULL),
470  m_dataLayout(NULL),
471  m_valid(false),
472  m_isa(SSE2),
473  m_treatGenericAsSmth(genericAsSmth),
474  m_arch(""),
475  m_is32Bit(true),
476  m_cpu(""),
477  m_attributes(""),
478 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
479  m_tf_attributes(NULL),
480 #endif
481  m_nativeVectorWidth(-1),
482  m_nativeVectorAlignment(-1),
483  m_dataTypeWidth(-1),
484  m_vectorWidth(-1),
485  m_generatePIC(pic),
486  m_maskingIsFree(false),
487  m_maskBitCount(-1),
488  m_hasHalf(false),
489  m_hasRand(false),
490  m_hasGather(false),
491  m_hasScatter(false),
492  m_hasTranscendentals(false),
493  m_hasTrigonometry(false),
494  m_hasRsqrtd(false),
495  m_hasRcpd(false),
496  m_hasVecPrefetch(false)
497 {
498  CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
499  AllCPUs a;
500 
501  if (cpu) {
502  CPUID = a.GetTypeFromName(cpu);
503  if (CPUID == CPU_None) {
504  Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported"
505  " CPUs: %s.", cpu, a.HumanReadableListOfNames().c_str());
506  return;
507  }
508  }
509 
510  if (isa == NULL) {
511  // If a CPU was specified explicitly, try to pick the best
512  // possible ISA based on that.
513  switch (CPUID) {
514  case CPU_None:
515  // No CPU and no ISA, so use system info to figure out
516  // what this CPU supports.
517  isa = lGetSystemISA();
518  Warning(SourcePos(), "No --target specified on command-line."
519  " Using default system target \"%s\".", isa);
520  break;
521 
522  case CPU_Generic:
523  isa = "generic-1";
524  break;
525 
526 #ifdef ISPC_NVPTX_ENABLED
527  case CPU_SM35:
528  isa = "nvptx";
529  break;
530 #endif
531 
532 #ifdef ISPC_ARM_ENABLED
533  case CPU_CortexA9:
534  case CPU_CortexA15:
535  isa = "neon-i32x4";
536  break;
537 #endif
538 
539 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
540  case CPU_KNL:
541  isa = "avx512knl-i32x16";
542  break;
543 #endif
544 
545 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
546  case CPU_SKX:
547  isa = "avx512skx-i32x16";
548  break;
549 #endif
550 
551 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
552  case CPU_Broadwell:
553 #endif
554  case CPU_Haswell:
555  isa = "avx2-i32x8";
556  break;
557 
558  case CPU_IvyBridge:
559  isa = "avx1.1-i32x8";
560  break;
561 
562  case CPU_SandyBridge:
563  isa = "avx1-i32x8";
564  break;
565 
566  // Penryn is here because ISPC does not use SSE 4.2
567  case CPU_Penryn:
568  case CPU_Nehalem:
569 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4
570  case CPU_Silvermont:
571 #endif
572  isa = "sse4-i32x4";
573  break;
574 
575  default:
576  isa = "sse2-i32x4";
577  break;
578  }
579  if (CPUID != CPU_None)
580  Warning(SourcePos(), "No --target specified on command-line."
581  " Using ISA \"%s\" based on specified CPU \"%s\".",
582  isa, cpu);
583  }
584 
585  if (!strcasecmp(isa, "host")) {
586  isa = lGetSystemISA();
587  }
588 
589  if (arch == NULL) {
590 #ifdef ISPC_ARM_ENABLED
591  if (!strncmp(isa, "neon", 4))
592  arch = "arm";
593  else
594 #endif
595 #ifdef ISPC_NVPTX_ENABLED
596  if(!strncmp(isa, "nvptx", 5))
597  arch = "nvptx64";
598  else
599 #endif /* ISPC_NVPTX_ENABLED */
600  arch = "x86-64";
601  }
602 
603  // Define arch alias
604  if (std::string(arch) == "x86_64")
605  arch = "x86-64";
606 
607  bool error = false;
608 
609  // Make sure the target architecture is a known one; print an error
610  // with the valid ones otherwise.
611 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
612  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::targets().begin();
613  iter != llvm::TargetRegistry::targets().end(); ++iter) {
614 #else
615  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin();
616  iter != llvm::TargetRegistry::end(); ++iter) {
617 #endif
618  if (std::string(arch) == iter->getName()) {
619  this->m_target = &*iter;
620  break;
621  }
622  }
623  if (this->m_target == NULL) {
624  fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
625  llvm::TargetRegistry::iterator iter;
626 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
627  for (iter = llvm::TargetRegistry::targets().begin();
628  iter != llvm::TargetRegistry::targets().end(); ++iter)
629 #else
630  for (iter = llvm::TargetRegistry::begin();
631  iter != llvm::TargetRegistry::end(); ++iter)
632 #endif
633  fprintf(stderr, "%s ", iter->getName());
634  fprintf(stderr, "\n");
635  error = true;
636  }
637  else {
638  this->m_arch = arch;
639  }
640 
641  // Check default LLVM generated targets
642  if (!strcasecmp(isa, "sse2") ||
643  !strcasecmp(isa, "sse2-i32x4")) {
644  this->m_isa = Target::SSE2;
645  this->m_nativeVectorWidth = 4;
646  this->m_nativeVectorAlignment = 16;
647  this->m_dataTypeWidth = 32;
648  this->m_vectorWidth = 4;
649  this->m_maskingIsFree = false;
650  this->m_maskBitCount = 32;
651  CPUfromISA = CPU_Core2;
652  }
653  else if (!strcasecmp(isa, "sse2-x2") ||
654  !strcasecmp(isa, "sse2-i32x8")) {
655  this->m_isa = Target::SSE2;
656  this->m_nativeVectorWidth = 4;
657  this->m_nativeVectorAlignment = 16;
658  this->m_dataTypeWidth = 32;
659  this->m_vectorWidth = 8;
660  this->m_maskingIsFree = false;
661  this->m_maskBitCount = 32;
662  CPUfromISA = CPU_Core2;
663  }
664  else if (!strcasecmp(isa, "sse4") ||
665  !strcasecmp(isa, "sse4-i32x4")) {
666  this->m_isa = Target::SSE4;
667  this->m_nativeVectorWidth = 4;
668  this->m_nativeVectorAlignment = 16;
669  this->m_dataTypeWidth = 32;
670  this->m_vectorWidth = 4;
671  this->m_maskingIsFree = false;
672  this->m_maskBitCount = 32;
673  CPUfromISA = CPU_Nehalem;
674  }
675  else if (!strcasecmp(isa, "sse4x2") ||
676  !strcasecmp(isa, "sse4-x2") ||
677  !strcasecmp(isa, "sse4-i32x8")) {
678  this->m_isa = Target::SSE4;
679  this->m_nativeVectorWidth = 4;
680  this->m_nativeVectorAlignment = 16;
681  this->m_dataTypeWidth = 32;
682  this->m_vectorWidth = 8;
683  this->m_maskingIsFree = false;
684  this->m_maskBitCount = 32;
685  CPUfromISA = CPU_Nehalem;
686  }
687  else if (!strcasecmp(isa, "sse4-i8x16")) {
688  this->m_isa = Target::SSE4;
689  this->m_nativeVectorWidth = 16;
690  this->m_nativeVectorAlignment = 16;
691  this->m_dataTypeWidth = 8;
692  this->m_vectorWidth = 16;
693  this->m_maskingIsFree = false;
694  this->m_maskBitCount = 8;
695  CPUfromISA = CPU_Nehalem;
696  }
697  else if (!strcasecmp(isa, "sse4-i16x8")) {
698  this->m_isa = Target::SSE4;
699  this->m_nativeVectorWidth = 8;
700  this->m_nativeVectorAlignment = 16;
701  this->m_dataTypeWidth = 16;
702  this->m_vectorWidth = 8;
703  this->m_maskingIsFree = false;
704  this->m_maskBitCount = 16;
705  CPUfromISA = CPU_Nehalem;
706  }
707  else if (!strcasecmp(isa, "generic-4") ||
708  !strcasecmp(isa, "generic-x4")) {
709  this->m_isa = Target::GENERIC;
710  this->m_nativeVectorWidth = 4;
711  this->m_nativeVectorAlignment = 16;
712  this->m_vectorWidth = 4;
713  this->m_maskingIsFree = true;
714  this->m_maskBitCount = 1;
715  this->m_hasHalf = true;
716  this->m_hasTranscendentals = true;
717  this->m_hasTrigonometry = true;
718  this->m_hasGather = this->m_hasScatter = true;
719  this->m_hasRsqrtd = this->m_hasRcpd = true;
720  CPUfromISA = CPU_Generic;
721  }
722  else if (!strcasecmp(isa, "generic-8") ||
723  !strcasecmp(isa, "generic-x8")) {
724  this->m_isa = Target::GENERIC;
725  this->m_nativeVectorWidth = 8;
726  this->m_nativeVectorAlignment = 32;
727  this->m_vectorWidth = 8;
728  this->m_maskingIsFree = true;
729  this->m_maskBitCount = 1;
730  this->m_hasHalf = true;
731  this->m_hasTranscendentals = true;
732  this->m_hasTrigonometry = true;
733  this->m_hasGather = this->m_hasScatter = true;
734  this->m_hasRsqrtd = this->m_hasRcpd = true;
735  CPUfromISA = CPU_Generic;
736  }
737  else if (!strcasecmp(isa, "generic-16") ||
738  !strcasecmp(isa, "generic-x16") ||
739  // We treat *-generic-16 as generic-16, but with special name mangling
740  strstr(isa, "-generic-16") ||
741  strstr(isa, "-generic-x16")) {
742  this->m_isa = Target::GENERIC;
743  if (strstr(isa, "-generic-16") ||
744  strstr(isa, "-generic-x16")) {
745  // It is used for appropriate name mangling and dispatch function during multitarget compilation
746  this->m_treatGenericAsSmth = isa;
747  // We need to create appropriate name for mangling.
748  // Remove "-x16" or "-16" and replace "-" with "_".
749  this->m_treatGenericAsSmth = this->m_treatGenericAsSmth.substr(0, this->m_treatGenericAsSmth.find_last_of("-"));
750  std::replace(this->m_treatGenericAsSmth.begin(), this->m_treatGenericAsSmth.end(), '-', '_');
751  }
752  this->m_nativeVectorWidth = 16;
753  this->m_nativeVectorAlignment = 64;
754  this->m_vectorWidth = 16;
755  this->m_maskingIsFree = true;
756  this->m_maskBitCount = 1;
757  this->m_hasHalf = true;
758  this->m_hasTranscendentals = true;
759  // It's set to false, because stdlib implementation of math functions
760  // is faster on MIC, than "native" implementation provided by the
761  // icc compiler.
762  this->m_hasTrigonometry = false;
763  this->m_hasGather = this->m_hasScatter = true;
764  this->m_hasRsqrtd = this->m_hasRcpd = true;
765  // It's set to true, because MIC has hardware vector prefetch instruction
766  this->m_hasVecPrefetch = true;
767  CPUfromISA = CPU_Generic;
768  }
769  else if (!strcasecmp(isa, "generic-32") ||
770  !strcasecmp(isa, "generic-x32")) {
771  this->m_isa = Target::GENERIC;
772  this->m_nativeVectorWidth = 32;
773  this->m_nativeVectorAlignment = 64;
774  this->m_vectorWidth = 32;
775  this->m_maskingIsFree = true;
776  this->m_maskBitCount = 1;
777  this->m_hasHalf = true;
778  this->m_hasTranscendentals = true;
779  this->m_hasTrigonometry = true;
780  this->m_hasGather = this->m_hasScatter = true;
781  this->m_hasRsqrtd = this->m_hasRcpd = true;
782  CPUfromISA = CPU_Generic;
783  }
784  else if (!strcasecmp(isa, "generic-64") ||
785  !strcasecmp(isa, "generic-x64")) {
786  this->m_isa = Target::GENERIC;
787  this->m_nativeVectorWidth = 64;
788  this->m_nativeVectorAlignment = 64;
789  this->m_vectorWidth = 64;
790  this->m_maskingIsFree = true;
791  this->m_maskBitCount = 1;
792  this->m_hasHalf = true;
793  this->m_hasTranscendentals = true;
794  this->m_hasTrigonometry = true;
795  this->m_hasGather = this->m_hasScatter = true;
796  this->m_hasRsqrtd = this->m_hasRcpd = true;
797  CPUfromISA = CPU_Generic;
798  }
799  else if (!strcasecmp(isa, "generic-1") ||
800  !strcasecmp(isa, "generic-x1")) {
801  this->m_isa = Target::GENERIC;
802  this->m_nativeVectorWidth = 1;
803  this->m_nativeVectorAlignment = 16;
804  this->m_vectorWidth = 1;
805  this->m_maskingIsFree = false;
806  this->m_maskBitCount = 32;
807  CPUfromISA = CPU_Generic;
808  }
809  else if (!strcasecmp(isa, "avx1-i32x4")) {
810  this->m_isa = Target::AVX;
811  this->m_nativeVectorWidth = 8;
812  this->m_nativeVectorAlignment = 32;
813  this->m_dataTypeWidth = 32;
814  this->m_vectorWidth = 4;
815  this->m_maskingIsFree = false;
816  this->m_maskBitCount = 32;
817  CPUfromISA = CPU_SandyBridge;
818  }
819  else if (!strcasecmp(isa, "avx") ||
820  !strcasecmp(isa, "avx1") ||
821  !strcasecmp(isa, "avx1-i32x8")) {
822  this->m_isa = Target::AVX;
823  this->m_nativeVectorWidth = 8;
824  this->m_nativeVectorAlignment = 32;
825  this->m_dataTypeWidth = 32;
826  this->m_vectorWidth = 8;
827  this->m_maskingIsFree = false;
828  this->m_maskBitCount = 32;
829  CPUfromISA = CPU_SandyBridge;
830  }
831  else if (!strcasecmp(isa, "avx-i64x4") ||
832  !strcasecmp(isa, "avx1-i64x4")) {
833  this->m_isa = Target::AVX;
834  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
835  this->m_nativeVectorAlignment = 32;
836  this->m_dataTypeWidth = 64;
837  this->m_vectorWidth = 4;
838  this->m_maskingIsFree = false;
839  this->m_maskBitCount = 64;
840  CPUfromISA = CPU_SandyBridge;
841  }
842  else if (!strcasecmp(isa, "avx-x2") ||
843  !strcasecmp(isa, "avx1-x2") ||
844  !strcasecmp(isa, "avx1-i32x16")) {
845  this->m_isa = Target::AVX;
846  this->m_nativeVectorWidth = 8;
847  this->m_nativeVectorAlignment = 32;
848  this->m_dataTypeWidth = 32;
849  this->m_vectorWidth = 16;
850  this->m_maskingIsFree = false;
851  this->m_maskBitCount = 32;
852  CPUfromISA = CPU_SandyBridge;
853  }
854  else if (!strcasecmp(isa, "avx1.1") ||
855  !strcasecmp(isa, "avx1.1-i32x8")) {
856  this->m_isa = Target::AVX11;
857  this->m_nativeVectorWidth = 8;
858  this->m_nativeVectorAlignment = 32;
859  this->m_dataTypeWidth = 32;
860  this->m_vectorWidth = 8;
861  this->m_maskingIsFree = false;
862  this->m_maskBitCount = 32;
863  this->m_hasHalf = true;
864  this->m_hasRand = true;
865  CPUfromISA = CPU_IvyBridge;
866  }
867  else if (!strcasecmp(isa, "avx1.1-x2") ||
868  !strcasecmp(isa, "avx1.1-i32x16")) {
869  this->m_isa = Target::AVX11;
870  this->m_nativeVectorWidth = 8;
871  this->m_nativeVectorAlignment = 32;
872  this->m_dataTypeWidth = 32;
873  this->m_vectorWidth = 16;
874  this->m_maskingIsFree = false;
875  this->m_maskBitCount = 32;
876  this->m_hasHalf = true;
877  this->m_hasRand = true;
878  CPUfromISA = CPU_IvyBridge;
879  }
880  else if (!strcasecmp(isa, "avx1.1-i64x4")) {
881  this->m_isa = Target::AVX11;
882  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
883  this->m_nativeVectorAlignment = 32;
884  this->m_dataTypeWidth = 64;
885  this->m_vectorWidth = 4;
886  this->m_maskingIsFree = false;
887  this->m_maskBitCount = 64;
888  this->m_hasHalf = true;
889  this->m_hasRand = true;
890  CPUfromISA = CPU_IvyBridge;
891  }
892  else if (!strcasecmp(isa, "avx2") ||
893  !strcasecmp(isa, "avx2-i32x8")) {
894  this->m_isa = Target::AVX2;
895  this->m_nativeVectorWidth = 8;
896  this->m_nativeVectorAlignment = 32;
897  this->m_dataTypeWidth = 32;
898  this->m_vectorWidth = 8;
899  this->m_maskingIsFree = false;
900  this->m_maskBitCount = 32;
901  this->m_hasHalf = true;
902  this->m_hasRand = true;
903  this->m_hasGather = true;
904  CPUfromISA = CPU_Haswell;
905  }
906  else if (!strcasecmp(isa, "avx2-x2") ||
907  !strcasecmp(isa, "avx2-i32x16")) {
908  this->m_isa = Target::AVX2;
909  this->m_nativeVectorWidth = 16;
910  this->m_nativeVectorAlignment = 32;
911  this->m_dataTypeWidth = 32;
912  this->m_vectorWidth = 16;
913  this->m_maskingIsFree = false;
914  this->m_maskBitCount = 32;
915  this->m_hasHalf = true;
916  this->m_hasRand = true;
917  this->m_hasGather = true;
918  CPUfromISA = CPU_Haswell;
919  }
920  else if (!strcasecmp(isa, "avx2-i64x4")) {
921  this->m_isa = Target::AVX2;
922  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
923  this->m_nativeVectorAlignment = 32;
924  this->m_dataTypeWidth = 64;
925  this->m_vectorWidth = 4;
926  this->m_maskingIsFree = false;
927  this->m_maskBitCount = 64;
928  this->m_hasHalf = true;
929  this->m_hasRand = true;
930  this->m_hasGather = true;
931  CPUfromISA = CPU_Haswell;
932  }
933 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
934  else if (!strcasecmp(isa, "avx512knl-i32x16")) {
935  this->m_isa = Target::KNL_AVX512;
936  this->m_nativeVectorWidth = 16;
937  this->m_nativeVectorAlignment = 64;
938  // ?? this->m_dataTypeWidth = 32;
939  this->m_vectorWidth = 16;
940  this->m_maskingIsFree = true;
941  this->m_maskBitCount = 8;
942  this->m_hasHalf = true;
943  this->m_hasRand = true;
944  this->m_hasGather = this->m_hasScatter = true;
945  this->m_hasTranscendentals = false;
946  // For MIC it is set to true due to performance reasons. The option should be tested.
947  this->m_hasTrigonometry = false;
948  this->m_hasRsqrtd = this->m_hasRcpd = false;
949  this->m_hasVecPrefetch = false;
950  CPUfromISA = CPU_KNL;
951  }
952 #endif
953 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
954  else if (!strcasecmp(isa, "avx512skx-i32x16")) {
955  this->m_isa = Target::SKX_AVX512;
956  this->m_nativeVectorWidth = 16;
957  this->m_nativeVectorAlignment = 64;
958  // ?? this->m_dataTypeWidth = 32;
959  this->m_vectorWidth = 16;
960  this->m_maskingIsFree = true;
961  this->m_maskBitCount = 8;
962  this->m_hasHalf = true;
963  this->m_hasRand = true;
964  this->m_hasGather = this->m_hasScatter = true;
965  this->m_hasTranscendentals = false;
966  // For MIC it is set to true due to performance reasons. The option should be tested.
967  this->m_hasTrigonometry = false;
968  this->m_hasRsqrtd = this->m_hasRcpd = false;
969  this->m_hasVecPrefetch = false;
970  CPUfromISA = CPU_SKX;
971  }
972 #endif
973 #ifdef ISPC_ARM_ENABLED
974  else if (!strcasecmp(isa, "neon-i8x16")) {
975  this->m_isa = Target::NEON8;
976  this->m_nativeVectorWidth = 16;
977  this->m_nativeVectorAlignment = 16;
978  this->m_dataTypeWidth = 8;
979  this->m_vectorWidth = 16;
980  this->m_attributes = "+neon,+fp16";
981  this->m_hasHalf = true; // ??
982  this->m_maskingIsFree = false;
983  this->m_maskBitCount = 8;
984  }
985  else if (!strcasecmp(isa, "neon-i16x8")) {
986  this->m_isa = Target::NEON16;
987  this->m_nativeVectorWidth = 8;
988  this->m_nativeVectorAlignment = 16;
989  this->m_dataTypeWidth = 16;
990  this->m_vectorWidth = 8;
991  this->m_attributes = "+neon,+fp16";
992  this->m_hasHalf = true; // ??
993  this->m_maskingIsFree = false;
994  this->m_maskBitCount = 16;
995  }
996  else if (!strcasecmp(isa, "neon") ||
997  !strcasecmp(isa, "neon-i32x4")) {
998  this->m_isa = Target::NEON32;
999  this->m_nativeVectorWidth = 4;
1000  this->m_nativeVectorAlignment = 16;
1001  this->m_dataTypeWidth = 32;
1002  this->m_vectorWidth = 4;
1003  this->m_attributes = "+neon,+fp16";
1004  this->m_hasHalf = true; // ??
1005  this->m_maskingIsFree = false;
1006  this->m_maskBitCount = 32;
1007  }
1008 #endif
1009 #ifdef ISPC_NVPTX_ENABLED
1010  else if (!strcasecmp(isa, "nvptx")) {
1011  this->m_isa = Target::NVPTX;
1012  this->m_cpu = "sm_35";
1013  this->m_nativeVectorWidth = 32;
1014  this->m_nativeVectorAlignment = 32;
1015  this->m_vectorWidth = 1;
1016  this->m_hasHalf = true;
1017  this->m_maskingIsFree = true;
1018  this->m_maskBitCount = 1;
1019  this->m_hasTranscendentals = true;
1020  this->m_hasTrigonometry = true;
1021  this->m_hasGather = this->m_hasScatter = false;
1022  CPUfromISA = CPU_SM35;
1023  }
1024 #endif /* ISPC_NVPTX_ENABLED */
1025  else {
1026  Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
1027  isa, SupportedTargets());
1028  error = true;
1029  }
1030 
1031 #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
1032  if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4))
1033  CPUID = CPU_CortexA9;
1034 #endif
1035 
1036  if (CPUID == CPU_None) {
1037 #ifndef ISPC_ARM_ENABLED
1038  if (isa == NULL) {
1039 #endif
1040  std::string hostCPU = llvm::sys::getHostCPUName();
1041  if (hostCPU.size() > 0)
1042  cpu = strdup(hostCPU.c_str());
1043  else {
1044  Warning(SourcePos(), "Unable to determine host CPU!\n");
1045  cpu = a.GetDefaultNameFromType(CPU_Generic).c_str();
1046  }
1047 #ifndef ISPC_ARM_ENABLED
1048  }
1049  else {
1050  cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
1051  }
1052 #endif
1053  }
1054  else {
1055  if ((CPUfromISA != CPU_None) &&
1056  !a.BackwardCompatible(CPUID, CPUfromISA)) {
1057  Error(SourcePos(), "The requested CPU is incompatible"
1058  " with the CPU %s needs: %s vs. %s!\n",
1059  isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str());
1060  return;
1061  }
1062  cpu = a.GetDefaultNameFromType(CPUID).c_str();
1063  }
1064  this->m_cpu = cpu;
1065 
1066  if (!error) {
1067  // Create TargetMachine
1068  std::string triple = GetTripleString();
1069 
1070 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_8
1071  llvm::Reloc::Model relocModel = m_generatePIC ? llvm::Reloc::PIC_ :
1072  llvm::Reloc::Default;
1073 #else
1074  llvm::Optional<llvm::Reloc::Model> relocModel;
1075  if (m_generatePIC) {
1076  relocModel = llvm::Reloc::PIC_;
1077  }
1078 #endif
1079  std::string featuresString = m_attributes;
1080  llvm::TargetOptions options;
1081 #ifdef ISPC_ARM_ENABLED
1082  if (m_isa == Target::NEON8 || m_isa == Target::NEON16 ||
1083  m_isa == Target::NEON32)
1084  options.FloatABIType = llvm::FloatABI::Hard;
1085 #endif
1086  if (g->opt.disableFMA == false)
1087  options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
1088 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1089  if (g->NoOmitFramePointer)
1090  options.NoFramePointerElim = true;
1091 #ifdef ISPC_IS_WINDOWS
1092  if (strcmp("x86", arch) == 0) {
1093  // Workaround for issue #503 (LLVM issue 14646).
1094  // It's Win32 specific.
1095  options.NoFramePointerElim = true;
1096  }
1097 #endif
1098 #endif
1099  m_targetMachine =
1100  m_target->createTargetMachine(triple, m_cpu, featuresString, options,
1101  relocModel);
1102  Assert(m_targetMachine != NULL);
1103 
1104 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1105  m_targetMachine->setAsmVerbosityDefault(true);
1106 #else /* LLVM 3.7+ */
1107  m_targetMachine->Options.MCOptions.AsmVerbose = true;
1108 #endif
1109 
1110 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
1111  // Change default version of generated DWARF.
1112  if (g->generateDWARFVersion != 0) {
1113  m_targetMachine->Options.MCOptions.DwarfVersion = g->generateDWARFVersion;
1114  }
1115 #endif
1116 
1117  // Initialize TargetData/DataLayout in 3 steps.
1118  // 1. Get default data layout first
1119  std::string dl_string;
1120 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
1121  dl_string = m_targetMachine->getSubtargetImpl()->getDataLayout()->getStringRepresentation();
1122 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1123  dl_string = m_targetMachine->createDataLayout().getStringRepresentation();
1124 #else // LLVM 3.5- or LLVM 3.7
1125  dl_string = m_targetMachine->getDataLayout()->getStringRepresentation();
1126 #endif
1127  // 2. Adjust for generic
1128  if (m_isa == Target::GENERIC) {
1129  // <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
1130  // that to the regular datalayout string for IA..
1131  // For generic-4 target we need to treat <4 x i1> as 128 bit value
1132  // in terms of required memory storage and alignment, as this is
1133  // translated to __m128 type.
1134  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
1135  "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
1136  "f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
1137  }
1138 #ifdef ISPC_NVPTX_ENABLED
1139  else if (m_isa == Target::NVPTX)
1140  {
1141  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
1142  }
1143 #endif
1144 
1145  // 3. Finally set member data
1146  m_dataLayout = new llvm::DataLayout(dl_string);
1147 
1148  // Set is32Bit
1149  // This indicates if we are compiling for 32 bit platform
1150  // and can assume 32 bit runtime.
1151  // FIXME: all generic targets are handled as 64 bit, which is incorrect.
1152 
1153  this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
1154 
1155 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
1156  // This is LLVM 3.3+ feature.
1157  // Initialize target-specific "target-feature" attribute.
1158  if (!m_attributes.empty()) {
1159  llvm::AttrBuilder attrBuilder;
1160 #ifdef ISPC_NVPTX_ENABLED
1161  if (m_isa != Target::NVPTX)
1162 #endif
1163  attrBuilder.addAttribute("target-cpu", this->m_cpu);
1164  attrBuilder.addAttribute("target-features", this->m_attributes);
1165 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
1166  this->m_tf_attributes = new llvm::AttributeSet(
1167  llvm::AttributeSet::get(
1168  *g->ctx,
1169  llvm::AttributeSet::FunctionIndex,
1170  attrBuilder));
1171 #else // LLVM 5.0+
1172  this->m_tf_attributes = new llvm::AttrBuilder(attrBuilder);
1173 #endif
1174  }
1175 #endif
1176 
1178  }
1179 
1180  m_valid = !error;
1181 
1182  if (printTarget) {
1183  printf("Target Triple: %s\n", m_targetMachine->getTargetTriple().str().c_str());
1184  printf("Target CPU: %s\n", m_targetMachine->getTargetCPU().str().c_str());
1185  printf("Target Feature String: %s\n", m_targetMachine->getTargetFeatureString().str().c_str());
1186  }
1187 
1188  return;
1189 }
1190 
1191 
1192 std::string
1194  AllCPUs a;
1195  return a.HumanReadableListOfNames();
1196 }
1197 
1198 
1199 const char *
1201  return
1202 #ifdef ISPC_ARM_ENABLED
1203  "arm, "
1204 #endif
1205  "x86, x86-64";
1206 }
1207 
1208 
1209 const char *
1211  return
1212  "host, sse2-i32x4, sse2-i32x8, "
1213  "sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
1214  "avx1-i32x4, "
1215  "avx1-i32x8, avx1-i32x16, avx1-i64x4, "
1216  "avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4, "
1217  "avx2-i32x8, avx2-i32x16, avx2-i64x4, "
1218 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1219  "avx512knl-i32x16, "
1220 #endif
1221 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1222  "avx512skx-i32x16, "
1223 #endif
1224  "generic-x1, generic-x4, generic-x8, generic-x16, "
1225  "generic-x32, generic-x64, *-generic-x16"
1226 #ifdef ISPC_ARM_ENABLED
1227  ", neon-i8x16, neon-i16x8, neon-i32x4"
1228 #endif
1229 #ifdef ISPC_NVPTX_ENABLED
1230  ", nvptx"
1231 #endif
1232 ;
1233 
1234 }
1235 
1236 
1237 std::string
1239  llvm::Triple triple;
1240 #ifdef ISPC_ARM_ENABLED
1241  if (m_arch == "arm") {
1242  triple.setTriple("armv7-eabi");
1243  }
1244  else
1245 #endif
1246  {
1247  // Start with the host triple as the default
1248  triple.setTriple(llvm::sys::getDefaultTargetTriple());
1249 
1250  // And override the arch in the host triple based on what the user
1251  // specified. Here we need to deal with the fact that LLVM uses one
1252  // naming convention for targets TargetRegistry, but wants some
1253  // slightly different ones for the triple. TODO: is there a way to
1254  // have it do this remapping, which would presumably be a bit less
1255  // error prone?
1256  if (m_arch == "x86")
1257  triple.setArchName("i386");
1258  else if (m_arch == "x86-64")
1259  triple.setArchName("x86_64");
1260 #ifdef ISPC_NVPTX_ENABLED
1261  else if (m_arch == "nvptx64")
1262  triple = llvm::Triple("nvptx64", "nvidia", "cuda");
1263 #endif /* ISPC_NVPTX_ENABLED */
1264  else
1265  triple.setArchName(m_arch);
1266  }
1267  return triple.str();
1268 }
1269 
1270 // This function returns string representation of ISA for the purpose of
1271 // mangling. And may return any unique string, preferably short, like
1272 // sse4, avx and etc.
1273 const char *
1275  switch (isa) {
1276 #ifdef ISPC_ARM_ENABLED
1277  case Target::NEON8:
1278  return "neon-8";
1279  case Target::NEON16:
1280  return "neon-16";
1281  case Target::NEON32:
1282  return "neon-32";
1283 #endif
1284  case Target::SSE2:
1285  return "sse2";
1286  case Target::SSE4:
1287  return "sse4";
1288  case Target::AVX:
1289  return "avx";
1290  case Target::AVX11:
1291  return "avx11";
1292  case Target::AVX2:
1293  return "avx2";
1294 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1295  case Target::KNL_AVX512:
1296  return "avx512knl";
1297 #endif
1298 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1299  case Target::SKX_AVX512:
1300  return "avx512skx";
1301 #endif
1302  case Target::GENERIC:
1303  return "generic";
1304 #ifdef ISPC_NVPTX_ENABLED
1305  case Target::NVPTX:
1306  return "nvptx";
1307 #endif /* ISPC_NVPTX_ENABLED */
1308  default:
1309  FATAL("Unhandled target in ISAToString()");
1310  }
1311  return "";
1312 }
1313 
1314 const char *
1316  return ISAToString(m_isa);
1317 }
1318 
1319 
1320 // This function returns string representation of default target corresponding
1321 // to ISA. I.e. for SSE4 it's sse4-i32x4, for AVX11 it's avx1.1-i32x8. This
1322 // string may be used to initialize Target.
1323 const char *
1325  switch (isa) {
1326 #ifdef ISPC_ARM_ENABLED
1327  case Target::NEON8:
1328  return "neon-8";
1329  case Target::NEON16:
1330  return "neon-16";
1331  case Target::NEON32:
1332  return "neon-32";
1333 #endif
1334  case Target::SSE2:
1335  return "sse2-i32x4";
1336  case Target::SSE4:
1337  return "sse4-i32x4";
1338  case Target::AVX:
1339  return "avx1-i32x8";
1340  case Target::AVX11:
1341  return "avx1.1-i32x8";
1342  case Target::AVX2:
1343  return "avx2-i32x8";
1344 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1345  case Target::KNL_AVX512:
1346  return "avx512knl-i32x16";
1347 #endif
1348 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1349  case Target::SKX_AVX512:
1350  return "avx512skx-i32x16";
1351 #endif
1352  case Target::GENERIC:
1353  return "generic-4";
1354 #ifdef ISPC_NVPTX_ENABLED
1355  case Target::NVPTX:
1356  return "nvptx";
1357 #endif /* ISPC_NVPTX_ENABLED */
1358  default:
1359  FATAL("Unhandled target in ISAToTargetString()");
1360  }
1361  return "";
1362 }
1363 
1364 
1365 const char *
1367  return ISAToString(m_isa);
1368 }
1369 
1370 
1371 static bool
1373  if (type->isFloatingPointTy() || type->isX86_MMXTy() || type->isVoidTy() ||
1374  type->isIntegerTy() || type->isLabelTy() || type->isMetadataTy())
1375  return false;
1376 
1377  if (type == LLVMTypes::BoolVectorType ||
1378  type == LLVMTypes::MaskType ||
1379  type == LLVMTypes::Int1VectorType)
1380  return true;
1381 
1382  llvm::ArrayType *at =
1383  llvm::dyn_cast<llvm::ArrayType>(type);
1384  if (at != NULL)
1385  return lGenericTypeLayoutIndeterminate(at->getElementType());
1386 
1387  llvm::PointerType *pt =
1388  llvm::dyn_cast<llvm::PointerType>(type);
1389  if (pt != NULL)
1390  return false;
1391 
1392  llvm::StructType *st =
1393  llvm::dyn_cast<llvm::StructType>(type);
1394  if (st != NULL) {
1395  for (int i = 0; i < (int)st->getNumElements(); ++i)
1396  if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
1397  return true;
1398  return false;
1399  }
1400 
1401  Assert(llvm::isa<llvm::VectorType>(type));
1402  return true;
1403 }
1404 
1405 
// Returns an llvm::Value holding the size of the given type, as a 32-bit
// or a 64-bit integer.
//
// In the GENERIC-ISA branch the size is not taken from the data layout:
// a getelementptr with index 1 is applied to a null pointer of type
// `type *` and the result is converted with ptrtoint, with both
// instructions appended to insertAtEnd.  Otherwise the size is
// DataLayout::getTypeStoreSize(type), returned as a constant.
//
// NOTE(review): this listing is missing original lines 1410, 1425 and 1434
// (see the jumps in the embedded line numbers); they are flagged below and
// must be recovered from the real source before this block can compile.
1406 llvm::Value *
1407 Target::SizeOf(llvm::Type *type,
1408  llvm::BasicBlock *insertAtEnd) {
1409  if (m_isa == Target::GENERIC &&
// NOTE(review): line 1410 missing -- presumably the second half of this
// condition, `lGenericTypeLayoutIndeterminate(type) == true) {`, by analogy
// with Target::StructOffset(); confirm.
1411  llvm::Value *index[1] = { LLVMInt32(1) };
1412  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1413  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1414  llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
// GetElementPtrInst::Create takes an explicit pointee type from LLVM 3.7 on.
1415 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1416  llvm::Instruction *gep =
1417  llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
1418  insertAtEnd);
1419 #else /* LLVM 3.7+ */
1420  llvm::Instruction *gep =
1421  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr,
1422  arrayRef, "sizeof_gep",
1423  insertAtEnd);
1424 #endif
// NOTE(review): line 1425 missing -- an `if (...)` selecting the 32-bit
// result; the exact condition is not visible here, confirm.
1426  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
1427  "sizeof_int", insertAtEnd);
1428  else
1429  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
1430  "sizeof_int", insertAtEnd);
1431  }
1432 
1433  uint64_t byteSize = getDataLayout()->getTypeStoreSize(type);
// NOTE(review): line 1434 missing -- an `if (...)` selecting the 32-bit
// result (byteSize is truncated to int32_t in that case); confirm.
1435  return LLVMInt32((int32_t)byteSize);
1436  else
1437  return LLVMInt64(byteSize);
1438 }
1439 
1440 
// Returns an llvm::Value holding the offset of element number `element`
// within the given struct type, as a 32-bit or a 64-bit integer.
//
// For the GENERIC ISA, when lGenericTypeLayoutIndeterminate(type) is true,
// the offset is not taken from the data layout: a getelementptr with
// indices {0, element} is applied to a null pointer of type `type *` and
// the result is converted with ptrtoint, with both instructions appended to
// insertAtEnd.  Otherwise the offset comes from the StructLayout that the
// data layout reports, returned as a constant.
//
// Returns NULL when `type` is not a sized struct type; that path asserts
// that the module has already recorded at least one error.
//
// NOTE(review): this listing is missing original lines 1460 and 1479 (see
// the jumps in the embedded line numbers); they are flagged below and must
// be recovered from the real source before this block can compile.
1441 llvm::Value *
1442 Target::StructOffset(llvm::Type *type, int element,
1443  llvm::BasicBlock *insertAtEnd) {
1444  if (m_isa == Target::GENERIC &&
1445  lGenericTypeLayoutIndeterminate(type) == true) {
1446  llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
1447  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1448  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1449  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
// GetElementPtrInst::Create takes an explicit pointee type from LLVM 3.7 on.
1450 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1451  llvm::Instruction *gep =
1452  llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
1453  insertAtEnd);
1454 #else /* LLVM 3.7+ */
1455  llvm::Instruction *gep =
1456  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr,
1457  arrayRef, "offset_gep",
1458  insertAtEnd);
1459 #endif
// NOTE(review): line 1460 missing -- an `if (...)` selecting the 32-bit
// result; the exact condition is not visible here, confirm.
1461  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
1462  "offset_int", insertAtEnd);
1463  else
1464  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
1465  "offset_int", insertAtEnd);
1466  }
1467 
1468  llvm::StructType *structType =
1469  llvm::dyn_cast<llvm::StructType>(type);
// Not a (sized) struct: only expected after an error has been reported.
1470  if (structType == NULL || structType->isSized() == false) {
1471  Assert(m->errorCount > 0);
1472  return NULL;
1473  }
1474 
1475  const llvm::StructLayout *sl = getDataLayout()->getStructLayout(structType);
1476  Assert(sl != NULL);
1477 
1478  uint64_t offset = sl->getElementOffset(element);
// NOTE(review): line 1479 missing -- an `if (...)` selecting the 32-bit
// result (offset is truncated to int32_t in that case); confirm.
1480  return LLVMInt32((int32_t)offset);
1481  else
1482  return LLVMInt64(offset);
1483 }
1484 
1485 void Target::markFuncWithTargetAttr(llvm::Function* func) {
1486 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
1487  if (m_tf_attributes) {
1488 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
1489  func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
1490 #else // LLVM 5.0+
1491  func->addAttributes(llvm::AttributeList::FunctionIndex, *m_tf_attributes);
1492 #endif
1493  }
1494 #endif
1495 }
1496 
1497 
1498 ///////////////////////////////////////////////////////////////////////////
1499 // Opt
1500 
1502  level = 1;
1503  fastMath = false;
1504  fastMaskedVload = false;
1505  force32BitAddressing = true;
1506  unrollLoops = true;
1507  disableAsserts = false;
1508  disableFMA = false;
1509  forceAlignedMemory = false;
1510  disableMaskAllOnOptimizations = false;
1511  disableHandlePseudoMemoryOps = false;
1512  disableBlendedMaskedStores = false;
1513  disableCoherentControlFlow = false;
1514  disableUniformControlFlow = false;
1515  disableGatherScatterOptimizations = false;
1516  disableMaskedStoreToStore = false;
1517  disableGatherScatterFlattening = false;
1518  disableUniformMemoryOptimizations = false;
1519  disableCoalescing = false;
1520 }
1521 
1522 ///////////////////////////////////////////////////////////////////////////
1523 // Globals
1524 
1526  mathLib = Globals::Math_ISPC;
1527 
1528  includeStdlib = true;
1529  runCPP = true;
1530  debugPrint = false;
1531  printTarget = false;
1532  NoOmitFramePointer = false;
1533  debugIR = -1;
1534  disableWarnings = false;
1535  warningsAsErrors = false;
1536  quiet = false;
1537  forceColoredOutput = false;
1538  disableLineWrap = false;
1539  emitPerfWarnings = true;
1540  emitInstrumentation = false;
1541  generateDebuggingSymbols = false;
1542 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
1543  generateDWARFVersion = 3;
1544 #endif
1545  enableFuzzTest = false;
1546  fuzzTestSeed = -1;
1547  mangleFunctionsWithTarget = false;
1548 
1549  ctx = new llvm::LLVMContext;
1550 
1551 #ifdef ISPC_IS_WINDOWS
1552  _getcwd(currentDirectory, sizeof(currentDirectory));
1553 #else
1554  if (getcwd(currentDirectory, sizeof(currentDirectory)) == NULL)
1555  FATAL("Current directory path too long!");
1556 #endif
1557  forceAlignment = -1;
1558  dllExport = false;
1559 }
1560 
1561 ///////////////////////////////////////////////////////////////////////////
1562 // SourcePos
1563 
1564 SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
1565  name = n;
1566  if (name == NULL) {
1567  if (m != NULL)
1568  name = m->module->getModuleIdentifier().c_str();
1569  else
1570  name = "(unknown)";
1571  }
1572  first_line = fl;
1573  first_column = fc;
1574  last_line = ll != 0 ? ll : fl;
1575  last_column = lc != 0 ? lc : fc;
1576 }
1577 
1578 
1579 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1580 llvm::DIFile
1581 #else /* LLVM 3.7+ */
1582 llvm::DIFile*
1583 //llvm::MDFile*
1584 #endif
1586  std::string directory, filename;
1587  GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
1588 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1589  llvm::DIFile ret = m->diBuilder->createFile(filename, directory);
1590  Assert(ret.Verify());
1591 #else /* LLVM 3.7+ */
1592  llvm::DIFile *ret = m->diBuilder->createFile(filename, directory);
1593 #endif
1594  return ret;
1595 }
1596 
1597 
1598 void
1600  printf(" @ [%s:%d.%d - %d.%d] ", name, first_line, first_column,
1601  last_line, last_column);
1602 }
1603 
1604 
1605 bool
1607  return (!strcmp(name, p2.name) &&
1608  first_line == p2.first_line &&
1609  first_column == p2.first_column &&
1610  last_line == p2.last_line &&
1611  last_column == p2.last_column);
1612 }
1613 
1614 
1615 SourcePos
1616 Union(const SourcePos &p1, const SourcePos &p2) {
1617  if (strcmp(p1.name, p2.name) != 0)
1618  return p1;
1619 
1620  SourcePos ret;
1621  ret.name = p1.name;
1622  ret.first_line = std::min(p1.first_line, p2.first_line);
1623  ret.first_column = std::min(p1.first_column, p2.first_column);
1624  ret.last_line = std::max(p1.last_line, p2.last_line);
1625  ret.last_column = std::max(p1.last_column, p2.last_column);
1626  return ret;
1627 }
bool disableFMA
Definition: ispc.h:465
#define CPU_Broadwell
bool m_hasTranscendentals
Definition: ispc.h:410
#define CPU_Silvermont
Globals()
Definition: ispc.cpp:1525
Opt opt
Definition: ispc.h:547
int last_column
Definition: ispc.h:144
const llvm::Target * m_target
Definition: ispc.h:314
static bool __os_has_avx_support()
Definition: ispc.cpp:116
This structure collects together a number of global variables.
Definition: ispc.h:543
const char * GetISATargetString() const
Definition: ispc.cpp:1366
int m_nativeVectorAlignment
Definition: ispc.h:370
AllCPUs()
Definition: ispc.cpp:308
SourcePos Union(const SourcePos &p1, const SourcePos &p2)
Definition: ispc.cpp:1616
int first_line
Definition: ispc.h:141
llvm::Value * SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1407
SourcePos(const char *n=NULL, int fl=0, int fc=0, int ll=0, int lc=0)
Definition: ispc.cpp:1564
static llvm::VectorType * BoolVectorType
Definition: llvmutil.h:92
std::string m_cpu
Definition: ispc.h:344
Opt()
Definition: ispc.cpp:1501
std::string m_arch
Definition: ispc.h:338
static const char * lGetSystemISA()
Definition: ispc.cpp:149
bool NoOmitFramePointer
Definition: ispc.h:572
std::string HumanReadableListOfNames()
Definition: ispc.cpp:427
#define Assert(expr)
Definition: ispc.h:172
bool BackwardCompatible(CPUtype what, CPUtype with)
Definition: ispc.cpp:459
static bool lGenericTypeLayoutIndeterminate(llvm::Type *type)
Definition: ispc.cpp:1372
const char * GetISAString() const
Definition: ispc.cpp:1315
int m_nativeVectorWidth
Definition: ispc.h:363
Module * m
Definition: ispc.cpp:93
static const char * ISAToString(Target::ISA isa)
Definition: ispc.cpp:1274
bool m_generatePIC
Definition: ispc.h:382
static const char * SupportedArchs()
Definition: ispc.cpp:1200
std::set< CPUtype > Set(int type,...)
Definition: ispc.cpp:294
bool m_maskingIsFree
Definition: ispc.h:388
static llvm::VectorType * Int1VectorType
Definition: llvmutil.h:93
void GetDirectoryAndFileName(const std::string &currentDir, const std::string &relativeName, std::string *directory, std::string *filename)
Definition: util.cpp:547
Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genenricAsSmth="")
Definition: ispc.cpp:467
static void __cpuidex(int info[4], int level, int count)
Definition: ispc.cpp:106
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:263
std::string m_attributes
Definition: ispc.h:347
llvm::Module * module
Definition: module.h:158
static std::string SupportedCPUs()
Definition: ispc.cpp:1193
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:385
Definition: module.h:57
static llvm::Type * Int64Type
Definition: llvmutil.h:79
char currentDirectory[1024]
Definition: ispc.h:642
std::string & GetDefaultNameFromType(CPUtype type)
Definition: ispc.cpp:443
Header file with declarations for various LLVM utility stuff.
bool m_is32Bit
Definition: ispc.h:341
bool m_hasRand
Definition: ispc.h:400
bool m_hasRcpd
Definition: ispc.h:419
int m_maskBitCount
Definition: ispc.h:393
static void __cpuid(int info[4], int infoType)
Definition: ispc.cpp:99
Representation of a range of positions in a source file.
Definition: ispc.h:136
llvm::Value * StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1442
int m_vectorWidth
Definition: ispc.h:379
llvm::TargetMachine * m_targetMachine
Definition: ispc.h:324
bool force32BitAddressing
Definition: ispc.h:457
static bool __os_has_avx512_support()
Definition: ispc.cpp:131
const char * name
Definition: ispc.h:140
void markFuncWithTargetAttr(llvm::Function *func)
Definition: ispc.cpp:1485
bool operator==(const SourcePos &p2) const
Definition: ispc.cpp:1606
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:410
#define FATAL(message)
Definition: util.h:113
int m_dataTypeWidth
Definition: ispc.h:374
static llvm::Type * Int32Type
Definition: llvmutil.h:78
int last_line
Definition: ispc.h:143
#define PTYPE(p)
Definition: llvmutil.h:55
#define ISPC_MAX_NVEC
Definition: ispc.h:68
std::vector< std::vector< std::string > > names
Definition: ispc.cpp:291
bool m_hasGather
Definition: ispc.h:403
int first_column
Definition: ispc.h:142
llvm::DataLayout * m_dataLayout
Definition: ispc.h:325
void Print() const
Definition: ispc.cpp:1599
bool m_hasScatter
Definition: ispc.h:406
ISA
Definition: ispc.h:191
llvm::DIFile GetDIFile() const
Definition: ispc.cpp:1585
bool m_valid
Definition: ispc.h:329
Globals * g
Definition: ispc.cpp:92
static llvm::VectorType * MaskType
Definition: llvmutil.h:90
std::vector< std::set< CPUtype > > compat
Definition: ispc.cpp:292
bool m_hasTrigonometry
Definition: ispc.h:413
std::string m_treatGenericAsSmth
Definition: ispc.h:335
bool m_hasHalf
Definition: ispc.h:397
const llvm::DataLayout * getDataLayout() const
Definition: ispc.h:264
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
llvm::ConstantInt * LLVMInt64(int64_t i)
Definition: llvmutil.cpp:277
int errorCount
Definition: module.h:151
llvm::LLVMContext * ctx
Definition: ispc.h:638
static const char * SupportedTargets()
Definition: ispc.cpp:1210
ISA m_isa
Definition: ispc.h:332
bool m_hasVecPrefetch
Definition: ispc.h:422
llvm::DIBuilder * diBuilder
Definition: module.h:161
Main ispc.header file. Defines Target, Globals and Opt classes.
CPUtype GetTypeFromName(std::string name)
Definition: ispc.cpp:448
std::string GetTripleString() const
Definition: ispc.cpp:1238
static const char * ISAToTargetString(Target::ISA isa)
Definition: ispc.cpp:1324
bool m_hasRsqrtd
Definition: ispc.h:416
CPUtype
Definition: ispc.cpp:211