Intel SPMD Program Compiler  1.11.0
ispc.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2019, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ispc.cpp
35  @brief ispc global definitions
36 */
37 
38 #include "ispc.h"
39 #include "llvmutil.h"
40 #include "module.h"
41 #include "util.h"
42 #include <sstream>
43 #include <stdarg.h> /* va_list, va_start, va_arg, va_end */
44 #include <stdio.h>
45 #ifdef ISPC_IS_WINDOWS
46 #include <direct.h>
47 #include <windows.h>
48 #define strcasecmp stricmp
49 #if ISPC_LLVM_VERSION >= ISPC_LLVM_7_0
50 #include <intrin.h>
51 #endif
52 #else
53 #include <sys/types.h>
54 #include <unistd.h>
55 #endif
56 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
57 #include <llvm/Instructions.h>
58 #include <llvm/LLVMContext.h>
59 #include <llvm/Module.h>
60 #else /* 3.3+ */
61 #include <llvm/IR/Instructions.h>
62 #include <llvm/IR/LLVMContext.h>
63 #include <llvm/IR/Module.h>
64 #endif
65 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
66 #if ISPC_LLVM_VERSION >= ISPC_LLVM_6_0
67 #include <llvm/CodeGen/TargetLowering.h>
68 #include <llvm/CodeGen/TargetSubtargetInfo.h>
69 #else
70 #include <llvm/Target/TargetSubtargetInfo.h>
71 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
72 #include <llvm/Target/TargetLowering.h>
73 #endif
74 #endif
75 #endif
76 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
77 #include <llvm/IR/DIBuilder.h>
78 #include <llvm/IR/DebugInfo.h>
79 #else // LLVM 3.2, 3.3, 3.4
80 #include <llvm/DIBuilder.h>
81 #include <llvm/DebugInfo.h>
82 #endif
83 #if ISPC_LLVM_VERSION >= ISPC_LLVM_5_0 // LLVM 5.0+
84 #include <llvm/BinaryFormat/Dwarf.h>
85 #else // LLVM up to 4.x
86 #include <llvm/Support/Dwarf.h>
87 #endif
88 #include <llvm/Target/TargetMachine.h>
89 #include <llvm/Target/TargetOptions.h>
90 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
91 #include <llvm/DataLayout.h>
92 #else // LLVM 3.3+
93 #include <llvm/IR/Attributes.h>
94 #include <llvm/IR/DataLayout.h>
95 #endif
96 #include <llvm/Support/CodeGen.h>
97 #include <llvm/Support/Host.h>
98 #include <llvm/Support/TargetRegistry.h>
99 #include <llvm/Support/TargetSelect.h>
100 
103 
104 ///////////////////////////////////////////////////////////////////////////
105 // Target
106 
107 #if !defined(ISPC_IS_WINDOWS) && !defined(__arm__)
/** Executes CPUID for leaf @p infoType and stores the resulting EAX, EBX,
    ECX and EDX values in info[0], info[1], info[2] and info[3].

    ECX is loaded with 0 before the instruction is issued, so leaves that
    take a sub-leaf index in ECX are always queried at sub-leaf 0 instead of
    with whatever value happened to be in the register. */
static void __cpuid(int info[4], int infoType) {
    __asm__ __volatile__("cpuid"
                         : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
                         : "0"(infoType), "2"(0));
}
111 
/** Executes CPUID for leaf @p level and sub-leaf @p count, and stores the
    resulting EAX, EBX, ECX and EDX values in info[0], info[1], info[2] and
    info[3].

    EBX is not named as an output because it may be the PIC register; it is
    swapped with a scratch register around the instruction instead. On x86-64
    the swap must cover the whole of %rbx: a 32-bit xchg writes %ebx, which
    zero-extends into %rbx and would destroy the upper half of a register the
    compiler assumes this statement leaves untouched. */
static void __cpuidex(int info[4], int level, int count) {
#if defined(__x86_64__)
    // "=&r": the scratch register is written before the inputs are consumed.
    __asm__ __volatile__("xchg{q}\t{%%}rbx, %q1\n\t"
                         "cpuid\n\t"
                         "xchg{q}\t{%%}rbx, %q1\n\t"
                         : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3])
                         : "0"(level), "2"(count));
#else
    __asm__ __volatile__("xchg{l}\t{%%}ebx, %1\n\t"
                         "cpuid\n\t"
                         "xchg{l}\t{%%}ebx, %1\n\t"
                         : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
                         : "0"(level), "2"(count));
#endif
}
120 #endif // !ISPC_IS_WINDOWS && !__arm__
121 
122 #if !defined(__arm__)
/** Returns true when both bits of mask 0x6 are set in the low word of the
    extended control register read with XGETBV (index 0), which is how this
    file decides that the OS will save the YMM registers.

    The caller in this file only evaluates it after CPUID has reported
    OSXSAVE. */
static bool __os_has_avx_support() {
    const int kRequiredState = 6;
#if defined(ISPC_IS_WINDOWS)
    // The compiler intrinsic reads the register for us.
    const unsigned long long enabledMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledMask & kRequiredState) == kRequiredState;
#else  // !defined(ISPC_IS_WINDOWS)
    // XGETBV is emitted as raw opcode bytes: older assemblers do not know the
    // mnemonic, and there is no easy way to conditionally compile based on
    // the assembler in use.
    int maskLow, maskHigh;
    __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" : "=a"(maskLow), "=d"(maskHigh) : "c"(0));
    return (maskLow & kRequiredState) == kRequiredState;
#endif // !defined(ISPC_IS_WINDOWS)
}
137 
/** Returns true when all bits of mask 0xE6 are set in the low word of the
    extended control register read with XGETBV (index 0), which is how this
    file decides that the OS saves the XMM, YMM and ZMM registers, i.e. that
    it supports AVX2 and AVX512.
    See section 2.1 of software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf

    The caller in this file only evaluates it after CPUID has reported
    OSXSAVE. */
static bool __os_has_avx512_support() {
    const int kRequiredState = 0xE6;
#if defined(ISPC_IS_WINDOWS)
    // The compiler intrinsic reads the register for us.
    const unsigned long long enabledMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledMask & kRequiredState) == kRequiredState;
#else  // !defined(ISPC_IS_WINDOWS)
    // XGETBV is emitted as raw opcode bytes: older assemblers do not know the
    // mnemonic, and there is no easy way to conditionally compile based on
    // the assembler in use.
    int maskLow, maskHigh;
    __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" : "=a"(maskLow), "=d"(maskHigh) : "c"(0));
    return (maskLow & kRequiredState) == kRequiredState;
#endif // !defined(ISPC_IS_WINDOWS)
}
153 #endif // !__arm__
154 
155 static const char *lGetSystemISA() {
156 #ifdef __arm__
157  return "neon-i32x4";
158 #else
159  int info[4];
160  __cpuid(info, 1);
161 
162  int info2[4];
163  // Call cpuid with eax=7, ecx=0
164  __cpuidex(info2, 7, 0);
165 
166  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
167  (info2[1] & (1 << 5)) != 0 && // AVX2
168  (info2[1] & (1 << 16)) != 0 && // AVX512 F
170  // We need to verify that AVX2 is also available,
171  // as well as AVX512, because our targets are supposed
172  // to use both.
173 
174  if ((info2[1] & (1 << 17)) != 0 && // AVX512 DQ
175  (info2[1] & (1 << 28)) != 0 && // AVX512 CDI
176  (info2[1] & (1 << 30)) != 0 && // AVX512 BW
177  (info2[1] & (1 << 31)) != 0) { // AVX512 VL
178  return "avx512skx-i32x16";
179  } else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
180  (info2[1] & (1 << 27)) != 0 && // AVX512 ER
181  (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
182  return "avx512knl-i32x16";
183  }
184  // If it's unknown AVX512 target, fall through and use AVX2
185  // or whatever is available in the machine.
186  }
187 
188  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
189  (info[2] & (1 << 28)) != 0 && __os_has_avx_support()) { // AVX
190  // AVX1 for sure....
191  // Ivy Bridge?
192  if ((info[2] & (1 << 29)) != 0 && // F16C
193  (info[2] & (1 << 30)) != 0) { // RDRAND
194  // So far, so good. AVX2?
195  if ((info2[1] & (1 << 5)) != 0)
196  return "avx2-i32x8";
197  else
198  return "avx1.1-i32x8";
199  }
200  // Regular AVX
201  return "avx1-i32x8";
202  } else if ((info[2] & (1 << 19)) != 0)
203  return "sse4-i32x4";
204  else if ((info[3] & (1 << 26)) != 0)
205  return "sse2-i32x4";
206  else {
207  Error(SourcePos(), "Unable to detect supported SSE/AVX ISA. Exiting.");
208  exit(1);
209  }
210 #endif
211 }
212 
/** Identifiers for every CPU model the compiler knows by name.

    The values are used directly as indices into per-CPU tables, so they are
    contiguous: CPU_None is 0, real CPUs start at CPU_Generic (1), and
    sizeofCPUtype, which must stay last, is the number of enumerators before
    it. Which entries exist depends on the LLVM version and on the ARM/NVPTX
    build options. */
typedef enum {
    // Special value, indicates that no CPU is present.
    CPU_None = 0,

    // 'Generic' CPU without any hardware SIMD capabilities.
    CPU_Generic = 1,

    // A generic 64-bit specific x86 processor model which tries to be good
    // for modern chips without enabling instruction set encodings past the
    // basic SSE2 and 64-bit ones
    CPU_x86_64,

    // Early Atom CPU. Supports SSSE3.
    CPU_Bonnell,

    // Generic Core2-like. Supports SSSE3. Isn't quite compatible with Bonnell,
    // but for ISPC the difference is negligible; ISPC doesn't make use of it.
    CPU_Core2,

    // Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2).
    CPU_Penryn,

    // Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT.
    CPU_Nehalem,

    // Sandy Bridge. Supports AVX 1.
    CPU_SandyBridge,

    // Ivy Bridge. Supports AVX 1 + RDRAND.
    CPU_IvyBridge,

    // Haswell. Supports AVX 2.
    CPU_Haswell,

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
    // Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP.
    CPU_Broadwell,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    // Knights Landing - Xeon Phi.
    // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
    //          AVX-512CDI: Conflict Detection;
    //          AVX-512ERI & PRI: 28-bit precision RCP, RSQRT and EXP transcendentals,
    //                            new prefetch instructions.
    CPU_KNL,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
    // Skylake Xeon.
    // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
    //          AVX-512CDI: Conflict Detection;
    //          AVX-512VL: Vector Length Orthogonality;
    //          AVX-512DQ: New HPC ISA (vs AVX512F);
    //          AVX-512BW: Byte and Word Support.
    CPU_SKX,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
    // Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
    CPU_Silvermont,
#endif

// FIXME: LLVM supports a ton of different ARM CPU variants--not just
// cortex-a9 and a15.  We should be able to handle any of them that also
// have NEON support.
#ifdef ISPC_ARM_ENABLED
    // ARM Cortex A15. Supports NEON VFPv4.
    CPU_CortexA15,

    // ARM Cortex A9. Supports NEON VFPv3.
    CPU_CortexA9,
#endif

#ifdef ISPC_NVPTX_ENABLED
    // NVidia CUDA-compatible SM-35 architecture.
    CPU_SM35,
#endif

    // Number of enumerators above; used to size and bound the per-CPU tables.
    sizeofCPUtype
} CPUtype;
294 
295 class AllCPUs {
296  private:
297  std::vector<std::vector<std::string>> names;
298  std::vector<std::set<CPUtype>> compat;
299 
300  std::set<CPUtype> Set(int type, ...) {
301  std::set<CPUtype> retn;
302  va_list args;
303 
304  retn.insert((CPUtype)type);
305  va_start(args, type);
306  while ((type = va_arg(args, int)) != CPU_None)
307  retn.insert((CPUtype)type);
308  va_end(args);
309 
310  return retn;
311  }
312 
313  public:
315  names = std::vector<std::vector<std::string>>(sizeofCPUtype);
316  compat = std::vector<std::set<CPUtype>>(sizeofCPUtype);
317 
318  names[CPU_None].push_back("");
319 
320  names[CPU_Generic].push_back("generic");
321 
322  names[CPU_x86_64].push_back("x86-64");
323 
324  names[CPU_Bonnell].push_back("atom");
325  names[CPU_Bonnell].push_back("bonnell");
326 
327  names[CPU_Core2].push_back("core2");
328 
329  names[CPU_Penryn].push_back("penryn");
330 
331 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
332  names[CPU_Silvermont].push_back("slm");
333  names[CPU_Silvermont].push_back("silvermont");
334 #endif
335 
336  names[CPU_Nehalem].push_back("corei7");
337  names[CPU_Nehalem].push_back("nehalem");
338 
339  names[CPU_SandyBridge].push_back("corei7-avx");
340  names[CPU_SandyBridge].push_back("sandybridge");
341 
342  names[CPU_IvyBridge].push_back("core-avx-i");
343  names[CPU_IvyBridge].push_back("ivybridge");
344 
345  names[CPU_Haswell].push_back("core-avx2");
346  names[CPU_Haswell].push_back("haswell");
347 
348 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
349  names[CPU_Broadwell].push_back("broadwell");
350 #endif
351 
352 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
353  names[CPU_KNL].push_back("knl");
354 #endif
355 
356 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
357  names[CPU_SKX].push_back("skx");
358 #endif
359 
360 #ifdef ISPC_ARM_ENABLED
361  names[CPU_CortexA15].push_back("cortex-a15");
362 
363  names[CPU_CortexA9].push_back("cortex-a9");
364 #endif
365 
366 #ifdef ISPC_NVPTX_ENABLED
367  names[CPU_SM35].push_back("sm_35");
368 #endif
369 
370 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 // LLVM 3.2 or 3.3
371 #define CPU_Silvermont CPU_Nehalem
372 #else /* LLVM 3.4+ */
375 #endif
376 
377 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
380 #endif
381 
382 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
385 #endif
386 
387 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
388 #define CPU_Broadwell CPU_Haswell
389 #else /* LLVM 3.6+ */
393 #endif
402  compat[CPU_Penryn] =
407 
409 
410 #ifdef ISPC_ARM_ENABLED
411  compat[CPU_CortexA15] = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15, CPU_None);
412  compat[CPU_CortexA9] = Set(CPU_Generic, CPU_CortexA9, CPU_None);
413 #endif
414 
415 #ifdef ISPC_NVPTX_ENABLED
416  compat[CPU_SM35] = Set(CPU_Generic, CPU_SM35, CPU_None);
417 #endif
418  }
419 
420  std::string HumanReadableListOfNames() {
421  std::stringstream CPUs;
422  for (int i = CPU_Generic; i < sizeofCPUtype; i++) {
423  CPUs << names[i][0];
424  if (names[i].size() > 1) {
425  CPUs << " (synonyms: " << names[i][1];
426  for (int j = 2, je = names[i].size(); j < je; j++)
427  CPUs << ", " << names[i][j];
428  CPUs << ")";
429  }
430  if (i < sizeofCPUtype - 1)
431  CPUs << ", ";
432  }
433  return CPUs.str();
434  }
435 
436  std::string &GetDefaultNameFromType(CPUtype type) {
437  Assert((type >= CPU_None) && (type < sizeofCPUtype));
438  return names[type][0];
439  }
440 
441  CPUtype GetTypeFromName(std::string name) {
442  CPUtype retn = CPU_None;
443 
444  for (int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++)
445  for (int j = 0, je = names[i].size(); (retn == CPU_None) && (j < je); j++)
446  if (!name.compare(names[i][j]))
447  retn = (CPUtype)i;
448  return retn;
449  }
450 
451  bool BackwardCompatible(CPUtype what, CPUtype with) {
452  Assert((what > CPU_None) && (what < sizeofCPUtype));
453  Assert((with > CPU_None) && (with < sizeofCPUtype));
454  return compat[what].find(with) != compat[what].end();
455  }
456 };
457 
458 Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget,
459  std::string genericAsSmth)
460  : m_target(NULL), m_targetMachine(NULL), m_dataLayout(NULL), m_valid(false), m_isa(SSE2),
461  m_treatGenericAsSmth(genericAsSmth), m_arch(""), m_is32Bit(true), m_cpu(""), m_attributes(""),
462 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
463  m_tf_attributes(NULL),
464 #endif
465  m_nativeVectorWidth(-1), m_nativeVectorAlignment(-1), m_dataTypeWidth(-1), m_vectorWidth(-1), m_generatePIC(pic),
466  m_maskingIsFree(false), m_maskBitCount(-1), m_hasHalf(false), m_hasRand(false), m_hasGather(false),
467  m_hasScatter(false), m_hasTranscendentals(false), m_hasTrigonometry(false), m_hasRsqrtd(false), m_hasRcpd(false),
468  m_hasVecPrefetch(false) {
469  CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
470  AllCPUs a;
471 
472  if (cpu) {
473  CPUID = a.GetTypeFromName(cpu);
474  if (CPUID == CPU_None) {
475  Error(SourcePos(),
476  "Error: CPU type \"%s\" unknown. Supported"
477  " CPUs: %s.",
478  cpu, a.HumanReadableListOfNames().c_str());
479  return;
480  }
481  }
482 
483  if (isa == NULL) {
484  // If a CPU was specified explicitly, try to pick the best
485  // possible ISA based on that.
486  switch (CPUID) {
487  case CPU_None:
488  // No CPU and no ISA, so use system info to figure out
489  // what this CPU supports.
490  isa = lGetSystemISA();
491  Warning(SourcePos(),
492  "No --target specified on command-line."
493  " Using default system target \"%s\".",
494  isa);
495  break;
496 
497  case CPU_Generic:
498  isa = "generic-1";
499  break;
500 
501 #ifdef ISPC_NVPTX_ENABLED
502  case CPU_SM35:
503  isa = "nvptx";
504  break;
505 #endif
506 
507 #ifdef ISPC_ARM_ENABLED
508  case CPU_CortexA9:
509  case CPU_CortexA15:
510  isa = "neon-i32x4";
511  break;
512 #endif
513 
514 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
515  case CPU_KNL:
516  isa = "avx512knl-i32x16";
517  break;
518 #endif
519 
520 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
521  case CPU_SKX:
522  isa = "avx512skx-i32x16";
523  break;
524 #endif
525 
526 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
527  case CPU_Broadwell:
528 #endif
529  case CPU_Haswell:
530  isa = "avx2-i32x8";
531  break;
532 
533  case CPU_IvyBridge:
534  isa = "avx1.1-i32x8";
535  break;
536 
537  case CPU_SandyBridge:
538  isa = "avx1-i32x8";
539  break;
540 
541  // Penryn is here because ISPC does not use SSE 4.2
542  case CPU_Penryn:
543  case CPU_Nehalem:
544 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4
545  case CPU_Silvermont:
546 #endif
547  isa = "sse4-i32x4";
548  break;
549 
550  default:
551  isa = "sse2-i32x4";
552  break;
553  }
554  if (CPUID != CPU_None)
555  Warning(SourcePos(),
556  "No --target specified on command-line."
557  " Using ISA \"%s\" based on specified CPU \"%s\".",
558  isa, cpu);
559  }
560 
561  if (!strcasecmp(isa, "host")) {
562  isa = lGetSystemISA();
563  }
564 
565  if (arch == NULL) {
566 #ifdef ISPC_ARM_ENABLED
567  if (!strncmp(isa, "neon", 4))
568  arch = "arm";
569  else
570 #endif
571 #ifdef ISPC_NVPTX_ENABLED
572  if (!strncmp(isa, "nvptx", 5))
573  arch = "nvptx64";
574  else
575 #endif /* ISPC_NVPTX_ENABLED */
576  arch = "x86-64";
577  }
578 
579  // Define arch alias
580  if (std::string(arch) == "x86_64")
581  arch = "x86-64";
582 
583  bool error = false;
584 
585  // Make sure the target architecture is a known one; print an error
586  // with the valid ones otherwise.
587 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
588  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::targets().begin();
589  iter != llvm::TargetRegistry::targets().end(); ++iter) {
590 #else
591  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin(); iter != llvm::TargetRegistry::end();
592  ++iter) {
593 #endif
594  if (std::string(arch) == iter->getName()) {
595  this->m_target = &*iter;
596  break;
597  }
598  }
599  if (this->m_target == NULL) {
600  fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
601  llvm::TargetRegistry::iterator iter;
602 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
603  for (iter = llvm::TargetRegistry::targets().begin(); iter != llvm::TargetRegistry::targets().end(); ++iter)
604 #else
605  for (iter = llvm::TargetRegistry::begin(); iter != llvm::TargetRegistry::end(); ++iter)
606 #endif
607  fprintf(stderr, "%s ", iter->getName());
608  fprintf(stderr, "\n");
609  error = true;
610  } else {
611  this->m_arch = arch;
612  }
613 
614  // Check default LLVM generated targets
615  if (!strcasecmp(isa, "sse2") || !strcasecmp(isa, "sse2-i32x4")) {
616  this->m_isa = Target::SSE2;
617  this->m_nativeVectorWidth = 4;
618  this->m_nativeVectorAlignment = 16;
619  this->m_dataTypeWidth = 32;
620  this->m_vectorWidth = 4;
621  this->m_maskingIsFree = false;
622  this->m_maskBitCount = 32;
623  CPUfromISA = CPU_x86_64;
624  } else if (!strcasecmp(isa, "sse2-x2") || !strcasecmp(isa, "sse2-i32x8")) {
625  this->m_isa = Target::SSE2;
626  this->m_nativeVectorWidth = 4;
627  this->m_nativeVectorAlignment = 16;
628  this->m_dataTypeWidth = 32;
629  this->m_vectorWidth = 8;
630  this->m_maskingIsFree = false;
631  this->m_maskBitCount = 32;
632  CPUfromISA = CPU_Core2;
633  } else if (!strcasecmp(isa, "sse4") || !strcasecmp(isa, "sse4-i32x4")) {
634  this->m_isa = Target::SSE4;
635  this->m_nativeVectorWidth = 4;
636  this->m_nativeVectorAlignment = 16;
637  this->m_dataTypeWidth = 32;
638  this->m_vectorWidth = 4;
639  this->m_maskingIsFree = false;
640  this->m_maskBitCount = 32;
641  CPUfromISA = CPU_Nehalem;
642  } else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2") || !strcasecmp(isa, "sse4-i32x8")) {
643  this->m_isa = Target::SSE4;
644  this->m_nativeVectorWidth = 4;
645  this->m_nativeVectorAlignment = 16;
646  this->m_dataTypeWidth = 32;
647  this->m_vectorWidth = 8;
648  this->m_maskingIsFree = false;
649  this->m_maskBitCount = 32;
650  CPUfromISA = CPU_Nehalem;
651  } else if (!strcasecmp(isa, "sse4-i8x16")) {
652  this->m_isa = Target::SSE4;
653  this->m_nativeVectorWidth = 16;
654  this->m_nativeVectorAlignment = 16;
655  this->m_dataTypeWidth = 8;
656  this->m_vectorWidth = 16;
657  this->m_maskingIsFree = false;
658  this->m_maskBitCount = 8;
659  CPUfromISA = CPU_Nehalem;
660  } else if (!strcasecmp(isa, "sse4-i16x8")) {
661  this->m_isa = Target::SSE4;
662  this->m_nativeVectorWidth = 8;
663  this->m_nativeVectorAlignment = 16;
664  this->m_dataTypeWidth = 16;
665  this->m_vectorWidth = 8;
666  this->m_maskingIsFree = false;
667  this->m_maskBitCount = 16;
668  CPUfromISA = CPU_Nehalem;
669  } else if (!strcasecmp(isa, "generic-4") || !strcasecmp(isa, "generic-x4")) {
670  this->m_isa = Target::GENERIC;
671  this->m_nativeVectorWidth = 4;
672  this->m_nativeVectorAlignment = 16;
673  this->m_vectorWidth = 4;
674  this->m_maskingIsFree = true;
675  this->m_maskBitCount = 1;
676  this->m_hasHalf = true;
677  this->m_hasTranscendentals = true;
678  this->m_hasTrigonometry = true;
679  this->m_hasGather = this->m_hasScatter = true;
680  this->m_hasRsqrtd = this->m_hasRcpd = true;
681  CPUfromISA = CPU_Generic;
682  } else if (!strcasecmp(isa, "generic-8") || !strcasecmp(isa, "generic-x8")) {
683  this->m_isa = Target::GENERIC;
684  this->m_nativeVectorWidth = 8;
685  this->m_nativeVectorAlignment = 32;
686  this->m_vectorWidth = 8;
687  this->m_maskingIsFree = true;
688  this->m_maskBitCount = 1;
689  this->m_hasHalf = true;
690  this->m_hasTranscendentals = true;
691  this->m_hasTrigonometry = true;
692  this->m_hasGather = this->m_hasScatter = true;
693  this->m_hasRsqrtd = this->m_hasRcpd = true;
694  CPUfromISA = CPU_Generic;
695  } else if (!strcasecmp(isa, "generic-16") || !strcasecmp(isa, "generic-x16") ||
696  // We treat *-generic-16 as generic-16, but with special name mangling
697  strstr(isa, "-generic-16") || strstr(isa, "-generic-x16")) {
698  this->m_isa = Target::GENERIC;
699  if (strstr(isa, "-generic-16") || strstr(isa, "-generic-x16")) {
700  // It is used for appropriate name mangling and dispatch function during multitarget compilation
701  this->m_treatGenericAsSmth = isa;
702  // We need to create appropriate name for mangling.
703  // Remove "-x16" or "-16" and replace "-" with "_".
704  this->m_treatGenericAsSmth =
705  this->m_treatGenericAsSmth.substr(0, this->m_treatGenericAsSmth.find_last_of("-"));
706  std::replace(this->m_treatGenericAsSmth.begin(), this->m_treatGenericAsSmth.end(), '-', '_');
707  }
708  this->m_nativeVectorWidth = 16;
709  this->m_nativeVectorAlignment = 64;
710  this->m_vectorWidth = 16;
711  this->m_maskingIsFree = true;
712  this->m_maskBitCount = 1;
713  this->m_hasHalf = true;
714  this->m_hasTranscendentals = true;
715  // It's set to false, because stdlib implementation of math functions
716  // is faster on MIC, than "native" implementation provided by the
717  // icc compiler.
718  this->m_hasTrigonometry = false;
719  this->m_hasGather = this->m_hasScatter = true;
720  this->m_hasRsqrtd = this->m_hasRcpd = true;
721  // It's set to true, because MIC has hardware vector prefetch instruction
722  this->m_hasVecPrefetch = true;
723  CPUfromISA = CPU_Generic;
724  } else if (!strcasecmp(isa, "generic-32") || !strcasecmp(isa, "generic-x32")) {
725  this->m_isa = Target::GENERIC;
726  this->m_nativeVectorWidth = 32;
727  this->m_nativeVectorAlignment = 64;
728  this->m_vectorWidth = 32;
729  this->m_maskingIsFree = true;
730  this->m_maskBitCount = 1;
731  this->m_hasHalf = true;
732  this->m_hasTranscendentals = true;
733  this->m_hasTrigonometry = true;
734  this->m_hasGather = this->m_hasScatter = true;
735  this->m_hasRsqrtd = this->m_hasRcpd = true;
736  CPUfromISA = CPU_Generic;
737  } else if (!strcasecmp(isa, "generic-64") || !strcasecmp(isa, "generic-x64")) {
738  this->m_isa = Target::GENERIC;
739  this->m_nativeVectorWidth = 64;
740  this->m_nativeVectorAlignment = 64;
741  this->m_vectorWidth = 64;
742  this->m_maskingIsFree = true;
743  this->m_maskBitCount = 1;
744  this->m_hasHalf = true;
745  this->m_hasTranscendentals = true;
746  this->m_hasTrigonometry = true;
747  this->m_hasGather = this->m_hasScatter = true;
748  this->m_hasRsqrtd = this->m_hasRcpd = true;
749  CPUfromISA = CPU_Generic;
750  } else if (!strcasecmp(isa, "generic-1") || !strcasecmp(isa, "generic-x1")) {
751  this->m_isa = Target::GENERIC;
752  this->m_nativeVectorWidth = 1;
753  this->m_nativeVectorAlignment = 16;
754  this->m_vectorWidth = 1;
755  this->m_maskingIsFree = false;
756  this->m_maskBitCount = 32;
757  CPUfromISA = CPU_Generic;
758  } else if (!strcasecmp(isa, "avx1-i32x4")) {
759  this->m_isa = Target::AVX;
760  this->m_nativeVectorWidth = 8;
761  this->m_nativeVectorAlignment = 32;
762  this->m_dataTypeWidth = 32;
763  this->m_vectorWidth = 4;
764  this->m_maskingIsFree = false;
765  this->m_maskBitCount = 32;
766  CPUfromISA = CPU_SandyBridge;
767  } else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1") || !strcasecmp(isa, "avx1-i32x8")) {
768  this->m_isa = Target::AVX;
769  this->m_nativeVectorWidth = 8;
770  this->m_nativeVectorAlignment = 32;
771  this->m_dataTypeWidth = 32;
772  this->m_vectorWidth = 8;
773  this->m_maskingIsFree = false;
774  this->m_maskBitCount = 32;
775  CPUfromISA = CPU_SandyBridge;
776  } else if (!strcasecmp(isa, "avx-i64x4") || !strcasecmp(isa, "avx1-i64x4")) {
777  this->m_isa = Target::AVX;
778  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
779  this->m_nativeVectorAlignment = 32;
780  this->m_dataTypeWidth = 64;
781  this->m_vectorWidth = 4;
782  this->m_maskingIsFree = false;
783  this->m_maskBitCount = 64;
784  CPUfromISA = CPU_SandyBridge;
785  } else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2") || !strcasecmp(isa, "avx1-i32x16")) {
786  this->m_isa = Target::AVX;
787  this->m_nativeVectorWidth = 8;
788  this->m_nativeVectorAlignment = 32;
789  this->m_dataTypeWidth = 32;
790  this->m_vectorWidth = 16;
791  this->m_maskingIsFree = false;
792  this->m_maskBitCount = 32;
793  CPUfromISA = CPU_SandyBridge;
794  } else if (!strcasecmp(isa, "avx1.1") || !strcasecmp(isa, "avx1.1-i32x8")) {
795  this->m_isa = Target::AVX11;
796  this->m_nativeVectorWidth = 8;
797  this->m_nativeVectorAlignment = 32;
798  this->m_dataTypeWidth = 32;
799  this->m_vectorWidth = 8;
800  this->m_maskingIsFree = false;
801  this->m_maskBitCount = 32;
802  this->m_hasHalf = true;
803  this->m_hasRand = true;
804  CPUfromISA = CPU_IvyBridge;
805  } else if (!strcasecmp(isa, "avx1.1-x2") || !strcasecmp(isa, "avx1.1-i32x16")) {
806  this->m_isa = Target::AVX11;
807  this->m_nativeVectorWidth = 8;
808  this->m_nativeVectorAlignment = 32;
809  this->m_dataTypeWidth = 32;
810  this->m_vectorWidth = 16;
811  this->m_maskingIsFree = false;
812  this->m_maskBitCount = 32;
813  this->m_hasHalf = true;
814  this->m_hasRand = true;
815  CPUfromISA = CPU_IvyBridge;
816  } else if (!strcasecmp(isa, "avx1.1-i64x4")) {
817  this->m_isa = Target::AVX11;
818  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
819  this->m_nativeVectorAlignment = 32;
820  this->m_dataTypeWidth = 64;
821  this->m_vectorWidth = 4;
822  this->m_maskingIsFree = false;
823  this->m_maskBitCount = 64;
824  this->m_hasHalf = true;
825  this->m_hasRand = true;
826  CPUfromISA = CPU_IvyBridge;
827  } else if (!strcasecmp(isa, "avx2") || !strcasecmp(isa, "avx2-i32x8")) {
828  this->m_isa = Target::AVX2;
829  this->m_nativeVectorWidth = 8;
830  this->m_nativeVectorAlignment = 32;
831  this->m_dataTypeWidth = 32;
832  this->m_vectorWidth = 8;
833  this->m_maskingIsFree = false;
834  this->m_maskBitCount = 32;
835  this->m_hasHalf = true;
836  this->m_hasRand = true;
837  this->m_hasGather = true;
838  CPUfromISA = CPU_Haswell;
839  } else if (!strcasecmp(isa, "avx2-x2") || !strcasecmp(isa, "avx2-i32x16")) {
840  this->m_isa = Target::AVX2;
841  this->m_nativeVectorWidth = 16;
842  this->m_nativeVectorAlignment = 32;
843  this->m_dataTypeWidth = 32;
844  this->m_vectorWidth = 16;
845  this->m_maskingIsFree = false;
846  this->m_maskBitCount = 32;
847  this->m_hasHalf = true;
848  this->m_hasRand = true;
849  this->m_hasGather = true;
850  CPUfromISA = CPU_Haswell;
851  } else if (!strcasecmp(isa, "avx2-i64x4")) {
852  this->m_isa = Target::AVX2;
853  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
854  this->m_nativeVectorAlignment = 32;
855  this->m_dataTypeWidth = 64;
856  this->m_vectorWidth = 4;
857  this->m_maskingIsFree = false;
858  this->m_maskBitCount = 64;
859  this->m_hasHalf = true;
860  this->m_hasRand = true;
861  this->m_hasGather = true;
862  CPUfromISA = CPU_Haswell;
863  }
864 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
865  else if (!strcasecmp(isa, "avx512knl-i32x16")) {
866  this->m_isa = Target::KNL_AVX512;
867  this->m_nativeVectorWidth = 16;
868  this->m_nativeVectorAlignment = 64;
869  this->m_dataTypeWidth = 32;
870  this->m_vectorWidth = 16;
871  this->m_maskingIsFree = true;
872  this->m_maskBitCount = 8;
873  this->m_hasHalf = true;
874  this->m_hasRand = true;
875  this->m_hasGather = this->m_hasScatter = true;
876  this->m_hasTranscendentals = false;
877  // For MIC it is set to true due to performance reasons. The option should be tested.
878  this->m_hasTrigonometry = false;
879  this->m_hasRsqrtd = this->m_hasRcpd = false;
880  this->m_hasVecPrefetch = false;
881  CPUfromISA = CPU_KNL;
882  }
883 #endif
884 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
885  else if (!strcasecmp(isa, "avx512skx-i32x16")) {
886  this->m_isa = Target::SKX_AVX512;
887  this->m_nativeVectorWidth = 16;
888  this->m_nativeVectorAlignment = 64;
889  this->m_dataTypeWidth = 32;
890  this->m_vectorWidth = 16;
891  this->m_maskingIsFree = true;
892  this->m_maskBitCount = 8;
893  this->m_hasHalf = true;
894  this->m_hasRand = true;
895  this->m_hasGather = this->m_hasScatter = true;
896  this->m_hasTranscendentals = false;
897  // For MIC it is set to true due to performance reasons. The option should be tested.
898  this->m_hasTrigonometry = false;
899  this->m_hasRsqrtd = this->m_hasRcpd = false;
900  this->m_hasVecPrefetch = false;
901  CPUfromISA = CPU_SKX;
902  }
903 #endif
904 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+
905  else if (!strcasecmp(isa, "avx512skx-i32x8")) {
906  this->m_isa = Target::SKX_AVX512;
907  this->m_nativeVectorWidth = 16;
908  this->m_nativeVectorAlignment = 64;
909  this->m_dataTypeWidth = 32;
910  this->m_vectorWidth = 8;
911  this->m_maskingIsFree = true;
912  this->m_maskBitCount = 8;
913  this->m_hasHalf = true;
914  this->m_hasRand = true;
915  this->m_hasGather = this->m_hasScatter = true;
916  this->m_hasTranscendentals = false;
917  // For MIC it is set to true due to performance reasons. The option should be tested.
918  this->m_hasTrigonometry = false;
919  this->m_hasRsqrtd = this->m_hasRcpd = false;
920  this->m_hasVecPrefetch = false;
921  CPUfromISA = CPU_SKX;
922  this->m_funcAttributes.push_back(std::make_pair("prefer-vector-width", "256"));
923  this->m_funcAttributes.push_back(std::make_pair("min-legal-vector-width", "256"));
924  }
925 #endif
926 #ifdef ISPC_ARM_ENABLED
927  else if (!strcasecmp(isa, "neon-i8x16")) {
928  this->m_isa = Target::NEON8;
929  this->m_nativeVectorWidth = 16;
930  this->m_nativeVectorAlignment = 16;
931  this->m_dataTypeWidth = 8;
932  this->m_vectorWidth = 16;
933  this->m_attributes = "+neon,+fp16";
934  this->m_hasHalf = true; // ??
935  this->m_maskingIsFree = false;
936  this->m_maskBitCount = 8;
937  } else if (!strcasecmp(isa, "neon-i16x8")) {
938  this->m_isa = Target::NEON16;
939  this->m_nativeVectorWidth = 8;
940  this->m_nativeVectorAlignment = 16;
941  this->m_dataTypeWidth = 16;
942  this->m_vectorWidth = 8;
943  this->m_attributes = "+neon,+fp16";
944  this->m_hasHalf = true; // ??
945  this->m_maskingIsFree = false;
946  this->m_maskBitCount = 16;
947  } else if (!strcasecmp(isa, "neon") || !strcasecmp(isa, "neon-i32x4")) {
948  this->m_isa = Target::NEON32;
949  this->m_nativeVectorWidth = 4;
950  this->m_nativeVectorAlignment = 16;
951  this->m_dataTypeWidth = 32;
952  this->m_vectorWidth = 4;
953  this->m_attributes = "+neon,+fp16";
954  this->m_hasHalf = true; // ??
955  this->m_maskingIsFree = false;
956  this->m_maskBitCount = 32;
957  }
958 #endif
959 #ifdef ISPC_NVPTX_ENABLED
960  else if (!strcasecmp(isa, "nvptx")) {
961  this->m_isa = Target::NVPTX;
962  this->m_cpu = "sm_35";
963  this->m_nativeVectorWidth = 32;
964  this->m_nativeVectorAlignment = 32;
965  this->m_vectorWidth = 1;
966  this->m_hasHalf = true;
967  this->m_maskingIsFree = true;
968  this->m_maskBitCount = 1;
969  this->m_hasTranscendentals = true;
970  this->m_hasTrigonometry = true;
971  this->m_hasGather = this->m_hasScatter = false;
972  CPUfromISA = CPU_SM35;
973  }
974 #endif /* ISPC_NVPTX_ENABLED */
975  else {
976  Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.", isa, SupportedTargets());
977  error = true;
978  }
979 
980 #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
981  if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4))
982  CPUID = CPU_CortexA9;
983 #endif
984 
985  if (CPUID == CPU_None) {
986 #ifndef ISPC_ARM_ENABLED
987  if (isa == NULL) {
988 #endif
989  std::string hostCPU = llvm::sys::getHostCPUName();
990  if (hostCPU.size() > 0)
991  cpu = strdup(hostCPU.c_str());
992  else {
993  Warning(SourcePos(), "Unable to determine host CPU!\n");
994  cpu = a.GetDefaultNameFromType(CPU_Generic).c_str();
995  }
996 #ifndef ISPC_ARM_ENABLED
997  } else {
998  cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
999  }
1000 #endif
1001  } else {
1002  if ((CPUfromISA != CPU_None) && !a.BackwardCompatible(CPUID, CPUfromISA)) {
1003  Error(SourcePos(),
1004  "The requested CPU is incompatible"
1005  " with the CPU %s needs: %s vs. %s!\n",
1006  isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str());
1007  return;
1008  }
1009  cpu = a.GetDefaultNameFromType(CPUID).c_str();
1010  }
1011  this->m_cpu = cpu;
1012 
1013  if (!error) {
1014  // Create TargetMachine
1015  std::string triple = GetTripleString();
1016 
1017 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_8
1018  llvm::Reloc::Model relocModel = m_generatePIC ? llvm::Reloc::PIC_ : llvm::Reloc::Default;
1019 #else
1020  llvm::Optional<llvm::Reloc::Model> relocModel;
1021  if (m_generatePIC) {
1022  relocModel = llvm::Reloc::PIC_;
1023  }
1024 #endif
1025  std::string featuresString = m_attributes;
1026  llvm::TargetOptions options;
1027 #ifdef ISPC_ARM_ENABLED
1028  if (m_isa == Target::NEON8 || m_isa == Target::NEON16 || m_isa == Target::NEON32)
1029  options.FloatABIType = llvm::FloatABI::Hard;
1030 #endif
1031  if (g->opt.disableFMA == false)
1032  options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
1033 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1034  if (g->NoOmitFramePointer)
1035  options.NoFramePointerElim = true;
1036 #ifdef ISPC_IS_WINDOWS
1037  if (strcmp("x86", arch) == 0) {
1038  // Workaround for issue #503 (LLVM issue 14646).
1039  // It's Win32 specific.
1040  options.NoFramePointerElim = true;
1041  }
1042 #endif
1043 #endif
1044  m_targetMachine = m_target->createTargetMachine(triple, m_cpu, featuresString, options, relocModel);
1045  Assert(m_targetMachine != NULL);
1046 
1047  // Set Optimization level for llvm codegen based on Optimization level
1048  // requested by user via ISPC Optimization Flag. Mapping is :
1049  // ISPC O0 -> Codegen O0
1050  // ISPC O1,O2,O3,default -> Codegen O3
1051  llvm::CodeGenOpt::Level cOptLevel = llvm::CodeGenOpt::Level::Aggressive;
1052  switch (g->codegenOptLevel) {
1053  case Globals::CodegenOptLevel::None:
1054  cOptLevel = llvm::CodeGenOpt::Level::None;
1055  break;
1056 
1057  case Globals::CodegenOptLevel::Aggressive:
1058  cOptLevel = llvm::CodeGenOpt::Level::Aggressive;
1059  break;
1060  }
1061  m_targetMachine->setOptLevel(cOptLevel);
1062 
1063 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1064  m_targetMachine->setAsmVerbosityDefault(true);
1065 #else /* LLVM 3.7+ */
1066  m_targetMachine->Options.MCOptions.AsmVerbose = true;
1067 #endif
1068 
1069 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
1070  // Change default version of generated DWARF.
1071  if (g->generateDWARFVersion != 0) {
1072  m_targetMachine->Options.MCOptions.DwarfVersion = g->generateDWARFVersion;
1073  }
1074 #endif
1075 
1076  // Initialize TargetData/DataLayout in 3 steps.
1077  // 1. Get default data layout first
1078  std::string dl_string;
1079 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
1080  dl_string = m_targetMachine->getSubtargetImpl()->getDataLayout()->getStringRepresentation();
1081 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1082  dl_string = m_targetMachine->createDataLayout().getStringRepresentation();
1083 #else // LLVM 3.5- or LLVM 3.7
1084  dl_string = m_targetMachine->getDataLayout()->getStringRepresentation();
1085 #endif
1086  // 2. Adjust for generic
1087  if (m_isa == Target::GENERIC) {
1088  // <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
1089  // that to the regular datalayout string for IA..
1090  // For generic-4 target we need to treat <4 x i1> as 128 bit value
1091  // in terms of required memory storage and alignment, as this is
1092  // translated to __m128 type.
1093  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
1094  "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
1095  "f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
1096  }
1097 #ifdef ISPC_NVPTX_ENABLED
1098  else if (m_isa == Target::NVPTX) {
1099  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:"
1100  "32-v64:64:64-v128:128:128-n16:32:64";
1101  }
1102 #endif
1103 
1104  // 3. Finally set member data
1105  m_dataLayout = new llvm::DataLayout(dl_string);
1106 
1107  // Set is32Bit
1108  // This indicates if we are compiling for 32 bit platform
1109  // and can assume 32 bit runtime.
1110  // FIXME: all generic targets are handled as 64 bit, which is incorrect.
1111 
1112  this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
1113 
1114  // TO-DO : Revisit addition of "target-features" and "target-cpu" for ARM support.
1115  llvm::AttrBuilder fattrBuilder;
1116  for (auto const &f_attr : m_funcAttributes)
1117  fattrBuilder.addAttribute(f_attr.first, f_attr.second);
1118 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
1119  this->m_tf_attributes =
1120  new llvm::AttributeSet(llvm::AttributeSet::get(*g->ctx, llvm::AttributeSet::FunctionIndex, fattrBuilder));
1121 #else // LLVM 5.0+
1122  this->m_tf_attributes = new llvm::AttrBuilder(fattrBuilder);
1123 #endif
1124 
1126  }
1127 
1128  m_valid = !error;
1129 
1130  if (printTarget) {
1131  printf("Target Triple: %s\n", m_targetMachine->getTargetTriple().str().c_str());
1132  printf("Target CPU: %s\n", m_targetMachine->getTargetCPU().str().c_str());
1133  printf("Target Feature String: %s\n", m_targetMachine->getTargetFeatureString().str().c_str());
1134  }
1135 
1136  return;
1137 }
1138 
1139 std::string Target::SupportedCPUs() {
1140  AllCPUs a;
1141  return a.HumanReadableListOfNames();
1142 }
1143 
1144 const char *Target::SupportedArchs() {
1145  return
1146 #ifdef ISPC_ARM_ENABLED
1147  "arm, "
1148 #endif
1149  "x86, x86-64";
1150 }
1151 
1153  return "host, sse2-i32x4, sse2-i32x8, "
1154  "sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
1155  "avx1-i32x4, "
1156  "avx1-i32x8, avx1-i32x16, avx1-i64x4, "
1157  "avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4, "
1158  "avx2-i32x8, avx2-i32x16, avx2-i64x4, "
1159 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1160  "avx512knl-i32x16, "
1161 #endif
1162 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1163  "avx512skx-i32x16, "
1164 #endif
1165 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+
1166  "avx512skx-i32x8, "
1167 #endif
1168  "generic-x1, generic-x4, generic-x8, generic-x16, "
1169  "generic-x32, generic-x64, *-generic-x16"
1170 #ifdef ISPC_ARM_ENABLED
1171  ", neon-i8x16, neon-i16x8, neon-i32x4"
1172 #endif
1173 #ifdef ISPC_NVPTX_ENABLED
1174  ", nvptx"
1175 #endif
1176  ;
1177 }
1178 
1179 std::string Target::GetTripleString() const {
1180  llvm::Triple triple;
1181 #ifdef ISPC_ARM_ENABLED
1182  if (m_arch == "arm") {
1183  triple.setTriple("armv7-eabi");
1184  } else
1185 #endif
1186  {
1187  // Start with the host triple as the default
1188  triple.setTriple(llvm::sys::getDefaultTargetTriple());
1189 
1190  // And override the arch in the host triple based on what the user
1191  // specified. Here we need to deal with the fact that LLVM uses one
1192  // naming convention for targets TargetRegistry, but wants some
1193  // slightly different ones for the triple. TODO: is there a way to
1194  // have it do this remapping, which would presumably be a bit less
1195  // error prone?
1196  if (m_arch == "x86")
1197  triple.setArchName("i386");
1198  else if (m_arch == "x86-64")
1199  triple.setArchName("x86_64");
1200 #ifdef ISPC_NVPTX_ENABLED
1201  else if (m_arch == "nvptx64")
1202  triple = llvm::Triple("nvptx64", "nvidia", "cuda");
1203 #endif /* ISPC_NVPTX_ENABLED */
1204  else
1205  triple.setArchName(m_arch);
1206  }
1207  return triple.str();
1208 }
1209 
1210 // This function returns string representation of ISA for the purpose of
1211 // mangling. And may return any unique string, preferably short, like
1212 // sse4, avx and etc.
1213 const char *Target::ISAToString(ISA isa) {
1214  switch (isa) {
1215 #ifdef ISPC_ARM_ENABLED
1216  case Target::NEON8:
1217  return "neon-8";
1218  case Target::NEON16:
1219  return "neon-16";
1220  case Target::NEON32:
1221  return "neon-32";
1222 #endif
1223  case Target::SSE2:
1224  return "sse2";
1225  case Target::SSE4:
1226  return "sse4";
1227  case Target::AVX:
1228  return "avx";
1229  case Target::AVX11:
1230  return "avx11";
1231  case Target::AVX2:
1232  return "avx2";
1233 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1234  case Target::KNL_AVX512:
1235  return "avx512knl";
1236 #endif
1237 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1238  case Target::SKX_AVX512:
1239  return "avx512skx";
1240 #endif
1241  case Target::GENERIC:
1242  return "generic";
1243 #ifdef ISPC_NVPTX_ENABLED
1244  case Target::NVPTX:
1245  return "nvptx";
1246 #endif /* ISPC_NVPTX_ENABLED */
1247  default:
1248  FATAL("Unhandled target in ISAToString()");
1249  }
1250  return "";
1251 }
1252 
1253 const char *Target::GetISAString() const { return ISAToString(m_isa); }
1254 
1255 // This function returns string representation of default target corresponding
1256 // to ISA. I.e. for SSE4 it's sse4-i32x4, for AVX11 it's avx1.1-i32x8. This
1257 // string may be used to initialize Target.
1258 const char *Target::ISAToTargetString(ISA isa) {
1259  switch (isa) {
1260 #ifdef ISPC_ARM_ENABLED
1261  case Target::NEON8:
1262  return "neon-8";
1263  case Target::NEON16:
1264  return "neon-16";
1265  case Target::NEON32:
1266  return "neon-32";
1267 #endif
1268  case Target::SSE2:
1269  return "sse2-i32x4";
1270  case Target::SSE4:
1271  return "sse4-i32x4";
1272  case Target::AVX:
1273  return "avx1-i32x8";
1274  case Target::AVX11:
1275  return "avx1.1-i32x8";
1276  case Target::AVX2:
1277  return "avx2-i32x8";
1278 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1279  case Target::KNL_AVX512:
1280  return "avx512knl-i32x16";
1281 #endif
1282 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1283  case Target::SKX_AVX512:
1284  return "avx512skx-i32x16";
1285 #endif
1286  case Target::GENERIC:
1287  return "generic-4";
1288 #ifdef ISPC_NVPTX_ENABLED
1289  case Target::NVPTX:
1290  return "nvptx";
1291 #endif /* ISPC_NVPTX_ENABLED */
1292  default:
1293  FATAL("Unhandled target in ISAToTargetString()");
1294  }
1295  return "";
1296 }
1297 
1298 const char *Target::GetISATargetString() const { return ISAToString(m_isa); }
1299 
1300 static bool lGenericTypeLayoutIndeterminate(llvm::Type *type) {
1301  if (type->isFloatingPointTy() || type->isX86_MMXTy() || type->isVoidTy() || type->isIntegerTy() ||
1302  type->isLabelTy() || type->isMetadataTy())
1303  return false;
1304 
1306  return true;
1307 
1308  llvm::ArrayType *at = llvm::dyn_cast<llvm::ArrayType>(type);
1309  if (at != NULL)
1310  return lGenericTypeLayoutIndeterminate(at->getElementType());
1311 
1312  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(type);
1313  if (pt != NULL)
1314  return false;
1315 
1316  llvm::StructType *st = llvm::dyn_cast<llvm::StructType>(type);
1317  if (st != NULL) {
1318  for (int i = 0; i < (int)st->getNumElements(); ++i)
1319  if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
1320  return true;
1321  return false;
1322  }
1323 
1324  Assert(llvm::isa<llvm::VectorType>(type));
1325  return true;
1326 }
1327 
1328 llvm::Value *Target::SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd) {
1330  llvm::Value *index[1] = {LLVMInt32(1)};
1331  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1332  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1333  llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
1334 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1335  llvm::Instruction *gep = llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep", insertAtEnd);
1336 #else /* LLVM 3.7+ */
1337  llvm::Instruction *gep =
1338  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr, arrayRef, "sizeof_gep", insertAtEnd);
1339 #endif
1341  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type, "sizeof_int", insertAtEnd);
1342  else
1343  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type, "sizeof_int", insertAtEnd);
1344  }
1345 
1346  uint64_t byteSize = getDataLayout()->getTypeStoreSize(type);
1348  return LLVMInt32((int32_t)byteSize);
1349  else
1350  return LLVMInt64(byteSize);
1351 }
1352 
1353 llvm::Value *Target::StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd) {
1354  if (m_isa == Target::GENERIC && lGenericTypeLayoutIndeterminate(type) == true) {
1355  llvm::Value *indices[2] = {LLVMInt32(0), LLVMInt32(element)};
1356  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1357  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1358  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1359 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1360  llvm::Instruction *gep = llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep", insertAtEnd);
1361 #else /* LLVM 3.7+ */
1362  llvm::Instruction *gep =
1363  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr, arrayRef, "offset_gep", insertAtEnd);
1364 #endif
1366  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type, "offset_int", insertAtEnd);
1367  else
1368  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type, "offset_int", insertAtEnd);
1369  }
1370 
1371  llvm::StructType *structType = llvm::dyn_cast<llvm::StructType>(type);
1372  if (structType == NULL || structType->isSized() == false) {
1373  Assert(m->errorCount > 0);
1374  return NULL;
1375  }
1376 
1377  const llvm::StructLayout *sl = getDataLayout()->getStructLayout(structType);
1378  Assert(sl != NULL);
1379 
1380  uint64_t offset = sl->getElementOffset(element);
1382  return LLVMInt32((int32_t)offset);
1383  else
1384  return LLVMInt64(offset);
1385 }
1386 
1387 void Target::markFuncWithTargetAttr(llvm::Function *func) {
1388 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
1389  if (m_tf_attributes) {
1390 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
1391  func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
1392 #else // LLVM 5.0+
1393  func->addAttributes(llvm::AttributeList::FunctionIndex, *m_tf_attributes);
1394 #endif
1395  }
1396 #endif
1397 }
1398 
1399 ///////////////////////////////////////////////////////////////////////////
1400 // Opt
1401 
1403  level = 1;
1404  fastMath = false;
1405  fastMaskedVload = false;
1406  force32BitAddressing = true;
1407  unrollLoops = true;
1408  disableAsserts = false;
1409  disableFMA = false;
1410  forceAlignedMemory = false;
1411  disableMaskAllOnOptimizations = false;
1412  disableHandlePseudoMemoryOps = false;
1413  disableBlendedMaskedStores = false;
1414  disableCoherentControlFlow = false;
1415  disableUniformControlFlow = false;
1416  disableGatherScatterOptimizations = false;
1417  disableMaskedStoreToStore = false;
1418  disableGatherScatterFlattening = false;
1419  disableUniformMemoryOptimizations = false;
1420  disableCoalescing = false;
1421 }
1422 
1423 ///////////////////////////////////////////////////////////////////////////
1424 // Globals
1425 
1427  mathLib = Globals::Math_ISPC;
1428  codegenOptLevel = Globals::Aggressive;
1429 
1430  includeStdlib = true;
1431  runCPP = true;
1432  debugPrint = false;
1433  printTarget = false;
1434  NoOmitFramePointer = false;
1435  debugIR = -1;
1436  disableWarnings = false;
1437  warningsAsErrors = false;
1438  quiet = false;
1439  forceColoredOutput = false;
1440  disableLineWrap = false;
1441  emitPerfWarnings = true;
1442  emitInstrumentation = false;
1443  noPragmaOnce = false;
1444  generateDebuggingSymbols = false;
1445 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
1446  generateDWARFVersion = 3;
1447 #endif
1448  enableFuzzTest = false;
1449  fuzzTestSeed = -1;
1450  mangleFunctionsWithTarget = false;
1451 
1452  ctx = new llvm::LLVMContext;
1453 
1454 #ifdef ISPC_IS_WINDOWS
1455  _getcwd(currentDirectory, sizeof(currentDirectory));
1456 #else
1457  if (getcwd(currentDirectory, sizeof(currentDirectory)) == NULL)
1458  FATAL("Current directory path too long!");
1459 #endif
1460  forceAlignment = -1;
1461  dllExport = false;
1462 }
1463 
1464 ///////////////////////////////////////////////////////////////////////////
1465 // SourcePos
1466 
1467 SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
1468  name = n;
1469  if (name == NULL) {
1470  if (m != NULL)
1471  name = m->module->getModuleIdentifier().c_str();
1472  else
1473  name = "(unknown)";
1474  }
1475  first_line = fl;
1476  first_column = fc;
1477  last_line = ll != 0 ? ll : fl;
1478  last_column = lc != 0 ? lc : fc;
1479 }
1480 
1481 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1482 llvm::DIFile
1483 #else /* LLVM 3.7+ */
1484 llvm::DIFile *
1485 // llvm::MDFile*
1486 #endif
1488  std::string directory, filename;
1489  GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
1490 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1491  llvm::DIFile ret = m->diBuilder->createFile(filename, directory);
1492  Assert(ret.Verify());
1493 #else /* LLVM 3.7+ */
1494  llvm::DIFile *ret = m->diBuilder->createFile(filename, directory);
1495 #endif
1496  return ret;
1497 }
1498 
1499 void SourcePos::Print() const {
1500  printf(" @ [%s:%d.%d - %d.%d] ", name, first_line, first_column, last_line, last_column);
1501 }
1502 
1503 bool SourcePos::operator==(const SourcePos &p2) const {
1504  return (!strcmp(name, p2.name) && first_line == p2.first_line && first_column == p2.first_column &&
1505  last_line == p2.last_line && last_column == p2.last_column);
1506 }
1507 
1508 SourcePos Union(const SourcePos &p1, const SourcePos &p2) {
1509  if (strcmp(p1.name, p2.name) != 0)
1510  return p1;
1511 
1512  SourcePos ret;
1513  ret.name = p1.name;
1514  ret.first_line = std::min(p1.first_line, p2.first_line);
1515  ret.first_column = std::min(p1.first_column, p2.first_column);
1516  ret.last_line = std::max(p1.last_line, p2.last_line);
1517  ret.last_column = std::max(p1.last_column, p2.last_column);
1518  return ret;
1519 }
bool disableFMA
Definition: ispc.h:453
#define CPU_Broadwell
bool m_hasTranscendentals
Definition: ispc.h:399
#define CPU_Silvermont
Globals()
Definition: ispc.cpp:1426
Opt opt
Definition: ispc.h:535
int last_column
Definition: ispc.h:136
const llvm::Target * m_target
Definition: ispc.h:300
static bool __os_has_avx_support()
Definition: ispc.cpp:123
This structure collects together a number of global variables.
Definition: ispc.h:531
std::vector< std::pair< std::string, std::string > > m_funcAttributes
Definition: ispc.h:336
int m_nativeVectorAlignment
Definition: ispc.h:359
AllCPUs()
Definition: ispc.cpp:314
SourcePos Union(const SourcePos &p1, const SourcePos &p2)
Definition: ispc.cpp:1508
int first_line
Definition: ispc.h:133
llvm::Value * SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1328
SourcePos(const char *n=NULL, int fl=0, int fc=0, int ll=0, int lc=0)
Definition: ispc.cpp:1467
static llvm::VectorType * BoolVectorType
Definition: llvmutil.h:91
const char * GetISATargetString() const
Definition: ispc.cpp:1298
std::string m_cpu
Definition: ispc.h:330
Opt()
Definition: ispc.cpp:1402
std::string m_arch
Definition: ispc.h:324
static const char * lGetSystemISA()
Definition: ispc.cpp:155
bool NoOmitFramePointer
Definition: ispc.h:564
std::string HumanReadableListOfNames()
Definition: ispc.cpp:420
bool BackwardCompatible(CPUtype what, CPUtype with)
Definition: ispc.cpp:451
std::string GetTripleString() const
Definition: ispc.cpp:1179
static bool lGenericTypeLayoutIndeterminate(llvm::Type *type)
Definition: ispc.cpp:1300
int m_nativeVectorWidth
Definition: ispc.h:352
Module * m
Definition: ispc.cpp:102
static const char * ISAToString(Target::ISA isa)
Definition: ispc.cpp:1213
bool m_generatePIC
Definition: ispc.h:371
static const char * SupportedArchs()
Definition: ispc.cpp:1144
std::set< CPUtype > Set(int type,...)
Definition: ispc.cpp:300
bool m_maskingIsFree
Definition: ispc.h:377
#define Assert(expr)
Definition: ispc.h:161
static llvm::VectorType * Int1VectorType
Definition: llvmutil.h:92
void GetDirectoryAndFileName(const std::string &currentDir, const std::string &relativeName, std::string *directory, std::string *filename)
Definition: util.cpp:513
Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genenricAsSmth="")
Definition: ispc.cpp:458
static void __cpuidex(int info[4], int level, int count)
Definition: ispc.cpp:113
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:228
std::string m_attributes
Definition: ispc.h:333
llvm::Module * module
Definition: module.h:155
static std::string SupportedCPUs()
Definition: ispc.cpp:1139
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:351
Definition: module.h:56
static llvm::Type * Int64Type
Definition: llvmutil.h:78
char currentDirectory[1024]
Definition: ispc.h:636
std::string & GetDefaultNameFromType(CPUtype type)
Definition: ispc.cpp:436
Header file with declarations for various LLVM utility stuff.
bool m_is32Bit
Definition: ispc.h:327
CodegenOptLevel codegenOptLevel
Definition: ispc.h:546
bool m_hasRand
Definition: ispc.h:389
bool m_hasRcpd
Definition: ispc.h:408
int m_maskBitCount
Definition: ispc.h:382
static void __cpuid(int info[4], int infoType)
Definition: ispc.cpp:108
Representation of a range of positions in a source file.
Definition: ispc.h:129
llvm::Value * StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1353
int m_vectorWidth
Definition: ispc.h:368
void Print() const
Definition: ispc.cpp:1499
llvm::TargetMachine * m_targetMachine
Definition: ispc.h:310
bool force32BitAddressing
Definition: ispc.h:445
static bool __os_has_avx512_support()
Definition: ispc.cpp:138
const char * name
Definition: ispc.h:132
void markFuncWithTargetAttr(llvm::Function *func)
Definition: ispc.cpp:1387
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:375
#define FATAL(message)
Definition: util.h:112
int m_dataTypeWidth
Definition: ispc.h:363
static llvm::Type * Int32Type
Definition: llvmutil.h:77
int last_line
Definition: ispc.h:135
const llvm::DataLayout * getDataLayout() const
Definition: ispc.h:251
#define PTYPE(p)
Definition: llvmutil.h:55
#define ISPC_MAX_NVEC
Definition: ispc.h:69
std::vector< std::vector< std::string > > names
Definition: ispc.cpp:297
bool m_hasGather
Definition: ispc.h:392
int first_column
Definition: ispc.h:134
llvm::DataLayout * m_dataLayout
Definition: ispc.h:311
bool m_hasScatter
Definition: ispc.h:395
ISA
Definition: ispc.h:177
const char * GetISAString() const
Definition: ispc.cpp:1253
bool m_valid
Definition: ispc.h:315
Globals * g
Definition: ispc.cpp:101
static llvm::VectorType * MaskType
Definition: llvmutil.h:89
std::vector< std::set< CPUtype > > compat
Definition: ispc.cpp:298
bool m_hasTrigonometry
Definition: ispc.h:402
std::string m_treatGenericAsSmth
Definition: ispc.h:321
bool m_hasHalf
Definition: ispc.h:386
bool operator==(const SourcePos &p2) const
Definition: ispc.cpp:1503
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
llvm::ConstantInt * LLVMInt64(int64_t i)
Definition: llvmutil.cpp:236
int errorCount
Definition: module.h:148
llvm::LLVMContext * ctx
Definition: ispc.h:632
static const char * SupportedTargets()
Definition: ispc.cpp:1152
ISA m_isa
Definition: ispc.h:318
bool m_hasVecPrefetch
Definition: ispc.h:411
llvm::DIFile GetDIFile() const
Definition: ispc.cpp:1487
llvm::DIBuilder * diBuilder
Definition: module.h:158
Main ispc.header file. Defines Target, Globals and Opt classes.
CPUtype GetTypeFromName(std::string name)
Definition: ispc.cpp:441
static const char * ISAToTargetString(Target::ISA isa)
Definition: ispc.cpp:1258
bool m_hasRsqrtd
Definition: ispc.h:405
CPUtype
Definition: ispc.cpp:213