Intel® Implicit SPMD Program Compiler (Intel® ISPC)  1.13.0
ispc.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2020, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ispc.cpp
35  @brief ispc global definitions
36 */
37 
38 #include "ispc.h"
39 #include "llvmutil.h"
40 #include "module.h"
41 #include "util.h"
42 #include <sstream>
43 #include <stdarg.h> /* va_list, va_start, va_arg, va_end */
44 #include <stdio.h>
45 #ifdef ISPC_HOST_IS_WINDOWS
46 #include <direct.h>
47 #include <windows.h>
48 #define strcasecmp stricmp
49 #include <intrin.h>
50 #else
51 #include <sys/types.h>
52 #include <unistd.h>
53 #endif
54 #include <llvm/CodeGen/TargetLowering.h>
55 #include <llvm/CodeGen/TargetSubtargetInfo.h>
56 #include <llvm/IR/DIBuilder.h>
57 #include <llvm/IR/DebugInfo.h>
58 #include <llvm/IR/Instructions.h>
59 #include <llvm/IR/LLVMContext.h>
60 #include <llvm/IR/Module.h>
61 
62 #include <llvm/BinaryFormat/Dwarf.h>
63 #include <llvm/IR/Attributes.h>
64 #include <llvm/IR/DataLayout.h>
65 #include <llvm/Support/CodeGen.h>
66 #include <llvm/Support/Host.h>
67 #include <llvm/Support/TargetRegistry.h>
68 #include <llvm/Support/TargetSelect.h>
69 #include <llvm/Target/TargetMachine.h>
70 #include <llvm/Target/TargetOptions.h>
71 
74 
75 ///////////////////////////////////////////////////////////////////////////
76 // Target
77 
78 #if !defined(ISPC_HOST_IS_WINDOWS) && !defined(__arm__) && !defined(__aarch64__)
79 // __cpuid() and __cpuidex() are defined on Windows in <intrin.h> for x86/x64.
80 // On *nix they need to be defined manually through inline assembler.
// Runs the CPUID instruction for leaf `infoType` and stores the resulting
// EAX, EBX, ECX and EDX values in info[0], info[1], info[2] and info[3].
//
// ECX is explicitly zeroed before the instruction executes.  Leaves that take
// a sub-leaf index read ECX, so leaving it undefined makes the result depend
// on whatever the register happened to hold; zeroing it also matches the
// behaviour of the MSVC __cpuid intrinsic that this function stands in for.
static void __cpuid(int info[4], int infoType) {
    __asm__ __volatile__("cpuid"
                         : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
                         : "0"(infoType), "2"(0));
}
84 
// Runs the CPUID instruction for leaf `level`, sub-leaf `count`, and stores
// the resulting EAX, EBX, ECX and EDX values in info[0..3].
static void __cpuidex(int info[4], int level, int count) {
#if defined(__x86_64__)
    // Bind EBX directly on x86-64.  The 32-bit xchg sequence used below must
    // not be used here: a 32-bit write zero-extends into RBX, so swapping
    // %ebx back would silently drop the upper half of a callee-saved register
    // that the compiler has not been told is modified.
    __asm__ __volatile__("cpuid"
                         : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
                         : "0"(level), "2"(count));
#else
    /* Save %ebx in case it's the PIC register */
    __asm__ __volatile__("xchg{l}\t{%%}ebx, %1\n\t"
                         "cpuid\n\t"
                         "xchg{l}\t{%%}ebx, %1\n\t"
                         : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
                         : "0"(level), "2"(count));
#endif
}
93 #endif // !ISPC_HOST_IS_WINDOWS && !__ARM__ && !__AARCH64__
94 
95 #if !defined(__arm__) && !defined(__aarch64__)
// Reports whether the OS will save the YMM registers, which is a precondition
// for using AVX.  The answer comes from the extended-control-register value
// returned by xgetbv: both bits of the 0x6 mask have to be set.
static bool __os_has_avx_support() {
    const int kRequiredStateBits = 6;
#if defined(ISPC_HOST_IS_WINDOWS)
    const unsigned long long enabledFeatures = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledFeatures & kRequiredStateBits) == kRequiredStateBits;
#else  // !defined(ISPC_HOST_IS_WINDOWS)
    // xgetbv is emitted as raw bytes rather than by mnemonic: older
    // assemblers do not know the instruction, and there is no easy way to
    // compile conditionally on the assembler in use.  ECX is set to 0.
    int lowWord, highWord;
    __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" : "=a"(lowWord), "=d"(highWord) : "c"(0));
    const int present = lowWord & kRequiredStateBits;
    return present == kRequiredStateBits;
#endif // !defined(ISPC_HOST_IS_WINDOWS)
}
110 
// Reports whether the OS saves the XMM, YMM and ZMM registers, i.e. whether
// it supports AVX2 and AVX512.  All bits of the 0xE6 mask must be set in the
// value returned by xgetbv.
// See section 2.1 of software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf
static bool __os_has_avx512_support() {
    const int kRequiredStateBits = 0xE6;
#if defined(ISPC_HOST_IS_WINDOWS)
    const unsigned long long enabledFeatures = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledFeatures & kRequiredStateBits) == kRequiredStateBits;
#else  // !defined(ISPC_HOST_IS_WINDOWS)
    // xgetbv is emitted as raw bytes rather than by mnemonic: older
    // assemblers do not know the instruction, and there is no easy way to
    // compile conditionally on the assembler in use.  ECX is set to 0.
    int lowWord, highWord;
    __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" : "=a"(lowWord), "=d"(highWord) : "c"(0));
    const int present = lowWord & kRequiredStateBits;
    return present == kRequiredStateBits;
#endif // !defined(ISPC_HOST_IS_WINDOWS)
}
126 #endif // !__arm__ && !__aarch64__
127 
129 #if defined(__arm__) || defined(__aarch64__)
130  return ISPCTarget::neon_i32x4;
131 #else
132  int info[4];
133  __cpuid(info, 1);
134 
135  int info2[4];
136  // Call cpuid with eax=7, ecx=0
137  __cpuidex(info2, 7, 0);
138 
139  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
140  (info2[1] & (1 << 5)) != 0 && // AVX2
141  (info2[1] & (1 << 16)) != 0 && // AVX512 F
143  // We need to verify that AVX2 is also available,
144  // as well as AVX512, because our targets are supposed
145  // to use both.
146 
147  if ((info2[1] & (1 << 17)) != 0 && // AVX512 DQ
148  (info2[1] & (1 << 28)) != 0 && // AVX512 CDI
149  (info2[1] & (1 << 30)) != 0 && // AVX512 BW
150  (info2[1] & (1 << 31)) != 0) { // AVX512 VL
152  } else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
153  (info2[1] & (1 << 27)) != 0 && // AVX512 ER
154  (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
156  }
157  // If it's unknown AVX512 target, fall through and use AVX2
158  // or whatever is available in the machine.
159  }
160 
161  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
162  (info[2] & (1 << 28)) != 0 && __os_has_avx_support()) { // AVX
163  // AVX1 for sure....
164  // Ivy Bridge?
165  if ((info[2] & (1 << 29)) != 0 && // F16C
166  (info[2] & (1 << 30)) != 0 && // RDRAND
167  (info2[1] & (1 << 5)) != 0) { // AVX2.
168  return ISPCTarget::avx2_i32x8;
169  }
170  // Regular AVX
171  return ISPCTarget::avx1_i32x8;
172  } else if ((info[2] & (1 << 19)) != 0)
173  return ISPCTarget::sse4_i32x4;
174  else if ((info[3] & (1 << 26)) != 0)
175  return ISPCTarget::sse2_i32x4;
176  else {
177  Error(SourcePos(), "Unable to detect supported SSE/AVX ISA. Exiting.");
178  exit(1);
179  }
180 #endif
181 }
182 
183 static const bool lIsTargetValidforArch(ISPCTarget target, Arch arch) {
184  bool ret = true;
185  // If target name starts with sse or avx, has to be x86 or x86-64.
186  if (ISPCTargetIsX86(target)) {
187  if (arch != Arch::x86_64 && arch != Arch::x86)
188  ret = false;
189  } else if (target == ISPCTarget::neon_i8x16 || target == ISPCTarget::neon_i16x8) {
190  if (arch != Arch::arm)
191  ret = false;
192  } else if (target == ISPCTarget::neon_i32x4 || target == ISPCTarget::neon_i32x8) {
193  if (arch != Arch::arm && arch != Arch::aarch64)
194  ret = false;
195  }
196 
197  return ret;
198 }
199 
200 typedef enum {
201  // Special value, indicates that no CPU is present.
202  CPU_None = 0,
203 
204  // 'Generic' CPU without any hardware SIMD capabilities.
206 
207  // A generic 64-bit specific x86 processor model which tries to be good
208  // for modern chips without enabling instruction set encodings past the
209  // basic SSE2 and 64-bit ones
211 
212  // Early Atom CPU. Supports SSSE3.
214 
215  // Generic Core2-like. Supports SSSE3. Isn't quite compatible with Bonnell,
216  // but for ISPC the difference is negligible; ISPC doesn't make use of it.
218 
219  // Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2).
221 
222  // Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT.
224 
225  // CPU in PS4/Xbox One.
227 
228  // Sandy Bridge. Supports AVX 1.
230 
231  // Ivy Bridge. Supports AVX 1 + RDRAND.
233 
234  // Haswell. Supports AVX 2.
236 
237  // Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP.
239 
240  // Knights Landing - Xeon Phi.
241  // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
242  // AVX-512CDI: Conflict Detection;
243  // AVX-512ERI & PRI: 28-bit precision RCP, RSQRT and EXP transcendentals,
244  // new prefetch instructions.
246  // Skylake Xeon.
247  // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
248  // AVX-512CDI: Conflict Detection;
249  // AVX-512VL: Vector Length Orthogonality;
250  // AVX-512DQ: New HPC ISA (vs AVX512F);
251  // AVX-512BW: Byte and Word Support.
253 
254 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0
255  // Icelake client
256  CPU_ICL,
257 #endif
258 
259  // Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
261 
262 // FIXME: LLVM supports a ton of different ARM CPU variants--not just
263 // cortex-a9 and a15. We should be able to handle any of them that also
264 // have NEON support.
265 #ifdef ISPC_ARM_ENABLED
266  // ARM Cortex A15. Supports NEON VFPv4.
267  CPU_CortexA15,
268 
269  // ARM Cortex A9. Supports NEON VFPv3.
270  CPU_CortexA9,
271 
272  // ARM Cortex A35, A53, A57.
273  CPU_CortexA35,
274  CPU_CortexA53,
275  CPU_CortexA57,
276 #endif
277 
279 } CPUtype;
280 
281 class AllCPUs {
282  private:
283  std::vector<std::vector<std::string>> names;
284  std::vector<std::set<CPUtype>> compat;
285 
286  std::set<CPUtype> Set(int type, ...) {
287  std::set<CPUtype> retn;
288  va_list args;
289 
290  retn.insert((CPUtype)type);
291  va_start(args, type);
292  while ((type = va_arg(args, int)) != CPU_None)
293  retn.insert((CPUtype)type);
294  va_end(args);
295 
296  return retn;
297  }
298 
299  public:
301  names = std::vector<std::vector<std::string>>(sizeofCPUtype);
302  compat = std::vector<std::set<CPUtype>>(sizeofCPUtype);
303 
304  names[CPU_None].push_back("");
305 
306  names[CPU_Generic].push_back("generic");
307 
308  names[CPU_x86_64].push_back("x86-64");
309 
310  names[CPU_Bonnell].push_back("atom");
311  names[CPU_Bonnell].push_back("bonnell");
312 
313  names[CPU_Core2].push_back("core2");
314 
315  names[CPU_Penryn].push_back("penryn");
316 
317  names[CPU_Silvermont].push_back("slm");
318  names[CPU_Silvermont].push_back("silvermont");
319 
320  names[CPU_Nehalem].push_back("corei7");
321  names[CPU_Nehalem].push_back("nehalem");
322 
323  names[CPU_PS4].push_back("btver2");
324  names[CPU_PS4].push_back("ps4");
325 
326  names[CPU_SandyBridge].push_back("corei7-avx");
327  names[CPU_SandyBridge].push_back("sandybridge");
328 
329  names[CPU_IvyBridge].push_back("core-avx-i");
330  names[CPU_IvyBridge].push_back("ivybridge");
331 
332  names[CPU_Haswell].push_back("core-avx2");
333  names[CPU_Haswell].push_back("haswell");
334 
335  names[CPU_Broadwell].push_back("broadwell");
336 
337  names[CPU_KNL].push_back("knl");
338 
339  names[CPU_SKX].push_back("skx");
340 
341 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+
342  names[CPU_ICL].push_back("icelake-client");
343  names[CPU_ICL].push_back("icl");
344 #endif
345 
346 #ifdef ISPC_ARM_ENABLED
347  names[CPU_CortexA15].push_back("cortex-a15");
348 
349  names[CPU_CortexA9].push_back("cortex-a9");
350 
351  names[CPU_CortexA35].push_back("cortex-a35");
352 
353  names[CPU_CortexA53].push_back("cortex-a53");
354 
355  names[CPU_CortexA57].push_back("cortex-a57");
356 #endif
357 
358  Assert(names.size() == sizeofCPUtype);
359 
360  compat[CPU_Silvermont] =
362 
365 
368 
369 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+
370  compat[CPU_ICL] = Set(CPU_ICL, CPU_SKX, CPU_x86_64, CPU_Bonnell, CPU_Penryn, CPU_Core2, CPU_Nehalem,
372  ;
373 #endif
374 
375  compat[CPU_Broadwell] =
386  compat[CPU_Nehalem] =
388  compat[CPU_Penryn] =
392  compat[CPU_Generic] = Set(CPU_Generic, CPU_None);
393 
395 
396 #ifdef ISPC_ARM_ENABLED
397  compat[CPU_CortexA15] = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15, CPU_None);
398  compat[CPU_CortexA9] = Set(CPU_Generic, CPU_CortexA9, CPU_None);
399  compat[CPU_CortexA35] = Set(CPU_Generic, CPU_CortexA35, CPU_None);
400  compat[CPU_CortexA53] = Set(CPU_Generic, CPU_CortexA53, CPU_None);
401  compat[CPU_CortexA57] = Set(CPU_Generic, CPU_CortexA57, CPU_None);
402 #endif
403  }
404 
405  std::string HumanReadableListOfNames() {
406  std::stringstream CPUs;
407  for (int i = CPU_Generic; i < sizeofCPUtype; i++) {
408  CPUs << names[i][0];
409  if (names[i].size() > 1) {
410  CPUs << " (synonyms: " << names[i][1];
411  for (int j = 2, je = names[i].size(); j < je; j++)
412  CPUs << ", " << names[i][j];
413  CPUs << ")";
414  }
415  if (i < sizeofCPUtype - 1)
416  CPUs << ", ";
417  }
418  return CPUs.str();
419  }
420 
421  std::string &GetDefaultNameFromType(CPUtype type) {
422  Assert((type >= CPU_None) && (type < sizeofCPUtype));
423  return names[type][0];
424  }
425 
426  CPUtype GetTypeFromName(std::string name) {
427  CPUtype retn = CPU_None;
428 
429  for (int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++)
430  for (int j = 0, je = names[i].size(); (retn == CPU_None) && (j < je); j++)
431  if (!name.compare(names[i][j]))
432  retn = (CPUtype)i;
433  return retn;
434  }
435 
436  bool BackwardCompatible(CPUtype what, CPUtype with) {
437  Assert((what > CPU_None) && (what < sizeofCPUtype));
438  Assert((with > CPU_None) && (with < sizeofCPUtype));
439  return compat[what].find(with) != compat[what].end();
440  }
441 };
442 
443 Target::Target(Arch arch, const char *cpu, ISPCTarget ispc_target, bool pic, bool printTarget)
444  : m_target(NULL), m_targetMachine(NULL), m_dataLayout(NULL), m_valid(false), m_ispc_target(ispc_target),
445  m_isa(SSE2), m_arch(Arch::none), m_is32Bit(true), m_cpu(""), m_attributes(""), m_tf_attributes(NULL),
446  m_nativeVectorWidth(-1), m_nativeVectorAlignment(-1), m_dataTypeWidth(-1), m_vectorWidth(-1), m_generatePIC(pic),
447  m_maskingIsFree(false), m_maskBitCount(-1), m_hasHalf(false), m_hasRand(false), m_hasGather(false),
448  m_hasScatter(false), m_hasTranscendentals(false), m_hasTrigonometry(false), m_hasRsqrtd(false), m_hasRcpd(false),
449  m_hasVecPrefetch(false) {
450  CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
451  AllCPUs a;
452  std::string featuresString;
453 
454  if (cpu) {
455  CPUID = a.GetTypeFromName(cpu);
456  if (CPUID == CPU_None) {
457  Error(SourcePos(),
458  "Error: CPU type \"%s\" unknown. Supported"
459  " CPUs: %s.",
460  cpu, a.HumanReadableListOfNames().c_str());
461  return;
462  }
463  }
464 
466  // If a CPU was specified explicitly, try to pick the best
467  // possible ISA based on that.
468  switch (CPUID) {
469  case CPU_None: {
470  // No CPU and no ISA, so use system info to figure out
471  // what this CPU supports.
473  std::string target_string = ISPCTargetToString(m_ispc_target);
474  Warning(SourcePos(),
475  "No --target specified on command-line."
476  " Using default system target \"%s\".",
477  target_string.c_str());
478  break;
479  }
480 
481  case CPU_Generic:
483  break;
484 
485 #ifdef ISPC_ARM_ENABLED
486  case CPU_CortexA9:
487  case CPU_CortexA15:
488  case CPU_CortexA35:
489  case CPU_CortexA53:
490  case CPU_CortexA57:
492  break;
493 #endif
494 
495  case CPU_KNL:
497  break;
498 
499 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0
500  case CPU_ICL:
501 #endif
502  case CPU_SKX:
504  break;
505 
506  case CPU_Broadwell:
507  case CPU_Haswell:
509  break;
510 
511  case CPU_IvyBridge:
512  case CPU_SandyBridge:
514  break;
515 
516  // Penryn is here because ISPC does not use SSE 4.2
517  case CPU_Penryn:
518  case CPU_Nehalem:
519  case CPU_Silvermont:
521  break;
522 
523  case CPU_PS4:
525  break;
526 
527  default:
529  break;
530  }
531  if (CPUID != CPU_None) {
532  std::string target_string = ISPCTargetToString(m_ispc_target);
533  Warning(SourcePos(),
534  "No --target specified on command-line."
535  " Using ISA \"%s\" based on specified CPU \"%s\".",
536  target_string.c_str(), cpu);
537  }
538  }
539 
542  }
543 
544  if (arch == Arch::none) {
545 #ifdef ISPC_ARM_ENABLED
547 #if defined(__arm__)
548  arch = Arch::arm;
549 #else
550  arch = Arch::aarch64;
551 #endif
552  } else
553 #endif
554  arch = Arch::x86_64;
555  }
556 
557  bool error = false;
558 
559  // Make sure the target architecture is a known one; print an error
560  // with the valid ones otherwise.
561  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::targets().begin();
562  iter != llvm::TargetRegistry::targets().end(); ++iter) {
563  if (ArchToString(arch) == iter->getName()) {
564  this->m_target = &*iter;
565  break;
566  }
567  }
568  if (this->m_target == NULL) {
569  std::string error_message;
570  error_message = "Invalid architecture \"";
571  error_message += ArchToString(arch);
572  error_message += "\"\nOptions: ";
573  llvm::TargetRegistry::iterator iter;
574  const char *separator = "";
575  for (iter = llvm::TargetRegistry::targets().begin(); iter != llvm::TargetRegistry::targets().end(); ++iter) {
576  error_message += separator;
577  error_message += iter->getName();
578  separator = ", ";
579  }
580  error_message += ".";
581  Error(SourcePos(), "%s", error_message.c_str());
582  error = true;
583  } else {
584  this->m_arch = arch;
585  }
586 
587  // Ensure that we have a valid target/arch combination.
588  if (!lIsTargetValidforArch(m_ispc_target, arch)) {
589  std::string str_arch = ArchToString(arch);
590  std::string target_string = ISPCTargetToString(m_ispc_target);
591  Error(SourcePos(), "arch = %s and target = %s is not a valid combination.", str_arch.c_str(),
592  target_string.c_str());
593  return;
594  }
595 
596  // Check default LLVM generated targets
597  bool unsupported_target = false;
598  switch (m_ispc_target) {
600  this->m_isa = Target::SSE2;
601  this->m_nativeVectorWidth = 4;
602  this->m_nativeVectorAlignment = 16;
603  this->m_dataTypeWidth = 32;
604  this->m_vectorWidth = 4;
605  this->m_maskingIsFree = false;
606  this->m_maskBitCount = 32;
607  CPUfromISA = CPU_x86_64;
608  break;
610  this->m_isa = Target::SSE2;
611  this->m_nativeVectorWidth = 4;
612  this->m_nativeVectorAlignment = 16;
613  this->m_dataTypeWidth = 32;
614  this->m_vectorWidth = 8;
615  this->m_maskingIsFree = false;
616  this->m_maskBitCount = 32;
617  CPUfromISA = CPU_Core2;
618  break;
620  this->m_isa = Target::SSE4;
621  this->m_nativeVectorWidth = 16;
622  this->m_nativeVectorAlignment = 16;
623  this->m_dataTypeWidth = 8;
624  this->m_vectorWidth = 16;
625  this->m_maskingIsFree = false;
626  this->m_maskBitCount = 8;
627  CPUfromISA = CPU_Nehalem;
628  break;
630  this->m_isa = Target::SSE4;
631  this->m_nativeVectorWidth = 8;
632  this->m_nativeVectorAlignment = 16;
633  this->m_dataTypeWidth = 16;
634  this->m_vectorWidth = 8;
635  this->m_maskingIsFree = false;
636  this->m_maskBitCount = 16;
637  CPUfromISA = CPU_Nehalem;
638  break;
640  this->m_isa = Target::SSE4;
641  this->m_nativeVectorWidth = 4;
642  this->m_nativeVectorAlignment = 16;
643  this->m_dataTypeWidth = 32;
644  this->m_vectorWidth = 4;
645  this->m_maskingIsFree = false;
646  this->m_maskBitCount = 32;
647  CPUfromISA = CPU_Nehalem;
648  break;
650  this->m_isa = Target::SSE4;
651  this->m_nativeVectorWidth = 4;
652  this->m_nativeVectorAlignment = 16;
653  this->m_dataTypeWidth = 32;
654  this->m_vectorWidth = 8;
655  this->m_maskingIsFree = false;
656  this->m_maskBitCount = 32;
657  CPUfromISA = CPU_Nehalem;
658  break;
660  this->m_isa = Target::AVX;
661  this->m_nativeVectorWidth = 8;
662  this->m_nativeVectorAlignment = 32;
663  this->m_dataTypeWidth = 32;
664  this->m_vectorWidth = 4;
665  this->m_maskingIsFree = false;
666  this->m_maskBitCount = 32;
667  CPUfromISA = CPU_SandyBridge;
668  break;
670  this->m_isa = Target::AVX;
671  this->m_nativeVectorWidth = 8;
672  this->m_nativeVectorAlignment = 32;
673  this->m_dataTypeWidth = 32;
674  this->m_vectorWidth = 8;
675  this->m_maskingIsFree = false;
676  this->m_maskBitCount = 32;
677  CPUfromISA = CPU_SandyBridge;
678  break;
680  this->m_isa = Target::AVX;
681  this->m_nativeVectorWidth = 8;
682  this->m_nativeVectorAlignment = 32;
683  this->m_dataTypeWidth = 32;
684  this->m_vectorWidth = 16;
685  this->m_maskingIsFree = false;
686  this->m_maskBitCount = 32;
687  CPUfromISA = CPU_SandyBridge;
688  break;
690  this->m_isa = Target::AVX;
691  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
692  this->m_nativeVectorAlignment = 32;
693  this->m_dataTypeWidth = 64;
694  this->m_vectorWidth = 4;
695  this->m_maskingIsFree = false;
696  this->m_maskBitCount = 64;
697  CPUfromISA = CPU_SandyBridge;
698  break;
700  this->m_isa = Target::AVX2;
701  this->m_nativeVectorWidth = 8;
702  this->m_nativeVectorAlignment = 32;
703  this->m_dataTypeWidth = 32;
704  this->m_vectorWidth = 4;
705  this->m_maskingIsFree = false;
706  this->m_maskBitCount = 32;
707  this->m_hasHalf = true;
708  this->m_hasRand = true;
709  this->m_hasGather = true;
710  CPUfromISA = CPU_Haswell;
711  break;
713  this->m_isa = Target::AVX2;
714  this->m_nativeVectorWidth = 8;
715  this->m_nativeVectorAlignment = 32;
716  this->m_dataTypeWidth = 32;
717  this->m_vectorWidth = 8;
718  this->m_maskingIsFree = false;
719  this->m_maskBitCount = 32;
720  this->m_hasHalf = true;
721  this->m_hasRand = true;
722  this->m_hasGather = true;
723  CPUfromISA = CPU_Haswell;
724  break;
726  this->m_isa = Target::AVX2;
727  this->m_nativeVectorWidth = 16;
728  this->m_nativeVectorAlignment = 32;
729  this->m_dataTypeWidth = 32;
730  this->m_vectorWidth = 16;
731  this->m_maskingIsFree = false;
732  this->m_maskBitCount = 32;
733  this->m_hasHalf = true;
734  this->m_hasRand = true;
735  this->m_hasGather = true;
736  CPUfromISA = CPU_Haswell;
737  break;
739  this->m_isa = Target::AVX2;
740  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
741  this->m_nativeVectorAlignment = 32;
742  this->m_dataTypeWidth = 64;
743  this->m_vectorWidth = 4;
744  this->m_maskingIsFree = false;
745  this->m_maskBitCount = 64;
746  this->m_hasHalf = true;
747  this->m_hasRand = true;
748  this->m_hasGather = true;
749  CPUfromISA = CPU_Haswell;
750  break;
752  this->m_isa = Target::KNL_AVX512;
753  this->m_nativeVectorWidth = 16;
754  this->m_nativeVectorAlignment = 64;
755  this->m_dataTypeWidth = 32;
756  this->m_vectorWidth = 16;
757  this->m_maskingIsFree = true;
758  this->m_maskBitCount = 1;
759  this->m_hasHalf = true;
760  this->m_hasRand = true;
761  this->m_hasGather = this->m_hasScatter = true;
762  this->m_hasTranscendentals = false;
763  // For MIC it is set to true due to performance reasons. The option should be tested.
764  this->m_hasTrigonometry = false;
765  this->m_hasRsqrtd = this->m_hasRcpd = false;
766  this->m_hasVecPrefetch = false;
767  CPUfromISA = CPU_KNL;
768  break;
770 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+
771  this->m_isa = Target::SKX_AVX512;
772  this->m_nativeVectorWidth = 16;
773  this->m_nativeVectorAlignment = 64;
774  this->m_dataTypeWidth = 32;
775  this->m_vectorWidth = 8;
776  this->m_maskingIsFree = true;
777  this->m_maskBitCount = 1;
778  this->m_hasHalf = true;
779  this->m_hasRand = true;
780  this->m_hasGather = this->m_hasScatter = true;
781  this->m_hasTranscendentals = false;
782  // For MIC it is set to true due to performance reasons. The option should be tested.
783  this->m_hasTrigonometry = false;
784  this->m_hasRsqrtd = this->m_hasRcpd = false;
785  this->m_hasVecPrefetch = false;
786  CPUfromISA = CPU_SKX;
787  this->m_funcAttributes.push_back(std::make_pair("prefer-vector-width", "256"));
788  this->m_funcAttributes.push_back(std::make_pair("min-legal-vector-width", "256"));
789  break;
790 #else
791  unsupported_target = true;
792  break;
793 #endif
795  this->m_isa = Target::SKX_AVX512;
796  this->m_nativeVectorWidth = 16;
797  this->m_nativeVectorAlignment = 64;
798  this->m_dataTypeWidth = 32;
799  this->m_vectorWidth = 16;
800  this->m_maskingIsFree = true;
801  this->m_maskBitCount = 1;
802  this->m_hasHalf = true;
803  this->m_hasRand = true;
804  this->m_hasGather = this->m_hasScatter = true;
805  this->m_hasTranscendentals = false;
806  // For MIC it is set to true due to performance reasons. The option should be tested.
807  this->m_hasTrigonometry = false;
808  this->m_hasRsqrtd = this->m_hasRcpd = false;
809  this->m_hasVecPrefetch = false;
810  CPUfromISA = CPU_SKX;
811  if (g->opt.disableZMM) {
812  this->m_funcAttributes.push_back(std::make_pair("prefer-vector-width", "256"));
813  this->m_funcAttributes.push_back(std::make_pair("min-legal-vector-width", "256"));
814  } else {
815  this->m_funcAttributes.push_back(std::make_pair("prefer-vector-width", "512"));
816  this->m_funcAttributes.push_back(std::make_pair("min-legal-vector-width", "512"));
817  }
818  break;
820  this->m_isa = Target::GENERIC;
821  this->m_nativeVectorWidth = 1;
822  this->m_nativeVectorAlignment = 16;
823  this->m_vectorWidth = 1;
824  this->m_maskingIsFree = false;
825  this->m_maskBitCount = 32;
826  CPUfromISA = CPU_Generic;
827  break;
829  this->m_isa = Target::GENERIC;
830  this->m_nativeVectorWidth = 4;
831  this->m_nativeVectorAlignment = 16;
832  this->m_vectorWidth = 4;
833  this->m_maskingIsFree = true;
834  this->m_maskBitCount = 1;
835  this->m_hasHalf = true;
836  this->m_hasTranscendentals = true;
837  this->m_hasTrigonometry = true;
838  this->m_hasGather = this->m_hasScatter = true;
839  this->m_hasRsqrtd = this->m_hasRcpd = true;
840  CPUfromISA = CPU_Generic;
841  break;
843  this->m_isa = Target::GENERIC;
844  this->m_nativeVectorWidth = 8;
845  this->m_nativeVectorAlignment = 32;
846  this->m_vectorWidth = 8;
847  this->m_maskingIsFree = true;
848  this->m_maskBitCount = 1;
849  this->m_hasHalf = true;
850  this->m_hasTranscendentals = true;
851  this->m_hasTrigonometry = true;
852  this->m_hasGather = this->m_hasScatter = true;
853  this->m_hasRsqrtd = this->m_hasRcpd = true;
854  CPUfromISA = CPU_Generic;
855  break;
857  this->m_isa = Target::GENERIC;
858  this->m_nativeVectorWidth = 16;
859  this->m_nativeVectorAlignment = 64;
860  this->m_vectorWidth = 16;
861  this->m_maskingIsFree = true;
862  this->m_maskBitCount = 1;
863  this->m_hasHalf = true;
864  this->m_hasTranscendentals = true;
865  // It's set to false, because stdlib implementation of math functions
866  // is faster on MIC, than "native" implementation provided by the
867  // icc compiler.
868  this->m_hasTrigonometry = false;
869  this->m_hasGather = this->m_hasScatter = true;
870  this->m_hasRsqrtd = this->m_hasRcpd = true;
871  // It's set to true, because MIC has hardware vector prefetch instruction
872  this->m_hasVecPrefetch = true;
873  CPUfromISA = CPU_Generic;
874  break;
876  this->m_isa = Target::GENERIC;
877  this->m_nativeVectorWidth = 32;
878  this->m_nativeVectorAlignment = 64;
879  this->m_vectorWidth = 32;
880  this->m_maskingIsFree = true;
881  this->m_maskBitCount = 1;
882  this->m_hasHalf = true;
883  this->m_hasTranscendentals = true;
884  this->m_hasTrigonometry = true;
885  this->m_hasGather = this->m_hasScatter = true;
886  this->m_hasRsqrtd = this->m_hasRcpd = true;
887  CPUfromISA = CPU_Generic;
888  break;
890  this->m_isa = Target::GENERIC;
891  this->m_nativeVectorWidth = 64;
892  this->m_nativeVectorAlignment = 64;
893  this->m_vectorWidth = 64;
894  this->m_maskingIsFree = true;
895  this->m_maskBitCount = 1;
896  this->m_hasHalf = true;
897  this->m_hasTranscendentals = true;
898  this->m_hasTrigonometry = true;
899  this->m_hasGather = this->m_hasScatter = true;
900  this->m_hasRsqrtd = this->m_hasRcpd = true;
901  CPUfromISA = CPU_Generic;
902  break;
903 #ifdef ISPC_ARM_ENABLED
905  this->m_isa = Target::NEON;
906  this->m_nativeVectorWidth = 16;
907  this->m_nativeVectorAlignment = 16;
908  this->m_dataTypeWidth = 8;
909  this->m_vectorWidth = 16;
910  this->m_hasHalf = true; // ??
911  this->m_maskingIsFree = false;
912  this->m_maskBitCount = 8;
913  break;
915  this->m_isa = Target::NEON;
916  this->m_nativeVectorWidth = 8;
917  this->m_nativeVectorAlignment = 16;
918  this->m_dataTypeWidth = 16;
919  this->m_vectorWidth = 8;
920  this->m_hasHalf = true; // ??
921  this->m_maskingIsFree = false;
922  this->m_maskBitCount = 16;
923  break;
925  this->m_isa = Target::NEON;
926  this->m_nativeVectorWidth = 4;
927  this->m_nativeVectorAlignment = 16;
928  this->m_dataTypeWidth = 32;
929  this->m_vectorWidth = 4;
930  this->m_hasHalf = true; // ??
931  this->m_maskingIsFree = false;
932  this->m_maskBitCount = 32;
933  break;
935  this->m_isa = Target::NEON;
936  this->m_nativeVectorWidth = 4;
937  this->m_nativeVectorAlignment = 16;
938  this->m_dataTypeWidth = 32;
939  this->m_vectorWidth = 8;
940  this->m_hasHalf = true; // ??
941  this->m_maskingIsFree = false;
942  this->m_maskBitCount = 32;
943  break;
944 #else
949  unsupported_target = true;
950  break;
951 #endif
952 #ifdef ISPC_WASM_ENABLED
954  this->m_isa = Target::WASM;
955  this->m_nativeVectorWidth = 4;
956  this->m_nativeVectorAlignment = 16;
957  this->m_dataTypeWidth = 32;
958  this->m_vectorWidth = 4;
959  this->m_hasHalf = false;
960  this->m_maskingIsFree = false;
961  this->m_maskBitCount = 32;
962  this->m_hasTranscendentals = false;
963  this->m_hasTrigonometry = false;
964  this->m_hasRcpd = false;
965  this->m_hasRsqrtd = false;
966  this->m_hasScatter = false;
967  this->m_hasGather = false;
968  this->m_hasVecPrefetch = false;
969  break;
970 #else
972  unsupported_target = true;
973  break;
974 #endif
975  case ISPCTarget::none:
976  case ISPCTarget::host:
977  case ISPCTarget::error:
978  unsupported_target = true;
979  break;
980  }
981 
982  if (unsupported_target) {
983  // Hitting one of unsupported targets is internal error.
984  // Proper reporting about incorrect targets is done during options parsing.
985  std::string target_string = "Problem with target (" + ISPCTargetToString(m_ispc_target) + ")";
986  FATAL(target_string.c_str());
987  }
988 
989 #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
990  if ((CPUID == CPU_None) && ISPCTargetIsNeon(m_ispc_target) && arch == Arch::arm)
991  CPUID = CPU_CortexA9;
992 #endif
993 #if defined(ISPC_ARM_ENABLED) && !defined(__aarch64__)
994  if ((CPUID == CPU_None) && ISPCTargetIsNeon(m_ispc_target) && arch == Arch::aarch64)
995  CPUID = CPU_CortexA35;
996 #endif
997  if (CPUID == CPU_None) {
998  cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
999  } else {
1000  if ((CPUfromISA != CPU_None) && !a.BackwardCompatible(CPUID, CPUfromISA)) {
1001  std::string target_string = ISPCTargetToString(m_ispc_target);
1002  Error(SourcePos(),
1003  "The requested CPU (%s) is incompatible"
1004  " with the CPU required for %s target (%s)",
1005  cpu, target_string.c_str(), a.GetDefaultNameFromType(CPUfromISA).c_str());
1006  return;
1007  }
1008  cpu = a.GetDefaultNameFromType(CPUID).c_str();
1009  }
1010  this->m_cpu = cpu;
1011 
1012  if (!error) {
1013  // Create TargetMachine
1014  std::string triple = GetTripleString();
1015 
1016  llvm::Optional<llvm::Reloc::Model> relocModel;
1017  if (m_generatePIC) {
1018  relocModel = llvm::Reloc::PIC_;
1019  }
1020  llvm::TargetOptions options;
1021 #ifdef ISPC_ARM_ENABLED
1022  options.FloatABIType = llvm::FloatABI::Hard;
1023  if (arch == Arch::arm) {
1024  if (g->target_os == TargetOS::custom_linux) {
1025  this->m_funcAttributes.push_back(std::make_pair("target-features", "+crypto,+fp-armv8,+neon,+sha2"));
1026  } else {
1027  this->m_funcAttributes.push_back(std::make_pair("target-features", "+neon,+fp16"));
1028  }
1029  featuresString = "+neon,+fp16";
1030  } else if (arch == Arch::aarch64) {
1031  if (g->target_os == TargetOS::custom_linux) {
1032  this->m_funcAttributes.push_back(
1033  std::make_pair("target-features", "+aes,+crc,+crypto,+fp-armv8,+neon,+sha2"));
1034  } else {
1035  this->m_funcAttributes.push_back(std::make_pair("target-features", "+neon"));
1036  }
1037  featuresString = "+neon";
1038  }
1039 #endif
1040  if (g->opt.disableFMA == false)
1041  options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
1042 
1043  m_targetMachine = m_target->createTargetMachine(triple, m_cpu, featuresString, options, relocModel);
1044  Assert(m_targetMachine != NULL);
1045 
1046  // Set Optimization level for llvm codegen based on Optimization level
1047  // requested by user via ISPC Optimization Flag. Mapping is :
1048  // ISPC O0 -> Codegen O0
1049  // ISPC O1,O2,O3,default -> Codegen O3
1050  llvm::CodeGenOpt::Level cOptLevel = llvm::CodeGenOpt::Level::Aggressive;
1051  switch (g->codegenOptLevel) {
1052  case Globals::CodegenOptLevel::None:
1053  cOptLevel = llvm::CodeGenOpt::Level::None;
1054  break;
1055 
1056  case Globals::CodegenOptLevel::Aggressive:
1057  cOptLevel = llvm::CodeGenOpt::Level::Aggressive;
1058  break;
1059  }
1060  m_targetMachine->setOptLevel(cOptLevel);
1061 
1062  m_targetMachine->Options.MCOptions.AsmVerbose = true;
1063 
1064  // Change default version of generated DWARF.
1065  if (g->generateDWARFVersion != 0) {
1066  m_targetMachine->Options.MCOptions.DwarfVersion = g->generateDWARFVersion;
1067  }
1068 
1069  // Initialize TargetData/DataLayout in 3 steps.
1070  // 1. Get default data layout first
1071  std::string dl_string;
1072  dl_string = m_targetMachine->createDataLayout().getStringRepresentation();
1073  // 2. Adjust for generic
1074  if (m_isa == Target::GENERIC) {
1075  // <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
1076  // that to the regular datalayout string for IA..
1077  // For generic-4 target we need to treat <4 x i1> as 128 bit value
1078  // in terms of required memory storage and alignment, as this is
1079  // translated to __m128 type.
1080  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
1081  "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
1082  "f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
1083  }
1084 
1085  // 3. Finally set member data
1086  m_dataLayout = new llvm::DataLayout(dl_string);
1087 
1088  // Set is32Bit
1089  // This indicates if we are compiling for 32 bit platform
1090  // and can assume 32 bit runtime.
1091  // FIXME: all generic targets are handled as 64 bit, which is incorrect.
1092 
1093  this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
1094 
1095  // TO-DO : Revisit addition of "target-features" and "target-cpu" for ARM support.
1096  llvm::AttrBuilder fattrBuilder;
1097 #ifdef ISPC_ARM_ENABLED
1098  if (m_isa == Target::NEON)
1099  fattrBuilder.addAttribute("target-cpu", this->m_cpu);
1100 #endif
1101  for (auto const &f_attr : m_funcAttributes)
1102  fattrBuilder.addAttribute(f_attr.first, f_attr.second);
1103  this->m_tf_attributes = new llvm::AttrBuilder(fattrBuilder);
1104 
1106  }
1107 
1108  m_valid = !error;
1109 
1110  if (printTarget) {
1111  printf("Target Triple: %s\n", m_targetMachine->getTargetTriple().str().c_str());
1112  printf("Target CPU: %s\n", m_targetMachine->getTargetCPU().str().c_str());
1113  printf("Target Feature String: %s\n", m_targetMachine->getTargetFeatureString().str().c_str());
1114  }
1115 
1116  return;
1117 }
1118 
1119 std::string Target::SupportedCPUs() {
1120  AllCPUs a;
1121  return a.HumanReadableListOfNames();
1122 }
1123 
1124 std::string Target::GetTripleString() const {
1125  llvm::Triple triple;
1126  switch (g->target_os) {
1127  case TargetOS::windows:
1128  if (m_arch == Arch::x86) {
1129  triple.setArchName("i386");
1130  } else if (m_arch == Arch::x86_64) {
1131  triple.setArchName("x86_64");
1132  } else if (m_arch == Arch::arm) {
1133  Error(SourcePos(), "Arm is not supported on Windows.");
1134  exit(1);
1135  } else if (m_arch == Arch::aarch64) {
1136  Error(SourcePos(), "Aarch64 is not supported on Windows.");
1137  exit(1);
1138  } else {
1139  Error(SourcePos(), "Unknown arch.");
1140  exit(1);
1141  }
1142  //"x86_64-pc-windows-msvc"
1143  triple.setVendor(llvm::Triple::VendorType::PC);
1144  triple.setOS(llvm::Triple::OSType::Win32);
1145  triple.setEnvironment(llvm::Triple::EnvironmentType::MSVC);
1146  break;
1148  case TargetOS::linux:
1149  if (m_arch == Arch::x86) {
1150  triple.setArchName("i386");
1151  } else if (m_arch == Arch::x86_64) {
1152  triple.setArchName("x86_64");
1153  } else if (m_arch == Arch::arm) {
1154  triple.setArchName("armv7");
1155  } else if (m_arch == Arch::aarch64) {
1156  triple.setArchName("aarch64");
1157  } else {
1158  Error(SourcePos(), "Unknown arch.");
1159  exit(1);
1160  }
1161  triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
1162  triple.setOS(llvm::Triple::OSType::Linux);
1163  if (m_arch == Arch::x86 || m_arch == Arch::x86_64 || m_arch == Arch::aarch64) {
1164  triple.setEnvironment(llvm::Triple::EnvironmentType::GNU);
1165  } else if (m_arch == Arch::arm) {
1166  triple.setEnvironment(llvm::Triple::EnvironmentType::GNUEABIHF);
1167  } else {
1168  Error(SourcePos(), "Unknown arch.");
1169  exit(1);
1170  }
1171  break;
1172  case TargetOS::freebsd:
1173  if (m_arch == Arch::x86) {
1174  triple.setArchName("i386");
1175  } else if (m_arch == Arch::x86_64) {
1176  triple.setArchName("amd64");
1177  } else if (m_arch == Arch::arm) {
1178  triple.setArchName("armv7");
1179  } else if (m_arch == Arch::aarch64) {
1180  triple.setArchName("aarch64");
1181  } else {
1182  Error(SourcePos(), "Unknown arch.");
1183  exit(1);
1184  }
1185  triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
1186  triple.setOS(llvm::Triple::OSType::FreeBSD);
1187  break;
1188  case TargetOS::macos:
1189  // asserts
1190  if (m_arch != Arch::x86_64) {
1191  Error(SourcePos(), "macOS target supports only x86_64.");
1192  exit(1);
1193  }
1194  triple.setArch(llvm::Triple::ArchType::x86_64);
1195  triple.setVendor(llvm::Triple::VendorType::Apple);
1196  triple.setOS(llvm::Triple::OSType::MacOSX);
1197  break;
1198  case TargetOS::android:
1199  if (m_arch == Arch::x86) {
1200  triple.setArchName("i386");
1201  } else if (m_arch == Arch::x86_64) {
1202  triple.setArchName("x86_64");
1203  } else if (m_arch == Arch::arm) {
1204  triple.setArchName("armv7");
1205  } else if (m_arch == Arch::aarch64) {
1206  triple.setArchName("aarch64");
1207  } else {
1208  Error(SourcePos(), "Unknown arch.");
1209  exit(1);
1210  }
1211  triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
1212  triple.setOS(llvm::Triple::OSType::Linux);
1213  triple.setEnvironment(llvm::Triple::EnvironmentType::Android);
1214  break;
1215  case TargetOS::ios:
1216  if (m_arch != Arch::aarch64) {
1217  Error(SourcePos(), "iOS target supports only aarch64.");
1218  exit(1);
1219  }
1220  // Note, for iOS arch need to be set to "arm64", instead of "aarch64".
1221  // Internet say this is for historical reasons.
1222  // "arm64-apple-ios"
1223  triple.setArchName("arm64");
1224  triple.setVendor(llvm::Triple::VendorType::Apple);
1225  triple.setOS(llvm::Triple::OSType::IOS);
1226  break;
1227  case TargetOS::ps4:
1228  if (m_arch != Arch::x86_64) {
1229  Error(SourcePos(), "PS4 target supports only x86_64.");
1230  exit(1);
1231  }
1232  // "x86_64-scei-ps4"
1233  triple.setArch(llvm::Triple::ArchType::x86_64);
1234  triple.setVendor(llvm::Triple::VendorType::SCEI);
1235  triple.setOS(llvm::Triple::OSType::PS4);
1236  break;
1237  case TargetOS::web:
1238  if (m_arch != Arch::wasm32) {
1239  Error(SourcePos(), "Web target supports only wasm32.");
1240  exit(1);
1241  }
1242  triple.setArch(llvm::Triple::ArchType::wasm32);
1243  triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
1244  triple.setOS(llvm::Triple::OSType::UnknownOS);
1245  break;
1246  case TargetOS::error:
1247  Error(SourcePos(), "Invalid target OS.");
1248  exit(1);
1249  }
1250 
1251  return triple.str();
1252 }
1253 
1254 // This function returns string representation of ISA for the purpose of
1255 // mangling. And may return any unique string, preferably short, like
1256 // sse4, avx and etc.
1257 const char *Target::ISAToString(ISA isa) {
1258  switch (isa) {
1259 #ifdef ISPC_ARM_ENABLED
1260  case Target::NEON:
1261  return "neon";
1262 #endif
1263 #ifdef ISPC_WASM_ENABLED
1264  case Target::WASM:
1265  return "wasm";
1266 #endif
1267  case Target::SSE2:
1268  return "sse2";
1269  case Target::SSE4:
1270  return "sse4";
1271  case Target::AVX:
1272  return "avx";
1273  case Target::AVX2:
1274  return "avx2";
1275  case Target::KNL_AVX512:
1276  return "avx512knl";
1277  case Target::SKX_AVX512:
1278  return "avx512skx";
1279  case Target::GENERIC:
1280  return "generic";
1281  default:
1282  FATAL("Unhandled target in ISAToString()");
1283  }
1284  return "";
1285 }
1286 
1287 const char *Target::GetISAString() const { return ISAToString(m_isa); }
1288 
1289 // This function returns string representation of default target corresponding
1290 // to ISA. I.e. for SSE4 it's sse4-i32x4, for AVX2 it's avx2-i32x8. This
1291 // string may be used to initialize Target.
1292 const char *Target::ISAToTargetString(ISA isa) {
1293  switch (isa) {
1294 #ifdef ISPC_ARM_ENABLED
1295  case Target::NEON:
1296  return "neon-i32x4";
1297 #endif
1298 #ifdef ISPC_WASM_ENABLED
1299  case Target::WASM:
1300  return "wasm-i32x4";
1301 #endif
1302  case Target::SSE2:
1303  return "sse2-i32x4";
1304  case Target::SSE4:
1305  return "sse4-i32x4";
1306  case Target::AVX:
1307  return "avx1-i32x8";
1308  case Target::AVX2:
1309  return "avx2-i32x8";
1310  case Target::KNL_AVX512:
1311  return "avx512knl-i32x16";
1312  case Target::SKX_AVX512:
1313  return "avx512skx-i32x16";
1314  case Target::GENERIC:
1315  return "generic-4";
1316  default:
1317  FATAL("Unhandled target in ISAToTargetString()");
1318  }
1319  return "";
1320 }
1321 
1322 const char *Target::GetISATargetString() const { return ISAToTargetString(m_isa); }
1323 
1324 static bool lGenericTypeLayoutIndeterminate(llvm::Type *type) {
1325  if (type->isFloatingPointTy() || type->isX86_MMXTy() || type->isVoidTy() || type->isIntegerTy() ||
1326  type->isLabelTy() || type->isMetadataTy())
1327  return false;
1328 
1330  return true;
1331 
1332  llvm::ArrayType *at = llvm::dyn_cast<llvm::ArrayType>(type);
1333  if (at != NULL)
1334  return lGenericTypeLayoutIndeterminate(at->getElementType());
1335 
1336  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(type);
1337  if (pt != NULL)
1338  return false;
1339 
1340  llvm::StructType *st = llvm::dyn_cast<llvm::StructType>(type);
1341  if (st != NULL) {
1342  for (int i = 0; i < (int)st->getNumElements(); ++i)
1343  if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
1344  return true;
1345  return false;
1346  }
1347 
1348  Assert(llvm::isa<llvm::VectorType>(type));
1349  return true;
1350 }
1351 
1353  bool ret = false;
1354  if (m_isa == Target::GENERIC && lGenericTypeLayoutIndeterminate(type) == true)
1355  ret = true;
1356  return ret;
1357 }
1358 
1359 llvm::Value *Target::SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd) {
1361  llvm::Value *index[1] = {LLVMInt32(1)};
1362  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1363  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1364  llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
1365  llvm::Instruction *gep =
1366  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr, arrayRef, "sizeof_gep", insertAtEnd);
1368  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type, "sizeof_int", insertAtEnd);
1369  else
1370  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type, "sizeof_int", insertAtEnd);
1371  }
1372 
1373  uint64_t byteSize = getDataLayout()->getTypeStoreSize(type);
1375  return LLVMInt32((int32_t)byteSize);
1376  else
1377  return LLVMInt64(byteSize);
1378 }
1379 
1380 llvm::Value *Target::StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd) {
1382  llvm::Value *indices[2] = {LLVMInt32(0), LLVMInt32(element)};
1383  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1384  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1385  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1386  llvm::Instruction *gep =
1387  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr, arrayRef, "offset_gep", insertAtEnd);
1389  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type, "offset_int", insertAtEnd);
1390  else
1391  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type, "offset_int", insertAtEnd);
1392  }
1393 
1394  llvm::StructType *structType = llvm::dyn_cast<llvm::StructType>(type);
1395  if (structType == NULL || structType->isSized() == false) {
1396  Assert(m->errorCount > 0);
1397  return NULL;
1398  }
1399 
1400  const llvm::StructLayout *sl = getDataLayout()->getStructLayout(structType);
1401  Assert(sl != NULL);
1402 
1403  uint64_t offset = sl->getElementOffset(element);
1405  return LLVMInt32((int32_t)offset);
1406  else
1407  return LLVMInt64(offset);
1408 }
1409 
1410 void Target::markFuncWithTargetAttr(llvm::Function *func) {
1411  if (m_tf_attributes) {
1412  func->addAttributes(llvm::AttributeList::FunctionIndex, *m_tf_attributes);
1413  }
1414 }
1415 
1416 ///////////////////////////////////////////////////////////////////////////
1417 // Opt
1418 
1420  level = 1;
1421  fastMath = false;
1422  fastMaskedVload = false;
1423  force32BitAddressing = true;
1424  unrollLoops = true;
1425  disableAsserts = false;
1426  disableFMA = false;
1427  forceAlignedMemory = false;
1428  disableMaskAllOnOptimizations = false;
1429  disableHandlePseudoMemoryOps = false;
1430  disableBlendedMaskedStores = false;
1431  disableCoherentControlFlow = false;
1432  disableUniformControlFlow = false;
1433  disableGatherScatterOptimizations = false;
1434  disableMaskedStoreToStore = false;
1435  disableGatherScatterFlattening = false;
1436  disableUniformMemoryOptimizations = false;
1437  disableCoalescing = false;
1438  disableZMM = false;
1439 }
1440 
1441 ///////////////////////////////////////////////////////////////////////////
1442 // Globals
1443 
1445  target_registry = TargetLibRegistry::getTargetLibRegistry();
1446 
1447  mathLib = Globals::Math_ISPC;
1448  codegenOptLevel = Globals::Aggressive;
1449 
1450  includeStdlib = true;
1451  runCPP = true;
1452  debugPrint = false;
1453  dumpFile = false;
1454  printTarget = false;
1455  NoOmitFramePointer = false;
1456  debugIR = -1;
1457  disableWarnings = false;
1458  warningsAsErrors = false;
1459  quiet = false;
1460  forceColoredOutput = false;
1461  disableLineWrap = false;
1462  emitPerfWarnings = true;
1463  emitInstrumentation = false;
1464  noPragmaOnce = false;
1465  generateDebuggingSymbols = false;
1466  generateDWARFVersion = 3;
1467  enableFuzzTest = false;
1468  fuzzTestSeed = -1;
1469  mangleFunctionsWithTarget = false;
1470  isMultiTargetCompilation = false;
1471  errorLimit = -1;
1472  target = NULL;
1473  ctx = new llvm::LLVMContext;
1474 
1475 #ifdef ISPC_HOST_IS_WINDOWS
1476  _getcwd(currentDirectory, sizeof(currentDirectory));
1477 #else
1478  if (getcwd(currentDirectory, sizeof(currentDirectory)) == NULL)
1479  FATAL("Current directory path is too long!");
1480 #endif
1481  forceAlignment = -1;
1482  dllExport = false;
1483 
1484  // Target OS defaults to host OS.
1485  target_os = GetHostOS();
1486 }
1487 
1488 ///////////////////////////////////////////////////////////////////////////
1489 // SourcePos
1490 
1491 SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
1492  name = n;
1493  if (name == NULL) {
1494  if (m != NULL)
1495  name = m->module->getModuleIdentifier().c_str();
1496  else
1497  name = "(unknown)";
1498  }
1499  first_line = fl;
1500  first_column = fc;
1501  last_line = ll != 0 ? ll : fl;
1502  last_column = lc != 0 ? lc : fc;
1503 }
1504 
1505 llvm::DIFile *
1506 // llvm::MDFile*
1508  std::string directory, filename;
1509  GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
1510  llvm::DIFile *ret = m->diBuilder->createFile(filename, directory);
1511  return ret;
1512 }
1513 
1514 void SourcePos::Print() const {
1515  printf(" @ [%s:%d.%d - %d.%d] ", name, first_line, first_column, last_line, last_column);
1516 }
1517 
1518 bool SourcePos::operator==(const SourcePos &p2) const {
1519  return (!strcmp(name, p2.name) && first_line == p2.first_line && first_column == p2.first_column &&
1520  last_line == p2.last_line && last_column == p2.last_column);
1521 }
1522 
1523 SourcePos Union(const SourcePos &p1, const SourcePos &p2) {
1524  if (strcmp(p1.name, p2.name) != 0)
1525  return p1;
1526 
1527  SourcePos ret;
1528  ret.name = p1.name;
1529  ret.first_line = std::min(p1.first_line, p2.first_line);
1530  ret.first_column = std::min(p1.first_column, p2.first_column);
1531  ret.last_line = std::max(p1.last_line, p2.last_line);
1532  ret.last_column = std::max(p1.last_column, p2.last_column);
1533  return ret;
1534 }
bool disableFMA
Definition: ispc.h:420
bool m_hasTranscendentals
Definition: ispc.h:366
TargetOS target_os
Definition: ispc.h:515
std::string ArchToString(Arch arch)
ISPCTarget
Definition: target_enums.h:55
Globals()
Definition: ispc.cpp:1444
Opt opt
Definition: ispc.h:509
int last_column
Definition: ispc.h:130
llvm::DIFile * GetDIFile() const
Definition: ispc.cpp:1507
const llvm::Target * m_target
Definition: ispc.h:273
static bool __os_has_avx_support()
Definition: ispc.cpp:96
This structure collects together a number of global variables.
Definition: ispc.h:502
std::vector< std::pair< std::string, std::string > > m_funcAttributes
Definition: ispc.h:309
int m_nativeVectorAlignment
Definition: ispc.h:326
AllCPUs()
Definition: ispc.cpp:300
ISPCTarget m_ispc_target
Definition: ispc.h:291
SourcePos Union(const SourcePos &p1, const SourcePos &p2)
Definition: ispc.cpp:1523
int first_line
Definition: ispc.h:127
llvm::Value * SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1359
SourcePos(const char *n=NULL, int fl=0, int fc=0, int ll=0, int lc=0)
Definition: ispc.cpp:1491
static llvm::VectorType * BoolVectorType
Definition: llvmutil.h:81
const char * GetISATargetString() const
Definition: ispc.cpp:1322
std::string m_cpu
Definition: ispc.h:303
Opt()
Definition: ispc.cpp:1419
Target(Arch arch, const char *cpu, ISPCTarget isa, bool pic, bool printTarget)
Definition: ispc.cpp:443
std::string HumanReadableListOfNames()
Definition: ispc.cpp:405
bool BackwardCompatible(CPUtype what, CPUtype with)
Definition: ispc.cpp:436
std::string GetTripleString() const
Definition: ispc.cpp:1124
static bool lGenericTypeLayoutIndeterminate(llvm::Type *type)
Definition: ispc.cpp:1324
int m_nativeVectorWidth
Definition: ispc.h:319
Module * m
Definition: ispc.cpp:73
static const char * ISAToString(Target::ISA isa)
Definition: ispc.cpp:1257
bool m_generatePIC
Definition: ispc.h:338
std::set< CPUtype > Set(int type,...)
Definition: ispc.cpp:286
bool m_maskingIsFree
Definition: ispc.h:344
static llvm::VectorType * Int1VectorType
Definition: llvmutil.h:83
static void __cpuidex(int info[4], int level, int count)
Definition: ispc.cpp:86
llvm::Module * module
Definition: module.h:151
static std::string SupportedCPUs()
Definition: ispc.cpp:1119
bool disableZMM
Definition: ispc.h:491
Definition: module.h:51
static llvm::Type * Int64Type
Definition: llvmutil.h:68
char currentDirectory[1024]
Definition: ispc.h:615
std::string & GetDefaultNameFromType(CPUtype type)
Definition: ispc.cpp:421
void GetDirectoryAndFileName(const std::string &currentDirectory, const std::string &relativeName, std::string *directory, std::string *filename)
Definition: util.cpp:516
bool ISPCTargetIsX86(ISPCTarget target)
Header file with declarations for various LLVM utility stuff.
Arch m_arch
Definition: ispc.h:297
static ISPCTarget lGetSystemISA()
Definition: ispc.cpp:128
bool m_is32Bit
Definition: ispc.h:300
static TargetLibRegistry * getTargetLibRegistry()
CodegenOptLevel codegenOptLevel
Definition: ispc.h:524
bool m_hasRand
Definition: ispc.h:356
bool IsGenericTypeLayoutIndeterminate(llvm::Type *type)
Definition: ispc.cpp:1352
bool m_hasRcpd
Definition: ispc.h:375
int m_maskBitCount
Definition: ispc.h:349
static void __cpuid(int info[4], int infoType)
Definition: ispc.cpp:81
Representation of a range of positions in a source file.
Definition: ispc.h:123
llvm::Value * StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1380
std::string ISPCTargetToString(ISPCTarget target)
llvm::ConstantInt * LLVMInt32(int32_t ival)
Definition: llvmutil.cpp:233
int m_vectorWidth
Definition: ispc.h:335
void Print() const
Definition: ispc.cpp:1514
llvm::TargetMachine * m_targetMachine
Definition: ispc.h:283
bool force32BitAddressing
Definition: ispc.h:412
static bool __os_has_avx512_support()
Definition: ispc.cpp:111
const char * name
Definition: ispc.h:126
void markFuncWithTargetAttr(llvm::Function *func)
Definition: ispc.cpp:1410
void Error(SourcePos p, const char *fmt,...)
Definition: util.cpp:351
#define FATAL(message)
Definition: util.h:116
int m_dataTypeWidth
Definition: ispc.h:330
TargetOS GetHostOS()
static llvm::Type * Int32Type
Definition: llvmutil.h:67
int last_line
Definition: ispc.h:129
const llvm::DataLayout * getDataLayout() const
Definition: ispc.h:224
#define PTYPE(p)
Definition: llvmutil.h:47
#define ISPC_MAX_NVEC
Definition: ispc.h:69
std::vector< std::vector< std::string > > names
Definition: ispc.cpp:283
bool m_hasGather
Definition: ispc.h:359
int first_column
Definition: ispc.h:128
llvm::DataLayout * m_dataLayout
Definition: ispc.h:284
bool m_hasScatter
Definition: ispc.h:362
#define Assert(expr)
Definition: util.h:128
ISA
Definition: ispc.h:157
const char * GetISAString() const
Definition: ispc.cpp:1287
bool m_valid
Definition: ispc.h:288
Globals * g
Definition: ispc.cpp:72
int generateDWARFVersion
Definition: ispc.h:596
static llvm::VectorType * MaskType
Definition: llvmutil.h:79
static const bool lIsTargetValidforArch(ISPCTarget target, Arch arch)
Definition: ispc.cpp:183
std::vector< std::set< CPUtype > > compat
Definition: ispc.cpp:284
bool m_hasTrigonometry
Definition: ispc.h:369
bool ISPCTargetIsNeon(ISPCTarget target)
llvm::ConstantInt * LLVMInt64(int64_t ival)
Definition: llvmutil.cpp:241
bool m_hasHalf
Definition: ispc.h:353
bool operator==(const SourcePos &p2) const
Definition: ispc.cpp:1518
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
int errorCount
Definition: module.h:144
ISA m_isa
Definition: ispc.h:294
bool m_hasVecPrefetch
Definition: ispc.h:378
llvm::AttrBuilder * m_tf_attributes
Definition: ispc.h:314
llvm::DIBuilder * diBuilder
Definition: module.h:154
Main ispc.header file. Defines Target, Globals and Opt classes.
void Warning(SourcePos p, const char *fmt,...)
Definition: util.cpp:378
CPUtype GetTypeFromName(std::string name)
Definition: ispc.cpp:426
Arch
Definition: target_enums.h:50
static const char * ISAToTargetString(Target::ISA isa)
Definition: ispc.cpp:1292
bool m_hasRsqrtd
Definition: ispc.h:372
CPUtype
Definition: ispc.cpp:200