Intel SPMD Program Compiler  1.12.0
ispc.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2019, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file ispc.cpp
35  @brief ispc global definitions
36 */
37 
38 #include "ispc.h"
39 #include "llvmutil.h"
40 #include "module.h"
41 #include "util.h"
42 #include <sstream>
43 #include <stdarg.h> /* va_list, va_start, va_arg, va_end */
44 #include <stdio.h>
45 #ifdef ISPC_HOST_IS_WINDOWS
46 #include <direct.h>
47 #include <windows.h>
48 #define strcasecmp stricmp
49 #if ISPC_LLVM_VERSION >= ISPC_LLVM_7_0
50 #include <intrin.h>
51 #endif
52 #else
53 #include <sys/types.h>
54 #include <unistd.h>
55 #endif
56 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
57 #include <llvm/Instructions.h>
58 #include <llvm/LLVMContext.h>
59 #include <llvm/Module.h>
60 #else /* 3.3+ */
61 #include <llvm/IR/Instructions.h>
62 #include <llvm/IR/LLVMContext.h>
63 #include <llvm/IR/Module.h>
64 #endif
65 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
66 #if ISPC_LLVM_VERSION >= ISPC_LLVM_6_0
67 #include <llvm/CodeGen/TargetLowering.h>
68 #include <llvm/CodeGen/TargetSubtargetInfo.h>
69 #else
70 #include <llvm/Target/TargetSubtargetInfo.h>
71 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
72 #include <llvm/Target/TargetLowering.h>
73 #endif
74 #endif
75 #endif
76 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5 // LLVM 3.5+
77 #include <llvm/IR/DIBuilder.h>
78 #include <llvm/IR/DebugInfo.h>
79 #else // LLVM 3.2, 3.3, 3.4
80 #include <llvm/DIBuilder.h>
81 #include <llvm/DebugInfo.h>
82 #endif
83 #if ISPC_LLVM_VERSION >= ISPC_LLVM_5_0 // LLVM 5.0+
84 #include <llvm/BinaryFormat/Dwarf.h>
85 #else // LLVM up to 4.x
86 #include <llvm/Support/Dwarf.h>
87 #endif
88 #include <llvm/Target/TargetMachine.h>
89 #include <llvm/Target/TargetOptions.h>
90 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
91 #include <llvm/DataLayout.h>
92 #else // LLVM 3.3+
93 #include <llvm/IR/Attributes.h>
94 #include <llvm/IR/DataLayout.h>
95 #endif
96 #include <llvm/Support/CodeGen.h>
97 #include <llvm/Support/Host.h>
98 #include <llvm/Support/TargetRegistry.h>
99 #include <llvm/Support/TargetSelect.h>
100 
103 
104 ///////////////////////////////////////////////////////////////////////////
105 // Target
106 
107 #if !defined(ISPC_HOST_IS_WINDOWS) && !defined(__arm__) && !defined(__aarch64__)
108 // __cpuid() and __cpuidex() are defined on Windows in <intrin.h> for x86/x64.
109 // On *nix they need to be defined manually through inline assembler.
// Executes the x86 `cpuid` instruction with EAX preloaded with infoType and
// stores the values left in EAX, EBX, ECX and EDX into info[0], info[1],
// info[2] and info[3] respectively.
// No value is supplied for ECX, so this helper is only suitable for leaves
// that do not take a sub-leaf index; __cpuidex() passes one explicitly.
110 static void __cpuid(int info[4], int infoType) {
    // The "0" input constraint ties infoType to output operand 0 (EAX).
111  __asm__ __volatile__("cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "0"(infoType));
112 }
113 
// Executes the x86 `cpuid` instruction with EAX = level and ECX = count and
// stores the resulting EAX, EBX, ECX and EDX in info[0..3], in that order.
// EBX is not named as an output: it is swapped with a compiler-chosen
// register (operand %1) before `cpuid` and swapped back afterwards, so the
// EBX result ends up in info[1] while EBX itself holds its previous value
// again when the statement finishes.
// NOTE(review): the `{l}` / `{%%}` groups look like GCC assembler-dialect
// alternatives (AT&T form shown first) -- confirm against the GCC manual.
114 /* Save %ebx in case it's the PIC register */
115 static void __cpuidex(int info[4], int level, int count) {
116  __asm__ __volatile__("xchg{l}\t{%%}ebx, %1\n\t"
117  "cpuid\n\t"
118  "xchg{l}\t{%%}ebx, %1\n\t"
119  : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
    // "0" ties level to operand 0 (EAX); "2" ties count to operand 2 (ECX).
120  : "0"(level), "2"(count));
121 }
122 #endif // !ISPC_HOST_IS_WINDOWS && !__ARM__ && !__AARCH64__
123 
124 #if !defined(__arm__) && !defined(__aarch64__)
/** Reports whether the operating system preserves the YMM registers, i.e.
    whether AVX code can safely be run on this machine.

    The extended-feature mask is read with xgetbv (ECX = 0) and both bits
    selected by the value 6 (bits 1 and 2) have to be set.

    @return true when both required bits are set in the feature mask.
 */
static bool __os_has_avx_support() {
    // Bits 1 and 2 of the extended-feature mask.
    const int requiredStateBits = 6;
#if !defined(ISPC_HOST_IS_WINDOWS)
    // xgetbv is emitted as raw opcode bytes: older assemblers do not know the
    // mnemonic and the assembler in use cannot easily be detected at compile
    // time.
    int maskLow, maskHigh;
    __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" : "=a"(maskLow), "=d"(maskHigh) : "c"(0));
    return (maskLow & requiredStateBits) == requiredStateBits;
#else  // defined(ISPC_HOST_IS_WINDOWS)
    // The compiler intrinsic returns the complete 64-bit mask.
    const unsigned long long enabledFeatures = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledFeatures & requiredStateBits) == requiredStateBits;
#endif // !defined(ISPC_HOST_IS_WINDOWS)
}
139 
/** Reports whether the operating system preserves the XMM, YMM and ZMM
    registers, i.e. whether AVX2 and AVX-512 code can safely be run.
    See section 2.1 of
    software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf

    The extended-feature mask is read with xgetbv (ECX = 0) and every bit
    selected by the value 0xE6 (bits 1, 2, 5, 6 and 7) has to be set.

    @return true when all required bits are set in the feature mask.
 */
static bool __os_has_avx512_support() {
    // Bits 1, 2, 5, 6 and 7 of the extended-feature mask.
    const int requiredStateBits = 0xE6;
#if !defined(ISPC_HOST_IS_WINDOWS)
    // xgetbv is emitted as raw opcode bytes: older assemblers do not know the
    // mnemonic and the assembler in use cannot easily be detected at compile
    // time.
    int maskLow, maskHigh;
    __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" : "=a"(maskLow), "=d"(maskHigh) : "c"(0));
    return (maskLow & requiredStateBits) == requiredStateBits;
#else  // defined(ISPC_HOST_IS_WINDOWS)
    // The compiler intrinsic returns the complete 64-bit mask.
    const unsigned long long enabledFeatures = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledFeatures & requiredStateBits) == requiredStateBits;
#endif // !defined(ISPC_HOST_IS_WINDOWS)
}
155 #endif // !__arm__ && !__aarch64__
156 
157 static const char *lGetSystemISA() {
158 #if defined(__arm__) || defined(__aarch64__)
159  return "neon-i32x4";
160 #else
161  int info[4];
162  __cpuid(info, 1);
163 
164  int info2[4];
165  // Call cpuid with eax=7, ecx=0
166  __cpuidex(info2, 7, 0);
167 
168  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
169  (info2[1] & (1 << 5)) != 0 && // AVX2
170  (info2[1] & (1 << 16)) != 0 && // AVX512 F
172  // We need to verify that AVX2 is also available,
173  // as well as AVX512, because our targets are supposed
174  // to use both.
175 
176  if ((info2[1] & (1 << 17)) != 0 && // AVX512 DQ
177  (info2[1] & (1 << 28)) != 0 && // AVX512 CDI
178  (info2[1] & (1 << 30)) != 0 && // AVX512 BW
179  (info2[1] & (1 << 31)) != 0) { // AVX512 VL
180  return "avx512skx-i32x16";
181  } else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
182  (info2[1] & (1 << 27)) != 0 && // AVX512 ER
183  (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
184  return "avx512knl-i32x16";
185  }
186  // If it's unknown AVX512 target, fall through and use AVX2
187  // or whatever is available in the machine.
188  }
189 
190  if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
191  (info[2] & (1 << 28)) != 0 && __os_has_avx_support()) { // AVX
192  // AVX1 for sure....
193  // Ivy Bridge?
194  if ((info[2] & (1 << 29)) != 0 && // F16C
195  (info[2] & (1 << 30)) != 0 && // RDRAND
196  (info2[1] & (1 << 5)) != 0) { // AVX2.
197  return "avx2-i32x8";
198  }
199  // Regular AVX
200  return "avx1-i32x8";
201  } else if ((info[2] & (1 << 19)) != 0)
202  return "sse4-i32x4";
203  else if ((info[3] & (1 << 26)) != 0)
204  return "sse2-i32x4";
205  else {
206  Error(SourcePos(), "Unable to detect supported SSE/AVX ISA. Exiting.");
207  exit(1);
208  }
209 #endif
210 }
211 
/** Checks whether a target ISA name may be combined with an architecture
    name.

    All comparisons ignore case, matching the way target names are compared
    elsewhere in this file:
      - names starting with "sse" or "avx" require arch "x86" or "x86-64";
      - "neon-i8x16" and "neon-i16x8" require arch "arm";
      - "neon-i32x4", "neon-i32x8" and "neon" require "arm" or "aarch64";
      - "nvptx" requires arch "nvptx64".
    Any other ISA name is accepted with every architecture.

    @param isa   Target ISA name; must not be NULL.
    @param arch  Architecture name; must not be NULL.
    @return true if the combination is allowed, false otherwise.
 */
static const bool lIsISAValidforArch(const char *isa, const char *arch) {
    // Compare the first three characters without regard to case.  A plain
    // strncmp() here would let e.g. "AVX2-i32x8" skip the x86 check even
    // though such spellings are accepted as valid target names.
    const std::string family = std::string(isa).substr(0, 3);
    const bool isX86Family = !strcasecmp(family.c_str(), "sse") || !strcasecmp(family.c_str(), "avx");

    if (isX86Family)
        return !strcasecmp(arch, "x86-64") || !strcasecmp(arch, "x86");

    if (!strcasecmp(isa, "neon-i8x16") || !strcasecmp(isa, "neon-i16x8"))
        return !strcasecmp(arch, "arm");

    if (!strcasecmp(isa, "neon-i32x4") || !strcasecmp(isa, "neon-i32x8") || !strcasecmp(isa, "neon"))
        return !strcasecmp(arch, "arm") || !strcasecmp(arch, "aarch64");

    if (!strcasecmp(isa, "nvptx"))
        return !strcasecmp(arch, "nvptx64");

    // ISA names that are not listed above are not restricted here.
    return true;
}
231 
/** CPU models known to the compiler.

    The enumerators are used as indices into the per-CPU tables of AllCPUs,
    which are sized with sizeofCPUtype, so sizeofCPUtype has to stay the last
    enumerator.  CPU_None has to stay 0: it terminates the variadic argument
    lists handed to AllCPUs::Set().

    NOTE(review): CPU_Generic, CPU_x86_64, CPU_Bonnell, CPU_Core2, CPU_Penryn,
    CPU_Nehalem, CPU_PS4, CPU_SandyBridge, CPU_IvyBridge, CPU_Haswell,
    CPU_Broadwell, CPU_Silvermont and sizeofCPUtype were absent from the
    reviewed listing; they were reinstated from their descriptive comments and
    from the names AllCPUs refers to -- confirm against the upstream file.
 */
typedef enum {
    // Special value, indicates that no CPU is present.
    CPU_None = 0,

    // 'Generic' CPU without any hardware SIMD capabilities.
    CPU_Generic,

    // A generic 64-bit specific x86 processor model which tries to be good
    // for modern chips without enabling instruction set encodings past the
    // basic SSE2 and 64-bit ones
    CPU_x86_64,

    // Early Atom CPU. Supports SSSE3.
    CPU_Bonnell,

    // Generic Core2-like. Supports SSSE3. Isn't quite compatible with Bonnell,
    // but for ISPC the difference is negligible; ISPC doesn't make use of it.
    CPU_Core2,

    // Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2).
    CPU_Penryn,

    // Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT.
    CPU_Nehalem,

    // CPU in PS4/Xbox One.
    CPU_PS4,

    // Sandy Bridge. Supports AVX 1.
    CPU_SandyBridge,

    // Ivy Bridge. Supports AVX 1 + RDRAND.
    CPU_IvyBridge,

    // Haswell. Supports AVX 2.
    CPU_Haswell,

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
    // Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP.
    CPU_Broadwell,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
    // Knights Landing - Xeon Phi.
    // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
    //          AVX-512CDI: Conflict Detection;
    //          AVX-512ER & PF: 28-bit precision RCP, RSQRT and EXP transcendentals,
    //                          new prefetch instructions.
    CPU_KNL,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
    // Skylake Xeon.
    // Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
    //          AVX-512CDI: Conflict Detection;
    //          AVX-512VL: Vector Length Orthogonality;
    //          AVX-512DQ: New HPC ISA (vs AVX512F);
    //          AVX-512BW: Byte and Word Support.
    CPU_SKX,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0
    // Icelake client
    CPU_ICL,
#endif

#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
    // Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
    CPU_Silvermont,
#endif

// FIXME: LLVM supports a ton of different ARM CPU variants--not just
// cortex-a9 and a15.  We should be able to handle any of them that also
// have NEON support.
#ifdef ISPC_ARM_ENABLED
    // ARM Cortex A15. Supports NEON VFPv4.
    CPU_CortexA15,

    // ARM Cortex A9. Supports NEON VFPv3.
    CPU_CortexA9,

    // ARM Cortex A35, A53, A57.
    CPU_CortexA35,
    CPU_CortexA53,
    CPU_CortexA57,
#endif

#ifdef ISPC_NVPTX_ENABLED
    // NVidia CUDA-compatible SM-35 architecture.
    CPU_SM35,
#endif

    // Number of CPU types; used to size and bound the per-CPU tables.
    // Must remain the last enumerator.
    sizeofCPUtype
} CPUtype;
326 
// Lookup tables describing every CPU type in CPUtype: the names each CPU can
// be referred to by, and the set of CPU types each one is recorded as being
// backward compatible with.  Both tables are indexed by CPUtype value.
//
// NOTE(review): this listing of the class is incomplete.  The line opening
// the constructor (between "public:" and the first assignment to `names`) and
// most of the `compat[...] = Set(...)` statements are missing or cut short;
// the gaps are visible as jumps in the embedded line numbers.  Restore them
// from the upstream file before building -- they cannot be reconstructed from
// what is shown here.
327 class AllCPUs {
328  private:
    // names[type] lists the accepted spellings of a CPU; element 0 is the
    // default name, any further elements are reported as synonyms.
329  std::vector<std::vector<std::string>> names;
    // compat[type] is the set queried by BackwardCompatible(type, ...).
330  std::vector<std::set<CPUtype>> compat;
331 
    // Builds a set of CPU types from a variadic argument list.
    // The first argument is always inserted; the following arguments are read
    // as `int` and inserted until a value equal to CPU_None is met, so every
    // call has to end with CPU_None.  CPU_None itself is only part of the
    // result when it is passed as the first argument.
332  std::set<CPUtype> Set(int type, ...) {
333  std::set<CPUtype> retn;
334  va_list args;
335 
336  retn.insert((CPUtype)type);
337  va_start(args, type);
338  while ((type = va_arg(args, int)) != CPU_None)
339  retn.insert((CPUtype)type);
340  va_end(args);
341 
342  return retn;
343  }
344 
345  public:
    // NOTE(review): the constructor's opening line is missing here; the
    // statements below up to the closing brace before
    // HumanReadableListOfNames() fill both tables.
    // Give both tables one slot per CPUtype value.
347  names = std::vector<std::vector<std::string>>(sizeofCPUtype);
348  compat = std::vector<std::set<CPUtype>>(sizeofCPUtype);
349 
    // For each CPU the first pushed string is its default name.
350  names[CPU_None].push_back("");
351 
352  names[CPU_Generic].push_back("generic");
353 
354  names[CPU_x86_64].push_back("x86-64");
355 
356  names[CPU_Bonnell].push_back("atom");
357  names[CPU_Bonnell].push_back("bonnell");
358 
359  names[CPU_Core2].push_back("core2");
360 
361  names[CPU_Penryn].push_back("penryn");
362 
363 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
364  names[CPU_Silvermont].push_back("slm");
365  names[CPU_Silvermont].push_back("silvermont");
366 #endif
367 
368  names[CPU_Nehalem].push_back("corei7");
369  names[CPU_Nehalem].push_back("nehalem");
370 
371  names[CPU_PS4].push_back("ps4");
372  names[CPU_PS4].push_back("btver2");
373 
374  names[CPU_SandyBridge].push_back("corei7-avx");
375  names[CPU_SandyBridge].push_back("sandybridge");
376 
377  names[CPU_IvyBridge].push_back("core-avx-i");
378  names[CPU_IvyBridge].push_back("ivybridge");
379 
380  names[CPU_Haswell].push_back("core-avx2");
381  names[CPU_Haswell].push_back("haswell");
382 
383 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
384  names[CPU_Broadwell].push_back("broadwell");
385 #endif
386 
387 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
388  names[CPU_KNL].push_back("knl");
389 #endif
390 
391 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
392  names[CPU_SKX].push_back("skx");
393 #endif
394 
395 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+
396  names[CPU_ICL].push_back("icl");
397  names[CPU_ICL].push_back("icelake-client");
398 #endif
399 
400 #ifdef ISPC_ARM_ENABLED
401  names[CPU_CortexA15].push_back("cortex-a15");
402 
403  names[CPU_CortexA9].push_back("cortex-a9");
404 
405  names[CPU_CortexA35].push_back("cortex-a35");
406 
407  names[CPU_CortexA53].push_back("cortex-a53");
408 
409  names[CPU_CortexA57].push_back("cortex-a57");
410 #endif
411 
412 #ifdef ISPC_NVPTX_ENABLED
413  names[CPU_SM35].push_back("sm_35");
414 #endif
415  Assert(names.size() == sizeofCPUtype);
416 
    // When the enumerator is not available for the LLVM version in use, the
    // identifier is defined as an alias of another CPU so that the Set(...)
    // argument lists that mention it still compile.
417 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3 // LLVM 3.2 or 3.3
418 #define CPU_Silvermont CPU_Nehalem
419 #else /* LLVM 3.4+ */
420  compat[CPU_Silvermont] =
    // NOTE(review): right-hand side of the assignment above is missing.
422 #endif
423 
424 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
425  compat[CPU_KNL] = Set(CPU_KNL, CPU_Generic, CPU_x86_64, CPU_Bonnell, CPU_Penryn, CPU_Core2, CPU_Nehalem,
    // NOTE(review): remainder of the argument list above is missing.
427 #endif
428 
429 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
430  compat[CPU_SKX] = Set(CPU_SKX, CPU_x86_64, CPU_Bonnell, CPU_Penryn, CPU_Core2, CPU_Nehalem, CPU_Silvermont,
    // NOTE(review): remainder of the argument list above is missing.
432 #endif
433 
434 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+
435  compat[CPU_ICL] = Set(CPU_ICL, CPU_SKX, CPU_x86_64, CPU_Bonnell, CPU_Penryn, CPU_Core2, CPU_Nehalem,
    // NOTE(review): remainder of the argument list above is missing.
437  ;
438 #endif
439 
    // Same aliasing approach as for CPU_Silvermont above.
440 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
441 #define CPU_Broadwell CPU_Haswell
442 #else /* LLVM 3.6+ */
443  compat[CPU_Broadwell] =
    // NOTE(review): right-hand side of the assignment above is missing.
446 #endif
    // NOTE(review): several lines are missing in this region; only the two
    // assignment heads below survive, without their right-hand sides.
455  compat[CPU_Nehalem] =
457  compat[CPU_Penryn] =
    // The generic CPU is compatible with itself only.
461  compat[CPU_Generic] = Set(CPU_Generic, CPU_None);
462 
464 
465 #ifdef ISPC_ARM_ENABLED
466  compat[CPU_CortexA15] = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15, CPU_None);
467  compat[CPU_CortexA9] = Set(CPU_Generic, CPU_CortexA9, CPU_None);
468  compat[CPU_CortexA35] = Set(CPU_Generic, CPU_CortexA35, CPU_None);
469  compat[CPU_CortexA53] = Set(CPU_Generic, CPU_CortexA53, CPU_None);
470  compat[CPU_CortexA57] = Set(CPU_Generic, CPU_CortexA57, CPU_None);
471 #endif
472 
473 #ifdef ISPC_NVPTX_ENABLED
474  compat[CPU_SM35] = Set(CPU_Generic, CPU_SM35, CPU_None);
475 #endif
476  }
477 
    // Returns one string listing every CPU from CPU_Generic onwards (CPU_None
    // is skipped), separated by ", ".  Each entry is the default name,
    // followed by " (synonyms: a, b, ...)" when the CPU has more than one
    // name.  Reads names[i][0] for every CPU, so each listed CPU is expected
    // to have at least one name.
478  std::string HumanReadableListOfNames() {
479  std::stringstream CPUs;
480  for (int i = CPU_Generic; i < sizeofCPUtype; i++) {
481  CPUs << names[i][0];
482  if (names[i].size() > 1) {
483  CPUs << " (synonyms: " << names[i][1];
484  for (int j = 2, je = names[i].size(); j < je; j++)
485  CPUs << ", " << names[i][j];
486  CPUs << ")";
487  }
    // No separator after the last entry.
488  if (i < sizeofCPUtype - 1)
489  CPUs << ", ";
490  }
491  return CPUs.str();
492  }
493 
    // Returns a reference to the default (first) name of `type`.
    // `type` may be CPU_None, whose default name is the empty string.
494  std::string &GetDefaultNameFromType(CPUtype type) {
495  Assert((type >= CPU_None) && (type < sizeofCPUtype));
496  return names[type][0];
497  }
498 
    // Returns the CPU type that has `name` among its names, using an exact
    // (case-sensitive) string comparison, or CPU_None when no CPU matches.
    // The search starts at index 1, so the empty name of CPU_None is never
    // compared against; both loops stop as soon as a match is found.
499  CPUtype GetTypeFromName(std::string name) {
500  CPUtype retn = CPU_None;
501 
502  for (int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++)
503  for (int j = 0, je = names[i].size(); (retn == CPU_None) && (j < je); j++)
504  if (!name.compare(names[i][j]))
505  retn = (CPUtype)i;
506  return retn;
507  }
508 
    // Returns true when `with` is a member of the compatibility set recorded
    // for `what`.  Neither argument may be CPU_None.
509  bool BackwardCompatible(CPUtype what, CPUtype with) {
510  Assert((what > CPU_None) && (what < sizeofCPUtype));
511  Assert((with > CPU_None) && (with < sizeofCPUtype));
512  return compat[what].find(with) != compat[what].end();
513  }
514 };
515 
516 Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget,
517  std::string genericAsSmth)
518  : m_target(NULL), m_targetMachine(NULL), m_dataLayout(NULL), m_valid(false), m_isa(SSE2),
519  m_treatGenericAsSmth(genericAsSmth), m_arch(""), m_is32Bit(true), m_cpu(""), m_attributes(""),
520 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
521  m_tf_attributes(NULL),
522 #endif
523  m_nativeVectorWidth(-1), m_nativeVectorAlignment(-1), m_dataTypeWidth(-1), m_vectorWidth(-1), m_generatePIC(pic),
524  m_maskingIsFree(false), m_maskBitCount(-1), m_hasHalf(false), m_hasRand(false), m_hasGather(false),
525  m_hasScatter(false), m_hasTranscendentals(false), m_hasTrigonometry(false), m_hasRsqrtd(false), m_hasRcpd(false),
526  m_hasVecPrefetch(false) {
527  CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
528  AllCPUs a;
529  std::string featuresString;
530 
531  if (cpu) {
532  CPUID = a.GetTypeFromName(cpu);
533  if (CPUID == CPU_None) {
534  Error(SourcePos(),
535  "Error: CPU type \"%s\" unknown. Supported"
536  " CPUs: %s.",
537  cpu, a.HumanReadableListOfNames().c_str());
538  return;
539  }
540  }
541 
542  if (isa == NULL) {
543  // If a CPU was specified explicitly, try to pick the best
544  // possible ISA based on that.
545  switch (CPUID) {
546  case CPU_None:
547  // No CPU and no ISA, so use system info to figure out
548  // what this CPU supports.
549  isa = lGetSystemISA();
550  Warning(SourcePos(),
551  "No --target specified on command-line."
552  " Using default system target \"%s\".",
553  isa);
554  break;
555 
556  case CPU_Generic:
557  isa = "generic-1";
558  break;
559 
560 #ifdef ISPC_NVPTX_ENABLED
561  case CPU_SM35:
562  isa = "nvptx";
563  break;
564 #endif
565 
566 #ifdef ISPC_ARM_ENABLED
567  case CPU_CortexA9:
568  case CPU_CortexA15:
569  case CPU_CortexA35:
570  case CPU_CortexA53:
571  case CPU_CortexA57:
572  isa = "neon-i32x4";
573  break;
574 #endif
575 
576 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
577  case CPU_KNL:
578  isa = "avx512knl-i32x16";
579  break;
580 #endif
581 
582 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0
583  case CPU_ICL:
584 #endif
585 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
586  case CPU_SKX:
587  isa = "avx512skx-i32x16";
588  break;
589 #endif
590 
591 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
592  case CPU_Broadwell:
593 #endif
594  case CPU_Haswell:
595  isa = "avx2-i32x8";
596  break;
597 
598  case CPU_IvyBridge:
599  // No specific target for IvyBridge anymore.
600  isa = "avx1-i32x8";
601  break;
602 
603  case CPU_SandyBridge:
604  isa = "avx1-i32x8";
605  break;
606 
607  // Penryn is here because ISPC does not use SSE 4.2
608  case CPU_Penryn:
609  case CPU_Nehalem:
610 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4
611  case CPU_Silvermont:
612 #endif
613  isa = "sse4-i32x4";
614  break;
615 
616  case CPU_PS4:
617  isa = "avx1-i32x4";
618  break;
619 
620  default:
621  isa = "sse2-i32x4";
622  break;
623  }
624  if (CPUID != CPU_None)
625  Warning(SourcePos(),
626  "No --target specified on command-line."
627  " Using ISA \"%s\" based on specified CPU \"%s\".",
628  isa, cpu);
629  }
630 
631  if (!strcasecmp(isa, "host")) {
632  isa = lGetSystemISA();
633  }
634 
635  if (arch == NULL) {
636 #ifdef ISPC_ARM_ENABLED
637  if (!strncmp(isa, "neon", 4)) {
638 #if defined(__arm__)
639  arch = "arm";
640 #else
641  arch = "aarch64";
642 #endif
643  } else
644 #endif
645 #ifdef ISPC_NVPTX_ENABLED
646  if (!strncmp(isa, "nvptx", 5))
647  arch = "nvptx64";
648  else
649 #endif /* ISPC_NVPTX_ENABLED */
650  arch = "x86-64";
651  }
652 
653  bool error = false;
654 
655  // Make sure the target architecture is a known one; print an error
656  // with the valid ones otherwise.
657 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
658  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::targets().begin();
659  iter != llvm::TargetRegistry::targets().end(); ++iter) {
660 #else
661  for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin(); iter != llvm::TargetRegistry::end();
662  ++iter) {
663 #endif
664  if (std::string(arch) == iter->getName()) {
665  this->m_target = &*iter;
666  break;
667  }
668  }
669  if (this->m_target == NULL) {
670  fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
671  llvm::TargetRegistry::iterator iter;
672 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
673  for (iter = llvm::TargetRegistry::targets().begin(); iter != llvm::TargetRegistry::targets().end(); ++iter)
674 #else
675  for (iter = llvm::TargetRegistry::begin(); iter != llvm::TargetRegistry::end(); ++iter)
676 #endif
677  fprintf(stderr, "%s ", iter->getName());
678  fprintf(stderr, "\n");
679  error = true;
680  } else {
681  this->m_arch = arch;
682  }
683 
684  // Ensure that we have a valid isa/arch combination.
685  if (!lIsISAValidforArch(isa, arch)) {
686  Error(SourcePos(), "arch = %s and target = %s is not a valid combination.", arch, isa);
687  return;
688  }
689 
690  // Check default LLVM generated targets
691  if (!strcasecmp(isa, "sse2") || !strcasecmp(isa, "sse2-i32x4")) {
692  this->m_isa = Target::SSE2;
693  this->m_nativeVectorWidth = 4;
694  this->m_nativeVectorAlignment = 16;
695  this->m_dataTypeWidth = 32;
696  this->m_vectorWidth = 4;
697  this->m_maskingIsFree = false;
698  this->m_maskBitCount = 32;
699  CPUfromISA = CPU_x86_64;
700  } else if (!strcasecmp(isa, "sse2-x2") || !strcasecmp(isa, "sse2-i32x8")) {
701  this->m_isa = Target::SSE2;
702  this->m_nativeVectorWidth = 4;
703  this->m_nativeVectorAlignment = 16;
704  this->m_dataTypeWidth = 32;
705  this->m_vectorWidth = 8;
706  this->m_maskingIsFree = false;
707  this->m_maskBitCount = 32;
708  CPUfromISA = CPU_Core2;
709  } else if (!strcasecmp(isa, "sse4") || !strcasecmp(isa, "sse4-i32x4")) {
710  this->m_isa = Target::SSE4;
711  this->m_nativeVectorWidth = 4;
712  this->m_nativeVectorAlignment = 16;
713  this->m_dataTypeWidth = 32;
714  this->m_vectorWidth = 4;
715  this->m_maskingIsFree = false;
716  this->m_maskBitCount = 32;
717  CPUfromISA = CPU_Nehalem;
718  } else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2") || !strcasecmp(isa, "sse4-i32x8")) {
719  this->m_isa = Target::SSE4;
720  this->m_nativeVectorWidth = 4;
721  this->m_nativeVectorAlignment = 16;
722  this->m_dataTypeWidth = 32;
723  this->m_vectorWidth = 8;
724  this->m_maskingIsFree = false;
725  this->m_maskBitCount = 32;
726  CPUfromISA = CPU_Nehalem;
727  } else if (!strcasecmp(isa, "sse4-i8x16")) {
728  this->m_isa = Target::SSE4;
729  this->m_nativeVectorWidth = 16;
730  this->m_nativeVectorAlignment = 16;
731  this->m_dataTypeWidth = 8;
732  this->m_vectorWidth = 16;
733  this->m_maskingIsFree = false;
734  this->m_maskBitCount = 8;
735  CPUfromISA = CPU_Nehalem;
736  } else if (!strcasecmp(isa, "sse4-i16x8")) {
737  this->m_isa = Target::SSE4;
738  this->m_nativeVectorWidth = 8;
739  this->m_nativeVectorAlignment = 16;
740  this->m_dataTypeWidth = 16;
741  this->m_vectorWidth = 8;
742  this->m_maskingIsFree = false;
743  this->m_maskBitCount = 16;
744  CPUfromISA = CPU_Nehalem;
745  } else if (!strcasecmp(isa, "generic-4") || !strcasecmp(isa, "generic-x4")) {
746  this->m_isa = Target::GENERIC;
747  this->m_nativeVectorWidth = 4;
748  this->m_nativeVectorAlignment = 16;
749  this->m_vectorWidth = 4;
750  this->m_maskingIsFree = true;
751  this->m_maskBitCount = 1;
752  this->m_hasHalf = true;
753  this->m_hasTranscendentals = true;
754  this->m_hasTrigonometry = true;
755  this->m_hasGather = this->m_hasScatter = true;
756  this->m_hasRsqrtd = this->m_hasRcpd = true;
757  CPUfromISA = CPU_Generic;
758  } else if (!strcasecmp(isa, "generic-8") || !strcasecmp(isa, "generic-x8")) {
759  this->m_isa = Target::GENERIC;
760  this->m_nativeVectorWidth = 8;
761  this->m_nativeVectorAlignment = 32;
762  this->m_vectorWidth = 8;
763  this->m_maskingIsFree = true;
764  this->m_maskBitCount = 1;
765  this->m_hasHalf = true;
766  this->m_hasTranscendentals = true;
767  this->m_hasTrigonometry = true;
768  this->m_hasGather = this->m_hasScatter = true;
769  this->m_hasRsqrtd = this->m_hasRcpd = true;
770  CPUfromISA = CPU_Generic;
771  } else if (!strcasecmp(isa, "generic-16") || !strcasecmp(isa, "generic-x16") ||
772  // We treat *-generic-16 as generic-16, but with special name mangling
773  strstr(isa, "-generic-16") || strstr(isa, "-generic-x16")) {
774  this->m_isa = Target::GENERIC;
775  if (strstr(isa, "-generic-16") || strstr(isa, "-generic-x16")) {
776  // It is used for appropriate name mangling and dispatch function during multitarget compilation
777  this->m_treatGenericAsSmth = isa;
778  // We need to create appropriate name for mangling.
779  // Remove "-x16" or "-16" and replace "-" with "_".
780  this->m_treatGenericAsSmth =
781  this->m_treatGenericAsSmth.substr(0, this->m_treatGenericAsSmth.find_last_of("-"));
782  std::replace(this->m_treatGenericAsSmth.begin(), this->m_treatGenericAsSmth.end(), '-', '_');
783  }
784  this->m_nativeVectorWidth = 16;
785  this->m_nativeVectorAlignment = 64;
786  this->m_vectorWidth = 16;
787  this->m_maskingIsFree = true;
788  this->m_maskBitCount = 1;
789  this->m_hasHalf = true;
790  this->m_hasTranscendentals = true;
791  // It's set to false, because stdlib implementation of math functions
792  // is faster on MIC, than "native" implementation provided by the
793  // icc compiler.
794  this->m_hasTrigonometry = false;
795  this->m_hasGather = this->m_hasScatter = true;
796  this->m_hasRsqrtd = this->m_hasRcpd = true;
797  // It's set to true, because MIC has hardware vector prefetch instruction
798  this->m_hasVecPrefetch = true;
799  CPUfromISA = CPU_Generic;
800  } else if (!strcasecmp(isa, "generic-32") || !strcasecmp(isa, "generic-x32")) {
801  this->m_isa = Target::GENERIC;
802  this->m_nativeVectorWidth = 32;
803  this->m_nativeVectorAlignment = 64;
804  this->m_vectorWidth = 32;
805  this->m_maskingIsFree = true;
806  this->m_maskBitCount = 1;
807  this->m_hasHalf = true;
808  this->m_hasTranscendentals = true;
809  this->m_hasTrigonometry = true;
810  this->m_hasGather = this->m_hasScatter = true;
811  this->m_hasRsqrtd = this->m_hasRcpd = true;
812  CPUfromISA = CPU_Generic;
813  } else if (!strcasecmp(isa, "generic-64") || !strcasecmp(isa, "generic-x64")) {
814  this->m_isa = Target::GENERIC;
815  this->m_nativeVectorWidth = 64;
816  this->m_nativeVectorAlignment = 64;
817  this->m_vectorWidth = 64;
818  this->m_maskingIsFree = true;
819  this->m_maskBitCount = 1;
820  this->m_hasHalf = true;
821  this->m_hasTranscendentals = true;
822  this->m_hasTrigonometry = true;
823  this->m_hasGather = this->m_hasScatter = true;
824  this->m_hasRsqrtd = this->m_hasRcpd = true;
825  CPUfromISA = CPU_Generic;
826  } else if (!strcasecmp(isa, "generic-1") || !strcasecmp(isa, "generic-x1")) {
827  this->m_isa = Target::GENERIC;
828  this->m_nativeVectorWidth = 1;
829  this->m_nativeVectorAlignment = 16;
830  this->m_vectorWidth = 1;
831  this->m_maskingIsFree = false;
832  this->m_maskBitCount = 32;
833  CPUfromISA = CPU_Generic;
834  } else if (!strcasecmp(isa, "avx1-i32x4")) {
835  this->m_isa = Target::AVX;
836  this->m_nativeVectorWidth = 8;
837  this->m_nativeVectorAlignment = 32;
838  this->m_dataTypeWidth = 32;
839  this->m_vectorWidth = 4;
840  this->m_maskingIsFree = false;
841  this->m_maskBitCount = 32;
842  CPUfromISA = CPU_SandyBridge;
843  } else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1") || !strcasecmp(isa, "avx1-i32x8")) {
844  this->m_isa = Target::AVX;
845  this->m_nativeVectorWidth = 8;
846  this->m_nativeVectorAlignment = 32;
847  this->m_dataTypeWidth = 32;
848  this->m_vectorWidth = 8;
849  this->m_maskingIsFree = false;
850  this->m_maskBitCount = 32;
851  CPUfromISA = CPU_SandyBridge;
852  } else if (!strcasecmp(isa, "avx-i64x4") || !strcasecmp(isa, "avx1-i64x4")) {
853  this->m_isa = Target::AVX;
854  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
855  this->m_nativeVectorAlignment = 32;
856  this->m_dataTypeWidth = 64;
857  this->m_vectorWidth = 4;
858  this->m_maskingIsFree = false;
859  this->m_maskBitCount = 64;
860  CPUfromISA = CPU_SandyBridge;
861  } else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2") || !strcasecmp(isa, "avx1-i32x16")) {
862  this->m_isa = Target::AVX;
863  this->m_nativeVectorWidth = 8;
864  this->m_nativeVectorAlignment = 32;
865  this->m_dataTypeWidth = 32;
866  this->m_vectorWidth = 16;
867  this->m_maskingIsFree = false;
868  this->m_maskBitCount = 32;
869  CPUfromISA = CPU_SandyBridge;
870  } else if (!strcasecmp(isa, "avx2") || !strcasecmp(isa, "avx2-i32x8")) {
871  this->m_isa = Target::AVX2;
872  this->m_nativeVectorWidth = 8;
873  this->m_nativeVectorAlignment = 32;
874  this->m_dataTypeWidth = 32;
875  this->m_vectorWidth = 8;
876  this->m_maskingIsFree = false;
877  this->m_maskBitCount = 32;
878  this->m_hasHalf = true;
879  this->m_hasRand = true;
880  this->m_hasGather = true;
881  CPUfromISA = CPU_Haswell;
882  } else if (!strcasecmp(isa, "avx2-i32x4")) {
883  this->m_isa = Target::AVX2;
884  this->m_nativeVectorWidth = 8;
885  this->m_nativeVectorAlignment = 32;
886  this->m_dataTypeWidth = 32;
887  this->m_vectorWidth = 4;
888  this->m_maskingIsFree = false;
889  this->m_maskBitCount = 32;
890  this->m_hasHalf = true;
891  this->m_hasRand = true;
892  this->m_hasGather = true;
893  CPUfromISA = CPU_Haswell;
894  } else if (!strcasecmp(isa, "avx2-x2") || !strcasecmp(isa, "avx2-i32x16")) {
895  this->m_isa = Target::AVX2;
896  this->m_nativeVectorWidth = 16;
897  this->m_nativeVectorAlignment = 32;
898  this->m_dataTypeWidth = 32;
899  this->m_vectorWidth = 16;
900  this->m_maskingIsFree = false;
901  this->m_maskBitCount = 32;
902  this->m_hasHalf = true;
903  this->m_hasRand = true;
904  this->m_hasGather = true;
905  CPUfromISA = CPU_Haswell;
906  } else if (!strcasecmp(isa, "avx2-i64x4")) {
907  this->m_isa = Target::AVX2;
908  this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
909  this->m_nativeVectorAlignment = 32;
910  this->m_dataTypeWidth = 64;
911  this->m_vectorWidth = 4;
912  this->m_maskingIsFree = false;
913  this->m_maskBitCount = 64;
914  this->m_hasHalf = true;
915  this->m_hasRand = true;
916  this->m_hasGather = true;
917  CPUfromISA = CPU_Haswell;
918  }
919 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
920  else if (!strcasecmp(isa, "avx512knl-i32x16")) {
921  this->m_isa = Target::KNL_AVX512;
922  this->m_nativeVectorWidth = 16;
923  this->m_nativeVectorAlignment = 64;
924  this->m_dataTypeWidth = 32;
925  this->m_vectorWidth = 16;
926  this->m_maskingIsFree = true;
927  this->m_maskBitCount = 8;
928  this->m_hasHalf = true;
929  this->m_hasRand = true;
930  this->m_hasGather = this->m_hasScatter = true;
931  this->m_hasTranscendentals = false;
932  // For MIC it is set to true due to performance reasons. The option should be tested.
933  this->m_hasTrigonometry = false;
934  this->m_hasRsqrtd = this->m_hasRcpd = false;
935  this->m_hasVecPrefetch = false;
936  CPUfromISA = CPU_KNL;
937  }
938 #endif
939 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
940  else if (!strcasecmp(isa, "avx512skx-i32x16")) {
941  this->m_isa = Target::SKX_AVX512;
942  this->m_nativeVectorWidth = 16;
943  this->m_nativeVectorAlignment = 64;
944  this->m_dataTypeWidth = 32;
945  this->m_vectorWidth = 16;
946  this->m_maskingIsFree = true;
947  this->m_maskBitCount = 8;
948  this->m_hasHalf = true;
949  this->m_hasRand = true;
950  this->m_hasGather = this->m_hasScatter = true;
951  this->m_hasTranscendentals = false;
952  // For MIC it is set to true due to performance reasons. The option should be tested.
953  this->m_hasTrigonometry = false;
954  this->m_hasRsqrtd = this->m_hasRcpd = false;
955  this->m_hasVecPrefetch = false;
956  CPUfromISA = CPU_SKX;
957  }
958 #endif
959 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+
960  else if (!strcasecmp(isa, "avx512skx-i32x8")) {
961  this->m_isa = Target::SKX_AVX512;
962  this->m_nativeVectorWidth = 16;
963  this->m_nativeVectorAlignment = 64;
964  this->m_dataTypeWidth = 32;
965  this->m_vectorWidth = 8;
966  this->m_maskingIsFree = true;
967  this->m_maskBitCount = 8;
968  this->m_hasHalf = true;
969  this->m_hasRand = true;
970  this->m_hasGather = this->m_hasScatter = true;
971  this->m_hasTranscendentals = false;
972  // For MIC it is set to true due to performance reasons. The option should be tested.
973  this->m_hasTrigonometry = false;
974  this->m_hasRsqrtd = this->m_hasRcpd = false;
975  this->m_hasVecPrefetch = false;
976  CPUfromISA = CPU_SKX;
977  this->m_funcAttributes.push_back(std::make_pair("prefer-vector-width", "256"));
978  this->m_funcAttributes.push_back(std::make_pair("min-legal-vector-width", "256"));
979  }
980 #endif
981 #ifdef ISPC_ARM_ENABLED
982  else if (!strcasecmp(isa, "neon-i8x16")) {
983  this->m_isa = Target::NEON8;
984  this->m_nativeVectorWidth = 16;
985  this->m_nativeVectorAlignment = 16;
986  this->m_dataTypeWidth = 8;
987  this->m_vectorWidth = 16;
988  this->m_hasHalf = true; // ??
989  this->m_maskingIsFree = false;
990  this->m_maskBitCount = 8;
991  } else if (!strcasecmp(isa, "neon-i16x8")) {
992  this->m_isa = Target::NEON16;
993  this->m_nativeVectorWidth = 8;
994  this->m_nativeVectorAlignment = 16;
995  this->m_dataTypeWidth = 16;
996  this->m_vectorWidth = 8;
997  this->m_hasHalf = true; // ??
998  this->m_maskingIsFree = false;
999  this->m_maskBitCount = 16;
1000  } else if (!strcasecmp(isa, "neon") || !strcasecmp(isa, "neon-i32x4")) {
1001  this->m_isa = Target::NEON32;
1002  this->m_nativeVectorWidth = 4;
1003  this->m_nativeVectorAlignment = 16;
1004  this->m_dataTypeWidth = 32;
1005  this->m_vectorWidth = 4;
1006  this->m_hasHalf = true; // ??
1007  this->m_maskingIsFree = false;
1008  this->m_maskBitCount = 32;
1009  } else if (!strcasecmp(isa, "neon-i32x8")) {
1010  this->m_isa = Target::NEON32;
1011  this->m_nativeVectorWidth = 4;
1012  this->m_nativeVectorAlignment = 16;
1013  this->m_dataTypeWidth = 32;
1014  this->m_vectorWidth = 8;
1015  this->m_hasHalf = true; // ??
1016  this->m_maskingIsFree = false;
1017  this->m_maskBitCount = 32;
1018  }
1019 #endif
1020 #ifdef ISPC_NVPTX_ENABLED
1021  else if (!strcasecmp(isa, "nvptx")) {
1022  this->m_isa = Target::NVPTX;
1023  this->m_cpu = "sm_35";
1024  this->m_nativeVectorWidth = 32;
1025  this->m_nativeVectorAlignment = 32;
1026  this->m_vectorWidth = 1;
1027  this->m_hasHalf = true;
1028  this->m_maskingIsFree = true;
1029  this->m_maskBitCount = 1;
1030  this->m_hasTranscendentals = true;
1031  this->m_hasTrigonometry = true;
1032  this->m_hasGather = this->m_hasScatter = false;
1033  CPUfromISA = CPU_SM35;
1034  }
1035 #endif /* ISPC_NVPTX_ENABLED */
1036  else {
1037  Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.", isa, SupportedTargets());
1038  error = true;
1039  }
1040 
1041 #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
1042  if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4) && !strncmp(arch, "arm", 3))
1043  CPUID = CPU_CortexA9;
1044 #endif
1045 #if defined(ISPC_ARM_ENABLED) && !defined(__aarch64__)
1046  if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4) && !strncmp(arch, "aarch64", 7))
1047  CPUID = CPU_CortexA35;
1048 #endif
1049  if (CPUID == CPU_None) {
1050  cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
1051  } else {
1052  if ((CPUfromISA != CPU_None) && !a.BackwardCompatible(CPUID, CPUfromISA)) {
1053  Error(SourcePos(),
1054  "The requested CPU is incompatible"
1055  " with the CPU %s needs: %s vs. %s!",
1056  isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str());
1057  return;
1058  }
1059  cpu = a.GetDefaultNameFromType(CPUID).c_str();
1060  }
1061  this->m_cpu = cpu;
1062 
1063  if (!error) {
1064  // Create TargetMachine
1065  std::string triple = GetTripleString();
1066 
1067 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_8
1068  llvm::Reloc::Model relocModel = m_generatePIC ? llvm::Reloc::PIC_ : llvm::Reloc::Default;
1069 #else
1070  llvm::Optional<llvm::Reloc::Model> relocModel;
1071  if (m_generatePIC) {
1072  relocModel = llvm::Reloc::PIC_;
1073  }
1074 #endif
1075  llvm::TargetOptions options;
1076 #ifdef ISPC_ARM_ENABLED
1077  if (m_isa == Target::NEON8 || m_isa == Target::NEON16 || m_isa == Target::NEON32)
1078  options.FloatABIType = llvm::FloatABI::Hard;
1079  if (strcmp("arm", arch) == 0) {
1080  this->m_funcAttributes.push_back(std::make_pair("target-features", "+neon,+fp16"));
1081  featuresString = "+neon,+fp16";
1082  } else if (strcmp("aarch64", arch) == 0) {
1083  this->m_funcAttributes.push_back(std::make_pair("target-features", "+neon"));
1084  featuresString = "+neon";
1085  }
1086 #endif
1087  if (g->opt.disableFMA == false)
1088  options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
1089 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1090  if (g->NoOmitFramePointer)
1091  options.NoFramePointerElim = true;
1092 #ifdef ISPC_HOST_IS_WINDOWS
1093  if (strcmp("x86", arch) == 0) {
1094  // Workaround for issue #503 (LLVM issue 14646).
1095  // It's Win32 specific.
1096  options.NoFramePointerElim = true;
1097  }
1098 #endif
1099 #endif
1100  m_targetMachine = m_target->createTargetMachine(triple, m_cpu, featuresString, options, relocModel);
1101  Assert(m_targetMachine != NULL);
1102 
1103  // Set Optimization level for llvm codegen based on Optimization level
1104  // requested by user via ISPC Optimization Flag. Mapping is :
1105  // ISPC O0 -> Codegen O0
1106  // ISPC O1,O2,O3,default -> Codegen O3
1107  llvm::CodeGenOpt::Level cOptLevel = llvm::CodeGenOpt::Level::Aggressive;
1108  switch (g->codegenOptLevel) {
1109  case Globals::CodegenOptLevel::None:
1110  cOptLevel = llvm::CodeGenOpt::Level::None;
1111  break;
1112 
1113  case Globals::CodegenOptLevel::Aggressive:
1114  cOptLevel = llvm::CodeGenOpt::Level::Aggressive;
1115  break;
1116  }
1117  m_targetMachine->setOptLevel(cOptLevel);
1118 
1119 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1120  m_targetMachine->setAsmVerbosityDefault(true);
1121 #else /* LLVM 3.7+ */
1122  m_targetMachine->Options.MCOptions.AsmVerbose = true;
1123 #endif
1124 
1125 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
1126  // Change default version of generated DWARF.
1127  if (g->generateDWARFVersion != 0) {
1128  m_targetMachine->Options.MCOptions.DwarfVersion = g->generateDWARFVersion;
1129  }
1130 #endif
1131 
1132  // Initialize TargetData/DataLayout in 3 steps.
1133  // 1. Get default data layout first
1134  std::string dl_string;
1135 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6
1136  dl_string = m_targetMachine->getSubtargetImpl()->getDataLayout()->getStringRepresentation();
1137 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1138  dl_string = m_targetMachine->createDataLayout().getStringRepresentation();
1139 #else // LLVM 3.5- or LLVM 3.7
1140  dl_string = m_targetMachine->getDataLayout()->getStringRepresentation();
1141 #endif
1142  // 2. Adjust for generic
1143  if (m_isa == Target::GENERIC) {
1144  // <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
1145  // that to the regular datalayout string for IA..
1146  // For generic-4 target we need to treat <4 x i1> as 128 bit value
1147  // in terms of required memory storage and alignment, as this is
1148  // translated to __m128 type.
1149  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
1150  "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
1151  "f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
1152  }
1153 #ifdef ISPC_NVPTX_ENABLED
1154  else if (m_isa == Target::NVPTX) {
1155  dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:"
1156  "32-v64:64:64-v128:128:128-n16:32:64";
1157  }
1158 #endif
1159 
1160  // 3. Finally set member data
1161  m_dataLayout = new llvm::DataLayout(dl_string);
1162 
1163  // Set is32Bit
1164  // This indicates if we are compiling for 32 bit platform
1165  // and can assume 32 bit runtime.
1166  // FIXME: all generic targets are handled as 64 bit, which is incorrect.
1167 
1168  this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
1169 
1170  // TO-DO : Revisit addition of "target-features" and "target-cpu" for ARM support.
1171  llvm::AttrBuilder fattrBuilder;
1172 #ifdef ISPC_ARM_ENABLED
1173  if (m_isa == Target::NEON8 || m_isa == Target::NEON16 || m_isa == Target::NEON32)
1174  fattrBuilder.addAttribute("target-cpu", this->m_cpu);
1175 #endif
1176  for (auto const &f_attr : m_funcAttributes)
1177  fattrBuilder.addAttribute(f_attr.first, f_attr.second);
1178 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
1179  this->m_tf_attributes =
1180  new llvm::AttributeSet(llvm::AttributeSet::get(*g->ctx, llvm::AttributeSet::FunctionIndex, fattrBuilder));
1181 #else // LLVM 5.0+
1182  this->m_tf_attributes = new llvm::AttrBuilder(fattrBuilder);
1183 #endif
1184 
1186  }
1187 
1188  m_valid = !error;
1189 
1190  if (printTarget) {
1191  printf("Target Triple: %s\n", m_targetMachine->getTargetTriple().str().c_str());
1192  printf("Target CPU: %s\n", m_targetMachine->getTargetCPU().str().c_str());
1193  printf("Target Feature String: %s\n", m_targetMachine->getTargetFeatureString().str().c_str());
1194  }
1195 
1196  return;
1197 }
1198 
1199 std::string Target::SupportedCPUs() {
1200  AllCPUs a;
1201  return a.HumanReadableListOfNames();
1202 }
1203 
1204 const char *Target::SupportedArchs() {
1205  return
1206 #ifdef ISPC_ARM_ENABLED
1207  "arm, aarch64, "
1208 #endif
1209  "x86, x86-64";
1210 }
1211 
1213  return "host, sse2-i32x4, sse2-i32x8, "
1214  "sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
1215  "avx1-i32x4, "
1216  "avx1-i32x8, avx1-i32x16, avx1-i64x4, "
1217  "avx2-i32x4, avx2-i32x8, avx2-i32x16, avx2-i64x4, "
1218 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1219  "avx512knl-i32x16, "
1220 #endif
1221 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1222  "avx512skx-i32x16, "
1223 #endif
1224 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+
1225  "avx512skx-i32x8, "
1226 #endif
1227  "generic-x1, generic-x4, generic-x8, generic-x16, "
1228  "generic-x32, generic-x64, *-generic-x16"
1229 #ifdef ISPC_ARM_ENABLED
1230  ", neon-i8x16, neon-i16x8, neon-i32x4, neon-i32x8"
1231 #endif
1232 #ifdef ISPC_NVPTX_ENABLED
1233  ", nvptx"
1234 #endif
1235  ;
1236 }
1237 
1238 const char *Target::SupportedOSes() {
1239  return
1240 #if defined(ISPC_HOST_IS_WINDOWS)
1241 #if !defined(ISPC_WINDOWS_TARGET_OFF)
1242  "windows, "
1243 #endif
1244 #if !defined(ISPC_PS4_TARGET_OFF)
1245  "ps4, "
1246 #endif
1247 #elif defined(ISPC_HOST_IS_APPLE)
1248 #if !defined(ISPC_IOS_TARGET_OFF)
1249  "ios, "
1250 #endif
1251 #endif
1252 #if !defined(ISPC_LINUX_TARGET_OFF)
1253  "linux, "
1254 #endif
1255 #if !defined(ISPC_MACOS_TARGET_OFF)
1256  "macos, "
1257 #endif
1258 #if !defined(ISPC_ANDROID_TARGET_OFF)
1259  "android"
1260 #endif
1261  ;
1262 }
1263 
1264 std::string Target::GetTripleString() const {
1265  llvm::Triple triple;
1266  switch (g->target_os) {
1267  case OS_WINDOWS:
1268  if (m_arch == "x86") {
1269  triple.setArchName("i386");
1270  } else if (m_arch == "x86-64") {
1271  triple.setArchName("x86_64");
1272  } else if (m_arch == "arm") {
1273  Error(SourcePos(), "Arm is not supported on Windows.");
1274  exit(1);
1275  } else if (m_arch == "aarch64") {
1276  Error(SourcePos(), "Aarch64 is not supported on Windows.");
1277  exit(1);
1278  } else {
1279  Error(SourcePos(), "Unknown arch.");
1280  exit(1);
1281  }
1282  //"x86_64-pc-windows-msvc"
1283  triple.setVendor(llvm::Triple::VendorType::PC);
1284  triple.setOS(llvm::Triple::OSType::Win32);
1285  triple.setEnvironment(llvm::Triple::EnvironmentType::MSVC);
1286  break;
1287  case OS_LINUX:
1288  if (m_arch == "x86") {
1289  triple.setArchName("i386");
1290  } else if (m_arch == "x86-64") {
1291  triple.setArchName("x86_64");
1292  } else if (m_arch == "arm") {
1293  triple.setArchName("armv7");
1294  } else if (m_arch == "aarch64") {
1295  triple.setArchName("aarch64");
1296  } else {
1297  Error(SourcePos(), "Unknown arch.");
1298  exit(1);
1299  }
1300  triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
1301  triple.setOS(llvm::Triple::OSType::Linux);
1302  triple.setEnvironment(llvm::Triple::EnvironmentType::GNU);
1303  break;
1304  case OS_MAC:
1305  // asserts
1306  if (m_arch != "x86-64") {
1307  Error(SourcePos(), "macOS target supports only x86_64.");
1308  exit(1);
1309  }
1310  triple.setArch(llvm::Triple::ArchType::x86_64);
1311  triple.setVendor(llvm::Triple::VendorType::Apple);
1312  triple.setOS(llvm::Triple::OSType::MacOSX);
1313  break;
1314  case OS_ANDROID:
1315  if (m_arch == "x86") {
1316  triple.setArchName("i386");
1317  } else if (m_arch == "x86-64") {
1318  triple.setArchName("x86_64");
1319  } else if (m_arch == "arm") {
1320  triple.setArchName("armv7");
1321  } else if (m_arch == "aarch64") {
1322  triple.setArchName("aarch64");
1323  } else {
1324  Error(SourcePos(), "Unknown arch.");
1325  exit(1);
1326  }
1327  triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
1328  triple.setOS(llvm::Triple::OSType::Linux);
1329  triple.setEnvironment(llvm::Triple::EnvironmentType::Android);
1330  break;
1331  case OS_IOS:
1332  if (m_arch != "aarch64") {
1333  Error(SourcePos(), "iOS target supports only aarch64.");
1334  exit(1);
1335  }
1336  // Note, for iOS arch need to be set to "arm64", instead of "aarch64".
1337  // Internet say this is for historical reasons.
1338  // "arm64-apple-ios"
1339  triple.setArchName("arm64");
1340  triple.setVendor(llvm::Triple::VendorType::Apple);
1341  triple.setOS(llvm::Triple::OSType::IOS);
1342  break;
1343  case OS_PS4:
1344  if (m_arch != "x86-64") {
1345  Error(SourcePos(), "PS4 target supports only x86_64.");
1346  exit(1);
1347  }
1348  // "x86_64-scei-ps4"
1349  triple.setArch(llvm::Triple::ArchType::x86_64);
1350  triple.setVendor(llvm::Triple::VendorType::SCEI);
1351  triple.setOS(llvm::Triple::OSType::PS4);
1352  break;
1353  default:
1354  Error(SourcePos(), "Invalid target OS.");
1355  exit(1);
1356  }
1357 
1358  return triple.str();
1359 }
1360 
1361 // This function returns string representation of ISA for the purpose of
1362 // mangling. And may return any unique string, preferably short, like
1363 // sse4, avx and etc.
1364 const char *Target::ISAToString(ISA isa) {
1365  switch (isa) {
1366 #ifdef ISPC_ARM_ENABLED
1367  case Target::NEON8:
1368  return "neon";
1369  case Target::NEON16:
1370  return "neon";
1371  case Target::NEON32:
1372  return "neon";
1373 #endif
1374  case Target::SSE2:
1375  return "sse2";
1376  case Target::SSE4:
1377  return "sse4";
1378  case Target::AVX:
1379  return "avx";
1380  case Target::AVX2:
1381  return "avx2";
1382 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1383  case Target::KNL_AVX512:
1384  return "avx512knl";
1385 #endif
1386 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1387  case Target::SKX_AVX512:
1388  return "avx512skx";
1389 #endif
1390  case Target::GENERIC:
1391  return "generic";
1392 #ifdef ISPC_NVPTX_ENABLED
1393  case Target::NVPTX:
1394  return "nvptx";
1395 #endif /* ISPC_NVPTX_ENABLED */
1396  default:
1397  FATAL("Unhandled target in ISAToString()");
1398  }
1399  return "";
1400 }
1401 
1402 const char *Target::GetISAString() const { return ISAToString(m_isa); }
1403 
1404 // This function returns string representation of default target corresponding
1405 // to ISA. I.e. for SSE4 it's sse4-i32x4, for AVX2 it's avx2-i32x8. This
1406 // string may be used to initialize Target.
1407 const char *Target::ISAToTargetString(ISA isa) {
1408  switch (isa) {
1409 #ifdef ISPC_ARM_ENABLED
1410  case Target::NEON8:
1411  return "neon-i8x16";
1412  case Target::NEON16:
1413  return "neon-i16x8";
1414  case Target::NEON32:
1415  return "neon-i32x4";
1416 #endif
1417  case Target::SSE2:
1418  return "sse2-i32x4";
1419  case Target::SSE4:
1420  return "sse4-i32x4";
1421  case Target::AVX:
1422  return "avx1-i32x8";
1423  case Target::AVX2:
1424  return "avx2-i32x8";
1425 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1426  case Target::KNL_AVX512:
1427  return "avx512knl-i32x16";
1428 #endif
1429 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1430  case Target::SKX_AVX512:
1431  return "avx512skx-i32x16";
1432 #endif
1433  case Target::GENERIC:
1434  return "generic-4";
1435 #ifdef ISPC_NVPTX_ENABLED
1436  case Target::NVPTX:
1437  return "nvptx";
1438 #endif /* ISPC_NVPTX_ENABLED */
1439  default:
1440  FATAL("Unhandled target in ISAToTargetString()");
1441  }
1442  return "";
1443 }
1444 
1445 const char *Target::GetISATargetString() const { return ISAToString(m_isa); }
1446 
// Classifies an LLVM type as having an "indeterminate" layout or not; the
// result is consulted by Target::StructOffset() when the ISA is
// Target::GENERIC.
//  - floating point, x86 MMX, void, integer, label and metadata types: false
//  - arrays: decided by their element type
//  - pointers: false
//  - structs: true if any member is indeterminate, false otherwise
//  - anything else is asserted to be a vector type: true
// NOTE(review): listing line 1452 is absent from this extract, so the
// `return true;` on listing line 1453 appears unguarded here.  Presumably the
// condition that guards it was dropped by the extraction -- confirm against
// the original file before relying on this function as shown.
1447 static bool lGenericTypeLayoutIndeterminate(llvm::Type *type) {
1448  if (type->isFloatingPointTy() || type->isX86_MMXTy() || type->isVoidTy() || type->isIntegerTy() ||
1449  type->isLabelTy() || type->isMetadataTy())
1450  return false;
1451 
1453  return true;
1454 
1455  llvm::ArrayType *at = llvm::dyn_cast<llvm::ArrayType>(type);
1456  if (at != NULL)
1457  return lGenericTypeLayoutIndeterminate(at->getElementType());
1458 
1459  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(type);
1460  if (pt != NULL)
1461  return false;
1462 
1463  llvm::StructType *st = llvm::dyn_cast<llvm::StructType>(type);
1464  if (st != NULL) {
1465  for (int i = 0; i < (int)st->getNumElements(); ++i)
1466  if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
1467  return true;
1468  return false;
1469  }
1470 
1471  Assert(llvm::isa<llvm::VectorType>(type));
1472  return true;
1473 }
1474 
// Returns an LLVM value holding the size of `type`, as either a 32-bit or a
// 64-bit integer.
//
// Two strategies are visible:
//  - an instruction sequence appended to `insertAtEnd`: a GEP with index 1
//    off a null pointer to `type`, converted to an integer with PtrToIntInst;
//  - a compile-time constant taken from DataLayout::getTypeStoreSize().
//
// NOTE(review): listing lines 1476, 1487 and 1494 are absent from this
// extract, so the condition that opens the first block and the conditions
// that choose between the 32-bit and 64-bit results are not visible here.
// The first is presumably the same GENERIC / indeterminate-layout test that
// StructOffset() uses -- confirm against the original file.
1475 llvm::Value *Target::SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd) {
1477  llvm::Value *index[1] = {LLVMInt32(1)};
1478  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1479  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1480  llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
1481 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1482  llvm::Instruction *gep = llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep", insertAtEnd);
1483 #else /* LLVM 3.7+ */
1484  llvm::Instruction *gep =
1485  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr, arrayRef, "sizeof_gep", insertAtEnd);
1486 #endif
1488  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type, "sizeof_int", insertAtEnd);
1489  else
1490  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type, "sizeof_int", insertAtEnd);
1491  }
1492 
1493  uint64_t byteSize = getDataLayout()->getTypeStoreSize(type);
1495  return LLVMInt32((int32_t)byteSize);
1496  else
1497  return LLVMInt64(byteSize);
1498 }
1499 
// Returns an LLVM value holding the offset of member number `element`
// inside the struct type `type`, as either a 32-bit or a 64-bit integer.
//
// For the GENERIC ISA with a type whose layout is indeterminate, the offset
// is produced by instructions appended to `insertAtEnd`: a GEP with indices
// {0, element} off a null pointer to `type`, converted to an integer with
// PtrToIntInst.  Otherwise the offset is a constant read from the
// DataLayout's StructLayout.
//
// Returns NULL when `type` is not a sized struct type; the Assert shows that
// an error is expected to have been counted already in that case.
//
// NOTE(review): listing lines 1512 and 1528 are absent from this extract, so
// the conditions that choose between the 32-bit and 64-bit results are not
// visible here -- confirm against the original file.
1500 llvm::Value *Target::StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd) {
1501  if (m_isa == Target::GENERIC && lGenericTypeLayoutIndeterminate(type) == true) {
1502  llvm::Value *indices[2] = {LLVMInt32(0), LLVMInt32(element)};
1503  llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1504  llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1505  llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1506 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1507  llvm::Instruction *gep = llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep", insertAtEnd);
1508 #else /* LLVM 3.7+ */
1509  llvm::Instruction *gep =
1510  llvm::GetElementPtrInst::Create(PTYPE(voidPtr), voidPtr, arrayRef, "offset_gep", insertAtEnd);
1511 #endif
1513  return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type, "offset_int", insertAtEnd);
1514  else
1515  return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type, "offset_int", insertAtEnd);
1516  }
1517 
1518  llvm::StructType *structType = llvm::dyn_cast<llvm::StructType>(type);
1519  if (structType == NULL || structType->isSized() == false) {
1520  Assert(m->errorCount > 0);
1521  return NULL;
1522  }
1523 
1524  const llvm::StructLayout *sl = getDataLayout()->getStructLayout(structType);
1525  Assert(sl != NULL);
1526 
1527  uint64_t offset = sl->getElementOffset(element);
1529  return LLVMInt32((int32_t)offset);
1530  else
1531  return LLVMInt64(offset);
1532 }
1533 
1534 void Target::markFuncWithTargetAttr(llvm::Function *func) {
1535 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
1536  if (m_tf_attributes) {
1537 #if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
1538  func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
1539 #else // LLVM 5.0+
1540  func->addAttributes(llvm::AttributeList::FunctionIndex, *m_tf_attributes);
1541 #endif
1542  }
1543 #endif
1544 }
1545 
1546 ///////////////////////////////////////////////////////////////////////////
1547 // Opt
1548 
1550  level = 1;
1551  fastMath = false;
1552  fastMaskedVload = false;
1553  force32BitAddressing = true;
1554  unrollLoops = true;
1555  disableAsserts = false;
1556  disableFMA = false;
1557  forceAlignedMemory = false;
1558  disableMaskAllOnOptimizations = false;
1559  disableHandlePseudoMemoryOps = false;
1560  disableBlendedMaskedStores = false;
1561  disableCoherentControlFlow = false;
1562  disableUniformControlFlow = false;
1563  disableGatherScatterOptimizations = false;
1564  disableMaskedStoreToStore = false;
1565  disableGatherScatterFlattening = false;
1566  disableUniformMemoryOptimizations = false;
1567  disableCoalescing = false;
1568 }
1569 
1570 ///////////////////////////////////////////////////////////////////////////
1571 // Globals
1572 
1574  mathLib = Globals::Math_ISPC;
1575  codegenOptLevel = Globals::Aggressive;
1576 
1577  includeStdlib = true;
1578  runCPP = true;
1579  debugPrint = false;
1580  dumpFile = false;
1581  printTarget = false;
1582  NoOmitFramePointer = false;
1583  debugIR = -1;
1584  disableWarnings = false;
1585  warningsAsErrors = false;
1586  quiet = false;
1587  forceColoredOutput = false;
1588  disableLineWrap = false;
1589  emitPerfWarnings = true;
1590  emitInstrumentation = false;
1591  noPragmaOnce = false;
1592  generateDebuggingSymbols = false;
1593 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
1594  generateDWARFVersion = 3;
1595 #endif
1596  enableFuzzTest = false;
1597  fuzzTestSeed = -1;
1598  mangleFunctionsWithTarget = false;
1599 
1600  ctx = new llvm::LLVMContext;
1601 
1602 #ifdef ISPC_HOST_IS_WINDOWS
1603  _getcwd(currentDirectory, sizeof(currentDirectory));
1604 #else
1605  if (getcwd(currentDirectory, sizeof(currentDirectory)) == NULL)
1606  FATAL("Current directory path is too long!");
1607 #endif
1608  forceAlignment = -1;
1609  dllExport = false;
1610 
1611  // Target OS defaults to host OS.
1612  target_os = GetHostOS();
1613 }
1614 
1615 ///////////////////////////////////////////////////////////////////////////
1616 // SourcePos
1617 
1618 SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
1619  name = n;
1620  if (name == NULL) {
1621  if (m != NULL)
1622  name = m->module->getModuleIdentifier().c_str();
1623  else
1624  name = "(unknown)";
1625  }
1626  first_line = fl;
1627  first_column = fc;
1628  last_line = ll != 0 ? ll : fl;
1629  last_column = lc != 0 ? lc : fc;
1630 }
1631 
1632 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1633 llvm::DIFile
1634 #else /* LLVM 3.7+ */
1635 llvm::DIFile *
1636 // llvm::MDFile*
1637 #endif
1639  std::string directory, filename;
1640  GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
1641 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1642  llvm::DIFile ret = m->diBuilder->createFile(filename, directory);
1643  Assert(ret.Verify());
1644 #else /* LLVM 3.7+ */
1645  llvm::DIFile *ret = m->diBuilder->createFile(filename, directory);
1646 #endif
1647  return ret;
1648 }
1649 
1650 void SourcePos::Print() const {
1651  printf(" @ [%s:%d.%d - %d.%d] ", name, first_line, first_column, last_line, last_column);
1652 }
1653 
1654 bool SourcePos::operator==(const SourcePos &p2) const {
1655  return (!strcmp(name, p2.name) && first_line == p2.first_line && first_column == p2.first_column &&
1656  last_line == p2.last_line && last_column == p2.last_column);
1657 }
1658 
1659 SourcePos Union(const SourcePos &p1, const SourcePos &p2) {
1660  if (strcmp(p1.name, p2.name) != 0)
1661  return p1;
1662 
1663  SourcePos ret;
1664  ret.name = p1.name;
1665  ret.first_line = std::min(p1.first_line, p2.first_line);
1666  ret.first_column = std::min(p1.first_column, p2.first_column);
1667  ret.last_line = std::max(p1.last_line, p2.last_line);
1668  ret.last_column = std::max(p1.last_column, p2.last_column);
1669  return ret;
1670 }
1671 
1672 TargetOS StringToOS(std::string os) {
1673  std::string supportedOses = Target::SupportedOSes();
1674  if (supportedOses.find(os) == std::string::npos) {
1675  return OS_ERROR;
1676  }
1677  if (os == "windows") {
1678  return OS_WINDOWS;
1679  } else if (os == "linux") {
1680  return OS_LINUX;
1681  } else if (os == "macos") {
1682  return OS_MAC;
1683  } else if (os == "android") {
1684  return OS_ANDROID;
1685  } else if (os == "ios") {
1686  return OS_IOS;
1687  } else if (os == "ps4") {
1688  return OS_PS4;
1689  }
1690  return OS_ERROR;
1691 }
1692 
1693 constexpr TargetOS GetHostOS() {
1694 #if defined(ISPC_HOST_IS_WINDOWS) && !defined(ISPC_WINDOWS_TARGET_OFF)
1695  return OS_WINDOWS;
1696 #elif defined(ISPC_HOST_IS_LINUX) && !defined(ISPC_LINUX_TARGET_OFF)
1697  return OS_LINUX;
1698 #elif defined(ISPC_HOST_IS_APPLE) && !defined(ISPC_MACOS_TARGET_OFF)
1699  return OS_MAC;
1700 #else
1701  return OS_ERROR;
1702 #endif
1703 }
bool disableFMA
Definition: ispc.h:460
#define CPU_Broadwell
bool m_hasTranscendentals
Definition: ispc.h:406
TargetOS target_os
Definition: ispc.h:547
#define CPU_Silvermont
Definition: ispc.h:119
Globals()
Definition: ispc.cpp:1573
Opt opt
Definition: ispc.h:542
int last_column
Definition: ispc.h:138
const llvm::Target * m_target
Definition: ispc.h:307
static bool __os_has_avx_support()
Definition: ispc.cpp:125
This structure collects together a number of global variables.
Definition: ispc.h:538
std::vector< std::pair< std::string, std::string > > m_funcAttributes
Definition: ispc.h:343
int m_nativeVectorAlignment
Definition: ispc.h:366
AllCPUs()
Definition: ispc.cpp:346
SourcePos Union(const SourcePos &p1, const SourcePos &p2)
Definition: ispc.cpp:1659
int first_line
Definition: ispc.h:135
llvm::Value * SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1475
SourcePos(const char *n=NULL, int fl=0, int fc=0, int ll=0, int lc=0)
Definition: ispc.cpp:1618
static llvm::VectorType * BoolVectorType
Definition: llvmutil.h:91
const char * GetISATargetString() const
Definition: ispc.cpp:1445
std::string m_cpu
Definition: ispc.h:337
Opt()
Definition: ispc.cpp:1549
std::string m_arch
Definition: ispc.h:331
static const char * lGetSystemISA()
Definition: ispc.cpp:157
bool NoOmitFramePointer
Definition: ispc.h:574
std::string HumanReadableListOfNames()
Definition: ispc.cpp:478
bool BackwardCompatible(CPUtype what, CPUtype with)
Definition: ispc.cpp:509
std::string GetTripleString() const
Definition: ispc.cpp:1264
static const bool lIsISAValidforArch(const char *isa, const char *arch)
Definition: ispc.cpp:212
static bool lGenericTypeLayoutIndeterminate(llvm::Type *type)
Definition: ispc.cpp:1447
int m_nativeVectorWidth
Definition: ispc.h:359
Module * m
Definition: ispc.cpp:102
static const char * ISAToString(Target::ISA isa)
Definition: ispc.cpp:1364
bool m_generatePIC
Definition: ispc.h:378
static const char * SupportedArchs()
Definition: ispc.cpp:1204
std::set< CPUtype > Set(int type,...)
Definition: ispc.cpp:332
bool m_maskingIsFree
Definition: ispc.h:384
#define Assert(expr)
Definition: ispc.h:163
static llvm::VectorType * Int1VectorType
Definition: llvmutil.h:92
void GetDirectoryAndFileName(const std::string &currentDir, const std::string &relativeName, std::string *directory, std::string *filename)
Definition: util.cpp:513
Target(const char *arch, const char *cpu, const char *isa, bool pic, bool printTarget, std::string genenricAsSmth="")
Definition: ispc.cpp:516
static void __cpuidex(int info[4], int level, int count)
Definition: ispc.cpp:115
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:228
TargetOS StringToOS(std::string os)
Definition: ispc.cpp:1672
llvm::Module * module
Definition: module.h:156
static std::string SupportedCPUs()
Definition: ispc.cpp:1199
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:351
TargetOS
Definition: ispc.h:119
Definition: module.h:56
static llvm::Type * Int64Type
Definition: llvmutil.h:78
char currentDirectory[1024]
Definition: ispc.h:649
std::string & GetDefaultNameFromType(CPUtype type)
Definition: ispc.cpp:494
Header file with declarations for various LLVM utility stuff.
bool m_is32Bit
Definition: ispc.h:334
CodegenOptLevel codegenOptLevel
Definition: ispc.h:556
bool m_hasRand
Definition: ispc.h:396
bool m_hasRcpd
Definition: ispc.h:415
int m_maskBitCount
Definition: ispc.h:389
static void __cpuid(int info[4], int infoType)
Definition: ispc.cpp:110
Representation of a range of positions in a source file.
Definition: ispc.h:131
llvm::Value * StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd)
Definition: ispc.cpp:1500
Definition: ispc.h:119
int m_vectorWidth
Definition: ispc.h:375
void Print() const
Definition: ispc.cpp:1650
llvm::TargetMachine * m_targetMachine
Definition: ispc.h:317
bool force32BitAddressing
Definition: ispc.h:452
static bool __os_has_avx512_support()
Definition: ispc.cpp:140
const char * name
Definition: ispc.h:134
void markFuncWithTargetAttr(llvm::Function *func)
Definition: ispc.cpp:1534
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:375
#define FATAL(message)
Definition: util.h:112
int m_dataTypeWidth
Definition: ispc.h:370
static llvm::Type * Int32Type
Definition: llvmutil.h:77
int last_line
Definition: ispc.h:137
const llvm::DataLayout * getDataLayout() const
Definition: ispc.h:258
#define PTYPE(p)
Definition: llvmutil.h:55
#define ISPC_MAX_NVEC
Definition: ispc.h:66
std::vector< std::vector< std::string > > names
Definition: ispc.cpp:329
Definition: ispc.h:119
bool m_hasGather
Definition: ispc.h:399
int first_column
Definition: ispc.h:136
llvm::DataLayout * m_dataLayout
Definition: ispc.h:318
bool m_hasScatter
Definition: ispc.h:402
ISA
Definition: ispc.h:179
Definition: ispc.h:119
const char * GetISAString() const
Definition: ispc.cpp:1402
bool m_valid
Definition: ispc.h:322
Globals * g
Definition: ispc.cpp:101
static llvm::VectorType * MaskType
Definition: llvmutil.h:89
static const char * SupportedOSes()
Definition: ispc.cpp:1238
std::vector< std::set< CPUtype > > compat
Definition: ispc.cpp:330
bool m_hasTrigonometry
Definition: ispc.h:409
std::string m_treatGenericAsSmth
Definition: ispc.h:328
bool m_hasHalf
Definition: ispc.h:393
bool operator==(const SourcePos &p2) const
Definition: ispc.cpp:1654
constexpr TargetOS GetHostOS()
Definition: ispc.cpp:1693
Definition: ispc.h:119
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
llvm::ConstantInt * LLVMInt64(int64_t i)
Definition: llvmutil.cpp:236
int errorCount
Definition: module.h:149
llvm::LLVMContext * ctx
Definition: ispc.h:645
static const char * SupportedTargets()
Definition: ispc.cpp:1212
ISA m_isa
Definition: ispc.h:325
bool m_hasVecPrefetch
Definition: ispc.h:418
llvm::DIFile GetDIFile() const
Definition: ispc.cpp:1638
llvm::DIBuilder * diBuilder
Definition: module.h:159
Main ispc.header file. Defines Target, Globals and Opt classes.
CPUtype GetTypeFromName(std::string name)
Definition: ispc.cpp:499
static const char * ISAToTargetString(Target::ISA isa)
Definition: ispc.cpp:1407
bool m_hasRsqrtd
Definition: ispc.h:412
CPUtype
Definition: ispc.cpp:232