Intel SPMD Program Compiler  1.12.0
builtins.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2019, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file builtins.cpp
35  @brief Definitions of functions related to setting up the standard library
36  and other builtins.
37 */
38 
39 #include "builtins.h"
40 #include "ctx.h"
41 #include "expr.h"
42 #include "llvmutil.h"
43 #include "module.h"
44 #include "sym.h"
45 #include "type.h"
46 #include "util.h"
47 
48 #include <math.h>
49 #include <stdlib.h>
50 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
51 #include <llvm/Attributes.h>
52 #include <llvm/DerivedTypes.h>
53 #include <llvm/Instructions.h>
54 #include <llvm/Intrinsics.h>
55 #include <llvm/LLVMContext.h>
56 #include <llvm/Module.h>
57 #include <llvm/Type.h>
58 #else
59 #include <llvm/IR/Attributes.h>
60 #include <llvm/IR/DerivedTypes.h>
61 #include <llvm/IR/Instructions.h>
62 #include <llvm/IR/Intrinsics.h>
63 #include <llvm/IR/LLVMContext.h>
64 #include <llvm/IR/Module.h>
65 #include <llvm/IR/Type.h>
66 #endif
67 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
68 #include <llvm/Linker/Linker.h>
69 #else
70 #include <llvm/Linker.h>
71 #endif
72 #include <llvm/ADT/Triple.h>
73 #include <llvm/Support/MemoryBuffer.h>
74 #include <llvm/Target/TargetMachine.h>
75 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
76 #include <llvm/Bitcode/ReaderWriter.h>
77 #else
78 #include <llvm/Bitcode/BitcodeReader.h>
79 #endif
80 
// Parser / lexer entry points that are declared here and defined elsewhere.
// NOTE(review): presumably these come from the generated yacc/lex sources
// (yyparse parses the current lexer buffer, yy_scan_string points the lexer
// at an in-memory string) -- confirm against the parser sources.
81 extern int yyparse();
82 struct yy_buffer_state;
83 extern yy_buffer_state *yy_scan_string(const char *);
84 
85 /** Given an LLVM type, try to find the equivalent ispc type. Note that
86  this is an under-constrained problem due to LLVM's type representations
87  carrying less information than ispc's. (For example, LLVM doesn't
88  distinguish between signed and unsigned integers in its types.)
89 
90  Because this function is only used for generating ispc declarations of
91  functions defined in LLVM bitcode in the builtins-*.ll files, in practice
92  we can get enough of what we need for the relevant cases to make things
93  work, partially with the help of the intAsUnsigned parameter, which
94  indicates whether LLVM integer types should be treated as being signed
95  or unsigned.
96 
97  */
98 static const Type *lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
99  if (t == LLVMTypes::VoidType)
100  return AtomicType::Void;
101 
102  // uniform
103  else if (t == LLVMTypes::BoolType)
105  else if (t == LLVMTypes::Int8Type)
106  return intAsUnsigned ? AtomicType::UniformUInt8 : AtomicType::UniformInt8;
107  else if (t == LLVMTypes::Int16Type)
108  return intAsUnsigned ? AtomicType::UniformUInt16 : AtomicType::UniformInt16;
109  else if (t == LLVMTypes::Int32Type)
110  return intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32;
111  else if (t == LLVMTypes::FloatType)
113  else if (t == LLVMTypes::DoubleType)
115  else if (t == LLVMTypes::Int64Type)
116  return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
117 
118  // varying
119  if (t == LLVMTypes::Int8VectorType)
120  return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
121  else if (t == LLVMTypes::Int16VectorType)
122  return intAsUnsigned ? AtomicType::VaryingUInt16 : AtomicType::VaryingInt16;
123  else if (t == LLVMTypes::Int32VectorType)
124  return intAsUnsigned ? AtomicType::VaryingUInt32 : AtomicType::VaryingInt32;
125  else if (t == LLVMTypes::FloatVectorType)
127  else if (t == LLVMTypes::DoubleVectorType)
129  else if (t == LLVMTypes::Int64VectorType)
130  return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
131  else if (t == LLVMTypes::MaskType)
133 
134  // pointers to uniform
135  else if (t == LLVMTypes::Int8PointerType)
137  else if (t == LLVMTypes::Int16PointerType)
139  else if (t == LLVMTypes::Int32PointerType)
141  else if (t == LLVMTypes::Int64PointerType)
143  else if (t == LLVMTypes::FloatPointerType)
145  else if (t == LLVMTypes::DoublePointerType)
147 
148  // pointers to varying
149  else if (t == LLVMTypes::Int8VectorPointerType)
151  else if (t == LLVMTypes::Int16VectorPointerType)
153  else if (t == LLVMTypes::Int32VectorPointerType)
155  else if (t == LLVMTypes::Int64VectorPointerType)
157  else if (t == LLVMTypes::FloatVectorPointerType)
161 
162  return NULL;
163 }
164 
165 static void lCreateSymbol(const std::string &name, const Type *returnType, llvm::SmallVector<const Type *, 8> &argTypes,
166  const llvm::FunctionType *ftype, llvm::Function *func, SymbolTable *symbolTable) {
167  SourcePos noPos;
168  noPos.name = "__stdlib";
169 
170  FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
171 
172  Debug(noPos, "Created builtin symbol \"%s\" [%s]\n", name.c_str(), funcType->GetString().c_str());
173 
174  Symbol *sym = new Symbol(name, noPos, funcType);
175  sym->function = func;
176  symbolTable->AddFunction(sym);
177 }
178 
179 /** Given an LLVM function declaration, synthesize the equivalent ispc
180  symbol for the function (if possible). Returns true on success, false
181  on failure.
182  */
183 static bool lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
184  SourcePos noPos;
185  noPos.name = "__stdlib";
186 
187  const llvm::FunctionType *ftype = func->getFunctionType();
188  std::string name = func->getName();
189 
190  if (name.size() < 3 || name[0] != '_' || name[1] != '_')
191  return false;
192 
193  Debug(SourcePos(), "Attempting to create ispc symbol for function \"%s\".", name.c_str());
194 
195  // An unfortunate hack: we want this builtin function to have the
196  // signature "int __sext_varying_bool(bool)", but the ispc function
197  // symbol creation code below assumes that any LLVM vector of i32s is a
198  // varying int32. Here, we need that to be interpreted as a varying
199  // bool, so just have a one-off override for that one...
200  if (g->target->getMaskBitCount() != 1 && name == "__sext_varying_bool") {
201  const Type *returnType = AtomicType::VaryingInt32;
202  llvm::SmallVector<const Type *, 8> argTypes;
203  argTypes.push_back(AtomicType::VaryingBool);
204 
205  FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
206 
207  Symbol *sym = new Symbol(name, noPos, funcType);
208  sym->function = func;
209  symbolTable->AddFunction(sym);
210  return true;
211  }
212 
213  // If the function has any parameters with integer types, we'll make
214  // two Symbols for two overloaded versions of the function, one with
215  // all of the integer types treated as signed integers and one with all
216  // of them treated as unsigned.
217  for (int i = 0; i < 2; ++i) {
218  bool intAsUnsigned = (i == 1);
219 
220  const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType(), intAsUnsigned);
221  if (returnType == NULL) {
222  Debug(SourcePos(),
223  "Failed: return type not representable for "
224  "builtin %s.",
225  name.c_str());
226  // return type not representable in ispc -> not callable from ispc
227  return false;
228  }
229 
230  // Iterate over the arguments and try to find their equivalent ispc
231  // types. Track if any of the arguments has an integer type.
232  bool anyIntArgs = false;
233  llvm::SmallVector<const Type *, 8> argTypes;
234  for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
235  const llvm::Type *llvmArgType = ftype->getParamType(j);
236  const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
237  if (type == NULL) {
238  Debug(SourcePos(),
239  "Failed: type of parameter %d not "
240  "representable for builtin %s",
241  j, name.c_str());
242  return false;
243  }
244  anyIntArgs |= (Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
245  argTypes.push_back(type);
246  }
247 
248  // Always create the symbol the first time through, in particular
249  // so that we get symbols for things with no integer types!
250  if (i == 0 || anyIntArgs == true)
251  lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable);
252  }
253 
254  return true;
255 }
256 
257 /** Given an LLVM module, create ispc symbols for the functions in the
258  module.
259  */
260 static void lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
261 #if 0
262  // FIXME: handle globals?
263  Assert(module->global_empty());
264 #endif
265 
266  llvm::Module::iterator iter;
267  for (iter = module->begin(); iter != module->end(); ++iter) {
268 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
269  llvm::Function *func = iter;
270 #else /* LLVM 3.8+ */
271  llvm::Function *func = &*iter;
272 #endif
273  lCreateISPCSymbol(func, symbolTable);
274  }
275 }
276 
277 /** In many of the builtins-*.ll files, we have declarations of various LLVM
278  intrinsics that are then used in the implementation of various target-
279  specific functions. This function loops over all of the intrinsic
280  declarations and makes sure that the signature we have in our .ll file
281  matches the signature of the actual intrinsic.
282 */
283 static void lCheckModuleIntrinsics(llvm::Module *module) {
284  llvm::Module::iterator iter;
285  for (iter = module->begin(); iter != module->end(); ++iter) {
286 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
287  llvm::Function *func = iter;
288 #else /* LLVM 3.8+ */
289  llvm::Function *func = &*iter;
290 #endif
291  if (!func->isIntrinsic())
292  continue;
293 
294  const std::string funcName = func->getName().str();
295  // Work around http://llvm.org/bugs/show_bug.cgi?id=10438; only
296  // check the llvm.x86.* intrinsics for now...
297  if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
298  llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
299  if (id == 0)
300  fprintf(stderr, "FATAL: intrinsic is not found: %s \n", funcName.c_str());
301  Assert(id != 0);
302  llvm::Type *intrinsicType = llvm::Intrinsic::getType(*g->ctx, id);
303  intrinsicType = llvm::PointerType::get(intrinsicType, 0);
304  Assert(func->getType() == intrinsicType);
305  }
306  }
307 }
308 
309 /** We'd like to have all of these functions declared as 'internal' in
310  their respective bitcode files so that if they aren't needed by the
311  user's program they are elimiated from the final output. However, if
312  we do so, then they aren't brought in by the LinkModules() call below
313  since they aren't yet used by anything in the module they're being
314  linked with (in LLVM 3.1, at least).
315 
316  Therefore, we don't declare them as internal when we first define them,
317  but instead mark them as internal after they've been linked in. This
318  is admittedly a kludge.
319  */
320 static void lSetInternalFunctions(llvm::Module *module) {
321  // clang-format off
322  const char *names[] = {
323  "__add_float",
324  "__add_int32",
325  "__add_uniform_double",
326  "__add_uniform_int32",
327  "__add_uniform_int64",
328  "__add_varying_double",
329  "__add_varying_int32",
330  "__add_varying_int64",
331  "__all",
332  "__any",
333  "__aos_to_soa3_double",
334  "__aos_to_soa3_double1",
335  "__aos_to_soa3_double16",
336  "__aos_to_soa3_double4",
337  "__aos_to_soa3_double8",
338  "__aos_to_soa3_int64",
339  "__aos_to_soa3_float",
340 //#ifdef ISPC_NVPTX_ENABLED
341  "__aos_to_soa3_float1",
342 //#endif /* ISPC_NVPTX_ENABLED */
343  "__aos_to_soa3_float16",
344  "__aos_to_soa3_float4",
345  "__aos_to_soa3_float8",
346  "__aos_to_soa3_int32",
347  "__aos_to_soa4_double",
348  "__aos_to_soa4_double1",
349  "__aos_to_soa4_double16",
350  "__aos_to_soa4_double4",
351  "__aos_to_soa4_double8",
352  "__aos_to_soa4_int64",
353  "__aos_to_soa4_float",
354 //#ifdef ISPC_NVPTX_ENABLED
355  "__aos_to_soa4_float1",
356 //#endif /* ISPC_NVPTX_ENABLED */
357  "__aos_to_soa4_float16",
358  "__aos_to_soa4_float4",
359  "__aos_to_soa4_float8",
360  "__aos_to_soa4_int32",
361  "__atomic_add_int32_global",
362  "__atomic_add_int64_global",
363  "__atomic_add_uniform_int32_global",
364  "__atomic_add_uniform_int64_global",
365  "__atomic_and_int32_global",
366  "__atomic_and_int64_global",
367  "__atomic_and_uniform_int32_global",
368  "__atomic_and_uniform_int64_global",
369  "__atomic_compare_exchange_double_global",
370  "__atomic_compare_exchange_float_global",
371  "__atomic_compare_exchange_int32_global",
372  "__atomic_compare_exchange_int64_global",
373  "__atomic_compare_exchange_uniform_double_global",
374  "__atomic_compare_exchange_uniform_float_global",
375  "__atomic_compare_exchange_uniform_int32_global",
376  "__atomic_compare_exchange_uniform_int64_global",
377  "__atomic_max_uniform_int32_global",
378  "__atomic_max_uniform_int64_global",
379  "__atomic_min_uniform_int32_global",
380  "__atomic_min_uniform_int64_global",
381  "__atomic_or_int32_global",
382  "__atomic_or_int64_global",
383  "__atomic_or_uniform_int32_global",
384  "__atomic_or_uniform_int64_global",
385  "__atomic_sub_int32_global",
386  "__atomic_sub_int64_global",
387  "__atomic_sub_uniform_int32_global",
388  "__atomic_sub_uniform_int64_global",
389  "__atomic_swap_double_global",
390  "__atomic_swap_float_global",
391  "__atomic_swap_int32_global",
392  "__atomic_swap_int64_global",
393  "__atomic_swap_uniform_double_global",
394  "__atomic_swap_uniform_float_global",
395  "__atomic_swap_uniform_int32_global",
396  "__atomic_swap_uniform_int64_global",
397  "__atomic_umax_uniform_uint32_global",
398  "__atomic_umax_uniform_uint64_global",
399  "__atomic_umin_uniform_uint32_global",
400  "__atomic_umin_uniform_uint64_global",
401  "__atomic_xor_int32_global",
402  "__atomic_xor_int64_global",
403  "__atomic_xor_uniform_int32_global",
404  "__atomic_xor_uniform_int64_global",
405 //#ifdef ISPC_NVPTX_ENABLED
406  "__atomic_add_varying_int32_global",
407  "__atomic_add_varying_int64_global",
408  "__atomic_and_varying_int32_global",
409  "__atomic_and_varying_int64_global",
410  "__atomic_compare_exchange_varying_double_global",
411  "__atomic_compare_exchange_varying_float_global",
412  "__atomic_compare_exchange_varying_int32_global",
413  "__atomic_compare_exchange_varying_int64_global",
414  "__atomic_max_varying_int32_global",
415  "__atomic_max_varying_int64_global",
416  "__atomic_min_varying_int32_global",
417  "__atomic_min_varying_int64_global",
418  "__atomic_or_varying_int32_global",
419  "__atomic_or_varying_int64_global",
420  "__atomic_sub_varying_int32_global",
421  "__atomic_sub_varying_int64_global",
422  "__atomic_swap_varying_double_global",
423  "__atomic_swap_varying_float_global",
424  "__atomic_swap_varying_int32_global",
425  "__atomic_swap_varying_int64_global",
426  "__atomic_umax_varying_uint32_global",
427  "__atomic_umax_varying_uint64_global",
428  "__atomic_umin_varying_uint32_global",
429  "__atomic_umin_varying_uint64_global",
430  "__atomic_xor_uniform_int32_global",
431  "__atomic_xor_uniform_int64_global",
432  "__atomic_xor_varying_int32_global",
433  "__atomic_xor_varying_int64_global",
434  "__atomic_xor_varying_int32_global",
435  "__atomic_xor_varying_int64_global",
436 //#endif /* ISPC_NVPTX_ENABLED */
437  "__broadcast_double",
438  "__broadcast_float",
439  "__broadcast_i16",
440  "__broadcast_i32",
441  "__broadcast_i64",
442  "__broadcast_i8",
443  "__cast_mask_to_i1",
444  "__cast_mask_to_i8",
445  "__cast_mask_to_i16",
446  "__ceil_uniform_double",
447  "__ceil_uniform_float",
448  "__ceil_varying_double",
449  "__ceil_varying_float",
450  "__clock",
451  "__count_trailing_zeros_i32",
452  "__count_trailing_zeros_i64",
453  "__count_leading_zeros_i32",
454  "__count_leading_zeros_i64",
455  "__delete_uniform_32rt",
456  "__delete_uniform_64rt",
457  "__delete_varying_32rt",
458  "__delete_varying_64rt",
459  "__do_assert_uniform",
460  "__do_assert_varying",
461  "__do_print",
462 //#ifdef ISPC_NVPTX_ENABLED
463  "__do_print_nvptx",
464 //#endif /* ISPC_NVPTX_ENABLED */
465  "__doublebits_uniform_int64",
466  "__doublebits_varying_int64",
467  "__exclusive_scan_add_double",
468  "__exclusive_scan_add_float",
469  "__exclusive_scan_add_i32",
470  "__exclusive_scan_add_i64",
471  "__exclusive_scan_and_i32",
472  "__exclusive_scan_and_i64",
473  "__exclusive_scan_or_i32",
474  "__exclusive_scan_or_i64",
475  "__extract_int16",
476  "__extract_int32",
477  "__extract_int64",
478  "__extract_int8",
479 //#ifdef ISPC_NVPTX_ENABLED
480  "__extract_float",
481  "__extract_double",
482 //#endif /* ISPC_NVPTX_ENABLED */
483  "__extract_mask_low",
484  "__extract_mask_hi",
485  "__fastmath",
486  "__float_to_half_uniform",
487  "__float_to_half_varying",
488  "__floatbits_uniform_int32",
489  "__floatbits_varying_int32",
490  "__floor_uniform_double",
491  "__floor_uniform_float",
492  "__floor_varying_double",
493  "__floor_varying_float",
494  "__get_system_isa",
495  "__half_to_float_uniform",
496  "__half_to_float_varying",
497  "__insert_int16",
498  "__insert_int32",
499  "__insert_int64",
500  "__insert_int8",
501 //#ifdef ISPC_NVPTX_ENABLED
502  "__insert_float",
503  "__insert_double",
504 //#endif /* ISPC_NVPTX_ENABLED */
505  "__intbits_uniform_double",
506  "__intbits_uniform_float",
507  "__intbits_varying_double",
508  "__intbits_varying_float",
509  "__max_uniform_double",
510  "__max_uniform_float",
511  "__max_uniform_int32",
512  "__max_uniform_int64",
513  "__max_uniform_uint32",
514  "__max_uniform_uint64",
515  "__max_varying_double",
516  "__max_varying_float",
517  "__max_varying_int32",
518  "__max_varying_int64",
519  "__max_varying_uint32",
520  "__max_varying_uint64",
521  "__memory_barrier",
522  "__memcpy32",
523  "__memcpy64",
524  "__memmove32",
525  "__memmove64",
526  "__memset32",
527  "__memset64",
528  "__min_uniform_double",
529  "__min_uniform_float",
530  "__min_uniform_int32",
531  "__min_uniform_int64",
532  "__min_uniform_uint32",
533  "__min_uniform_uint64",
534  "__min_varying_double",
535  "__min_varying_float",
536  "__min_varying_int32",
537  "__min_varying_int64",
538  "__min_varying_uint32",
539  "__min_varying_uint64",
540  "__movmsk",
541 //#ifdef ISPC_NVPTX_ENABLED
542  "__movmsk_ptx",
543 //#endif /* ISPC_NVPTX_ENABLED */
544  "__new_uniform_32rt",
545  "__new_uniform_64rt",
546  "__new_varying32_32rt",
547  "__new_varying32_64rt",
548  "__new_varying64_64rt",
549  "__none",
550  "__num_cores",
551  "__packed_load_active",
552  "__packed_store_active",
553  "__packed_store_active2",
554  "__padds_vi8",
555  "__padds_vi16",
556  "__paddus_vi8",
557  "__paddus_vi16",
558  "__popcnt_int32",
559  "__popcnt_int64",
560  "__prefetch_read_uniform_1",
561  "__prefetch_read_uniform_2",
562  "__prefetch_read_uniform_3",
563  "__prefetch_read_uniform_nt",
564  "__pseudo_prefetch_read_varying_1",
565  "__pseudo_prefetch_read_varying_2",
566  "__pseudo_prefetch_read_varying_3",
567  "__pseudo_prefetch_read_varying_nt",
568  "__psubs_vi8",
569  "__psubs_vi16",
570  "__psubus_vi8",
571  "__psubus_vi16",
572  "__rcp_fast_uniform_float",
573  "__rcp_uniform_float",
574  "__rcp_fast_varying_float",
575  "__rcp_varying_float",
576  "__rcp_uniform_double",
577  "__rcp_varying_double",
578  "__rdrand_i16",
579  "__rdrand_i32",
580  "__rdrand_i64",
581  "__reduce_add_double",
582  "__reduce_add_float",
583  "__reduce_add_int8",
584  "__reduce_add_int16",
585  "__reduce_add_int32",
586  "__reduce_add_int64",
587  "__reduce_equal_double",
588  "__reduce_equal_float",
589  "__reduce_equal_int32",
590  "__reduce_equal_int64",
591  "__reduce_max_double",
592  "__reduce_max_float",
593  "__reduce_max_int32",
594  "__reduce_max_int64",
595  "__reduce_max_uint32",
596  "__reduce_max_uint64",
597  "__reduce_min_double",
598  "__reduce_min_float",
599  "__reduce_min_int32",
600  "__reduce_min_int64",
601  "__reduce_min_uint32",
602  "__reduce_min_uint64",
603  "__rotate_double",
604  "__rotate_float",
605  "__rotate_i16",
606  "__rotate_i32",
607  "__rotate_i64",
608  "__rotate_i8",
609  "__round_uniform_double",
610  "__round_uniform_float",
611  "__round_varying_double",
612  "__round_varying_float",
613  "__rsqrt_fast_varying_float",
614  "__rsqrt_uniform_float",
615  "__rsqrt_fast_uniform_float",
616  "__rsqrt_varying_float",
617  "__rsqrt_uniform_double",
618  "__rsqrt_varying_double",
619  "__set_system_isa",
620  "__sext_uniform_bool",
621  "__sext_varying_bool",
622  "__shift_double",
623  "__shift_float",
624  "__shift_i16",
625  "__shift_i32",
626  "__shift_i64",
627  "__shift_i8",
628  "__shuffle2_double",
629  "__shuffle2_float",
630  "__shuffle2_i16",
631  "__shuffle2_i32",
632  "__shuffle2_i64",
633  "__shuffle2_i8",
634  "__shuffle_double",
635  "__shuffle_float",
636  "__shuffle_i16",
637  "__shuffle_i32",
638  "__shuffle_i64",
639  "__shuffle_i8",
640  "__soa_to_aos3_double",
641  "__soa_to_aos3_double16",
642  "__soa_to_aos3_double4",
643  "__soa_to_aos3_double8",
644  "__soa_to_aos3_int64",
645  "__soa_to_aos3_float",
646  "__soa_to_aos3_float16",
647  "__soa_to_aos3_float4",
648  "__soa_to_aos3_float8",
649  "__soa_to_aos3_int32",
650  "__soa_to_aos4_float",
651 //#ifdef ISPC_NVPTX_ENABLED
652  "__soa_to_aos3_double1",
653  "__soa_to_aos3_float1",
654  "__soa_to_aos4_float1",
655  "__soa_to_aos4_double1",
656 //#endif /* ISPC_NVPTX_ENABLED */
657  "__soa_to_aos4_double16",
658  "__soa_to_aos4_double4",
659  "__soa_to_aos4_double8",
660  "__soa_to_aos4_double",
661  "__soa_to_aos4_int64",
662  "__soa_to_aos4_float16",
663  "__soa_to_aos4_float4",
664  "__soa_to_aos4_float8",
665  "__soa_to_aos4_int32",
666  "__sqrt_uniform_double",
667  "__sqrt_uniform_float",
668  "__sqrt_varying_double",
669  "__sqrt_varying_float",
670  "__stdlib_acosf",
671  "__stdlib_asinf",
672  "__stdlib_atan",
673  "__stdlib_atan2",
674  "__stdlib_atan2f",
675  "__stdlib_atanf",
676  "__stdlib_cos",
677  "__stdlib_cosf",
678  "__stdlib_exp",
679  "__stdlib_expf",
680  "__stdlib_log",
681  "__stdlib_logf",
682  "__stdlib_pow",
683  "__stdlib_powf",
684  "__stdlib_sin",
685  "__stdlib_asin",
686  "__stdlib_sincos",
687  "__stdlib_sincosf",
688  "__stdlib_sinf",
689  "__stdlib_tan",
690  "__stdlib_tanf",
691  "__streaming_load_uniform_double",
692  "__streaming_load_uniform_float",
693  "__streaming_load_uniform_i8",
694  "__streaming_load_uniform_i16",
695  "__streaming_load_uniform_i32",
696  "__streaming_load_uniform_i64",
697  "__streaming_load_varying_double",
698  "__streaming_load_varying_float",
699  "__streaming_load_varying_i8",
700  "__streaming_load_varying_i16",
701  "__streaming_load_varying_i32",
702  "__streaming_load_varying_i64",
703  "__streaming_store_uniform_double",
704  "__streaming_store_uniform_float",
705  "__streaming_store_uniform_i8",
706  "__streaming_store_uniform_i16",
707  "__streaming_store_uniform_i32",
708  "__streaming_store_uniform_i64",
709  "__streaming_store_varying_double",
710  "__streaming_store_varying_float",
711  "__streaming_store_varying_i8",
712  "__streaming_store_varying_i16",
713  "__streaming_store_varying_i32",
714  "__streaming_store_varying_i64",
715  "__svml_sind",
716  "__svml_asind",
717  "__svml_cosd",
718  "__svml_acosd",
719  "__svml_sincosd",
720  "__svml_tand",
721  "__svml_atand",
722  "__svml_atan2d",
723  "__svml_expd",
724  "__svml_logd",
725  "__svml_powd",
726  "__svml_sinf",
727  "__svml_asinf",
728  "__svml_cosf",
729  "__svml_acosf",
730  "__svml_sincosf",
731  "__svml_tanf",
732  "__svml_atanf",
733  "__svml_atan2f",
734  "__svml_expf",
735  "__svml_logf",
736  "__svml_powf",
737  "__log_uniform_float",
738  "__log_varying_float",
739  "__exp_uniform_float",
740  "__exp_varying_float",
741  "__pow_uniform_float",
742  "__pow_varying_float",
743  "__log_uniform_double",
744  "__log_varying_double",
745  "__exp_uniform_double",
746  "__exp_varying_double",
747  "__pow_uniform_double",
748  "__pow_varying_double",
749  "__sin_varying_float",
750  "__asin_varying_float",
751  "__cos_varying_float",
752  "__acos_varying_float",
753  "__sincos_varying_float",
754  "__tan_varying_float",
755  "__atan_varying_float",
756  "__atan2_varying_float",
757  "__sin_uniform_float",
758  "__asin_uniform_float",
759  "__cos_uniform_float",
760  "__acos_uniform_float",
761  "__sincos_uniform_float",
762  "__tan_uniform_float",
763  "__atan_uniform_float",
764  "__atan2_uniform_float",
765  "__sin_varying_double",
766  "__asin_varying_double",
767  "__cos_varying_double",
768  "__acos_varying_double",
769  "__sincos_varying_double",
770  "__tan_varying_double",
771  "__atan_varying_double",
772  "__atan2_varying_double",
773  "__sin_uniform_double",
774  "__asin_uniform_double",
775  "__cos_uniform_double",
776  "__acos_uniform_double",
777  "__sincos_uniform_double",
778  "__tan_uniform_double",
779  "__atan_uniform_double",
780  "__atan2_uniform_double",
781  "__undef_uniform",
782  "__undef_varying",
783  "__vec4_add_float",
784  "__vec4_add_int32",
785  "__vselect_float",
786 //#ifdef ISPC_NVPTX_ENABLED
787  "__program_index",
788  "__program_count",
789  "__warp_index",
790  "__task_index0",
791  "__task_index1",
792  "__task_index2",
793  "__task_index",
794  "__task_count0",
795  "__task_count1",
796  "__task_count2",
797  "__task_count",
798  "__cvt_loc2gen",
799  "__cvt_loc2gen_var",
800  "__cvt_const2gen",
801  "__puts_nvptx",
802  "ISPCAlloc",
803  "ISPCLaunch",
804  "ISPCSync",
805 //#endif /* ISPC_NVPTX_ENABLED */
806  "__vselect_i32"
807  };
808  // clang-format on
809  int count = sizeof(names) / sizeof(names[0]);
810  for (int i = 0; i < count; ++i) {
811  llvm::Function *f = module->getFunction(names[i]);
812  if (f != NULL && f->empty() == false) {
813  f->setLinkage(llvm::GlobalValue::InternalLinkage);
814  // TO-DO : Revisit adding this back for ARM support.
815  // g->target->markFuncWithTargetAttr(f);
816  }
817  }
818 }
819 
820 /** This utility function takes serialized binary LLVM bitcode and adds its
821  definitions to the given module. Functions in the bitcode that can be
822  mapped to ispc functions are also added to the symbol table.
823 
824  @param bitcode Binary LLVM bitcode (e.g. the contents of a *.bc file)
825  @param length Length of the bitcode buffer
826  @param module Module to link the bitcode into
827  @param symbolTable Symbol table to add definitions to
828  */
829 void AddBitcodeToModule(const unsigned char *bitcode, int length, llvm::Module *module, SymbolTable *symbolTable,
830  bool warn) {
831  llvm::StringRef sb = llvm::StringRef((char *)bitcode, length);
832 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5
833  llvm::MemoryBuffer *bcBuf = llvm::MemoryBuffer::getMemBuffer(sb);
834 #else // LLVM 3.6+
835  llvm::MemoryBufferRef bcBuf = llvm::MemoryBuffer::getMemBuffer(sb)->getMemBufferRef();
836 #endif
837 
838 #if ISPC_LLVM_VERSION >= ISPC_LLVM_4_0 // LLVM 4.0+
839  llvm::Expected<std::unique_ptr<llvm::Module>> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
840  if (!ModuleOrErr) {
841  Error(SourcePos(), "Error parsing stdlib bitcode: %s", toString(ModuleOrErr.takeError()).c_str());
842  } else {
843  llvm::Module *bcModule = ModuleOrErr.get().release();
844 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
845  llvm::ErrorOr<std::unique_ptr<llvm::Module>> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
846  if (std::error_code EC = ModuleOrErr.getError())
847  Error(SourcePos(), "Error parsing stdlib bitcode: %s", EC.message().c_str());
848  else {
849  llvm::Module *bcModule = ModuleOrErr.get().release();
850 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_5 || ISPC_LLVM_VERSION == ISPC_LLVM_3_6
851  llvm::ErrorOr<llvm::Module *> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
852  if (std::error_code EC = ModuleOrErr.getError())
853  Error(SourcePos(), "Error parsing stdlib bitcode: %s", EC.message().c_str());
854  else {
855  llvm::Module *bcModule = ModuleOrErr.get();
856 #else // LLVM 3.2 - 3.4
857  std::string bcErr;
858  llvm::Module *bcModule = llvm::ParseBitcodeFile(bcBuf, *g->ctx, &bcErr);
859  if (!bcModule)
860  Error(SourcePos(), "Error parsing stdlib bitcode: %s", bcErr.c_str());
861  else {
862 #endif
863  // FIXME: this feels like a bad idea, but the issue is that when we
864  // set the llvm::Module's target triple in the ispc Module::Module
865  // constructor, we start by calling llvm::sys::getHostTriple() (and
866  // then change the arch if needed). Somehow that ends up giving us
867  // strings like 'x86_64-apple-darwin11.0.0', while the stuff we
868  // compile to bitcode with clang has module triples like
869  // 'i386-apple-macosx10.7.0'. And then LLVM issues a warning about
870  // linking together modules with incompatible target triples..
871  llvm::Triple mTriple(m->module->getTargetTriple());
872  llvm::Triple bcTriple(bcModule->getTargetTriple());
873  Debug(SourcePos(), "module triple: %s\nbitcode triple: %s\n", mTriple.str().c_str(), bcTriple.str().c_str());
874 #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
875  // FIXME: More ugly and dangerous stuff. We really haven't set up
876  // proper build and runtime infrastructure for ispc to do
877  // cross-compilation, yet it's at minimum useful to be able to emit
878  // ARM code from x86 for ispc development. One side-effect is that
879  // when the build process turns builtins/builtins.c to LLVM bitcode
880  // for us to link in at runtime, that bitcode has been compiled for
881  // an IA target, which in turn causes the checks in the following
882  // code to (appropriately) fail.
883  //
884  // In order to be able to have some ability to generate ARM code on
885  // IA, we'll just skip those tests in that case and allow the
886  // setTargetTriple() and setDataLayout() calls below to shove in
887  // the values for an ARM target. This maybe won't cause problems
888  // in the generated code, since builtins.c doesn't do anything too
889  // complex w.r.t. struct layouts, etc.
890  if (g->target->getISA() != Target::NEON32 && g->target->getISA() != Target::NEON16 &&
891  g->target->getISA() != Target::NEON8)
892 #endif // !__arm__
893 #ifdef ISPC_NVPTX_ENABLED
894  if (g->target->getISA() != Target::NVPTX)
895 #endif /* ISPC_NVPTX_ENABLED */
896  // Disable this code for cross compilation
897 #if 0
898  {
899  Assert(bcTriple.getArch() == llvm::Triple::UnknownArch || mTriple.getArch() == bcTriple.getArch());
900  Assert(bcTriple.getVendor() == llvm::Triple::UnknownVendor ||
901  mTriple.getVendor() == bcTriple.getVendor());
902 
903  // We unconditionally set module DataLayout to library, but we must
904  // ensure that library and module DataLayouts are compatible.
905  // If they are not, we should recompile the library for problematic
906  // architecture and investigate what happened.
907  // Generally we allow library DataLayout to be subset of module
908  // DataLayout or library DataLayout to be empty.
909 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
910  if (!VerifyDataLayoutCompatibility(module->getDataLayoutStr(), bcModule->getDataLayoutStr()) && warn) {
911  Warning(SourcePos(),
912  "Module DataLayout is incompatible with "
913  "library DataLayout:\n"
914  "Module DL: %s\n"
915  "Library DL: %s\n",
916  module->getDataLayoutStr().c_str(), bcModule->getDataLayoutStr().c_str());
917  }
918 #else
919  if (!VerifyDataLayoutCompatibility(module->getDataLayout(), bcModule->getDataLayout()) && warn) {
920  Warning(SourcePos(),
921  "Module DataLayout is incompatible with "
922  "library DataLayout:\n"
923  "Module DL: %s\n"
924  "Library DL: %s\n",
925  module->getDataLayout().c_str(), bcModule->getDataLayout().c_str());
926  }
927 #endif
928  }
929 #endif
930  bcModule->setTargetTriple(mTriple.str());
931  bcModule->setDataLayout(module->getDataLayout());
932 
933 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // 3.2-3.5
934  std::string(linkError);
935 
936  if (llvm::Linker::LinkModules(module, bcModule, llvm::Linker::DestroySource, &linkError))
937  Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
938 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 // 3.6-3.7
939  llvm::Linker::LinkModules(module, bcModule);
940 #else // LLVM 3.8+
941  // A hack to move over declaration, which have no definition.
942  // New linker is kind of smart and think it knows better what to do, so
943  // it removes unused declarations without definitions.
944  // This trick should be legal, as both modules use the same LLVMContext.
945  for (llvm::Function &f : *bcModule) {
946  if (f.isDeclaration()) {
947  // Declarations with uses will be moved by Linker.
948  if (f.getNumUses() > 0)
949  continue;
950  module->getOrInsertFunction(f.getName(), f.getFunctionType(), f.getAttributes());
951  }
952  }
953 
954  std::unique_ptr<llvm::Module> M(bcModule);
955  if (llvm::Linker::linkModules(*module, std::move(M))) {
956  Error(SourcePos(), "Error linking stdlib bitcode.");
957  }
958 #endif
959 
960  lSetInternalFunctions(module);
961  if (symbolTable != NULL)
962  lAddModuleSymbols(module, symbolTable);
963  lCheckModuleIntrinsics(module);
964  }
965 }
966 
967 /** Utility routine that defines a constant int32 with given value, adding
968  the symbol to both the ispc symbol table and the given LLVM module.
969  */
970 static void lDefineConstantInt(const char *name, int val, llvm::Module *module, SymbolTable *symbolTable,
971  std::vector<llvm::Constant *> &dbg_sym) {
972  Symbol *sym = new Symbol(name, SourcePos(), AtomicType::UniformInt32->GetAsConstType(), SC_STATIC);
973  sym->constValue = new ConstExpr(sym->type, val, SourcePos());
974  llvm::Type *ltype = LLVMTypes::Int32Type;
975  llvm::Constant *linit = LLVMInt32(val);
976 #if ISPC_LLVM_VERSION < ISPC_LLVM_3_6
977  // Use WeakODRLinkage rather than InternalLinkage so that a definition
978  // survives even if it's not used in the module, so that the symbol is
979  // there in the debugger.
980  llvm::GlobalValue::LinkageTypes linkage =
981  g->generateDebuggingSymbols ? llvm::GlobalValue::WeakODRLinkage : llvm::GlobalValue::InternalLinkage;
982  sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true, linkage, linit, name);
983 #else // LLVM 3.6+
984  auto GV = new llvm::GlobalVariable(*module, ltype, true, llvm::GlobalValue::InternalLinkage, linit, name);
985  dbg_sym.push_back(GV);
986  sym->storagePtr = GV;
987 #endif
988  symbolTable->AddVariable(sym);
989 
990  if (m->diBuilder != NULL) {
991 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
992  llvm::DIFile file;
993  llvm::DIType diType = sym->type->GetDIType(file);
994  Assert(diType.Verify());
995 #else // LLVM 3.7+
996  llvm::DIFile *file = m->diCompileUnit->getFile();
997  llvm::DICompileUnit *cu = m->diCompileUnit;
998  llvm::DIType *diType = sym->type->GetDIType(file);
999 #endif
1000  // FIXME? DWARF says that this (and programIndex below) should
1001  // have the DW_AT_artifical attribute. It's not clear if this
1002  // matters for anything though.
1003 
1004 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5
1005  llvm::DIGlobalVariable var =
1006  m->diBuilder->createGlobalVariable(name, file, 0 /* line */, diType, true /* static */, sym->storagePtr);
1007 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_6 // LLVM 3.6
1008  llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
1009  Assert(sym_const_storagePtr);
1010  llvm::DIGlobalVariable var = m->diBuilder->createGlobalVariable(file, name, name, file, 0 /* line */, diType,
1011  true /* static */, sym_const_storagePtr);
1012 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // LLVM 3.7 - 3.9
1013  llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
1014  Assert(sym_const_storagePtr);
1015  m->diBuilder->createGlobalVariable(cu, name, name, file, 0 /* line */, diType, true /* static */,
1016  sym_const_storagePtr);
1017 #else // LLVM 4.0+
1018  llvm::GlobalVariable *sym_GV_storagePtr = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
1019  llvm::DIGlobalVariableExpression *var =
1020  m->diBuilder->createGlobalVariableExpression(cu, name, name, file, 0 /* line */, diType, true /* static */);
1021  sym_GV_storagePtr->addDebugInfo(var);
1022 #endif
1023 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1024  Assert(var.Verify());
1025 #else // LLVM 3.7+
1026  // coming soon
1027 #endif
1028  }
1029 }
1030 
1031 static void lDefineConstantIntFunc(const char *name, int val, llvm::Module *module, SymbolTable *symbolTable,
1032  std::vector<llvm::Constant *> &dbg_sym) {
1033  llvm::SmallVector<const Type *, 8> args;
1035  Symbol *sym = new Symbol(name, SourcePos(), ft, SC_STATIC);
1036 
1037  llvm::Function *func = module->getFunction(name);
1038  dbg_sym.push_back(func);
1039  Assert(func != NULL); // it should be declared already...
1040 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
1041  func->addFnAttr(llvm::Attributes::AlwaysInline);
1042 #else // LLVM 3.3+
1043  func->addFnAttr(llvm::Attribute::AlwaysInline);
1044 #endif
1045  llvm::BasicBlock *bblock = llvm::BasicBlock::Create(*g->ctx, "entry", func, 0);
1046  llvm::ReturnInst::Create(*g->ctx, LLVMInt32(val), bblock);
1047 
1048  sym->function = func;
1049  symbolTable->AddVariable(sym);
1050 }
1051 
1052 static void lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable,
1053  std::vector<llvm::Constant *> &dbg_sym) {
1054  Symbol *sym = new Symbol("programIndex", SourcePos(), AtomicType::VaryingInt32->GetAsConstType(), SC_STATIC);
1055 
1056  int pi[ISPC_MAX_NVEC];
1057  for (int i = 0; i < g->target->getVectorWidth(); ++i)
1058  pi[i] = i;
1059  sym->constValue = new ConstExpr(sym->type, pi, SourcePos());
1060 
1061  llvm::Type *ltype = LLVMTypes::Int32VectorType;
1062  llvm::Constant *linit = LLVMInt32Vector(pi);
1063 #if ISPC_LLVM_VERSION < ISPC_LLVM_3_6
1064  // See comment in lDefineConstantInt() for why WeakODRLinkage is used here
1065  llvm::GlobalValue::LinkageTypes linkage =
1066  g->generateDebuggingSymbols ? llvm::GlobalValue::WeakODRLinkage : llvm::GlobalValue::InternalLinkage;
1067  sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true, linkage, linit, sym->name.c_str());
1068 #else // LLVM 3.6+
1069  auto GV =
1070  new llvm::GlobalVariable(*module, ltype, true, llvm::GlobalValue::InternalLinkage, linit, sym->name.c_str());
1071  dbg_sym.push_back(GV);
1072  sym->storagePtr = GV;
1073 #endif
1074  symbolTable->AddVariable(sym);
1075 
1076  if (m->diBuilder != NULL) {
1077 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1078  llvm::DIFile file;
1079  llvm::DIType diType = sym->type->GetDIType(file);
1080  Assert(diType.Verify());
1081 #else // LLVM 3.7+
1082  llvm::DIFile *file = m->diCompileUnit->getFile();
1083  llvm::DICompileUnit *cu = m->diCompileUnit;
1084  llvm::DIType *diType = sym->type->GetDIType(file);
1085 #endif
1086 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6 // LLVM 3.6
1087  llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
1088  Assert(sym_const_storagePtr);
1089  llvm::DIGlobalVariable var =
1090  m->diBuilder->createGlobalVariable(file, sym->name.c_str(), sym->name.c_str(), file, 0 /* line */, diType,
1091  false /* static */, sym_const_storagePtr);
1092 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_5
1093  llvm::DIGlobalVariable var = m->diBuilder->createGlobalVariable(sym->name.c_str(), file, 0 /* line */, diType,
1094  false /* static */, sym->storagePtr);
1095 #elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // LLVM 3.7 - 3.9
1096  llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
1097  Assert(sym_const_storagePtr);
1098  m->diBuilder->createGlobalVariable(cu, sym->name.c_str(), sym->name.c_str(), file, 0 /* line */, diType,
1099  false /* static */, sym_const_storagePtr);
1100 #else // LLVM 4.0+
1101  llvm::GlobalVariable *sym_GV_storagePtr = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
1102  llvm::DIGlobalVariableExpression *var = m->diBuilder->createGlobalVariableExpression(
1103  cu, sym->name.c_str(), sym->name.c_str(), file, 0 /* line */, diType, false /* static */);
1104  sym_GV_storagePtr->addDebugInfo(var);
1105 #endif
1106 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1107  Assert(var.Verify());
1108 #else // LLVM 3.7+
1109  // coming soon
1110 #endif
1111  }
1112 }
1113 
1114 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
1115 static void emitLLVMUsed(llvm::Module &module, std::vector<llvm::Constant *> &list) {
1116  // Convert list to what ConstantArray needs.
1117  llvm::SmallVector<llvm::Constant *, 8> UsedArray;
1118  UsedArray.reserve(list.size());
1119  for (auto c : list) {
1120  UsedArray.push_back(llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(llvm::cast<llvm::Constant>(c),
1122  }
1123 
1124  llvm::ArrayType *ATy = llvm::ArrayType::get(LLVMTypes::Int8PointerType, UsedArray.size());
1125 
1126  auto *GV = new llvm::GlobalVariable(module, ATy, false, llvm::GlobalValue::AppendingLinkage,
1127  llvm::ConstantArray::get(ATy, UsedArray), "llvm.used");
1128 
1129  GV->setSection("llvm.metadata");
1130 }
1131 #endif
1132 
1133 void DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module, bool includeStdlibISPC) {
1134  // debug_symbols are symbols that supposed to be preserved in debug information.
1135  // They will be referenced in llvm.used intrinsic to prevent they removal from
1136  // the object file.
1137  std::vector<llvm::Constant *> debug_symbols;
1138  bool runtime32 = g->target->is32Bit();
1139  bool warn = g->target->getISA() != Target::GENERIC;
1140  bool target_is_windows = g->target_os == TargetOS::OS_WINDOWS;
1141 
1142 #define EXPORT_MODULE_COND_WARN(export_module, warnings) \
1143  extern const unsigned char export_module[]; \
1144  extern int export_module##_length; \
1145  AddBitcodeToModule(export_module, export_module##_length, module, symbolTable, warnings);
1146 
1147 #define EXPORT_MODULE(export_module) \
1148  extern const unsigned char export_module[]; \
1149  extern int export_module##_length; \
1150  AddBitcodeToModule(export_module, export_module##_length, module, symbolTable, true);
1151 
1152  // Add the definitions from the compiled builtins.c file.
1153  // When compiling for "generic" target family, data layout warnings for
1154  // "builtins_bitcode_c" have to be switched off: its DL is incompatible
1155  // with the DL of "generic". Anyway, AddBitcodeToModule() corrects this
1156  // automatically if DLs differ (by copying module`s DL to export`s DL).
1157 
1158  // Unlike regular builtins and dispatch module, which don't care about mangling of external functions,
1159  // so they only differentiate Windows/Unix and 32/64 bit, builtins-c need to take care about mangling.
1160  // Hence, different version for all potentially supported OSes. Those that are not supported in current
1161  // build are will have zero length.
1162 
1163  switch (g->target_os) {
1164  case TargetOS::OS_WINDOWS:
1165  if (runtime32) {
1166  EXPORT_MODULE_COND_WARN(builtins_bitcode_windows_i386_c_32bit, warn);
1167  } else {
1168  EXPORT_MODULE_COND_WARN(builtins_bitcode_windows_x86_64_c_64bit, warn);
1169  }
1170  break;
1171  case TargetOS::OS_LINUX:
1172  if (runtime32) {
1173  if (g->target->getArch() == "x86") {
1174  EXPORT_MODULE_COND_WARN(builtins_bitcode_linux_i386_c_32bit, warn);
1175  }
1176  if (g->target->getArch() == "arm") {
1177  EXPORT_MODULE_COND_WARN(builtins_bitcode_linux_armv7_c_32bit, warn);
1178  }
1179  } else {
1180  if (g->target->getArch() == "x86-64") {
1181  EXPORT_MODULE_COND_WARN(builtins_bitcode_linux_x86_64_c_64bit, warn);
1182  }
1183  if (g->target->getArch() == "aarch64") {
1184  EXPORT_MODULE_COND_WARN(builtins_bitcode_linux_aarch64_c_64bit, warn);
1185  }
1186  }
1187  break;
1188  case TargetOS::OS_MAC:
1189  if (runtime32) {
1190  Error(SourcePos(), "doesn't exist");
1191  EXPORT_MODULE_COND_WARN(builtins_bitcode_macos_i386_c_32bit, warn);
1192  } else {
1193  EXPORT_MODULE_COND_WARN(builtins_bitcode_macos_x86_64_c_64bit, warn);
1194  }
1195  break;
1196  case TargetOS::OS_ANDROID:
1197  if (runtime32) {
1198  if (g->target->getArch() == "x86") {
1199  EXPORT_MODULE_COND_WARN(builtins_bitcode_android_i386_c_32bit, warn);
1200  }
1201  if (g->target->getArch() == "arm") {
1202  EXPORT_MODULE_COND_WARN(builtins_bitcode_android_armv7_c_32bit, warn);
1203  }
1204  } else {
1205  if (g->target->getArch() == "x86-64") {
1206  EXPORT_MODULE_COND_WARN(builtins_bitcode_android_x86_64_c_64bit, warn);
1207  }
1208  if (g->target->getArch() == "aarch64") {
1209  EXPORT_MODULE_COND_WARN(builtins_bitcode_android_aarch64_c_64bit, warn);
1210  }
1211  }
1212  break;
1213  case TargetOS::OS_IOS:
1214  if (runtime32) {
1215  Error(SourcePos(), "doesn't exist");
1216  EXPORT_MODULE_COND_WARN(builtins_bitcode_ios_i386_c_32bit, warn);
1217  } else {
1218  EXPORT_MODULE_COND_WARN(builtins_bitcode_ios_arm64_c_64bit, warn);
1219  }
1220  break;
1221  case TargetOS::OS_PS4:
1222  if (runtime32) {
1223  Error(SourcePos(), "doesn't exist");
1224  EXPORT_MODULE_COND_WARN(builtins_bitcode_ps4_i386_c_32bit, warn);
1225  } else {
1226  EXPORT_MODULE_COND_WARN(builtins_bitcode_ps4_x86_64_c_64bit, warn);
1227  }
1228  break;
1229  default:
1230  Error(SourcePos(), "Unsupported OS\n");
1231  }
1232 
1233  // NVPTX target is depricated and will be removed soon.
1234  /*
1235  #ifdef ISPC_NVPTX_ENABLED
1236  case Target::NVPTX: {
1237  if (runtime32) {
1238  fprintf(stderr, "Unfortunatly 32bit targets are not supported at the moment .. \n");
1239  assert(0);
1240  } else {
1241  EXPORT_MODULE(builtins_bitcode_nvptx_64bit);
1242  }
1243  break;
1244  };
1245  #endif
1246  */
1247 
1248  // Next, add the target's custom implementations of the various needed
1249  // builtin functions (e.g. __masked_store_32(), etc).
1250  if (target_is_windows) {
1251 #ifdef ISPC_HOST_IS_WINDOWS // supported only on Windows
1252  switch (g->target->getISA()) {
1253 #ifdef ISPC_ARM_ENABLED
1254  case Target::NEON8: {
1255  if (runtime32) {
1256  EXPORT_MODULE(builtins_bitcode_win_neon_i8x16_32bit);
1257  } else {
1258  EXPORT_MODULE(builtins_bitcode_win_neon_i8x16_64bit);
1259  }
1260  break;
1261  }
1262  case Target::NEON16: {
1263  if (runtime32) {
1264  EXPORT_MODULE(builtins_bitcode_win_neon_i16x8_32bit);
1265  } else {
1266  EXPORT_MODULE(builtins_bitcode_win_neon_i16x8_64bit);
1267  }
1268  break;
1269  }
1270  case Target::NEON32: {
1271  switch (g->target->getVectorWidth()) {
1272  case 4:
1273  if (runtime32) {
1274  EXPORT_MODULE(builtins_bitcode_win_neon_i32x4_32bit);
1275  } else {
1276  EXPORT_MODULE(builtins_bitcode_win_neon_i32x4_64bit);
1277  }
1278  break;
1279  case 8:
1280  if (runtime32) {
1281  EXPORT_MODULE(builtins_bitcode_win_neon_i32x8_32bit);
1282  } else {
1283  EXPORT_MODULE(builtins_bitcode_win_neon_i32x8_64bit);
1284  }
1285  break;
1286  default:
1287  FATAL("logic error in DefineStdlib");
1288  }
1289  break;
1290  }
1291 #endif
1292  case Target::SSE2: {
1293  switch (g->target->getVectorWidth()) {
1294  case 4:
1295  if (runtime32) {
1296  EXPORT_MODULE(builtins_bitcode_win_sse2_i32x4_32bit);
1297  } else {
1298  EXPORT_MODULE(builtins_bitcode_win_sse2_i32x4_64bit);
1299  }
1300  break;
1301  case 8:
1302  if (runtime32) {
1303  EXPORT_MODULE(builtins_bitcode_win_sse2_i32x8_32bit);
1304  } else {
1305  EXPORT_MODULE(builtins_bitcode_win_sse2_i32x8_64bit);
1306  }
1307  break;
1308  default:
1309  FATAL("logic error in DefineStdlib");
1310  }
1311  break;
1312  }
1313  case Target::SSE4: {
1314  switch (g->target->getVectorWidth()) {
1315  case 4:
1316  if (runtime32) {
1317  EXPORT_MODULE(builtins_bitcode_win_sse4_i32x4_32bit);
1318  } else {
1319  EXPORT_MODULE(builtins_bitcode_win_sse4_i32x4_64bit);
1320  }
1321  break;
1322  case 8:
1323  if (runtime32) {
1324  if (g->target->getMaskBitCount() == 16) {
1325  EXPORT_MODULE(builtins_bitcode_win_sse4_i16x8_32bit);
1326  } else {
1327  Assert(g->target->getMaskBitCount() == 32);
1328  EXPORT_MODULE(builtins_bitcode_win_sse4_i32x8_32bit);
1329  }
1330  } else {
1331  if (g->target->getMaskBitCount() == 16) {
1332  EXPORT_MODULE(builtins_bitcode_win_sse4_i16x8_64bit);
1333  } else {
1334  Assert(g->target->getMaskBitCount() == 32);
1335  EXPORT_MODULE(builtins_bitcode_win_sse4_i32x8_64bit);
1336  }
1337  }
1338  break;
1339  case 16:
1340  Assert(g->target->getMaskBitCount() == 8);
1341  if (runtime32) {
1342  EXPORT_MODULE(builtins_bitcode_win_sse4_i8x16_32bit);
1343  } else {
1344  EXPORT_MODULE(builtins_bitcode_win_sse4_i8x16_64bit);
1345  }
1346  break;
1347  default:
1348  FATAL("logic error in DefineStdlib");
1349  }
1350  break;
1351  }
1352  case Target::AVX: {
1353  switch (g->target->getVectorWidth()) {
1354  case 4:
1355  if (g->target->getDataTypeWidth() == 32) {
1356  // Note here that for avx1-i32x4 we are using bitcode file for
1357  // sse4-i32x4. This is intentional and good enough.
1358  // AVX target implies appropriate target-feature attrbute,
1359  // which forces LLVM to generate AVX code, even for SSE4
1360  // intrinsics. Except that the only "missing" feature in sse4
1361  // target is implemenation of __masked_[store|load]_[i32|i64]
1362  // using maskmov instruction. But it's not very popular
1363  // intrinsics, so we assume the implementation to be good
1364  // enough at the moment.
1365  if (runtime32) {
1366  EXPORT_MODULE(builtins_bitcode_win_sse4_i32x4_32bit);
1367  } else {
1368  EXPORT_MODULE(builtins_bitcode_win_sse4_i32x4_64bit);
1369  }
1370  } else if (g->target->getDataTypeWidth() == 64) {
1371  if (runtime32) {
1372  EXPORT_MODULE(builtins_bitcode_win_avx1_i64x4_32bit);
1373  } else {
1374  EXPORT_MODULE(builtins_bitcode_win_avx1_i64x4_64bit);
1375  }
1376  } else {
1377  FATAL("logic error in DefineStdlib");
1378  }
1379  break;
1380  case 8:
1381  if (runtime32) {
1382  EXPORT_MODULE(builtins_bitcode_win_avx1_i32x8_32bit);
1383  } else {
1384  EXPORT_MODULE(builtins_bitcode_win_avx1_i32x8_64bit);
1385  }
1386  break;
1387  case 16:
1388  if (runtime32) {
1389  EXPORT_MODULE(builtins_bitcode_win_avx1_i32x16_32bit);
1390  } else {
1391  EXPORT_MODULE(builtins_bitcode_win_avx1_i32x16_64bit);
1392  }
1393  break;
1394  default:
1395  FATAL("logic error in DefineStdlib");
1396  }
1397  break;
1398  }
1399  case Target::AVX2: {
1400  switch (g->target->getVectorWidth()) {
1401  case 4:
1402  if (g->target->getDataTypeWidth() == 32) {
1403  if (runtime32) {
1404  EXPORT_MODULE(builtins_bitcode_win_avx2_i32x4_32bit);
1405  } else {
1406  EXPORT_MODULE(builtins_bitcode_win_avx2_i32x4_64bit);
1407  }
1408  } else if (g->target->getDataTypeWidth() == 64) {
1409  if (runtime32) {
1410  EXPORT_MODULE(builtins_bitcode_win_avx2_i64x4_32bit);
1411  } else {
1412  EXPORT_MODULE(builtins_bitcode_win_avx2_i64x4_64bit);
1413  }
1414  } else {
1415  FATAL("logic error in DefineStdlib");
1416  }
1417  break;
1418  case 8:
1419  if (runtime32) {
1420  EXPORT_MODULE(builtins_bitcode_win_avx2_i32x8_32bit);
1421  } else {
1422  EXPORT_MODULE(builtins_bitcode_win_avx2_i32x8_64bit);
1423  }
1424  break;
1425  case 16:
1426  if (runtime32) {
1427  EXPORT_MODULE(builtins_bitcode_win_avx2_i32x16_32bit);
1428  } else {
1429  EXPORT_MODULE(builtins_bitcode_win_avx2_i32x16_64bit);
1430  }
1431  break;
1432  default:
1433  FATAL("logic error in DefineStdlib");
1434  }
1435  break;
1436  }
1437 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1438  case Target::KNL_AVX512: {
1439  switch (g->target->getVectorWidth()) {
1440  case 16:
1441  if (runtime32) {
1442  EXPORT_MODULE(builtins_bitcode_win_avx512knl_i32x16_32bit);
1443  } else {
1444  EXPORT_MODULE(builtins_bitcode_win_avx512knl_i32x16_64bit);
1445  }
1446  break;
1447  default:
1448  FATAL("logic error in DefineStdlib");
1449  }
1450  break;
1451  }
1452 #endif
1453 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1454  case Target::SKX_AVX512: {
1455  switch (g->target->getVectorWidth()) {
1456  case 8:
1457  if (runtime32) {
1458  EXPORT_MODULE(builtins_bitcode_win_avx512skx_i32x8_32bit);
1459  } else {
1460  EXPORT_MODULE(builtins_bitcode_win_avx512skx_i32x8_64bit);
1461  }
1462  break;
1463  case 16:
1464  if (runtime32) {
1465  EXPORT_MODULE(builtins_bitcode_win_avx512skx_i32x16_32bit);
1466  } else {
1467  EXPORT_MODULE(builtins_bitcode_win_avx512skx_i32x16_64bit);
1468  }
1469  break;
1470  default:
1471  FATAL("logic error in DefineStdlib");
1472  }
1473  break;
1474  }
1475 #endif
1476  case Target::GENERIC: {
1477  switch (g->target->getVectorWidth()) {
1478  case 4:
1479  if (runtime32) {
1480  EXPORT_MODULE(builtins_bitcode_win_generic_4_32bit);
1481  } else {
1482  EXPORT_MODULE(builtins_bitcode_win_generic_4_64bit);
1483  }
1484  break;
1485  case 8:
1486  if (runtime32) {
1487  EXPORT_MODULE(builtins_bitcode_win_generic_8_32bit);
1488  } else {
1489  EXPORT_MODULE(builtins_bitcode_win_generic_8_64bit);
1490  }
1491  break;
1492  case 16:
1493  if (runtime32) {
1494  EXPORT_MODULE(builtins_bitcode_win_generic_16_32bit);
1495  } else {
1496  EXPORT_MODULE(builtins_bitcode_win_generic_16_64bit);
1497  }
1498  break;
1499  case 32:
1500  if (runtime32) {
1501  EXPORT_MODULE(builtins_bitcode_win_generic_32_32bit);
1502  } else {
1503  EXPORT_MODULE(builtins_bitcode_win_generic_32_64bit);
1504  }
1505  break;
1506  case 64:
1507  if (runtime32) {
1508  EXPORT_MODULE(builtins_bitcode_win_generic_64_32bit);
1509  } else {
1510  EXPORT_MODULE(builtins_bitcode_win_generic_64_64bit);
1511  }
1512  break;
1513  case 1:
1514  if (runtime32) {
1515  EXPORT_MODULE(builtins_bitcode_win_generic_1_32bit);
1516  } else {
1517  EXPORT_MODULE(builtins_bitcode_win_generic_1_64bit);
1518  }
1519  break;
1520  default:
1521  FATAL("logic error in DefineStdlib");
1522  }
1523  break;
1524  }
1525  default:
1526  FATAL("logic error");
1527  }
1528 #endif
1529  } else {
1530  switch (g->target->getISA()) {
1531 #ifdef ISPC_ARM_ENABLED
1532  case Target::NEON8: {
1533  if (runtime32) {
1534  EXPORT_MODULE(builtins_bitcode_unix_neon_i8x16_32bit);
1535  } else {
1536  EXPORT_MODULE(builtins_bitcode_unix_neon_i8x16_64bit);
1537  }
1538  break;
1539  }
1540  case Target::NEON16: {
1541  if (runtime32) {
1542  EXPORT_MODULE(builtins_bitcode_unix_neon_i16x8_32bit);
1543  } else {
1544  EXPORT_MODULE(builtins_bitcode_unix_neon_i16x8_64bit);
1545  }
1546  break;
1547  }
1548  case Target::NEON32: {
1549  switch (g->target->getVectorWidth()) {
1550  case 4:
1551  if (runtime32) {
1552  EXPORT_MODULE(builtins_bitcode_unix_neon_i32x4_32bit);
1553  } else {
1554  EXPORT_MODULE(builtins_bitcode_unix_neon_i32x4_64bit);
1555  }
1556  break;
1557  case 8:
1558  if (runtime32) {
1559  EXPORT_MODULE(builtins_bitcode_unix_neon_i32x8_32bit);
1560  } else {
1561  EXPORT_MODULE(builtins_bitcode_unix_neon_i32x8_64bit);
1562  }
1563  break;
1564  default:
1565  FATAL("logic error in DefineStdlib");
1566  }
1567  break;
1568  }
1569 #endif
1570  case Target::SSE2: {
1571  switch (g->target->getVectorWidth()) {
1572  case 4:
1573  if (runtime32) {
1574  EXPORT_MODULE(builtins_bitcode_unix_sse2_i32x4_32bit);
1575  } else {
1576  EXPORT_MODULE(builtins_bitcode_unix_sse2_i32x4_64bit);
1577  }
1578  break;
1579  case 8:
1580  if (runtime32) {
1581  EXPORT_MODULE(builtins_bitcode_unix_sse2_i32x8_32bit);
1582  } else {
1583  EXPORT_MODULE(builtins_bitcode_unix_sse2_i32x8_64bit);
1584  }
1585  break;
1586  default:
1587  FATAL("logic error in DefineStdlib");
1588  }
1589  break;
1590  }
1591  case Target::SSE4: {
1592  switch (g->target->getVectorWidth()) {
1593  case 4:
1594  if (runtime32) {
1595  EXPORT_MODULE(builtins_bitcode_unix_sse4_i32x4_32bit);
1596  } else {
1597  EXPORT_MODULE(builtins_bitcode_unix_sse4_i32x4_64bit);
1598  }
1599  break;
1600  case 8:
1601  if (runtime32) {
1602  if (g->target->getMaskBitCount() == 16) {
1603  EXPORT_MODULE(builtins_bitcode_unix_sse4_i16x8_32bit);
1604  } else {
1605  Assert(g->target->getMaskBitCount() == 32);
1606  EXPORT_MODULE(builtins_bitcode_unix_sse4_i32x8_32bit);
1607  }
1608  } else {
1609  if (g->target->getMaskBitCount() == 16) {
1610  EXPORT_MODULE(builtins_bitcode_unix_sse4_i16x8_64bit);
1611  } else {
1612  Assert(g->target->getMaskBitCount() == 32);
1613  EXPORT_MODULE(builtins_bitcode_unix_sse4_i32x8_64bit);
1614  }
1615  }
1616  break;
1617  case 16:
1618  Assert(g->target->getMaskBitCount() == 8);
1619  if (runtime32) {
1620  EXPORT_MODULE(builtins_bitcode_unix_sse4_i8x16_32bit);
1621  } else {
1622  EXPORT_MODULE(builtins_bitcode_unix_sse4_i8x16_64bit);
1623  }
1624  break;
1625  default:
1626  FATAL("logic error in DefineStdlib");
1627  }
1628  break;
1629  }
1630  case Target::AVX: {
1631  switch (g->target->getVectorWidth()) {
1632  case 4:
1633  if (g->target->getDataTypeWidth() == 32) {
1634  // Note here that for avx1-i32x4 we are using bitcode file for
1635  // sse4-i32x4. This is intentional and good enough.
1636  // AVX target implies appropriate target-feature attrbute,
1637  // which forces LLVM to generate AVX code, even for SSE4
1638  // intrinsics. Except that the only "missing" feature in sse4
1639  // target is implemenation of __masked_[store|load]_[i32|i64]
1640  // using maskmov instruction. But it's not very popular
1641  // intrinsics, so we assume the implementation to be good
1642  // enough at the moment.
1643  if (runtime32) {
1644  EXPORT_MODULE(builtins_bitcode_unix_sse4_i32x4_32bit);
1645  } else {
1646  EXPORT_MODULE(builtins_bitcode_unix_sse4_i32x4_64bit);
1647  }
1648  } else if (g->target->getDataTypeWidth() == 64) {
1649  if (runtime32) {
1650  EXPORT_MODULE(builtins_bitcode_unix_avx1_i64x4_32bit);
1651  } else {
1652  EXPORT_MODULE(builtins_bitcode_unix_avx1_i64x4_64bit);
1653  }
1654  } else {
1655  FATAL("logic error in DefineStdlib");
1656  }
1657  break;
1658  case 8:
1659  if (runtime32) {
1660  EXPORT_MODULE(builtins_bitcode_unix_avx1_i32x8_32bit);
1661  } else {
1662  EXPORT_MODULE(builtins_bitcode_unix_avx1_i32x8_64bit);
1663  }
1664  break;
1665  case 16:
1666  if (runtime32) {
1667  EXPORT_MODULE(builtins_bitcode_unix_avx1_i32x16_32bit);
1668  } else {
1669  EXPORT_MODULE(builtins_bitcode_unix_avx1_i32x16_64bit);
1670  }
1671  break;
1672  default:
1673  FATAL("logic error in DefineStdlib");
1674  }
1675  break;
1676  }
1677  case Target::AVX2: {
1678  switch (g->target->getVectorWidth()) {
1679  case 4:
1680  if (g->target->getDataTypeWidth() == 32) {
1681  if (runtime32) {
1682  EXPORT_MODULE(builtins_bitcode_unix_avx2_i32x4_32bit);
1683  } else {
1684  EXPORT_MODULE(builtins_bitcode_unix_avx2_i32x4_64bit);
1685  }
1686  } else if (g->target->getDataTypeWidth() == 64) {
1687  if (runtime32) {
1688  EXPORT_MODULE(builtins_bitcode_unix_avx2_i64x4_32bit);
1689  } else {
1690  EXPORT_MODULE(builtins_bitcode_unix_avx2_i64x4_64bit);
1691  }
1692  } else {
1693  FATAL("logic error in DefineStdlib");
1694  }
1695  break;
1696  case 8:
1697  if (runtime32) {
1698  EXPORT_MODULE(builtins_bitcode_unix_avx2_i32x8_32bit);
1699  } else {
1700  EXPORT_MODULE(builtins_bitcode_unix_avx2_i32x8_64bit);
1701  }
1702  break;
1703  case 16:
1704  if (runtime32) {
1705  EXPORT_MODULE(builtins_bitcode_unix_avx2_i32x16_32bit);
1706  } else {
1707  EXPORT_MODULE(builtins_bitcode_unix_avx2_i32x16_64bit);
1708  }
1709  break;
1710  default:
1711  FATAL("logic error in DefineStdlib");
1712  }
1713  break;
1714  }
1715 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1716  case Target::KNL_AVX512: {
1717  switch (g->target->getVectorWidth()) {
1718  case 16:
1719  if (runtime32) {
1720  EXPORT_MODULE(builtins_bitcode_unix_avx512knl_i32x16_32bit);
1721  } else {
1722  EXPORT_MODULE(builtins_bitcode_unix_avx512knl_i32x16_64bit);
1723  }
1724  break;
1725  default:
1726  FATAL("logic error in DefineStdlib");
1727  }
1728  break;
1729  }
1730 #endif
1731 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1732  case Target::SKX_AVX512: {
1733  switch (g->target->getVectorWidth()) {
1734  case 8:
1735  if (runtime32) {
1736  EXPORT_MODULE(builtins_bitcode_unix_avx512skx_i32x8_32bit);
1737  } else {
1738  EXPORT_MODULE(builtins_bitcode_unix_avx512skx_i32x8_64bit);
1739  }
1740  break;
1741  case 16:
1742  if (runtime32) {
1743  EXPORT_MODULE(builtins_bitcode_unix_avx512skx_i32x16_32bit);
1744  } else {
1745  EXPORT_MODULE(builtins_bitcode_unix_avx512skx_i32x16_64bit);
1746  }
1747  break;
1748  default:
1749  FATAL("logic error in DefineStdlib");
1750  }
1751  break;
1752  }
1753 #endif
1754  case Target::GENERIC: {
1755  switch (g->target->getVectorWidth()) {
1756  case 4:
1757  if (runtime32) {
1758  EXPORT_MODULE(builtins_bitcode_unix_generic_4_32bit);
1759  } else {
1760  EXPORT_MODULE(builtins_bitcode_unix_generic_4_64bit);
1761  }
1762  break;
1763  case 8:
1764  if (runtime32) {
1765  EXPORT_MODULE(builtins_bitcode_unix_generic_8_32bit);
1766  } else {
1767  EXPORT_MODULE(builtins_bitcode_unix_generic_8_64bit);
1768  }
1769  break;
1770  case 16:
1771  if (runtime32) {
1772  EXPORT_MODULE(builtins_bitcode_unix_generic_16_32bit);
1773  } else {
1774  EXPORT_MODULE(builtins_bitcode_unix_generic_16_64bit);
1775  }
1776  break;
1777  case 32:
1778  if (runtime32) {
1779  EXPORT_MODULE(builtins_bitcode_unix_generic_32_32bit);
1780  } else {
1781  EXPORT_MODULE(builtins_bitcode_unix_generic_32_64bit);
1782  }
1783  break;
1784  case 64:
1785  if (runtime32) {
1786  EXPORT_MODULE(builtins_bitcode_unix_generic_64_32bit);
1787  } else {
1788  EXPORT_MODULE(builtins_bitcode_unix_generic_64_64bit);
1789  }
1790  break;
1791  case 1:
1792  if (runtime32) {
1793  EXPORT_MODULE(builtins_bitcode_unix_generic_1_32bit);
1794  } else {
1795  EXPORT_MODULE(builtins_bitcode_unix_generic_1_64bit);
1796  }
1797  break;
1798  default:
1799  FATAL("logic error in DefineStdlib");
1800  }
1801  break;
1802  }
1803  default:
1804  FATAL("logic error");
1805  }
1806  }
1807 
1808  // define the 'programCount' builtin variable
1809 #ifdef ISPC_NVPTX_ENABLED
1810  if (g->target->getISA() == Target::NVPTX) {
1811  lDefineConstantInt("programCount", 32, module, symbolTable, debug_symbols);
1812  } else {
1813 #endif /* ISPC_NVPTX_ENABLED */
1814  lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable, debug_symbols);
1815 #ifdef ISPC_NVPTX_ENABLED
1816  }
1817 #endif /* ISPC_NVPTX_ENABLED */
1818 
1819  // define the 'programIndex' builtin
1820  lDefineProgramIndex(module, symbolTable, debug_symbols);
1821 
1822  // Define __math_lib stuff. This is used by stdlib.ispc, for example, to
1823  // figure out which math routines to end up calling...
1824  lDefineConstantInt("__math_lib", (int)g->mathLib, module, symbolTable, debug_symbols);
1825  lDefineConstantInt("__math_lib_ispc", (int)Globals::Math_ISPC, module, symbolTable, debug_symbols);
1826  lDefineConstantInt("__math_lib_ispc_fast", (int)Globals::Math_ISPCFast, module, symbolTable, debug_symbols);
1827  lDefineConstantInt("__math_lib_svml", (int)Globals::Math_SVML, module, symbolTable, debug_symbols);
1828  lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module, symbolTable, debug_symbols);
1829  lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module, symbolTable, debug_symbols);
1830 
1831  lDefineConstantInt("__have_native_half", g->target->hasHalf(), module, symbolTable, debug_symbols);
1832  lDefineConstantInt("__have_native_rand", g->target->hasRand(), module, symbolTable, debug_symbols);
1833  lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(), module, symbolTable,
1834  debug_symbols);
1835  lDefineConstantInt("__have_native_trigonometry", g->target->hasTrigonometry(), module, symbolTable, debug_symbols);
1836  lDefineConstantInt("__have_native_rsqrtd", g->target->hasRsqrtd(), module, symbolTable, debug_symbols);
1837  lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(), module, symbolTable, debug_symbols);
1838 
1839 #ifdef ISPC_NVPTX_ENABLED
1840  lDefineConstantInt("__is_nvptx_target", (int)(g->target->getISA() == Target::NVPTX), module, symbolTable,
1841  debug_symbols);
1842 #else
1843  lDefineConstantInt("__is_nvptx_target", (int)0, module, symbolTable, debug_symbols);
1844 #endif /* ISPC_NVPTX_ENABLED */
1845 
1846  if (g->forceAlignment != -1) {
1847  llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true);
1848  alignment->setInitializer(LLVMInt32(g->forceAlignment));
1849  }
1850 
1851  // LLVM 3.6 is only because it was not tested with earlier versions.
1852 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+
1853  if (g->generateDebuggingSymbols) {
1854  emitLLVMUsed(*module, debug_symbols);
1855  }
1856 #endif
1857 
1858  if (includeStdlibISPC) {
1859  // If the user wants the standard library to be included, parse the
1860  // serialized version of the stdlib.ispc file to get its
1861  // definitions added.
1862  extern const char stdlib_mask1_code[], stdlib_mask8_code[];
1863  extern const char stdlib_mask16_code[], stdlib_mask32_code[], stdlib_mask64_code[];
1864  if (g->target->getISA() == Target::GENERIC && g->target->getVectorWidth() == 1) { // 1 wide uses 32 stdlib
1865  yy_scan_string(stdlib_mask32_code);
1866  } else {
1867  switch (g->target->getMaskBitCount()) {
1868  case 1:
1869  yy_scan_string(stdlib_mask1_code);
1870  break;
1871  case 8:
1872  yy_scan_string(stdlib_mask8_code);
1873  break;
1874  case 16:
1875  yy_scan_string(stdlib_mask16_code);
1876  break;
1877  case 32:
1878  yy_scan_string(stdlib_mask32_code);
1879  break;
1880  case 64:
1881  yy_scan_string(stdlib_mask64_code);
1882  break;
1883  default:
1884  FATAL("Unhandled mask bit size for stdlib.ispc");
1885  }
1886  }
1887  yyparse();
1888  }
1889 }
llvm::Value * storagePtr
Definition: sym.h:71
static const AtomicType * VaryingInt32
Definition: type.h:335
static llvm::Type * FloatType
Definition: llvmutil.h:79
static bool lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable)
Definition: builtins.cpp:183
void AddBitcodeToModule(const unsigned char *bitcode, int length, llvm::Module *module, SymbolTable *symbolTable, bool warn)
Definition: builtins.cpp:829
TargetOS target_os
Definition: ispc.h:547
static const AtomicType * VaryingInt16
Definition: type.h:334
llvm::Function * function
Definition: sym.h:75
static llvm::Type * Int32VectorPointerType
Definition: llvmutil.h:102
void DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module, bool includeStdlibISPC)
Definition: builtins.cpp:1133
#define EXPORT_MODULE(export_module)
Opt opt
Definition: ispc.h:542
Declaration of the FunctionEmitContext class
bool AddFunction(Symbol *symbol)
Definition: sym.cpp:126
static void lDefineConstantInt(const char *name, int val, llvm::Module *module, SymbolTable *symbolTable, std::vector< llvm::Constant *> &dbg_sym)
Definition: builtins.cpp:970
static const AtomicType * VaryingUInt64
Definition: type.h:341
static llvm::Type * DoubleType
Definition: llvmutil.h:80
Module * m
Definition: ispc.cpp:102
static void lCheckModuleIntrinsics(llvm::Module *module)
Definition: builtins.cpp:283
Target * target
Definition: ispc.h:544
bool AddVariable(Symbol *symbol)
Definition: sym.cpp:85
virtual llvm::DIType GetDIType(llvm::DIDescriptor scope) const =0
static const AtomicType * VaryingDouble
Definition: type.h:342
Expression representing a compile-time constant value.
Definition: expr.h:362
static llvm::Type * BoolType
Definition: llvmutil.h:73
Symbol table that holds all known symbols during parsing and compilation.
Definition: sym.h:117
static void lDefineConstantIntFunc(const char *name, int val, llvm::Module *module, SymbolTable *symbolTable, std::vector< llvm::Constant *> &dbg_sym)
Definition: builtins.cpp:1031
llvm::Constant * LLVMInt32Vector(int32_t i)
Definition: llvmutil.cpp:308
static llvm::VectorType * Int32VectorType
Definition: llvmutil.h:95
int getMaskBitCount() const
Definition: ispc.h:285
static const AtomicType * UniformUInt32
Definition: type.h:338
static llvm::Type * FloatVectorPointerType
Definition: llvmutil.h:104
int getDataTypeWidth() const
Definition: ispc.h:277
Declarations of functions related to builtins and the standard library.
bool hasHalf() const
Definition: ispc.h:287
static llvm::Type * Int8PointerType
Definition: llvmutil.h:82
static llvm::Type * Int32PointerType
Definition: llvmutil.h:84
std::string name
Definition: sym.h:70
static llvm::Type * Int16VectorPointerType
Definition: llvmutil.h:101
static llvm::Type * Int16Type
Definition: llvmutil.h:76
static llvm::Type * DoubleVectorPointerType
Definition: llvmutil.h:105
std::string getArch() const
Definition: ispc.h:267
static const AtomicType * UniformUInt16
Definition: type.h:337
static void lSetInternalFunctions(llvm::Module *module)
Definition: builtins.cpp:320
static PointerType * GetUniform(const Type *t, bool isSlice=false)
Definition: type.cpp:899
#define Assert(expr)
Definition: ispc.h:163
ConstExpr * constValue
Definition: sym.h:86
Header file with declarations for symbol and symbol table classes.
static const AtomicType * UniformBool
Definition: type.h:332
static llvm::Type * VoidType
Definition: llvmutil.h:70
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:228
static void lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable, std::vector< llvm::Constant *> &dbg_sym)
Definition: builtins.cpp:1052
llvm::Module * module
Definition: module.h:156
static llvm::Type * Int8VectorPointerType
Definition: llvmutil.h:100
Globals * g
Definition: ispc.cpp:101
static const AtomicType * UniformUInt64
Definition: type.h:341
static llvm::VectorType * Int8VectorType
Definition: llvmutil.h:93
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:351
bool VerifyDataLayoutCompatibility(const std::string &module_dl, const std::string &lib_dl)
Definition: util.cpp:565
static llvm::VectorType * FloatVectorType
Definition: llvmutil.h:97
static llvm::Type * Int64Type
Definition: llvmutil.h:78
static llvm::Type * Int8Type
Definition: llvmutil.h:75
static llvm::VectorType * Int64VectorType
Definition: llvmutil.h:96
Header file with declarations for various LLVM utility stuff.
static llvm::Type * Int64PointerType
Definition: llvmutil.h:85
bool hasRcpd() const
Definition: ispc.h:301
static llvm::Type * FloatPointerType
Definition: llvmutil.h:86
static void lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable)
Definition: builtins.cpp:260
bool hasRand() const
Definition: ispc.h:289
Representation of a range of positions in a source file.
Definition: ispc.h:131
static llvm::Type * Int16PointerType
Definition: llvmutil.h:83
bool generateDebuggingSymbols
Definition: ispc.h:620
Definition: ispc.h:119
static const AtomicType * VaryingBool
Definition: type.h:332
bool hasTranscendentals() const
Definition: ispc.h:295
bool fastMaskedVload
Definition: ispc.h:442
const char * name
Definition: ispc.h:134
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:375
static const AtomicType * VaryingInt64
Definition: type.h:340
#define EXPORT_MODULE_COND_WARN(export_module, warnings)
int getVectorWidth() const
Definition: ispc.h:279
#define FATAL(message)
Definition: util.h:112
int yyparse()
static const AtomicType * UniformUInt8
Definition: type.h:336
static llvm::Type * Int64VectorPointerType
Definition: llvmutil.h:103
static llvm::Type * Int32Type
Definition: llvmutil.h:77
MathLib mathLib
Definition: ispc.h:552
static llvm::Type * DoublePointerType
Definition: llvmutil.h:87
#define ISPC_MAX_NVEC
Definition: ispc.h:66
Definition: ispc.h:119
static bool Equal(const Type *a, const Type *b)
Definition: type.cpp:3114
static const AtomicType * VaryingUInt16
Definition: type.h:337
bool hasTrigonometry() const
Definition: ispc.h:297
static const AtomicType * VaryingInt8
Definition: type.h:333
static const AtomicType * UniformFloat
Definition: type.h:339
Definition: ispc.h:119
ISA getISA() const
Definition: ispc.h:263
static const AtomicType * UniformInt32
Definition: type.h:335
Type representing a function (return type + argument types)
Definition: type.h:858
Representation of a program symbol.
Definition: sym.h:63
Interface class that defines the type abstraction.
Definition: type.h:95
static const AtomicType * UniformDouble
Definition: type.h:342
Expr abstract base class and expression implementations.
static const AtomicType * Void
Definition: type.h:343
static llvm::VectorType * MaskType
Definition: llvmutil.h:89
int forceAlignment
Definition: ispc.h:661
yy_buffer_state * yy_scan_string(const char *)
static llvm::VectorType * DoubleVectorType
Definition: llvmutil.h:98
bool hasRsqrtd() const
Definition: ispc.h:299
static llvm::VectorType * Int16VectorType
Definition: llvmutil.h:94
static const AtomicType * VaryingUInt8
Definition: type.h:336
Definition: ispc.h:119
bool is32Bit() const
Definition: ispc.h:269
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
static const AtomicType * UniformInt64
Definition: type.h:340
llvm::LLVMContext * ctx
Definition: ispc.h:645
static const AtomicType * UniformInt16
Definition: type.h:334
const Type * type
Definition: sym.h:83
llvm::DIBuilder * diBuilder
Definition: module.h:159
std::string GetString() const
Definition: type.cpp:2568
void Debug(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:363
static const Type * lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned)
Definition: builtins.cpp:98
static const AtomicType * VaryingUInt32
Definition: type.h:338
static const AtomicType * VaryingFloat
Definition: type.h:339
static void lCreateSymbol(const std::string &name, const Type *returnType, llvm::SmallVector< const Type *, 8 > &argTypes, const llvm::FunctionType *ftype, llvm::Function *func, SymbolTable *symbolTable)
Definition: builtins.cpp:165
static const AtomicType * UniformInt8
Definition: type.h:333
File with declarations for classes related to type representation.