Intel SPMD Program Compiler  1.9.1
builtins.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2010-2015, Intel Corporation
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are
7  met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  * Neither the name of Intel Corporation nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 /** @file builtins.cpp
35  @brief Definitions of functions related to setting up the standard library
36  and other builtins.
37 */
38 
39 #include "builtins.h"
40 #include "type.h"
41 #include "util.h"
42 #include "sym.h"
43 #include "expr.h"
44 #include "llvmutil.h"
45 #include "module.h"
46 #include "ctx.h"
47 
48 #include <math.h>
49 #include <stdlib.h>
50 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
51  #include <llvm/Attributes.h>
52  #include <llvm/LLVMContext.h>
53  #include <llvm/Module.h>
54  #include <llvm/Type.h>
55  #include <llvm/Instructions.h>
56  #include <llvm/Intrinsics.h>
57  #include <llvm/DerivedTypes.h>
58 #else
59  #include <llvm/IR/Attributes.h>
60  #include <llvm/IR/LLVMContext.h>
61  #include <llvm/IR/Module.h>
62  #include <llvm/IR/Type.h>
63  #include <llvm/IR/Instructions.h>
64  #include <llvm/IR/Intrinsics.h>
65  #include <llvm/IR/DerivedTypes.h>
66 #endif
67 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
68  #include <llvm/Linker/Linker.h>
69 #else
70  #include <llvm/Linker.h>
71 #endif
72 #include <llvm/Target/TargetMachine.h>
73 #include <llvm/ADT/Triple.h>
74 #include <llvm/Support/MemoryBuffer.h>
75 #include <llvm/Bitcode/ReaderWriter.h>
76 
77 extern int yyparse();
78 struct yy_buffer_state;
79 extern yy_buffer_state *yy_scan_string(const char *);
80 
81 
82 /** Given an LLVM type, try to find the equivalent ispc type. Note that
83  this is an under-constrained problem due to LLVM's type representations
84  carrying less information than ispc's. (For example, LLVM doesn't
85  distinguish between signed and unsigned integers in its types.)
86 
87  Because this function is only used for generating ispc declarations of
88  functions defined in LLVM bitcode in the builtins-*.ll files, in practice
89  we can get enough of what we need for the relevant cases to make things
90  work, partially with the help of the intAsUnsigned parameter, which
91  indicates whether LLVM integer types should be treated as being signed
92  or unsigned.
93 
94  */
95 static const Type *
96 lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
97  if (t == LLVMTypes::VoidType)
98  return AtomicType::Void;
99 
100  // uniform
101  else if (t == LLVMTypes::BoolType)
103  else if (t == LLVMTypes::Int8Type)
104  return intAsUnsigned ? AtomicType::UniformUInt8 : AtomicType::UniformInt8;
105  else if (t == LLVMTypes::Int16Type)
106  return intAsUnsigned ? AtomicType::UniformUInt16 : AtomicType::UniformInt16;
107  else if (t == LLVMTypes::Int32Type)
108  return intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32;
109  else if (t == LLVMTypes::FloatType)
111  else if (t == LLVMTypes::DoubleType)
113  else if (t == LLVMTypes::Int64Type)
114  return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
115 
116  // varying
117  if (t == LLVMTypes::Int8VectorType)
118  return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
119  else if (t == LLVMTypes::Int16VectorType)
120  return intAsUnsigned ? AtomicType::VaryingUInt16 : AtomicType::VaryingInt16;
121  else if (t == LLVMTypes::Int32VectorType)
122  return intAsUnsigned ? AtomicType::VaryingUInt32 : AtomicType::VaryingInt32;
123  else if (t == LLVMTypes::FloatVectorType)
125  else if (t == LLVMTypes::DoubleVectorType)
127  else if (t == LLVMTypes::Int64VectorType)
128  return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
129  else if (t == LLVMTypes::MaskType)
131 
132  // pointers to uniform
133  else if (t == LLVMTypes::Int8PointerType)
134  return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt8 :
136  else if (t == LLVMTypes::Int16PointerType)
137  return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt16 :
139  else if (t == LLVMTypes::Int32PointerType)
140  return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt32 :
142  else if (t == LLVMTypes::Int64PointerType)
143  return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt64 :
145  else if (t == LLVMTypes::FloatPointerType)
147  else if (t == LLVMTypes::DoublePointerType)
149 
150  // pointers to varying
151  else if (t == LLVMTypes::Int8VectorPointerType)
152  return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt8 :
154  else if (t == LLVMTypes::Int16VectorPointerType)
155  return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt16 :
157  else if (t == LLVMTypes::Int32VectorPointerType)
158  return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt32 :
160  else if (t == LLVMTypes::Int64VectorPointerType)
161  return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt64 :
163  else if (t == LLVMTypes::FloatVectorPointerType)
167 
168  return NULL;
169 }
170 
171 
172 static void
173 lCreateSymbol(const std::string &name, const Type *returnType,
174  llvm::SmallVector<const Type *, 8> &argTypes,
175  const llvm::FunctionType *ftype, llvm::Function *func,
176  SymbolTable *symbolTable) {
177  SourcePos noPos;
178  noPos.name = "__stdlib";
179 
180  FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
181 
182  Debug(noPos, "Created builtin symbol \"%s\" [%s]\n", name.c_str(),
183  funcType->GetString().c_str());
184 
185  Symbol *sym = new Symbol(name, noPos, funcType);
186  sym->function = func;
187  symbolTable->AddFunction(sym);
188 }
189 
190 
191 /** Given an LLVM function declaration, synthesize the equivalent ispc
192  symbol for the function (if possible). Returns true on success, false
193  on failure.
194  */
195 static bool
196 lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
197  SourcePos noPos;
198  noPos.name = "__stdlib";
199 
200  const llvm::FunctionType *ftype = func->getFunctionType();
201  std::string name = func->getName();
202 
203  if (name.size() < 3 || name[0] != '_' || name[1] != '_')
204  return false;
205 
206  Debug(SourcePos(), "Attempting to create ispc symbol for function \"%s\".",
207  name.c_str());
208 
209  // An unfortunate hack: we want this builtin function to have the
210  // signature "int __sext_varying_bool(bool)", but the ispc function
211  // symbol creation code below assumes that any LLVM vector of i32s is a
212  // varying int32. Here, we need that to be interpreted as a varying
213  // bool, so just have a one-off override for that one...
214  if (g->target->getMaskBitCount() != 1 && name == "__sext_varying_bool") {
215  const Type *returnType = AtomicType::VaryingInt32;
216  llvm::SmallVector<const Type *, 8> argTypes;
217  argTypes.push_back(AtomicType::VaryingBool);
218 
219  FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
220 
221  Symbol *sym = new Symbol(name, noPos, funcType);
222  sym->function = func;
223  symbolTable->AddFunction(sym);
224  return true;
225  }
226 
227  // If the function has any parameters with integer types, we'll make
228  // two Symbols for two overloaded versions of the function, one with
229  // all of the integer types treated as signed integers and one with all
230  // of them treated as unsigned.
231  for (int i = 0; i < 2; ++i) {
232  bool intAsUnsigned = (i == 1);
233 
234  const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType(),
235  intAsUnsigned);
236  if (returnType == NULL) {
237  Debug(SourcePos(), "Failed: return type not representable for "
238  "builtin %s.", name.c_str());
239  // return type not representable in ispc -> not callable from ispc
240  return false;
241  }
242 
243  // Iterate over the arguments and try to find their equivalent ispc
244  // types. Track if any of the arguments has an integer type.
245  bool anyIntArgs = false;
246  llvm::SmallVector<const Type *, 8> argTypes;
247  for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
248  const llvm::Type *llvmArgType = ftype->getParamType(j);
249  const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
250  if (type == NULL) {
251  Debug(SourcePos(), "Failed: type of parameter %d not "
252  "representable for builtin %s", j, name.c_str());
253  return false;
254  }
255  anyIntArgs |=
256  (Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
257  argTypes.push_back(type);
258  }
259 
260  // Always create the symbol the first time through, in particular
261  // so that we get symbols for things with no integer types!
262  if (i == 0 || anyIntArgs == true)
263  lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable);
264  }
265 
266  return true;
267 }
268 
269 
270 /** Given an LLVM module, create ispc symbols for the functions in the
271  module.
272  */
273 static void
274 lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
275 #if 0
276  // FIXME: handle globals?
277  Assert(module->global_empty());
278 #endif
279 
280  llvm::Module::iterator iter;
281  for (iter = module->begin(); iter != module->end(); ++iter) {
282 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
283  llvm::Function *func = iter;
284 #else /* LLVM 3.8+ */
285  llvm::Function *func = &*iter;
286 #endif
287  lCreateISPCSymbol(func, symbolTable);
288  }
289 }
290 
291 
292 /** In many of the builtins-*.ll files, we have declarations of various LLVM
293  intrinsics that are then used in the implementation of various target-
294  specific functions. This function loops over all of the intrinsic
295  declarations and makes sure that the signature we have in our .ll file
296  matches the signature of the actual intrinsic.
297 */
298 static void
299 lCheckModuleIntrinsics(llvm::Module *module) {
300  llvm::Module::iterator iter;
301  for (iter = module->begin(); iter != module->end(); ++iter) {
302 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
303  llvm::Function *func = iter;
304 #else /* LLVM 3.8+ */
305  llvm::Function *func = &*iter;
306 #endif
307  if (!func->isIntrinsic())
308  continue;
309 
310  const std::string funcName = func->getName().str();
311  // Work around http://llvm.org/bugs/show_bug.cgi?id=10438; only
312  // check the llvm.x86.* intrinsics for now...
313  if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
314  llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
315  if (id == 0) fprintf(stderr, "FATAL: intrinsic is not found: %s \n", funcName.c_str());
316  Assert(id != 0);
317  llvm::Type *intrinsicType =
318  llvm::Intrinsic::getType(*g->ctx, id);
319  intrinsicType = llvm::PointerType::get(intrinsicType, 0);
320  Assert(func->getType() == intrinsicType);
321  }
322  }
323 }
324 
325 
326 /** We'd like to have all of these functions declared as 'internal' in
327  their respective bitcode files so that if they aren't needed by the
328  user's program they are elimiated from the final output. However, if
329  we do so, then they aren't brought in by the LinkModules() call below
330  since they aren't yet used by anything in the module they're being
331  linked with (in LLVM 3.1, at least).
332 
333  Therefore, we don't declare them as internal when we first define them,
334  but instead mark them as internal after they've been linked in. This
335  is admittedly a kludge.
336  */
337 static void
338 lSetInternalFunctions(llvm::Module *module) {
339  const char *names[] = {
340  "__add_float",
341  "__add_int32",
342  "__add_uniform_double",
343  "__add_uniform_int32",
344  "__add_uniform_int64",
345  "__add_varying_double",
346  "__add_varying_int32",
347  "__add_varying_int64",
348  "__all",
349  "__any",
350  "__aos_to_soa3_float",
351 //#ifdef ISPC_NVPTX_ENABLED
352  "__aos_to_soa3_float1",
353 //#endif /* ISPC_NVPTX_ENABLED */
354  "__aos_to_soa3_float16",
355  "__aos_to_soa3_float4",
356  "__aos_to_soa3_float8",
357  "__aos_to_soa3_int32",
358  "__aos_to_soa4_float",
359 //#ifdef ISPC_NVPTX_ENABLED
360  "__aos_to_soa4_float1",
361 //#endif /* ISPC_NVPTX_ENABLED */
362  "__aos_to_soa4_float16",
363  "__aos_to_soa4_float4",
364  "__aos_to_soa4_float8",
365  "__aos_to_soa4_int32",
366  "__atomic_add_int32_global",
367  "__atomic_add_int64_global",
368  "__atomic_add_uniform_int32_global",
369  "__atomic_add_uniform_int64_global",
370  "__atomic_and_int32_global",
371  "__atomic_and_int64_global",
372  "__atomic_and_uniform_int32_global",
373  "__atomic_and_uniform_int64_global",
374  "__atomic_compare_exchange_double_global",
375  "__atomic_compare_exchange_float_global",
376  "__atomic_compare_exchange_int32_global",
377  "__atomic_compare_exchange_int64_global",
378  "__atomic_compare_exchange_uniform_double_global",
379  "__atomic_compare_exchange_uniform_float_global",
380  "__atomic_compare_exchange_uniform_int32_global",
381  "__atomic_compare_exchange_uniform_int64_global",
382  "__atomic_max_uniform_int32_global",
383  "__atomic_max_uniform_int64_global",
384  "__atomic_min_uniform_int32_global",
385  "__atomic_min_uniform_int64_global",
386  "__atomic_or_int32_global",
387  "__atomic_or_int64_global",
388  "__atomic_or_uniform_int32_global",
389  "__atomic_or_uniform_int64_global",
390  "__atomic_sub_int32_global",
391  "__atomic_sub_int64_global",
392  "__atomic_sub_uniform_int32_global",
393  "__atomic_sub_uniform_int64_global",
394  "__atomic_swap_double_global",
395  "__atomic_swap_float_global",
396  "__atomic_swap_int32_global",
397  "__atomic_swap_int64_global",
398  "__atomic_swap_uniform_double_global",
399  "__atomic_swap_uniform_float_global",
400  "__atomic_swap_uniform_int32_global",
401  "__atomic_swap_uniform_int64_global",
402  "__atomic_umax_uniform_uint32_global",
403  "__atomic_umax_uniform_uint64_global",
404  "__atomic_umin_uniform_uint32_global",
405  "__atomic_umin_uniform_uint64_global",
406  "__atomic_xor_int32_global",
407  "__atomic_xor_int64_global",
408  "__atomic_xor_uniform_int32_global",
409  "__atomic_xor_uniform_int64_global",
410 //#ifdef ISPC_NVPTX_ENABLED
411  "__atomic_add_varying_int32_global",
412  "__atomic_add_varying_int64_global",
413  "__atomic_and_varying_int32_global",
414  "__atomic_and_varying_int64_global",
415  "__atomic_compare_exchange_varying_double_global",
416  "__atomic_compare_exchange_varying_float_global",
417  "__atomic_compare_exchange_varying_int32_global",
418  "__atomic_compare_exchange_varying_int64_global",
419  "__atomic_max_varying_int32_global",
420  "__atomic_max_varying_int64_global",
421  "__atomic_min_varying_int32_global",
422  "__atomic_min_varying_int64_global",
423  "__atomic_or_varying_int32_global",
424  "__atomic_or_varying_int64_global",
425  "__atomic_sub_varying_int32_global",
426  "__atomic_sub_varying_int64_global",
427  "__atomic_swap_varying_double_global",
428  "__atomic_swap_varying_float_global",
429  "__atomic_swap_varying_int32_global",
430  "__atomic_swap_varying_int64_global",
431  "__atomic_umax_varying_uint32_global",
432  "__atomic_umax_varying_uint64_global",
433  "__atomic_umin_varying_uint32_global",
434  "__atomic_umin_varying_uint64_global",
435  "__atomic_xor_uniform_int32_global",
436  "__atomic_xor_uniform_int64_global",
437  "__atomic_xor_varying_int32_global",
438  "__atomic_xor_varying_int64_global",
439  "__atomic_xor_varying_int32_global",
440  "__atomic_xor_varying_int64_global",
441 //#endif /* ISPC_NVPTX_ENABLED */
442  "__broadcast_double",
443  "__broadcast_float",
444  "__broadcast_i16",
445  "__broadcast_i32",
446  "__broadcast_i64",
447  "__broadcast_i8",
448  "__cast_mask_to_i1",
449  "__cast_mask_to_i16",
450  "__ceil_uniform_double",
451  "__ceil_uniform_float",
452  "__ceil_varying_double",
453  "__ceil_varying_float",
454  "__clock",
455  "__count_trailing_zeros_i32",
456  "__count_trailing_zeros_i64",
457  "__count_leading_zeros_i32",
458  "__count_leading_zeros_i64",
459  "__delete_uniform_32rt",
460  "__delete_uniform_64rt",
461  "__delete_varying_32rt",
462  "__delete_varying_64rt",
463  "__do_assert_uniform",
464  "__do_assert_varying",
465  "__do_print",
466 //#ifdef ISPC_NVPTX_ENABLED
467  "__do_print_nvptx",
468 //#endif /* ISPC_NVPTX_ENABLED */
469  "__doublebits_uniform_int64",
470  "__doublebits_varying_int64",
471  "__exclusive_scan_add_double",
472  "__exclusive_scan_add_float",
473  "__exclusive_scan_add_i32",
474  "__exclusive_scan_add_i64",
475  "__exclusive_scan_and_i32",
476  "__exclusive_scan_and_i64",
477  "__exclusive_scan_or_i32",
478  "__exclusive_scan_or_i64",
479  "__extract_int16",
480  "__extract_int32",
481  "__extract_int64",
482  "__extract_int8",
483 //#ifdef ISPC_NVPTX_ENABLED
484  "__extract_float",
485  "__extract_double",
486 //#endif /* ISPC_NVPTX_ENABLED */
487  "__extract_mask_low",
488  "__extract_mask_hi",
489  "__fastmath",
490  "__float_to_half_uniform",
491  "__float_to_half_varying",
492  "__floatbits_uniform_int32",
493  "__floatbits_varying_int32",
494  "__floor_uniform_double",
495  "__floor_uniform_float",
496  "__floor_varying_double",
497  "__floor_varying_float",
498  "__get_system_isa",
499  "__half_to_float_uniform",
500  "__half_to_float_varying",
501  "__insert_int16",
502  "__insert_int32",
503  "__insert_int64",
504  "__insert_int8",
505 //#ifdef ISPC_NVPTX_ENABLED
506  "__insert_float",
507  "__insert_double",
508 //#endif /* ISPC_NVPTX_ENABLED */
509  "__intbits_uniform_double",
510  "__intbits_uniform_float",
511  "__intbits_varying_double",
512  "__intbits_varying_float",
513  "__max_uniform_double",
514  "__max_uniform_float",
515  "__max_uniform_int32",
516  "__max_uniform_int64",
517  "__max_uniform_uint32",
518  "__max_uniform_uint64",
519  "__max_varying_double",
520  "__max_varying_float",
521  "__max_varying_int32",
522  "__max_varying_int64",
523  "__max_varying_uint32",
524  "__max_varying_uint64",
525  "__memory_barrier",
526  "__memcpy32",
527  "__memcpy64",
528  "__memmove32",
529  "__memmove64",
530  "__memset32",
531  "__memset64",
532  "__min_uniform_double",
533  "__min_uniform_float",
534  "__min_uniform_int32",
535  "__min_uniform_int64",
536  "__min_uniform_uint32",
537  "__min_uniform_uint64",
538  "__min_varying_double",
539  "__min_varying_float",
540  "__min_varying_int32",
541  "__min_varying_int64",
542  "__min_varying_uint32",
543  "__min_varying_uint64",
544  "__movmsk",
545 //#ifdef ISPC_NVPTX_ENABLED
546  "__movmsk_ptx",
547 //#endif /* ISPC_NVPTX_ENABLED */
548  "__new_uniform_32rt",
549  "__new_uniform_64rt",
550  "__new_varying32_32rt",
551  "__new_varying32_64rt",
552  "__new_varying64_64rt",
553  "__none",
554  "__num_cores",
555  "__packed_load_active",
556  "__packed_store_active",
557  "__packed_store_active2",
558  "__padds_vi8",
559  "__padds_vi16",
560  "__paddus_vi8",
561  "__paddus_vi16",
562  "__popcnt_int32",
563  "__popcnt_int64",
564  "__prefetch_read_uniform_1",
565  "__prefetch_read_uniform_2",
566  "__prefetch_read_uniform_3",
567  "__prefetch_read_uniform_nt",
568  "__pseudo_prefetch_read_varying_1",
569  "__pseudo_prefetch_read_varying_2",
570  "__pseudo_prefetch_read_varying_3",
571  "__pseudo_prefetch_read_varying_nt",
572  "__psubs_vi8",
573  "__psubs_vi16",
574  "__psubus_vi8",
575  "__psubus_vi16",
576  "__rcp_uniform_float",
577  "__rcp_varying_float",
578  "__rcp_uniform_double",
579  "__rcp_varying_double",
580  "__rdrand_i16",
581  "__rdrand_i32",
582  "__rdrand_i64",
583  "__reduce_add_double",
584  "__reduce_add_float",
585  "__reduce_add_int8",
586  "__reduce_add_int16",
587  "__reduce_add_int32",
588  "__reduce_add_int64",
589  "__reduce_equal_double",
590  "__reduce_equal_float",
591  "__reduce_equal_int32",
592  "__reduce_equal_int64",
593  "__reduce_max_double",
594  "__reduce_max_float",
595  "__reduce_max_int32",
596  "__reduce_max_int64",
597  "__reduce_max_uint32",
598  "__reduce_max_uint64",
599  "__reduce_min_double",
600  "__reduce_min_float",
601  "__reduce_min_int32",
602  "__reduce_min_int64",
603  "__reduce_min_uint32",
604  "__reduce_min_uint64",
605  "__rotate_double",
606  "__rotate_float",
607  "__rotate_i16",
608  "__rotate_i32",
609  "__rotate_i64",
610  "__rotate_i8",
611  "__round_uniform_double",
612  "__round_uniform_float",
613  "__round_varying_double",
614  "__round_varying_float",
615  "__rsqrt_uniform_float",
616  "__rsqrt_varying_float",
617  "__rsqrt_uniform_double",
618  "__rsqrt_varying_double",
619  "__set_system_isa",
620  "__sext_uniform_bool",
621  "__sext_varying_bool",
622  "__shift_double",
623  "__shift_float",
624  "__shift_i16",
625  "__shift_i32",
626  "__shift_i64",
627  "__shift_i8",
628  "__shuffle2_double",
629  "__shuffle2_float",
630  "__shuffle2_i16",
631  "__shuffle2_i32",
632  "__shuffle2_i64",
633  "__shuffle2_i8",
634  "__shuffle_double",
635  "__shuffle_float",
636  "__shuffle_i16",
637  "__shuffle_i32",
638  "__shuffle_i64",
639  "__shuffle_i8",
640  "__soa_to_aos3_float",
641  "__soa_to_aos3_float16",
642  "__soa_to_aos3_float4",
643  "__soa_to_aos3_float8",
644  "__soa_to_aos3_int32",
645  "__soa_to_aos4_float",
646 //#ifdef ISPC_NVPTX_ENABLED
647  "__soa_to_aos3_float1",
648  "__soa_to_aos4_float1",
649 //#endif /* ISPC_NVPTX_ENABLED */
650  "__soa_to_aos4_float16",
651  "__soa_to_aos4_float4",
652  "__soa_to_aos4_float8",
653  "__soa_to_aos4_int32",
654  "__sqrt_uniform_double",
655  "__sqrt_uniform_float",
656  "__sqrt_varying_double",
657  "__sqrt_varying_float",
658  "__stdlib_acosf",
659  "__stdlib_asinf",
660  "__stdlib_atan",
661  "__stdlib_atan2",
662  "__stdlib_atan2f",
663  "__stdlib_atanf",
664  "__stdlib_cos",
665  "__stdlib_cosf",
666  "__stdlib_exp",
667  "__stdlib_expf",
668  "__stdlib_log",
669  "__stdlib_logf",
670  "__stdlib_pow",
671  "__stdlib_powf",
672  "__stdlib_sin",
673  "__stdlib_asin",
674  "__stdlib_sincos",
675  "__stdlib_sincosf",
676  "__stdlib_sinf",
677  "__stdlib_tan",
678  "__stdlib_tanf",
679  "__svml_sind",
680  "__svml_asind",
681  "__svml_cosd",
682  "__svml_acosd",
683  "__svml_sincosd",
684  "__svml_tand",
685  "__svml_atand",
686  "__svml_atan2d",
687  "__svml_expd",
688  "__svml_logd",
689  "__svml_powd",
690  "__svml_sinf",
691  "__svml_asinf",
692  "__svml_cosf",
693  "__svml_acosf",
694  "__svml_sincosf",
695  "__svml_tanf",
696  "__svml_atanf",
697  "__svml_atan2f",
698  "__svml_expf",
699  "__svml_logf",
700  "__svml_powf",
701  "__log_uniform_float",
702  "__log_varying_float",
703  "__exp_uniform_float",
704  "__exp_varying_float",
705  "__pow_uniform_float",
706  "__pow_varying_float",
707  "__log_uniform_double",
708  "__log_varying_double",
709  "__exp_uniform_double",
710  "__exp_varying_double",
711  "__pow_uniform_double",
712  "__pow_varying_double",
713  "__sin_varying_float",
714  "__asin_varying_float",
715  "__cos_varying_float",
716  "__acos_varying_float",
717  "__sincos_varying_float",
718  "__tan_varying_float",
719  "__atan_varying_float",
720  "__atan2_varying_float",
721  "__sin_uniform_float",
722  "__asin_uniform_float",
723  "__cos_uniform_float",
724  "__acos_uniform_float",
725  "__sincos_uniform_float",
726  "__tan_uniform_float",
727  "__atan_uniform_float",
728  "__atan2_uniform_float",
729  "__sin_varying_double",
730  "__asin_varying_double",
731  "__cos_varying_double",
732  "__acos_varying_double",
733  "__sincos_varying_double",
734  "__tan_varying_double",
735  "__atan_varying_double",
736  "__atan2_varying_double",
737  "__sin_uniform_double",
738  "__asin_uniform_double",
739  "__cos_uniform_double",
740  "__acos_uniform_double",
741  "__sincos_uniform_double",
742  "__tan_uniform_double",
743  "__atan_uniform_double",
744  "__atan2_uniform_double",
745  "__undef_uniform",
746  "__undef_varying",
747  "__vec4_add_float",
748  "__vec4_add_int32",
749  "__vselect_float",
750 //#ifdef ISPC_NVPTX_ENABLED
751  "__program_index",
752  "__program_count",
753  "__warp_index",
754  "__task_index0",
755  "__task_index1",
756  "__task_index2",
757  "__task_index",
758  "__task_count0",
759  "__task_count1",
760  "__task_count2",
761  "__task_count",
762  "__cvt_loc2gen",
763  "__cvt_loc2gen_var",
764  "__cvt_const2gen",
765  "__puts_nvptx",
766  "ISPCAlloc",
767  "ISPCLaunch",
768  "ISPCSync",
769 //#endif /* ISPC_NVPTX_ENABLED */
770  "__vselect_i32"
771  };
772 
773  int count = sizeof(names) / sizeof(names[0]);
774  for (int i = 0; i < count; ++i) {
775  llvm::Function *f = module->getFunction(names[i]);
776  if (f != NULL && f->empty() == false) {
777  f->setLinkage(llvm::GlobalValue::InternalLinkage);
779  }
780  }
781 }
782 
783 
784 /** This utility function takes serialized binary LLVM bitcode and adds its
785  definitions to the given module. Functions in the bitcode that can be
786  mapped to ispc functions are also added to the symbol table.
787 
788  @param bitcode Binary LLVM bitcode (e.g. the contents of a *.bc file)
789  @param length Length of the bitcode buffer
790  @param module Module to link the bitcode into
791  @param symbolTable Symbol table to add definitions to
792  */
793 void
794 AddBitcodeToModule(const unsigned char *bitcode, int length,
795  llvm::Module *module, SymbolTable *symbolTable, bool warn) {
796  llvm::StringRef sb = llvm::StringRef((char *)bitcode, length);
797 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5
798  llvm::MemoryBuffer *bcBuf = llvm::MemoryBuffer::getMemBuffer(sb);
799 #else // LLVM 3.6+
800  llvm::MemoryBufferRef bcBuf = llvm::MemoryBuffer::getMemBuffer(sb)->getMemBufferRef();
801 #endif
802 
803 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
804  llvm::ErrorOr<std::unique_ptr<llvm::Module>> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
805  if (std::error_code EC = ModuleOrErr.getError())
806  Error(SourcePos(), "Error parsing stdlib bitcode: %s", EC.message().c_str());
807  else {
808  llvm::Module *bcModule = ModuleOrErr.get().release();
809 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_5 || ISPC_LLVM_VERSION == ISPC_LLVM_3_6
810  llvm::ErrorOr<llvm::Module *> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
811  if (std::error_code EC = ModuleOrErr.getError())
812  Error(SourcePos(), "Error parsing stdlib bitcode: %s", EC.message().c_str());
813  else {
814  llvm::Module *bcModule = ModuleOrErr.get();
815 #else // LLVM 3.2 - 3.4
816  std::string bcErr;
817  llvm::Module *bcModule = llvm::ParseBitcodeFile(bcBuf, *g->ctx, &bcErr);
818  if (!bcModule)
819  Error(SourcePos(), "Error parsing stdlib bitcode: %s", bcErr.c_str());
820  else {
821 #endif
822  // FIXME: this feels like a bad idea, but the issue is that when we
823  // set the llvm::Module's target triple in the ispc Module::Module
824  // constructor, we start by calling llvm::sys::getHostTriple() (and
825  // then change the arch if needed). Somehow that ends up giving us
826  // strings like 'x86_64-apple-darwin11.0.0', while the stuff we
827  // compile to bitcode with clang has module triples like
828  // 'i386-apple-macosx10.7.0'. And then LLVM issues a warning about
829  // linking together modules with incompatible target triples..
830  llvm::Triple mTriple(m->module->getTargetTriple());
831  llvm::Triple bcTriple(bcModule->getTargetTriple());
832  Debug(SourcePos(), "module triple: %s\nbitcode triple: %s\n",
833  mTriple.str().c_str(), bcTriple.str().c_str());
834 #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
835  // FIXME: More ugly and dangerous stuff. We really haven't set up
836  // proper build and runtime infrastructure for ispc to do
837  // cross-compilation, yet it's at minimum useful to be able to emit
838  // ARM code from x86 for ispc development. One side-effect is that
839  // when the build process turns builtins/builtins.c to LLVM bitcode
840  // for us to link in at runtime, that bitcode has been compiled for
841  // an IA target, which in turn causes the checks in the following
842  // code to (appropraitely) fail.
843  //
844  // In order to be able to have some ability to generate ARM code on
845  // IA, we'll just skip those tests in that case and allow the
846  // setTargetTriple() and setDataLayout() calls below to shove in
847  // the values for an ARM target. This maybe won't cause problems
848  // in the generated code, since bulitins.c doesn't do anything too
849  // complex w.r.t. struct layouts, etc.
850  if (g->target->getISA() != Target::NEON32 &&
851  g->target->getISA() != Target::NEON16 &&
852  g->target->getISA() != Target::NEON8)
853 #endif // !__arm__
854 #ifdef ISPC_NVPTX_ENABLED
855  if (g->target->getISA() != Target::NVPTX)
856 #endif /* ISPC_NVPTX_ENABLED */
857  {
858  Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
859  mTriple.getArch() == bcTriple.getArch());
860  Assert(bcTriple.getVendor() == llvm::Triple::UnknownVendor ||
861  mTriple.getVendor() == bcTriple.getVendor());
862 
863  // We unconditionally set module DataLayout to library, but we must
864  // ensure that library and module DataLayouts are compatible.
865  // If they are not, we should recompile the library for problematic
866  // architecture and investigate what happened.
867  // Generally we allow library DataLayout to be subset of module
868  // DataLayout or library DataLayout to be empty.
869 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_5
870  if (!VerifyDataLayoutCompatibility(module->getDataLayoutStr(),
871  bcModule->getDataLayoutStr())
872  && warn) {
873  Warning(SourcePos(), "Module DataLayout is incompatible with "
874  "library DataLayout:\n"
875  "Module DL: %s\n"
876  "Library DL: %s\n",
877  module->getDataLayoutStr().c_str(),
878  bcModule->getDataLayoutStr().c_str());
879  }
880 #else
881  if (!VerifyDataLayoutCompatibility(module->getDataLayout(),
882  bcModule->getDataLayout())
883  && warn) {
884  Warning(SourcePos(), "Module DataLayout is incompatible with "
885  "library DataLayout:\n"
886  "Module DL: %s\n"
887  "Library DL: %s\n",
888  module->getDataLayout().c_str(),
889  bcModule->getDataLayout().c_str());
890  }
891 #endif
892  }
893 
894  bcModule->setTargetTriple(mTriple.str());
895  bcModule->setDataLayout(module->getDataLayout());
896 
897 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // 3.2-3.5
898  std::string(linkError);
899 
900  if (llvm::Linker::LinkModules(module, bcModule,
901  llvm::Linker::DestroySource,
902  &linkError))
903  Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
904 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 // 3.6-3.7
905  llvm::Linker::LinkModules(module, bcModule);
906 #else // LLVM 3.8+
907  // A hack to move over declaration, which have no definition.
908  // New linker is kind of smart and think it knows better what to do, so
909  // it removes unused declarations without definitions.
910  // This trick should be legal, as both modules use the same LLVMContext.
911  for (llvm::Function& f : *bcModule) {
912  if (f.isDeclaration()) {
913  // Declarations with uses will be moved by Linker.
914  if (f.getNumUses() > 0)
915  continue;
916  module->getOrInsertFunction(f.getName(), f.getFunctionType(),
917  f.getAttributes());
918  }
919  }
920 
921  std::unique_ptr<llvm::Module> M(bcModule);
922  if (llvm::Linker::linkModules(*module, std::move(M))) {
923  Error(SourcePos(), "Error linking stdlib bitcode.");
924  }
925 #endif
926 
927  lSetInternalFunctions(module);
928  if (symbolTable != NULL)
929  lAddModuleSymbols(module, symbolTable);
930  lCheckModuleIntrinsics(module);
931  }
932 }
933 
934 
935 /** Utility routine that defines a constant int32 with given value, adding
936  the symbol to both the ispc symbol table and the given LLVM module.
937  */
938 static void
939 lDefineConstantInt(const char *name, int val, llvm::Module *module,
940  SymbolTable *symbolTable) {
941  Symbol *sym =
942  new Symbol(name, SourcePos(), AtomicType::UniformInt32->GetAsConstType(),
943  SC_STATIC);
944  sym->constValue = new ConstExpr(sym->type, val, SourcePos());
945  llvm::Type *ltype = LLVMTypes::Int32Type;
946  llvm::Constant *linit = LLVMInt32(val);
947  // Use WeakODRLinkage rather than InternalLinkage so that a definition
948  // survives even if it's not used in the module, so that the symbol is
949  // there in the debugger.
950  llvm::GlobalValue::LinkageTypes linkage = g->generateDebuggingSymbols ?
951  llvm::GlobalValue::WeakODRLinkage : llvm::GlobalValue::InternalLinkage;
952  sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true, linkage,
953  linit, name);
954  symbolTable->AddVariable(sym);
955 
956  if (m->diBuilder != NULL) {
957 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
958  llvm::DIFile file;
959  llvm::DIType diType = sym->type->GetDIType(file);
960  Assert(diType.Verify());
961 #else // LLVM 3.7+
962  llvm::DIFile *file =
963  m->diBuilder->createFile(m->diCompileUnit->getFilename(),
964  m->diCompileUnit->getDirectory());
965  llvm::DIType *diType = sym->type->GetDIType(file);
966 // Assert(diType.Verify());
967 #endif
968  // FIXME? DWARF says that this (and programIndex below) should
969  // have the DW_AT_artifical attribute. It's not clear if this
970  // matters for anything though.
971 
972 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5
973  llvm::DIGlobalVariable var = m->diBuilder->createGlobalVariable(
974  name,
975  file,
976  0 /* line */,
977  diType,
978  true /* static */,
979  sym->storagePtr);
980 #elif ISPC_LLVM_VERSION == ISPC_LLVM_3_6 // LLVM 3.6
981  llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
982  Assert(sym_const_storagePtr);
983  llvm::DIGlobalVariable var = m->diBuilder->createGlobalVariable(
984  file,
985  name,
986  name,
987  file,
988  0 /* line */,
989  diType,
990  true /* static */,
991  sym_const_storagePtr);
992 #else // LLVM 3.7+
993  llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
994  Assert(sym_const_storagePtr);
995  m->diBuilder->createGlobalVariable(
996  file,
997  name,
998  name,
999  file,
1000  0 /* line */,
1001  diType,
1002  true /* static */,
1003  sym_const_storagePtr);
1004 #endif
1005 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1006  Assert(var.Verify());
1007 #else // LLVM 3.7+
1008  //coming soon
1009 #endif
1010  }
1011 }
1012 
1013 
1014 
1015 static void
1016 lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
1017  SymbolTable *symbolTable) {
1018  llvm::SmallVector<const Type *, 8> args;
1020  Symbol *sym = new Symbol(name, SourcePos(), ft, SC_STATIC);
1021 
1022  llvm::Function *func = module->getFunction(name);
1023  Assert(func != NULL); // it should be declared already...
1024 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
1025  func->addFnAttr(llvm::Attributes::AlwaysInline);
1026 #else // LLVM 3.3+
1027  func->addFnAttr(llvm::Attribute::AlwaysInline);
1028 #endif
1029  llvm::BasicBlock *bblock = llvm::BasicBlock::Create(*g->ctx, "entry", func, 0);
1030  llvm::ReturnInst::Create(*g->ctx, LLVMInt32(val), bblock);
1031 
1032  sym->function = func;
1033  symbolTable->AddVariable(sym);
1034 }
1035 
1036 
1037 
1038 static void
1039 lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
1040  Symbol *sym =
1041  new Symbol("programIndex", SourcePos(),
1042  AtomicType::VaryingInt32->GetAsConstType(), SC_STATIC);
1043 
1044  int pi[ISPC_MAX_NVEC];
1045  for (int i = 0; i < g->target->getVectorWidth(); ++i)
1046  pi[i] = i;
1047  sym->constValue = new ConstExpr(sym->type, pi, SourcePos());
1048 
1049  llvm::Type *ltype = LLVMTypes::Int32VectorType;
1050  llvm::Constant *linit = LLVMInt32Vector(pi);
1051  // See comment in lDefineConstantInt() for why WeakODRLinkage is used here
1052  llvm::GlobalValue::LinkageTypes linkage = g->generateDebuggingSymbols ?
1053  llvm::GlobalValue::WeakODRLinkage : llvm::GlobalValue::InternalLinkage;
1054  sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true, linkage,
1055  linit, sym->name.c_str());
1056  symbolTable->AddVariable(sym);
1057 
1058  if (m->diBuilder != NULL) {
1059 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1060  llvm::DIFile file;
1061  llvm::DIType diType = sym->type->GetDIType(file);
1062  Assert(diType.Verify());
1063 #else // LLVM 3.7+
1064  llvm::DIFile *file =
1065  m->diBuilder->createFile(m->diCompileUnit->getFilename(),
1066  m->diCompileUnit->getDirectory());
1067  llvm::DIType *diType = sym->type->GetDIType(file);
1068 // Assert(diType.Verify());
1069 #endif
1070 #if ISPC_LLVM_VERSION == ISPC_LLVM_3_6 // LLVM 3.6
1071  llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
1072  Assert(sym_const_storagePtr);
1073  llvm::DIGlobalVariable var = m->diBuilder->createGlobalVariable(
1074  file,
1075  sym->name.c_str(),
1076  sym->name.c_str(),
1077  file,
1078  0 /* line */,
1079  diType,
1080  false /* static */,
1081  sym_const_storagePtr);
1082 #elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_5
1083  llvm::DIGlobalVariable var = m->diBuilder->createGlobalVariable(
1084  sym->name.c_str(),
1085  file,
1086  0 /* line */,
1087  diType,
1088  false /* static */,
1089  sym->storagePtr);
1090 #else // LLVM 3.7+
1091  llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
1092  Assert(sym_const_storagePtr);
1093  m->diBuilder->createGlobalVariable(
1094  file,
1095  sym->name.c_str(),
1096  sym->name.c_str(),
1097  file,
1098  0 /* line */,
1099  diType,
1100  false /* static */,
1101  sym_const_storagePtr);
1102 #endif
1103 #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
1104  Assert(var.Verify());
1105 #else // LLVM 3.7+
1106  //coming soon
1107 #endif
1108  }
1109 }
1110 
1111 
1112 void
1113 DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module,
1114  bool includeStdlibISPC) {
1115  bool runtime32 = g->target->is32Bit();
1116  bool warn = g->target->getISA() != Target::GENERIC;
1117 
1118 #define EXPORT_MODULE_COND_WARN(export_module, warnings) \
1119  extern unsigned char export_module[]; \
1120  extern int export_module##_length; \
1121  AddBitcodeToModule(export_module, export_module##_length, \
1122  module, symbolTable, warnings);
1123 
1124 #define EXPORT_MODULE(export_module) \
1125  extern unsigned char export_module[]; \
1126  extern int export_module##_length; \
1127  AddBitcodeToModule(export_module, export_module##_length, \
1128  module, symbolTable, true);
1129 
1130  // Add the definitions from the compiled builtins.c file.
1131  // When compiling for "generic" target family, data layout warnings for
1132  // "builtins_bitcode_c" have to be switched off: its DL is incompatible
1133  // with the DL of "generic". Anyway, AddBitcodeToModule() corrects this
1134  // automatically if DLs differ (by copying module`s DL to export`s DL).
1135  if (runtime32) {
1136  EXPORT_MODULE_COND_WARN(builtins_bitcode_c_32, warn);
1137  }
1138  else {
1139  EXPORT_MODULE_COND_WARN(builtins_bitcode_c_64, warn);
1140  }
1141 
1142  // Next, add the target's custom implementations of the various needed
1143  // builtin functions (e.g. __masked_store_32(), etc).
1144  switch (g->target->getISA()) {
1145 #ifdef ISPC_NVPTX_ENABLED
1146  case Target::NVPTX:
1147  {
1148  if (runtime32) {
1149  fprintf(stderr, "Unfortunatly 32bit targets are not supported at the moment .. \n");
1150  assert(0);
1151  }
1152  else {
1153  EXPORT_MODULE(builtins_bitcode_nvptx_64bit);
1154  }
1155  break;
1156  };
1157 #endif /* ISPC_NVPTX_ENABLED */
1158 
1159 #ifdef ISPC_ARM_ENABLED
1160  case Target::NEON8: {
1161  if (runtime32) {
1162  EXPORT_MODULE(builtins_bitcode_neon_8_32bit);
1163  }
1164  else {
1165  EXPORT_MODULE(builtins_bitcode_neon_8_64bit);
1166  }
1167  break;
1168  }
1169  case Target::NEON16: {
1170  if (runtime32) {
1171  EXPORT_MODULE(builtins_bitcode_neon_16_32bit);
1172  }
1173  else {
1174  EXPORT_MODULE(builtins_bitcode_neon_16_64bit);
1175  }
1176  break;
1177  }
1178  case Target::NEON32: {
1179  if (runtime32) {
1180  EXPORT_MODULE(builtins_bitcode_neon_32_32bit);
1181  }
1182  else {
1183  EXPORT_MODULE(builtins_bitcode_neon_32_64bit);
1184  }
1185  break;
1186  }
1187 #endif
1188  case Target::SSE2: {
1189  switch (g->target->getVectorWidth()) {
1190  case 4:
1191  if (runtime32) {
1192  EXPORT_MODULE(builtins_bitcode_sse2_32bit);
1193  }
1194  else {
1195  EXPORT_MODULE(builtins_bitcode_sse2_64bit);
1196  }
1197  break;
1198  case 8:
1199  if (runtime32) {
1200  EXPORT_MODULE(builtins_bitcode_sse2_x2_32bit);
1201  }
1202  else {
1203  EXPORT_MODULE(builtins_bitcode_sse2_x2_64bit);
1204  }
1205  break;
1206  default:
1207  FATAL("logic error in DefineStdlib");
1208  }
1209  break;
1210  }
1211  case Target::SSE4: {
1212  switch (g->target->getVectorWidth()) {
1213  case 4:
1214  if (runtime32) {
1215  EXPORT_MODULE(builtins_bitcode_sse4_32bit);
1216  }
1217  else {
1218  EXPORT_MODULE(builtins_bitcode_sse4_64bit);
1219  }
1220  break;
1221  case 8:
1222  if (runtime32) {
1223  if (g->target->getMaskBitCount() == 16) {
1224  EXPORT_MODULE(builtins_bitcode_sse4_16_32bit);
1225  }
1226  else {
1227  Assert(g->target->getMaskBitCount() == 32);
1228  EXPORT_MODULE(builtins_bitcode_sse4_x2_32bit);
1229  }
1230  }
1231  else {
1232  if (g->target->getMaskBitCount() == 16) {
1233  EXPORT_MODULE(builtins_bitcode_sse4_16_64bit);
1234  }
1235  else {
1236  Assert(g->target->getMaskBitCount() == 32);
1237  EXPORT_MODULE(builtins_bitcode_sse4_x2_64bit);
1238  }
1239  }
1240  break;
1241  case 16:
1242  Assert(g->target->getMaskBitCount() == 8);
1243  if (runtime32) {
1244  EXPORT_MODULE(builtins_bitcode_sse4_8_32bit);
1245  }
1246  else {
1247  EXPORT_MODULE(builtins_bitcode_sse4_8_64bit);
1248  }
1249  break;
1250  default:
1251  FATAL("logic error in DefineStdlib");
1252  }
1253  break;
1254  }
1255  case Target::AVX: {
1256  switch (g->target->getVectorWidth()) {
1257  case 4:
1258  if (g->target->getDataTypeWidth() == 32) {
1259  // Note here that for avx1-i32x4 we are using bitcode file for
1260  // sse4-i32x4. This is intentional and good enough.
1261  // AVX target implies appropriate target-feature attrbute,
1262  // which forces LLVM to generate AVX code, even for SSE4
1263  // intrinsics. Except that the only "missing" feature in sse4
1264  // target is implemenation of __masked_[store|load]_[i32|i64]
1265  // using maskmov instruction. But it's not very popular
1266  // intrinsics, so we assume the implementation to be good
1267  // enough at the moment.
1268  if (runtime32) {
1269  EXPORT_MODULE(builtins_bitcode_sse4_32bit);
1270  }
1271  else {
1272  EXPORT_MODULE(builtins_bitcode_sse4_64bit);
1273  }
1274  } else if (g->target->getDataTypeWidth() == 64) {
1275  if (runtime32) {
1276  EXPORT_MODULE(builtins_bitcode_avx1_i64x4_32bit);
1277  }
1278  else {
1279  EXPORT_MODULE(builtins_bitcode_avx1_i64x4_64bit);
1280  }
1281  } else {
1282  FATAL("logic error in DefineStdlib");
1283  }
1284  break;
1285  case 8:
1286  if (runtime32) {
1287  EXPORT_MODULE(builtins_bitcode_avx1_32bit);
1288  }
1289  else {
1290  EXPORT_MODULE(builtins_bitcode_avx1_64bit);
1291  }
1292  break;
1293  case 16:
1294  if (runtime32) {
1295  EXPORT_MODULE(builtins_bitcode_avx1_x2_32bit);
1296  }
1297  else {
1298  EXPORT_MODULE(builtins_bitcode_avx1_x2_64bit);
1299  }
1300  break;
1301  default:
1302  FATAL("logic error in DefineStdlib");
1303  }
1304  break;
1305  }
1306  case Target::AVX11: {
1307  switch (g->target->getVectorWidth()) {
1308  case 4:
1309  if (runtime32) {
1310  EXPORT_MODULE(builtins_bitcode_avx11_i64x4_32bit);
1311  }
1312  else {
1313  EXPORT_MODULE(builtins_bitcode_avx11_i64x4_64bit);
1314  }
1315  break;
1316  case 8:
1317  if (runtime32) {
1318  EXPORT_MODULE(builtins_bitcode_avx11_32bit);
1319  }
1320  else {
1321  EXPORT_MODULE(builtins_bitcode_avx11_64bit);
1322  }
1323  break;
1324  case 16:
1325  if (runtime32) {
1326  EXPORT_MODULE(builtins_bitcode_avx11_x2_32bit);
1327  }
1328  else {
1329  EXPORT_MODULE(builtins_bitcode_avx11_x2_64bit);
1330  }
1331  break;
1332  default:
1333  FATAL("logic error in DefineStdlib");
1334  }
1335  break;
1336  }
1337  case Target::AVX2: {
1338  switch (g->target->getVectorWidth()) {
1339  case 4:
1340  if (runtime32) {
1341  EXPORT_MODULE(builtins_bitcode_avx2_i64x4_32bit);
1342  }
1343  else {
1344  EXPORT_MODULE(builtins_bitcode_avx2_i64x4_64bit);
1345  }
1346  break;
1347  case 8:
1348  if (runtime32) {
1349  EXPORT_MODULE(builtins_bitcode_avx2_32bit);
1350  }
1351  else {
1352  EXPORT_MODULE(builtins_bitcode_avx2_64bit);
1353  }
1354  break;
1355  case 16:
1356  if (runtime32) {
1357  EXPORT_MODULE(builtins_bitcode_avx2_x2_32bit);
1358  }
1359  else {
1360  EXPORT_MODULE(builtins_bitcode_avx2_x2_64bit);
1361  }
1362  break;
1363  default:
1364  FATAL("logic error in DefineStdlib");
1365  }
1366  break;
1367  }
1368 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
1369  case Target::KNL_AVX512: {
1370  switch (g->target->getVectorWidth()) {
1371  case 16:
1372  if (runtime32) {
1373  EXPORT_MODULE(builtins_bitcode_knl_32bit);
1374  }
1375  else {
1376  EXPORT_MODULE(builtins_bitcode_knl_64bit);
1377  }
1378  break;
1379  default:
1380  FATAL("logic error in DefineStdlib");
1381  }
1382  break;
1383  }
1384 #endif
1385 #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
1386  case Target::SKX_AVX512: {
1387  switch (g->target->getVectorWidth()) {
1388  case 16:
1389  if (runtime32) {
1390  EXPORT_MODULE(builtins_bitcode_skx_32bit);
1391  }
1392  else {
1393  EXPORT_MODULE(builtins_bitcode_skx_64bit);
1394  }
1395  break;
1396  default:
1397  FATAL("logic error in DefineStdlib");
1398  }
1399  break;
1400  }
1401 #endif
1402  case Target::GENERIC: {
1403  switch (g->target->getVectorWidth()) {
1404  case 4:
1405  if (runtime32) {
1406  EXPORT_MODULE(builtins_bitcode_generic_4_32bit);
1407  }
1408  else {
1409  EXPORT_MODULE(builtins_bitcode_generic_4_64bit);
1410  }
1411  break;
1412  case 8:
1413  if (runtime32) {
1414  EXPORT_MODULE(builtins_bitcode_generic_8_32bit);
1415  }
1416  else {
1417  EXPORT_MODULE(builtins_bitcode_generic_8_64bit);
1418  }
1419  break;
1420  case 16:
1421  if (runtime32) {
1422  EXPORT_MODULE(builtins_bitcode_generic_16_32bit);
1423  }
1424  else {
1425  EXPORT_MODULE(builtins_bitcode_generic_16_64bit);
1426  }
1427  break;
1428  case 32:
1429  if (runtime32) {
1430  EXPORT_MODULE(builtins_bitcode_generic_32_32bit);
1431  }
1432  else {
1433  EXPORT_MODULE(builtins_bitcode_generic_32_64bit);
1434  }
1435  break;
1436  case 64:
1437  if (runtime32) {
1438  EXPORT_MODULE(builtins_bitcode_generic_64_32bit);
1439  }
1440  else {
1441  EXPORT_MODULE(builtins_bitcode_generic_64_64bit);
1442  }
1443  break;
1444  case 1:
1445  if (runtime32) {
1446  EXPORT_MODULE(builtins_bitcode_generic_1_32bit);
1447  }
1448  else {
1449  EXPORT_MODULE(builtins_bitcode_generic_1_64bit);
1450  }
1451  break;
1452  default:
1453  FATAL("logic error in DefineStdlib");
1454  }
1455  break;
1456  }
1457  default:
1458  FATAL("logic error");
1459  }
1460 
1461  // define the 'programCount' builtin variable
1462 #ifdef ISPC_NVPTX_ENABLED
1463  if (g->target->getISA() == Target::NVPTX)
1464  {
1465  lDefineConstantInt("programCount", 32, module, symbolTable);
1466  }
1467  else
1468  {
1469 #endif /* ISPC_NVPTX_ENABLED */
1470  lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
1471 #ifdef ISPC_NVPTX_ENABLED
1472  }
1473 #endif /* ISPC_NVPTX_ENABLED */
1474 
1475  // define the 'programIndex' builtin
1476  lDefineProgramIndex(module, symbolTable);
1477 
1478  // Define __math_lib stuff. This is used by stdlib.ispc, for example, to
1479  // figure out which math routines to end up calling...
1480  lDefineConstantInt("__math_lib", (int)g->mathLib, module, symbolTable);
1481  lDefineConstantInt("__math_lib_ispc", (int)Globals::Math_ISPC, module,
1482  symbolTable);
1483  lDefineConstantInt("__math_lib_ispc_fast", (int)Globals::Math_ISPCFast,
1484  module, symbolTable);
1485  lDefineConstantInt("__math_lib_svml", (int)Globals::Math_SVML, module,
1486  symbolTable);
1487  lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module,
1488  symbolTable);
1489  lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload,
1490  module, symbolTable);
1491 
1492  lDefineConstantInt("__have_native_half", g->target->hasHalf(), module,
1493  symbolTable);
1494  lDefineConstantInt("__have_native_rand", g->target->hasRand(), module,
1495  symbolTable);
1496  lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(),
1497  module, symbolTable);
1498  lDefineConstantInt("__have_native_trigonometry", g->target->hasTrigonometry(),
1499  module, symbolTable);
1500  lDefineConstantInt("__have_native_rsqrtd", g->target->hasRsqrtd(),
1501  module, symbolTable);
1502  lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(),
1503  module, symbolTable);
1504 
1505 #ifdef ISPC_NVPTX_ENABLED
1506  lDefineConstantInt("__is_nvptx_target", (int)(g->target->getISA() == Target::NVPTX),
1507  module, symbolTable);
1508 #else
1509  lDefineConstantInt("__is_nvptx_target", (int)0, module, symbolTable);
1510 #endif /* ISPC_NVPTX_ENABLED */
1511 
1512  if (g->forceAlignment != -1) {
1513  llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true);
1514  alignment->setInitializer(LLVMInt32(g->forceAlignment));
1515  }
1516 
1517  if (includeStdlibISPC) {
1518  // If the user wants the standard library to be included, parse the
1519  // serialized version of the stdlib.ispc file to get its
1520  // definitions added.
1521  extern char stdlib_mask1_code[], stdlib_mask8_code[];
1522  extern char stdlib_mask16_code[], stdlib_mask32_code[], stdlib_mask64_code[];
1523  if (g->target->getISA() == Target::GENERIC &&
1524  g->target->getVectorWidth() == 1) { // 1 wide uses 32 stdlib
1525  yy_scan_string(stdlib_mask32_code);
1526  }
1527  else {
1528  switch (g->target->getMaskBitCount()) {
1529  case 1:
1530  yy_scan_string(stdlib_mask1_code);
1531  break;
1532  case 8:
1533  yy_scan_string(stdlib_mask8_code);
1534  break;
1535  case 16:
1536  yy_scan_string(stdlib_mask16_code);
1537  break;
1538  case 32:
1539  yy_scan_string(stdlib_mask32_code);
1540  break;
1541  case 64:
1542  yy_scan_string(stdlib_mask64_code);
1543  break;
1544  default:
1545  FATAL("Unhandled mask bit size for stdlib.ispc");
1546  }
1547  }
1548  yyparse();
1549  }
1550 }
llvm::Value * storagePtr
Definition: sym.h:72
static const AtomicType * VaryingInt32
Definition: type.h:349
static llvm::Type * FloatType
Definition: llvmutil.h:76
static bool lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable)
Definition: builtins.cpp:196
void AddBitcodeToModule(const unsigned char *bitcode, int length, llvm::Module *module, SymbolTable *symbolTable, bool warn)
Definition: builtins.cpp:794
static const AtomicType * VaryingInt16
Definition: type.h:348
static void lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable)
Definition: builtins.cpp:1039
llvm::Function * function
Definition: sym.h:76
static llvm::Type * Int32VectorPointerType
Definition: llvmutil.h:99
void DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module, bool includeStdlibISPC)
Definition: builtins.cpp:1113
std::string GetString() const
Definition: type.cpp:3011
Opt opt
Definition: ispc.h:541
Declaration of the FunctionEmitContext class
bool AddFunction(Symbol *symbol)
Definition: sym.cpp:147
static const AtomicType * VaryingUInt64
Definition: type.h:355
static llvm::Type * DoubleType
Definition: llvmutil.h:77
#define EXPORT_MODULE_COND_WARN(export_module, warnings)
int getVectorWidth() const
Definition: ispc.h:283
Module * m
Definition: ispc.cpp:89
static void lCheckModuleIntrinsics(llvm::Module *module)
Definition: builtins.cpp:299
Target * target
Definition: ispc.h:543
bool hasRsqrtd() const
Definition: ispc.h:303
static void lDefineConstantIntFunc(const char *name, int val, llvm::Module *module, SymbolTable *symbolTable)
Definition: builtins.cpp:1016
bool AddVariable(Symbol *symbol)
Definition: sym.cpp:98
static const AtomicType * VaryingDouble
Definition: type.h:356
int getDataTypeWidth() const
Definition: ispc.h:281
Expression representing a compile-time constant value.
Definition: expr.h:390
static llvm::Type * BoolType
Definition: llvmutil.h:70
#define Assert(expr)
Definition: ispc.h:170
Symbol table that holds all known symbols during parsing and compilation.
Definition: sym.h:119
llvm::Constant * LLVMInt32Vector(int32_t i)
Definition: llvmutil.cpp:379
static llvm::VectorType * Int32VectorType
Definition: llvmutil.h:92
static const AtomicType * UniformUInt32
Definition: type.h:352
static llvm::Type * FloatVectorPointerType
Definition: llvmutil.h:101
Declarations of functions related to builtins and the standard library.
static llvm::Type * Int8PointerType
Definition: llvmutil.h:79
static llvm::Type * Int32PointerType
Definition: llvmutil.h:81
std::string name
Definition: sym.h:71
static llvm::Type * Int16VectorPointerType
Definition: llvmutil.h:98
static llvm::Type * Int16Type
Definition: llvmutil.h:73
static llvm::Type * DoubleVectorPointerType
Definition: llvmutil.h:102
#define EXPORT_MODULE(export_module)
static const AtomicType * UniformUInt16
Definition: type.h:351
static void lSetInternalFunctions(llvm::Module *module)
Definition: builtins.cpp:338
static PointerType * GetUniform(const Type *t, bool isSlice=false)
Definition: type.cpp:963
ConstExpr * constValue
Definition: sym.h:87
bool hasRand() const
Definition: ispc.h:293
header file with declarations for symbol and symbol table classes.
static const AtomicType * UniformBool
Definition: type.h:346
static llvm::Type * VoidType
Definition: llvmutil.h:67
llvm::ConstantInt * LLVMInt32(int32_t i)
Definition: llvmutil.cpp:263
llvm::Module * module
Definition: module.h:158
static llvm::Type * Int8VectorPointerType
Definition: llvmutil.h:97
Globals * g
Definition: ispc.cpp:88
static const AtomicType * UniformUInt64
Definition: type.h:355
int getMaskBitCount() const
Definition: ispc.h:289
static llvm::VectorType * Int8VectorType
Definition: llvmutil.h:90
void Error(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:385
bool hasHalf() const
Definition: ispc.h:291
bool VerifyDataLayoutCompatibility(const std::string &module_dl, const std::string &lib_dl)
Definition: util.cpp:603
static llvm::VectorType * FloatVectorType
Definition: llvmutil.h:94
static llvm::Type * Int64Type
Definition: llvmutil.h:75
static llvm::Type * Int8Type
Definition: llvmutil.h:72
virtual llvm::DIType GetDIType(llvm::DIDescriptor scope) const =0
static llvm::VectorType * Int64VectorType
Definition: llvmutil.h:93
Header file with declarations for various LLVM utility stuff.
ISA getISA() const
Definition: ispc.h:267
static llvm::Type * Int64PointerType
Definition: llvmutil.h:82
bool hasRcpd() const
Definition: ispc.h:305
static llvm::Type * FloatPointerType
Definition: llvmutil.h:83
static void lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable)
Definition: builtins.cpp:274
Representation of a range of positions in a source file.
Definition: ispc.h:134
static llvm::Type * Int16PointerType
Definition: llvmutil.h:80
bool generateDebuggingSymbols
Definition: ispc.h:607
static const AtomicType * VaryingBool
Definition: type.h:346
bool fastMaskedVload
Definition: ispc.h:441
const char * name
Definition: ispc.h:138
void markFuncWithTargetAttr(llvm::Function *func)
Definition: ispc.cpp:1477
void Warning(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:410
static const AtomicType * VaryingInt64
Definition: type.h:354
#define FATAL(message)
Definition: util.h:113
int yyparse()
static const AtomicType * UniformUInt8
Definition: type.h:350
static llvm::Type * Int64VectorPointerType
Definition: llvmutil.h:100
static llvm::Type * Int32Type
Definition: llvmutil.h:74
MathLib mathLib
Definition: ispc.h:548
static llvm::Type * DoublePointerType
Definition: llvmutil.h:84
bool hasTrigonometry() const
Definition: ispc.h:301
#define ISPC_MAX_NVEC
Definition: ispc.h:68
static bool Equal(const Type *a, const Type *b)
Definition: type.cpp:3619
static const AtomicType * VaryingUInt16
Definition: type.h:351
static const AtomicType * VaryingInt8
Definition: type.h:347
static const AtomicType * UniformFloat
Definition: type.h:353
static const AtomicType * UniformInt32
Definition: type.h:349
Type representing a function (return type + argument types)
Definition: type.h:883
Representation of a program symbol.
Definition: sym.h:63
Interface class that defines the type abstraction.
Definition: type.h:101
static const AtomicType * UniformDouble
Definition: type.h:356
Expr abstract base class and expression implementations.
static const AtomicType * Void
Definition: type.h:357
static llvm::VectorType * MaskType
Definition: llvmutil.h:86
int forceAlignment
Definition: ispc.h:648
yy_buffer_state * yy_scan_string(const char *)
static llvm::VectorType * DoubleVectorType
Definition: llvmutil.h:95
static void lDefineConstantInt(const char *name, int val, llvm::Module *module, SymbolTable *symbolTable)
Definition: builtins.cpp:939
static llvm::VectorType * Int16VectorType
Definition: llvmutil.h:91
static const AtomicType * VaryingUInt8
Definition: type.h:350
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
static const AtomicType * UniformInt64
Definition: type.h:354
llvm::LLVMContext * ctx
Definition: ispc.h:632
static const AtomicType * UniformInt16
Definition: type.h:348
const Type * type
Definition: sym.h:84
llvm::DIBuilder * diBuilder
Definition: module.h:161
void Debug(SourcePos p, const char *format,...) PRINTF_FUNC
Definition: util.cpp:398
bool is32Bit() const
Definition: ispc.h:273
static const Type * lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned)
Definition: builtins.cpp:96
static const AtomicType * VaryingUInt32
Definition: type.h:352
static const AtomicType * VaryingFloat
Definition: type.h:353
static void lCreateSymbol(const std::string &name, const Type *returnType, llvm::SmallVector< const Type *, 8 > &argTypes, const llvm::FunctionType *ftype, llvm::Function *func, SymbolTable *symbolTable)
Definition: builtins.cpp:173
static const AtomicType * UniformInt8
Definition: type.h:347
File with declarations for classes related to type representation.
bool hasTranscendentals() const
Definition: ispc.h:299