Intel SPMD Program Compiler  1.3.0
builtins.cpp
Go to the documentation of this file.
00001 /*
00002   Copyright (c) 2010-2012, Intel Corporation
00003   All rights reserved.
00004 
00005   Redistribution and use in source and binary forms, with or without
00006   modification, are permitted provided that the following conditions are
00007   met:
00008 
00009     * Redistributions of source code must retain the above copyright
00010       notice, this list of conditions and the following disclaimer.
00011 
00012     * Redistributions in binary form must reproduce the above copyright
00013       notice, this list of conditions and the following disclaimer in the
00014       documentation and/or other materials provided with the distribution.
00015 
00016     * Neither the name of Intel Corporation nor the names of its
00017       contributors may be used to endorse or promote products derived from
00018       this software without specific prior written permission.
00019 
00020 
00021    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
00022    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00023    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00024    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
00025    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
00032 */
00033 
00034 /** @file builtins.cpp
00035     @brief Definitions of functions related to setting up the standard library 
00036            and other builtins.
00037 */
00038 
00039 #include "builtins.h"
00040 #include "type.h"
00041 #include "util.h"
00042 #include "sym.h"
00043 #include "expr.h"
00044 #include "llvmutil.h"
00045 #include "module.h"
00046 #include "ctx.h"
00047 
00048 #include <math.h>
00049 #include <stdlib.h>
00050 #include <llvm/LLVMContext.h>
00051 #include <llvm/Module.h>
00052 #include <llvm/Type.h>
00053 #include <llvm/DerivedTypes.h>
00054 #include <llvm/Instructions.h>
00055 #include <llvm/Intrinsics.h>
00056 #include <llvm/Linker.h>
00057 #include <llvm/Target/TargetMachine.h>
00058 #include <llvm/ADT/Triple.h>
00059 #include <llvm/Support/MemoryBuffer.h>
00060 #include <llvm/Bitcode/ReaderWriter.h>
00061 
00062 extern int yyparse();
00063 struct yy_buffer_state;
00064 extern yy_buffer_state *yy_scan_string(const char *);
00065 
00066 
00067 /** Given an LLVM type, try to find the equivalent ispc type.  Note that
00068     this is an under-constrained problem due to LLVM's type representations
00069     carrying less information than ispc's.  (For example, LLVM doesn't
00070     distinguish between signed and unsigned integers in its types.)
00071 
00072     Because this function is only used for generating ispc declarations of
00073     functions defined in LLVM bitcode in the builtins-*.ll files, in practice
00074     we can get enough of what we need for the relevant cases to make things
00075     work, partially with the help of the intAsUnsigned parameter, which
00076     indicates whether LLVM integer types should be treated as being signed
00077     or unsigned.
00078 
00079  */
00080 static const Type *
00081 lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
00082     if (t == LLVMTypes::VoidType)
00083         return AtomicType::Void;
00084 
00085     // uniform
00086     else if (t == LLVMTypes::BoolType)
00087         return AtomicType::UniformBool;
00088     else if (t == LLVMTypes::Int8Type)
00089         return intAsUnsigned ? AtomicType::UniformUInt8 : AtomicType::UniformInt8;
00090     else if (t == LLVMTypes::Int16Type)
00091         return intAsUnsigned ? AtomicType::UniformUInt16 : AtomicType::UniformInt16;
00092     else if (t == LLVMTypes::Int32Type)
00093         return intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32;
00094     else if (t == LLVMTypes::FloatType)
00095         return AtomicType::UniformFloat;
00096     else if (t == LLVMTypes::DoubleType)
00097         return AtomicType::UniformDouble;
00098     else if (t == LLVMTypes::Int64Type)
00099         return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
00100 
00101     // varying
00102     if (LLVMTypes::MaskType != LLVMTypes::Int32VectorType &&
00103         t == LLVMTypes::MaskType)
00104         return AtomicType::VaryingBool;
00105     else if (t == LLVMTypes::Int8VectorType)
00106         return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
00107     else if (t == LLVMTypes::Int16VectorType)
00108         return intAsUnsigned ? AtomicType::VaryingUInt16 : AtomicType::VaryingInt16;
00109     else if (t == LLVMTypes::Int32VectorType)
00110         return intAsUnsigned ? AtomicType::VaryingUInt32 : AtomicType::VaryingInt32;
00111     else if (t == LLVMTypes::FloatVectorType)
00112         return AtomicType::VaryingFloat;
00113     else if (t == LLVMTypes::DoubleVectorType)
00114         return AtomicType::VaryingDouble;
00115     else if (t == LLVMTypes::Int64VectorType)
00116         return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
00117 
00118     // pointers to uniform
00119     else if (t == LLVMTypes::Int8PointerType)
00120         return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt8 :
00121                                        AtomicType::UniformInt8);
00122     else if (t == LLVMTypes::Int16PointerType)
00123         return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt16 :
00124                                        AtomicType::UniformInt16);
00125     else if (t == LLVMTypes::Int32PointerType)
00126         return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt32 :
00127                                        AtomicType::UniformInt32);
00128     else if (t == LLVMTypes::Int64PointerType)
00129         return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt64 :
00130                                        AtomicType::UniformInt64);
00131     else if (t == LLVMTypes::FloatPointerType)
00132         return PointerType::GetUniform(AtomicType::UniformFloat);
00133     else if (t == LLVMTypes::DoublePointerType)
00134         return PointerType::GetUniform(AtomicType::UniformDouble);
00135 
00136     // pointers to varying
00137     else if (t == LLVMTypes::Int8VectorPointerType)
00138         return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt8 :
00139                                        AtomicType::VaryingInt8);
00140     else if (t == LLVMTypes::Int16VectorPointerType)
00141         return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt16 :
00142                                        AtomicType::VaryingInt16);
00143     else if (t == LLVMTypes::Int32VectorPointerType)
00144         return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt32 :
00145                                        AtomicType::VaryingInt32);
00146     else if (t == LLVMTypes::Int64VectorPointerType)
00147         return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt64 :
00148                                        AtomicType::VaryingInt64);
00149     else if (t == LLVMTypes::FloatVectorPointerType)
00150         return PointerType::GetUniform(AtomicType::VaryingFloat);
00151     else if (t == LLVMTypes::DoubleVectorPointerType)
00152         return PointerType::GetUniform(AtomicType::VaryingDouble);
00153 
00154     return NULL;
00155 }
00156 
00157 
00158 static void
00159 lCreateSymbol(const std::string &name, const Type *returnType, 
00160               llvm::SmallVector<const Type *, 8> &argTypes, 
00161               const llvm::FunctionType *ftype, llvm::Function *func, 
00162               SymbolTable *symbolTable) {
00163     SourcePos noPos;
00164     noPos.name = "__stdlib";
00165 
00166     FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
00167 
00168     Debug(noPos, "Created builtin symbol \"%s\" [%s]\n", name.c_str(),
00169           funcType->GetString().c_str());
00170 
00171     Symbol *sym = new Symbol(name, noPos, funcType);
00172     sym->function = func;
00173     symbolTable->AddFunction(sym);
00174 }
00175 
00176 
00177 /** Given an LLVM function declaration, synthesize the equivalent ispc
00178     symbol for the function (if possible).  Returns true on success, false
00179     on failure.
00180  */
00181 static bool
00182 lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
00183     SourcePos noPos;
00184     noPos.name = "__stdlib";
00185 
00186     const llvm::FunctionType *ftype = func->getFunctionType();
00187     std::string name = func->getName();
00188 
00189     if (name.size() < 3 || name[0] != '_' || name[1] != '_')
00190         return false;
00191 
00192     Debug(SourcePos(), "Attempting to create ispc symbol for function \"%s\".",
00193           name.c_str());
00194 
00195     // An unfortunate hack: we want this builtin function to have the
00196     // signature "int __sext_varying_bool(bool)", but the ispc function
00197     // symbol creation code below assumes that any LLVM vector of i32s is a
00198     // varying int32.  Here, we need that to be interpreted as a varying
00199     // bool, so just have a one-off override for that one...
00200     if (g->target.maskBitCount != 1 && name == "__sext_varying_bool") {
00201         const Type *returnType = AtomicType::VaryingInt32;
00202         llvm::SmallVector<const Type *, 8> argTypes;
00203         argTypes.push_back(AtomicType::VaryingBool);
00204 
00205         FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
00206 
00207         Symbol *sym = new Symbol(name, noPos, funcType);
00208         sym->function = func;
00209         symbolTable->AddFunction(sym);
00210         return true;
00211     }
00212 
00213     // If the function has any parameters with integer types, we'll make
00214     // two Symbols for two overloaded versions of the function, one with
00215     // all of the integer types treated as signed integers and one with all
00216     // of them treated as unsigned.
00217     for (int i = 0; i < 2; ++i) {
00218         bool intAsUnsigned = (i == 1);
00219 
00220         const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType(),
00221                                                      intAsUnsigned);
00222         if (returnType == NULL) {
00223             Debug(SourcePos(), "Failed: return type not representable for "
00224                   "builtin %s.", name.c_str());
00225             // return type not representable in ispc -> not callable from ispc
00226             return false;
00227         }
00228 
00229         // Iterate over the arguments and try to find their equivalent ispc
00230         // types.  Track if any of the arguments has an integer type.
00231         bool anyIntArgs = false;
00232         llvm::SmallVector<const Type *, 8> argTypes;
00233         for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
00234             const llvm::Type *llvmArgType = ftype->getParamType(j);
00235             const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
00236             if (type == NULL) {
00237                 Debug(SourcePos(), "Failed: type of parameter %d not "
00238                       "representable for builtin %s", j, name.c_str());
00239                 return false;
00240             }
00241             anyIntArgs |= 
00242                 (Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
00243             argTypes.push_back(type);
00244         }
00245 
00246         // Always create the symbol the first time through, in particular
00247         // so that we get symbols for things with no integer types!
00248         if (i == 0 || anyIntArgs == true)
00249             lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable);
00250     }
00251 
00252     return true;
00253 }
00254 
00255 
00256 /** Given an LLVM module, create ispc symbols for the functions in the
00257     module.
00258  */
00259 static void
00260 lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
00261 #if 0
00262     // FIXME: handle globals?
00263     Assert(module->global_empty());
00264 #endif
00265 
00266     llvm::Module::iterator iter;
00267     for (iter = module->begin(); iter != module->end(); ++iter) {
00268         llvm::Function *func = iter;
00269         lCreateISPCSymbol(func, symbolTable);
00270     }
00271 }
00272 
00273 
00274 /** In many of the builtins-*.ll files, we have declarations of various LLVM
00275     intrinsics that are then used in the implementation of various target-
00276     specific functions.  This function loops over all of the intrinsic 
00277     declarations and makes sure that the signature we have in our .ll file
00278     matches the signature of the actual intrinsic.
00279 */
00280 static void
00281 lCheckModuleIntrinsics(llvm::Module *module) {
00282     llvm::Module::iterator iter;
00283     for (iter = module->begin(); iter != module->end(); ++iter) {
00284         llvm::Function *func = iter;
00285         if (!func->isIntrinsic())
00286             continue;
00287 
00288         const std::string funcName = func->getName().str();
00289         // Work around http://llvm.org/bugs/show_bug.cgi?id=10438; only
00290         // check the llvm.x86.* intrinsics for now...
00291         if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
00292             llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
00293             Assert(id != 0);
00294             llvm::Type *intrinsicType = 
00295                 llvm::Intrinsic::getType(*g->ctx, id);
00296             intrinsicType = llvm::PointerType::get(intrinsicType, 0);
00297             Assert(func->getType() == intrinsicType);
00298         }
00299     }
00300 }
00301 
00302 
00303 /** We'd like to have all of these functions declared as 'internal' in
00304     their respective bitcode files so that if they aren't needed by the
00305     user's program they are elimiated from the final output.  However, if
00306     we do so, then they aren't brought in by the LinkModules() call below
00307     since they aren't yet used by anything in the module they're being
00308     linked with (in LLVM 3.1, at least).
00309 
00310     Therefore, we don't declare them as internal when we first define them,
00311     but instead mark them as internal after they've been linked in.  This
00312     is admittedly a kludge.
00313  */
00314 static void
00315 lSetInternalFunctions(llvm::Module *module) {
00316     const char *names[] = {
00317         "__add_float",
00318         "__add_int32",
00319         "__add_uniform_double",
00320         "__add_uniform_int32",
00321         "__add_uniform_int64",
00322         "__add_varying_double",
00323         "__add_varying_int32",
00324         "__add_varying_int64",
00325         "__aos_to_soa3_float",
00326         "__aos_to_soa3_float16",
00327         "__aos_to_soa3_float4",
00328         "__aos_to_soa3_float8",
00329         "__aos_to_soa3_int32",
00330         "__aos_to_soa4_float",
00331         "__aos_to_soa4_float16",
00332         "__aos_to_soa4_float4",
00333         "__aos_to_soa4_float8",
00334         "__aos_to_soa4_int32",
00335         "__atomic_add_int32_global",
00336         "__atomic_add_int64_global",
00337         "__atomic_add_uniform_int32_global",
00338         "__atomic_add_uniform_int64_global",
00339         "__atomic_and_int32_global",
00340         "__atomic_and_int64_global",
00341         "__atomic_and_uniform_int32_global",
00342         "__atomic_and_uniform_int64_global",
00343         "__atomic_compare_exchange_double_global",
00344         "__atomic_compare_exchange_float_global",
00345         "__atomic_compare_exchange_int32_global",
00346         "__atomic_compare_exchange_int64_global",
00347         "__atomic_compare_exchange_uniform_double_global",
00348         "__atomic_compare_exchange_uniform_float_global",
00349         "__atomic_compare_exchange_uniform_int32_global",
00350         "__atomic_compare_exchange_uniform_int64_global",
00351         "__atomic_max_uniform_int32_global",
00352         "__atomic_max_uniform_int64_global",
00353         "__atomic_min_uniform_int32_global",
00354         "__atomic_min_uniform_int64_global",
00355         "__atomic_or_int32_global",
00356         "__atomic_or_int64_global",
00357         "__atomic_or_uniform_int32_global",
00358         "__atomic_or_uniform_int64_global",
00359         "__atomic_sub_int32_global",
00360         "__atomic_sub_int64_global",
00361         "__atomic_sub_uniform_int32_global",
00362         "__atomic_sub_uniform_int64_global",
00363         "__atomic_swap_double_global",
00364         "__atomic_swap_float_global",
00365         "__atomic_swap_int32_global",
00366         "__atomic_swap_int64_global",
00367         "__atomic_swap_uniform_double_global",
00368         "__atomic_swap_uniform_float_global",
00369         "__atomic_swap_uniform_int32_global",
00370         "__atomic_swap_uniform_int64_global",
00371         "__atomic_umax_uniform_uint32_global",
00372         "__atomic_umax_uniform_uint64_global",
00373         "__atomic_umin_uniform_uint32_global",
00374         "__atomic_umin_uniform_uint64_global",
00375         "__atomic_xor_int32_global",
00376         "__atomic_xor_int64_global",
00377         "__atomic_xor_uniform_int32_global",
00378         "__atomic_xor_uniform_int64_global",
00379         "__broadcast_double",
00380         "__broadcast_float",
00381         "__broadcast_i16",
00382         "__broadcast_i32",
00383         "__broadcast_i64",
00384         "__broadcast_i8",
00385         "__ceil_uniform_double",
00386         "__ceil_uniform_float",
00387         "__ceil_varying_double",
00388         "__ceil_varying_float",
00389         "__clock",
00390         "__count_trailing_zeros_i32",
00391         "__count_trailing_zeros_i64",
00392         "__count_leading_zeros_i32",
00393         "__count_leading_zeros_i64",
00394         "__delete_uniform",
00395         "__delete_varying",
00396         "__do_assert_uniform",
00397         "__do_assert_varying",
00398         "__do_print", 
00399         "__doublebits_uniform_int64",
00400         "__doublebits_varying_int64",
00401         "__exclusive_scan_add_double",
00402         "__exclusive_scan_add_float",
00403         "__exclusive_scan_add_i32",
00404         "__exclusive_scan_add_i64",
00405         "__exclusive_scan_and_i32",
00406         "__exclusive_scan_and_i64",
00407         "__exclusive_scan_or_i32",
00408         "__exclusive_scan_or_i64",
00409         "__extract_int16",
00410         "__extract_int32",
00411         "__extract_int64",
00412         "__extract_int8",
00413         "__fastmath",
00414         "__float_to_half_uniform",
00415         "__float_to_half_varying",
00416         "__floatbits_uniform_int32",
00417         "__floatbits_varying_int32",
00418         "__floor_uniform_double",
00419         "__floor_uniform_float",
00420         "__floor_varying_double",
00421         "__floor_varying_float",
00422         "__half_to_float_uniform",
00423         "__half_to_float_varying",
00424         "__insert_int16",
00425         "__insert_int32",
00426         "__insert_int64",
00427         "__insert_int8",
00428         "__intbits_uniform_double",
00429         "__intbits_uniform_float",
00430         "__intbits_varying_double",
00431         "__intbits_varying_float",
00432         "__max_uniform_double",
00433         "__max_uniform_float",
00434         "__max_uniform_int32",
00435         "__max_uniform_int64",
00436         "__max_uniform_uint32",
00437         "__max_uniform_uint64",
00438         "__max_varying_double",
00439         "__max_varying_float",
00440         "__max_varying_int32",
00441         "__max_varying_int64",
00442         "__max_varying_uint32",
00443         "__max_varying_uint64",
00444         "__memory_barrier",
00445         "__memcpy32",
00446         "__memcpy64",
00447         "__memmove32",
00448         "__memmove64",
00449         "__memset32",
00450         "__memset64",
00451         "__min_uniform_double",
00452         "__min_uniform_float",
00453         "__min_uniform_int32",
00454         "__min_uniform_int64",
00455         "__min_uniform_uint32",
00456         "__min_uniform_uint64",
00457         "__min_varying_double",
00458         "__min_varying_float",
00459         "__min_varying_int32",
00460         "__min_varying_int64",
00461         "__min_varying_uint32",
00462         "__min_varying_uint64",
00463         "__movmsk",
00464         "__new_uniform",
00465         "__new_varying32",
00466         "__new_varying64",
00467         "__num_cores",
00468         "__packed_load_active",
00469         "__packed_store_active",
00470         "__pause",
00471         "__popcnt_int32",
00472         "__popcnt_int64",
00473         "__prefetch_read_uniform_1",
00474         "__prefetch_read_uniform_2",
00475         "__prefetch_read_uniform_3",
00476         "__prefetch_read_uniform_nt",
00477         "__rcp_uniform_float",
00478         "__rcp_varying_float",
00479         "__reduce_add_double",
00480         "__reduce_add_float",
00481         "__reduce_add_int32",
00482         "__reduce_add_int64",
00483         "__reduce_add_uint32",
00484         "__reduce_add_uint64",
00485         "__reduce_equal_double",
00486         "__reduce_equal_float",
00487         "__reduce_equal_int32",
00488         "__reduce_equal_int64",
00489         "__reduce_max_double",
00490         "__reduce_max_float",
00491         "__reduce_max_int32",
00492         "__reduce_max_int64",
00493         "__reduce_max_uint32",
00494         "__reduce_max_uint64",
00495         "__reduce_min_double",
00496         "__reduce_min_float",
00497         "__reduce_min_int32",
00498         "__reduce_min_int64",
00499         "__reduce_min_uint32",
00500         "__reduce_min_uint64",
00501         "__rotate_double",
00502         "__rotate_float",
00503         "__rotate_i16",
00504         "__rotate_i32",
00505         "__rotate_i64",
00506         "__rotate_i8",
00507         "__round_uniform_double",
00508         "__round_uniform_float",
00509         "__round_varying_double",
00510         "__round_varying_float",
00511         "__rsqrt_uniform_float",
00512         "__rsqrt_varying_float",
00513         "__sext_uniform_bool",
00514         "__sext_varying_bool",
00515         "__shuffle2_double",
00516         "__shuffle2_float",
00517         "__shuffle2_i16",
00518         "__shuffle2_i32",
00519         "__shuffle2_i64",
00520         "__shuffle2_i8",
00521         "__shuffle_double",
00522         "__shuffle_float",
00523         "__shuffle_i16",
00524         "__shuffle_i32",
00525         "__shuffle_i64",
00526         "__shuffle_i8",
00527         "__soa_to_aos3_float",
00528         "__soa_to_aos3_float16",
00529         "__soa_to_aos3_float4",
00530         "__soa_to_aos3_float8",
00531         "__soa_to_aos3_int32",
00532         "__soa_to_aos4_float",
00533         "__soa_to_aos4_float16",
00534         "__soa_to_aos4_float4",
00535         "__soa_to_aos4_float8",
00536         "__soa_to_aos4_int32",
00537         "__sqrt_uniform_double",
00538         "__sqrt_uniform_float",
00539         "__sqrt_varying_double",
00540         "__sqrt_varying_float",
00541         "__stdlib_acosf",
00542         "__stdlib_asinf",
00543         "__stdlib_atan",
00544         "__stdlib_atan2",
00545         "__stdlib_atan2f",
00546         "__stdlib_atanf",
00547         "__stdlib_cos",
00548         "__stdlib_cosf",
00549         "__stdlib_exp",
00550         "__stdlib_expf",
00551         "__stdlib_log",
00552         "__stdlib_logf",
00553         "__stdlib_pow",
00554         "__stdlib_powf",
00555         "__stdlib_sin",
00556         "__stdlib_sincos",
00557         "__stdlib_sincosf",
00558         "__stdlib_sinf",
00559         "__stdlib_tan",
00560         "__stdlib_tanf",
00561         "__svml_sin",
00562         "__svml_cos",
00563         "__svml_sincos",
00564         "__svml_tan",
00565         "__svml_atan",
00566         "__svml_atan2",
00567         "__svml_exp",
00568         "__svml_log",
00569         "__svml_pow",
00570         "__undef_uniform",
00571         "__undef_varying",
00572         "__vec4_add_float",
00573         "__vec4_add_int32",
00574         "__vselect_float",
00575         "__vselect_i32",
00576     };
00577 
00578     int count = sizeof(names) / sizeof(names[0]);
00579     for (int i = 0; i < count; ++i) {
00580         llvm::Function *f = module->getFunction(names[i]);
00581         if (f != NULL && f->empty() == false)
00582             f->setLinkage(llvm::GlobalValue::InternalLinkage);
00583     }
00584 }
00585 
00586 
00587 /** This utility function takes serialized binary LLVM bitcode and adds its
00588     definitions to the given module.  Functions in the bitcode that can be
00589     mapped to ispc functions are also added to the symbol table.
00590 
00591     @param bitcode     Binary LLVM bitcode (e.g. the contents of a *.bc file)
00592     @param length      Length of the bitcode buffer
00593     @param module      Module to link the bitcode into
00594     @param symbolTable Symbol table to add definitions to
00595  */
00596 void
00597 AddBitcodeToModule(const unsigned char *bitcode, int length,
00598                    llvm::Module *module, SymbolTable *symbolTable) {
00599     std::string bcErr;
00600     llvm::StringRef sb = llvm::StringRef((char *)bitcode, length);
00601     llvm::MemoryBuffer *bcBuf = llvm::MemoryBuffer::getMemBuffer(sb);
00602     llvm::Module *bcModule = llvm::ParseBitcodeFile(bcBuf, *g->ctx, &bcErr);
00603     if (!bcModule)
00604         Error(SourcePos(), "Error parsing stdlib bitcode: %s", bcErr.c_str());
00605     else {
00606         // FIXME: this feels like a bad idea, but the issue is that when we
00607         // set the llvm::Module's target triple in the ispc Module::Module
00608         // constructor, we start by calling llvm::sys::getHostTriple() (and
00609         // then change the arch if needed).  Somehow that ends up giving us
00610         // strings like 'x86_64-apple-darwin11.0.0', while the stuff we
00611         // compile to bitcode with clang has module triples like
00612         // 'i386-apple-macosx10.7.0'.  And then LLVM issues a warning about
00613         // linking together modules with incompatible target triples..
00614         llvm::Triple mTriple(m->module->getTargetTriple());
00615         llvm::Triple bcTriple(bcModule->getTargetTriple());
00616         Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
00617                mTriple.getArch() == bcTriple.getArch());
00618         Assert(bcTriple.getVendor() == llvm::Triple::UnknownVendor ||
00619                mTriple.getVendor() == bcTriple.getVendor());
00620         bcModule->setTargetTriple(mTriple.str());
00621 
00622         std::string(linkError);
00623         if (llvm::Linker::LinkModules(module, bcModule, 
00624                                       llvm::Linker::DestroySource,
00625                                       &linkError))
00626             Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
00627         lSetInternalFunctions(module);
00628         if (symbolTable != NULL)
00629             lAddModuleSymbols(module, symbolTable);
00630         lCheckModuleIntrinsics(module);
00631     }
00632 }
00633 
00634 
00635 /** Utility routine that defines a constant int32 with given value, adding
00636     the symbol to both the ispc symbol table and the given LLVM module.
00637  */
00638 static void
00639 lDefineConstantInt(const char *name, int val, llvm::Module *module,
00640                    SymbolTable *symbolTable) {
00641     Symbol *sym = 
00642         new Symbol(name, SourcePos(), AtomicType::UniformInt32->GetAsConstType(),
00643                    SC_STATIC);
00644     sym->constValue = new ConstExpr(sym->type, val, SourcePos());
00645     llvm::Type *ltype = LLVMTypes::Int32Type;
00646     llvm::Constant *linit = LLVMInt32(val);
00647     // Use WeakODRLinkage rather than InternalLinkage so that a definition
00648     // survives even if it's not used in the module, so that the symbol is
00649     // there in the debugger.
00650     llvm::GlobalValue::LinkageTypes linkage = g->generateDebuggingSymbols ?
00651         llvm::GlobalValue::WeakODRLinkage : llvm::GlobalValue::InternalLinkage;
00652     sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true, linkage,
00653                                                linit, name);
00654     symbolTable->AddVariable(sym);
00655 
00656     if (m->diBuilder != NULL) {
00657         llvm::DIFile file;
00658         llvm::DIType diType = sym->type->GetDIType(file);
00659         Assert(diType.Verify());
00660         // FIXME? DWARF says that this (and programIndex below) should
00661         // have the DW_AT_artifical attribute.  It's not clear if this
00662         // matters for anything though.
00663         llvm::DIGlobalVariable var = 
00664             m->diBuilder->createGlobalVariable(name, 
00665                                                file,
00666                                                0 /* line */,
00667                                                diType,
00668                                                true /* static */,
00669                                                sym->storagePtr);
00670         Assert(var.Verify());
00671     }
00672 }
00673 
00674 
00675 
00676 static void
00677 lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
00678                        SymbolTable *symbolTable) {
00679     llvm::SmallVector<const Type *, 8> args;
00680     FunctionType *ft = new FunctionType(AtomicType::UniformInt32, args, SourcePos());
00681     Symbol *sym = new Symbol(name, SourcePos(), ft, SC_STATIC);
00682 
00683     llvm::Function *func = module->getFunction(name);
00684     Assert(func != NULL); // it should be declared already...
00685     func->addFnAttr(llvm::Attribute::AlwaysInline);
00686     llvm::BasicBlock *bblock = llvm::BasicBlock::Create(*g->ctx, "entry", func, 0);
00687     llvm::ReturnInst::Create(*g->ctx, LLVMInt32(val), bblock);
00688 
00689     sym->function = func;
00690     symbolTable->AddVariable(sym);
00691 }
00692 
00693 
00694 
00695 static void
00696 lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
00697     Symbol *sym = 
00698         new Symbol("programIndex", SourcePos(), 
00699                    AtomicType::VaryingInt32->GetAsConstType(), SC_STATIC);
00700 
00701     int pi[ISPC_MAX_NVEC];
00702     for (int i = 0; i < g->target.vectorWidth; ++i)
00703         pi[i] = i;
00704     sym->constValue = new ConstExpr(sym->type, pi, SourcePos());
00705 
00706     llvm::Type *ltype = LLVMTypes::Int32VectorType;
00707     llvm::Constant *linit = LLVMInt32Vector(pi);
00708     // See comment in lDefineConstantInt() for why WeakODRLinkage is used here
00709     llvm::GlobalValue::LinkageTypes linkage = g->generateDebuggingSymbols ?
00710         llvm::GlobalValue::WeakODRLinkage : llvm::GlobalValue::InternalLinkage;
00711     sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true, linkage,
00712                                                linit, sym->name.c_str());
00713     symbolTable->AddVariable(sym);
00714 
00715     if (m->diBuilder != NULL) {
00716         llvm::DIFile file;
00717         llvm::DIType diType = sym->type->GetDIType(file);
00718         Assert(diType.Verify());
00719         llvm::DIGlobalVariable var =
00720             m->diBuilder->createGlobalVariable(sym->name.c_str(), 
00721                                                file,
00722                                                0 /* line */,
00723                                                diType,
00724                                                false /* static */,
00725                                                sym->storagePtr);
00726         Assert(var.Verify());
00727     }
00728 }
00729 
00730 
00731 void
00732 DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module,
00733              bool includeStdlibISPC) {
00734     // Add the definitions from the compiled builtins-c.c file
00735     if (g->target.is32Bit) {
00736         extern unsigned char builtins_bitcode_c_32[];
00737         extern int builtins_bitcode_c_32_length;
00738         AddBitcodeToModule(builtins_bitcode_c_32, builtins_bitcode_c_32_length, 
00739                            module, symbolTable);
00740     }
00741     else {
00742         extern unsigned char builtins_bitcode_c_64[];
00743         extern int builtins_bitcode_c_64_length;
00744         AddBitcodeToModule(builtins_bitcode_c_64, builtins_bitcode_c_64_length, 
00745                            module, symbolTable);
00746     }
00747 
00748     // Next, add the target's custom implementations of the various needed
00749     // builtin functions (e.g. __masked_store_32(), etc).
00750     switch (g->target.isa) {
00751     case Target::SSE2:
00752         extern unsigned char builtins_bitcode_sse2[];
00753         extern int builtins_bitcode_sse2_length;
00754         extern unsigned char builtins_bitcode_sse2_x2[];
00755         extern int builtins_bitcode_sse2_x2_length;
00756         switch (g->target.vectorWidth) {
00757         case 4: 
00758             AddBitcodeToModule(builtins_bitcode_sse2, builtins_bitcode_sse2_length, 
00759                                module, symbolTable);
00760             break;
00761         case 8:
00762             AddBitcodeToModule(builtins_bitcode_sse2_x2, builtins_bitcode_sse2_x2_length, 
00763                                module, symbolTable);
00764             break;
00765         default:
00766             FATAL("logic error in DefineStdlib");
00767         }
00768         break;
00769     case Target::SSE4:
00770         extern unsigned char builtins_bitcode_sse4[];
00771         extern int builtins_bitcode_sse4_length;
00772         extern unsigned char builtins_bitcode_sse4_x2[];
00773         extern int builtins_bitcode_sse4_x2_length;
00774         switch (g->target.vectorWidth) {
00775         case 4: 
00776             AddBitcodeToModule(builtins_bitcode_sse4,
00777                                builtins_bitcode_sse4_length, 
00778                                module, symbolTable);
00779             break;
00780         case 8:
00781             AddBitcodeToModule(builtins_bitcode_sse4_x2, 
00782                                builtins_bitcode_sse4_x2_length, 
00783                                module, symbolTable);
00784             break;
00785         default:
00786             FATAL("logic error in DefineStdlib");
00787         }
00788         break;
00789     case Target::AVX:
00790         switch (g->target.vectorWidth) {
00791         case 8:
00792             extern unsigned char builtins_bitcode_avx1[];
00793             extern int builtins_bitcode_avx1_length;
00794             AddBitcodeToModule(builtins_bitcode_avx1, 
00795                                builtins_bitcode_avx1_length, 
00796                                module, symbolTable);
00797             break;
00798         case 16:
00799             extern unsigned char builtins_bitcode_avx1_x2[];
00800             extern int builtins_bitcode_avx1_x2_length;
00801             AddBitcodeToModule(builtins_bitcode_avx1_x2, 
00802                                builtins_bitcode_avx1_x2_length,
00803                                module,  symbolTable);
00804             break;
00805         default:
00806             FATAL("logic error in DefineStdlib");
00807         }
00808         break;
00809     case Target::AVX11:
00810         switch (g->target.vectorWidth) {
00811         case 8:
00812             extern unsigned char builtins_bitcode_avx11[];
00813             extern int builtins_bitcode_avx11_length;
00814             AddBitcodeToModule(builtins_bitcode_avx11, 
00815                                builtins_bitcode_avx11_length, 
00816                                module, symbolTable);
00817             break;
00818         case 16:
00819             extern unsigned char builtins_bitcode_avx11_x2[];
00820             extern int builtins_bitcode_avx11_x2_length;
00821             AddBitcodeToModule(builtins_bitcode_avx11_x2, 
00822                                builtins_bitcode_avx11_x2_length,
00823                                module,  symbolTable);
00824             break;
00825         default:
00826             FATAL("logic error in DefineStdlib");
00827         }
00828         break;
00829     case Target::AVX2:
00830         switch (g->target.vectorWidth) {
00831         case 8:
00832             extern unsigned char builtins_bitcode_avx2[];
00833             extern int builtins_bitcode_avx2_length;
00834             AddBitcodeToModule(builtins_bitcode_avx2, 
00835                                builtins_bitcode_avx2_length, 
00836                                module, symbolTable);
00837             break;
00838         case 16:
00839             extern unsigned char builtins_bitcode_avx2_x2[];
00840             extern int builtins_bitcode_avx2_x2_length;
00841             AddBitcodeToModule(builtins_bitcode_avx2_x2, 
00842                                builtins_bitcode_avx2_x2_length,
00843                                module,  symbolTable);
00844             break;
00845         default:
00846             FATAL("logic error in DefineStdlib");
00847         }
00848         break;
00849     case Target::GENERIC:
00850         switch (g->target.vectorWidth) {
00851         case 4:
00852             extern unsigned char builtins_bitcode_generic_4[];
00853             extern int builtins_bitcode_generic_4_length;
00854             AddBitcodeToModule(builtins_bitcode_generic_4, 
00855                                builtins_bitcode_generic_4_length, 
00856                                module, symbolTable);
00857             break;
00858         case 8:
00859             extern unsigned char builtins_bitcode_generic_8[];
00860             extern int builtins_bitcode_generic_8_length;
00861             AddBitcodeToModule(builtins_bitcode_generic_8, 
00862                                builtins_bitcode_generic_8_length, 
00863                                module, symbolTable);
00864             break;
00865         case 16:
00866             extern unsigned char builtins_bitcode_generic_16[];
00867             extern int builtins_bitcode_generic_16_length;
00868             AddBitcodeToModule(builtins_bitcode_generic_16, 
00869                                builtins_bitcode_generic_16_length, 
00870                                module, symbolTable);
00871             break;
00872         case 32:
00873             extern unsigned char builtins_bitcode_generic_32[];
00874             extern int builtins_bitcode_generic_32_length;
00875             AddBitcodeToModule(builtins_bitcode_generic_32, 
00876                                builtins_bitcode_generic_32_length, 
00877                                module, symbolTable);
00878             break;
00879         case 64:
00880             extern unsigned char builtins_bitcode_generic_64[];
00881             extern int builtins_bitcode_generic_64_length;
00882             AddBitcodeToModule(builtins_bitcode_generic_64, 
00883                                builtins_bitcode_generic_64_length, 
00884                                module, symbolTable);
00885             break;
00886     case 1:
00887             extern unsigned char builtins_bitcode_generic_1[];
00888             extern int builtins_bitcode_generic_1_length;
00889             AddBitcodeToModule(builtins_bitcode_generic_1, 
00890                                builtins_bitcode_generic_1_length, 
00891                                module, symbolTable);
00892             break;
00893         default:
00894             FATAL("logic error in DefineStdlib");
00895         }
00896         break;
00897     default:
00898         FATAL("logic error");
00899     }
00900 
00901     // define the 'programCount' builtin variable
00902     lDefineConstantInt("programCount", g->target.vectorWidth, module, symbolTable);
00903 
00904     // define the 'programIndex' builtin
00905     lDefineProgramIndex(module, symbolTable);
00906 
00907     // Define __math_lib stuff.  This is used by stdlib.ispc, for example, to
00908     // figure out which math routines to end up calling...
00909     lDefineConstantInt("__math_lib", (int)g->mathLib, module, symbolTable);
00910     lDefineConstantInt("__math_lib_ispc", (int)Globals::Math_ISPC, module,
00911                        symbolTable);
00912     lDefineConstantInt("__math_lib_ispc_fast", (int)Globals::Math_ISPCFast, 
00913                        module, symbolTable);
00914     lDefineConstantInt("__math_lib_svml", (int)Globals::Math_SVML, module,
00915                        symbolTable);
00916     lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module,
00917                        symbolTable);
00918     lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload,
00919                            module, symbolTable);
00920 
00921     lDefineConstantInt("__have_native_half", g->target.hasHalf, module, 
00922                        symbolTable);
00923     lDefineConstantInt("__have_native_rand", g->target.hasRand, module, 
00924                        symbolTable);
00925     lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals,
00926                        module, symbolTable);
00927 
00928     if (includeStdlibISPC) {
00929         // If the user wants the standard library to be included, parse the
00930         // serialized version of the stdlib.ispc file to get its
00931         // definitions added.
00932       if (g->target.isa == Target::GENERIC&&g->target.vectorWidth!=1) { // 1 wide uses x86 stdlib
00933             extern char stdlib_generic_code[];
00934             yy_scan_string(stdlib_generic_code);
00935             yyparse();
00936         }
00937         else {
00938             extern char stdlib_x86_code[];
00939             yy_scan_string(stdlib_x86_code);
00940             yyparse();
00941         }
00942     }
00943 }