Intel SPMD Program Compiler  1.3.0
ispc.h
Go to the documentation of this file.
00001 /*
00002   Copyright (c) 2010-2012, Intel Corporation
00003   All rights reserved.
00004 
00005   Redistribution and use in source and binary forms, with or without
00006   modification, are permitted provided that the following conditions are
00007   met:
00008 
00009     * Redistributions of source code must retain the above copyright
00010       notice, this list of conditions and the following disclaimer.
00011 
00012     * Redistributions in binary form must reproduce the above copyright
00013       notice, this list of conditions and the following disclaimer in the
00014       documentation and/or other materials provided with the distribution.
00015 
00016     * Neither the name of Intel Corporation nor the names of its
00017       contributors may be used to endorse or promote products derived from
00018       this software without specific prior written permission.
00019 
00020 
00021    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
00022    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00023    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00024    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
00025    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
00032 */
00033 
00034 /** @file ispc.h
00035     @brief Main ispc header file
00036 */
00037 
00038 #ifndef ISPC_H
00039 #define ISPC_H
00040 // Version string of this ispc release.
00041 #define ISPC_VERSION "1.3.0"
00042 // One of LLVM_3_0, LLVM_3_1 or LLVM_3_2 must already be defined when this header is read (none of them is defined in this header); otherwise compilation stops here.
00043 #if !defined(LLVM_3_0) && !defined(LLVM_3_1) && !defined(LLVM_3_2)
00044 #error "Only LLVM 3.0, 3.1, and the 3.2 development branch are supported"
00045 #endif
00046 // Platform detection: defines at most one of ISPC_IS_WINDOWS, ISPC_IS_LINUX or ISPC_IS_APPLE from the compiler's predefined macros.  On any other platform none of them is defined.
00047 #if defined(_WIN32) || defined(_WIN64)
00048 #define ISPC_IS_WINDOWS
00049 #elif defined(__linux__)
00050 #define ISPC_IS_LINUX
00051 #elif defined(__APPLE__)
00052 #define ISPC_IS_APPLE
00053 #endif
00054 
00055 #include <stdint.h>
00056 #include <stdlib.h>
00057 #include <stdio.h>
00058 #include <vector>
00059 #include <string>
00060 
00061 /** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
00062     targets.
00063  */
00064 #define ISPC_MAX_NVEC 64
00065 
00066 // Forward declarations of a number of widely-used LLVM types.  This header includes no LLVM headers itself and only uses these names in declarations.
00067 namespace llvm {
00068     class BasicBlock;
00069     class Constant;
00070     class ConstantValue;
00071     class DIBuilder;
00072     class DIDescriptor;
00073     class DIFile;
00074     class DIType;
00075     class Function;
00076     class FunctionType;
00077     class LLVMContext;
00078     class Module;
00079     class Target;
00080     class TargetMachine;
00081     class Type;
00082     class Value;
00083 }
00084 
00085 // Forward declarations of global-scope types that are defined elsewhere.  Note that Function, FunctionType, Module and Type declared here are distinct from the llvm:: classes of the same names declared above.
00086 class ArrayType;
00087 class AST;
00088 class ASTNode;
00089 class AtomicType;
00090 class FunctionEmitContext;
00091 class Expr;
00092 class ExprList;
00093 class Function;
00094 class FunctionType;
00095 class Module;
00096 class PointerType;
00097 class Stmt;
00098 class Symbol;
00099 class SymbolTable;
00100 class Type;
00101 struct VariableDeclaration;
00102 /** Storage class values.  Judging by the names these stand for: no specifier, extern, static, typedef and extern "C" (their use is not visible in this header -- confirm). */
00103 enum StorageClass {
00104     SC_NONE,
00105     SC_EXTERN,
00106     SC_STATIC,
00107     SC_TYPEDEF,
00108     SC_EXTERN_C
00109 };
00110 
00111 
00112 /** @brief Representation of a range of positions in a source file.
00113 
00114     This structure represents a range of characters in a source file
00115     (e.g. those that span a token's definition), from starting line and
00116     column to ending line and column.  (These values are tracked by the
00117     lexing code.)  Both lines and columns are counted starting from one, so the constructor's default arguments (NULL name, all zeros) lie outside the range of real positions.
00118  */
00119 struct SourcePos {
00120     SourcePos(const char *n = NULL, int fl = 0, int fc = 0,
00121               int ll = 0, int lc = 0);  // presumably: name, first line/column, last line/column (definition not shown here)
00122 
00123     const char *name;  // source file name (raw C string; ownership is not visible in this header)
00124     int first_line;  // line on which the range starts
00125     int first_column;  // column at which the range starts
00126     int last_line;  // line on which the range ends
00127     int last_column;  // column at which the range ends
00128 
00129     /** Prints the filename and line/column range to standard output. */
00130     void Print() const;
00131 
00132     /** Returns an LLVM DIFile object that represents the SourcePos's file */
00133     llvm::DIFile GetDIFile() const;
00134     /** Equality comparison against another SourcePos (definition not shown in this header). */
00135     bool operator==(const SourcePos &p2) const;
00136 };
00137 
00138 
00139 /** Returns a SourcePos that encompasses both of the given extents,
00140     p1 and p2. */
00141 SourcePos Union(const SourcePos &p1, const SourcePos &p2);
00142 
00143 
00144 
00145 // Assert: assertion macros.  DoAssert() and DoAssertPos() are defined elsewhere; the macros call them only when the asserted expression is false.
00146 
00147 extern void DoAssert(const char *file, int line, const char *expr);
00148 extern void DoAssertPos(SourcePos pos, const char *file, int line, const char *expr);
00149 // Assert(expr): evaluates expr once; if it is false, calls DoAssert() with the current file, line and the stringified expression.  The macro as a whole is a void expression.
00150 #define Assert(expr)                                            \
00151     ((void)((expr) ? 0 : ((void)DoAssert (__FILE__, __LINE__, #expr), 0)))
00152 // AssertPos(pos, expr): like Assert(), but also forwards pos to DoAssertPos().  pos is only evaluated when expr is false.
00153 #define AssertPos(pos, expr)                                     \
00154     ((void)((expr) ? 0 : ((void)DoAssertPos (pos, __FILE__, __LINE__, #expr), 0)))
00155 
00156 
00157 /** @brief Structure that defines a compilation target
00158 
00159     This structure defines a compilation target for the ispc compiler.
00160 */
00161 struct Target {
00162     /** Initializes the Target pointed to by the last argument for the
00163         given arch, cpu and isa names, if they name a known target.  Returns
00164         true if the target was initialized and false if it is unknown. */
00165     static bool GetTarget(const char *arch, const char *cpu, const char *isa,
00166                           bool pic, Target *);
00167 
00168     /** Returns a comma-delimited string giving the names of the currently
00169         supported target ISAs. */
00170     static const char *SupportedTargetISAs();
00171 
00172     /** Returns a comma-delimited string giving the names of the currently
00173         supported target CPUs. */
00174     static std::string SupportedTargetCPUs();
00175 
00176     /** Returns a comma-delimited string giving the names of the currently
00177         supported target architectures. */
00178     static const char *SupportedTargetArchs();
00179 
00180     /** Returns a triple string specifying the target architecture, vendor,
00181         and environment. */
00182     std::string GetTripleString() const;
00183 
00184     /** Returns the LLVM TargetMachine object corresponding to this
00185         target. */
00186     llvm::TargetMachine *GetTargetMachine() const;
00187     
00188     /** Returns a string like "avx" naming the target's ISA. */
00189     const char *GetISAString() const;
00190 
00191     /** Returns the size of the given type as an llvm::Value.  (insertAtEnd is presumably the basic block that any needed instructions are appended to -- confirm.) */
00192     llvm::Value *SizeOf(llvm::Type *type,
00193                         llvm::BasicBlock *insertAtEnd);
00194 
00195     /** Given a structure type and an element number in the structure,
00196         returns a value corresponding to the number of bytes from the start
00197         of the structure where the element is located. */
00198     llvm::Value *StructOffset(llvm::Type *type,
00199                               int element, llvm::BasicBlock *insertAtEnd);
00200 
00201     /** llvm Target object representing this target. */
00202     const llvm::Target *target;
00203 
00204     /** Enumerator giving the instruction sets that the compiler can
00205         target.  These should be ordered from "worse" to "better" in that
00206         if a processor supports multiple target ISAs, then the most
00207         flexible/performant of them will appear last in the enumerant.  Note
00208         also that __best_available_isa() needs to be updated if ISAs are
00209         added or the enumerant values are reordered.  */
00210     enum ISA { SSE2, SSE4, AVX, AVX11, AVX2, GENERIC, NUM_ISAS };
00211 
00212     /** Instruction set being compiled to. */
00213     ISA isa;
00214 
00215     /** Target system architecture.  (e.g. "x86-64", "x86"). */
00216     std::string arch;
00217 
00218     /** True if the target architecture is 32-bit, false if it is 64-bit. */
00219     bool is32Bit;
00220 
00221     /** Target CPU. (e.g. "corei7", "corei7-avx", ..) */
00222     std::string cpu;
00223 
00224     /** Target-specific attributes to pass along to the LLVM backend */
00225     std::string attributes;
00226 
00227     /** Native vector width of the vector instruction set.  Note that this
00228         value is directly derived from the ISA being used (e.g. it's 4 for
00229         SSE, 8 for AVX, etc.) */
00230     int nativeVectorWidth;
00231 
00232     /** Actual vector width currently being compiled to.  This may be an
00233         integer multiple of the native vector width, for example if we're
00234         "doubling up" and compiling 8-wide on a 4-wide SSE system. */
00235     int vectorWidth;
00236 
00237     /** Indicates whether position independent code should be generated. */
00238     bool generatePIC;
00239 
00240     /** Is there overhead associated with masking on the target
00241         architecture; e.g. there is on SSE, due to extra blends and the
00242         like, but there isn't with an ISA that supports masking
00243         natively. */
00244     bool maskingIsFree;
00245 
00246     /** How many bits are used to store each element of the mask: e.g. this
00247         is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
00248         the generic target. */
00249     int maskBitCount;
00250 
00251     /** Indicates whether the target has native support for float/half
00252         conversions. */
00253     bool hasHalf;
00254 
00255     /** Indicates whether there is an ISA random number instruction. */
00256     bool hasRand;
00257 
00258     /** Indicates whether the target has support for transcendentals (beyond
00259         sqrt, which we assume that all of them handle). */
00260     bool hasTranscendentals;
00261 };
00262 
00263 
00264 /** @brief Structure that collects optimization options
00265 
00266     This structure collects all of the options related to optimization of
00267     generated code.
00268 */
00269 struct Opt {
00270     Opt();
00271     
00272     /** Optimization level.  Currently, the only valid values are 0,
00273         indicating essentially no optimization, and 1, indicating as much
00274         optimization as possible. */
00275     int level;
00276 
00277     /** Indicates whether "fast and loose" numerically unsafe optimizations
00278         should be performed.  This is false by default. */
00279     bool fastMath;
00280 
00281     /** Indicates whether a vector load should be issued for masked loads
00282         on platforms that don't have a native masked vector load.  (This may
00283         lead to accessing memory up to programCount-1 elements past the end of
00284         arrays, so is unsafe in general.) */
00285     bool fastMaskedVload;
00286 
00287     /** Indicates whether loops should be unrolled (when doing so seems like
00288         it will make sense). */
00289     bool unrollLoops;
00290 
00291     /** Indicates if addressing math will be done with 32-bit math, even on
00292         64-bit systems.  (This is generally noticeably more efficient,
00293         though at the cost of being able to address >2GB).
00294      */
00295     bool force32BitAddressing;
00296 
00297     /** Indicates whether Assert() statements should be ignored (for
00298         performance in the generated code). */
00299     bool disableAsserts;
00300 
00301     /** If enabled, disables the various optimizations that kick in when
00302         the execution mask can be determined to be "all on" at compile
00303         time. */
00304     bool disableMaskAllOnOptimizations;
00305 
00306     /** If enabled, the various __pseudo* memory ops (gather/scatter,
00307         masked load/store) are left in their __pseudo* form, for better
00308         understanding of the structure of generated code when reading
00309         it. */
00310     bool disableHandlePseudoMemoryOps;
00311 
00312     /** On targets that don't have a masked store instruction but do have a
00313         blending instruction, by default, we simulate masked stores by
00314         loading the old value, blending, and storing the result.  This can
00315         potentially be unsafe in multi-threaded code, in that it writes to
00316         locations that aren't supposed to be written to.  Setting this
00317         value to true disables this work-around, and instead implements
00318         masked stores by 'scalarizing' them, so that we iterate over the
00319         SIMD lanes and do a scalar write for the ones that are running. */
00320     bool disableBlendedMaskedStores;
00321 
00322     /** Disables the 'coherent control flow' constructs in the
00323         language. (e.g. this causes "cif" statements to be demoted to "if"
00324         statements.)  This is likely only useful for measuring the impact
00325         of coherent control flow. */
00326     bool disableCoherentControlFlow;
00327 
00328     /** Disables uniform control flow optimizations (e.g. this changes an
00329         "if" statement with a uniform condition to have a varying
00330         condition).  This is likely only useful for measuring the impact of
00331         uniform control flow. */
00332     bool disableUniformControlFlow;
00333 
00334     /** Disables the backend optimizations related to gather/scatter
00335         (e.g. transforming gather from sequential locations to an unaligned
00336         load, etc.)  This is likely only useful for measuring the impact of
00337         these optimizations. */
00338     bool disableGatherScatterOptimizations;
00339 
00340     /** Disables the optimization that demotes masked stores to regular
00341         stores when the store is happening at the same control flow level
00342         where the variable was declared.  This is likely only useful for
00343         measuring the impact of this optimization. */
00344     bool disableMaskedStoreToStore;
00345 
00346     /** Disables the optimization that detects when the execution mask is
00347         all on and emits code for gathers and scatters that doesn't loop
00348         over the SIMD lanes but just does the scalar loads and stores
00349         directly. */
00350     bool disableGatherScatterFlattening;
00351 
00352     /** Disables the optimizations that detect when arrays are being
00353         indexed with 'uniform' values and issue scalar loads/stores rather
00354         than gathers/scatters.  This is likely only useful for measuring
00355         the impact of this optimization. */
00356     bool disableUniformMemoryOptimizations;
00357 
00358     /** Disables optimizations that coalesce incoherent scalar memory
00359         access from gathers into wider vector operations, when possible. */
00360     bool disableCoalescing;
00361 };
00362 
00363 /** @brief This structure collects together a number of global variables.
00364 
00365     This structure collects a number of global variables that mostly
00366     represent parameter settings for this compilation run.  In particular,
00367     none of these values should change after compilation begins; their
00368     values are all set during command-line argument processing or very
00369     early during the compiler's execution, before any files are parsed.
00370   */
00371 struct Globals {
00372     Globals();
00373 
00374     /** Optimization option settings */
00375     Opt opt;
00376     /** Compilation target information */
00377     Target target;
00378 
00379     /** There are a number of math libraries that can be used for
00380         transcendentals and the like during program compilation. */
00381     enum MathLib { Math_ISPC, Math_ISPCFast, Math_SVML, Math_System };
00382     MathLib mathLib;
00383 
00384     /** Records whether the ispc standard library should be made available
00385         to the program during compilations. (Default is true.) */
00386     bool includeStdlib;
00387 
00388     /** Indicates whether the C pre-processor should be run over the
00389         program source before compiling it.  (Default is true.) */
00390     bool runCPP;
00391 
00392     /** When \c true, voluminous debugging output will be printed during
00393         ispc's execution. */
00394     bool debugPrint;
00395 
00396     /** Indicates whether all warning messages should be suppressed. */
00397     bool disableWarnings;
00398 
00399     /** Indicates whether warnings should be issued as errors. */
00400     bool warningsAsErrors;
00401 
00402     /** Indicates whether line wrapping of error messages to the terminal
00403         width should be disabled. */
00404     bool disableLineWrap;
00405 
00406     /** Indicates whether additional warnings should be issued about
00407         possible performance pitfalls. */
00408     bool emitPerfWarnings;
00409 
00410     /** Indicates whether all printed output should be suppressed. */
00411     bool quiet;
00412 
00413     /** Always use ANSI escape sequences to colorize warning and error
00414         messages, even if piping output to a file, etc. */
00415     bool forceColoredOutput;
00416 
00417     /** Indicates whether calls should be emitted in the program to an
00418         externally-defined program instrumentation function. (See the
00419         "Instrumenting your ispc programs" section in the user's
00420         manual.) */
00421     bool emitInstrumentation; 
00422 
00423     /** Indicates whether ispc should generate debugging symbols for the
00424         program in its output. */
00425     bool generateDebuggingSymbols;
00426    
00427     /** If true, function names are mangled by appending the target ISA and
00428         vector width to them. */
00429     bool mangleFunctionsWithTarget;
00430 
00431     /** If enabled, the lexer will randomly replace some tokens returned
00432         with other tokens, in order to test error condition handling in the
00433         compiler. */
00434     bool enableFuzzTest;
00435 
00436     /** Seed for random number generator used for fuzz testing. */
00437     int fuzzTestSeed;
00438 
00439     /** Global LLVMContext object */
00440     llvm::LLVMContext *ctx;
00441 
00442     /** Current working directory when the ispc compiler starts
00443         execution (stored in a fixed 1024-byte buffer). */
00444     char currentDirectory[1024];
00445 
00446     /** Arguments to pass along to the C pre-processor, if it is run on the
00447         program before compilation. */
00448     std::vector<std::string> cppArgs;
00449 
00450     /** Additional user-provided directories to search when processing
00451         #include directives in the preprocessor. */
00452     std::vector<std::string> includePath;
00453 };
00454 /** Named integer constants: the COST_* values plus two *_COST constants.  Presumably relative cost estimates used by the compiler's heuristics -- their use is not visible in this header, confirm against callers. */
00455 enum {
00456     COST_ASSIGN = 1,
00457     COST_COHERENT_BREAK_CONTINE = 4,  // NOTE(review): "CONTINE" is misspelled (cf. COST_REGULAR_BREAK_CONTINUE below); left as is because renaming would break code that uses this name
00458     COST_COMPLEX_ARITH_OP = 4,
00459     COST_DELETE = 32,
00460     COST_DEREF = 4,
00461     COST_FUNCALL = 4,
00462     COST_FUNPTR_UNIFORM = 12,
00463     COST_FUNPTR_VARYING = 24,
00464     COST_GATHER = 8,
00465     COST_GOTO = 4,
00466     COST_LOAD = 2,
00467     COST_NEW = 32,
00468     COST_REGULAR_BREAK_CONTINUE = 2,
00469     COST_RETURN = 4,
00470     COST_SELECT = 4,
00471     COST_SIMPLE_ARITH_LOGIC_OP = 1,
00472     COST_SYNC = 32,
00473     COST_TASK_LAUNCH = 32,
00474     COST_TYPECAST_COMPLEX = 4,
00475     COST_TYPECAST_SIMPLE = 1,
00476     COST_UNIFORM_IF = 2,
00477     COST_VARYING_IF = 3,
00478     COST_UNIFORM_LOOP = 4,
00479     COST_VARYING_LOOP = 6,
00480     COST_UNIFORM_SWITCH = 4,
00481     COST_VARYING_SWITCH = 12,
00482     COST_ASSERT = 8,
00483 
00484     CHECK_MASK_AT_FUNCTION_START_COST = 16,
00485     PREDICATE_SAFE_IF_STATEMENT_COST = 6,
00486 };
00487 // Global pointers declared here and defined elsewhere: g points to the Globals settings structure; m points to a Module (the global-scope class, not llvm::Module).
00488 extern Globals *g;
00489 extern Module *m;
00490 
00491 #endif // ISPC_H