|
Intel SPMD Program Compiler
1.3.0
|
/*
  Copyright (c) 2010-2012, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
  met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.


   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
   PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** @file ispc.h
    @brief Main ispc header file
*/

#ifndef ISPC_H
#define ISPC_H

#define ISPC_VERSION "1.3.0"

// The build must select exactly which LLVM release is being compiled
// against by defining one of these macros; anything else is rejected.
#if !defined(LLVM_3_0) && !defined(LLVM_3_1) && !defined(LLVM_3_2)
#error "Only LLVM 3.0, 3.1, and the 3.2 development branch are supported"
#endif

// Host platform detection; at most one of the ISPC_IS_* macros is defined.
#if defined(_WIN32) || defined(_WIN64)
#define ISPC_IS_WINDOWS
#elif defined(__linux__)
#define ISPC_IS_LINUX
#elif defined(__APPLE__)
#define ISPC_IS_APPLE
#endif

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <vector>
#include <string>

/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
    targets.
 */
#define ISPC_MAX_NVEC 64

// Forward declarations of a number of widely-used LLVM types
namespace llvm {
    class BasicBlock;
    class Constant;
    class ConstantValue;
    class DIBuilder;
    class DIDescriptor;
    class DIFile;
    class DIType;
    class Function;
    class FunctionType;
    class LLVMContext;
    class Module;
    class Target;
    class TargetMachine;
    class Type;
    class Value;
}

// Forward declarations of ispc's own types.  Several of these share a name
// with an LLVM type above; the LLVM ones are always referred to with an
// explicit llvm:: qualifier in this header.
class ArrayType;
class AST;
class ASTNode;
class AtomicType;
class FunctionEmitContext;
class Expr;
class ExprList;
class Function;
class FunctionType;
class Module;
class PointerType;
class Stmt;
class Symbol;
class SymbolTable;
class Type;
struct VariableDeclaration;

/** Storage class specifiers.  (NOTE(review): the meaning of each
    enumerator is inferred from its name only; confirm against the code
    that consumes them.) */
enum StorageClass {
    SC_NONE,
    SC_EXTERN,
    SC_STATIC,
    SC_TYPEDEF,
    SC_EXTERN_C
};


/** @brief Representation of a range of positions in a source file.

    This class represents a range of characters in a source file
    (e.g. those that span a token's definition), from starting line and
    column to ending line and column.  (These values are tracked by the
    lexing code).  Both lines and columns are counted starting from one.
 */
struct SourcePos {
    /** All arguments are optional.  (NOTE(review): the constructor is
        defined elsewhere; presumably the arguments initialize name,
        first_line, first_column, last_line and last_column in that
        order -- confirm against the definition.) */
    SourcePos(const char *n = NULL, int fl = 0, int fc = 0,
              int ll = 0, int lc = 0);

    const char *name;
    int first_line;
    int first_column;
    int last_line;
    int last_column;

    /** Prints the filename and line/column range to standard output. */
    void Print() const;

    /** Returns a LLVM DIFile object that represents the SourcePos's file */
    llvm::DIFile GetDIFile() const;

    bool operator==(const SourcePos &p2) const;
};


/** Returns a SourcePos that encompasses the extent of both of the given
    extents. */
SourcePos Union(const SourcePos &p1, const SourcePos &p2);



// Assert

extern void DoAssert(const char *file, int line, const char *expr);
extern void DoAssertPos(SourcePos pos, const char *file, int line, const char *expr);

// Both macros expand to a single void expression, so they can be used
// anywhere an expression statement can.  When expr evaluates to false, the
// corresponding DoAssert*() function is called with the current file, line,
// and the stringified text of expr.
#define Assert(expr) \
    ((void)((expr) ? 0 : ((void)DoAssert (__FILE__, __LINE__, #expr), 0)))

#define AssertPos(pos, expr) \
    ((void)((expr) ? 0 : ((void)DoAssertPos (pos, __FILE__, __LINE__, #expr), 0)))


/** @brief Structure that defines a compilation target

    This structure defines a compilation target for the ispc compiler.
*/
struct Target {
    /** Initializes the given Target pointer for a target of the given
        name, if the name is a known target.  Returns true if the
        target was initialized and false if the name is unknown. */
    static bool GetTarget(const char *arch, const char *cpu, const char *isa,
                          bool pic, Target *);

    /** Returns a comma-delimited string giving the names of the currently
        supported target ISAs. */
    static const char *SupportedTargetISAs();

    /** Returns a comma-delimited string giving the names of the currently
        supported target CPUs. */
    static std::string SupportedTargetCPUs();

    /** Returns a comma-delimited string giving the names of the currently
        supported target architectures. */
    static const char *SupportedTargetArchs();

    /** Returns a triple string specifying the target architecture, vendor,
        and environment. */
    std::string GetTripleString() const;

    /** Returns the LLVM TargetMachine object corresponding to this
        target. */
    llvm::TargetMachine *GetTargetMachine() const;

    /** Returns a string like "avx" encoding the target. */
    const char *GetISAString() const;

    /** Returns the size of the given type */
    llvm::Value *SizeOf(llvm::Type *type,
                        llvm::BasicBlock *insertAtEnd);

    /** Given a structure type and an element number in the structure,
        returns a value corresponding to the number of bytes from the start
        of the structure where the element is located. */
    llvm::Value *StructOffset(llvm::Type *type,
                              int element, llvm::BasicBlock *insertAtEnd);

    /** llvm Target object representing this target. */
    const llvm::Target *target;

    /** Enumerator giving the instruction sets that the compiler can
        target.  These should be ordered from "worse" to "better" in that
        if a processor supports multiple target ISAs, then the most
        flexible/performant of them will appear last in the enumerant.  Note
        also that __best_available_isa() needs to be updated if ISAs are
        added or the enumerant values are reordered.  */
    enum ISA { SSE2, SSE4, AVX, AVX11, AVX2, GENERIC, NUM_ISAS };

    /** Instruction set being compiled to. */
    ISA isa;

    /** Target system architecture.  (e.g. "x86-64", "x86"). */
    std::string arch;

    /** Is the target architecture 32 or 64 bit */
    bool is32Bit;

    /** Target CPU. (e.g. "corei7", "corei7-avx", ..) */
    std::string cpu;

    /** Target-specific attributes to pass along to the LLVM backend */
    std::string attributes;

    /** Native vector width of the vector instruction set.  Note that this
        value is directly derived from the ISA being used (e.g. it's 4 for
        SSE, 8 for AVX, etc.) */
    int nativeVectorWidth;

    /** Actual vector width currently being compiled to.  This may be an
        integer multiple of the native vector width, for example if we're
        "doubling up" and compiling 8-wide on a 4-wide SSE system. */
    int vectorWidth;

    /** Indicates whether position independent code should be generated. */
    bool generatePIC;

    /** Is there overhead associated with masking on the target
        architecture; e.g. there is on SSE, due to extra blends and the
        like, but there isn't with an ISA that supports masking
        natively. */
    bool maskingIsFree;

    /** How many bits are used to store each element of the mask: e.g. this
        is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
        the generic target. */
    int maskBitCount;

    /** Indicates whether the target has native support for float/half
        conversions. */
    bool hasHalf;

    /** Indicates whether there is an ISA random number instruction. */
    bool hasRand;

    /** Indicates whether the target has support for transcendentals (beyond
        sqrt, which we assume that all of them handle). */
    bool hasTranscendentals;
};


/** @brief Structure that collects optimization options

    This structure collects all of the options related to optimization of
    generated code.
*/
struct Opt {
    Opt();

    /** Optimization level.  Currently, the only valid values are 0,
        indicating essentially no optimization, and 1, indicating as much
        optimization as possible. */
    int level;

    /** Indicates whether "fast and loose" numerically unsafe optimizations
        should be performed.  This is false by default. */
    bool fastMath;

    /** Indicates whether a vector load should be issued for masked loads
        on platforms that don't have a native masked vector load.  (This may
        lead to accessing memory up to programCount-1 elements past the end of
        arrays, so is unsafe in general.) */
    bool fastMaskedVload;

    /** Indicates when loops should be unrolled (when doing so seems like
        it will make sense). */
    bool unrollLoops;

    /** Indicates if addressing math will be done with 32-bit math, even on
        64-bit systems.  (This is generally noticeably more efficient,
        though at the cost of addressing >2GB).
     */
    bool force32BitAddressing;

    /** Indicates whether Assert() statements should be ignored (for
        performance in the generated code). */
    bool disableAsserts;

    /** If enabled, disables the various optimizations that kick in when
        the execution mask can be determined to be "all on" at compile
        time. */
    bool disableMaskAllOnOptimizations;

    /** If enabled, the various __pseudo* memory ops (gather/scatter,
        masked load/store) are left in their __pseudo* form, for better
        understanding of the structure of generated code when reading
        it. */
    bool disableHandlePseudoMemoryOps;

    /** On targets that don't have a masked store instruction but do have a
        blending instruction, by default, we simulate masked stores by
        loading the old value, blending, and storing the result.  This can
        potentially be unsafe in multi-threaded code, in that it writes to
        locations that aren't supposed to be written to.  Setting this
        value to true disables this work-around, and instead implements
        masked stores by 'scalarizing' them, so that we iterate over the
        SIMD lanes and do a scalar write for the ones that are running. */
    bool disableBlendedMaskedStores;

    /** Disables the 'coherent control flow' constructs in the
        language. (e.g. this causes "cif" statements to be demoted to "if"
        statements.)  This is likely only useful for measuring the impact
        of coherent control flow. */
    bool disableCoherentControlFlow;

    /** Disables uniform control flow optimizations (e.g. this changes an
        "if" statement with a uniform condition to have a varying
        condition).  This is likely only useful for measuring the impact of
        uniform control flow. */
    bool disableUniformControlFlow;

    /** Disables the backend optimizations related to gather/scatter
        (e.g. transforming gather from sequential locations to an unaligned
        load, etc.)  This is likely only useful for measuring the impact of
        these optimizations. */
    bool disableGatherScatterOptimizations;

    /** Disables the optimization that demotes masked stores to regular
        stores when the store is happening at the same control flow level
        where the variable was declared.  This is likely only useful for
        measuring the impact of this optimization. */
    bool disableMaskedStoreToStore;

    /** Disables the optimization that detects when the execution mask is
        all on and emits code for gathers and scatters that doesn't loop
        over the SIMD lanes but just does the scalar loads and stores
        directly. */
    bool disableGatherScatterFlattening;

    /** Disables the optimizations that detect when arrays are being
        indexed with 'uniform' values and issue scalar loads/stores rather
        than gathers/scatters.  This is likely only useful for measuring
        the impact of this optimization. */
    bool disableUniformMemoryOptimizations;

    /** Disables optimizations that coalesce incoherent scalar memory
        access from gathers into wider vector operations, when possible. */
    bool disableCoalescing;
};

/** @brief This structure collects together a number of global variables.

    This structure collects a number of global variables that mostly
    represent parameter settings for this compilation run.  In particular,
    none of these values should change after compilation begins; their
    values are all set during command-line argument processing or very
    early during the compiler's execution, before any files are parsed.
  */
struct Globals {
    Globals();

    /** Optimization option settings */
    Opt opt;
    /** Compilation target information */
    Target target;

    /** There are a number of math libraries that can be used for
        transcendentals and the like during program compilation. */
    enum MathLib { Math_ISPC, Math_ISPCFast, Math_SVML, Math_System };
    MathLib mathLib;

    /** Records whether the ispc standard library should be made available
        to the program during compilations. (Default is true.) */
    bool includeStdlib;

    /** Indicates whether the C pre-processor should be run over the
        program source before compiling it.  (Default is true.) */
    bool runCPP;

    /** When \c true, voluminous debugging output will be printed during
        ispc's execution. */
    bool debugPrint;

    /** Indicates whether all warning messages should be suppressed. */
    bool disableWarnings;

    /** Indicates whether warnings should be issued as errors. */
    bool warningsAsErrors;

    /** Indicates whether line wrapping of error messages to the terminal
        width should be disabled. */
    bool disableLineWrap;

    /** Indicates whether additional warnings should be issued about
        possible performance pitfalls. */
    bool emitPerfWarnings;

    /** Indicates whether all printed output should be suppressed. */
    bool quiet;

    /** Always use ANSI escape sequences to colorize warning and error
        messages, even if piping output to a file, etc. */
    bool forceColoredOutput;

    /** Indicates whether calls should be emitted in the program to an
        externally-defined program instrumentation function. (See the
        "Instrumenting your ispc programs" section in the user's
        manual.) */
    bool emitInstrumentation;

    /** Indicates whether ispc should generate debugging symbols for the
        program in its output. */
    bool generateDebuggingSymbols;

    /** If true, function names are mangled by appending the target ISA and
        vector width to them. */
    bool mangleFunctionsWithTarget;

    /** If enabled, the lexer will randomly replace some tokens returned
        with other tokens, in order to test error condition handling in the
        compiler. */
    bool enableFuzzTest;

    /** Seed for random number generator used for fuzz testing. */
    int fuzzTestSeed;

    /** Global LLVMContext object */
    llvm::LLVMContext *ctx;

    /** Current working directory when the ispc compiler starts
        execution. */
    char currentDirectory[1024];

    /** Arguments to pass along to the C pre-processor, if it is run on the
        program before compilation. */
    std::vector<std::string> cppArgs;

    /** Additional user-provided directories to search when processing
        #include directives in the preprocessor. */
    std::vector<std::string> includePath;
};

/** Named integer constants.  (NOTE(review): the COST_* names suggest
    relative cost estimates for individual language constructs, and the two
    trailing *_COST names look like thresholds; how any of them are consumed
    is not visible from this header -- confirm against their users.)

    COST_COHERENT_BREAK_CONTINE is misspelt ("CONTINE"), but the name is
    part of the public interface and is therefore left unchanged. */
enum {
    COST_ASSIGN = 1,
    COST_COHERENT_BREAK_CONTINE = 4,
    COST_COMPLEX_ARITH_OP = 4,
    COST_DELETE = 32,
    COST_DEREF = 4,
    COST_FUNCALL = 4,
    COST_FUNPTR_UNIFORM = 12,
    COST_FUNPTR_VARYING = 24,
    COST_GATHER = 8,
    COST_GOTO = 4,
    COST_LOAD = 2,
    COST_NEW = 32,
    COST_REGULAR_BREAK_CONTINUE = 2,
    COST_RETURN = 4,
    COST_SELECT = 4,
    COST_SIMPLE_ARITH_LOGIC_OP = 1,
    COST_SYNC = 32,
    COST_TASK_LAUNCH = 32,
    COST_TYPECAST_COMPLEX = 4,
    COST_TYPECAST_SIMPLE = 1,
    COST_UNIFORM_IF = 2,
    COST_VARYING_IF = 3,
    COST_UNIFORM_LOOP = 4,
    COST_VARYING_LOOP = 6,
    COST_UNIFORM_SWITCH = 4,
    COST_VARYING_SWITCH = 12,
    COST_ASSERT = 8,

    CHECK_MASK_AT_FUNCTION_START_COST = 16,
    PREDICATE_SAFE_IF_STATEMENT_COST = 6,
};

// Declared here, defined elsewhere: the process-wide Globals instance and
// a Module pointer.
extern Globals *g;
extern Module *m;

#endif // ISPC_H
1.7.5.1