45 #ifdef ISPC_HOST_IS_WINDOWS 48 #define strcasecmp stricmp 51 #include <sys/types.h> 54 #include <llvm/CodeGen/TargetLowering.h> 55 #include <llvm/CodeGen/TargetSubtargetInfo.h> 56 #include <llvm/IR/DIBuilder.h> 57 #include <llvm/IR/DebugInfo.h> 58 #include <llvm/IR/Instructions.h> 59 #include <llvm/IR/LLVMContext.h> 60 #include <llvm/IR/Module.h> 62 #include <llvm/BinaryFormat/Dwarf.h> 63 #include <llvm/IR/Attributes.h> 64 #include <llvm/IR/DataLayout.h> 65 #include <llvm/Support/CodeGen.h> 66 #include <llvm/Support/Host.h> 67 #include <llvm/Support/TargetRegistry.h> 68 #include <llvm/Support/TargetSelect.h> 69 #include <llvm/Target/TargetMachine.h> 70 #include <llvm/Target/TargetOptions.h> 78 #if !defined(ISPC_HOST_IS_WINDOWS) && !defined(__arm__) && !defined(__aarch64__) 81 static void __cpuid(
int info[4],
int infoType) {
82 __asm__ __volatile__(
"cpuid" :
"=a"(info[0]),
"=b"(info[1]),
"=c"(info[2]),
"=d"(info[3]) :
"0"(infoType));
86 static void __cpuidex(
int info[4],
int level,
int count) {
87 __asm__ __volatile__(
"xchg{l}\t{%%}ebx, %1\n\t" 89 "xchg{l}\t{%%}ebx, %1\n\t" 90 :
"=a"(info[0]),
"=r"(info[1]),
"=c"(info[2]),
"=d"(info[3])
91 :
"0"(level),
"2"(count));
93 #endif // !ISPC_HOST_IS_WINDOWS && !__ARM__ && !__AARCH64__ 95 #if !defined(__arm__) && !defined(__aarch64__) 97 #if defined(ISPC_HOST_IS_WINDOWS) 99 unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
100 return (xcrFeatureMask & 6) == 6;
101 #else // !defined(ISPC_HOST_IS_WINDOWS) 106 __asm__ __volatile__(
".byte 0x0f, 0x01, 0xd0" :
"=a"(rEAX),
"=d"(rEDX) :
"c"(0));
107 return (rEAX & 6) == 6;
108 #endif // !defined(ISPC_HOST_IS_WINDOWS) 112 #if defined(ISPC_HOST_IS_WINDOWS) 115 unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
116 return (xcrFeatureMask & 0xE6) == 0xE6;
117 #else // !defined(ISPC_HOST_IS_WINDOWS) 122 __asm__ __volatile__(
".byte 0x0f, 0x01, 0xd0" :
"=a"(rEAX),
"=d"(rEDX) :
"c"(0));
123 return (rEAX & 0xE6) == 0xE6;
124 #endif // !defined(ISPC_HOST_IS_WINDOWS) 126 #endif // !__arm__ && !__aarch64__ 129 #if defined(__arm__) || defined(__aarch64__) 139 if ((info[2] & (1 << 27)) != 0 &&
140 (info2[1] & (1 << 5)) != 0 &&
141 (info2[1] & (1 << 16)) != 0 &&
147 if ((info2[1] & (1 << 17)) != 0 &&
148 (info2[1] & (1 << 28)) != 0 &&
149 (info2[1] & (1 << 30)) != 0 &&
150 (info2[1] & (1 << 31)) != 0) {
152 }
else if ((info2[1] & (1 << 26)) != 0 &&
153 (info2[1] & (1 << 27)) != 0 &&
154 (info2[1] & (1 << 28)) != 0) {
161 if ((info[2] & (1 << 27)) != 0 &&
165 if ((info[2] & (1 << 29)) != 0 &&
166 (info[2] & (1 << 30)) != 0 &&
167 (info2[1] & (1 << 5)) != 0) {
172 }
else if ((info[2] & (1 << 19)) != 0)
174 else if ((info[3] & (1 << 26)) != 0)
177 Error(
SourcePos(),
"Unable to detect supported SSE/AVX ISA. Exiting.");
254 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 265 #ifdef ISPC_ARM_ENABLED 283 std::vector<std::vector<std::string>>
names;
286 std::set<CPUtype>
Set(
int type, ...) {
287 std::set<CPUtype> retn;
291 va_start(args, type);
292 while ((type = va_arg(args,
int)) !=
CPU_None)
301 names = std::vector<std::vector<std::string>>(
sizeofCPUtype);
323 names[
CPU_PS4].push_back(
"btver2");
324 names[
CPU_PS4].push_back(
"ps4");
337 names[
CPU_KNL].push_back(
"knl");
339 names[
CPU_SKX].push_back(
"skx");
341 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+ 342 names[CPU_ICL].push_back(
"icelake-client");
343 names[CPU_ICL].push_back(
"icl");
346 #ifdef ISPC_ARM_ENABLED 347 names[CPU_CortexA15].push_back(
"cortex-a15");
349 names[CPU_CortexA9].push_back(
"cortex-a9");
351 names[CPU_CortexA35].push_back(
"cortex-a35");
353 names[CPU_CortexA53].push_back(
"cortex-a53");
355 names[CPU_CortexA57].push_back(
"cortex-a57");
369 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+ 396 #ifdef ISPC_ARM_ENABLED 406 std::stringstream CPUs;
409 if (names[i].size() > 1) {
410 CPUs <<
" (synonyms: " << names[i][1];
411 for (
int j = 2, je = names[i].size(); j < je; j++)
412 CPUs <<
", " << names[i][j];
415 if (i < sizeofCPUtype - 1)
423 return names[type][0];
430 for (
int j = 0, je = names[i].size(); (retn ==
CPU_None) && (j < je); j++)
431 if (!name.compare(names[i][j]))
439 return compat[what].find(with) != compat[what].end();
444 : m_target(NULL), m_targetMachine(NULL), m_dataLayout(NULL), m_valid(false), m_ispc_target(ispc_target),
445 m_isa(SSE2), m_arch(
Arch::
none), m_is32Bit(true), m_cpu(
""), m_attributes(
""), m_tf_attributes(NULL),
446 m_nativeVectorWidth(-1), m_nativeVectorAlignment(-1), m_dataTypeWidth(-1), m_vectorWidth(-1), m_generatePIC(pic),
447 m_maskingIsFree(false), m_maskBitCount(-1), m_hasHalf(false), m_hasRand(false), m_hasGather(false),
448 m_hasScatter(false), m_hasTranscendentals(false), m_hasTrigonometry(false), m_hasRsqrtd(false), m_hasRcpd(false),
449 m_hasVecPrefetch(false) {
452 std::string featuresString;
458 "Error: CPU type \"%s\" unknown. Supported" 475 "No --target specified on command-line." 476 " Using default system target \"%s\".",
477 target_string.c_str());
485 #ifdef ISPC_ARM_ENABLED 499 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0 534 "No --target specified on command-line." 535 " Using ISA \"%s\" based on specified CPU \"%s\".",
536 target_string.c_str(), cpu);
545 #ifdef ISPC_ARM_ENABLED 561 for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::targets().begin();
562 iter != llvm::TargetRegistry::targets().end(); ++iter) {
569 std::string error_message;
570 error_message =
"Invalid architecture \"";
572 error_message +=
"\"\nOptions: ";
573 llvm::TargetRegistry::iterator iter;
574 const char *separator =
"";
575 for (iter = llvm::TargetRegistry::targets().begin(); iter != llvm::TargetRegistry::targets().end(); ++iter) {
576 error_message += separator;
577 error_message += iter->getName();
580 error_message +=
".";
591 Error(
SourcePos(),
"arch = %s and target = %s is not a valid combination.", str_arch.c_str(),
592 target_string.c_str());
597 bool unsupported_target =
false;
770 #if ISPC_LLVM_VERSION >= ISPC_LLVM_8_0 // LLVM 8.0+ 787 this->
m_funcAttributes.push_back(std::make_pair(
"prefer-vector-width",
"256"));
788 this->
m_funcAttributes.push_back(std::make_pair(
"min-legal-vector-width",
"256"));
791 unsupported_target =
true;
812 this->
m_funcAttributes.push_back(std::make_pair(
"prefer-vector-width",
"256"));
813 this->
m_funcAttributes.push_back(std::make_pair(
"min-legal-vector-width",
"256"));
815 this->
m_funcAttributes.push_back(std::make_pair(
"prefer-vector-width",
"512"));
816 this->
m_funcAttributes.push_back(std::make_pair(
"min-legal-vector-width",
"512"));
903 #ifdef ISPC_ARM_ENABLED 905 this->
m_isa = Target::NEON;
915 this->
m_isa = Target::NEON;
925 this->
m_isa = Target::NEON;
935 this->
m_isa = Target::NEON;
949 unsupported_target =
true;
952 #ifdef ISPC_WASM_ENABLED 954 this->
m_isa = Target::WASM;
972 unsupported_target =
true;
978 unsupported_target =
true;
982 if (unsupported_target) {
986 FATAL(target_string.c_str());
989 #if defined(ISPC_ARM_ENABLED) && !defined(__arm__) 991 CPUID = CPU_CortexA9;
993 #if defined(ISPC_ARM_ENABLED) && !defined(__aarch64__) 995 CPUID = CPU_CortexA35;
1003 "The requested CPU (%s) is incompatible" 1004 " with the CPU required for %s target (%s)",
1016 llvm::Optional<llvm::Reloc::Model> relocModel;
1018 relocModel = llvm::Reloc::PIC_;
1020 llvm::TargetOptions options;
1021 #ifdef ISPC_ARM_ENABLED 1022 options.FloatABIType = llvm::FloatABI::Hard;
1025 this->
m_funcAttributes.push_back(std::make_pair(
"target-features",
"+crypto,+fp-armv8,+neon,+sha2"));
1027 this->
m_funcAttributes.push_back(std::make_pair(
"target-features",
"+neon,+fp16"));
1029 featuresString =
"+neon,+fp16";
1033 std::make_pair(
"target-features",
"+aes,+crc,+crypto,+fp-armv8,+neon,+sha2"));
1035 this->
m_funcAttributes.push_back(std::make_pair(
"target-features",
"+neon"));
1037 featuresString =
"+neon";
1041 options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
1050 llvm::CodeGenOpt::Level cOptLevel = llvm::CodeGenOpt::Level::Aggressive;
1052 case Globals::CodegenOptLevel::None:
1053 cOptLevel = llvm::CodeGenOpt::Level::None;
1056 case Globals::CodegenOptLevel::Aggressive:
1057 cOptLevel = llvm::CodeGenOpt::Level::Aggressive;
1071 std::string dl_string;
1072 dl_string =
m_targetMachine->createDataLayout().getStringRepresentation();
1080 dl_string =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-" 1081 "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-" 1082 "f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
1096 llvm::AttrBuilder fattrBuilder;
1097 #ifdef ISPC_ARM_ENABLED 1098 if (
m_isa == Target::NEON)
1099 fattrBuilder.addAttribute(
"target-cpu", this->
m_cpu);
1102 fattrBuilder.addAttribute(f_attr.first, f_attr.second);
1111 printf(
"Target Triple: %s\n",
m_targetMachine->getTargetTriple().str().c_str());
1112 printf(
"Target CPU: %s\n",
m_targetMachine->getTargetCPU().str().c_str());
1113 printf(
"Target Feature String: %s\n",
m_targetMachine->getTargetFeatureString().str().c_str());
1125 llvm::Triple triple;
1129 triple.setArchName(
"i386");
1131 triple.setArchName(
"x86_64");
1143 triple.setVendor(llvm::Triple::VendorType::PC);
1144 triple.setOS(llvm::Triple::OSType::Win32);
1145 triple.setEnvironment(llvm::Triple::EnvironmentType::MSVC);
1150 triple.setArchName(
"i386");
1152 triple.setArchName(
"x86_64");
1154 triple.setArchName(
"armv7");
1156 triple.setArchName(
"aarch64");
1161 triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
1162 triple.setOS(llvm::Triple::OSType::Linux);
1164 triple.setEnvironment(llvm::Triple::EnvironmentType::GNU);
1166 triple.setEnvironment(llvm::Triple::EnvironmentType::GNUEABIHF);
1174 triple.setArchName(
"i386");
1176 triple.setArchName(
"amd64");
1178 triple.setArchName(
"armv7");
1180 triple.setArchName(
"aarch64");
1185 triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
1186 triple.setOS(llvm::Triple::OSType::FreeBSD);
1194 triple.setArch(llvm::Triple::ArchType::x86_64);
1195 triple.setVendor(llvm::Triple::VendorType::Apple);
1196 triple.setOS(llvm::Triple::OSType::MacOSX);
1200 triple.setArchName(
"i386");
1202 triple.setArchName(
"x86_64");
1204 triple.setArchName(
"armv7");
1206 triple.setArchName(
"aarch64");
1211 triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
1212 triple.setOS(llvm::Triple::OSType::Linux);
1213 triple.setEnvironment(llvm::Triple::EnvironmentType::Android);
1223 triple.setArchName(
"arm64");
1224 triple.setVendor(llvm::Triple::VendorType::Apple);
1225 triple.setOS(llvm::Triple::OSType::IOS);
1233 triple.setArch(llvm::Triple::ArchType::x86_64);
1234 triple.setVendor(llvm::Triple::VendorType::SCEI);
1235 triple.setOS(llvm::Triple::OSType::PS4);
1242 triple.setArch(llvm::Triple::ArchType::wasm32);
1243 triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
1244 triple.setOS(llvm::Triple::OSType::UnknownOS);
1251 return triple.str();
1259 #ifdef ISPC_ARM_ENABLED 1263 #ifdef ISPC_WASM_ENABLED 1282 FATAL(
"Unhandled target in ISAToString()");
1294 #ifdef ISPC_ARM_ENABLED 1296 return "neon-i32x4";
1298 #ifdef ISPC_WASM_ENABLED 1300 return "wasm-i32x4";
1303 return "sse2-i32x4";
1305 return "sse4-i32x4";
1307 return "avx1-i32x8";
1309 return "avx2-i32x8";
1311 return "avx512knl-i32x16";
1313 return "avx512skx-i32x16";
1317 FATAL(
"Unhandled target in ISAToTargetString()");
1325 if (type->isFloatingPointTy() || type->isX86_MMXTy() || type->isVoidTy() || type->isIntegerTy() ||
1326 type->isLabelTy() || type->isMetadataTy())
1332 llvm::ArrayType *at = llvm::dyn_cast<llvm::ArrayType>(type);
1336 llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(type);
1340 llvm::StructType *st = llvm::dyn_cast<llvm::StructType>(type);
1342 for (
int i = 0; i < (int)st->getNumElements(); ++i)
1348 Assert(llvm::isa<llvm::VectorType>(type));
1362 llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1363 llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1364 llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
1365 llvm::Instruction *gep =
1366 llvm::GetElementPtrInst::Create(
PTYPE(voidPtr), voidPtr, arrayRef,
"sizeof_gep", insertAtEnd);
1373 uint64_t byteSize =
getDataLayout()->getTypeStoreSize(type);
1383 llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
1384 llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
1385 llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
1386 llvm::Instruction *gep =
1387 llvm::GetElementPtrInst::Create(
PTYPE(voidPtr), voidPtr, arrayRef,
"offset_gep", insertAtEnd);
1394 llvm::StructType *structType = llvm::dyn_cast<llvm::StructType>(type);
1395 if (structType == NULL || structType->isSized() ==
false) {
1400 const llvm::StructLayout *sl =
getDataLayout()->getStructLayout(structType);
1403 uint64_t offset = sl->getElementOffset(element);
1412 func->addAttributes(llvm::AttributeList::FunctionIndex, *
m_tf_attributes);
1422 fastMaskedVload =
false;
1423 force32BitAddressing =
true;
1425 disableAsserts =
false;
1427 forceAlignedMemory =
false;
1428 disableMaskAllOnOptimizations =
false;
1429 disableHandlePseudoMemoryOps =
false;
1430 disableBlendedMaskedStores =
false;
1431 disableCoherentControlFlow =
false;
1432 disableUniformControlFlow =
false;
1433 disableGatherScatterOptimizations =
false;
1434 disableMaskedStoreToStore =
false;
1435 disableGatherScatterFlattening =
false;
1436 disableUniformMemoryOptimizations =
false;
1437 disableCoalescing =
false;
1450 includeStdlib =
true;
1454 printTarget =
false;
1455 NoOmitFramePointer =
false;
1457 disableWarnings =
false;
1458 warningsAsErrors =
false;
1460 forceColoredOutput =
false;
1461 disableLineWrap =
false;
1462 emitPerfWarnings =
true;
1463 emitInstrumentation =
false;
1464 noPragmaOnce =
false;
1465 generateDebuggingSymbols =
false;
1466 generateDWARFVersion = 3;
1467 enableFuzzTest =
false;
1469 mangleFunctionsWithTarget =
false;
1470 isMultiTargetCompilation =
false;
1473 ctx =
new llvm::LLVMContext;
1475 #ifdef ISPC_HOST_IS_WINDOWS 1476 _getcwd(currentDirectory,
sizeof(currentDirectory));
1478 if (getcwd(currentDirectory,
sizeof(currentDirectory)) == NULL)
1479 FATAL(
"Current directory path is too long!");
1481 forceAlignment = -1;
1495 name = m->
module->getModuleIdentifier().c_str();
1501 last_line = ll != 0 ? ll : fl;
1502 last_column = lc != 0 ? lc : fc;
1508 std::string directory, filename;
1510 llvm::DIFile *ret = m->
diBuilder->createFile(filename, directory);
1515 printf(
" @ [%s:%d.%d - %d.%d] ", name, first_line, first_column, last_line, last_column);
bool m_hasTranscendentals
std::string ArchToString(Arch arch)
llvm::DIFile * GetDIFile() const
const llvm::Target * m_target
static bool __os_has_avx_support()
This structure collects together a number of global variables.
std::vector< std::pair< std::string, std::string > > m_funcAttributes
int m_nativeVectorAlignment
SourcePos Union(const SourcePos &p1, const SourcePos &p2)
llvm::Value * SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd)
SourcePos(const char *n=NULL, int fl=0, int fc=0, int ll=0, int lc=0)
static llvm::VectorType * BoolVectorType
const char * GetISATargetString() const
Target(Arch arch, const char *cpu, ISPCTarget isa, bool pic, bool printTarget)
std::string HumanReadableListOfNames()
bool BackwardCompatible(CPUtype what, CPUtype with)
std::string GetTripleString() const
static bool lGenericTypeLayoutIndeterminate(llvm::Type *type)
static const char * ISAToString(Target::ISA isa)
std::set< CPUtype > Set(int type,...)
static llvm::VectorType * Int1VectorType
static void __cpuidex(int info[4], int level, int count)
static std::string SupportedCPUs()
static llvm::Type * Int64Type
char currentDirectory[1024]
std::string & GetDefaultNameFromType(CPUtype type)
void GetDirectoryAndFileName(const std::string &currentDirectory, const std::string &relativeName, std::string *directory, std::string *filename)
bool ISPCTargetIsX86(ISPCTarget target)
Header file with declarations for various LLVM utility stuff.
static ISPCTarget lGetSystemISA()
static TargetLibRegistry * getTargetLibRegistry()
CodegenOptLevel codegenOptLevel
bool IsGenericTypeLayoutIndeterminate(llvm::Type *type)
static void __cpuid(int info[4], int infoType)
Representation of a range of positions in a source file.
llvm::Value * StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd)
std::string ISPCTargetToString(ISPCTarget target)
llvm::ConstantInt * LLVMInt32(int32_t ival)
llvm::TargetMachine * m_targetMachine
bool force32BitAddressing
static bool __os_has_avx512_support()
void markFuncWithTargetAttr(llvm::Function *func)
void Error(SourcePos p, const char *fmt,...)
static llvm::Type * Int32Type
const llvm::DataLayout * getDataLayout() const
std::vector< std::vector< std::string > > names
llvm::DataLayout * m_dataLayout
const char * GetISAString() const
static llvm::VectorType * MaskType
static const bool lIsTargetValidforArch(ISPCTarget target, Arch arch)
std::vector< std::set< CPUtype > > compat
bool ISPCTargetIsNeon(ISPCTarget target)
llvm::ConstantInt * LLVMInt64(int64_t ival)
bool operator==(const SourcePos &p2) const
Declaration of the Module class, which is the ispc-side representation of the results of compiling a ...
llvm::AttrBuilder * m_tf_attributes
llvm::DIBuilder * diBuilder
Main ispc.header file. Defines Target, Globals and Opt classes.
void Warning(SourcePos p, const char *fmt,...)
CPUtype GetTypeFromName(std::string name)
static const char * ISAToTargetString(Target::ISA isa)