forked from JuliaLang/julia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
llvm-3.3.patch
180 lines (174 loc) · 5.95 KB
/
llvm-3.3.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
diff -u -r -N llvm-3.3.src/lib/Support/Host.cpp llvm-3.3/lib/Support/Host.cpp
--- llvm-3.3.src/lib/Support/Host.cpp 2013-05-04 02:36:23.000000000 -0500
+++ llvm-3.3/lib/Support/Host.cpp 2014-06-05 15:08:41.975312974 -0500
@@ -52,8 +52,54 @@
/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
/// specified arguments. If we can't run cpuid on the host, return true.
-static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+ unsigned *rECX, unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__)
+ #if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+// pedantic #else returns to appease -Wunreachable-code (so we don't generate
+// postprocessed code that looks like "return true; return false;")
+ #else
+ return true;
+ #endif
+#elif defined(_MSC_VER)
+ // The MSVC intrinsic is portable across x86 and x64.
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+#else
+ return true;
+#endif
+}
+
+/// GetX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
+/// 4 values in the specified arguments. If we can't run cpuid on the host,
+/// return true.
+bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
#if defined(__GNUC__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
@@ -64,16 +110,22 @@
"=S" (*rEBX),
"=c" (*rECX),
"=d" (*rEDX)
- : "a" (value));
+ : "a" (value),
+ "c" (subleaf));
return false;
#elif defined(_MSC_VER)
- int registers[4];
- __cpuid(registers, value);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
+ // __cpuidex was added in MSVC++ 9.0 SP1
+ #if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
+ int registers[4];
+ __cpuidex(registers, value, subleaf);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #else
+ return true;
+ #endif
#else
return true;
#endif
@@ -86,11 +138,13 @@
"=S" (*rEBX),
"=c" (*rECX),
"=d" (*rEDX)
- : "a" (value));
+ : "a" (value),
+ "c" (subleaf));
return false;
#elif defined(_MSC_VER)
__asm {
mov eax,value
+ mov ecx,subleaf
cpuid
mov esi,rEAX
mov dword ptr [esi],eax
@@ -102,8 +156,6 @@
mov dword ptr [esi],edx
}
return false;
-// pedantic #else returns to appease -Wunreachable-code (so we don't generate
-// postprocessed code that looks like "return true; return false;")
#else
return true;
#endif
@@ -148,21 +200,27 @@
unsigned Model = 0;
DetectX86FamilyModel(EAX, Family, Model);
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+
+ unsigned MaxLeaf = EAX;
bool HasSSE3 = (ECX & 0x1);
+ bool HasSSE41 = (ECX & 0x80000);
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
// switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
+ bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 &&
+ !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) &&
+ (EBX & 0x20);
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
- union {
- unsigned u[3];
- char c[12];
- } text;
-
- GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
if (memcmp(text.c, "GenuineIntel", 12) == 0) {
switch (Family) {
case 3:
@@ -244,7 +302,8 @@
// 17h. All processors are manufactured using the 45 nm process.
//
// 45nm: Penryn , Wolfdale, Yorkfield (XE)
- return "penryn";
+ // Not all Penryn processors support SSE 4.1 (such as the Pentium brand)
+ return HasSSE41 ? "penryn" : "core2";
case 26: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 45 nm process.
@@ -269,10 +328,20 @@
// Ivy Bridge:
case 58:
+ case 62: // Ivy Bridge EP
// Not all Ivy Bridge processors support AVX (such as the Pentium
// versions instead of the i7 versions).
return HasAVX ? "core-avx-i" : "corei7";
+ // Haswell:
+ case 60:
+ case 63:
+ case 69:
+ case 70:
+ // Not all Haswell processors support AVX too (such as the Pentium
+ // versions instead of the i7 versions).
+ return HasAVX2 ? "core-avx2" : "corei7";
+
case 28: // Most 45 nm Intel Atom processors
case 38: // 45 nm Atom Lincroft
case 39: // 32 nm Atom Medfield