summaryrefslogtreecommitdiff
path: root/core/glibc/glibc-2.15-avx.patch
blob: 5439da8d0de6b1e1a10e6b721a2978fa5aec8c01 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
index 6867c6e..3a615fc 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
@@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
 
 libm_ifunc (__ieee754_atan2,
 	    HAS_FMA4 ? __ieee754_atan2_fma4
-	    : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
+	    : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
 strong_alias (__ieee754_atan2, __atan2_finite)
 
 # define __ieee754_atan2 __ieee754_atan2_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
index 3c65028..7b2320a 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
@@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
 
 libm_ifunc (__ieee754_exp,
 	    HAS_FMA4 ? __ieee754_exp_fma4
-	    : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
+	    : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
 strong_alias (__ieee754_exp, __exp_finite)
 
 # define __ieee754_exp __ieee754_exp_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
index 3b468d0..ab277d6 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -14,7 +14,7 @@ extern double __ieee754_log_fma4 (double);
 
 libm_ifunc (__ieee754_log,
 	    HAS_FMA4 ? __ieee754_log_fma4
-	    : (HAS_AVX ? __ieee754_log_avx
+	    : (HAS_YMM_USABLE ? __ieee754_log_avx
 	       : __ieee754_log_sse2));
 strong_alias (__ieee754_log, __log_finite)
 
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
index 3160201..78c7e09 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -12,7 +12,8 @@ extern double __atan_fma4 (double);
 #  define __atan_fma4 ((void *) 0)
 # endif
 
-libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2);
+libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
+		   HAS_YMM_USABLE ? __atan_avx : __atan_sse2));
 
 # define atan __atan_sse2
 #endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
index 1ba9dbc..417acd0 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -17,10 +17,12 @@ extern double __sin_fma4 (double);
 #  define __sin_fma4 ((void *) 0)
 # endif
 
-libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2);
+libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
+		    HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
 weak_alias (__cos, cos)
 
-libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2);
+libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
+		    HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
 weak_alias (__sin, sin)
 
 # define __cos __cos_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
index 8f6601e..3047155 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -12,7 +12,8 @@ extern double __tan_fma4 (double);
 #  define __tan_fma4 ((void *) 0)
 # endif
 
-libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2);
+libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
+		  HAS_YMM_USABLE ? __tan_avx : __tan_sse2));
 
 # define tan __tan_sse2
 #endif
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 65b0ee9..76d146c 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -1,6 +1,6 @@
 /* Initialize CPU feature data.
    This file is part of the GNU C Library.
-   Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@redhat.com>.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -144,6 +144,18 @@ __init_cpu_features (void)
   else
     kind = arch_kind_other;
 
+  if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
+    {
+      /* Reset the AVX bit in case OSXSAVE is disabled.  */
+      if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
+	  && ({ unsigned int xcrlow;
+		unsigned int xcrhigh;
+		asm ("xgetbv"
+		     : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+		(xcrlow & 6) == 6; }))
+	__cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+    }
+
   __cpu_features.family = family;
   __cpu_features.model = model;
   atomic_write_barrier ();
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 2a1df39..2dc75ab 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -1,5 +1,5 @@
 /* This file is part of the GNU C Library.
-   Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -22,11 +22,13 @@
 #define bit_Prefer_SSE_for_memop	(1 << 3)
 #define bit_Fast_Unaligned_Load		(1 << 4)
 #define bit_Prefer_PMINUB_for_stringop	(1 << 5)
+#define bit_YMM_Usable			(1 << 6)
 
 #define bit_SSE2	(1 << 26)
 #define bit_SSSE3	(1 << 9)
 #define bit_SSE4_1	(1 << 19)
 #define bit_SSE4_2	(1 << 20)
+#define bit_OSXSAVE	(1 << 27)
 #define bit_AVX		(1 << 28)
 #define bit_POPCOUNT	(1 << 23)
 #define bit_FMA		(1 << 12)
@@ -48,6 +50,7 @@
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
+# define index_YMM_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -92,7 +95,7 @@ extern struct cpu_features
 
 
 extern void __init_cpu_features (void) attribute_hidden;
-#define INIT_ARCH()\
+# define INIT_ARCH() \
   do							\
     if (__cpu_features.kind == arch_kind_unknown)	\
       __init_cpu_features ();				\
@@ -125,23 +128,21 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_Slow_BSF			FEATURE_INDEX_1
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1
+# define index_YMM_Usable		FEATURE_INDEX_1
 
-#define HAS_ARCH_FEATURE(idx, bit) \
-  ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
+# define HAS_ARCH_FEATURE(name) \
+  ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
 
-#define HAS_FAST_REP_STRING \
-  HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
+# define HAS_FAST_REP_STRING	HAS_ARCH_FEATURE (Fast_Rep_String)
 
-#define HAS_FAST_COPY_BACKWARD \
-  HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
+# define HAS_FAST_COPY_BACKWARD	HAS_ARCH_FEATURE (Fast_Copy_Backward)
 
-#define HAS_SLOW_BSF \
-  HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
+# define HAS_SLOW_BSF		HAS_ARCH_FEATURE (Slow_BSF)
 
-#define HAS_PREFER_SSE_FOR_MEMOP \
-  HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
+# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
 
-#define HAS_FAST_UNALIGNED_LOAD \
-  HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
+# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+
+# define HAS_YMM_USABLE		HAS_ARCH_FEATURE (YMM_Usable)
 
 #endif	/* __ASSEMBLER__ */
-- 
1.7.9