util/bufferiszero: Use __attribute__((target)) for avx2/avx512
Use the attribute, which is supported by clang, instead of
the #pragma, which is not supported and, for some reason,
also not detected by the meson probe, so we fail by -Werror.
Include only <immintrin.h> as that is the outermost "official"
header for these intrinsics -- emmintrin.h and smmintrin -- are
older SSE2 and SSE4 specific headers, while the immintrin.h
includes all of the Intel intrinsics.
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/meson.build b/meson.build
index 6d212f6..58d8cd6 100644
--- a/meson.build
+++ b/meson.build
@@ -2338,11 +2338,9 @@
config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX2') \
.require(cc.links('''
- #pragma GCC push_options
- #pragma GCC target("avx2")
#include <cpuid.h>
#include <immintrin.h>
- static int bar(void *a) {
+ static int __attribute__((target("avx2"))) bar(void *a) {
__m256i x = *(__m256i *)a;
return _mm256_testz_si256(x, x);
}
@@ -2352,11 +2350,9 @@
config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512F') \
.require(cc.links('''
- #pragma GCC push_options
- #pragma GCC target("avx512f")
#include <cpuid.h>
#include <immintrin.h>
- static int bar(void *a) {
+ static int __attribute__((target("avx512f"))) bar(void *a) {
__m512i x = *(__m512i *)a;
return _mm512_test_epi64_mask(x, x);
}
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index ec3cd4c..1790ded 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -64,18 +64,11 @@
}
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
-/* Do not use push_options pragmas unnecessarily, because clang
- * does not support them.
- */
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-#pragma GCC push_options
-#pragma GCC target("sse2")
-#endif
-#include <emmintrin.h>
+#include <immintrin.h>
/* Note that each of these vectorized functions require len >= 64. */
-static bool
+static bool __attribute__((target("sse2")))
buffer_zero_sse2(const void *buf, size_t len)
{
__m128i t = _mm_loadu_si128(buf);
@@ -104,20 +97,9 @@
return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
}
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-#pragma GCC pop_options
-#endif
#ifdef CONFIG_AVX2_OPT
-/* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8,
- * the includes have to be within the corresponding push_options region, and
- * therefore the regions themselves have to be ordered with increasing ISA.
- */
-#pragma GCC push_options
-#pragma GCC target("sse4")
-#include <smmintrin.h>
-
-static bool
+static bool __attribute__((target("sse4")))
buffer_zero_sse4(const void *buf, size_t len)
{
__m128i t = _mm_loadu_si128(buf);
@@ -145,12 +127,7 @@
return _mm_testz_si128(t, t);
}
-#pragma GCC pop_options
-#pragma GCC push_options
-#pragma GCC target("avx2")
-#include <immintrin.h>
-
-static bool
+static bool __attribute__((target("avx2")))
buffer_zero_avx2(const void *buf, size_t len)
{
/* Begin with an unaligned head of 32 bytes. */
@@ -176,15 +153,10 @@
return _mm256_testz_si256(t, t);
}
-#pragma GCC pop_options
#endif /* CONFIG_AVX2_OPT */
#ifdef CONFIG_AVX512F_OPT
-#pragma GCC push_options
-#pragma GCC target("avx512f")
-#include <immintrin.h>
-
-static bool
+static bool __attribute__((target("avx512f")))
buffer_zero_avx512(const void *buf, size_t len)
{
/* Begin with an unaligned head of 64 bytes. */
@@ -210,8 +182,7 @@
return !_mm512_test_epi64_mask(t, t);
}
-#pragma GCC pop_options
-#endif
+#endif /* CONFIG_AVX512F_OPT */
/* Note that for test_buffer_is_zero_next_accel, the most preferred