Enable Fused-Multiply-Add (FMA) and F16C/CVT16 vector extensions on MSVC (#375)

* Enable Fused-Multiply-Add (FMA) instructions on MSVC

__FMA__ macro does not exist in MSVC

* Enable F16C/CVT16 vector extensions on MSVC

__F16C__ macro does not exist in MSVC, but is implied with AVX2/AVX512

* MSVC cvt intrinsics

* Add __SSE3__ macro for MSVC too because why not

even though it's not currently used for anything when AVX is defined
This commit is contained in:
anzz1 2023-03-28 22:44:29 +03:00 committed by GitHub
parent f1217055ea
commit 5a5f8b1501
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

18
ggml.c
View file

@ -79,6 +79,19 @@ static int sched_yield (void) {
typedef void* thread_ret_t;
#endif
// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
#ifndef __FMA__
#define __FMA__
#endif
#ifndef __F16C__
#define __F16C__
#endif
#ifndef __SSE3__
#define __SSE3__
#endif
#endif
#ifdef __HAIKU__
#define static_assert(cond, msg) _Static_assert(cond, msg)
#endif
@ -172,8 +185,13 @@ typedef double ggml_float;
#ifdef __F16C__
#ifdef _MSC_VER
#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
#else
#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
#endif
#elif defined(__POWER9_VECTOR__)