Skip to content
Snippets Groups Projects
Commit a29ee3d9 authored by Marco Clemencic
Browse files

Update instructionsetLevel for VectorClass 2.01.02

parent 311bc22d
Branches
Tags
No related merge requests found
...@@ -33,12 +33,14 @@ if(VectorClass_INCLUDE_DIR AND NOT VectorClass_VERSION) ...@@ -33,12 +33,14 @@ if(VectorClass_INCLUDE_DIR AND NOT VectorClass_VERSION)
set(VectorClass_VERSION 0.0) set(VectorClass_VERSION 0.0)
file(STRINGS ${VectorClass_INCLUDE_DIR}/instrset.h _vectorclass_guard REGEX "define +INSTRSET_H +[0-9]+") file(STRINGS ${VectorClass_INCLUDE_DIR}/instrset.h _vectorclass_guard REGEX "define +INSTRSET_H +[0-9]+")
list(GET _vectorclass_guard 0 _vectorclass_guard) list(GET _vectorclass_guard 0 _vectorclass_guard)
if(_vectorclass_guard MATCHES "INSTRSET_H +([0-9]+)") if(_vectorclass_guard MATCHES "INSTRSET_H +([0-9][0-9][0-9][0-9][0-9])")
string(REGEX REPLACE "([0-9]+)([0-9][0-9])([0-9][0-9])" "\\1.\\2.\\3" VectorClass_VERSION "${CMAKE_MATCH_1}")
elseif(_vectorclass_guard MATCHES "INSTRSET_H +([0-9][0-9][0-9])")
string(REGEX REPLACE "([0-9]+)([0-9][0-9])" "\\1.\\2" VectorClass_VERSION "${CMAKE_MATCH_1}") string(REGEX REPLACE "([0-9]+)([0-9][0-9])" "\\1.\\2" VectorClass_VERSION "${CMAKE_MATCH_1}")
endif() endif()
set(VectorClass_VERSION "${VectorClass_VERSION}" CACHE INTERNAL "") set(VectorClass_VERSION "${VectorClass_VERSION}" CACHE INTERNAL "")
endif() endif()
if(NOT VectorClass_INCLUDE_DIR OR VectorClass_VERSION VERSION_LESS 1.25) if(NOT VectorClass_INCLUDE_DIR OR VectorClass_VERSION VERSION_LESS 2.01.02)
if(VectorClass_INCLUDE_DIR) if(VectorClass_INCLUDE_DIR)
message(STATUS "Found VectorClass instrset_detect ${VectorClass_VERSION} at ${VectorClass_INCLUDE_DIR}") message(STATUS "Found VectorClass instrset_detect ${VectorClass_VERSION} at ${VectorClass_INCLUDE_DIR}")
endif() endif()
......
...@@ -28,7 +28,7 @@ int main( int argc, char* argv[] ) { ...@@ -28,7 +28,7 @@ int main( int argc, char* argv[] ) {
const std::vector<std::string> sets{// "80386", const std::vector<std::string> sets{// "80386",
"sse", "sse2", "sse", "sse2",
"sse3", // Note: This is reported by Linux as 'pni' "sse3", // Note: This is reported by Linux as 'pni'
"ssse3", "sse4_1", "sse4_2", "avx", "avx2", "avx512f", "avx512vl", "avx512bw"}; "ssse3", "sse4_1", "sse4_2", "avx", "avx2", "avx512f", "avx512vl"};
if ( argc == 1 ) { if ( argc == 1 ) {
const std::size_t level = System::instructionsetLevel() - 1; const std::size_t level = System::instructionsetLevel() - 1;
......
This diff is collapsed.
/************************** instrset_detect.cpp **************************** /************************** instrset_detect.cpp ****************************
* Author: Agner Fog * Author: Agner Fog
* Date created: 2012-05-30 * Date created: 2012-05-30
* Last modified: 2017-05-02 * Last modified: 2019-08-01
* Version: 1.28 * Version: 2.00.00
* Project: vector classes * Project: vector class library
* Description: * Description:
* Functions for checking which instruction sets are supported. * Functions for checking which instruction sets are supported.
* *
* (c) Copyright 2012-2017 GNU General Public License http://www.gnu.org/licenses * (c) Copyright 2012-2019 Agner Fog.
\*****************************************************************************/ * Apache License version 2.0 or later.
******************************************************************************/
#include "instrset.h" #include "instrset.h"
...@@ -16,56 +17,21 @@ ...@@ -16,56 +17,21 @@
namespace VCL_NAMESPACE { namespace VCL_NAMESPACE {
#endif #endif
// Define interface to cpuid instruction.
// input: eax = functionnumber, ecx = 0
// output: eax = output[0], ebx = output[1], ecx = output[2], edx = output[3]
//
// Executes CPUID for the leaf given by functionnumber, always with ecx set to 0,
// and stores the four result registers into output[0..3] in the order
// eax, ebx, ecx, edx. Exactly one of the three implementations below is compiled,
// selected by the compiler-identification macros.
static inline void cpuid( int output[4], int functionnumber ) {
#if defined( __GNUC__ ) || defined( __clang__ ) // use inline assembly, Gnu/AT&T syntax
int a, b, c, d;
// Input constraints load functionnumber into eax ("a") and 0 into ecx ("c");
// output constraints read eax/ebx/ecx/edx back into a/b/c/d.
__asm( "cpuid" : "=a"( a ), "=b"( b ), "=c"( c ), "=d"( d ) : "a"( functionnumber ), "c"( 0 ) : );
// Copy the register values into the caller's array in eax, ebx, ecx, edx order.
output[0] = a;
output[1] = b;
output[2] = c;
output[3] = d;
#elif defined( _MSC_VER ) || defined( __INTEL_COMPILER ) // Microsoft or Intel compiler, intrin.h included
// The third argument (0) is the ecx value passed to CPUID, matching the other branches.
__cpuidex( output, functionnumber, 0 ); // intrinsic function for CPUID
#else // unknown platform. try inline assembly with masm/intel syntax
// Fallback: load eax/ecx by hand, run cpuid, then store the four registers through
// esi at byte offsets 0, 4, 8 and 12 of output.
// NOTE(review): uses 32-bit register names and MASM-style block syntax; presumably
// only valid for 32-bit compilers that accept this syntax -- confirm before relying on it.
__asm {
mov eax, functionnumber
xor ecx, ecx
cpuid;
mov esi, output
mov [esi], eax
mov [esi+4], ebx
mov [esi+8], ecx
mov [esi+12], edx
}
#endif
}
// Define interface to xgetbv instruction // Define interface to xgetbv instruction
static inline int64_t xgetbv( int ctr ) { static inline uint64_t xgetbv( int ctr ) {
#if ( defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000 ) || \ #if ( defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000 ) || \
( defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200 ) // Microsoft or Intel compiler supporting _xgetbv ( defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200 )
// intrinsic // Microsoft or Intel compiler supporting _xgetbv intrinsic
return _xgetbv( ctr ); // intrinsic function for XGETBV return uint64_t( _xgetbv( ctr ) ); // intrinsic function for XGETBV
#elif defined( __GNUC__ ) // use inline assembly, Gnu/AT&T syntax #elif defined( __GNUC__ ) || defined( __clang__ ) // use inline assembly, Gnu/AT&T syntax
uint32_t a, d; uint32_t a, d;
__asm( "xgetbv" : "=a"( a ), "=d"( d ) : "c"( ctr ) : ); __asm( "xgetbv" : "=a"( a ), "=d"( d ) : "c"( ctr ) : );
return a | ( uint64_t( d ) << 32 ); return a | ( uint64_t( d ) << 32 );
#else // #elif defined (_WIN32) // other compiler. try inline assembly with masm/intel/MS #else // #elif defined (_WIN32) // other compiler. try inline assembly with masm/intel/MS syntax
// syntax
uint32_t a, d; uint32_t a, d;
__asm { __asm {
mov ecx, ctr mov ecx, ctr
...@@ -83,7 +49,7 @@ namespace VCL_NAMESPACE { ...@@ -83,7 +49,7 @@ namespace VCL_NAMESPACE {
/* find supported instruction set /* find supported instruction set
return value: return value:
0 = 80386 instruction set 0 = 80386 instruction set
1 or above = SSE (XMM) supported by CPU (not testing for O.S. support) 1 or above = SSE (XMM) supported by CPU (not testing for OS support)
2 or above = SSE2 2 or above = SSE2
3 or above = SSE3 3 or above = SSE3
4 or above = Supplementary SSE3 (SSSE3) 4 or above = Supplementary SSE3 (SSSE3)
...@@ -92,8 +58,7 @@ namespace VCL_NAMESPACE { ...@@ -92,8 +58,7 @@ namespace VCL_NAMESPACE {
7 or above = AVX supported by CPU and operating system 7 or above = AVX supported by CPU and operating system
8 or above = AVX2 8 or above = AVX2
9 or above = AVX512F 9 or above = AVX512F
10 or above = AVX512VL 10 or above = AVX512VL, AVX512BW, AVX512DQ
11 or above = AVX512BW, AVX512DQ
*/ */
int instrset_detect( void ) { int instrset_detect( void ) {
...@@ -134,11 +99,10 @@ namespace VCL_NAMESPACE { ...@@ -134,11 +99,10 @@ namespace VCL_NAMESPACE {
cpuid( abcd, 0xD ); // call cpuid leaf 0xD for feature flags cpuid( abcd, 0xD ); // call cpuid leaf 0xD for feature flags
if ( ( abcd[0] & 0x60 ) != 0x60 ) return iset; // no AVX512 if ( ( abcd[0] & 0x60 ) != 0x60 ) return iset; // no AVX512
iset = 9; iset = 9;
cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
if ( ( abcd[1] & ( 1 << 31 ) ) == 0 ) return iset; // no AVX512VL if ( ( abcd[1] & ( 1 << 31 ) ) == 0 ) return iset; // no AVX512VL
iset = 10;
if ( ( abcd[1] & 0x40020000 ) != 0x40020000 ) return iset; // no AVX512BW, AVX512DQ if ( ( abcd[1] & 0x40020000 ) != 0x40020000 ) return iset; // no AVX512BW, AVX512DQ
iset = 11; iset = 10;
return iset; return iset;
} }
...@@ -182,6 +146,22 @@ namespace VCL_NAMESPACE { ...@@ -182,6 +146,22 @@ namespace VCL_NAMESPACE {
return ( ( abcd[1] & ( 1 << 27 ) ) != 0 ); // ebx bit 27 indicates AVX512ER return ( ( abcd[1] & ( 1 << 27 ) ) != 0 ); // ebx bit 27 indicates AVX512ER
} }
// Report whether the CPU supports the AVX512VBMI instruction set.
// Returns false when instrset_detect() reports a level below 10 (AVX512BW is
// required); otherwise queries cpuid function 7 and tests bit 1 of ecx.
bool hasAVX512VBMI( void ) {
    const int min_level = 10; // must have AVX512BW
    if ( instrset_detect() >= min_level ) {
        int regs[4];     // cpuid results: eax, ebx, ecx, edx
        cpuid( regs, 7 ); // call cpuid function 7
        const int vbmi_bit = 1 << 1; // ecx bit 1 indicates AVX512VBMI
        return ( regs[2] & vbmi_bit ) != 0;
    }
    return false;
}
// Report whether the CPU supports the AVX512VBMI2 instruction set.
// Returns false when instrset_detect() reports a level below 10 (AVX512BW is
// required); otherwise queries cpuid function 7 and tests bit 6 of ecx.
bool hasAVX512VBMI2( void ) {
    const int min_level = 10; // must have AVX512BW
    if ( instrset_detect() >= min_level ) {
        int regs[4];     // cpuid results: eax, ebx, ecx, edx
        cpuid( regs, 7 ); // call cpuid function 7
        const int vbmi2_bit = 1 << 6; // ecx bit 6 indicates AVX512VBMI2
        return ( regs[2] & vbmi2_bit ) != 0;
    }
    return false;
}
#ifdef VCL_NAMESPACE #ifdef VCL_NAMESPACE
} }
#endif #endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment