diff --git a/CMakeLists.txt b/CMakeLists.txt index 68a86ca..701b417 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,4 +41,5 @@ if (WIN32) endif () add_subdirectory(tests) +add_subdirectory(src/misc) add_subdirectory(src/simd) diff --git a/cmake/detect_os.cmake b/cmake/detect_os.cmake index 7d383fa..c15c1e8 100644 --- a/cmake/detect_os.cmake +++ b/cmake/detect_os.cmake @@ -40,18 +40,18 @@ function(add_os_definitions target) set(alicho_def_cygwin 1) # 明确是 Cygwin set(alicho_def_unix 1) # 提供 Unix API set(alicho_def_posix 1) # 提供 POSIX API - message(STATUS "检测到 **Cygwin** 环境 (运行于 Windows)") +# message(STATUS "检测到 **Cygwin** 环境 (运行于 Windows)") elseif(WIN32) # 非 Cygwin 的 Windows 环境 (MSVC, MinGW, etc.) set(alicho_def_windows 1) - message(STATUS "检测到 **Windows** 操作系统 (非 Cygwin)") +# message(STATUS "检测到 **Windows** 操作系统 (非 Cygwin)") elseif(ANDROID) # Android 平台 (通常需要特定工具链设置 ANDROID 变量) set(alicho_def_android 1) set(alicho_def_unix 1) # Android NDK 基于 Unix set(alicho_def_posix 1) # NDK 提供 POSIX API set(alicho_def_mobile 1) # 移动平台 - message(STATUS "检测到 **Android** 操作系统") +# message(STATUS "检测到 **Android** 操作系统") elseif(IOS) # iOS 平台 (通常需要特定工具链设置 IOS 变量) # 需要在 APPLE 之前判断,因为 iOS 下 APPLE 也为 TRUE @@ -60,7 +60,7 @@ function(add_os_definitions target) set(alicho_def_posix 1) # 提供 POSIX API set(alicho_def_mobile 1) # 移动平台 set(alicho_def_apple 1) # iOS 是 Apple 生态的一部分 - message(STATUS "检测到 **iOS** 操作系统") +# message(STATUS "检测到 **iOS** 操作系统") elseif(APPLE) # 此时排除了 iOS,确定是 macOS set(alicho_def_macos 1) @@ -74,10 +74,10 @@ function(add_os_definitions target) set(alicho_def_posix 1) if(CMAKE_SYSTEM_NAME MATCHES "Linux") set(alicho_def_linux 1) - message(STATUS "检测到 **Linux** 操作系统") +# message(STATUS "检测到 **Linux** 操作系统") elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") set(alicho_def_freebsd 1) - message(STATUS "检测到 **FreeBSD** 操作系统") +# message(STATUS "检测到 **FreeBSD** 操作系统") else() message(WARNING "检测到未知的 类Unix 操作系统: ${CMAKE_SYSTEM_NAME}") endif() @@ -89,11 +89,11 @@ 
function(add_os_definitions target) if(CMAKE_SIZEOF_VOID_P EQUAL 8) set(alicho_def_arch_64bit 1) set(alicho_def_arch_32bit 0) # 明确设置为 0 - message(STATUS "检测到 **64-bit** 架构") +# message(STATUS "检测到 **64-bit** 架构") elseif(CMAKE_SIZEOF_VOID_P EQUAL 4) set(alicho_def_arch_64bit 0) # 明确设置为 0 set(alicho_def_arch_32bit 1) - message(STATUS "检测到 **32-bit** 架构") +# message(STATUS "检测到 **32-bit** 架构") else() # 对于未知或未定义的指针大小,两者都保持 0 message(WARNING "无法明确检测到 32-bit 或 64-bit 架构 (CMAKE_SIZEOF_VOID_P = ${CMAKE_SIZEOF_VOID_P})。将两者都设置为 0。") @@ -102,13 +102,13 @@ function(add_os_definitions target) # 检测特定架构类型 if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|i386|i486|i586|i686|i786|x86_64|AMD64") set(alicho_def_x86 1) - message(STATUS "检测到 **x86/x64** 架构") +# message(STATUS "检测到 **x86/x64** 架构") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64") set(alicho_def_arm 1) - message(STATUS "检测到 **ARM** 架构") +# message(STATUS "检测到 **ARM** 架构") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv|riscv64|riscv32") set(alicho_def_riscv 1) - message(STATUS "检测到 **RISC-V** 架构") +# message(STATUS "检测到 **RISC-V** 架构") endif() # --- 阶段 2: 组装定义列表 --- diff --git a/cmake/retrieve_files.cmake b/cmake/retrieve_files.cmake index 87a2df8..76c2cfd 100644 --- a/cmake/retrieve_files.cmake +++ b/cmake/retrieve_files.cmake @@ -67,7 +67,7 @@ function(retrieve_files_custom path extension out_files) return() endif() - message(STATUS "正在检索目录: ${path}") +# message(STATUS "正在检索目录: ${path}") # 2. 
构建文件匹配模式 set(file_patterns "") @@ -559,7 +559,7 @@ function(simple_library library_type) retrieve_files(${CMAKE_CURRENT_SOURCE_DIR} source_files) add_library(${PROJECT_NAME} ${library_type} ${source_files}) target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) - target_link_libraries(${PROJECT_NAME} PRIVATE audio_backend_project_options) + target_link_libraries(${PROJECT_NAME} PRIVATE audio_backend_project_options ${ARG_LINK_LIBRARIES}) message(STATUS "创建库目标: ${PROJECT_NAME},类型: ${library_type},引用路径: ${CMAKE_CURRENT_SOURCE_DIR}") add_os_definitions(${PROJECT_NAME}) endfunction() diff --git a/src/misc/CMakeLists.txt b/src/misc/CMakeLists.txt new file mode 100644 index 0000000..328bd74 --- /dev/null +++ b/src/misc/CMakeLists.txt @@ -0,0 +1,3 @@ +project(alicho_misc) + +simple_library(STATIC) diff --git a/src/misc/lazy_singleton.h b/src/misc/lazy_singleton.h new file mode 100644 index 0000000..587d101 --- /dev/null +++ b/src/misc/lazy_singleton.h @@ -0,0 +1,20 @@ +#pragma once + +template +class lazy_singleton { +public: + static T& instance() { + static T instance_; + return instance_; + } + + // 禁止拷贝和赋值 + lazy_singleton(const lazy_singleton&) = delete; + lazy_singleton& operator=(const lazy_singleton&) = delete; + // 禁止移动构造和移动赋值 + lazy_singleton(lazy_singleton&&) = delete; + lazy_singleton& operator=(lazy_singleton&&) = delete; +protected: + lazy_singleton() = default; + virtual ~lazy_singleton() = default; +}; diff --git a/src/misc/t.cpp b/src/misc/t.cpp new file mode 100644 index 0000000..05918d7 --- /dev/null +++ b/src/misc/t.cpp @@ -0,0 +1 @@ +#include "t.h" diff --git a/src/misc/t.h b/src/misc/t.h new file mode 100644 index 0000000..7c134e5 --- /dev/null +++ b/src/misc/t.h @@ -0,0 +1,5 @@ +#pragma once + +class t { + +}; diff --git a/src/simd/CMakeLists.txt b/src/simd/CMakeLists.txt index a2f2dca..cebfcf7 100644 --- a/src/simd/CMakeLists.txt +++ b/src/simd/CMakeLists.txt @@ -1,3 +1,4 @@ project(alicho_simd) 
-simple_library(${CMAKE_CURRENT_SOURCE_DIR} STATIC) +simple_library(STATIC) +target_link_libraries(${PROJECT_NAME} PUBLIC alicho_misc) diff --git a/src/simd/aligned_allocator.h b/src/simd/aligned_allocator.h new file mode 100644 index 0000000..2840f86 --- /dev/null +++ b/src/simd/aligned_allocator.h @@ -0,0 +1,230 @@ +#pragma once +#include +#include +#include +#include + +constexpr size_t ALIGNMENT_SSE = 16; // SSE要求16字节对齐 +constexpr size_t ALIGNMENT_AVX = 32; // AVX要求32字节对齐 +constexpr size_t ALIGNMENT_AVX512 = 64; // AVX-512要求64字节对齐 +constexpr size_t ALIGNMENT_CACHE = 64; // CPU缓存行对齐(通常为64字节) + +inline auto aligned_malloc(size_t size, size_t alignment) -> void* { + if (alignment == 0 || (alignment & (alignment - 1)) != 0) { + // 对齐值必须是2的幂 + return nullptr; + } + +#if ALICHO_PLATFORM_WINDOWS + return _aligned_malloc(size, alignment); +#elif ALICHO_PLATFORM_POSIX || ALICHO_PLATFORM_UNIX + void* ptr = nullptr; + if (posix_memalign(&ptr, alignment, size) != 0) { + return nullptr; + } + return ptr; +#else + // 回退实现:手动对齐 + // 分配额外空间来存储原始指针和进行对齐 + size_t total_size = size + alignment + sizeof(void*); + void* raw_ptr = std::malloc(total_size); + if (!raw_ptr) { + return nullptr; + } + + // 计算对齐后的地址 + uintptr_t raw_addr = reinterpret_cast(raw_ptr); + uintptr_t aligned_addr = (raw_addr + sizeof(void*) + alignment - 1) & ~(alignment - 1); + void* aligned_ptr = reinterpret_cast(aligned_addr); + + // 在对齐地址前存储原始指针 + (reinterpret_cast(aligned_ptr))[-1] = raw_ptr; + + return aligned_ptr; +#endif +} + +inline void aligned_free(void* ptr) { + if (!ptr) { + return; + } +#if ALICHO_PLATFORM_WINDOWS + _aligned_free(ptr); +#elif ALICHO_PLATFORM_POSIX || ALICHO_PLATFORM_UNIX + std::free(ptr); +#else + // 回退实现:获取原始指针并释放 + void* raw_ptr = (reinterpret_cast(ptr))[-1]; + std::free(raw_ptr); +#endif +} + +// 对齐分配器模板类 +template +class aligned_allocator { +public: + using value_type = type; + using pointer = type*; + using const_pointer = const type*; + using reference = type&; + using 
const_reference = const type&; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + template + struct rebind { + using other = aligned_allocator; + }; + + aligned_allocator() noexcept = default; + + template + aligned_allocator(const aligned_allocator&) noexcept {} + + auto allocate(size_type n) -> pointer { + if (n == 0) return nullptr; + + size_type size = n * sizeof(type); + void* ptr = aligned_malloc(size, alignment); + + if (!ptr) { + throw std::bad_alloc(); + } + + return static_cast(ptr); + } + + void deallocate(pointer p, size_type) noexcept { + aligned_free(p); + } + + template + void construct(u* p, args&&... in_args) { + ::new(static_cast(p)) u(std::forward(in_args)...); + } + + template + void destroy(u* p) { + p->~u(); + } + + static auto max_size() noexcept { + return std::numeric_limits::max() / sizeof(type); + } +}; + +template +bool operator==(const aligned_allocator&, const aligned_allocator&) noexcept { + return a1 == a2; +} + +template +bool operator!=(const aligned_allocator&, const aligned_allocator&) noexcept { + return a1 != a2; +} + +// 类型别名,方便使用不同对齐方式的分配器 +template +using sse_aligned_allocator = aligned_allocator; +template +using avx_aligned_allocator = aligned_allocator; +template +using avx512_aligned_allocator = aligned_allocator; +template +using cache_aligned_allocator = aligned_allocator; + +template +auto is_aligned(void* ptr) -> bool { + return (reinterpret_cast(ptr) % alignment) == 0; +} + +inline auto is_aligned(const void* ptr, size_t alignment) -> bool { + return (reinterpret_cast(ptr) % alignment) == 0; +} + +inline auto align_size(size_t size, size_t alignment) -> size_t { + return (size + alignment - 1) & ~(alignment - 1); +} + +template +auto align_pointer(void* ptr) -> void* { + const auto addr = reinterpret_cast(ptr); + const auto aligned_addr = (addr + alignment - 1) & ~(alignment - 1); + return reinterpret_cast(aligned_addr); +} + +template +class aligned_buffer { +public: + aligned_buffer() = 
default; + + explicit aligned_buffer(size_t size) : size_(size) { + allocate(size); + } + + void allocate(size_t new_size) { + if (data_) { + deallocate(); + } + + if (new_size == 0) { + data_ = nullptr; + size_ = 0; + return; + } + + data_ = static_cast(aligned_malloc(new_size * sizeof(type), alignment)); + if (!data_) { + throw std::bad_alloc(); + } + + size_ = new_size; + + // 对于非POD类型,需要构造对象 + if constexpr (!std::is_trivially_constructible_v) { + for (size_t i = 0; i < size_; ++i) { + new(&data_[i]) type(); + } + } + } + + void deallocate() { + if (!data_) + return; + // 对于非POD类型,需要析构对象 + if constexpr (!std::is_trivially_destructible_v) { + for (size_t i = 0; i < size_; ++i) { + data_[i].~T(); + } + } + aligned_free(data_); + data_ = nullptr; + size_ = 0; + } + + void resize(size_t size) { + if (size == size_) + return; + allocate(size); + } + + auto data() noexcept { return data_; } + auto data() const noexcept { return data_; } + auto size() const noexcept { return size_; } + auto empty() const noexcept { return size_ == 0; } + + auto& operator[](size_t index) noexcept { return data_[index]; } + const auto& operator[](size_t index) const noexcept { return data_[index]; } + + auto begin() noexcept { return data_; } + auto end() noexcept { return data_ + size_; } + auto begin() const noexcept { return data_; } + auto end() const noexcept { return data_ + size_; } + + [[nodiscard]] auto is_properly_aligned() const noexcept -> bool { + return is_aligned(data_); + } +private: + type* data_ = nullptr; + size_t size_ = 0; +}; diff --git a/src/simd/cpu_features.cpp b/src/simd/cpu_features.cpp index 3cd3612..9c38c48 100644 --- a/src/simd/cpu_features.cpp +++ b/src/simd/cpu_features.cpp @@ -1,4 +1,10 @@ #include "cpu_features.h" +#include +#include +#include +#include +#include +#include #if ALICHO_PLATFORM_WINDOWS #include @@ -14,7 +20,7 @@ #endif #endif -#if ALICHO_PLATFORM_ARM && define(__ARM_NEON) +#if ALICHO_PLATFORM_ARM && defined(__ARM_NEON) #include #endif @@ 
-80,8 +86,8 @@ auto cpu_feature_detector::recommended_simd_level() const noexcept -> simd_level case simd_level::SSE4: case simd_level::AVX: case simd_level::AVX2: - case simd_level::NENO: - case simd_level::NENO_FP16: + case simd_level::NEON: + case simd_level::NEON_FP16: return info_.max_simd_level; // 直接使用检测到的最高级别 default: case simd_level::NONE: @@ -261,10 +267,11 @@ void cpu_feature_detector::detect_arm_features() { info_.vendor = "ARM"; if (auto* fp = fopen("/proc/cpuinfo", "r")) { char line[1024]; - while (fgets(buffer, sizeof(buffer), fp)) { - if (strncmp(buffer, "Hardware", 8) == 0 || - strncmp(buffer, "model name", 10) == 0) { - char* sep = strchr(buffer, ':'); + char line[1024]; + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "Hardware", 8) == 0 || + strncmp(line, "model name", 10) == 0) { + char* sep = strchr(line, ':'); if (sep) { // 去除前导空格 char* value = sep + 1; @@ -333,7 +340,7 @@ auto cpu_feature_detector::cpuid(uint32_t function_id, uint32_t subfunction_id) return result; } #else -auto cpu_feature_detector::cpuid(uint32_t function_id, uint32_t subfunction_id = 0) -> cpu_id_result { +auto cpu_feature_detector::cpuid(uint32_t function_id, uint32_t subfunction_id) -> cpu_id_result { cpu_id_result result; uint32_t regs[4]; diff --git a/src/simd/cpu_features.h b/src/simd/cpu_features.h index d45d012..6522183 100644 --- a/src/simd/cpu_features.h +++ b/src/simd/cpu_features.h @@ -1,7 +1,9 @@ -#pragma once +#pragma once #include #include +#include "lazy_singleton.h" + enum class cpu_feature : uint32_t { // x86/x64特性 SSE = 1 << 0, // 流水线SIMD扩展 @@ -40,8 +42,8 @@ enum class simd_level { AVX, // AVX AVX2, // AVX2 + FMA AVX512, // AVX-512系列 - NENO, // ARM NEON - NENO_FP16 // ARM NEON + FP16 + NEON, // ARM NEON + NEON_FP16 // ARM NEON + FP16 }; struct cpu_info { @@ -59,21 +61,9 @@ struct cpu_info { [[nodiscard]] auto features_string() const -> std::string; }; -class cpu_feature_detector { +class cpu_feature_detector : public lazy_singleton { public: 
- static auto& instance() { - static cpu_feature_detector detector; - return detector; - } - - // 禁止拷贝和移动 - cpu_feature_detector(const cpu_feature_detector&) = delete; - - cpu_feature_detector& operator=(const cpu_feature_detector&) = delete; - - cpu_feature_detector(cpu_feature_detector&&) = delete; - - cpu_feature_detector& operator=(cpu_feature_detector&&) = delete; + friend class lazy_singleton; // 获取CPU信息 [[nodiscard]] const auto& get_cpu_info() const noexcept { return info_; } @@ -90,10 +80,10 @@ public: void print_info() const; -private: +protected: cpu_feature_detector(); - ~cpu_feature_detector() = default; + ~cpu_feature_detector() override = default; void detect_features(); @@ -125,7 +115,5 @@ inline auto cpu_supports(cpu_feature feature) noexcept -> bool { inline auto get_max_simd_level() noexcept -> simd_level { return get_cpu_info().max_simd_level; } inline auto get_recommended_simd_level() noexcept -> simd_level { - auto level = get_max_simd_level(); - if (level > simd_level::AVX2) { return simd_level::AVX2; } - return level; + return cpu_feature_detector::instance().recommended_simd_level(); } diff --git a/src/simd/simd.cpp b/src/simd/simd.cpp deleted file mode 100644 index 8dc280e..0000000 --- a/src/simd/simd.cpp +++ /dev/null @@ -1,3 +0,0 @@ -// -// Created by 46944 on 25-10-28. -// diff --git a/src/simd/simd.h b/src/simd/simd.h deleted file mode 100644 index b593491..0000000 --- a/src/simd/simd.h +++ /dev/null @@ -1,8 +0,0 @@ -// -// Created by 46944 on 25-10-28. 
-// - -#ifndef SIMD_H -#define SIMD_H - -#endif //SIMD_H diff --git a/src/simd/simd_func_dispatcher.cpp b/src/simd/simd_func_dispatcher.cpp new file mode 100644 index 0000000..d4b1b89 --- /dev/null +++ b/src/simd/simd_func_dispatcher.cpp @@ -0,0 +1,106 @@ +#include "simd_func_dispatcher.h" +#include + +void simd_func_dispatcher::print_registry_status() const { + printf("Registered SIMD Functions:\n"); + for (const auto& pair: func_registry_) { + const auto& func_name = pair.first; + const auto& holder = pair.second; + + printf("Function: %s\n", func_name.c_str()); + if (holder->has_implementation()) { + auto versions = holder->get_available_versions(); + printf(" Available Versions: "); + for (const auto& version: versions) { + switch (version) { + case simd_func_version::SCALAR: + printf("SCALAR "); + break; + case simd_func_version::SSE: + printf("SSE "); + break; + case simd_func_version::SSE3: + printf("SSE3 "); + break; + case simd_func_version::SSE4: + printf("SSE4 "); + break; + case simd_func_version::AVX: + printf("AVX "); + break; + case simd_func_version::AVX2: + printf("AVX2 "); + break; + case simd_func_version::AVX512: + printf("AVX512 "); + break; + case simd_func_version::NEON: + printf("NEON "); + break; + case simd_func_version::NEON_FP16: + printf("NEON_FP16 "); + break; + case simd_func_version::VECTOR: + printf("VECTOR "); + break; + default: + break; + } + } + printf("\n"); + } + else { printf(" No implementations registered.\n"); } + } +} + +const char* simd_func_version_to_string(simd_func_version version) { + switch (version) { + case simd_func_version::SCALAR: + return "SCALAR"; + case simd_func_version::SSE: + return "SSE"; + case simd_func_version::SSE3: + return "SSE3"; + case simd_func_version::SSE4: + return "SSE4"; + case simd_func_version::AVX: + return "AVX"; + case simd_func_version::AVX2: + return "AVX2"; + case simd_func_version::AVX512: + return "AVX512"; + case simd_func_version::NEON: + return "NEON"; + case 
simd_func_version::NEON_FP16: + return "NEON_FP16"; + case simd_func_version::VECTOR: + return "VECTOR"; + default: + break; + } + return "UNKNOWN"; +} + +simd_func_version string_to_simd_func_version(const std::string& version_str) { + if (version_str == "SCALAR") + return simd_func_version::SCALAR; + if (version_str == "SSE") + return simd_func_version::SSE; + if (version_str == "SSE3") + return simd_func_version::SSE3; + if (version_str == "SSE4") + return simd_func_version::SSE4; + if (version_str == "AVX") + return simd_func_version::AVX; + if (version_str == "AVX2") + return simd_func_version::AVX2; + if (version_str == "AVX512") + return simd_func_version::AVX512; + if (version_str == "NEON") + return simd_func_version::NEON; + if (version_str == "NEON_FP16") + return simd_func_version::NEON_FP16; + if (version_str == "VECTOR") + return simd_func_version::VECTOR; + return simd_func_version::SCALAR; // 默认返回SCALAR +} diff --git a/src/simd/simd_func_dispatcher.h b/src/simd/simd_func_dispatcher.h new file mode 100644 index 0000000..dd9211a --- /dev/null +++ b/src/simd/simd_func_dispatcher.h @@ -0,0 +1,210 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +#include "cpu_features.h" + +enum class simd_func_version { + SCALAR = 0, // 标量实现 (默认回退) + SSE, // SSE实现 + SSE3, // SSE3/SSSE3实现 + SSE4, // SSE4_1/SSE4_2实现 + AVX, // AVX实现 + AVX2, // AVX2 + FMA实现 + AVX512, // AVX-512实现 + NEON, // NEON实现 + NEON_FP16, // NEON + FP16实现 + VECTOR, // 向量扩展实现 + + COUNT +}; + +constexpr auto simd_level_to_version(simd_level level) { + switch (level) { + case simd_level::NONE: + return simd_func_version::SCALAR; + case simd_level::SSE: + return simd_func_version::SSE; + case simd_level::SSE3: + return simd_func_version::SSE3; + case simd_level::SSE4: + return simd_func_version::SSE4; + case simd_level::AVX: + return simd_func_version::AVX; + case simd_level::AVX2: + return simd_func_version::AVX2; + case simd_level::AVX512: + return 
simd_func_version::AVX512; + case simd_level::NEON: + return simd_func_version::NEON; + case simd_level::NEON_FP16: + return simd_func_version::NEON_FP16; + } + + return simd_func_version::SCALAR; +} + +template +class multi_version_func; + +template +class multi_version_func { +public: + using func_type = std::function; + using func_arr = std::array(simd_func_version::COUNT)>; + + multi_version_func() = default; + + void register_version(simd_func_version version, func_type func) { + functions_[static_cast(version)] = std::move(func); + best_func_ = get_best_func(); // 更新最佳函数 + } + + const auto& get_best_func() const { + const auto recommended_level = get_recommended_simd_level(); + const auto referred_version = simd_level_to_version(recommended_level); + + // 从首选版本开始,向下查找可用的实现 + for (int v = static_cast(referred_version); v >= 0; --v) { + auto version = static_cast(v); + if (const auto& func = functions_[static_cast(version)]) { return func; } + } + + // 如果没有找到任何实现,返回一个空函数 + static const func_type empty_func = nullptr; + return empty_func; + } + + auto operator()(args... 
in_args) const { + if (!best_func_) { + throw std::runtime_error("没有可用的SIMD实现。"); + } + return best_func_(std::forward(in_args)...); + } + + // 检查是否有任何版本的实现 + auto has_implementation() const { + return std::any_of(functions_.begin(), functions_.end(), [](const auto& func) { return func != nullptr; }); + } + + auto get_available_versions() const { + std::vector available_versions; + for (size_t i = 0; i < functions_.size(); ++i) { + if (functions_[i]) { available_versions.push_back(static_cast(i)); } + } + return available_versions; + } + +private: + func_arr functions_{}; + func_type best_func_{ nullptr }; +}; + +class simd_func_dispatcher : public lazy_singleton { +public: + friend class lazy_singleton; + + // 注册函数(通过函数名) + template + void register_function(const std::string& func_name, + simd_func_version version, + std::function func) { + auto& holder = get_or_create_func(func_name); + holder.register_version(version, std::move(func)); + } + + // 获取函数 + template + const auto& get_function(const std::string& func_name) const { + const auto& it = func_registry_.find(func_name); + if (it == func_registry_.end()) { + throw std::runtime_error("函数 '" + func_name + "' 未注册"); + } + + auto* holder = static_cast*>(it->second.get()); + return holder->func; + } + + // 调用函数 + template + auto call_function(const std::string& func_name, args&&... 
in_args) const { + const auto& func = get_function(func_name); + return func(std::forward(in_args)...); + } + + // 列出所有已经注册的函数 + [[nodiscard]] auto list_functions() const -> std::vector { + std::vector func_names; + for (const auto& pair: func_registry_) { func_names.push_back(pair.first); } + return func_names; + } + + // 打印函数注册状态 + void print_registry_status() const; + +private: + // 类型擦除的函数持有者基类 + struct func_holder_base { + virtual ~func_holder_base() = default; + + [[nodiscard]] virtual auto get_available_versions() const -> std::vector = 0; + + [[nodiscard]] virtual auto has_implementation() const -> bool = 0; + }; + + // 具体的函数持有者模板 + template + struct func_holder : func_holder_base { + multi_version_func func; + + [[nodiscard]] auto get_available_versions() const -> std::vector override { + return func.get_available_versions(); + } + + [[nodiscard]] auto has_implementation() const -> bool override { return func.has_implementation(); } + }; + + // 获取或创建函数持有者(仅用于注册) + template + auto& get_or_create_func(const std::string& func_name) { + const auto& it = func_registry_.find(func_name); + if (it != func_registry_.end()) { + auto* holder = static_cast*>(it->second.get()); + return holder->func; + } + auto holder = std::make_unique>(); + auto* ptr = holder.get(); + func_registry_[func_name] = std::move(holder); + return ptr->func; + } + + std::unordered_map> func_registry_{}; +}; + +#define REGISTER_SIMD_FUNCTION(func_name, version, func) \ + simd_func_dispatcher::instance().register_function(func_name, version, func); + +#define GET_SIMD_FUNCTION(func_signature, func_name) \ + simd_func_dispatcher::instance().get_function(func_name); + +#define CALL_SIMD_FUNCTION(func_signature, func_name, ...) 
\ + simd_func_dispatcher::instance().call_function(func_name, __VA_ARGS__); + +template +class simd_auto_register { +public: + simd_auto_register(const std::string& func_name, simd_func_version version, std::function func) { + simd_func_dispatcher::instance().register_function(func_name, version, std::move(func)); + } +}; + +#define AUTO_REGISTER_SIMD_FUNCTION(func_name, version, func) \ + static simd_auto_register auto_register_##func_name(#func_name, version, func); + +const char* simd_func_version_to_string(simd_func_version version); + +simd_func_version string_to_simd_func_version(const std::string& version_str); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5348b32..2e787a3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,7 +6,8 @@ find_package(GTest REQUIRED) function(add_test source_file link_lib) get_filename_component(test_name ${source_file} NAME_WE) add_executable(${test_name} ${source_file}) - target_link_libraries(${test_name} PUBLIC GTest::GTest GTest::Main ${link_lib}) + target_link_libraries(${test_name} PUBLIC GTest::GTest GTest::Main audio_backend_project_options ${link_lib}) endfunction() add_test(test_simd.cpp alicho_simd) +add_test(test_aligned_buffer.cpp alicho_simd) diff --git a/tests/test_aligned_buffer.cpp b/tests/test_aligned_buffer.cpp new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_simd.cpp b/tests/test_simd.cpp index 7d82253..11af894 100644 --- a/tests/test_simd.cpp +++ b/tests/test_simd.cpp @@ -1,25 +1,1253 @@ -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include "cpu_features.h" +#include "simd_func_dispatcher.h" +#include "aligned_allocator.h" + +// ============================================================================= +// 测试辅助函数和宏定义 +// ============================================================================= + +// 跨平台兼容性宏 +#ifndef ALICHO_PLATFORM_WINDOWS +#define ALICHO_PLATFORM_WINDOWS 0 +#endif + +#ifndef 
ALICHO_PLATFORM_X86 +#define ALICHO_PLATFORM_X86 1 +#endif + +#ifndef ALICHO_PLATFORM_ARM +#define ALICHO_PLATFORM_ARM 0 +#endif + +#ifndef ALICHO_PLATFORM_POSIX +#define ALICHO_PLATFORM_POSIX 0 +#endif + +#ifndef ALICHO_PLATFORM_UNIX +#define ALICHO_PLATFORM_UNIX 0 +#endif + +// 测试辅助函数 +namespace simd_test_helpers { + // 简单的性能计时器 + class timer { + public: + timer() : start_(std::chrono::high_resolution_clock::now()) {} + + auto elapsed_ms() const -> double { + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end - start_); + return duration.count() / 1000.0; + } + + private: + std::chrono::high_resolution_clock::time_point start_; + }; + + // 测试用的简单数学函数 + auto add_scalar(float a, float b) -> float { return a + b; } + auto add_sse(float a, float b) -> float { return a + b + 0.1f; } // 模拟SSE版本 + auto add_avx(float a, float b) -> float { return a + b + 0.2f; } // 模拟AVX版本 + + // 测试用的数组求和函数 + auto sum_array_scalar(const std::vector& arr) -> float { + float sum = 0.0f; + for (const auto& val : arr) { + sum += val; + } + return sum; + } + + auto sum_array_sse(const std::vector& arr) -> float { + // 模拟SSE实现 + return sum_array_scalar(arr) * 1.01f; + } + + auto sum_array_avx(const std::vector& arr) -> float { + // 模拟AVX实现 + return sum_array_scalar(arr) * 1.02f; + } + + // 检查指针是否正确对齐 + template + auto is_properly_aligned(void* ptr) -> bool { + return (reinterpret_cast(ptr) % alignment) == 0; + } + + // 生成测试数据 + auto generate_test_data(size_t size) -> std::vector { + std::vector data; + data.reserve(size); + for (size_t i = 0; i < size; ++i) { + data.push_back(static_cast(i) * 0.1f); + } + return data; + } +} + +// ============================================================================= +// 主测试类 +// ============================================================================= class simd_test : public ::testing::Test { protected: - void SetUp() override { + void SetUp() override { + // 获取CPU信息用于后续测试 + cpu_info_ = 
&get_cpu_info(); + } - } + void TearDown() override { + // 清理测试环境 + } - virtual void TearDown() override { - - } + const cpu_info* cpu_info_ = nullptr; }; -TEST_F(simd_test, feature_detection) { - EXPECT_TRUE(cpu_supports(cpu_feature::SSE)); - EXPECT_TRUE(cpu_supports(cpu_feature::SSE2)); +// ============================================================================= +// CPU特性检测模块测试(9个测试用例) +// ============================================================================= - // 测试 AVX 和 AVX2 支持 - EXPECT_NO_THROW({ - bool avx_supported = cpu_supports(cpu_feature::AVX); - bool avx2_supported = cpu_supports(cpu_feature::AVX2); - }); - cpu_feature_detector::instance().print_info(); +// 基础功能测试 +TEST_F(simd_test, CpuFeaturesTest_BasicDetection) { + ASSERT_NE(cpu_info_, nullptr); + + // 基本信息应该已填充 + EXPECT_FALSE(cpu_info_->vendor.empty()); + EXPECT_FALSE(cpu_info_->brand.empty()); + EXPECT_GT(cpu_info_->logical_cores, 0); + EXPECT_GT(cpu_info_->physical_cores, 0); + + // 特性字符串应该可以生成 + auto features_str = cpu_info_->features_string(); + EXPECT_TRUE(features_str.empty() || !features_str.empty()); // 总是为真,但测试调用成功 + + std::cout << "CPU Vendor: " << cpu_info_->vendor << std::endl; + std::cout << "CPU Brand: " << cpu_info_->brand << std::endl; + std::cout << "Logical Cores: " << cpu_info_->logical_cores << std::endl; + std::cout << "Physical Cores: " << cpu_info_->physical_cores << std::endl; + std::cout << "Features: " << features_str << std::endl; } + +TEST_F(simd_test, CpuFeaturesTest_SimdLevelDetection) { + auto max_level = get_max_simd_level(); + auto recommended_level = get_recommended_simd_level(); + + // SIMD级别应该在有效范围内 + EXPECT_GE(static_cast(max_level), static_cast(simd_level::NONE)); + EXPECT_LE(static_cast(max_level), static_cast(simd_level::NEON_FP16)); + + EXPECT_GE(static_cast(recommended_level), static_cast(simd_level::NONE)); + EXPECT_LE(static_cast(recommended_level), static_cast(simd_level::NEON_FP16)); + + // 推荐级别不应该超过最大级别 + 
EXPECT_LE(static_cast(recommended_level), static_cast(max_level)); + + std::cout << "Max SIMD Level: " << static_cast(max_level) << std::endl; + std::cout << "Recommended SIMD Level: " << static_cast(recommended_level) << std::endl; +} + +TEST_F(simd_test, CpuFeaturesTest_GlobalFunctions) { + // 测试全局便利函数 + const auto& info = get_cpu_info(); + EXPECT_EQ(&info, cpu_info_); + + // 测试特性检查函数 + auto sse_supported = cpu_supports(cpu_feature::SSE); + auto sse2_supported = cpu_supports(cpu_feature::SSE2); + + // 如果支持SSE2,应该也支持SSE + if (sse2_supported) { + EXPECT_TRUE(sse_supported); + } + + // 测试级别检查 + auto detector = &cpu_feature_detector::instance(); + EXPECT_EQ(detector->max_simd_level(), info.max_simd_level); + + // 验证支持级别检查逻辑 + EXPECT_TRUE(detector->supports_at_least(simd_level::NONE)); + + if (info.max_simd_level >= simd_level::SSE) { + EXPECT_TRUE(detector->supports_at_least(simd_level::SSE)); + } +} + +// 平台兼容性测试 +TEST_F(simd_test, CpuFeaturesTest_X86PlatformSupport) { +#if ALICHO_PLATFORM_X86 + // 在x86平台上,至少应该支持SSE + EXPECT_TRUE(cpu_supports(cpu_feature::SSE) || cpu_supports(cpu_feature::SSE2)); + + // 检查常见的x86特性 + std::vector x86_features = { + cpu_feature::SSE, cpu_feature::SSE2, cpu_feature::SSE3, + cpu_feature::AVX, cpu_feature::AVX2, cpu_feature::FMA + }; + + bool has_any_x86_feature = false; + for (auto feature : x86_features) { + if (cpu_supports(feature)) { + has_any_x86_feature = true; + break; + } + } + EXPECT_TRUE(has_any_x86_feature); +#else + GTEST_SKIP() << "Not x86 platform"; +#endif +} + +TEST_F(simd_test, CpuFeaturesTest_ArmPlatformSupport) { +#if ALICHO_PLATFORM_ARM + // 在ARM平台上,可能支持NEON + bool has_neon = cpu_supports(cpu_feature::NEON); + bool has_neon_fp16 = cpu_supports(cpu_feature::NEON_FP16); + + // 如果支持FP16,应该也支持基础NEON + if (has_neon_fp16) { + EXPECT_TRUE(has_neon); + } + + // 检查SIMD级别 + auto max_level = get_max_simd_level(); + if (has_neon) { + EXPECT_GE(static_cast(max_level), static_cast(simd_level::NEON)); + } +#else + GTEST_SKIP() << 
"Not ARM platform"; +#endif +} + +TEST_F(simd_test, CpuFeaturesTest_CrossPlatformConsistency) { + // 跨平台一致性检查 + auto detector = &cpu_feature_detector::instance(); + + // 单例应该总是返回相同的实例 + EXPECT_EQ(detector, &cpu_feature_detector::instance()); + + // 多次调用应该返回相同的结果 + auto level1 = get_max_simd_level(); + auto level2 = get_max_simd_level(); + EXPECT_EQ(level1, level2); + + auto recommended1 = get_recommended_simd_level(); + auto recommended2 = get_recommended_simd_level(); + EXPECT_EQ(recommended1, recommended2); + + // 特性检测应该一致 + auto sse_check1 = cpu_supports(cpu_feature::SSE); + auto sse_check2 = cpu_supports(cpu_feature::SSE); + EXPECT_EQ(sse_check1, sse_check2); +} + +// SIMD级别推荐测试 +TEST_F(simd_test, CpuFeaturesTest_SimdLevelRecommendation) { + auto max_level = get_max_simd_level(); + auto recommended_level = get_recommended_simd_level(); + + // 推荐算法的合理性检查 + switch (max_level) { + case simd_level::NONE: + EXPECT_EQ(recommended_level, simd_level::NONE); + break; + case simd_level::SSE: + case simd_level::SSE3: + case simd_level::SSE4: + case simd_level::AVX: + case simd_level::AVX2: + // 对于这些级别,推荐级别应该等于最大级别 + EXPECT_EQ(recommended_level, max_level); + break; + case simd_level::AVX512: + // AVX512可能会回退到AVX2以确保兼容性 + EXPECT_TRUE(recommended_level == simd_level::AVX512 || + recommended_level == simd_level::AVX2); + break; + case simd_level::NEON: + case simd_level::NEON_FP16: + EXPECT_EQ(recommended_level, max_level); + break; + } +} + +TEST_F(simd_test, CpuFeaturesTest_PerformanceGuidedSelection) { + // 测试性能引导的SIMD级别选择 + auto recommended = get_recommended_simd_level(); + auto max_level = get_max_simd_level(); + + // 推荐级别应该考虑性能和兼容性 + EXPECT_LE(static_cast(recommended), static_cast(max_level)); + + // 在AVX512的情况下,验证特殊逻辑 + if (max_level == simd_level::AVX512) { + bool has_avx512f = cpu_supports(cpu_feature::AVX512F); + bool has_avx512vl = cpu_supports(cpu_feature::AVX512VL); + bool has_avx512bw = cpu_supports(cpu_feature::AVX512BW); + + if (has_avx512f && has_avx512vl && 
has_avx512bw) { + // 应该根据CPU供应商和型号决定 + if (cpu_info_->vendor.find("AMD") != std::string::npos) { + EXPECT_EQ(recommended, simd_level::AVX512); + } + // Intel的情况下可能会有特殊处理 + } + } +} + +// 异常处理测试 +TEST_F(simd_test, CpuFeaturesTest_InvalidFeatureHandling) { + // 测试无效特性值的处理 + // 由于cpu_feature是enum class,编译器会阻止大多数无效值 + + // 测试边界值 - 使用一个明确未定义的特性值 + auto invalid_feature = static_cast(0); // 0值通常不代表任何特性 + EXPECT_NO_THROW({ + bool result = cpu_supports(invalid_feature); + // 0值应该返回false + EXPECT_FALSE(result); + }); + + // 测试特性位掩码的正确性 + uint32_t all_features = cpu_info_->features; + for (int bit = 0; bit < 32; ++bit) { + auto feature = static_cast(1U << bit); + bool expected = (all_features & (1U << bit)) != 0; + bool actual = cpu_supports(feature); + EXPECT_EQ(expected, actual) << "Bit " << bit << " mismatch"; + } +} + +TEST_F(simd_test, CpuFeaturesTest_ThreadSafety) { + // 测试多线程安全性 + const int num_threads = 4; + const int calls_per_thread = 100; + + std::vector threads; + std::vector results(num_threads * calls_per_thread); + + // 启动多个线程同时访问CPU特性检测 + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([&, t]() { + for (int i = 0; i < calls_per_thread; ++i) { + int idx = t * calls_per_thread + i; + + // 测试不同的API调用 + switch (i % 4) { + case 0: + results[idx] = cpu_supports(cpu_feature::SSE); + break; + case 1: + results[idx] = (get_max_simd_level() != simd_level::NONE); + break; + case 2: + results[idx] = (get_recommended_simd_level() != simd_level::NONE); + break; + case 3: + results[idx] = !get_cpu_info().vendor.empty(); + break; + } + } + }); + } + + // 等待所有线程完成 + for (auto& thread : threads) { + thread.join(); + } + + // 验证同一类型的调用返回相同结果 + bool sse_result = cpu_supports(cpu_feature::SSE); + auto max_level = get_max_simd_level(); + auto recommended_level = get_recommended_simd_level(); + bool has_vendor = !get_cpu_info().vendor.empty(); + + for (int i = 0; i < calls_per_thread; ++i) { + for (int t = 0; t < num_threads; ++t) { + int idx = t * calls_per_thread + 
i; + switch (i % 4) { + case 0: + EXPECT_EQ(results[idx], sse_result); + break; + case 1: + EXPECT_EQ(results[idx], (max_level != simd_level::NONE)); + break; + case 2: + EXPECT_EQ(results[idx], (recommended_level != simd_level::NONE)); + break; + case 3: + EXPECT_EQ(results[idx], has_vendor); + break; + } + } + } +} + +// ============================================================================= +// SIMD函数分发器模块测试(8个测试用例) +// ============================================================================= + +// 函数注册和查找 +TEST_F(simd_test, SimdDispatcherTest_FunctionRegistration) { + auto& dispatcher = simd_func_dispatcher::instance(); + + // 注册测试函数 + std::function scalar_add = simd_test_helpers::add_scalar; + std::function sse_add = simd_test_helpers::add_sse; + std::function avx_add = simd_test_helpers::add_avx; + + EXPECT_NO_THROW({ + dispatcher.register_function("test_add", simd_func_version::SCALAR, scalar_add); + dispatcher.register_function("test_add", simd_func_version::SSE, sse_add); + dispatcher.register_function("test_add", simd_func_version::AVX, avx_add); + }); + + // 验证函数已注册 + auto func_list = dispatcher.list_functions(); + EXPECT_TRUE(std::find(func_list.begin(), func_list.end(), "test_add") != func_list.end()); +} + +TEST_F(simd_test, SimdDispatcherTest_FunctionLookup) { + auto& dispatcher = simd_func_dispatcher::instance(); + + // 查找已注册的函数 + EXPECT_NO_THROW({ + const auto& func = dispatcher.get_function("test_add"); + + // 函数应该可以调用 + float result = func(1.0f, 2.0f); + EXPECT_GT(result, 0.0f); // 结果应该是正数 + }); + + // 查找不存在的函数应该抛出异常 + EXPECT_THROW({ + const auto& nonexistent = dispatcher.get_function("nonexistent_func"); + }, std::runtime_error); +} + +TEST_F(simd_test, SimdDispatcherTest_MultiVersionManagement) { + auto& dispatcher = simd_func_dispatcher::instance(); + + // 创建一个新的测试函数 + const std::string func_name = "multi_version_test"; + + // 注册多个版本 + dispatcher.register_function&)>( + func_name, simd_func_version::SCALAR, 
simd_test_helpers::sum_array_scalar); + dispatcher.register_function&)>( + func_name, simd_func_version::SSE, simd_test_helpers::sum_array_sse); + dispatcher.register_function&)>( + func_name, simd_func_version::AVX, simd_test_helpers::sum_array_avx); + + // 获取函数并测试 + const auto& func = dispatcher.get_function&)>(func_name); + + auto test_data = simd_test_helpers::generate_test_data(100); + float result = func(test_data); + + // 结果应该大于纯标量计算的结果(因为模拟的SIMD版本会增加系数) + float scalar_result = simd_test_helpers::sum_array_scalar(test_data); + EXPECT_GE(result, scalar_result); + + std::cout << "Multi-version result: " << result << " (scalar: " << scalar_result << ")" << std::endl; +} + +// 自动分发机制 +TEST_F(simd_test, SimdDispatcherTest_AutomaticDispatch) { + auto& dispatcher = simd_func_dispatcher::instance(); + + // 测试自动分发是否选择最佳版本 + const std::string func_name = "auto_dispatch_test"; + + // 只注册标量版本 + dispatcher.register_function( + func_name, simd_func_version::SCALAR, + [](int a, int b) { return a + b; }); + + // 根据当前系统支持,可能还会注册其他版本 + if (cpu_supports(cpu_feature::SSE)) { + dispatcher.register_function( + func_name, simd_func_version::SSE, + [](int a, int b) { return a + b + 1; }); // SSE版本加1标识 + } + + if (cpu_supports(cpu_feature::AVX)) { + dispatcher.register_function( + func_name, simd_func_version::AVX, + [](int a, int b) { return a + b + 2; }); // AVX版本加2标识 + } + + // 测试分发选择 + const auto& func = dispatcher.get_function(func_name); + int result = func(10, 20); + + // 验证选择了正确的版本 + if (cpu_supports(cpu_feature::AVX)) { + EXPECT_EQ(result, 32); // 10 + 20 + 2 + } else if (cpu_supports(cpu_feature::SSE)) { + EXPECT_EQ(result, 31); // 10 + 20 + 1 + } else { + EXPECT_EQ(result, 30); // 10 + 20 + } +} + +TEST_F(simd_test, SimdDispatcherTest_PriorityBasedSelection) { + // 测试基于优先级的版本选择 + auto recommended_level = get_recommended_simd_level(); + auto expected_version = simd_level_to_version(recommended_level); + + std::cout << "Recommended SIMD level: " << 
static_cast(recommended_level) << std::endl; + std::cout << "Expected version: " << static_cast(expected_version) << std::endl; + + // 验证级别转换函数 + EXPECT_GE(static_cast(expected_version), static_cast(simd_func_version::SCALAR)); + EXPECT_LE(static_cast(expected_version), static_cast(simd_func_version::VECTOR)); + + // 测试转换一致性 + switch (recommended_level) { + case simd_level::NONE: + EXPECT_EQ(expected_version, simd_func_version::SCALAR); + break; + case simd_level::SSE: + EXPECT_EQ(expected_version, simd_func_version::SSE); + break; + case simd_level::AVX: + EXPECT_EQ(expected_version, simd_func_version::AVX); + break; + case simd_level::AVX2: + EXPECT_EQ(expected_version, simd_func_version::AVX2); + break; + default: + // 其他情况也应该有对应的版本 + break; + } +} + +TEST_F(simd_test, SimdDispatcherTest_VersionFallback) { + auto& dispatcher = simd_func_dispatcher::instance(); + const std::string func_name = "fallback_test"; + + // 只注册标量版本,测试回退机制 + dispatcher.register_function( + func_name, simd_func_version::SCALAR, + [](double x) { return x * 2.0; }); + + // 即使系统支持更高级的SIMD,也应该回退到标量版本 + const auto& func = dispatcher.get_function(func_name); + double result = func(3.14); + EXPECT_DOUBLE_EQ(result, 6.28); + + // 现在注册一个高级版本 + if (cpu_supports(cpu_feature::AVX)) { + dispatcher.register_function( + func_name, simd_func_version::AVX, + [](double x) { return x * 3.0; }); // 不同的计算以验证选择了正确版本 + + // 重新获取函数,应该选择AVX版本 + const auto& avx_func = dispatcher.get_function(func_name); + double avx_result = avx_func(3.14); + EXPECT_DOUBLE_EQ(avx_result, 9.42); + } +} + +// 宏接口测试 +TEST_F(simd_test, SimdDispatcherTest_MacroInterface) { + // 测试注册宏 + EXPECT_NO_THROW({ + std::function square_func = [](int x) { return x * x; }; + REGISTER_SIMD_FUNCTION("macro_test", simd_func_version::SCALAR, square_func); + }); + + // 测试获取宏 + EXPECT_NO_THROW({ + const auto& func = GET_SIMD_FUNCTION(int(int), "macro_test"); + int result = func(5); + EXPECT_EQ(result, 25); + }); + + // 测试调用宏 + EXPECT_NO_THROW({ + int 
result = CALL_SIMD_FUNCTION(int(int), "macro_test", 6); + EXPECT_EQ(result, 36); + }); + + // 测试字符串转换函数 + EXPECT_STREQ(simd_func_version_to_string(simd_func_version::SCALAR), "SCALAR"); + EXPECT_STREQ(simd_func_version_to_string(simd_func_version::SSE), "SSE"); + EXPECT_STREQ(simd_func_version_to_string(simd_func_version::AVX), "AVX"); + + EXPECT_EQ(string_to_simd_func_version("SCALAR"), simd_func_version::SCALAR); + EXPECT_EQ(string_to_simd_func_version("SSE"), simd_func_version::SSE); + EXPECT_EQ(string_to_simd_func_version("AVX"), simd_func_version::AVX); + EXPECT_EQ(string_to_simd_func_version("INVALID"), simd_func_version::SCALAR); // 默认回退 +} + +TEST_F(simd_test, SimdDispatcherTest_TypeSafety) { + auto& dispatcher = simd_func_dispatcher::instance(); + + // 注册不同类型的函数 + dispatcher.register_function("int_func", simd_func_version::SCALAR, + [](int x) { return x + 1; }); + dispatcher.register_function("float_func", simd_func_version::SCALAR, + [](float x) { return x + 1.0f; }); + + // 类型安全检查 + EXPECT_NO_THROW({ + const auto& int_func = dispatcher.get_function("int_func"); + int result = int_func(42); + EXPECT_EQ(result, 43); + }); + + EXPECT_NO_THROW({ + const auto& float_func = dispatcher.get_function("float_func"); + float result = float_func(3.14f); + EXPECT_FLOAT_EQ(result, 4.14f); + }); + + // 尝试用不同的类型获取同名函数会创建独立的函数持有者 + EXPECT_NO_THROW({ + // 这会创建一个新的double类型函数持有者,与int类型的是分离的 + const auto& double_func = dispatcher.get_function("int_func"); + // 这验证了类型安全性 - 不同类型的函数是分离的 + }); +} + +// 错误处理 +TEST_F(simd_test, SimdDispatcherTest_InvalidRegistration) { + auto& dispatcher = simd_func_dispatcher::instance(); + + // 测试重复注册相同版本 + EXPECT_NO_THROW({ + dispatcher.register_function("duplicate_test", simd_func_version::SCALAR, + []() { return 1; }); + dispatcher.register_function("duplicate_test", simd_func_version::SCALAR, + []() { return 2; }); // 覆盖前一个 + }); + + // 验证最后注册的版本生效 + const auto& func = dispatcher.get_function("duplicate_test"); + int result = func(); + 
EXPECT_EQ(result, 2); +} + +TEST_F(simd_test, SimdDispatcherTest_MissingFunction) { + auto& dispatcher = simd_func_dispatcher::instance(); + + // 尝试获取未注册的函数应该抛出异常 + EXPECT_THROW({ + const auto& missing_func = dispatcher.get_function("nonexistent_function"); + }, std::runtime_error); + + // 尝试调用未注册的函数 + EXPECT_THROW({ + CALL_SIMD_FUNCTION(void(), "another_nonexistent_function"); + }, std::runtime_error); +} + +// ============================================================================= +// 对齐内存分配器模块测试(9个测试用例) +// ============================================================================= + +// 基础分配测试 +TEST_F(simd_test, AlignedAllocatorTest_BasicAllocation) { + // 测试基本的对齐分配 + constexpr size_t alignment = ALIGNMENT_AVX; // 32字节对齐 + constexpr size_t size = 1024; + + void* ptr = aligned_malloc(size, alignment); + ASSERT_NE(ptr, nullptr); + EXPECT_TRUE(simd_test_helpers::is_properly_aligned(ptr)); + + // 写入数据验证可用性 + auto* data = static_cast(ptr); + for (size_t i = 0; i < size; ++i) { + data[i] = static_cast(i % 256); + } + + // 验证数据 + for (size_t i = 0; i < size; ++i) { + EXPECT_EQ(data[i], static_cast(i % 256)); + } + + aligned_free(ptr); +} + +TEST_F(simd_test, AlignedAllocatorTest_VariousAlignments) { + // 测试不同的对齐要求 + std::vector alignments = { + ALIGNMENT_SSE, // 16字节 + ALIGNMENT_AVX, // 32字节 + ALIGNMENT_AVX512, // 64字节 + ALIGNMENT_CACHE // 64字节(缓存行) + }; + + constexpr size_t size = 256; + + for (auto alignment : alignments) { + void* ptr = aligned_malloc(size, alignment); + ASSERT_NE(ptr, nullptr) << "Failed to allocate with alignment " << alignment; + + EXPECT_TRUE(is_aligned(ptr, alignment)) + << "Pointer not properly aligned to " << alignment << " bytes"; + + // 验证可以写入数据 + std::memset(ptr, 0xAB, size); + + aligned_free(ptr); + } +} + +TEST_F(simd_test, AlignedAllocatorTest_LargeAllocations) { + // 测试大块内存分配 + std::vector sizes = { + 1024, // 1KB + 1024 * 64, // 64KB + 1024 * 1024 // 1MB + }; + + constexpr size_t alignment = ALIGNMENT_AVX; + + for (auto size 
: sizes) { + void* ptr = aligned_malloc(size, alignment); + ASSERT_NE(ptr, nullptr) << "Failed to allocate " << size << " bytes"; + + EXPECT_TRUE(simd_test_helpers::is_properly_aligned(ptr)); + + // 简单的读写测试 + auto* data = static_cast(ptr); + data[0] = 0x12345678; + data[size/sizeof(int) - 1] = 0x87654321; + + EXPECT_EQ(data[0], 0x12345678); + EXPECT_EQ(data[size/sizeof(int) - 1], 0x87654321); + + aligned_free(ptr); + } +} + +// STL兼容性 +TEST_F(simd_test, AlignedAllocatorTest_StlContainerCompat) { + // 测试STL容器兼容性(需要修复aligned_allocator中的错误) + using aligned_vector = std::vector>; + + EXPECT_NO_THROW({ + aligned_vector vec; + vec.reserve(100); + + for (int i = 0; i < 50; ++i) { + vec.push_back(static_cast(i)); + } + + EXPECT_EQ(vec.size(), 50); + EXPECT_GE(vec.capacity(), 50); + + // 验证对齐 + if (!vec.empty()) { + EXPECT_TRUE(simd_test_helpers::is_properly_aligned(vec.data())); + } + }); +} + +TEST_F(simd_test, AlignedAllocatorTest_VectorOperations) { + using sse_vector = std::vector>; + using avx_vector = std::vector>; + + // SSE对齐的vector + sse_vector sse_vec(100, 3.14); + EXPECT_EQ(sse_vec.size(), 100); + EXPECT_TRUE(simd_test_helpers::is_properly_aligned(sse_vec.data())); + + // AVX对齐的vector + avx_vector avx_vec(200, 2.71f); + EXPECT_EQ(avx_vec.size(), 200); + EXPECT_TRUE(simd_test_helpers::is_properly_aligned(avx_vec.data())); + + // 测试resize操作 + sse_vec.resize(200); + EXPECT_EQ(sse_vec.size(), 200); + if (!sse_vec.empty()) { + EXPECT_TRUE(simd_test_helpers::is_properly_aligned(sse_vec.data())); + } +} + +TEST_F(simd_test, AlignedAllocatorTest_MemoryManagement) { + using cache_vector = std::vector>; + + // 测试内存管理 + { + cache_vector vec(1000); + std::iota(vec.begin(), vec.end(), 0); + + EXPECT_TRUE(simd_test_helpers::is_properly_aligned(vec.data())); + + // 验证数据正确性 + for (size_t i = 0; i < vec.size(); ++i) { + EXPECT_EQ(vec[i], static_cast(i)); + } + } // vector销毁,测试析构函数 + + // 测试移动语义 + cache_vector vec1(100, 42); + auto vec1_data = vec1.data(); + + cache_vector vec2 = 
std::move(vec1); + EXPECT_EQ(vec2.size(), 100); + EXPECT_EQ(vec2.data(), vec1_data); // 移动后数据指针应该相同 + EXPECT_TRUE(vec1.empty() || vec1.data() != vec1_data); // vec1应该被清空或数据被移走 +} + +// 跨平台行为 +TEST_F(simd_test, AlignedAllocatorTest_PlatformConsistency) { + // 测试跨平台的一致行为 + constexpr size_t alignment = 32; + constexpr size_t size = 1024; + + std::vector ptrs; + + // 分配多个内存块 + for (int i = 0; i < 10; ++i) { + void* ptr = aligned_malloc(size, alignment); + ASSERT_NE(ptr, nullptr); + EXPECT_TRUE(is_aligned(ptr, alignment)); + ptrs.push_back(ptr); + } + + // 验证所有指针都正确对齐 + for (auto ptr : ptrs) { + EXPECT_TRUE(is_aligned(ptr, alignment)); + + // 写入特定模式 + auto* data = static_cast(ptr); + for (size_t j = 0; j < size / sizeof(uint32_t); ++j) { + data[j] = static_cast(j * 0x12345678); + } + } + + // 验证数据完整性 + for (size_t i = 0; i < ptrs.size(); ++i) { + auto* data = static_cast(ptrs[i]); + for (size_t j = 0; j < size / sizeof(uint32_t); ++j) { + EXPECT_EQ(data[j], static_cast(j * 0x12345678)) + << "Data corruption at ptr " << i << ", index " << j; + } + } + + // 释放所有内存 + for (auto ptr : ptrs) { + aligned_free(ptr); + } +} + +TEST_F(simd_test, AlignedAllocatorTest_AlignmentVerification) { + // 测试对齐验证函数 + std::vector test_alignments = {1, 2, 4, 8, 16, 32, 64, 128}; + + for (auto alignment : test_alignments) { + // 测试2的幂次对齐 + if ((alignment & (alignment - 1)) == 0) { // 是2的幂 + void* ptr = aligned_malloc(256, alignment); + ASSERT_NE(ptr, nullptr); + EXPECT_TRUE(is_aligned(ptr, alignment)); + aligned_free(ptr); + } else { + // 非2的幂次应该返回nullptr + void* ptr = aligned_malloc(256, alignment); + EXPECT_EQ(ptr, nullptr); + } + } + + // 测试边界情况 + EXPECT_EQ(aligned_malloc(100, 0), nullptr); // 0对齐应该失败 + + // 测试align_size函数 + EXPECT_EQ(align_size(15, 16), 16); + EXPECT_EQ(align_size(16, 16), 16); + EXPECT_EQ(align_size(17, 16), 32); + EXPECT_EQ(align_size(31, 32), 32); + EXPECT_EQ(align_size(33, 32), 64); +} + +TEST_F(simd_test, AlignedAllocatorTest_PerformanceCharacteristics) { + // 
简单的性能特征测试 + constexpr size_t num_allocations = 1000; + constexpr size_t allocation_size = 1024; + + // 测试对齐分配的性能 + simd_test_helpers::timer timer; + + std::vector aligned_ptrs; + aligned_ptrs.reserve(num_allocations); + + // 分配阶段 + for (size_t i = 0; i < num_allocations; ++i) { + void* ptr = aligned_malloc(allocation_size, ALIGNMENT_AVX); + ASSERT_NE(ptr, nullptr); + aligned_ptrs.push_back(ptr); + } + + double allocation_time = timer.elapsed_ms(); + + // 访问测试 + simd_test_helpers::timer access_timer; + uint64_t checksum = 0; + + for (auto ptr : aligned_ptrs) { + auto* data = static_cast(ptr); + checksum += data[0]; // 简单访问测试 + } + + double access_time = access_timer.elapsed_ms(); + + // 释放阶段 + simd_test_helpers::timer free_timer; + + for (auto ptr : aligned_ptrs) { + aligned_free(ptr); + } + + double free_time = free_timer.elapsed_ms(); + + // 性能报告 + std::cout << "Aligned allocation performance:" << std::endl; + std::cout << " Allocations: " << num_allocations << " x " << allocation_size << " bytes" << std::endl; + std::cout << " Allocation time: " << allocation_time << " ms" << std::endl; + std::cout << " Access time: " << access_time << " ms" << std::endl; + std::cout << " Free time: " << free_time << " ms" << std::endl; + std::cout << " Avg allocation time: " << (allocation_time / num_allocations) << " ms" << std::endl; + + // 基本合理性检查 + EXPECT_GT(allocation_time, 0.0); + EXPECT_GT(access_time, 0.0); + EXPECT_GT(free_time, 0.0); + + // 避免编译器优化掉checksum计算 + EXPECT_GE(checksum, 0); // checksum可能为0,但应该不会是负数 +} + +// ============================================================================= +// 集成和性能测试(4个测试用例) +// ============================================================================= + +// 端到端集成测试 +TEST_F(simd_test, SimdIntegrationTest_FullWorkflow) { + // 完整的SIMD工作流程测试:检测 -> 分发 -> 分配 -> 执行 + + // 1. 
CPU特性检测 + auto max_level = get_max_simd_level(); + auto recommended_level = get_recommended_simd_level(); + + std::cout << "Integration test - SIMD levels: max=" << static_cast(max_level) + << ", recommended=" << static_cast(recommended_level) << std::endl; + + // 2. 注册多版本函数 + auto& dispatcher = simd_func_dispatcher::instance(); + const std::string func_name = "integration_vector_sum"; + + // 使用对齐分配器的向量进行计算 + using aligned_float_vector = std::vector>; + + // 注册标量版本 + dispatcher.register_function( + func_name, simd_func_version::SCALAR, + [](const aligned_float_vector& vec) -> float { + float sum = 0.0f; + for (const auto& val : vec) { + sum += val; + } + return sum; + }); + + // 根据支持的特性注册优化版本 + if (cpu_supports(cpu_feature::SSE)) { + dispatcher.register_function( + func_name, simd_func_version::SSE, + [](const aligned_float_vector& vec) -> float { + // 模拟SSE优化(实际实现会使用SSE指令) + float sum = 0.0f; + for (const auto& val : vec) { + sum += val; + } + return sum * 1.001f; // 添加小的标识以区分版本 + }); + } + + if (cpu_supports(cpu_feature::AVX)) { + dispatcher.register_function( + func_name, simd_func_version::AVX, + [](const aligned_float_vector& vec) -> float { + // 模拟AVX优化 + float sum = 0.0f; + for (const auto& val : vec) { + sum += val; + } + return sum * 1.002f; // AVX版本标识 + }); + } + + // 3. 创建测试数据(使用对齐分配) + aligned_float_vector test_data(10000); + std::iota(test_data.begin(), test_data.end(), 1.0f); + + // 验证数据对齐 + EXPECT_TRUE(simd_test_helpers::is_properly_aligned(test_data.data())); + + // 4. 执行计算 + const auto& func = dispatcher.get_function(func_name); + float result = func(test_data); + + // 5. 
验证结果 + float expected_base = 10000.0f * 10001.0f / 2.0f; // 等差数列求和 + EXPECT_GT(result, expected_base * 0.99f); // 允许一定的误差和版本差异 + EXPECT_LT(result, expected_base * 1.01f); + + std::cout << "Integration test result: " << result << " (expected ~" << expected_base << ")" << std::endl; +} + +TEST_F(simd_test, SimdIntegrationTest_RealWorldScenarios) { + // 真实世界场景测试:图像处理、数值计算等 + + // 场景1:向量点积计算 + const size_t vector_size = 1024; + using aligned_vector = std::vector>; + + aligned_vector vec_a(vector_size), vec_b(vector_size); + + // 初始化向量 + for (size_t i = 0; i < vector_size; ++i) { + vec_a[i] = static_cast(i + 1); + vec_b[i] = static_cast((i + 1) * 2); + } + + // 注册点积函数 + auto& dispatcher = simd_func_dispatcher::instance(); + const std::string dot_product_name = "dot_product"; + + dispatcher.register_function( + dot_product_name, simd_func_version::SCALAR, + [](const aligned_vector& a, const aligned_vector& b) -> float { + float result = 0.0f; + for (size_t i = 0; i < a.size(); ++i) { + result += a[i] * b[i]; + } + return result; + }); + + // 执行点积计算 + float dot_result = CALL_SIMD_FUNCTION(float(const aligned_vector&, const aligned_vector&), + dot_product_name, vec_a, vec_b); + + // 验证结果(数学验证) + float expected = 0.0f; + for (size_t i = 0; i < vector_size; ++i) { + expected += vec_a[i] * vec_b[i]; + } + EXPECT_FLOAT_EQ(dot_result, expected); + + // 场景2:矩阵转置(简化版) + const size_t matrix_size = 64; // 64x64矩阵 + aligned_vector matrix(matrix_size * matrix_size); + aligned_vector transposed(matrix_size * matrix_size); + + // 初始化矩阵 + for (size_t i = 0; i < matrix_size; ++i) { + for (size_t j = 0; j < matrix_size; ++j) { + matrix[i * matrix_size + j] = static_cast(i * matrix_size + j); + } + } + + // 矩阵转置 + const std::string transpose_name = "matrix_transpose"; + dispatcher.register_function( + transpose_name, simd_func_version::SCALAR, + [](const aligned_vector& src, aligned_vector& dst, size_t size) { + for (size_t i = 0; i < size; ++i) { + for (size_t j = 0; j < size; ++j) { + 
dst[j * size + i] = src[i * size + j]; + } + } + }); + + CALL_SIMD_FUNCTION(void(const aligned_vector&, aligned_vector&, size_t), + transpose_name, matrix, transposed, matrix_size); + + // 验证转置结果 + for (size_t i = 0; i < matrix_size; ++i) { + for (size_t j = 0; j < matrix_size; ++j) { + EXPECT_FLOAT_EQ(transposed[j * matrix_size + i], matrix[i * matrix_size + j]); + } + } + + std::cout << "Real-world scenarios test completed successfully" << std::endl; +} + +// 性能基准测试 +TEST_F(simd_test, SimdPerformanceTest_AllocationSpeed) { + // 对齐分配性能基准测试 + + struct BenchmarkConfig { + size_t allocation_size; + size_t alignment; + size_t num_iterations; + std::string name; + }; + + std::vector configs = { + {1024, ALIGNMENT_SSE, 10000, "SSE-1KB"}, + {1024, ALIGNMENT_AVX, 10000, "AVX-1KB"}, + {1024, ALIGNMENT_AVX512, 10000, "AVX512-1KB"}, + {4096, ALIGNMENT_AVX, 5000, "AVX-4KB"}, + {16384, ALIGNMENT_AVX, 2000, "AVX-16KB"}, + {65536, ALIGNMENT_AVX, 1000, "AVX-64KB"} + }; + + std::cout << "\nAllocation Speed Benchmark:" << std::endl; + std::cout << "Config\t\tAlloc(ms)\tFree(ms)\tTotal(ms)" << std::endl; + + for (const auto& config : configs) { + std::vector ptrs; + ptrs.reserve(config.num_iterations); + + // 分配基准 + simd_test_helpers::timer alloc_timer; + for (size_t i = 0; i < config.num_iterations; ++i) { + void* ptr = aligned_malloc(config.allocation_size, config.alignment); + ASSERT_NE(ptr, nullptr); + ptrs.push_back(ptr); + } + double alloc_time = alloc_timer.elapsed_ms(); + + // 释放基准 + simd_test_helpers::timer free_timer; + for (auto ptr : ptrs) { + aligned_free(ptr); + } + double free_time = free_timer.elapsed_ms(); + + double total_time = alloc_time + free_time; + + std::cout << config.name << "\t\t" + << std::fixed << std::setprecision(2) + << alloc_time << "\t\t" + << free_time << "\t\t" + << total_time << std::endl; + + // 基本性能断言 + EXPECT_GT(alloc_time, 0.0); + EXPECT_GT(free_time, 0.0); + EXPECT_LT(alloc_time / config.num_iterations, 1.0); // 平均每次分配应该小于1ms + } +} + 
+TEST_F(simd_test, SimdPerformanceTest_DispatchOverhead) {
+    // Benchmark of the function-dispatch overhead. The same trivial int(int)
+    // function is timed three ways: called directly, called through a handle
+    // fetched once from the dispatcher, and called through CALL_SIMD_FUNCTION
+    // on every iteration.
+
+    auto& dispatcher = simd_func_dispatcher::instance();
+    const std::string bench_func_name = "dispatch_overhead_test";
+
+    // Register a trivial test function; the scalar version is always registered.
+    dispatcher.register_function<int(int)>(
+        bench_func_name, simd_func_version::SCALAR,
+        [](int x) { return x + 1; });
+
+    if (cpu_supports(cpu_feature::SSE)) {
+        dispatcher.register_function<int(int)>(
+            bench_func_name, simd_func_version::SSE,
+            [](int x) { return x + 2; });
+    }
+
+    const size_t num_calls = 1000000;  // one million calls
+
+    // The accumulators are 64-bit on purpose: summing (i + 1) over one million
+    // calls reaches roughly 5e11, which overflows a 32-bit int (undefined
+    // behaviour) and could make the final "> 0" checks fail spuriously.
+    // `volatile` keeps the compiler from optimising the loops away. Plain
+    // assignment is used instead of `+=`, because compound assignment on a
+    // volatile operand is deprecated since C++20.
+
+    // Baseline 1: direct function call
+    auto direct_func = [](int x) { return x + 1; };
+
+    simd_test_helpers::timer direct_timer;
+    volatile long long direct_result = 0;
+    for (size_t i = 0; i < num_calls; ++i) {
+        direct_result = direct_result + direct_func(static_cast<int>(i));
+    }
+    double direct_time = direct_timer.elapsed_ms();
+
+    // Baseline 2: call through a handle obtained once from the dispatcher
+    const auto& dispatched_func = dispatcher.get_function<int(int)>(bench_func_name);
+
+    simd_test_helpers::timer dispatch_timer;
+    volatile long long dispatch_result = 0;
+    for (size_t i = 0; i < num_calls; ++i) {
+        dispatch_result = dispatch_result + dispatched_func(static_cast<int>(i));
+    }
+    double dispatch_time = dispatch_timer.elapsed_ms();
+
+    // Baseline 3: call through the macro on every iteration
+    simd_test_helpers::timer macro_timer;
+    volatile long long macro_result = 0;
+    for (size_t i = 0; i < num_calls; ++i) {
+        macro_result = macro_result +
+            CALL_SIMD_FUNCTION(int(int), bench_func_name, static_cast<int>(i));
+    }
+    double macro_time = macro_timer.elapsed_ms();
+
+    // Report the raw timings
+    std::cout << "\nDispatch Overhead Benchmark (" << num_calls << " calls):" << std::endl;
+    std::cout << "Direct function: " << direct_time << " ms" << std::endl;
+    std::cout << "Dispatched function: " << dispatch_time << " ms" << std::endl;
+    std::cout << "Macro call: " << macro_time << " ms" << std::endl;
+
+    // The overhead percentages divide by direct_time, so stop here with a clear
+    // message instead of producing inf/NaN if the timer reported zero.
+    ASSERT_GT(direct_time, 0.0) << "direct-call baseline measured as 0 ms; cannot compute overhead";
+
+    double dispatch_overhead = (dispatch_time - direct_time) / direct_time * 100.0;
+    double macro_overhead = (macro_time - direct_time) / direct_time * 100.0;
+
+    std::cout << "Dispatch overhead: " << std::fixed << std::setprecision(2)
+              << dispatch_overhead << "%" << std::endl;
+    std::cout << "Macro overhead: " << macro_overhead << "%" << std::endl;
+
+    // Timing sanity checks
+    EXPECT_GT(dispatch_time, 0.0);
+    EXPECT_GT(macro_time, 0.0);
+
+    // Dispatch overhead must stay within a deliberately generous bound
+    EXPECT_LT(dispatch_overhead, 1000.0);  // up to 1000% extra time over the direct call
+    EXPECT_LT(macro_overhead, 10000.0);    // looser bound for the macro path
+
+    // Check the accumulated sums so the measured work is observable.
+    // The volatile values are copied into plain locals before being compared.
+    const long long direct_total = direct_result;
+    const long long dispatch_total = dispatch_result;
+    const long long macro_total = macro_result;
+    EXPECT_GT(direct_total, 0);
+    EXPECT_GT(dispatch_total, 0);
+    EXPECT_GT(macro_total, 0);
+}
+
+// =============================================================================
+// Test entry point
+// =============================================================================
+
+// Global test environment: prints system information before the tests start
+// and a closing banner after they finish.
+class SimdTestEnvironment : public ::testing::Environment {
+public:
+    // Prints the banner and the detected CPU information.
+    void SetUp() override {
+        std::cout << "\n" << std::string(60, '=') << std::endl;
+        std::cout << "SIMD Test Suite - System Information" << std::endl;
+        std::cout << std::string(60, '=') << std::endl;
+
+        cpu_feature_detector::instance().print_info();
+
+        std::cout << std::string(60, '=') << std::endl;
+        std::cout << "Starting SIMD tests..." << std::endl;
+        std::cout << std::string(60, '=') << std::endl;
+    }
+
+    // Prints the closing banner.
+    void TearDown() override {
+        std::cout << std::string(60, '=') << std::endl;
+        std::cout << "SIMD Test Suite completed." << std::endl;
+        std::cout << std::string(60, '=') << std::endl;
+    }
+};
+
+// Register the test environment. GoogleTest takes ownership of the object
+// passed to AddGlobalTestEnvironment, so it must not be deleted here.
+static ::testing::Environment* const simd_test_env =
+    ::testing::AddGlobalTestEnvironment(new SimdTestEnvironment);