#include #include #include #include #include #include #include #include #include #include "cpu_features.h" #include "simd_func_dispatcher.h" #include "aligned_allocator.h" // ============================================================================= // 测试辅助函数和宏定义 // ============================================================================= // 跨平台兼容性宏 #ifndef ALICHO_PLATFORM_WINDOWS #define ALICHO_PLATFORM_WINDOWS 0 #endif #ifndef ALICHO_PLATFORM_X86 #define ALICHO_PLATFORM_X86 1 #endif #ifndef ALICHO_PLATFORM_ARM #define ALICHO_PLATFORM_ARM 0 #endif #ifndef ALICHO_PLATFORM_POSIX #define ALICHO_PLATFORM_POSIX 0 #endif #ifndef ALICHO_PLATFORM_UNIX #define ALICHO_PLATFORM_UNIX 0 #endif // 测试辅助函数 namespace simd_test_helpers { // 简单的性能计时器 class timer { public: timer() : start_(std::chrono::high_resolution_clock::now()) { } auto elapsed_ms() const -> double { auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start_); return duration.count() / 1000.0; } private: std::chrono::high_resolution_clock::time_point start_; }; // 测试用的简单数学函数 auto add_scalar(float a, float b) -> float { return a + b; } auto add_sse(float a, float b) -> float { return a + b + 0.1f; } // 模拟SSE版本 auto add_avx(float a, float b) -> float { return a + b + 0.2f; } // 模拟AVX版本 // 测试用的数组求和函数 auto sum_array_scalar(const std::vector& arr) -> float { float sum = 0.0f; for (const auto& val : arr) { sum += val; } return sum; } auto sum_array_sse(const std::vector& arr) -> float { // 模拟SSE实现 return sum_array_scalar(arr) * 1.01f; } auto sum_array_avx(const std::vector& arr) -> float { // 模拟AVX实现 return sum_array_scalar(arr) * 1.02f; } // 检查指针是否正确对齐 template auto is_properly_aligned(void* ptr) -> bool { return (reinterpret_cast(ptr) % alignment) == 0; } // 生成测试数据 auto generate_test_data(size_t size) -> std::vector { std::vector data; data.reserve(size); for (size_t i = 0; i < size; ++i) { data.push_back(static_cast(i) * 0.1f); } return data; } } // ============================================================================= // 主测试类 // ============================================================================= class simd_test : public ::testing::Test { protected: void SetUp() override { // 获取CPU信息用于后续测试 cpu_info_ = &get_cpu_info(); } void TearDown() override { // 清理测试环境 } const cpu_info* cpu_info_ = nullptr; }; // ============================================================================= // CPU特性检测模块测试(9个测试用例) // ============================================================================= // 基础功能测试 TEST_F(simd_test, CpuFeaturesTest_BasicDetection) { ASSERT_NE(cpu_info_, nullptr); // 基本信息应该已填充 EXPECT_FALSE(cpu_info_->vendor.empty()); EXPECT_FALSE(cpu_info_->brand.empty()); EXPECT_GT(cpu_info_->logical_cores, 0); EXPECT_GT(cpu_info_->physical_cores, 0); // 特性字符串应该可以生成 auto features_str = cpu_info_->features_string(); EXPECT_TRUE(features_str.empty() || !features_str.empty()); // 总是为真,但测试调用成功 std::cout << "CPU 厂商: " << cpu_info_->vendor << std::endl; std::cout << "CPU 型号: " << cpu_info_->brand << std::endl; std::cout << "逻辑核心数: " << cpu_info_->logical_cores << std::endl; std::cout << "物理核心数: " << cpu_info_->physical_cores << std::endl; std::cout << "特性: " << features_str << std::endl; } TEST_F(simd_test, CpuFeaturesTest_SimdLevelDetection) { auto max_level = get_max_simd_level(); auto recommended_level = get_recommended_simd_level(); // SIMD级别应该在有效范围内 EXPECT_GE(static_cast(max_level), static_cast(simd_level::NONE)); EXPECT_LE(static_cast(max_level), static_cast(simd_level::NEON_FP16)); EXPECT_GE(static_cast(recommended_level), static_cast(simd_level::NONE)); EXPECT_LE(static_cast(recommended_level), static_cast(simd_level::NEON_FP16)); // 推荐级别不应该超过最大级别 EXPECT_LE(static_cast(recommended_level), static_cast(max_level)); std::cout << "最大 SIMD 级别: " << static_cast(max_level) << std::endl; std::cout << "推荐 SIMD 级别: " << static_cast(recommended_level) << std::endl; } TEST_F(simd_test, CpuFeaturesTest_GlobalFunctions) { // 测试全局便利函数 const auto& info = get_cpu_info(); EXPECT_EQ(&info, cpu_info_); // 测试特性检查函数 auto sse_supported = cpu_supports(cpu_feature::SSE); auto sse2_supported = cpu_supports(cpu_feature::SSE2); // 如果支持SSE2,应该也支持SSE if (sse2_supported) { EXPECT_TRUE(sse_supported); } // 测试级别检查 auto detector = &cpu_feature_detector::instance(); EXPECT_EQ(detector->max_simd_level(), info.max_simd_level); // 验证支持级别检查逻辑 EXPECT_TRUE(detector->supports_at_least(simd_level::NONE)); if (info.max_simd_level >= simd_level::SSE) { EXPECT_TRUE(detector->supports_at_least(simd_level::SSE)); } } // 平台兼容性测试 TEST_F(simd_test, CpuFeaturesTest_X86PlatformSupport) { #if ALICHO_PLATFORM_X86 // 在x86平台上,至少应该支持SSE EXPECT_TRUE(cpu_supports(cpu_feature::SSE) || cpu_supports(cpu_feature::SSE2)); // 检查常见的x86特性 std::vector x86_features = { cpu_feature::SSE, cpu_feature::SSE2, cpu_feature::SSE3, cpu_feature::AVX, cpu_feature::AVX2, cpu_feature::FMA }; bool has_any_x86_feature = false; for (auto feature : x86_features) { if (cpu_supports(feature)) { has_any_x86_feature = true; break; } } EXPECT_TRUE(has_any_x86_feature); #else GTEST_SKIP() << "Not x86 platform"; #endif } TEST_F(simd_test, CpuFeaturesTest_ArmPlatformSupport) { #if ALICHO_PLATFORM_ARM // 在ARM平台上,可能支持NEON bool has_neon = cpu_supports(cpu_feature::NEON); bool has_neon_fp16 = cpu_supports(cpu_feature::NEON_FP16); // 如果支持FP16,应该也支持基础NEON if (has_neon_fp16) { EXPECT_TRUE(has_neon); } // 检查SIMD级别 auto max_level = get_max_simd_level(); if (has_neon) { EXPECT_GE(static_cast(max_level), static_cast(simd_level::NEON)); } #else GTEST_SKIP() << "Not ARM platform"; #endif } TEST_F(simd_test, CpuFeaturesTest_CrossPlatformConsistency) { // 跨平台一致性检查 auto detector = &cpu_feature_detector::instance(); // 单例应该总是返回相同的实例 EXPECT_EQ(detector, &cpu_feature_detector::instance()); // 多次调用应该返回相同的结果 auto level1 = get_max_simd_level(); auto level2 = get_max_simd_level(); EXPECT_EQ(level1, level2); auto recommended1 = get_recommended_simd_level(); auto recommended2 = get_recommended_simd_level(); EXPECT_EQ(recommended1, recommended2); // 特性检测应该一致 auto sse_check1 = cpu_supports(cpu_feature::SSE); auto sse_check2 = cpu_supports(cpu_feature::SSE); EXPECT_EQ(sse_check1, sse_check2); } // SIMD级别推荐测试 TEST_F(simd_test, CpuFeaturesTest_SimdLevelRecommendation) { auto max_level = get_max_simd_level(); auto recommended_level = get_recommended_simd_level(); // 推荐算法的合理性检查 switch (max_level) { case simd_level::NONE: EXPECT_EQ(recommended_level, simd_level::NONE); break; case simd_level::SSE: case simd_level::SSE3: case simd_level::SSE4: case simd_level::AVX: case simd_level::AVX2: // 对于这些级别,推荐级别应该等于最大级别 EXPECT_EQ(recommended_level, max_level); break; case simd_level::AVX512: // AVX512可能会回退到AVX2以确保兼容性 EXPECT_TRUE(recommended_level == simd_level::AVX512 || recommended_level == simd_level::AVX2); break; case simd_level::NEON: case simd_level::NEON_FP16: EXPECT_EQ(recommended_level, max_level); break; } } TEST_F(simd_test, CpuFeaturesTest_PerformanceGuidedSelection) { // 测试性能引导的SIMD级别选择 auto recommended = get_recommended_simd_level(); auto max_level = get_max_simd_level(); // 推荐级别应该考虑性能和兼容性 EXPECT_LE(static_cast(recommended), static_cast(max_level)); // 在AVX512的情况下,验证特殊逻辑 if (max_level == simd_level::AVX512) { bool has_avx512f = cpu_supports(cpu_feature::AVX512F); bool has_avx512vl = cpu_supports(cpu_feature::AVX512VL); bool has_avx512bw = cpu_supports(cpu_feature::AVX512BW); if (has_avx512f && has_avx512vl && has_avx512bw) { // 应该根据CPU供应商和型号决定 if (cpu_info_->vendor.find("AMD") != std::string::npos) { EXPECT_EQ(recommended, simd_level::AVX512); } // Intel的情况下可能会有特殊处理 } } } // 异常处理测试 TEST_F(simd_test, CpuFeaturesTest_InvalidFeatureHandling) { // 测试无效特性值的处理 // 由于cpu_feature是enum class,编译器会阻止大多数无效值 // 测试边界值 - 使用一个明确未定义的特性值 auto invalid_feature = static_cast(0); // 0值通常不代表任何特性 EXPECT_NO_THROW({ bool result = cpu_supports(invalid_feature); // 0值应该返回false EXPECT_FALSE(result); }); // 测试特性位掩码的正确性 uint32_t all_features = cpu_info_->features; for (int bit = 0; bit < 32; ++bit) { auto feature = static_cast(1U << bit); bool expected = (all_features & (1U << bit)) != 0; bool actual = cpu_supports(feature); EXPECT_EQ(expected, actual) << "Bit " << bit << " mismatch"; } } TEST_F(simd_test, CpuFeaturesTest_ThreadSafety) { // 测试多线程安全性 const int num_threads = 4; const int calls_per_thread = 100; std::vector threads; std::vector results(num_threads * calls_per_thread); // 启动多个线程同时访问CPU特性检测 for (int t = 0; t < num_threads; ++t) { threads.emplace_back([&, t]() { for (int i = 0; i < calls_per_thread; ++i) { int idx = t * calls_per_thread + i; // 测试不同的API调用 switch (i % 4) { case 0: results[idx] = cpu_supports(cpu_feature::SSE); break; case 1: results[idx] = (get_max_simd_level() != simd_level::NONE); break; case 2: results[idx] = (get_recommended_simd_level() != simd_level::NONE); break; case 3: results[idx] = !get_cpu_info().vendor.empty(); break; } } }); } // 等待所有线程完成 for (auto& thread : threads) { thread.join(); } // 验证同一类型的调用返回相同结果 bool sse_result = cpu_supports(cpu_feature::SSE); auto max_level = get_max_simd_level(); auto recommended_level = get_recommended_simd_level(); bool has_vendor = !get_cpu_info().vendor.empty(); for (int i = 0; i < calls_per_thread; ++i) { for (int t = 0; t < num_threads; ++t) { int idx = t * calls_per_thread + i; switch (i % 4) { case 0: EXPECT_EQ(results[idx], sse_result); break; case 1: EXPECT_EQ(results[idx], (max_level != simd_level::NONE)); break; case 2: EXPECT_EQ(results[idx], (recommended_level != simd_level::NONE)); break; case 3: EXPECT_EQ(results[idx], has_vendor); break; } } } } // ============================================================================= // SIMD函数分发器模块测试(8个测试用例) // ============================================================================= // 函数注册和查找 TEST_F(simd_test, SimdDispatcherTest_FunctionRegistration) { auto& dispatcher = simd_func_dispatcher::instance(); // 注册测试函数 std::function scalar_add = simd_test_helpers::add_scalar; std::function sse_add = simd_test_helpers::add_sse; std::function avx_add = simd_test_helpers::add_avx; EXPECT_NO_THROW({ dispatcher.register_function("test_add", simd_func_version::SCALAR, scalar_add); dispatcher.register_function("test_add", simd_func_version::SSE, sse_add); dispatcher.register_function("test_add", simd_func_version::AVX, avx_add); }); // 验证函数已注册 auto func_list = dispatcher.list_functions(); EXPECT_TRUE(std::find(func_list.begin(), func_list.end(), "test_add") != func_list.end()); } TEST_F(simd_test, SimdDispatcherTest_FunctionLookup) { auto& dispatcher = simd_func_dispatcher::instance(); // 查找已注册的函数 EXPECT_NO_THROW({ const auto& func = dispatcher.get_function("test_add"); // 函数应该可以调用 float result = func(1.0f, 2.0f); EXPECT_GT(result, 0.0f); // 结果应该是正数 }); // 查找不存在的函数应该抛出异常 EXPECT_THROW({ const auto& nonexistent = dispatcher.get_function("nonexistent_func"); }, std::runtime_error); } TEST_F(simd_test, SimdDispatcherTest_MultiVersionManagement) { auto& dispatcher = simd_func_dispatcher::instance(); // 创建一个新的测试函数 const std::string func_name = "multi_version_test"; // 注册多个版本 dispatcher.register_function&)>( func_name, simd_func_version::SCALAR, simd_test_helpers::sum_array_scalar); dispatcher.register_function&)>( func_name, simd_func_version::SSE, simd_test_helpers::sum_array_sse); dispatcher.register_function&)>( func_name, simd_func_version::AVX, simd_test_helpers::sum_array_avx); // 获取函数并测试 const auto& func = dispatcher.get_function&)>(func_name); auto test_data = simd_test_helpers::generate_test_data(100); float result = func(test_data); // 结果应该大于纯标量计算的结果(因为模拟的SIMD版本会增加系数) float scalar_result = simd_test_helpers::sum_array_scalar(test_data); EXPECT_GE(result, scalar_result); std::cout << "多版本结果: " << result << " (标量: " << scalar_result << ")" << std::endl; } // 自动分发机制 TEST_F(simd_test, SimdDispatcherTest_AutomaticDispatch) { auto& dispatcher = simd_func_dispatcher::instance(); // 测试自动分发是否选择最佳版本 const std::string func_name = "auto_dispatch_test"; // 只注册标量版本 dispatcher.register_function( func_name, simd_func_version::SCALAR, [](int a, int b) { return a + b; }); // 根据当前系统支持,可能还会注册其他版本 if (cpu_supports(cpu_feature::SSE)) { dispatcher.register_function( func_name, simd_func_version::SSE, [](int a, int b) { return a + b + 1; }); // SSE版本加1标识 } if (cpu_supports(cpu_feature::AVX)) { dispatcher.register_function( func_name, simd_func_version::AVX, [](int a, int b) { return a + b + 2; }); // AVX版本加2标识 } // 测试分发选择 const auto& func = dispatcher.get_function(func_name); int result = func(10, 20); // 验证选择了正确的版本 if (cpu_supports(cpu_feature::AVX)) { EXPECT_EQ(result, 32); // 10 + 20 + 2 } else if (cpu_supports(cpu_feature::SSE)) { EXPECT_EQ(result, 31); // 10 + 20 + 1 } else { EXPECT_EQ(result, 30); // 10 + 20 } } TEST_F(simd_test, SimdDispatcherTest_PriorityBasedSelection) { // 测试基于优先级的版本选择 auto recommended_level = get_recommended_simd_level(); auto expected_version = simd_level_to_version(recommended_level); std::cout << "推荐 SIMD 级别: " << static_cast(recommended_level) << std::endl; std::cout << "期望版本: " << static_cast(expected_version) << std::endl; // 验证级别转换函数 EXPECT_GE(static_cast(expected_version), static_cast(simd_func_version::SCALAR)); EXPECT_LE(static_cast(expected_version), static_cast(simd_func_version::VECTOR)); // 测试转换一致性 switch (recommended_level) { case simd_level::NONE: EXPECT_EQ(expected_version, simd_func_version::SCALAR); break; case simd_level::SSE: EXPECT_EQ(expected_version, simd_func_version::SSE); break; case simd_level::AVX: EXPECT_EQ(expected_version, simd_func_version::AVX); break; case simd_level::AVX2: EXPECT_EQ(expected_version, simd_func_version::AVX2); break; default: // 其他情况也应该有对应的版本 break; } } TEST_F(simd_test, SimdDispatcherTest_VersionFallback) { auto& dispatcher = simd_func_dispatcher::instance(); const std::string func_name = "fallback_test"; // 只注册标量版本,测试回退机制 dispatcher.register_function( func_name, simd_func_version::SCALAR, [](double x) { return x * 2.0; }); // 即使系统支持更高级的SIMD,也应该回退到标量版本 const auto& func = dispatcher.get_function(func_name); double result = func(3.14); EXPECT_DOUBLE_EQ(result, 6.28); // 现在注册一个高级版本 if (cpu_supports(cpu_feature::AVX)) { dispatcher.register_function( func_name, simd_func_version::AVX, [](double x) { return x * 3.0; }); // 不同的计算以验证选择了正确版本 // 重新获取函数,应该选择AVX版本 const auto& avx_func = dispatcher.get_function(func_name); double avx_result = avx_func(3.14); EXPECT_DOUBLE_EQ(avx_result, 9.42); } } // 宏接口测试 TEST_F(simd_test, SimdDispatcherTest_MacroInterface) { // 测试注册宏 EXPECT_NO_THROW({ std::function square_func = [](int x) { return x * x; }; REGISTER_SIMD_FUNCTION("macro_test", simd_func_version::SCALAR, square_func); }); // 测试获取宏 EXPECT_NO_THROW({ const auto& func = GET_SIMD_FUNCTION(int(int), "macro_test"); int result = func(5); EXPECT_EQ(result, 25); }); // 测试调用宏 EXPECT_NO_THROW({ int result = CALL_SIMD_FUNCTION(int(int), "macro_test", 6); EXPECT_EQ(result, 36); }); // 测试字符串转换函数 EXPECT_STREQ(simd_func_version_to_string(simd_func_version::SCALAR), "SCALAR"); EXPECT_STREQ(simd_func_version_to_string(simd_func_version::SSE), "SSE"); EXPECT_STREQ(simd_func_version_to_string(simd_func_version::AVX), "AVX"); EXPECT_EQ(string_to_simd_func_version("SCALAR"), simd_func_version::SCALAR); EXPECT_EQ(string_to_simd_func_version("SSE"), simd_func_version::SSE); EXPECT_EQ(string_to_simd_func_version("AVX"), simd_func_version::AVX); EXPECT_EQ(string_to_simd_func_version("INVALID"), simd_func_version::SCALAR); // 默认回退 } TEST_F(simd_test, SimdDispatcherTest_TypeSafety) { auto& dispatcher = simd_func_dispatcher::instance(); // 注册不同类型的函数 dispatcher.register_function("int_func", simd_func_version::SCALAR, [](int x) { return x + 1; }); dispatcher.register_function("float_func", simd_func_version::SCALAR, [](float x) { return x + 1.0f; }); // 类型安全检查 EXPECT_NO_THROW({ const auto& int_func = dispatcher.get_function("int_func"); int result = int_func(42); EXPECT_EQ(result, 43); }); EXPECT_NO_THROW({ const auto& float_func = dispatcher.get_function("float_func"); float result = float_func(3.14f); EXPECT_FLOAT_EQ(result, 4.14f); }); // 尝试用不同的类型获取同名函数会创建独立的函数持有者 EXPECT_NO_THROW({ // 这会创建一个新的double类型函数持有者,与int类型的是分离的 const auto& double_func = dispatcher.get_function("int_func"); // 这验证了类型安全性 - 不同类型的函数是分离的 }); } // 错误处理 TEST_F(simd_test, SimdDispatcherTest_InvalidRegistration) { auto& dispatcher = simd_func_dispatcher::instance(); // 测试重复注册相同版本 EXPECT_NO_THROW({ dispatcher.register_function("duplicate_test", simd_func_version::SCALAR, []() { return 1; }); dispatcher.register_function("duplicate_test", simd_func_version::SCALAR, []() { return 2; }); // 覆盖前一个 }); // 验证最后注册的版本生效 const auto& func = dispatcher.get_function("duplicate_test"); int result = func(); EXPECT_EQ(result, 2); } TEST_F(simd_test, SimdDispatcherTest_MissingFunction) { auto& dispatcher = simd_func_dispatcher::instance(); // 尝试获取未注册的函数应该抛出异常 EXPECT_THROW({ const auto& missing_func = dispatcher.get_function("nonexistent_function"); }, std::runtime_error); // 尝试调用未注册的函数 EXPECT_THROW({ CALL_SIMD_FUNCTION(void(), "another_nonexistent_function"); }, std::runtime_error); } // ============================================================================= // 对齐内存分配器模块测试(9个测试用例) // ============================================================================= // 基础分配测试 TEST_F(simd_test, AlignedAllocatorTest_BasicAllocation) { // 测试基本的对齐分配 constexpr size_t alignment = ALIGNMENT_AVX; // 32字节对齐 constexpr size_t size = 1024; void* ptr = aligned_malloc(size, alignment); ASSERT_NE(ptr, nullptr); EXPECT_TRUE(simd_test_helpers::is_properly_aligned(ptr)); // 写入数据验证可用性 auto* data = static_cast(ptr); for (size_t i = 0; i < size; ++i) { data[i] = static_cast(i % 256); } // 验证数据 for (size_t i = 0; i < size; ++i) { EXPECT_EQ(data[i], static_cast(i % 256)); } aligned_free(ptr); } TEST_F(simd_test, AlignedAllocatorTest_VariousAlignments) { // 测试不同的对齐要求 std::vector alignments = { ALIGNMENT_SSE, // 16字节 ALIGNMENT_AVX, // 32字节 ALIGNMENT_AVX512, // 64字节 ALIGNMENT_CACHE // 64字节(缓存行) }; constexpr size_t size = 256; for (auto alignment : alignments) { void* ptr = aligned_malloc(size, alignment); ASSERT_NE(ptr, nullptr) << "Failed to allocate with alignment " << alignment; EXPECT_TRUE(is_aligned(ptr, alignment)) << "Pointer not properly aligned to " << alignment << " bytes"; // 验证可以写入数据 std::memset(ptr, 0xAB, size); aligned_free(ptr); } } TEST_F(simd_test, AlignedAllocatorTest_LargeAllocations) { // 测试大块内存分配 std::vector sizes = { 1024, // 1KB 1024 * 64, // 64KB 1024 * 1024 // 1MB }; constexpr size_t alignment = ALIGNMENT_AVX; for (auto size : sizes) { void* ptr = aligned_malloc(size, alignment); ASSERT_NE(ptr, nullptr) << "Failed to allocate " << size << " bytes"; EXPECT_TRUE(simd_test_helpers::is_properly_aligned(ptr)); // 简单的读写测试 auto* data = static_cast(ptr); data[0] = 0x12345678; data[size / sizeof(int) - 1] = 0x87654321; EXPECT_EQ(data[0], 0x12345678); EXPECT_EQ(data[size/sizeof(int) - 1], 0x87654321); aligned_free(ptr); } } // STL兼容性 TEST_F(simd_test, AlignedAllocatorTest_StlContainerCompat) { // 测试STL容器兼容性(需要修复aligned_allocator中的错误) using aligned_vector = std::vector>; EXPECT_NO_THROW({ aligned_vector vec; vec.reserve(100); for (int i = 0; i < 50; ++i) { vec.push_back(static_cast(i)); } EXPECT_EQ(vec.size(), 50); EXPECT_GE(vec.capacity(), 50); // 验证对齐 if (!vec.empty()) { EXPECT_TRUE(simd_test_helpers::is_properly_aligned(vec.data())); } }); } TEST_F(simd_test, AlignedAllocatorTest_VectorOperations) { using sse_vector = std::vector>; using avx_vector = std::vector>; // SSE对齐的vector sse_vector sse_vec(100, 3.14); EXPECT_EQ(sse_vec.size(), 100); EXPECT_TRUE(simd_test_helpers::is_properly_aligned(sse_vec.data())); // AVX对齐的vector avx_vector avx_vec(200, 2.71f); EXPECT_EQ(avx_vec.size(), 200); EXPECT_TRUE(simd_test_helpers::is_properly_aligned(avx_vec.data())); // 测试resize操作 sse_vec.resize(200); EXPECT_EQ(sse_vec.size(), 200); if (!sse_vec.empty()) { EXPECT_TRUE(simd_test_helpers::is_properly_aligned(sse_vec.data())); } } TEST_F(simd_test, AlignedAllocatorTest_MemoryManagement) { using cache_vector = std::vector>; // 测试内存管理 { cache_vector vec(1000); std::iota(vec.begin(), vec.end(), 0); EXPECT_TRUE(simd_test_helpers::is_properly_aligned(vec.data())); // 验证数据正确性 for (size_t i = 0; i < vec.size(); ++i) { EXPECT_EQ(vec[i], static_cast(i)); } } // vector销毁,测试析构函数 // 测试移动语义 cache_vector vec1(100, 42); auto vec1_data = vec1.data(); cache_vector vec2 = std::move(vec1); EXPECT_EQ(vec2.size(), 100); EXPECT_EQ(vec2.data(), vec1_data); // 移动后数据指针应该相同 EXPECT_TRUE(vec1.empty() || vec1.data() != vec1_data); // vec1应该被清空或数据被移走 } // 跨平台行为 TEST_F(simd_test, AlignedAllocatorTest_PlatformConsistency) { // 测试跨平台的一致行为 constexpr size_t alignment = 32; constexpr size_t size = 1024; std::vector ptrs; // 分配多个内存块 for (int i = 0; i < 10; ++i) { void* ptr = aligned_malloc(size, alignment); ASSERT_NE(ptr, nullptr); EXPECT_TRUE(is_aligned(ptr, alignment)); ptrs.push_back(ptr); } // 验证所有指针都正确对齐 for (auto ptr : ptrs) { EXPECT_TRUE(is_aligned(ptr, alignment)); // 写入特定模式 auto* data = static_cast(ptr); for (size_t j = 0; j < size / sizeof(uint32_t); ++j) { data[j] = static_cast(j * 0x12345678); } } // 验证数据完整性 for (size_t i = 0; i < ptrs.size(); ++i) { auto* data = static_cast(ptrs[i]); for (size_t j = 0; j < size / sizeof(uint32_t); ++j) { EXPECT_EQ(data[j], static_cast(j * 0x12345678)) << "Data corruption at ptr " << i << ", index " << j; } } // 释放所有内存 for (auto ptr : ptrs) { aligned_free(ptr); } } TEST_F(simd_test, AlignedAllocatorTest_AlignmentVerification) { // 测试对齐验证函数 std::vector test_alignments = {1, 2, 4, 8, 16, 32, 64, 128}; for (auto alignment : test_alignments) { // 测试2的幂次对齐 if ((alignment & (alignment - 1)) == 0) { // 是2的幂 void* ptr = aligned_malloc(256, alignment); ASSERT_NE(ptr, nullptr); EXPECT_TRUE(is_aligned(ptr, alignment)); aligned_free(ptr); } else { // 非2的幂次应该返回nullptr void* ptr = aligned_malloc(256, alignment); EXPECT_EQ(ptr, nullptr); } } // 测试边界情况 EXPECT_EQ(aligned_malloc(100, 0), nullptr); // 0对齐应该失败 // 测试align_size函数 EXPECT_EQ(align_size(15, 16), 16); EXPECT_EQ(align_size(16, 16), 16); EXPECT_EQ(align_size(17, 16), 32); EXPECT_EQ(align_size(31, 32), 32); EXPECT_EQ(align_size(33, 32), 64); } TEST_F(simd_test, AlignedAllocatorTest_PerformanceCharacteristics) { // 简单的性能特征测试 constexpr size_t num_allocations = 1000; constexpr size_t allocation_size = 1024; // 测试对齐分配的性能 simd_test_helpers::timer timer; std::vector aligned_ptrs; aligned_ptrs.reserve(num_allocations); // 分配阶段 for (size_t i = 0; i < num_allocations; ++i) { void* ptr = aligned_malloc(allocation_size, ALIGNMENT_AVX); ASSERT_NE(ptr, nullptr); aligned_ptrs.push_back(ptr); } double allocation_time = timer.elapsed_ms(); // 访问测试 simd_test_helpers::timer access_timer; uint64_t checksum = 0; // 记录开始时间 auto start_time = std::chrono::high_resolution_clock::now(); for (auto ptr : aligned_ptrs) { auto* data = static_cast(ptr); checksum += data[0]; // 简单访问测试 } auto end_time = std::chrono::high_resolution_clock::now(); auto duration_ns = std::chrono::duration_cast(end_time - start_time).count(); double access_time = access_timer.elapsed_ms(); // 诊断日志 std::cout << " [诊断] 访问循环耗时: " << duration_ns << " 纳秒" << std::endl; std::cout << " [诊断] 计时器测量的访问时间: " << access_time << " 毫秒" << std::endl; std::cout << " [诊断] 校验和值: " << checksum << std::endl; std::cout << " [诊断] 分配数量: " << aligned_ptrs.size() << std::endl; // 释放阶段 simd_test_helpers::timer free_timer; for (auto ptr : aligned_ptrs) { aligned_free(ptr); } double free_time = free_timer.elapsed_ms(); // 性能报告 std::cout << "对齐分配性能:" << std::endl; std::cout << " 分配次数: " << num_allocations << " x " << allocation_size << " 字节" << std::endl; std::cout << " 分配时间: " << allocation_time << " 毫秒" << std::endl; std::cout << " 访问时间: " << access_time << " 毫秒" << std::endl; std::cout << " 释放时间: " << free_time << " 毫秒" << std::endl; std::cout << " 平均分配时间: " << (allocation_time / num_allocations) << " 毫秒" << std::endl; // 基本合理性检查 EXPECT_GT(allocation_time, 0.0); // 访问时间可能因为优化而接近0,特别是在release模式下 // 改为检查访问时间 >= 0 而不是严格大于0 EXPECT_GE(access_time, 0.0) << "Access time should be non-negative (may be 0 in optimized builds)"; std::cout << " [注意] 访问时间为 " << access_time << " 毫秒 - 在发布模式下由于编译器优化可能为 0" << std::endl; EXPECT_GT(free_time, 0.0); // 避免编译器优化掉checksum计算 EXPECT_GE(checksum, 0); // checksum可能为0,但应该不会是负数 } // ============================================================================= // 集成和性能测试(4个测试用例) // ============================================================================= // 端到端集成测试 TEST_F(simd_test, SimdIntegrationTest_FullWorkflow) { // 完整的SIMD工作流程测试:检测 -> 分发 -> 分配 -> 执行 // 1. CPU特性检测 auto max_level = get_max_simd_level(); auto recommended_level = get_recommended_simd_level(); std::cout << "集成测试 - SIMD 级别: 最大=" << static_cast(max_level) << ", 推荐=" << static_cast(recommended_level) << std::endl; // 2. 注册多版本函数 auto& dispatcher = simd_func_dispatcher::instance(); const std::string func_name = "integration_vector_sum"; // 使用对齐分配器的向量进行计算 using aligned_float_vector = std::vector>; // 注册标量版本 dispatcher.register_function( func_name, simd_func_version::SCALAR, [](const aligned_float_vector& vec) -> float { float sum = 0.0f; for (const auto& val : vec) { sum += val; } return sum; }); // 根据支持的特性注册优化版本 if (cpu_supports(cpu_feature::SSE)) { dispatcher.register_function( func_name, simd_func_version::SSE, [](const aligned_float_vector& vec) -> float { // 模拟SSE优化(实际实现会使用SSE指令) float sum = 0.0f; for (const auto& val : vec) { sum += val; } return sum * 1.001f; // 添加小的标识以区分版本 }); } if (cpu_supports(cpu_feature::AVX)) { dispatcher.register_function( func_name, simd_func_version::AVX, [](const aligned_float_vector& vec) -> float { // 模拟AVX优化 float sum = 0.0f; for (const auto& val : vec) { sum += val; } return sum * 1.002f; // AVX版本标识 }); } // 3. 创建测试数据(使用对齐分配) aligned_float_vector test_data(10000); std::iota(test_data.begin(), test_data.end(), 1.0f); // 验证数据对齐 EXPECT_TRUE(simd_test_helpers::is_properly_aligned(test_data.data())); // 4. 执行计算 const auto& func = dispatcher.get_function(func_name); float result = func(test_data); // 5. 验证结果 float expected_base = 10000.0f * 10001.0f / 2.0f; // 等差数列求和 EXPECT_GT(result, expected_base * 0.99f); // 允许一定的误差和版本差异 EXPECT_LT(result, expected_base * 1.01f); std::cout << "集成测试结果: " << result << " (期望约 " << expected_base << ")" << std::endl; } TEST_F(simd_test, SimdIntegrationTest_RealWorldScenarios) { // 真实世界场景测试:图像处理、数值计算等 // 场景1:向量点积计算 const size_t vector_size = 1024; using aligned_vector = std::vector>; aligned_vector vec_a(vector_size), vec_b(vector_size); // 初始化向量 for (size_t i = 0; i < vector_size; ++i) { vec_a[i] = static_cast(i + 1); vec_b[i] = static_cast((i + 1) * 2); } // 注册点积函数 auto& dispatcher = simd_func_dispatcher::instance(); const std::string dot_product_name = "dot_product"; dispatcher.register_function( dot_product_name, simd_func_version::SCALAR, [](const aligned_vector& a, const aligned_vector& b) -> float { float result = 0.0f; for (size_t i = 0; i < a.size(); ++i) { result += a[i] * b[i]; } return result; }); // 执行点积计算 float dot_result = CALL_SIMD_FUNCTION(float(const aligned_vector&, const aligned_vector&), dot_product_name, vec_a, vec_b); // 验证结果(数学验证) float expected = 0.0f; for (size_t i = 0; i < vector_size; ++i) { expected += vec_a[i] * vec_b[i]; } EXPECT_FLOAT_EQ(dot_result, expected); // 场景2:矩阵转置(简化版) const size_t matrix_size = 64; // 64x64矩阵 aligned_vector matrix(matrix_size * matrix_size); aligned_vector transposed(matrix_size * matrix_size); // 初始化矩阵 for (size_t i = 0; i < matrix_size; ++i) { for (size_t j = 0; j < matrix_size; ++j) { matrix[i * matrix_size + j] = static_cast(i * matrix_size + j); } } // 矩阵转置 const std::string transpose_name = "matrix_transpose"; dispatcher.register_function( transpose_name, simd_func_version::SCALAR, [](const aligned_vector& src, aligned_vector& dst, size_t size) { for (size_t i = 0; i < size; ++i) { for (size_t j = 0; j < size; ++j) { dst[j * size + i] = src[i * size + j]; } } }); CALL_SIMD_FUNCTION(void(const aligned_vector&, aligned_vector&, size_t), transpose_name, matrix, transposed, matrix_size); // 验证转置结果 for (size_t i = 0; i < matrix_size; ++i) { for (size_t j = 0; j < matrix_size; ++j) { EXPECT_FLOAT_EQ(transposed[j * matrix_size + i], matrix[i * matrix_size + j]); } } std::cout << "真实场景测试成功完成" << std::endl; } // 性能基准测试 TEST_F(simd_test, SimdPerformanceTest_AllocationSpeed) { // 对齐分配性能基准测试 struct BenchmarkConfig { size_t allocation_size; size_t alignment; size_t num_iterations; std::string name; }; std::vector configs = { {1024, ALIGNMENT_SSE, 10000, "SSE-1KB"}, {1024, ALIGNMENT_AVX, 10000, "AVX-1KB"}, {1024, ALIGNMENT_AVX512, 10000, "AVX512-1KB"}, {4096, ALIGNMENT_AVX, 5000, "AVX-4KB"}, {16384, ALIGNMENT_AVX, 2000, "AVX-16KB"}, {65536, ALIGNMENT_AVX, 1000, "AVX-64KB"} }; std::cout << "\n分配速度基准测试:" << std::endl; std::cout << "配置\t\t分配(毫秒)\t释放(毫秒)\t总计(毫秒)" << std::endl; for (const auto& config : configs) { std::vector ptrs; ptrs.reserve(config.num_iterations); // 分配基准 simd_test_helpers::timer alloc_timer; for (size_t i = 0; i < config.num_iterations; ++i) { void* ptr = aligned_malloc(config.allocation_size, config.alignment); ASSERT_NE(ptr, nullptr); ptrs.push_back(ptr); } double alloc_time = alloc_timer.elapsed_ms(); // 释放基准 simd_test_helpers::timer free_timer; for (auto ptr : ptrs) { aligned_free(ptr); } double free_time = free_timer.elapsed_ms(); double total_time = alloc_time + free_time; std::cout << config.name << "\t\t" << std::fixed << std::setprecision(2) << alloc_time << "\t\t" << free_time << "\t\t" << total_time << std::endl; // 基本性能断言 EXPECT_GT(alloc_time, 0.0); EXPECT_GT(free_time, 0.0); EXPECT_LT(alloc_time / config.num_iterations, 1.0); // 平均每次分配应该小于1ms } } TEST_F(simd_test, SimdPerformanceTest_DispatchOverhead) { // 函数分发开销基准测试 auto& dispatcher = simd_func_dispatcher::instance(); const std::string bench_func_name = "dispatch_overhead_test"; // 注册一个简单的测试函数 dispatcher.register_function( bench_func_name, simd_func_version::SCALAR, [](int x) { return x + 1; }); if (cpu_supports(cpu_feature::SSE)) { dispatcher.register_function( bench_func_name, simd_func_version::SSE, [](int x) { return x + 2; }); } const size_t num_calls = 1000000; // 100万次调用 // 基准1:直接函数调用 auto direct_func = [](int x) { return x + 1; }; simd_test_helpers::timer direct_timer; volatile int direct_result = 0; // volatile防止优化 for (size_t i = 0; i < num_calls; ++i) { direct_result += direct_func(static_cast(i)); } double direct_time = direct_timer.elapsed_ms(); // 基准2:通过分发器调用 const auto& dispatched_func = dispatcher.get_function(bench_func_name); simd_test_helpers::timer dispatch_timer; volatile int dispatch_result = 0; for (size_t i = 0; i < num_calls; ++i) { dispatch_result += dispatched_func(static_cast(i)); } double dispatch_time = dispatch_timer.elapsed_ms(); // 基准3:通过宏调用 simd_test_helpers::timer macro_timer; volatile int macro_result = 0; for (size_t i = 0; i < num_calls; ++i) { macro_result += CALL_SIMD_FUNCTION(int(int), bench_func_name, static_cast(i)); } double macro_time = macro_timer.elapsed_ms(); // 结果报告 std::cout << "\n分发开销基准测试 (" << num_calls << " 次调用):" << std::endl; std::cout << "直接函数调用: " << direct_time << " 毫秒" << std::endl; std::cout << "分发函数调用: " << dispatch_time << " 毫秒" << std::endl; std::cout << "宏调用: " << macro_time << " 毫秒" << std::endl; double dispatch_overhead = (dispatch_time - direct_time) / direct_time * 100.0; double macro_overhead = (macro_time - direct_time) / direct_time * 100.0; std::cout << "分发开销: " << std::fixed << std::setprecision(2) << dispatch_overhead << "%" << std::endl; std::cout << "宏调用开销: " << macro_overhead << "%" << std::endl; // 性能断言 EXPECT_GT(direct_time, 0.0); EXPECT_GT(dispatch_time, 0.0); EXPECT_GT(macro_time, 0.0); // 分发开销应该在合理范围内(调整为更现实的阈值) EXPECT_LT(dispatch_overhead, 1000.0); // 允许10倍开销 EXPECT_LT(macro_overhead, 10000.0); // 宏调用开销更大 // 验证结果正确性(防止编译器优化掉计算) EXPECT_GT(direct_result, 0); EXPECT_GT(dispatch_result, 0); EXPECT_GT(macro_result, 0); } // ============================================================================= // 测试主入口点 // ============================================================================= // 在测试开始前打印系统信息 class SimdTestEnvironment : public ::testing::Environment { public: void SetUp() override { std::cout << "\n" << std::string(60, '=') << std::endl; std::cout << "SIMD 测试套件 - 系统信息" << std::endl; std::cout << std::string(60, '=') << std::endl; cpu_feature_detector::instance().print_info(); std::cout << std::string(60, '=') << std::endl; std::cout << "开始 SIMD 测试..." << std::endl; std::cout << std::string(60, '=') << std::endl; } void TearDown() override { std::cout << std::string(60, '=') << std::endl; std::cout << "SIMD 测试套件完成。" << std::endl; std::cout << std::string(60, '=') << std::endl; } }; // 注册测试环境 static ::testing::Environment* const simd_test_env = ::testing::AddGlobalTestEnvironment(new SimdTestEnvironment);