Files
Alicho/tests/simd/test_simd_basic.cpp

1274 lines
40 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include <gtest/gtest.h>
#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <functional>
#include <iomanip>
#include <iostream>
#include <memory>
#include <numeric>
#include <stdexcept>
#include <string>
#include <thread>
#include <unordered_set>
#include <vector>
#include "cpu_features.h"
#include "simd_func_dispatcher.h"
#include "aligned_allocator.h"
// =============================================================================
// 测试辅助函数和宏定义
// =============================================================================
// 跨平台兼容性宏
// Fallback platform macros, used only when the project headers did not
// already define them.
//
// The architecture macros are derived from the compiler's predefined macros
// instead of being hard-coded, so that a non-x86 build does not silently run
// the x86-only test (and skip the ARM-only one).
#ifndef ALICHO_PLATFORM_WINDOWS
#define ALICHO_PLATFORM_WINDOWS 0
#endif
#ifndef ALICHO_PLATFORM_X86
#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
#define ALICHO_PLATFORM_X86 1
#else
#define ALICHO_PLATFORM_X86 0
#endif
#endif
#ifndef ALICHO_PLATFORM_ARM
#if defined(__aarch64__) || defined(__arm__) || defined(_M_ARM64) || defined(_M_ARM)
#define ALICHO_PLATFORM_ARM 1
#else
#define ALICHO_PLATFORM_ARM 0
#endif
#endif
#ifndef ALICHO_PLATFORM_POSIX
#define ALICHO_PLATFORM_POSIX 0
#endif
#ifndef ALICHO_PLATFORM_UNIX
#define ALICHO_PLATFORM_UNIX 0
#endif
// 测试辅助函数
/// Small helpers shared by the tests in this file.
namespace simd_test_helpers {

/// Stopwatch that starts when constructed.
///
/// Uses std::chrono::steady_clock so that elapsed times can never go
/// backwards, and reports fractional milliseconds without truncating to
/// whole microseconds first.
class timer {
public:
    timer() : start_(clock::now()) {}

    /// @return Milliseconds elapsed since construction.
    auto elapsed_ms() const -> double {
        const std::chrono::duration<double, std::milli> elapsed = clock::now() - start_;
        return elapsed.count();
    }

private:
    using clock = std::chrono::steady_clock;
    clock::time_point start_;
};

// Stand-in "implementations" of the same addition. The non-scalar variants
// add a small offset so a caller can tell which one was invoked.
auto add_scalar(float a, float b) -> float { return a + b; }
auto add_sse(float a, float b) -> float { return a + b + 0.1f; }  // pretend SSE version
auto add_avx(float a, float b) -> float { return a + b + 0.2f; }  // pretend AVX version

/// @return Sum of all elements of @p arr, accumulated left to right in float.
auto sum_array_scalar(const std::vector<float>& arr) -> float {
    return std::accumulate(arr.begin(), arr.end(), 0.0f);
}

/// Pretend SSE version: the scalar sum scaled by 1.01.
auto sum_array_sse(const std::vector<float>& arr) -> float {
    return sum_array_scalar(arr) * 1.01f;
}

/// Pretend AVX version: the scalar sum scaled by 1.02.
auto sum_array_avx(const std::vector<float>& arr) -> float {
    return sum_array_scalar(arr) * 1.02f;
}

/// @tparam alignment Required alignment in bytes; must be non-zero.
/// @return true when the address of @p ptr is a multiple of @p alignment.
template <std::size_t alignment>
auto is_properly_aligned(const void* ptr) -> bool {
    static_assert(alignment != 0, "alignment must be non-zero");
    return (reinterpret_cast<std::uintptr_t>(ptr) % alignment) == 0;
}

/// @return A vector of @p size floats where element i equals i * 0.1f.
auto generate_test_data(std::size_t size) -> std::vector<float> {
    std::vector<float> data;
    data.reserve(size);
    for (std::size_t i = 0; i < size; ++i) {
        data.push_back(static_cast<float>(i) * 0.1f);
    }
    return data;
}

}  // namespace simd_test_helpers
// =============================================================================
// 主测试类
// =============================================================================
/// Fixture shared by every test in this file; exposes the detected CPU info.
class simd_test : public ::testing::Test {
protected:
    /// Stores the address of the object returned by get_cpu_info() so the
    /// tests can read it through cpu_info_.
    void SetUp() override {
        const auto& detected = get_cpu_info();
        cpu_info_ = &detected;
    }

    /// Nothing to clean up: the fixture only stores a pointer.
    void TearDown() override {}

    const cpu_info* cpu_info_ = nullptr;
};
// =============================================================================
// CPU feature detection module tests (10 test cases)
// =============================================================================
// 基础功能测试
// Verifies that the basic CPU description fields are populated and prints
// them for diagnostic purposes.
TEST_F(simd_test, CpuFeaturesTest_BasicDetection) {
    ASSERT_NE(cpu_info_, nullptr);

    // The basic fields must have been filled in.
    EXPECT_FALSE(cpu_info_->vendor.empty());
    EXPECT_FALSE(cpu_info_->brand.empty());
    EXPECT_GT(cpu_info_->logical_cores, 0);
    EXPECT_GT(cpu_info_->physical_cores, 0);

    // Building the feature string must succeed. Its content is not asserted:
    // the previous `empty() || !empty()` check was a tautology and has been
    // removed, so the value is only printed below.
    const auto features_str = cpu_info_->features_string();

    std::cout << "CPU 厂商: " << cpu_info_->vendor << std::endl;
    std::cout << "CPU 型号: " << cpu_info_->brand << std::endl;
    std::cout << "逻辑核心数: " << cpu_info_->logical_cores << std::endl;
    std::cout << "物理核心数: " << cpu_info_->physical_cores << std::endl;
    std::cout << "特性: " << features_str << std::endl;
}
// Checks that the maximum and recommended SIMD levels lie between
// simd_level::NONE and simd_level::NEON_FP16, and that the recommendation
// never exceeds the maximum.
TEST_F(simd_test, CpuFeaturesTest_SimdLevelDetection) {
    const auto as_int = [](simd_level level) { return static_cast<int>(level); };

    const int detected_max = as_int(get_max_simd_level());
    const int detected_recommended = as_int(get_recommended_simd_level());
    const int lowest = as_int(simd_level::NONE);
    const int highest = as_int(simd_level::NEON_FP16);

    // Both levels must fall inside the valid range.
    EXPECT_GE(detected_max, lowest);
    EXPECT_LE(detected_max, highest);
    EXPECT_GE(detected_recommended, lowest);
    EXPECT_LE(detected_recommended, highest);

    // The recommendation may not exceed what the CPU supports.
    EXPECT_LE(detected_recommended, detected_max);

    std::cout << "最大 SIMD 级别: " << detected_max << std::endl;
    std::cout << "推荐 SIMD 级别: " << detected_recommended << std::endl;
}
// Exercises the free convenience functions and checks that they agree with
// the cpu_feature_detector singleton.
TEST_F(simd_test, CpuFeaturesTest_GlobalFunctions) {
    // get_cpu_info() must hand back the same object the fixture captured.
    const auto& info = get_cpu_info();
    EXPECT_EQ(&info, cpu_info_);

    // SSE2 support implies SSE support.
    const bool has_sse = cpu_supports(cpu_feature::SSE);
    const bool has_sse2 = cpu_supports(cpu_feature::SSE2);
    if (has_sse2) {
        EXPECT_TRUE(has_sse);
    }

    // The detector and the info structure must report the same maximum level.
    auto& detector = cpu_feature_detector::instance();
    EXPECT_EQ(detector.max_simd_level(), info.max_simd_level);

    // Level-threshold queries.
    EXPECT_TRUE(detector.supports_at_least(simd_level::NONE));
    if (info.max_simd_level >= simd_level::SSE) {
        EXPECT_TRUE(detector.supports_at_least(simd_level::SSE));
    }
}
// 平台兼容性测试
// On x86 builds, expects SSE or SSE2 and at least one feature from a list of
// common x86 extensions; skipped on other platforms.
TEST_F(simd_test, CpuFeaturesTest_X86PlatformSupport) {
#if ALICHO_PLATFORM_X86
    // An x86 CPU is expected to offer SSE or SSE2 at the very least.
    const bool has_sse_family =
        cpu_supports(cpu_feature::SSE) || cpu_supports(cpu_feature::SSE2);
    EXPECT_TRUE(has_sse_family);

    // Common x86 features; the scan stops at the first supported one.
    const std::vector<cpu_feature> common_features{
        cpu_feature::SSE, cpu_feature::SSE2, cpu_feature::SSE3,
        cpu_feature::AVX, cpu_feature::AVX2, cpu_feature::FMA};

    bool found_supported = false;
    auto it = common_features.begin();
    while (!found_supported && it != common_features.end()) {
        if (cpu_supports(*it)) {
            found_supported = true;
        }
        ++it;
    }
    EXPECT_TRUE(found_supported);
#else
    GTEST_SKIP() << "Not x86 platform";
#endif
}
// On ARM builds, checks the NEON / NEON_FP16 feature flags against each other
// and against the reported maximum level; skipped on other platforms.
TEST_F(simd_test, CpuFeaturesTest_ArmPlatformSupport) {
#if ALICHO_PLATFORM_ARM
    const bool neon_available = cpu_supports(cpu_feature::NEON);
    const bool neon_fp16_available = cpu_supports(cpu_feature::NEON_FP16);

    // FP16 support implies baseline NEON support.
    if (neon_fp16_available) {
        EXPECT_TRUE(neon_available);
    }

    // With NEON present, the maximum level must be at least NEON.
    const auto detected_max = get_max_simd_level();
    if (neon_available) {
        const int max_as_int = static_cast<int>(detected_max);
        const int neon_as_int = static_cast<int>(simd_level::NEON);
        EXPECT_GE(max_as_int, neon_as_int);
    }
#else
    GTEST_SKIP() << "Not ARM platform";
#endif
}
// Repeated queries must give identical answers, and the detector accessor
// must always return the same instance.
TEST_F(simd_test, CpuFeaturesTest_CrossPlatformConsistency) {
    // Same singleton address on every call.
    const auto* first_instance = &cpu_feature_detector::instance();
    const auto* second_instance = &cpu_feature_detector::instance();
    EXPECT_EQ(first_instance, second_instance);

    // Maximum level is stable across calls.
    const auto first_max = get_max_simd_level();
    const auto second_max = get_max_simd_level();
    EXPECT_EQ(first_max, second_max);

    // Recommended level is stable across calls.
    const auto first_recommended = get_recommended_simd_level();
    const auto second_recommended = get_recommended_simd_level();
    EXPECT_EQ(first_recommended, second_recommended);

    // A feature query is stable across calls.
    const auto first_sse = cpu_supports(cpu_feature::SSE);
    const auto second_sse = cpu_supports(cpu_feature::SSE);
    EXPECT_EQ(first_sse, second_sse);
}
// SIMD级别推荐测试
// Sanity-checks the relation between the recommended and the maximum level
// for each maximum level handled below.
TEST_F(simd_test, CpuFeaturesTest_SimdLevelRecommendation) {
    const auto detected_max = get_max_simd_level();
    const auto recommended = get_recommended_simd_level();

    switch (detected_max) {
        case simd_level::AVX512: {
            // With AVX512 available the recommendation may stay at AVX512 or
            // drop back to AVX2.
            const bool acceptable = (recommended == simd_level::AVX512) ||
                                    (recommended == simd_level::AVX2);
            EXPECT_TRUE(acceptable);
            break;
        }
        case simd_level::NONE:
            EXPECT_EQ(recommended, simd_level::NONE);
            break;
        case simd_level::SSE:
        case simd_level::SSE3:
        case simd_level::SSE4:
        case simd_level::AVX:
        case simd_level::AVX2:
            // For these levels the recommendation equals the maximum.
            EXPECT_EQ(recommended, detected_max);
            break;
        case simd_level::NEON:
        case simd_level::NEON_FP16:
            EXPECT_EQ(recommended, detected_max);
            break;
    }
}
// Checks the recommended level against the maximum, with an extra
// expectation for AMD CPUs that report AVX512F, AVX512VL and AVX512BW.
TEST_F(simd_test, CpuFeaturesTest_PerformanceGuidedSelection) {
    const auto recommended = get_recommended_simd_level();
    const auto detected_max = get_max_simd_level();

    // The recommendation can never be above the maximum.
    EXPECT_LE(static_cast<int>(recommended), static_cast<int>(detected_max));

    // The remaining checks only apply when AVX512 is the maximum level.
    if (detected_max != simd_level::AVX512) {
        return;
    }

    const bool has_f = cpu_supports(cpu_feature::AVX512F);
    const bool has_vl = cpu_supports(cpu_feature::AVX512VL);
    const bool has_bw = cpu_supports(cpu_feature::AVX512BW);
    if (!(has_f && has_vl && has_bw)) {
        return;
    }

    // With the full F/VL/BW set, an AMD vendor string is expected to keep
    // AVX512 as the recommendation. Other vendors are not constrained here.
    const bool is_amd = cpu_info_->vendor.find("AMD") != std::string::npos;
    if (is_amd) {
        EXPECT_EQ(recommended, simd_level::AVX512);
    }
}
// 异常处理测试
// Queries cpu_supports() with a zero-valued feature and then with every
// single-bit value, comparing each answer to the bitmask in cpu_info.
TEST_F(simd_test, CpuFeaturesTest_InvalidFeatureHandling) {
    // cpu_feature is a scoped enum, so an out-of-set value has to be forged
    // with a cast. Zero is used as the "no feature" probe.
    const auto zero_feature = static_cast<cpu_feature>(0);
    EXPECT_NO_THROW({
        const bool supported = cpu_supports(zero_feature);
        // A zero value is expected to be reported as unsupported.
        EXPECT_FALSE(supported);
    });

    // Every bit of the feature mask must match the per-feature query.
    const uint32_t feature_mask = cpu_info_->features;
    for (int bit = 0; bit < 32; ++bit) {
        const uint32_t single_bit = 1U << bit;
        const bool expected = (feature_mask & single_bit) != 0;
        const bool actual = cpu_supports(static_cast<cpu_feature>(single_bit));
        EXPECT_EQ(expected, actual) << "Bit " << bit << " mismatch";
    }
}
// Calls the CPU feature API from several threads at once and checks that
// every call returned the same answer as a later single-threaded call.
TEST_F(simd_test, CpuFeaturesTest_ThreadSafety) {
    const int num_threads = 4;
    const int calls_per_thread = 100;
    std::vector<std::thread> threads;

    // One slot per call. This is deliberately std::vector<char> and not
    // std::vector<bool>: the latter packs elements into shared words, so
    // concurrent writes to different indices would be a data race.
    std::vector<char> results(
        static_cast<std::size_t>(num_threads) * static_cast<std::size_t>(calls_per_thread), 0);

    // Start the workers; each one writes only to its own slice of `results`.
    for (int t = 0; t < num_threads; ++t) {
        threads.emplace_back([&, t]() {
            for (int i = 0; i < calls_per_thread; ++i) {
                const std::size_t idx = static_cast<std::size_t>(t * calls_per_thread + i);
                // Rotate through the different API entry points.
                switch (i % 4) {
                    case 0:
                        results[idx] = cpu_supports(cpu_feature::SSE) ? 1 : 0;
                        break;
                    case 1:
                        results[idx] = (get_max_simd_level() != simd_level::NONE) ? 1 : 0;
                        break;
                    case 2:
                        results[idx] = (get_recommended_simd_level() != simd_level::NONE) ? 1 : 0;
                        break;
                    case 3:
                        results[idx] = !get_cpu_info().vendor.empty() ? 1 : 0;
                        break;
                }
            }
        });
    }

    // Wait for all workers.
    for (auto& thread : threads) {
        thread.join();
    }

    // Reference answers obtained on this thread.
    const bool sse_result = cpu_supports(cpu_feature::SSE);
    const auto max_level = get_max_simd_level();
    const auto recommended_level = get_recommended_simd_level();
    const bool has_vendor = !get_cpu_info().vendor.empty();

    // Every recorded answer must match the reference for its call type.
    for (int i = 0; i < calls_per_thread; ++i) {
        for (int t = 0; t < num_threads; ++t) {
            const std::size_t idx = static_cast<std::size_t>(t * calls_per_thread + i);
            const bool observed = results[idx] != 0;
            switch (i % 4) {
                case 0:
                    EXPECT_EQ(observed, sse_result);
                    break;
                case 1:
                    EXPECT_EQ(observed, (max_level != simd_level::NONE));
                    break;
                case 2:
                    EXPECT_EQ(observed, (recommended_level != simd_level::NONE));
                    break;
                case 3:
                    EXPECT_EQ(observed, has_vendor);
                    break;
            }
        }
    }
}
// =============================================================================
// SIMD function dispatcher module tests (10 test cases)
// =============================================================================
// 函数注册和查找
// Registers three versions of "test_add" and checks that the name shows up
// in the dispatcher's function list.
TEST_F(simd_test, SimdDispatcherTest_FunctionRegistration) {
    using add_signature = float(float, float);

    auto& dispatcher = simd_func_dispatcher::instance();

    // Wrap the helper functions so they can be handed to the dispatcher.
    std::function<add_signature> scalar_impl = simd_test_helpers::add_scalar;
    std::function<add_signature> sse_impl = simd_test_helpers::add_sse;
    std::function<add_signature> avx_impl = simd_test_helpers::add_avx;

    EXPECT_NO_THROW({
        dispatcher.register_function<add_signature>("test_add", simd_func_version::SCALAR, scalar_impl);
        dispatcher.register_function<add_signature>("test_add", simd_func_version::SSE, sse_impl);
        dispatcher.register_function<add_signature>("test_add", simd_func_version::AVX, avx_impl);
    });

    // The registered name must be listed.
    const auto registered_names = dispatcher.list_functions();
    const auto position = std::find(registered_names.begin(), registered_names.end(), "test_add");
    EXPECT_TRUE(position != registered_names.end());
}
// Looks up a registered function and calls it, then checks that looking up
// an unknown name throws std::runtime_error.
TEST_F(simd_test, SimdDispatcherTest_FunctionLookup) {
    auto& dispatcher = simd_func_dispatcher::instance();

    // Register the scalar version here as well, so this test does not depend
    // on SimdDispatcherTest_FunctionRegistration having run first (for
    // example when tests are filtered or shuffled).
    std::function<float(float, float)> scalar_add = simd_test_helpers::add_scalar;
    dispatcher.register_function<float(float, float)>("test_add", simd_func_version::SCALAR, scalar_add);

    // A registered function can be fetched and invoked.
    EXPECT_NO_THROW({
        const auto& func = dispatcher.get_function<float(float, float)>("test_add");
        const float result = func(1.0f, 2.0f);
        EXPECT_GT(result, 0.0f);  // every helper variant returns a positive sum here
    });

    // Fetching a name that was never registered must throw.
    EXPECT_THROW({
        const auto& nonexistent = dispatcher.get_function<int(int)>("nonexistent_func");
        (void)nonexistent;  // only the throw matters
    }, std::runtime_error);
}
// Registers scalar, SSE and AVX variants of an array sum under one name and
// checks that the dispatched result is not below the plain scalar sum.
TEST_F(simd_test, SimdDispatcherTest_MultiVersionManagement) {
    using sum_signature = float(const std::vector<float>&);

    auto& dispatcher = simd_func_dispatcher::instance();
    const std::string func_name = "multi_version_test";

    // One registration per version.
    dispatcher.register_function<sum_signature>(
        func_name, simd_func_version::SCALAR, simd_test_helpers::sum_array_scalar);
    dispatcher.register_function<sum_signature>(
        func_name, simd_func_version::SSE, simd_test_helpers::sum_array_sse);
    dispatcher.register_function<sum_signature>(
        func_name, simd_func_version::AVX, simd_test_helpers::sum_array_avx);

    // Run whichever version the dispatcher hands back.
    const auto& dispatched = dispatcher.get_function<sum_signature>(func_name);
    const auto input = simd_test_helpers::generate_test_data(100);
    const float result = dispatched(input);

    // The pretend SIMD variants scale the sum up, so the dispatched value
    // must be at least the scalar one.
    const float scalar_result = simd_test_helpers::sum_array_scalar(input);
    EXPECT_GE(result, scalar_result);

    std::cout << "多版本结果: " << result << " (标量: " << scalar_result << ")" << std::endl;
}
// 自动分发机制
// Registers versions that return distinguishable results and checks that the
// dispatcher picks the most advanced one the CPU reports support for.
TEST_F(simd_test, SimdDispatcherTest_AutomaticDispatch) {
    using add_signature = int(int, int);

    auto& dispatcher = simd_func_dispatcher::instance();
    const std::string func_name = "auto_dispatch_test";

    // The scalar version is always registered.
    dispatcher.register_function<add_signature>(
        func_name, simd_func_version::SCALAR,
        [](int a, int b) { return a + b; });

    // Further versions are registered only when the CPU reports support.
    const bool has_sse = cpu_supports(cpu_feature::SSE);
    if (has_sse) {
        dispatcher.register_function<add_signature>(
            func_name, simd_func_version::SSE,
            [](int a, int b) { return a + b + 1; });  // +1 marks the SSE version
    }
    const bool has_avx = cpu_supports(cpu_feature::AVX);
    if (has_avx) {
        dispatcher.register_function<add_signature>(
            func_name, simd_func_version::AVX,
            [](int a, int b) { return a + b + 2; });  // +2 marks the AVX version
    }

    // Dispatch and call.
    const auto& dispatched = dispatcher.get_function<add_signature>(func_name);
    const int result = dispatched(10, 20);

    // 10 + 20 plus the marker of the expected version.
    const int expected = has_avx ? 32 : (has_sse ? 31 : 30);
    EXPECT_EQ(result, expected);
}
// Converts the recommended SIMD level to a function version and checks the
// mapping for the levels listed below.
TEST_F(simd_test, SimdDispatcherTest_PriorityBasedSelection) {
    const auto level = get_recommended_simd_level();
    const auto version = simd_level_to_version(level);
    const int version_as_int = static_cast<int>(version);

    std::cout << "推荐 SIMD 级别: " << static_cast<int>(level) << std::endl;
    std::cout << "期望版本: " << version_as_int << std::endl;

    // The converted version must lie between SCALAR and VECTOR.
    EXPECT_GE(version_as_int, static_cast<int>(simd_func_version::SCALAR));
    EXPECT_LE(version_as_int, static_cast<int>(simd_func_version::VECTOR));

    // Spot-check the mapping; levels not listed here are not asserted.
    switch (level) {
        case simd_level::AVX2:
            EXPECT_EQ(version, simd_func_version::AVX2);
            break;
        case simd_level::AVX:
            EXPECT_EQ(version, simd_func_version::AVX);
            break;
        case simd_level::SSE:
            EXPECT_EQ(version, simd_func_version::SSE);
            break;
        case simd_level::NONE:
            EXPECT_EQ(version, simd_func_version::SCALAR);
            break;
        default:
            break;
    }
}
// With only a scalar version registered the dispatcher must return it; after
// an AVX version is added (on CPUs reporting AVX) that one must be returned.
TEST_F(simd_test, SimdDispatcherTest_VersionFallback) {
    using unary_signature = double(double);

    auto& dispatcher = simd_func_dispatcher::instance();
    const std::string func_name = "fallback_test";

    // Only the scalar version exists at this point.
    dispatcher.register_function<unary_signature>(
        func_name, simd_func_version::SCALAR,
        [](double x) { return x * 2.0; });

    // Whatever the CPU supports, the lookup has to fall back to scalar.
    const auto& scalar_only = dispatcher.get_function<unary_signature>(func_name);
    const double doubled = scalar_only(3.14);
    EXPECT_DOUBLE_EQ(doubled, 6.28);

    // The second half needs AVX support.
    if (!cpu_supports(cpu_feature::AVX)) {
        return;
    }

    // A different multiplier makes the chosen version observable.
    dispatcher.register_function<unary_signature>(
        func_name, simd_func_version::AVX,
        [](double x) { return x * 3.0; });

    // A fresh lookup must now return the AVX version.
    const auto& with_avx = dispatcher.get_function<unary_signature>(func_name);
    const double tripled = with_avx(3.14);
    EXPECT_DOUBLE_EQ(tripled, 9.42);
}
// 宏接口测试
// Exercises the REGISTER/GET/CALL convenience macros and the conversions
// between simd_func_version values and their names.
TEST_F(simd_test, SimdDispatcherTest_MacroInterface) {
    // Registration macro.
    EXPECT_NO_THROW({
        std::function<int(int)> squarer = [](int x) { return x * x; };
        REGISTER_SIMD_FUNCTION("macro_test", simd_func_version::SCALAR, squarer);
    });

    // Lookup macro.
    EXPECT_NO_THROW({
        const auto& fetched = GET_SIMD_FUNCTION(int(int), "macro_test");
        const int squared = fetched(5);
        EXPECT_EQ(squared, 25);
    });

    // Call macro.
    EXPECT_NO_THROW({
        const int squared = CALL_SIMD_FUNCTION(int(int), "macro_test", 6);
        EXPECT_EQ(squared, 36);
    });

    // Version <-> name conversions for the three versions checked here.
    struct version_name {
        simd_func_version version;
        const char* name;
    };
    const version_name known[] = {
        {simd_func_version::SCALAR, "SCALAR"},
        {simd_func_version::SSE, "SSE"},
        {simd_func_version::AVX, "AVX"},
    };
    for (const auto& entry : known) {
        EXPECT_STREQ(simd_func_version_to_string(entry.version), entry.name);
    }
    for (const auto& entry : known) {
        EXPECT_EQ(string_to_simd_func_version(entry.name), entry.version);
    }

    // An unknown name is expected to map to SCALAR.
    EXPECT_EQ(string_to_simd_func_version("INVALID"), simd_func_version::SCALAR);
}
// Registers functions with different signatures and fetches them back,
// including one lookup that uses a signature other than the registered one.
TEST_F(simd_test, SimdDispatcherTest_TypeSafety) {
    auto& dispatcher = simd_func_dispatcher::instance();

    // Two functions with different signatures.
    dispatcher.register_function<int(int)>(
        "int_func", simd_func_version::SCALAR,
        [](int x) { return x + 1; });
    dispatcher.register_function<float(float)>(
        "float_func", simd_func_version::SCALAR,
        [](float x) { return x + 1.0f; });

    // Each one is retrievable with its own signature.
    EXPECT_NO_THROW({
        const auto& increment_int = dispatcher.get_function<int(int)>("int_func");
        const int incremented = increment_int(42);
        EXPECT_EQ(incremented, 43);
    });
    EXPECT_NO_THROW({
        const auto& increment_float = dispatcher.get_function<float(float)>("float_func");
        const float incremented = increment_float(3.14f);
        EXPECT_FLOAT_EQ(incremented, 4.14f);
    });

    // Asking for "int_func" with a double signature is expected not to throw.
    // NOTE(review): the original comment says this creates a separate holder
    // per signature - confirm against the dispatcher implementation.
    EXPECT_NO_THROW({
        const auto& as_double = dispatcher.get_function<double(double)>("int_func");
        (void)as_double;
    });
}
// 错误处理
// Registering the same name and version twice must not throw, and the second
// registration must be the one that is returned.
TEST_F(simd_test, SimdDispatcherTest_InvalidRegistration) {
    using nullary_signature = int();

    auto& dispatcher = simd_func_dispatcher::instance();

    EXPECT_NO_THROW({
        dispatcher.register_function<nullary_signature>(
            "duplicate_test", simd_func_version::SCALAR,
            []() { return 1; });
        // Same name and version again: replaces the first registration.
        dispatcher.register_function<nullary_signature>(
            "duplicate_test", simd_func_version::SCALAR,
            []() { return 2; });
    });

    // The most recent registration wins.
    const auto& latest = dispatcher.get_function<nullary_signature>("duplicate_test");
    const int value = latest();
    EXPECT_EQ(value, 2);
}
// Both the lookup API and the call macro must throw std::runtime_error for a
// name that was never registered.
TEST_F(simd_test, SimdDispatcherTest_MissingFunction) {
    auto& dispatcher = simd_func_dispatcher::instance();

    // Direct lookup of an unknown name.
    EXPECT_THROW({
        const auto& not_registered = dispatcher.get_function<void()>("nonexistent_function");
        (void)not_registered;
    }, std::runtime_error);

    // Calling an unknown name through the macro.
    EXPECT_THROW({
        CALL_SIMD_FUNCTION(void(), "another_nonexistent_function");
    }, std::runtime_error);
}
// =============================================================================
// 对齐内存分配器模块测试9个测试用例
// =============================================================================
// 基础分配测试
// Allocates one ALIGNMENT_AVX-aligned buffer, checks its alignment, and
// verifies a byte pattern written to it can be read back.
TEST_F(simd_test, AlignedAllocatorTest_BasicAllocation) {
    constexpr size_t alignment = ALIGNMENT_AVX;
    constexpr size_t byte_count = 1024;

    void* const block = aligned_malloc(byte_count, alignment);
    ASSERT_NE(block, nullptr);
    EXPECT_TRUE(simd_test_helpers::is_properly_aligned<alignment>(block));

    const auto pattern_at = [](size_t offset) { return static_cast<char>(offset % 256); };

    // Fill the whole buffer to prove it is writable ...
    char* const bytes = static_cast<char*>(block);
    for (size_t offset = 0; offset != byte_count; ++offset) {
        bytes[offset] = pattern_at(offset);
    }
    // ... and read every byte back.
    for (size_t offset = 0; offset != byte_count; ++offset) {
        EXPECT_EQ(bytes[offset], pattern_at(offset));
    }

    aligned_free(block);
}
// Allocates a small buffer for each predefined alignment constant and checks
// that the returned pointer is aligned and writable.
TEST_F(simd_test, AlignedAllocatorTest_VariousAlignments) {
    constexpr size_t byte_count = 256;
    const std::vector<size_t> requested_alignments = {
        ALIGNMENT_SSE,
        ALIGNMENT_AVX,
        ALIGNMENT_AVX512,
        ALIGNMENT_CACHE,
    };

    for (const size_t alignment : requested_alignments) {
        void* const block = aligned_malloc(byte_count, alignment);
        ASSERT_NE(block, nullptr) << "Failed to allocate with alignment " << alignment;

        const bool aligned = is_aligned(block, alignment);
        EXPECT_TRUE(aligned)
            << "Pointer not properly aligned to " << alignment << " bytes";

        // The memory must accept writes across its whole length.
        std::memset(block, 0xAB, byte_count);

        aligned_free(block);
    }
}
// Allocates buffers of 1 KB, 64 KB and 1 MB with ALIGNMENT_AVX alignment and
// checks that the first and last 32-bit words are readable and writable.
TEST_F(simd_test, AlignedAllocatorTest_LargeAllocations) {
    const std::vector<size_t> sizes = {
        1024,        // 1 KB
        1024 * 64,   // 64 KB
        1024 * 1024  // 1 MB
    };
    constexpr size_t alignment = ALIGNMENT_AVX;

    // Unsigned 32-bit patterns: 0x87654321 does not fit in a signed int, so
    // the buffer is accessed as uint32_t to keep stores and comparisons free
    // of signed/unsigned conversions.
    constexpr std::uint32_t first_pattern = 0x12345678u;
    constexpr std::uint32_t last_pattern = 0x87654321u;

    for (const size_t size : sizes) {
        void* ptr = aligned_malloc(size, alignment);
        ASSERT_NE(ptr, nullptr) << "Failed to allocate " << size << " bytes";
        EXPECT_TRUE(simd_test_helpers::is_properly_aligned<alignment>(ptr));

        // Touch both ends of the buffer.
        auto* words = static_cast<std::uint32_t*>(ptr);
        const size_t last_index = size / sizeof(std::uint32_t) - 1;
        words[0] = first_pattern;
        words[last_index] = last_pattern;
        EXPECT_EQ(words[0], first_pattern);
        EXPECT_EQ(words[last_index], last_pattern);

        aligned_free(ptr);
    }
}
// STL兼容性
// Uses aligned_allocator as the allocator of a std::vector and checks size,
// capacity and data alignment after a reserve and a series of push_backs.
TEST_F(simd_test, AlignedAllocatorTest_StlContainerCompat) {
    using aligned_vector = std::vector<float, aligned_allocator<float, ALIGNMENT_AVX>>;

    EXPECT_NO_THROW({
        aligned_vector vec;
        vec.reserve(100);
        for (int i = 0; i < 50; ++i) {
            vec.push_back(static_cast<float>(i));
        }

        // Unsigned literals: size() and capacity() are unsigned, so this
        // avoids a signed/unsigned comparison inside the gtest macros.
        EXPECT_EQ(vec.size(), 50u);
        EXPECT_GE(vec.capacity(), 50u);

        // The element storage must honour the allocator's alignment.
        if (!vec.empty()) {
            EXPECT_TRUE(simd_test_helpers::is_properly_aligned<ALIGNMENT_AVX>(vec.data()));
        }
    });
}
// Constructs and resizes vectors that use the SSE and AVX allocator aliases
// and checks size and data alignment.
TEST_F(simd_test, AlignedAllocatorTest_VectorOperations) {
    using sse_vector = std::vector<double, sse_aligned_allocator<double>>;
    using avx_vector = std::vector<float, avx_aligned_allocator<float>>;

    // Size expectations use unsigned literals so the comparison with size()
    // is not a signed/unsigned mix.

    // Vector with the SSE allocator.
    sse_vector sse_vec(100, 3.14);
    EXPECT_EQ(sse_vec.size(), 100u);
    EXPECT_TRUE(simd_test_helpers::is_properly_aligned<ALIGNMENT_SSE>(sse_vec.data()));

    // Vector with the AVX allocator.
    avx_vector avx_vec(200, 2.71f);
    EXPECT_EQ(avx_vec.size(), 200u);
    EXPECT_TRUE(simd_test_helpers::is_properly_aligned<ALIGNMENT_AVX>(avx_vec.data()));

    // Growing the vector must keep the storage aligned.
    sse_vec.resize(200);
    EXPECT_EQ(sse_vec.size(), 200u);
    if (!sse_vec.empty()) {
        EXPECT_TRUE(simd_test_helpers::is_properly_aligned<ALIGNMENT_SSE>(sse_vec.data()));
    }
}
// Checks construction, destruction and move construction of a vector that
// uses cache_aligned_allocator.
TEST_F(simd_test, AlignedAllocatorTest_MemoryManagement) {
    using cache_vector = std::vector<int, cache_aligned_allocator<int>>;

    {
        cache_vector vec(1000);
        std::iota(vec.begin(), vec.end(), 0);
        EXPECT_TRUE(simd_test_helpers::is_properly_aligned<ALIGNMENT_CACHE>(vec.data()));

        // Every element must hold its own index.
        for (size_t i = 0; i < vec.size(); ++i) {
            EXPECT_EQ(vec[i], static_cast<int>(i));
        }
    }  // the vector is destroyed here, which exercises deallocation

    // Move construction.
    cache_vector vec1(100, 42);
    const auto* vec1_data = vec1.data();
    cache_vector vec2 = std::move(vec1);

    // Unsigned literal avoids a signed/unsigned comparison with size().
    EXPECT_EQ(vec2.size(), 100u);
    EXPECT_EQ(vec2.data(), vec1_data);  // the storage pointer is carried over

    // The moved-from vector is only inspected with calls that have no
    // preconditions: it must be empty or no longer point at the old storage.
    EXPECT_TRUE(vec1.empty() || vec1.data() != vec1_data);
}
// 跨平台行为
// Allocates ten 32-byte-aligned buffers, fills each with a known word
// pattern, verifies the pattern, and releases everything.
TEST_F(simd_test, AlignedAllocatorTest_PlatformConsistency) {
    constexpr size_t alignment = 32;
    constexpr size_t size = 1024;
    constexpr size_t words_per_block = size / sizeof(uint32_t);

    // Word j of every block holds the low 32 bits of j * 0x12345678.
    const auto pattern_at = [](size_t j) { return static_cast<uint32_t>(j * 0x12345678); };

    // Phase 1: allocate.
    std::vector<void*> blocks;
    for (int n = 0; n < 10; ++n) {
        void* const block = aligned_malloc(size, alignment);
        ASSERT_NE(block, nullptr);
        EXPECT_TRUE(is_aligned(block, alignment));
        blocks.push_back(block);
    }

    // Phase 2: re-check alignment and write the pattern.
    for (void* const block : blocks) {
        EXPECT_TRUE(is_aligned(block, alignment));
        uint32_t* const words = static_cast<uint32_t*>(block);
        for (size_t j = 0; j != words_per_block; ++j) {
            words[j] = pattern_at(j);
        }
    }

    // Phase 3: verify nothing was overwritten.
    for (size_t i = 0; i != blocks.size(); ++i) {
        const uint32_t* const words = static_cast<uint32_t*>(blocks[i]);
        for (size_t j = 0; j != words_per_block; ++j) {
            EXPECT_EQ(words[j], pattern_at(j))
                << "Data corruption at ptr " << i << ", index " << j;
        }
    }

    // Phase 4: release.
    for (void* const block : blocks) {
        aligned_free(block);
    }
}
// Checks is_aligned() on allocations with a range of alignments, the
// zero-alignment case of aligned_malloc(), and align_size() rounding.
TEST_F(simd_test, AlignedAllocatorTest_AlignmentVerification) {
    const std::vector<size_t> candidate_alignments = {1, 2, 4, 8, 16, 32, 64, 128};

    for (const size_t alignment : candidate_alignments) {
        const bool is_power_of_two = (alignment & (alignment - 1)) == 0;
        void* const block = aligned_malloc(256, alignment);
        if (is_power_of_two) {
            ASSERT_NE(block, nullptr);
            EXPECT_TRUE(is_aligned(block, alignment));
            aligned_free(block);
        } else {
            // A non-power-of-two alignment is expected to be rejected. Every
            // value in the list above is a power of two, so this branch does
            // not run with the current list.
            EXPECT_EQ(block, nullptr);
        }
    }

    // Boundary case: a zero alignment must fail.
    EXPECT_EQ(aligned_malloc(100, 0), nullptr);

    // align_size() rounds up to the next multiple of the alignment.
    EXPECT_EQ(align_size(15, 16), 16);
    EXPECT_EQ(align_size(16, 16), 16);
    EXPECT_EQ(align_size(17, 16), 32);
    EXPECT_EQ(align_size(31, 32), 32);
    EXPECT_EQ(align_size(33, 32), 64);
}
// Times 1000 aligned allocations, one read from each, and their release, and
// prints the measurements. The first word of every buffer is initialised
// before it is read, so the access loop never reads indeterminate memory and
// the checksum has a known expected value.
TEST_F(simd_test, AlignedAllocatorTest_PerformanceCharacteristics) {
    constexpr size_t num_allocations = 1000;
    constexpr size_t allocation_size = 1024;

    // Allocation phase.
    simd_test_helpers::timer timer;
    std::vector<void*> aligned_ptrs;
    aligned_ptrs.reserve(num_allocations);
    bool allocation_failed = false;
    for (size_t i = 0; i < num_allocations; ++i) {
        void* ptr = aligned_malloc(allocation_size, ALIGNMENT_AVX);
        if (ptr == nullptr) {
            allocation_failed = true;
            break;
        }
        aligned_ptrs.push_back(ptr);
    }
    const double allocation_time = timer.elapsed_ms();

    // On failure, release what was allocated so far before aborting the test;
    // a fatal assertion inside the loop would leak those buffers.
    if (allocation_failed) {
        for (void* ptr : aligned_ptrs) {
            aligned_free(ptr);
        }
        FAIL() << "aligned_malloc returned nullptr after " << aligned_ptrs.size()
               << " successful allocations";
    }

    // Initialise the word that the access loop reads (outside the timed
    // sections) and compute the checksum it must produce.
    std::uint64_t expected_checksum = 0;
    for (size_t i = 0; i < aligned_ptrs.size(); ++i) {
        *static_cast<std::uint64_t*>(aligned_ptrs[i]) = static_cast<std::uint64_t>(i);
        expected_checksum += static_cast<std::uint64_t>(i);
    }

    // Access phase, measured both with the helper timer and directly in ns.
    simd_test_helpers::timer access_timer;
    std::uint64_t checksum = 0;
    const auto start_time = std::chrono::high_resolution_clock::now();
    for (auto ptr : aligned_ptrs) {
        const auto* data = static_cast<const std::uint64_t*>(ptr);
        checksum += data[0];
    }
    const auto end_time = std::chrono::high_resolution_clock::now();
    const auto duration_ns =
        std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count();
    const double access_time = access_timer.elapsed_ms();

    // Diagnostics.
    std::cout << " [诊断] 访问循环耗时: " << duration_ns << " 纳秒" << std::endl;
    std::cout << " [诊断] 计时器测量的访问时间: " << access_time << " 毫秒" << std::endl;
    std::cout << " [诊断] 校验和值: " << checksum << std::endl;
    std::cout << " [诊断] 分配数量: " << aligned_ptrs.size() << std::endl;

    // Release phase.
    simd_test_helpers::timer free_timer;
    for (auto ptr : aligned_ptrs) {
        aligned_free(ptr);
    }
    const double free_time = free_timer.elapsed_ms();

    // Report.
    std::cout << "对齐分配性能:" << std::endl;
    std::cout << " 分配次数: " << num_allocations << " x " << allocation_size << " 字节" << std::endl;
    std::cout << " 分配时间: " << allocation_time << " 毫秒" << std::endl;
    std::cout << " 访问时间: " << access_time << " 毫秒" << std::endl;
    std::cout << " 释放时间: " << free_time << " 毫秒" << std::endl;
    std::cout << " 平均分配时间: " << (allocation_time / num_allocations) << " 毫秒" << std::endl;

    // Basic plausibility checks.
    EXPECT_GT(allocation_time, 0.0);
    // The access loop is tiny and may measure as 0 ms in optimised builds, so
    // only non-negativity is required.
    EXPECT_GE(access_time, 0.0) << "Access time should be non-negative (may be 0 in optimized builds)";
    std::cout << " [注意] 访问时间为 " << access_time << " 毫秒 - 在发布模式下由于编译器优化可能为 0" << std::endl;
    EXPECT_GT(free_time, 0.0);

    // Using the checksum keeps the access loop from being optimised away and
    // verifies that every buffer returned the value written to it.
    EXPECT_EQ(checksum, expected_checksum);
}
// =============================================================================
// 集成和性能测试4个测试用例
// =============================================================================
// 端到端集成测试
// End-to-end flow: query SIMD levels, register several versions of a vector
// sum, build aligned input, dispatch, and check the result is near the
// closed-form sum.
TEST_F(simd_test, SimdIntegrationTest_FullWorkflow) {
    // Vector type backed by the AVX allocator, and the signature registered
    // for every version.
    using aligned_float_vector = std::vector<float, avx_aligned_allocator<float>>;
    using sum_signature = float(const aligned_float_vector&);

    // Step 1: CPU feature detection.
    const auto detected_max = get_max_simd_level();
    const auto recommended = get_recommended_simd_level();
    std::cout << "集成测试 - SIMD 级别: 最大=" << static_cast<int>(detected_max)
              << ", 推荐=" << static_cast<int>(recommended) << std::endl;

    // Step 2: register the versions. All three compute the same plain sum;
    // the SSE and AVX stand-ins scale it slightly so they can be told apart.
    auto& dispatcher = simd_func_dispatcher::instance();
    const std::string func_name = "integration_vector_sum";

    dispatcher.register_function<sum_signature>(
        func_name, simd_func_version::SCALAR,
        [](const aligned_float_vector& values) -> float {
            return std::accumulate(values.begin(), values.end(), 0.0f);
        });

    if (cpu_supports(cpu_feature::SSE)) {
        dispatcher.register_function<sum_signature>(
            func_name, simd_func_version::SSE,
            [](const aligned_float_vector& values) -> float {
                // Stand-in for an SSE implementation.
                const float total = std::accumulate(values.begin(), values.end(), 0.0f);
                return total * 1.001f;
            });
    }

    if (cpu_supports(cpu_feature::AVX)) {
        dispatcher.register_function<sum_signature>(
            func_name, simd_func_version::AVX,
            [](const aligned_float_vector& values) -> float {
                // Stand-in for an AVX implementation.
                const float total = std::accumulate(values.begin(), values.end(), 0.0f);
                return total * 1.002f;
            });
    }

    // Step 3: aligned input holding 1, 2, ..., 10000.
    aligned_float_vector input(10000);
    std::iota(input.begin(), input.end(), 1.0f);
    EXPECT_TRUE(simd_test_helpers::is_properly_aligned<ALIGNMENT_AVX>(input.data()));

    // Step 4: dispatch and run.
    const auto& dispatched = dispatcher.get_function<sum_signature>(func_name);
    const float result = dispatched(input);

    // Step 5: compare against n(n+1)/2 with a 1% band, which covers float
    // rounding and the per-version scale factors.
    const float expected_base = 10000.0f * 10001.0f / 2.0f;
    EXPECT_GT(result, expected_base * 0.99f);
    EXPECT_LT(result, expected_base * 1.01f);

    std::cout << "集成测试结果: " << result << " (期望约 " << expected_base << ")" << std::endl;
}
// Two small workloads run through the dispatcher: a vector dot product and a
// square matrix transpose, each checked against a direct computation.
TEST_F(simd_test, SimdIntegrationTest_RealWorldScenarios) {
    using aligned_vector = std::vector<float, avx_aligned_allocator<float>>;
    using dot_signature = float(const aligned_vector&, const aligned_vector&);
    using transpose_signature = void(const aligned_vector&, aligned_vector&, size_t);

    auto& dispatcher = simd_func_dispatcher::instance();

    // ---- Scenario 1: dot product -------------------------------------------
    const size_t vector_size = 1024;
    aligned_vector vec_a(vector_size);
    aligned_vector vec_b(vector_size);
    for (size_t k = 0; k != vector_size; ++k) {
        vec_a[k] = static_cast<float>(k + 1);
        vec_b[k] = static_cast<float>((k + 1) * 2);
    }

    const std::string dot_product_name = "dot_product";
    dispatcher.register_function<dot_signature>(
        dot_product_name, simd_func_version::SCALAR,
        [](const aligned_vector& lhs, const aligned_vector& rhs) -> float {
            float acc = 0.0f;
            for (size_t k = 0; k < lhs.size(); ++k) {
                acc += lhs[k] * rhs[k];
            }
            return acc;
        });

    const float dot_result = CALL_SIMD_FUNCTION(float(const aligned_vector&, const aligned_vector&),
                                                dot_product_name, vec_a, vec_b);

    // Reference value computed in the same element order.
    float reference = 0.0f;
    for (size_t k = 0; k != vector_size; ++k) {
        reference += vec_a[k] * vec_b[k];
    }
    EXPECT_FLOAT_EQ(dot_result, reference);

    // ---- Scenario 2: matrix transpose --------------------------------------
    const size_t matrix_size = 64;  // 64 x 64, row-major
    aligned_vector matrix(matrix_size * matrix_size);
    aligned_vector transposed(matrix_size * matrix_size);
    for (size_t row = 0; row != matrix_size; ++row) {
        for (size_t col = 0; col != matrix_size; ++col) {
            const size_t flat = row * matrix_size + col;
            matrix[flat] = static_cast<float>(flat);
        }
    }

    const std::string transpose_name = "matrix_transpose";
    dispatcher.register_function<transpose_signature>(
        transpose_name, simd_func_version::SCALAR,
        [](const aligned_vector& src, aligned_vector& dst, size_t dim) {
            for (size_t row = 0; row < dim; ++row) {
                for (size_t col = 0; col < dim; ++col) {
                    dst[col * dim + row] = src[row * dim + col];
                }
            }
        });

    CALL_SIMD_FUNCTION(void(const aligned_vector&, aligned_vector&, size_t),
                       transpose_name, matrix, transposed, matrix_size);

    // Element (row, col) of the source must appear at (col, row).
    for (size_t row = 0; row != matrix_size; ++row) {
        for (size_t col = 0; col != matrix_size; ++col) {
            EXPECT_FLOAT_EQ(transposed[col * matrix_size + row], matrix[row * matrix_size + col]);
        }
    }

    std::cout << "真实场景测试成功完成" << std::endl;
}
// 性能基准测试
// Times batches of aligned allocations and frees for several size/alignment
// combinations and prints one table row per combination.
TEST_F(simd_test, SimdPerformanceTest_AllocationSpeed) {
    // One benchmark case: block size, alignment, repetition count, row label.
    struct bench_case {
        size_t bytes;
        size_t alignment;
        size_t repetitions;
        std::string label;
    };

    const std::vector<bench_case> cases = {
        {1024, ALIGNMENT_SSE, 10000, "SSE-1KB"},
        {1024, ALIGNMENT_AVX, 10000, "AVX-1KB"},
        {1024, ALIGNMENT_AVX512, 10000, "AVX512-1KB"},
        {4096, ALIGNMENT_AVX, 5000, "AVX-4KB"},
        {16384, ALIGNMENT_AVX, 2000, "AVX-16KB"},
        {65536, ALIGNMENT_AVX, 1000, "AVX-64KB"}
    };

    std::cout << "\n分配速度基准测试:" << std::endl;
    std::cout << "配置\t\t分配(毫秒)\t释放(毫秒)\t总计(毫秒)" << std::endl;

    for (const auto& current : cases) {
        std::vector<void*> blocks;
        blocks.reserve(current.repetitions);

        // Timed allocation batch.
        simd_test_helpers::timer alloc_timer;
        for (size_t n = 0; n != current.repetitions; ++n) {
            void* const block = aligned_malloc(current.bytes, current.alignment);
            ASSERT_NE(block, nullptr);
            blocks.push_back(block);
        }
        const double alloc_ms = alloc_timer.elapsed_ms();

        // Timed release batch.
        simd_test_helpers::timer free_timer;
        for (void* const block : blocks) {
            aligned_free(block);
        }
        const double free_ms = free_timer.elapsed_ms();

        const double total_ms = alloc_ms + free_ms;
        std::cout << current.label << "\t\t"
                  << std::fixed << std::setprecision(2)
                  << alloc_ms << "\t\t"
                  << free_ms << "\t\t"
                  << total_ms << std::endl;

        // Plausibility: both phases took measurable time and one allocation
        // averages below 1 ms.
        EXPECT_GT(alloc_ms, 0.0);
        EXPECT_GT(free_ms, 0.0);
        EXPECT_LT(alloc_ms / current.repetitions, 1.0);
    }
}
// Compares one million calls made directly, through a dispatched function
// reference, and through CALL_SIMD_FUNCTION, and reports relative overhead.
//
// The accumulators are unsigned 64-bit: the sum over one million calls is
// about 5e11, which overflows a 32-bit int (undefined behaviour for signed
// types).
TEST_F(simd_test, SimdPerformanceTest_DispatchOverhead) {
    auto& dispatcher = simd_func_dispatcher::instance();
    const std::string bench_func_name = "dispatch_overhead_test";

    // Register a trivial function (and an SSE variant when supported).
    dispatcher.register_function<int(int)>(
        bench_func_name, simd_func_version::SCALAR,
        [](int x) { return x + 1; });
    if (cpu_supports(cpu_feature::SSE)) {
        dispatcher.register_function<int(int)>(
            bench_func_name, simd_func_version::SSE,
            [](int x) { return x + 2; });
    }

    const size_t num_calls = 1000000;

    // Baseline 1: direct call. `volatile` keeps the loop from being removed;
    // plain assignment is used because compound assignment on volatile
    // objects is deprecated in C++20.
    auto direct_func = [](int x) { return x + 1; };
    simd_test_helpers::timer direct_timer;
    volatile std::uint64_t direct_result = 0;
    for (size_t i = 0; i < num_calls; ++i) {
        direct_result = direct_result + static_cast<std::uint64_t>(direct_func(static_cast<int>(i)));
    }
    const double direct_time = direct_timer.elapsed_ms();

    // Baseline 2: call through a reference obtained once from the dispatcher.
    const auto& dispatched_func = dispatcher.get_function<int(int)>(bench_func_name);
    simd_test_helpers::timer dispatch_timer;
    volatile std::uint64_t dispatch_result = 0;
    for (size_t i = 0; i < num_calls; ++i) {
        dispatch_result = dispatch_result + static_cast<std::uint64_t>(dispatched_func(static_cast<int>(i)));
    }
    const double dispatch_time = dispatch_timer.elapsed_ms();

    // Baseline 3: call through the macro on every iteration.
    simd_test_helpers::timer macro_timer;
    volatile std::uint64_t macro_result = 0;
    for (size_t i = 0; i < num_calls; ++i) {
        macro_result = macro_result + static_cast<std::uint64_t>(
                                          CALL_SIMD_FUNCTION(int(int), bench_func_name, static_cast<int>(i)));
    }
    const double macro_time = macro_timer.elapsed_ms();

    // Report raw timings.
    std::cout << "\n分发开销基准测试 (" << num_calls << " 次调用):" << std::endl;
    std::cout << "直接函数调用: " << direct_time << " 毫秒" << std::endl;
    std::cout << "分发函数调用: " << dispatch_time << " 毫秒" << std::endl;
    std::cout << "宏调用: " << macro_time << " 毫秒" << std::endl;

    // Timing plausibility.
    EXPECT_GT(direct_time, 0.0);
    EXPECT_GT(dispatch_time, 0.0);
    EXPECT_GT(macro_time, 0.0);

    // Relative overhead is only meaningful with a non-zero baseline; guarding
    // the division avoids inf/NaN values in the report and the assertions.
    if (direct_time > 0.0) {
        const double dispatch_overhead = (dispatch_time - direct_time) / direct_time * 100.0;
        const double macro_overhead = (macro_time - direct_time) / direct_time * 100.0;
        std::cout << "分发开销: " << std::fixed << std::setprecision(2)
                  << dispatch_overhead << "%" << std::endl;
        std::cout << "宏调用开销: " << macro_overhead << "%" << std::endl;

        // Deliberately loose thresholds: up to 1000% extra time for the
        // dispatched reference and up to 10000% for the macro path.
        EXPECT_LT(dispatch_overhead, 1000.0);
        EXPECT_LT(macro_overhead, 10000.0);
    }

    // Reading the accumulators keeps the loops observable; each one added at
    // least 1 per call, so all must be positive.
    EXPECT_GT(direct_result, 0u);
    EXPECT_GT(dispatch_result, 0u);
    EXPECT_GT(macro_result, 0u);
}
// =============================================================================
// 测试主入口点
// =============================================================================
// 在测试开始前打印系统信息
/// Global test environment that prints a banner with system information
/// before the tests and a closing banner afterwards.
class SimdTestEnvironment : public ::testing::Environment {
public:
    /// Prints the header, the detector's info dump, and a "starting" notice.
    void SetUp() override {
        const std::string rule(60, '=');
        std::cout << "\n" << rule << std::endl;
        std::cout << "SIMD 测试套件 - 系统信息" << std::endl;
        std::cout << rule << std::endl;
        cpu_feature_detector::instance().print_info();
        std::cout << rule << std::endl;
        std::cout << "开始 SIMD 测试..." << std::endl;
        std::cout << rule << std::endl;
    }

    /// Prints the closing banner.
    void TearDown() override {
        const std::string rule(60, '=');
        std::cout << rule << std::endl;
        std::cout << "SIMD 测试套件完成。" << std::endl;
        std::cout << rule << std::endl;
    }
};
// Register the global test environment. The registration happens during
// static initialisation of this translation unit; the returned pointer is
// kept in a file-local constant.
static ::testing::Environment* const simd_test_env =
::testing::AddGlobalTestEnvironment(new SimdTestEnvironment);