#include #include #include #include #include #include #include namespace { template __global__ void global_entry_point(F f, T* out) { *out = f(); } template bool verify(F f, T expected) { std::unique_ptr cpu_T(new T); T* gpu_T = nullptr; bool result = true; { hipError_t err = hipMalloc((void**)&gpu_T, sizeof(T)); if (err != hipSuccess) { std::cerr << "hipMalloc failed: " << hipGetErrorString(err) << std::endl; result = false; } } hipLaunchKernelGGL(global_entry_point, 1, 1, 0, 0, f, gpu_T); { hipError_t err = hipMemcpy(cpu_T.get(), gpu_T, sizeof(T), hipMemcpyDeviceToHost); if (err != hipSuccess) { std::cerr << "hipMemcpy failed: " << hipGetErrorString(err) << std::endl; result = false; } } result = hipFree(gpu_T) == hipSuccess && result; result = *cpu_T == expected && result; return result; } } int main(int argc, char** argv) { bool valid = verify([] __device__() { return std::round(1.4f); }, 1.0f); valid &= verify([] __device__() { return max<_Float16>(1.0f, 2.0f); }, 2.0f); valid &= verify([] __device__() { return min<_Float16>(1.0f, 2.0f); }, 1.0f); if (valid) { return 0; } else { return 1; } }