diff options
author | Pierre Moreau <dev@pmoreau.org> | 2017-02-02 21:54:24 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2017-02-10 18:46:08 (GMT) |
commit | 8731701cb636317df2691359361562f32adfe759 (patch) | |
tree | 14700e683bd52634938e6a0c82cb96e21dd0acf0 | |
parent | 0ae5386aa953d1670074c2f1bfc9a04ddb382684 (diff) | |
download | CMake-8731701cb636317df2691359361562f32adfe759.zip CMake-8731701cb636317df2691359361562f32adfe759.tar.gz CMake-8731701cb636317df2691359361562f32adfe759.tar.bz2 |
Tests/Cuda: Use memory allocated on the GPU in the kernels
-rw-r--r-- | Tests/Cuda/Complex/file3.cu | 27 | ||||
-rw-r--r-- | Tests/Cuda/Complex/mixed.cu | 28 |
2 files changed, 45 insertions, 10 deletions
diff --git a/Tests/Cuda/Complex/file3.cu b/Tests/Cuda/Complex/file3.cu
index 912105a..bd8198d 100644
--- a/Tests/Cuda/Complex/file3.cu
+++ b/Tests/Cuda/Complex/file3.cu
@@ -9,17 +9,25 @@ result_type_dynamic __device__ file2_func(int x);
 
 static
 __global__
-void file3_kernel(result_type& r, int x)
+void file3_kernel(result_type* r, int x)
 {
-  r = file1_func(x);
+  *r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
 
 int file3_launch_kernel(int x)
 {
-  result_type r;
+  result_type* r;
+  cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
+  if(err != cudaSuccess)
+  {
+    std::cerr << "file3_launch_kernel: cudaMallocManaged failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+  }
+
   file3_kernel <<<1,1>>> (r,x);
-  cudaError_t err = cudaGetLastError();
+  err = cudaGetLastError();
   if(err != cudaSuccess)
   {
     std::cerr << "file3_kernel [SYNC] failed: "
@@ -33,5 +41,14 @@ int file3_launch_kernel(int x)
               << cudaGetErrorString(cudaGetLastError()) << std::endl;
     return x;
   }
-  return r.sum;
+  int result = r->sum;
+  err = cudaFree(r);
+  if(err != cudaSuccess)
+  {
+    std::cerr << "file3_launch_kernel: cudaFree failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+  }
+
+  return result;
 }
diff --git a/Tests/Cuda/Complex/mixed.cu b/Tests/Cuda/Complex/mixed.cu
index a7bcd4e..d96cc7c 100644
--- a/Tests/Cuda/Complex/mixed.cu
+++ b/Tests/Cuda/Complex/mixed.cu
@@ -19,9 +19,9 @@ IMPORT void __host__ cuda_dynamic_lib_func();
 
 static
 __global__
-void mixed_kernel(result_type& r, int x)
+void mixed_kernel(result_type* r, int x)
 {
-  r = file1_func(x);
+  *r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
 
@@ -29,9 +29,17 @@ EXPORT int mixed_launch_kernel(int x)
 {
   cuda_dynamic_lib_func();
 
-  result_type r;
+  result_type* r;
+  cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
+  if(err != cudaSuccess)
+  {
+    std::cerr << "mixed_launch_kernel: cudaMallocManaged failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+  }
+
   mixed_kernel <<<1,1>>> (r,x);
-  cudaError_t err = cudaGetLastError();
+  err = cudaGetLastError();
   if(err != cudaSuccess)
   {
     std::cerr << "mixed_kernel [SYNC] failed: "
@@ -45,5 +53,15 @@ EXPORT int mixed_launch_kernel(int x)
               << cudaGetErrorString(cudaGetLastError()) << std::endl;
     return x;
   }
-  return r.sum;
+
+  int result = r->sum;
+  err = cudaFree(r);
+  if(err != cudaSuccess)
+  {
+    std::cerr << "mixed_launch_kernel: cudaFree failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+  }
+
+  return result;
 }