summary refs log tree commit diff stats
path: root/Tests/Cuda/Complex
diff options
context:
space:
mode:
author Pierre Moreau <dev@pmoreau.org> 2017-02-02 21:54:24 (GMT)
committer Brad King <brad.king@kitware.com> 2017-02-10 18:46:08 (GMT)
commit 8731701cb636317df2691359361562f32adfe759 (patch)
tree 14700e683bd52634938e6a0c82cb96e21dd0acf0 /Tests/Cuda/Complex
parent 0ae5386aa953d1670074c2f1bfc9a04ddb382684 (diff)
download CMake-8731701cb636317df2691359361562f32adfe759.zip
CMake-8731701cb636317df2691359361562f32adfe759.tar.gz
CMake-8731701cb636317df2691359361562f32adfe759.tar.bz2
Tests/Cuda: Use memory allocated on the GPU in the kernels
Diffstat (limited to 'Tests/Cuda/Complex')
-rw-r--r-- Tests/Cuda/Complex/file3.cu 27
-rw-r--r-- Tests/Cuda/Complex/mixed.cu 28
2 files changed, 45 insertions, 10 deletions
diff --git a/Tests/Cuda/Complex/file3.cu b/Tests/Cuda/Complex/file3.cu
index 912105a..bd8198d 100644
--- a/Tests/Cuda/Complex/file3.cu
+++ b/Tests/Cuda/Complex/file3.cu
@@ -9,17 +9,25 @@ result_type_dynamic __device__ file2_func(int x);
static
__global__
-void file3_kernel(result_type& r, int x)
+void file3_kernel(result_type* r, int x)
{
- r = file1_func(x);
+ *r = file1_func(x);
result_type_dynamic rd = file2_func(x);
}
int file3_launch_kernel(int x)
{
- result_type r;
+ result_type* r;
+ cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
+ if(err != cudaSuccess)
+ {
+ std::cerr << "file3_launch_kernel: cudaMallocManaged failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return x;
+ }
+
file3_kernel <<<1,1>>> (r,x);
- cudaError_t err = cudaGetLastError();
+ err = cudaGetLastError();
if(err != cudaSuccess)
{
std::cerr << "file3_kernel [SYNC] failed: "
@@ -33,5 +41,14 @@ int file3_launch_kernel(int x)
<< cudaGetErrorString(cudaGetLastError()) << std::endl;
return x;
}
- return r.sum;
+ int result = r->sum;
+ err = cudaFree(r);
+ if(err != cudaSuccess)
+ {
+ std::cerr << "file3_launch_kernel: cudaFree failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return x;
+ }
+
+ return result;
}
diff --git a/Tests/Cuda/Complex/mixed.cu b/Tests/Cuda/Complex/mixed.cu
index a7bcd4e..d96cc7c 100644
--- a/Tests/Cuda/Complex/mixed.cu
+++ b/Tests/Cuda/Complex/mixed.cu
@@ -19,9 +19,9 @@ IMPORT void __host__ cuda_dynamic_lib_func();
static
__global__
-void mixed_kernel(result_type& r, int x)
+void mixed_kernel(result_type* r, int x)
{
- r = file1_func(x);
+ *r = file1_func(x);
result_type_dynamic rd = file2_func(x);
}
@@ -29,9 +29,17 @@ EXPORT int mixed_launch_kernel(int x)
{
cuda_dynamic_lib_func();
- result_type r;
+ result_type* r;
+ cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
+ if(err != cudaSuccess)
+ {
+ std::cerr << "mixed_launch_kernel: cudaMallocManaged failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return x;
+ }
+
mixed_kernel <<<1,1>>> (r,x);
- cudaError_t err = cudaGetLastError();
+ err = cudaGetLastError();
if(err != cudaSuccess)
{
std::cerr << "mixed_kernel [SYNC] failed: "
@@ -45,5 +53,15 @@ EXPORT int mixed_launch_kernel(int x)
<< cudaGetErrorString(cudaGetLastError()) << std::endl;
return x;
}
- return r.sum;
+
+ int result = r->sum;
+ err = cudaFree(r);
+ if(err != cudaSuccess)
+ {
+ std::cerr << "mixed_launch_kernel: cudaFree failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return x;
+ }
+
+ return result;
}