diff options
author | Brad King <brad.king@kitware.com> | 2020-01-15 18:39:58 (GMT) |
---|---|---|
committer | Kitware Robot <kwrobot@kitware.com> | 2020-01-15 18:40:07 (GMT) |
commit | 9f1ce93d92ec33e879ef593de021d0b577de6c68 (patch) | |
tree | 8ffaad9b08ce4ac009d89a4bf0bddbaa3f89a49a | |
parent | 6c23cbca371bb37a6de34b2e0d6fc795632da08b (diff) | |
parent | 6e474364d19566d211aefed003840be15cba940e (diff) | |
download | CMake-9f1ce93d92ec33e879ef593de021d0b577de6c68.zip CMake-9f1ce93d92ec33e879ef593de021d0b577de6c68.tar.gz CMake-9f1ce93d92ec33e879ef593de021d0b577de6c68.tar.bz2 |
Merge topic 'add_cuda_toolkit_tests'
6e474364d1 CUDAToolkit: No targets now depend on the CUDA runtime
907bb7df57 CUDAToolkit: Gracefully handle missing SDK components
e500eb80cd CUDAToolkit: add_cuda_link_dependency correctly sets dependencies
Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !4183
-rw-r--r-- | Modules/FindCUDAToolkit.cmake | 20 | ||||
-rw-r--r-- | Tests/Cuda/CMakeLists.txt | 8 | ||||
-rw-r--r-- | Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt | 35 | ||||
-rw-r--r-- | Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp | 65 | ||||
-rw-r--r-- | Tests/Cuda/SharedRuntimePlusToolkit/main.cpp | 23 | ||||
-rw-r--r-- | Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp | 16 | ||||
-rw-r--r-- | Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp | 92 | ||||
-rw-r--r-- | Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp | 16 | ||||
-rw-r--r-- | Tests/Cuda/SharedRuntimePlusToolkit/static.cpp | 16 | ||||
-rw-r--r-- | Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt | 29 | ||||
-rw-r--r-- | Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp | 59 | ||||
-rw-r--r-- | Tests/Cuda/StaticRuntimePlusToolkit/main.cpp | 11 | ||||
-rw-r--r-- | Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp | 8 | ||||
-rw-r--r-- | Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp | 86 | ||||
-rw-r--r-- | Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp | 8 | ||||
-rw-r--r-- | Tests/Cuda/StaticRuntimePlusToolkit/static.cpp | 8 |
16 files changed, 488 insertions, 12 deletions
diff --git a/Modules/FindCUDAToolkit.cmake b/Modules/FindCUDAToolkit.cmake index 1837694..f7f68ca 100644 --- a/Modules/FindCUDAToolkit.cmake +++ b/Modules/FindCUDAToolkit.cmake @@ -122,7 +122,6 @@ CUDA Runtime Library The CUDA Runtime library (cudart) are what most applications will typically need to link against to make any calls such as `cudaMalloc`, and `cudaFree`. -They are an explicit dependency of almost every library. Targets Created: @@ -708,9 +707,13 @@ if(CUDAToolkit_FOUND) endfunction() function(add_cuda_link_dependency lib_name) - foreach(dependency IN LISTS ${ARGN}) - target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dependency}) - endforeach() + if(TARGET CUDA::${lib_name}) + foreach(dependency IN LISTS ARGN) + if(TARGET CUDA::${dependency}) + target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dependency}) + endif() + endforeach() + endif() endfunction() add_library(CUDA::toolkit IMPORTED INTERFACE) @@ -725,10 +728,8 @@ if(CUDAToolkit_FOUND) foreach (cuda_lib cublas cufft cufftw curand cusolver cusparse nvgraph nvjpeg) find_and_add_cuda_import_lib(${cuda_lib}) - add_cuda_link_dependency(${cuda_lib} cudart) find_and_add_cuda_import_lib(${cuda_lib}_static) - add_cuda_link_dependency(${cuda_lib}_static cudart_static) endforeach() # cuSOLVER depends on cuBLAS, and cuSPARSE @@ -742,9 +743,6 @@ if(CUDAToolkit_FOUND) find_and_add_cuda_import_lib(nppc) find_and_add_cuda_import_lib(nppc_static) - add_cuda_link_dependency(nppc cudart) - add_cuda_link_dependency(nppc_static cudart_static culibos) - # Process the majority of the NPP libraries. foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu) find_and_add_cuda_import_lib(${cuda_lib}) @@ -771,13 +769,11 @@ if(CUDAToolkit_FOUND) endif() find_and_add_cuda_import_lib(nvToolsExt nvToolsExt nvToolsExt64) - add_cuda_link_dependency(nvToolsExt cudart) - find_and_add_cuda_import_lib(OpenCL) find_and_add_cuda_import_lib(culibos) if(TARGET CUDA::culibos) - foreach (cuda_lib cublas cufft cusparse curand nvjpeg) + foreach (cuda_lib cublas cufft cusparse curand nppc nvjpeg) add_cuda_link_dependency(${cuda_lib}_static culibos) endforeach() endif() diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt index 5ba82d8..58b9b03 100644 --- a/Tests/Cuda/CMakeLists.txt +++ b/Tests/Cuda/CMakeLists.txt @@ -14,4 +14,12 @@ ADD_TEST_MACRO(Cuda.Toolkit Toolkit) ADD_TEST_MACRO(Cuda.IncludePathNoToolkit IncludePathNoToolkit) ADD_TEST_MACRO(Cuda.ProperDeviceLibraries ProperDeviceLibraries) ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags) +ADD_TEST_MACRO(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit) + +# The CUDA only ships the shared version of the toolkit libraries +# on windows +if(NOT WIN32) + ADD_TEST_MACRO(Cuda.StaticRuntimePlusToolkit StaticRuntimePlusToolkit) +endif() + ADD_TEST_MACRO(Cuda.WithC CudaWithC) diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt b/Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt new file mode 100644 index 0000000..48df558 --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt @@ -0,0 +1,35 @@ +cmake_minimum_required(VERSION 3.15) +project(SharedRuntimePlusToolkit CXX) + +#Goal for this example: +# Validate that with c++ we can use some components of the CUDA toolkit, and +# specify the cuda runtime +find_package(CUDAToolkit REQUIRED) + +add_library(Common OBJECT curand.cpp nppif.cpp) +target_link_libraries(Common PRIVATE CUDA::toolkit) +set_target_properties(Common PROPERTIES POSITION_INDEPENDENT_CODE ON) + +#shared runtime with shared toolkit libraries +add_library(SharedToolkit SHARED shared.cpp) +target_link_libraries(SharedToolkit PRIVATE Common PUBLIC CUDA::curand CUDA::nppif) +target_link_libraries(SharedToolkit PUBLIC CUDA::cudart) + +# The CUDA only ships the shared version of the toolkit libraries +# on windows +if(NOT WIN32) + #shared runtime with static toolkit libraries + add_library(StaticToolkit SHARED static.cpp) + target_link_libraries(StaticToolkit PRIVATE Common CUDA::curand_static CUDA::nppif_static) + target_link_libraries(StaticToolkit PUBLIC CUDA::cudart) + + #static runtime with mixed toolkit libraries + add_library(MixedToolkit SHARED mixed.cpp) + target_link_libraries(MixedToolkit PRIVATE Common CUDA::curand_static CUDA::nppif) + target_link_libraries(MixedToolkit PUBLIC CUDA::cudart) +endif() + +add_executable(SharedRuntimePlusToolkit main.cpp) +target_link_libraries(SharedRuntimePlusToolkit PRIVATE SharedToolkit + $<TARGET_NAME_IF_EXISTS:StaticToolkit> + $<TARGET_NAME_IF_EXISTS:MixedToolkit>) diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp new file mode 100644 index 0000000..fdd7b53 --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp @@ -0,0 +1,65 @@ +// Comes from: +// https://docs.nvidia.com/cuda/curand/host-api-overview.html#host-api-example + +#ifdef _WIN32 +# define EXPORT __declspec(dllexport) +#else +# define EXPORT +#endif + +/* + * This program uses the host CURAND API to generate 100 + * pseudorandom floats. + */ +#include <cuda.h> +#include <curand.h> +#include <stdio.h> +#include <stdlib.h> + +#define CUDA_CALL(x) \ + do { \ + if ((x) != cudaSuccess) { \ + printf("Error at %s:%d\n", __FILE__, __LINE__); \ + return EXIT_FAILURE; \ + } \ + } while (0) +#define CURAND_CALL(x) \ + do { \ + if ((x) != CURAND_STATUS_SUCCESS) { \ + printf("Error at %s:%d\n", __FILE__, __LINE__); \ + return EXIT_FAILURE; \ + } \ + } while (0) + +EXPORT int curand_main() +{ + size_t n = 100; + size_t i; + curandGenerator_t gen; + float *devData, *hostData; + + /* Allocate n floats on host */ + hostData = (float*)calloc(n, sizeof(float)); + + /* Allocate n floats on device */ + CUDA_CALL(cudaMalloc((void**)&devData, n * sizeof(float))); + + /* Create pseudo-random number generator */ + CURAND_CALL(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT)); + + /* Set seed */ + CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen, 1234ULL)); + + /* Generate n floats on device */ + CURAND_CALL(curandGenerateUniform(gen, devData, n)); + + /* Copy device memory to host */ + CUDA_CALL( + cudaMemcpy(hostData, devData, n * sizeof(float), cudaMemcpyDeviceToHost)); + + /* Cleanup */ + CURAND_CALL(curandDestroyGenerator(gen)); + CUDA_CALL(cudaFree(devData)); + free(hostData); + return EXIT_SUCCESS; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/main.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/main.cpp new file mode 100644 index 0000000..2a4da22 --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/main.cpp @@ -0,0 +1,23 @@ + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +IMPORT int shared_version(); +int static_version() +{ + return 0; +} +int mixed_version() +{ + return 0; +} +#else +int shared_version(); +int static_version(); +int mixed_version(); +#endif + +int main() +{ + return mixed_version() == 0 && shared_version() == 0 && + static_version() == 0; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp new file mode 100644 index 0000000..6de6886 --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp @@ -0,0 +1,16 @@ + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +# define EXPORT __declspec(dllexport) +#else +# define IMPORT +# define EXPORT +#endif + +IMPORT int curand_main(); +IMPORT int nppif_main(); + +EXPORT int mixed_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp new file mode 100644 index 0000000..ac5341c --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp @@ -0,0 +1,92 @@ +// Comes from +// https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066 + +#ifdef _WIN32 +# define EXPORT __declspec(dllexport) +#else +# define EXPORT +#endif + +#include <cstdio> +#include <iostream> + +#include <assert.h> +#include <cuda_runtime_api.h> +#include <nppi_filtering_functions.h> + +EXPORT int nppif_main() +{ + /** + * 8-bit unsigned single-channel 1D row convolution. + */ + const int simgrows = 32; + const int simgcols = 32; + Npp8u *d_pSrc, *d_pDst; + const int nMaskSize = 3; + NppiSize oROI; + oROI.width = simgcols - nMaskSize; + oROI.height = simgrows; + const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]); + const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]); + const int simgpix = simgrows * simgcols; + const int dimgpix = oROI.width * oROI.height; + const int nSrcStep = simgcols * sizeof(d_pSrc[0]); + const int nDstStep = oROI.width * sizeof(d_pDst[0]); + const int pixval = 1; + const int nDivisor = 1; + const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval }; + Npp32s* d_pKernel; + const Npp32s nAnchor = 2; + cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMalloc((void**)&d_pDst, dimgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0])); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // set image to pixval initially + err = cudaMemset(d_pSrc, pixval, simgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMemset(d_pDst, 0, dimgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]), + cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // copy src to dst + NppStatus ret = + nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel, + nMaskSize, nAnchor, nDivisor); + assert(ret == NPP_NO_ERROR); + Npp8u* h_imgres = new Npp8u[dimgpix]; + err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // test for filtering + for (int i = 0; i < dimgpix; i++) { + if (h_imgres[i] != (pixval * pixval * nMaskSize)) { + fprintf(stderr, "h_imgres at index %d failed to match\n", i); + return 1; + } + } + + return 0; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp new file mode 100644 index 0000000..f3c3dbc --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp @@ -0,0 +1,16 @@ + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +# define EXPORT __declspec(dllexport) +#else +# define IMPORT +# define EXPORT +#endif + +int curand_main(); +int nppif_main(); + +EXPORT int shared_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/static.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/static.cpp new file mode 100644 index 0000000..6932fa3 --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/static.cpp @@ -0,0 +1,16 @@ + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +# define EXPORT __declspec(dllexport) +#else +# define IMPORT +# define EXPORT +#endif + +IMPORT int curand_main(); +IMPORT int nppif_main(); + +EXPORT int static_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt b/Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt new file mode 100644 index 0000000..df6c392 --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt @@ -0,0 +1,29 @@ +cmake_minimum_required(VERSION 3.15) +project(StaticRuntimePlusToolkit CXX) + +#Goal for this example: +# Validate that with c++ we can use some components of the CUDA toolkit, and +# specify the cuda runtime +find_package(CUDAToolkit REQUIRED) + +add_library(Common OBJECT curand.cpp nppif.cpp) +target_link_libraries(Common PRIVATE CUDA::toolkit) +set_target_properties(Common PROPERTIES POSITION_INDEPENDENT_CODE ON) + +#static runtime with shared toolkit libraries +add_library(SharedToolkit SHARED shared.cpp) +target_link_libraries(SharedToolkit PRIVATE Common PUBLIC CUDA::curand CUDA::nppif) +target_link_libraries(SharedToolkit PUBLIC CUDA::cudart_static) + +#static runtime with static toolkit libraries +add_library(StaticToolkit SHARED static.cpp) +target_link_libraries(StaticToolkit PRIVATE Common CUDA::curand_static CUDA::nppif_static) +target_link_libraries(StaticToolkit PUBLIC CUDA::cudart_static) + +#static runtime with mixed toolkit libraries +add_library(MixedToolkit SHARED mixed.cpp) +target_link_libraries(MixedToolkit PRIVATE Common CUDA::curand CUDA::nppif_static) +target_link_libraries(MixedToolkit PUBLIC CUDA::cudart_static) + +add_executable(StaticRuntimePlusToolkit main.cpp) +target_link_libraries(StaticRuntimePlusToolkit PRIVATE SharedToolkit StaticToolkit MixedToolkit) diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp new file mode 100644 index 0000000..95872f0 --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp @@ -0,0 +1,59 @@ +// Comes from: +// https://docs.nvidia.com/cuda/curand/host-api-overview.html#host-api-example + +/* + * This program uses the host CURAND API to generate 100 + * pseudorandom floats. + */ +#include <cuda.h> +#include <curand.h> +#include <stdio.h> +#include <stdlib.h> + +#define CUDA_CALL(x) \ + do { \ + if ((x) != cudaSuccess) { \ + printf("Error at %s:%d\n", __FILE__, __LINE__); \ + return EXIT_FAILURE; \ + } \ + } while (0) +#define CURAND_CALL(x) \ + do { \ + if ((x) != CURAND_STATUS_SUCCESS) { \ + printf("Error at %s:%d\n", __FILE__, __LINE__); \ + return EXIT_FAILURE; \ + } \ + } while (0) + +int curand_main() +{ + size_t n = 100; + size_t i; + curandGenerator_t gen; + float *devData, *hostData; + + /* Allocate n floats on host */ + hostData = (float*)calloc(n, sizeof(float)); + + /* Allocate n floats on device */ + CUDA_CALL(cudaMalloc((void**)&devData, n * sizeof(float))); + + /* Create pseudo-random number generator */ + CURAND_CALL(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT)); + + /* Set seed */ + CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen, 1234ULL)); + + /* Generate n floats on device */ + CURAND_CALL(curandGenerateUniform(gen, devData, n)); + + /* Copy device memory to host */ + CUDA_CALL( + cudaMemcpy(hostData, devData, n * sizeof(float), cudaMemcpyDeviceToHost)); + + /* Cleanup */ + CURAND_CALL(curandDestroyGenerator(gen)); + CUDA_CALL(cudaFree(devData)); + free(hostData); + return EXIT_SUCCESS; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/main.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/main.cpp new file mode 100644 index 0000000..5a09f8e --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/main.cpp @@ -0,0 +1,11 @@ + + +int shared_version(); +int static_version(); +int mixed_version(); + +int main() +{ + return mixed_version() == 0 && shared_version() == 0 && + static_version() == 0; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp new file mode 100644 index 0000000..a05140d --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp @@ -0,0 +1,8 @@ + +int curand_main(); +int nppif_main(); + +int mixed_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp new file mode 100644 index 0000000..2871090 --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp @@ -0,0 +1,86 @@ +// Comes from +// https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066 + +#include <cstdio> +#include <iostream> + +#include <assert.h> +#include <cuda_runtime_api.h> +#include <nppi_filtering_functions.h> + +int nppif_main() +{ + /** + * 8-bit unsigned single-channel 1D row convolution. + */ + const int simgrows = 32; + const int simgcols = 32; + Npp8u *d_pSrc, *d_pDst; + const int nMaskSize = 3; + NppiSize oROI; + oROI.width = simgcols - nMaskSize; + oROI.height = simgrows; + const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]); + const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]); + const int simgpix = simgrows * simgcols; + const int dimgpix = oROI.width * oROI.height; + const int nSrcStep = simgcols * sizeof(d_pSrc[0]); + const int nDstStep = oROI.width * sizeof(d_pDst[0]); + const int pixval = 1; + const int nDivisor = 1; + const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval }; + Npp32s* d_pKernel; + const Npp32s nAnchor = 2; + cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMalloc((void**)&d_pDst, dimgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0])); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // set image to pixval initially + err = cudaMemset(d_pSrc, pixval, simgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMemset(d_pDst, 0, dimgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]), + cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // copy src to dst + NppStatus ret = + nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel, + nMaskSize, nAnchor, nDivisor); + assert(ret == NPP_NO_ERROR); + Npp8u* h_imgres = new Npp8u[dimgpix]; + err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // test for filtering + for (int i = 0; i < dimgpix; i++) { + if (h_imgres[i] != (pixval * pixval * nMaskSize)) { + fprintf(stderr, "h_imgres at index %d failed to match\n", i); + return 1; + } + } + + return 0; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp new file mode 100644 index 0000000..9967b66 --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp @@ -0,0 +1,8 @@ + +int curand_main(); +int nppif_main(); + +int shared_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/static.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/static.cpp new file mode 100644 index 0000000..ca7eb4c --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/static.cpp @@ -0,0 +1,8 @@ + +int curand_main(); +int nppif_main(); + +int static_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} |