diff options
Diffstat (limited to 'Tests/Cuda')
40 files changed, 729 insertions, 23 deletions
diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt index 44c6005..58b9b03 100644 --- a/Tests/Cuda/CMakeLists.txt +++ b/Tests/Cuda/CMakeLists.txt @@ -1,11 +1,25 @@ ADD_TEST_MACRO(Cuda.Complex CudaComplex) ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures) +ADD_TEST_MACRO(Cuda.CXXStandardSetTwice CXXStandardSetTwice) ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary) -ADD_TEST_MACRO(Cuda.MixedStandardLevels MixedStandardLevels) +ADD_TEST_MACRO(Cuda.MixedStandardLevels1 MixedStandardLevels1) +ADD_TEST_MACRO(Cuda.MixedStandardLevels2 MixedStandardLevels2) +ADD_TEST_MACRO(Cuda.MixedStandardLevels3 MixedStandardLevels3) +ADD_TEST_MACRO(Cuda.MixedStandardLevels4 MixedStandardLevels4) +ADD_TEST_MACRO(Cuda.MixedStandardLevels5 MixedStandardLevels5) ADD_TEST_MACRO(Cuda.NotEnabled CudaNotEnabled) ADD_TEST_MACRO(Cuda.SeparableCompCXXOnly SeparableCompCXXOnly) -ADD_TEST_MACRO(Cuda.ToolkitInclude CudaToolkitInclude) +ADD_TEST_MACRO(Cuda.Toolkit Toolkit) +ADD_TEST_MACRO(Cuda.IncludePathNoToolkit IncludePathNoToolkit) ADD_TEST_MACRO(Cuda.ProperDeviceLibraries ProperDeviceLibraries) ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags) +ADD_TEST_MACRO(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit) + +# The CUDA only ships the shared version of the toolkit libraries +# on windows +if(NOT WIN32) + ADD_TEST_MACRO(Cuda.StaticRuntimePlusToolkit StaticRuntimePlusToolkit) +endif() + ADD_TEST_MACRO(Cuda.WithC CudaWithC) diff --git a/Tests/Cuda/MixedStandardLevels/CMakeLists.txt b/Tests/Cuda/CXXStandardSetTwice/CMakeLists.txt index b399662..1941c49 100644 --- a/Tests/Cuda/MixedStandardLevels/CMakeLists.txt +++ b/Tests/Cuda/CXXStandardSetTwice/CMakeLists.txt @@ -1,14 +1,14 @@ cmake_minimum_required(VERSION 3.7) -project(MixedStandardLevels CXX CUDA) +project(CXXStandardSetTwice CXX CUDA) string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") set(CMAKE_CXX_STANDARD 11) -add_executable(MixedStandardLevels main.cu) -target_compile_features(MixedStandardLevels PUBLIC cxx_std_11) +add_executable(CXXStandardSetTwice main.cu) +target_compile_features(CXXStandardSetTwice PUBLIC cxx_std_11) if(APPLE) # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. - set_property(TARGET MixedStandardLevels PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) + set_property(TARGET CXXStandardSetTwice PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) endif() diff --git a/Tests/Cuda/MixedStandardLevels/main.cu b/Tests/Cuda/CXXStandardSetTwice/main.cu index d57c05a..d57c05a 100644 --- a/Tests/Cuda/MixedStandardLevels/main.cu +++ b/Tests/Cuda/CXXStandardSetTwice/main.cu diff --git a/Tests/Cuda/Complex/CMakeLists.txt b/Tests/Cuda/Complex/CMakeLists.txt index d3d4b7c..08d1e16 100644 --- a/Tests/Cuda/Complex/CMakeLists.txt +++ b/Tests/Cuda/Complex/CMakeLists.txt @@ -22,18 +22,11 @@ set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) set(CMAKE_CXX_STANDARD_REQUIRED TRUE) add_library(CudaComplexCppBase SHARED dynamic.cpp) -add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu) -set_target_properties(CudaComplexSeperableLib - PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -set_target_properties( CudaComplexSeperableLib - PROPERTIES POSITION_INDEPENDENT_CODE ON) - add_library(CudaComplexSharedLib SHARED dynamic.cu) target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase) +add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu) add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu) -set_target_properties(CudaComplexMixedLib - PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_link_libraries(CudaComplexMixedLib PUBLIC CudaComplexSharedLib PRIVATE CudaComplexSeperableLib) @@ -41,7 +34,27 @@ target_link_libraries(CudaComplexMixedLib add_executable(CudaComplex main.cpp) target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib) + +set_target_properties(CudaComplexMixedLib + CudaComplexSeperableLib + PROPERTIES + POSITION_INDEPENDENT_CODE ON + CUDA_SEPARABLE_COMPILATION ON + ) +set_target_properties(CudaComplexMixedLib + CudaComplexSharedLib + PROPERTIES + CUDA_RUNTIME_LIBRARY shared + ) + + if(APPLE) # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. set_property(TARGET CudaComplex PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) endif() + +if(UNIX) + # Help the shared cuda runtime find libcudart as it is not located + # in a default system searched location + set_property(TARGET CudaComplexMixedLib PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu index 9da8853..7f2f2b5 100644 --- a/Tests/Cuda/Complex/dynamic.cu +++ b/Tests/Cuda/Complex/dynamic.cu @@ -54,17 +54,20 @@ EXPORT int choose_cuda_device() return 1; } -EXPORT void cuda_dynamic_lib_func() +EXPORT bool cuda_dynamic_lib_func() { - DetermineIfValidCudaDevice<<<1, 1>>>(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { - std::cerr << "DetermineIfValidCudaDevice [SYNC] failed: " + std::cerr << "DetermineIfValidCudaDevice [Per Launch] failed: " << cudaGetErrorString(err) << std::endl; + return false; } + DetermineIfValidCudaDevice<<<1, 1>>>(); err = cudaDeviceSynchronize(); if (err != cudaSuccess) { - std::cerr << "DetermineIfValidCudaDevice [ASYNC] failed: " + std::cerr << "DetermineIfValidCudaDevice [SYNC] failed: " << cudaGetErrorString(cudaGetLastError()) << std::endl; + return false; } + return true; } diff --git a/Tests/Cuda/Complex/main.cpp b/Tests/Cuda/Complex/main.cpp index 6ca5952..da09b44 100644 --- a/Tests/Cuda/Complex/main.cpp +++ b/Tests/Cuda/Complex/main.cpp @@ -22,5 +22,6 @@ int main(int argc, char** argv) int r1 = call_cuda_seperable_code(42); int r2 = mixed_launch_kernel(42); + return (r1 == 42 || r2 == 42) ? 1 : 0; } diff --git a/Tests/Cuda/Complex/mixed.cu b/Tests/Cuda/Complex/mixed.cu index 5b85aec..76119ad 100644 --- a/Tests/Cuda/Complex/mixed.cu +++ b/Tests/Cuda/Complex/mixed.cu @@ -15,7 +15,7 @@ result_type __device__ file1_func(int x); result_type_dynamic __device__ file2_func(int x); -IMPORT void __host__ cuda_dynamic_lib_func(); +IMPORT bool __host__ cuda_dynamic_lib_func(); static __global__ void mixed_kernel(result_type* r, int x) { @@ -25,7 +25,9 @@ static __global__ void mixed_kernel(result_type* r, int x) EXPORT int mixed_launch_kernel(int x) { - cuda_dynamic_lib_func(); + if (!cuda_dynamic_lib_func()) { + return x; + } result_type* r; cudaError_t err = cudaMallocManaged(&r, sizeof(result_type)); diff --git a/Tests/Cuda/ToolkitInclude/CMakeLists.txt b/Tests/Cuda/IncludePathNoToolkit/CMakeLists.txt index f246b54..7be1561 100644 --- a/Tests/Cuda/ToolkitInclude/CMakeLists.txt +++ b/Tests/Cuda/IncludePathNoToolkit/CMakeLists.txt @@ -1,11 +1,11 @@ cmake_minimum_required(VERSION 3.8) -project (ToolkitInclude CXX CUDA) +project (IncludePathNoToolkit CXX CUDA) #Goal for this example: # Validate that between the CXX implicit include directories and the # CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES directories we can find # the cuda runtime headers -add_executable(CudaToolkitInclude main.cpp) -target_include_directories(CudaToolkitInclude PRIVATE +add_executable(IncludePathNoToolkit main.cpp) +target_include_directories(IncludePathNoToolkit PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) diff --git a/Tests/Cuda/ToolkitInclude/main.cpp b/Tests/Cuda/IncludePathNoToolkit/main.cpp index c8d5c6b..c8d5c6b 100644 --- a/Tests/Cuda/ToolkitInclude/main.cpp +++ b/Tests/Cuda/IncludePathNoToolkit/main.cpp diff --git a/Tests/Cuda/MixedStandardLevels1/CMakeLists.txt b/Tests/Cuda/MixedStandardLevels1/CMakeLists.txt new file mode 100644 index 0000000..b03e51e --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels1/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.7) +project(MixedStandardLevels1 CXX CUDA) + +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CUDA_STANDARD 11) + +add_executable(MixedStandardLevels1 main.cu lib.cpp) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET MixedStandardLevels1 PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/Cuda/MixedStandardLevels1/lib.cpp b/Tests/Cuda/MixedStandardLevels1/lib.cpp new file mode 100644 index 0000000..cabbacb --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels1/lib.cpp @@ -0,0 +1,7 @@ + +int func(int A, int B) +{ + // Verify that we have at least c++14 + auto mult_func = [](auto a, auto b) { return a * b; }; + return mult_func(A, B); +} diff --git a/Tests/Cuda/MixedStandardLevels1/main.cu b/Tests/Cuda/MixedStandardLevels1/main.cu new file mode 100644 index 0000000..bc02c6d --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels1/main.cu @@ -0,0 +1,9 @@ + +#include <type_traits> + +int main(int argc, char** argv) +{ + // Verify that we have at least c++11 + using returnv = std::integral_constant<int, 0>; + return returnv::value; +} diff --git a/Tests/Cuda/MixedStandardLevels2/CMakeLists.txt b/Tests/Cuda/MixedStandardLevels2/CMakeLists.txt new file mode 100644 index 0000000..12dd328 --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels2/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.7) +project(MixedStandardLevels2 CXX CUDA) + +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") + +set(CMAKE_CXX_STANDARD 17) #this can decay + +add_executable(MixedStandardLevels2 main.cu lib.cpp) +target_compile_features(MixedStandardLevels2 PUBLIC cuda_std_11) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET MixedStandardLevels2 PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/Cuda/MixedStandardLevels2/lib.cpp b/Tests/Cuda/MixedStandardLevels2/lib.cpp new file mode 100644 index 0000000..cabbacb --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels2/lib.cpp @@ -0,0 +1,7 @@ + +int func(int A, int B) +{ + // Verify that we have at least c++14 + auto mult_func = [](auto a, auto b) { return a * b; }; + return mult_func(A, B); +} diff --git a/Tests/Cuda/MixedStandardLevels2/main.cu b/Tests/Cuda/MixedStandardLevels2/main.cu new file mode 100644 index 0000000..a97a41e --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels2/main.cu @@ -0,0 +1,11 @@ + +#if __cplusplus < 201103L && !defined(_MSC_VER) +# error "invalid standard value" +#endif +#include <type_traits> + +int main(int argc, char** argv) +{ + using returnv = std::integral_constant<int, 0>; + return returnv::value; +} diff --git a/Tests/Cuda/MixedStandardLevels3/CMakeLists.txt b/Tests/Cuda/MixedStandardLevels3/CMakeLists.txt new file mode 100644 index 0000000..2b611be --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels3/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 3.7) +project(MixedStandardLevels3 CXX CUDA) + +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") + +add_executable(MixedStandardLevels3 main.cu lib.cpp) +target_compile_features(MixedStandardLevels3 PUBLIC cuda_std_03 cxx_std_14) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET MixedStandardLevels3 PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/Cuda/MixedStandardLevels3/lib.cpp b/Tests/Cuda/MixedStandardLevels3/lib.cpp new file mode 100644 index 0000000..cabbacb --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels3/lib.cpp @@ -0,0 +1,7 @@ + +int func(int A, int B) +{ + // Verify that we have at least c++14 + auto mult_func = [](auto a, auto b) { return a * b; }; + return mult_func(A, B); +} diff --git a/Tests/Cuda/MixedStandardLevels3/main.cu b/Tests/Cuda/MixedStandardLevels3/main.cu new file mode 100644 index 0000000..1c19e8d --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels3/main.cu @@ -0,0 +1,5 @@ + +int main(int argc, char** argv) +{ + return 0; +} diff --git a/Tests/Cuda/MixedStandardLevels4/CMakeLists.txt b/Tests/Cuda/MixedStandardLevels4/CMakeLists.txt new file mode 100644 index 0000000..faf6869 --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels4/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.7) +project(MixedStandardLevels4 CXX CUDA) + +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") + +set(CMAKE_CUDA_STANDARD 03) + +add_executable(MixedStandardLevels4 main.cu lib.cpp) +target_compile_features(MixedStandardLevels4 PUBLIC cxx_std_14) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET MixedStandardLevels4 PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/Cuda/MixedStandardLevels4/lib.cpp b/Tests/Cuda/MixedStandardLevels4/lib.cpp new file mode 100644 index 0000000..ef6fc20 --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels4/lib.cpp @@ -0,0 +1,16 @@ + + +constexpr int func(int A, int B) +{ +#if defined(_MSC_VER) && _MSC_VER < 1913 + // no suppport for extended constexpr + return B * A; +#else + // Verify that we have at least c++14 + if (A < B) { + return A + B; + } else { + return B * A; + } +#endif +} diff --git a/Tests/Cuda/MixedStandardLevels4/main.cu b/Tests/Cuda/MixedStandardLevels4/main.cu new file mode 100644 index 0000000..1c19e8d --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels4/main.cu @@ -0,0 +1,5 @@ + +int main(int argc, char** argv) +{ + return 0; +} diff --git a/Tests/Cuda/MixedStandardLevels5/CMakeLists.txt b/Tests/Cuda/MixedStandardLevels5/CMakeLists.txt new file mode 100644 index 0000000..7209f60 --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels5/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.7) +project(MixedStandardLevels5 CXX CUDA) + +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") + +set(CMAKE_CXX_STANDARD 98) + +add_executable(MixedStandardLevels5 main.cu lib.cpp) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET MixedStandardLevels5 PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/Cuda/MixedStandardLevels5/lib.cpp b/Tests/Cuda/MixedStandardLevels5/lib.cpp new file mode 100644 index 0000000..dd7b31b --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels5/lib.cpp @@ -0,0 +1,13 @@ + +#if __cplusplus >= 201103L +# error "invalid standard value" +#endif +int func(int A, int B) +{ + // Verify that we have at least c++14 + if (A < B) { + return A + B; + } else { + return B * A; + } +} diff --git a/Tests/Cuda/MixedStandardLevels5/main.cu b/Tests/Cuda/MixedStandardLevels5/main.cu new file mode 100644 index 0000000..c79afd6 --- /dev/null +++ b/Tests/Cuda/MixedStandardLevels5/main.cu @@ -0,0 +1,8 @@ + +#if __cplusplus >= 201103L +# error "invalid standard value" +#endif +int main(int argc, char** argv) +{ + return 0; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt b/Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt new file mode 100644 index 0000000..48df558 --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt @@ -0,0 +1,35 @@ +cmake_minimum_required(VERSION 3.15) +project(SharedRuntimePlusToolkit CXX) + +#Goal for this example: +# Validate that with c++ we can use some components of the CUDA toolkit, and +# specify the cuda runtime +find_package(CUDAToolkit REQUIRED) + +add_library(Common OBJECT curand.cpp nppif.cpp) +target_link_libraries(Common PRIVATE CUDA::toolkit) +set_target_properties(Common PROPERTIES POSITION_INDEPENDENT_CODE ON) + +#shared runtime with shared toolkit libraries +add_library(SharedToolkit SHARED shared.cpp) +target_link_libraries(SharedToolkit PRIVATE Common PUBLIC CUDA::curand CUDA::nppif) +target_link_libraries(SharedToolkit PUBLIC CUDA::cudart) + +# The CUDA only ships the shared version of the toolkit libraries +# on windows +if(NOT WIN32) + #shared runtime with static toolkit libraries + add_library(StaticToolkit SHARED static.cpp) + target_link_libraries(StaticToolkit PRIVATE Common CUDA::curand_static CUDA::nppif_static) + target_link_libraries(StaticToolkit PUBLIC CUDA::cudart) + + #static runtime with mixed toolkit libraries + add_library(MixedToolkit SHARED mixed.cpp) + target_link_libraries(MixedToolkit PRIVATE Common CUDA::curand_static CUDA::nppif) + target_link_libraries(MixedToolkit PUBLIC CUDA::cudart) +endif() + +add_executable(SharedRuntimePlusToolkit main.cpp) +target_link_libraries(SharedRuntimePlusToolkit PRIVATE SharedToolkit + $<TARGET_NAME_IF_EXISTS:StaticToolkit> + $<TARGET_NAME_IF_EXISTS:MixedToolkit>) diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp new file mode 100644 index 0000000..fdd7b53 --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp @@ -0,0 +1,65 @@ +// Comes from: +// https://docs.nvidia.com/cuda/curand/host-api-overview.html#host-api-example + +#ifdef _WIN32 +# define EXPORT __declspec(dllexport) +#else +# define EXPORT +#endif + +/* + * This program uses the host CURAND API to generate 100 + * pseudorandom floats. + */ +#include <cuda.h> +#include <curand.h> +#include <stdio.h> +#include <stdlib.h> + +#define CUDA_CALL(x) \ + do { \ + if ((x) != cudaSuccess) { \ + printf("Error at %s:%d\n", __FILE__, __LINE__); \ + return EXIT_FAILURE; \ + } \ + } while (0) +#define CURAND_CALL(x) \ + do { \ + if ((x) != CURAND_STATUS_SUCCESS) { \ + printf("Error at %s:%d\n", __FILE__, __LINE__); \ + return EXIT_FAILURE; \ + } \ + } while (0) + +EXPORT int curand_main() +{ + size_t n = 100; + size_t i; + curandGenerator_t gen; + float *devData, *hostData; + + /* Allocate n floats on host */ + hostData = (float*)calloc(n, sizeof(float)); + + /* Allocate n floats on device */ + CUDA_CALL(cudaMalloc((void**)&devData, n * sizeof(float))); + + /* Create pseudo-random number generator */ + CURAND_CALL(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT)); + + /* Set seed */ + CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen, 1234ULL)); + + /* Generate n floats on device */ + CURAND_CALL(curandGenerateUniform(gen, devData, n)); + + /* Copy device memory to host */ + CUDA_CALL( + cudaMemcpy(hostData, devData, n * sizeof(float), cudaMemcpyDeviceToHost)); + + /* Cleanup */ + CURAND_CALL(curandDestroyGenerator(gen)); + CUDA_CALL(cudaFree(devData)); + free(hostData); + return EXIT_SUCCESS; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/main.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/main.cpp new file mode 100644 index 0000000..2a4da22 --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/main.cpp @@ -0,0 +1,23 @@ + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +IMPORT int shared_version(); +int static_version() +{ + return 0; +} +int mixed_version() +{ + return 0; +} +#else +int shared_version(); +int static_version(); +int mixed_version(); +#endif + +int main() +{ + return mixed_version() == 0 && shared_version() == 0 && + static_version() == 0; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp new file mode 100644 index 0000000..6de6886 --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp @@ -0,0 +1,16 @@ + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +# define EXPORT __declspec(dllexport) +#else +# define IMPORT +# define EXPORT +#endif + +IMPORT int curand_main(); +IMPORT int nppif_main(); + +EXPORT int mixed_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp new file mode 100644 index 0000000..ac5341c --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp @@ -0,0 +1,92 @@ +// Comes from +// https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066 + +#ifdef _WIN32 +# define EXPORT __declspec(dllexport) +#else +# define EXPORT +#endif + +#include <cstdio> +#include <iostream> + +#include <assert.h> +#include <cuda_runtime_api.h> +#include <nppi_filtering_functions.h> + +EXPORT int nppif_main() +{ + /** + * 8-bit unsigned single-channel 1D row convolution. + */ + const int simgrows = 32; + const int simgcols = 32; + Npp8u *d_pSrc, *d_pDst; + const int nMaskSize = 3; + NppiSize oROI; + oROI.width = simgcols - nMaskSize; + oROI.height = simgrows; + const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]); + const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]); + const int simgpix = simgrows * simgcols; + const int dimgpix = oROI.width * oROI.height; + const int nSrcStep = simgcols * sizeof(d_pSrc[0]); + const int nDstStep = oROI.width * sizeof(d_pDst[0]); + const int pixval = 1; + const int nDivisor = 1; + const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval }; + Npp32s* d_pKernel; + const Npp32s nAnchor = 2; + cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMalloc((void**)&d_pDst, dimgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0])); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // set image to pixval initially + err = cudaMemset(d_pSrc, pixval, simgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMemset(d_pDst, 0, dimgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]), + cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // copy src to dst + NppStatus ret = + nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel, + nMaskSize, nAnchor, nDivisor); + assert(ret == NPP_NO_ERROR); + Npp8u* h_imgres = new Npp8u[dimgpix]; + err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // test for filtering + for (int i = 0; i < dimgpix; i++) { + if (h_imgres[i] != (pixval * pixval * nMaskSize)) { + fprintf(stderr, "h_imgres at index %d failed to match\n", i); + return 1; + } + } + + return 0; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp new file mode 100644 index 0000000..f3c3dbc --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp @@ -0,0 +1,16 @@ + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +# define EXPORT __declspec(dllexport) +#else +# define IMPORT +# define EXPORT +#endif + +int curand_main(); +int nppif_main(); + +EXPORT int shared_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/static.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/static.cpp new file mode 100644 index 0000000..6932fa3 --- /dev/null +++ b/Tests/Cuda/SharedRuntimePlusToolkit/static.cpp @@ -0,0 +1,16 @@ + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +# define EXPORT __declspec(dllexport) +#else +# define IMPORT +# define EXPORT +#endif + +IMPORT int curand_main(); +IMPORT int nppif_main(); + +EXPORT int static_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt b/Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt new file mode 100644 index 0000000..df6c392 --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt @@ -0,0 +1,29 @@ +cmake_minimum_required(VERSION 3.15) +project(StaticRuntimePlusToolkit CXX) + +#Goal for this example: +# Validate that with c++ we can use some components of the CUDA toolkit, and +# specify the cuda runtime +find_package(CUDAToolkit REQUIRED) + +add_library(Common OBJECT curand.cpp nppif.cpp) +target_link_libraries(Common PRIVATE CUDA::toolkit) +set_target_properties(Common PROPERTIES POSITION_INDEPENDENT_CODE ON) + +#static runtime with shared toolkit libraries +add_library(SharedToolkit SHARED shared.cpp) +target_link_libraries(SharedToolkit PRIVATE Common PUBLIC CUDA::curand CUDA::nppif) +target_link_libraries(SharedToolkit PUBLIC CUDA::cudart_static) + +#static runtime with static toolkit libraries +add_library(StaticToolkit SHARED static.cpp) +target_link_libraries(StaticToolkit PRIVATE Common CUDA::curand_static CUDA::nppif_static) +target_link_libraries(StaticToolkit PUBLIC CUDA::cudart_static) + +#static runtime with mixed toolkit libraries +add_library(MixedToolkit SHARED mixed.cpp) +target_link_libraries(MixedToolkit PRIVATE Common CUDA::curand CUDA::nppif_static) +target_link_libraries(MixedToolkit PUBLIC CUDA::cudart_static) + +add_executable(StaticRuntimePlusToolkit main.cpp) +target_link_libraries(StaticRuntimePlusToolkit PRIVATE SharedToolkit StaticToolkit MixedToolkit) diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp new file mode 100644 index 0000000..95872f0 --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp @@ -0,0 +1,59 @@ +// Comes from: +// https://docs.nvidia.com/cuda/curand/host-api-overview.html#host-api-example + +/* + * This program uses the host CURAND API to generate 100 + * pseudorandom floats. + */ +#include <cuda.h> +#include <curand.h> +#include <stdio.h> +#include <stdlib.h> + +#define CUDA_CALL(x) \ + do { \ + if ((x) != cudaSuccess) { \ + printf("Error at %s:%d\n", __FILE__, __LINE__); \ + return EXIT_FAILURE; \ + } \ + } while (0) +#define CURAND_CALL(x) \ + do { \ + if ((x) != CURAND_STATUS_SUCCESS) { \ + printf("Error at %s:%d\n", __FILE__, __LINE__); \ + return EXIT_FAILURE; \ + } \ + } while (0) + +int curand_main() +{ + size_t n = 100; + size_t i; + curandGenerator_t gen; + float *devData, *hostData; + + /* Allocate n floats on host */ + hostData = (float*)calloc(n, sizeof(float)); + + /* Allocate n floats on device */ + CUDA_CALL(cudaMalloc((void**)&devData, n * sizeof(float))); + + /* Create pseudo-random number generator */ + CURAND_CALL(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT)); + + /* Set seed */ + CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen, 1234ULL)); + + /* Generate n floats on device */ + CURAND_CALL(curandGenerateUniform(gen, devData, n)); + + /* Copy device memory to host */ + CUDA_CALL( + cudaMemcpy(hostData, devData, n * sizeof(float), cudaMemcpyDeviceToHost)); + + /* Cleanup */ + CURAND_CALL(curandDestroyGenerator(gen)); + CUDA_CALL(cudaFree(devData)); + free(hostData); + return EXIT_SUCCESS; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/main.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/main.cpp new file mode 100644 index 0000000..5a09f8e --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/main.cpp @@ -0,0 +1,11 @@ + + +int shared_version(); +int static_version(); +int mixed_version(); + +int main() +{ + return mixed_version() == 0 && shared_version() == 0 && + static_version() == 0; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp new file mode 100644 index 0000000..a05140d --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp @@ -0,0 +1,8 @@ + +int curand_main(); +int nppif_main(); + +int mixed_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp new file mode 100644 index 0000000..2871090 --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp @@ -0,0 +1,86 @@ +// Comes from +// https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066 + +#include <cstdio> +#include <iostream> + +#include <assert.h> +#include <cuda_runtime_api.h> +#include <nppi_filtering_functions.h> + +int nppif_main() +{ + /** + * 8-bit unsigned single-channel 1D row convolution. + */ + const int simgrows = 32; + const int simgcols = 32; + Npp8u *d_pSrc, *d_pDst; + const int nMaskSize = 3; + NppiSize oROI; + oROI.width = simgcols - nMaskSize; + oROI.height = simgrows; + const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]); + const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]); + const int simgpix = simgrows * simgcols; + const int dimgpix = oROI.width * oROI.height; + const int nSrcStep = simgcols * sizeof(d_pSrc[0]); + const int nDstStep = oROI.width * sizeof(d_pDst[0]); + const int pixval = 1; + const int nDivisor = 1; + const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval }; + Npp32s* d_pKernel; + const Npp32s nAnchor = 2; + cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMalloc((void**)&d_pDst, dimgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0])); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // set image to pixval initially + err = cudaMemset(d_pSrc, pixval, simgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMemset(d_pDst, 0, dimgsize); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]), + cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // copy src to dst + NppStatus ret = + nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel, + nMaskSize, nAnchor, nDivisor); + assert(ret == NPP_NO_ERROR); + Npp8u* h_imgres = new Npp8u[dimgpix]; + err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + fprintf(stderr, "Cuda error %d\n", __LINE__); + return 1; + } + // test for filtering + for (int i = 0; i < dimgpix; i++) { + if (h_imgres[i] != (pixval * pixval * nMaskSize)) { + fprintf(stderr, "h_imgres at index %d failed to match\n", i); + return 1; + } + } + + return 0; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp new file mode 100644 index 0000000..9967b66 --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp @@ -0,0 +1,8 @@ + +int curand_main(); +int nppif_main(); + +int shared_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/static.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/static.cpp new file mode 100644 index 0000000..ca7eb4c --- /dev/null +++ b/Tests/Cuda/StaticRuntimePlusToolkit/static.cpp @@ -0,0 +1,8 @@ + +int curand_main(); +int nppif_main(); + +int static_version() +{ + return curand_main() == 0 && nppif_main() == 0; +} diff --git a/Tests/Cuda/Toolkit/CMakeLists.txt b/Tests/Cuda/Toolkit/CMakeLists.txt new file mode 100644 index 0000000..86b4652 --- /dev/null +++ b/Tests/Cuda/Toolkit/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required(VERSION 3.15) +project(Toolkit CXX) + +#Goal for this example: +# Validate that we can use CUDAToolkit to find cuda include paths +find_package(CUDAToolkit REQUIRED) + +message(STATUS "CUDAToolkit_VERSION: ${CUDAToolkit_VERSION}") +message(STATUS "CUDAToolkit_VERSION_MAJOR: ${CUDAToolkit_VERSION_MAJOR}") +message(STATUS "CUDAToolkit_VERSION_MINOR: ${CUDAToolkit_VERSION_MINOR}") +message(STATUS "CUDAToolkit_VERSION_PATCH: ${CUDAToolkit_VERSION_PATCH}") +message(STATUS "CUDAToolkit_BIN_DIR: ${CUDAToolkit_BIN_DIR}") +message(STATUS "CUDAToolkit_INCLUDE_DIRS: ${CUDAToolkit_INCLUDE_DIRS}") +message(STATUS "CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}") +message(STATUS "CUDAToolkit_NVCC_EXECUTABLE ${CUDAToolkit_NVCC_EXECUTABLE}") + +# Verify that all the CUDA:: targets exist even when the CUDA language isn't enabled + +foreach (cuda_lib cudart cuda_driver cublas cufft cufftw curand cusolver cusparse nvgraph) + if(NOT TARGET CUDA::${cuda_lib}) + message(FATAL_ERROR "The CUDA::${cuda_lib} target was expected but couldn't be found") + endif() +endforeach() + +foreach (cuda_lib nppc nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu) + if(NOT TARGET CUDA::${cuda_lib}) + message(FATAL_ERROR "The CUDA::${cuda_lib} target was expected but couldn't be found") + endif() +endforeach() + +foreach (cuda_lib nvrtc nvToolsExt OpenCL) + if(NOT TARGET CUDA::${cuda_lib}) + message(FATAL_ERROR "The CUDA::${cuda_lib} target was expected but couldn't be found") + endif() +endforeach() + +add_executable(Toolkit main.cpp) +target_link_libraries(Toolkit PRIVATE CUDA::toolkit) diff --git a/Tests/Cuda/Toolkit/main.cpp b/Tests/Cuda/Toolkit/main.cpp new file mode 100644 index 0000000..c8d5c6b --- /dev/null +++ b/Tests/Cuda/Toolkit/main.cpp @@ -0,0 +1,8 @@ +// Only thing we care about is that these headers are found +#include <cuda.h> +#include <cuda_runtime_api.h> + +int main() +{ + return 0; +} |