diff options
Diffstat (limited to 'Tests/Cuda')
29 files changed, 563 insertions, 0 deletions
diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt new file mode 100644 index 0000000..de48501 --- /dev/null +++ b/Tests/Cuda/CMakeLists.txt @@ -0,0 +1,7 @@ + +ADD_TEST_MACRO(Cuda.Complex CudaComplex) +ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures) +ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary) +ADD_TEST_MACRO(Cuda.ToolkitInclude CudaToolkitInclude) +ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags) +ADD_TEST_MACRO(Cuda.WithC CudaWithC) diff --git a/Tests/Cuda/Complex/CMakeLists.txt b/Tests/Cuda/Complex/CMakeLists.txt new file mode 100644 index 0000000..a7137e3 --- /dev/null +++ b/Tests/Cuda/Complex/CMakeLists.txt @@ -0,0 +1,47 @@ + +cmake_minimum_required(VERSION 3.7) +project (CudaComplex CXX CUDA) +#Goal for this example: + +#build a cpp dynamic library base +#build a cuda static library base that uses separable compilation + +#build a cuda dynamic library that uses the first dynamic library +#build a mixed cpp & cuda dynamic library uses all 3 previous libraries + +#lastly build a cpp executable that uses this last cuda dynamic library + +#this tests that we can properly handle linking cuda and cpp together +#and also bulding cpp targets that need cuda implicit libraries + +#verify that we can pass explicit cuda arch flags +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") +set(CMAKE_CUDA_STANDARD 11) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) +set(CMAKE_CXX_STANDARD_REQUIRED TRUE) + +add_library(CudaComplexCppBase SHARED dynamic.cpp) +add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu) +set_target_properties(CudaComplexSeperableLib + PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +set_target_properties( CudaComplexSeperableLib + PROPERTIES POSITION_INDEPENDENT_CODE ON) + +add_library(CudaComplexSharedLib SHARED dynamic.cu) +target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase) + +add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu) +set_target_properties(CudaComplexMixedLib + PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +target_link_libraries(CudaComplexMixedLib + PUBLIC CudaComplexSharedLib + PRIVATE CudaComplexSeperableLib) + +add_executable(CudaComplex main.cpp) +target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaComplex PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/Cuda/Complex/dynamic.cpp b/Tests/Cuda/Complex/dynamic.cpp new file mode 100644 index 0000000..3848ce7 --- /dev/null +++ b/Tests/Cuda/Complex/dynamic.cpp @@ -0,0 +1,11 @@ + +#ifdef _WIN32 +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +EXPORT int dynamic_base_func(int x) +{ + return x * x; +} diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu new file mode 100644 index 0000000..a76973d --- /dev/null +++ b/Tests/Cuda/Complex/dynamic.cu @@ -0,0 +1,69 @@ + +#include <cuda.h> +#include <iostream> +#include <string> + +#ifdef _WIN32 +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +int dynamic_base_func(int); + +EXPORT int __host__ cuda_dynamic_host_func(int x) +{ + return dynamic_base_func(x); +} + +static __global__ void DetermineIfValidCudaDevice() +{ +} + +EXPORT int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + if (prop.major >= 3) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + +EXPORT void cuda_dynamic_lib_func() +{ + DetermineIfValidCudaDevice<<<1, 1>>>(); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "DetermineIfValidCudaDevice [SYNC] failed: " + << cudaGetErrorString(err) << std::endl; + } + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) { + std::cerr << "DetermineIfValidCudaDevice [ASYNC] failed: " + << cudaGetErrorString(cudaGetLastError()) << std::endl; + } +} diff --git a/Tests/Cuda/Complex/file1.cu b/Tests/Cuda/Complex/file1.cu new file mode 100644 index 0000000..1ce63bf --- /dev/null +++ b/Tests/Cuda/Complex/file1.cu @@ -0,0 +1,10 @@ + +#include "file1.h" + +result_type __device__ file1_func(int x) +{ + result_type r; + r.input = x; + r.sum = x * x; + return r; +} diff --git a/Tests/Cuda/Complex/file1.h b/Tests/Cuda/Complex/file1.h new file mode 100644 index 0000000..ff1945c --- /dev/null +++ b/Tests/Cuda/Complex/file1.h @@ -0,0 +1,7 @@ + +#pragma once +struct result_type +{ + int input; + int sum; +}; diff --git a/Tests/Cuda/Complex/file2.cu b/Tests/Cuda/Complex/file2.cu new file mode 100644 index 0000000..74f3558 --- /dev/null +++ b/Tests/Cuda/Complex/file2.cu @@ -0,0 +1,16 @@ + +#include "file2.h" + +result_type __device__ file1_func(int x); + +result_type_dynamic __device__ file2_func(int x) +{ + if (x != 42) { + const result_type r = file1_func(x); + const result_type_dynamic rd{ r.input, r.sum, true }; + return rd; + } else { + const result_type_dynamic rd{ x, x * x * x, false }; + return rd; + } +} diff --git a/Tests/Cuda/Complex/file2.h b/Tests/Cuda/Complex/file2.h new file mode 100644 index 0000000..d2dbaa4 --- /dev/null +++ b/Tests/Cuda/Complex/file2.h @@ -0,0 +1,10 @@ + +#pragma once +#include "file1.h" + +struct result_type_dynamic +{ + int input; + int sum; + bool from_static; +}; diff --git a/Tests/Cuda/Complex/file3.cu b/Tests/Cuda/Complex/file3.cu new file mode 100644 index 0000000..d055b42 --- /dev/null +++ b/Tests/Cuda/Complex/file3.cu @@ -0,0 +1,48 @@ + +#include <iostream> + +#include "file1.h" +#include "file2.h" + +result_type __device__ file1_func(int x); +result_type_dynamic __device__ file2_func(int x); + +static __global__ void file3_kernel(result_type* r, int x) +{ + *r = file1_func(x); + result_type_dynamic rd = file2_func(x); +} + +int file3_launch_kernel(int x) +{ + result_type* r; + cudaError_t err = cudaMallocManaged(&r, sizeof(result_type)); + if (err != cudaSuccess) { + std::cerr << "file3_launch_kernel: cudaMallocManaged failed: " + << cudaGetErrorString(err) << std::endl; + return x; + } + + file3_kernel<<<1, 1>>>(r, x); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file3_kernel [SYNC] failed: " << cudaGetErrorString(err) + << std::endl; + return x; + } + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) { + std::cerr << "file3_kernel [ASYNC] failed: " + << cudaGetErrorString(cudaGetLastError()) << std::endl; + return x; + } + int result = r->sum; + err = cudaFree(r); + if (err != cudaSuccess) { + std::cerr << "file3_launch_kernel: cudaFree failed: " + << cudaGetErrorString(err) << std::endl; + return x; + } + + return result; +} diff --git a/Tests/Cuda/Complex/main.cpp b/Tests/Cuda/Complex/main.cpp new file mode 100644 index 0000000..2498235 --- /dev/null +++ b/Tests/Cuda/Complex/main.cpp @@ -0,0 +1,26 @@ +#include <iostream> + +#include "file1.h" +#include "file2.h" + +#ifdef _WIN32 +#define IMPORT __declspec(dllimport) +#else +#define IMPORT +#endif + +IMPORT int choose_cuda_device(); +IMPORT int call_cuda_seperable_code(int x); +IMPORT int mixed_launch_kernel(int x); + +int main(int argc, char** argv) +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + int r1 = call_cuda_seperable_code(42); + int r2 = mixed_launch_kernel(42); + return (r1 == 42 || r2 == 42) ? 1 : 0; +} diff --git a/Tests/Cuda/Complex/mixed.cpp b/Tests/Cuda/Complex/mixed.cpp new file mode 100644 index 0000000..bd32e51 --- /dev/null +++ b/Tests/Cuda/Complex/mixed.cpp @@ -0,0 +1,22 @@ + +#ifdef _WIN32 +#define EXPORT __declspec(dllexport) +#define IMPORT __declspec(dllimport) +#else +#define EXPORT +#define IMPORT +#endif + +int dynamic_base_func(int); +IMPORT int cuda_dynamic_host_func(int); +int file3_launch_kernel(int); + +int dynamic_final_func(int x) +{ + return cuda_dynamic_host_func(dynamic_base_func(x)); +} + +EXPORT int call_cuda_seperable_code(int x) +{ + return file3_launch_kernel(x); +} diff --git a/Tests/Cuda/Complex/mixed.cu b/Tests/Cuda/Complex/mixed.cu new file mode 100644 index 0000000..a81ccb7 --- /dev/null +++ b/Tests/Cuda/Complex/mixed.cu @@ -0,0 +1,61 @@ + +#include <iostream> + +#include "file1.h" +#include "file2.h" + +#ifdef _WIN32 +#define EXPORT __declspec(dllexport) +#define IMPORT __declspec(dllimport) +#else +#define EXPORT +#define IMPORT +#endif + +result_type __device__ file1_func(int x); +result_type_dynamic __device__ file2_func(int x); + +IMPORT void __host__ cuda_dynamic_lib_func(); + +static __global__ void mixed_kernel(result_type* r, int x) +{ + *r = file1_func(x); + result_type_dynamic rd = file2_func(x); +} + +EXPORT int mixed_launch_kernel(int x) +{ + cuda_dynamic_lib_func(); + + result_type* r; + cudaError_t err = cudaMallocManaged(&r, sizeof(result_type)); + if (err != cudaSuccess) { + std::cerr << "mixed_launch_kernel: cudaMallocManaged failed: " + << cudaGetErrorString(err) << std::endl; + return x; + } + + mixed_kernel<<<1, 1>>>(r, x); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "mixed_kernel [SYNC] failed: " << cudaGetErrorString(err) + << std::endl; + return x; + } + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) { + std::cerr << "mixed_kernel [ASYNC] failed: " + << cudaGetErrorString(cudaGetLastError()) << std::endl; + return x; + } + + int result = r->sum; + err = cudaFree(r); + if (err != cudaSuccess) { + std::cerr << "mixed_launch_kernel: cudaFree failed: " + << cudaGetErrorString(err) << std::endl; + return x; + } + + return result; +} diff --git a/Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt b/Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt new file mode 100644 index 0000000..9fda2d0 --- /dev/null +++ b/Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt @@ -0,0 +1,17 @@ + +cmake_minimum_required(VERSION 3.7) +project (CudaConsumeCompileFeatures CXX CUDA) +#Goal for this example: + +#build a c++11 library that express a c++11 public compile feature +#link a cuda library and verify it builds with c++11 enabled + +#build a standalone c++/cuda mixed executable where we express a c++11 +#compile feature. + + +add_library(CudaConsumeLib STATIC static.cpp static.cu) +target_compile_features(CudaConsumeLib PUBLIC cxx_nullptr) + +add_executable(CudaConsumeCompileFeatures main.cu) +target_link_libraries(CudaConsumeCompileFeatures PRIVATE CudaConsumeLib) diff --git a/Tests/Cuda/ConsumeCompileFeatures/main.cu b/Tests/Cuda/ConsumeCompileFeatures/main.cu new file mode 100644 index 0000000..bc32450 --- /dev/null +++ b/Tests/Cuda/ConsumeCompileFeatures/main.cu @@ -0,0 +1,20 @@ + +#include <iostream> + +int static_cxx11_func(int); + +void test_functions() +{ + auto x = static_cxx11_func(int(42)); + std::cout << x << std::endl; +} + +int main(int argc, char** argv) +{ + test_functions(); + std::cout + << "this executable doesn't use cuda code, just call methods defined" + << std::endl; + std::cout << "in libraries that have cuda code" << std::endl; + return 0; +} diff --git a/Tests/Cuda/ConsumeCompileFeatures/static.cpp b/Tests/Cuda/ConsumeCompileFeatures/static.cpp new file mode 100644 index 0000000..565d52e --- /dev/null +++ b/Tests/Cuda/ConsumeCompileFeatures/static.cpp @@ -0,0 +1,10 @@ + + +#include <type_traits> + +int static_cuda11_func(int); + +int static_cxx11_func(int x) +{ + return static_cuda11_func(x) + std::integral_constant<int, 32>::value; +} diff --git a/Tests/Cuda/ConsumeCompileFeatures/static.cu b/Tests/Cuda/ConsumeCompileFeatures/static.cu new file mode 100644 index 0000000..73e43a8 --- /dev/null +++ b/Tests/Cuda/ConsumeCompileFeatures/static.cu @@ -0,0 +1,9 @@ + +#include <type_traits> + +using tt = std::true_type; +using ft = std::false_type; +int __host__ static_cuda11_func(int x) +{ + return x * x + std::integral_constant<int, 17>::value; +} diff --git a/Tests/Cuda/ObjectLibrary/CMakeLists.txt b/Tests/Cuda/ObjectLibrary/CMakeLists.txt new file mode 100644 index 0000000..276dc92 --- /dev/null +++ b/Tests/Cuda/ObjectLibrary/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 3.7) +project (CudaObjectLibrary CUDA CXX) +#Goal for this example: + +#build a object files some with cuda and some without than +#embed these into an executable + +add_library(CudaMixedObjectLib OBJECT static.cu static.cpp) + +add_executable(CudaObjectLibrary + main.cpp + $<TARGET_OBJECTS:CudaMixedObjectLib>) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaObjectLibrary PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/Cuda/ObjectLibrary/main.cpp b/Tests/Cuda/ObjectLibrary/main.cpp new file mode 100644 index 0000000..4d2f890 --- /dev/null +++ b/Tests/Cuda/ObjectLibrary/main.cpp @@ -0,0 +1,22 @@ + +#include <iostream> + +int static_func(int); +int file1_sq_func(int); + +int test_functions() +{ + return file1_sq_func(static_func(42)); +} + +int main(int argc, char** argv) +{ + if (test_functions() == 1) { + return 1; + } + std::cout + << "this executable doesn't use cuda code, just call methods defined" + << std::endl; + std::cout << "in object files that have cuda code" << std::endl; + return 0; +} diff --git a/Tests/Cuda/ObjectLibrary/static.cpp b/Tests/Cuda/ObjectLibrary/static.cpp new file mode 100644 index 0000000..6db1f91 --- /dev/null +++ b/Tests/Cuda/ObjectLibrary/static.cpp @@ -0,0 +1,6 @@ +int file1_sq_func(int); + +int static_func(int x) +{ + return file1_sq_func(x); +} diff --git a/Tests/Cuda/ObjectLibrary/static.cu b/Tests/Cuda/ObjectLibrary/static.cu new file mode 100644 index 0000000..aa35729 --- /dev/null +++ b/Tests/Cuda/ObjectLibrary/static.cu @@ -0,0 +1,21 @@ + +#include <cuda.h> +#include <cuda_runtime.h> +#include <iostream> + +int __host__ file1_sq_func(int x) +{ + cudaError_t err; + int nDevices = 0; + err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "nDevices: " << nDevices << std::endl; + std::cerr << "err: " << err << std::endl; + return 1; + } + std::cout << "this library uses cuda code" << std::endl; + std::cout << "you have " << nDevices << " devices that support cuda" + << std::endl; + + return x * x; +} diff --git a/Tests/Cuda/ProperLinkFlags/CMakeLists.txt b/Tests/Cuda/ProperLinkFlags/CMakeLists.txt new file mode 100644 index 0000000..b6e0e39 --- /dev/null +++ b/Tests/Cuda/ProperLinkFlags/CMakeLists.txt @@ -0,0 +1,20 @@ + +cmake_minimum_required(VERSION 3.7) +project (ProperLinkFlags CUDA CXX) + +#Goal for this example: +#Verify that when we have CXX and CUDA enabled and we link an executable that +#has CUDA and CXX we use the CUDA link flags when doing the device link +#step + +#Specify a set of valid CUDA flags and an invalid set of CXX flags ( for CUDA ) +#to make sure we don't use the CXX flags when linking CUDA executables +string(APPEND CMAKE_CUDA_FLAGS " -arch=sm_35 --use_fast_math") +set(CMAKE_CXX_FLAGS "-Wall") + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CUDA_STANDARD 11) +add_executable(ProperLinkFlags file1.cu main.cxx) + +set_target_properties( ProperLinkFlags + PROPERTIES CUDA_SEPARABLE_COMPILATION ON) diff --git a/Tests/Cuda/ProperLinkFlags/file1.cu b/Tests/Cuda/ProperLinkFlags/file1.cu new file mode 100644 index 0000000..9a105f0 --- /dev/null +++ b/Tests/Cuda/ProperLinkFlags/file1.cu @@ -0,0 +1,11 @@ + +#include "file1.h" + +result_type __device__ file1_func(int x) +{ + __ldg(&x); + result_type r; + r.input = x; + r.sum = x * x; + return r; +} diff --git a/Tests/Cuda/ProperLinkFlags/file1.h b/Tests/Cuda/ProperLinkFlags/file1.h new file mode 100644 index 0000000..ff1945c --- /dev/null +++ b/Tests/Cuda/ProperLinkFlags/file1.h @@ -0,0 +1,7 @@ + +#pragma once +struct result_type +{ + int input; + int sum; +}; diff --git a/Tests/Cuda/ProperLinkFlags/main.cxx b/Tests/Cuda/ProperLinkFlags/main.cxx new file mode 100644 index 0000000..7c0ee9e --- /dev/null +++ b/Tests/Cuda/ProperLinkFlags/main.cxx @@ -0,0 +1,9 @@ + +#include <iostream> + +#include "file1.h" + +int main(int argc, char** argv) +{ + return 0; +} diff --git a/Tests/Cuda/ToolkitInclude/CMakeLists.txt b/Tests/Cuda/ToolkitInclude/CMakeLists.txt new file mode 100644 index 0000000..f246b54 --- /dev/null +++ b/Tests/Cuda/ToolkitInclude/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.8) +project (ToolkitInclude CXX CUDA) + +#Goal for this example: +# Validate that between the CXX implicit include directories and the +# CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES directories we can find +# the cuda runtime headers + +add_executable(CudaToolkitInclude main.cpp) +target_include_directories(CudaToolkitInclude PRIVATE + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) diff --git a/Tests/Cuda/ToolkitInclude/main.cpp b/Tests/Cuda/ToolkitInclude/main.cpp new file mode 100644 index 0000000..c8d5c6b --- /dev/null +++ b/Tests/Cuda/ToolkitInclude/main.cpp @@ -0,0 +1,8 @@ +// Only thing we care about is that these headers are found +#include <cuda.h> +#include <cuda_runtime_api.h> + +int main() +{ + return 0; +} diff --git a/Tests/Cuda/WithC/CMakeLists.txt b/Tests/Cuda/WithC/CMakeLists.txt new file mode 100644 index 0000000..831ce12 --- /dev/null +++ b/Tests/Cuda/WithC/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.7) +project(CudaComplex CUDA C) + +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") + +add_executable(CudaWithC main.c cuda.cu) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaWithC PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/Cuda/WithC/cuda.cu b/Tests/Cuda/WithC/cuda.cu new file mode 100644 index 0000000..06bd7b9 --- /dev/null +++ b/Tests/Cuda/WithC/cuda.cu @@ -0,0 +1,16 @@ +#include <cuda.h> + +#include <iostream> + +extern "C" int use_cuda(void) +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + std::cout << "Found " << nDevices << " CUDA enabled devices" << std::endl; + return 0; +} diff --git a/Tests/Cuda/WithC/main.c b/Tests/Cuda/WithC/main.c new file mode 100644 index 0000000..cb5fddc --- /dev/null +++ b/Tests/Cuda/WithC/main.c @@ -0,0 +1,14 @@ +extern int use_cuda(void); + +#ifdef _WIN32 +#include <windows.h> +#endif + +int main() +{ +#ifdef _WIN32 + /* Use an API that requires CMake's "standard" C libraries. */ + GetOpenFileName(NULL); +#endif + return use_cuda(); +} |