diff options
Diffstat (limited to 'Tests/CudaOnly')
36 files changed, 1021 insertions, 0 deletions
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt new file mode 100644 index 0000000..59f3e84 --- /dev/null +++ b/Tests/CudaOnly/CMakeLists.txt @@ -0,0 +1,12 @@ + +ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard) +ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX) +ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) +ADD_TEST_MACRO(CudaOnly.LinkSystemDeviceLibraries CudaOnlyLinkSystemDeviceLibraries) +ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) +ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) +ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs) + +if(MSVC) + ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB) +endif() diff --git a/Tests/CudaOnly/EnableStandard/CMakeLists.txt b/Tests/CudaOnly/EnableStandard/CMakeLists.txt new file mode 100644 index 0000000..35a1deb --- /dev/null +++ b/Tests/CudaOnly/EnableStandard/CMakeLists.txt @@ -0,0 +1,26 @@ + +cmake_minimum_required(VERSION 3.7) +project (CudaOnlyEnableStandard CUDA) + +#Goal for this example: +#build cuda sources that require C++11 to be enabled. + +add_library(CUDAStatic11 STATIC static.cu) +add_library(CUDADynamic11 SHARED shared.cu) + +add_executable(CudaOnlyEnableStandard main.cu) +target_link_libraries(CudaOnlyEnableStandard PRIVATE CUDAStatic11 CUDADynamic11) + +set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD 11) +set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD_REQUIRED TRUE) + +#Verify CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES +foreach(dir ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + if(NOT IS_DIRECTORY "${dir}") + message(FATAL_ERROR + "CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES entry\n" + " ${dir}\n" + "is not an existing directory." + ) + endif() +endforeach() diff --git a/Tests/CudaOnly/EnableStandard/main.cu b/Tests/CudaOnly/EnableStandard/main.cu new file mode 100644 index 0000000..740c832 --- /dev/null +++ b/Tests/CudaOnly/EnableStandard/main.cu @@ -0,0 +1,23 @@ + +#include <iostream> + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +#else +# define IMPORT +#endif + +int static_cuda11_func(int); +IMPORT int shared_cuda11_func(int); + +void test_functions() +{ + static_cuda11_func(int(42)); + shared_cuda11_func(int(42)); +} + +int main(int argc, char** argv) +{ + test_functions(); + return 0; +} diff --git a/Tests/CudaOnly/EnableStandard/shared.cu b/Tests/CudaOnly/EnableStandard/shared.cu new file mode 100644 index 0000000..004cb83 --- /dev/null +++ b/Tests/CudaOnly/EnableStandard/shared.cu @@ -0,0 +1,15 @@ + +#include <type_traits> + +#ifdef _WIN32 +# define EXPORT __declspec(dllexport) +#else +# define EXPORT +#endif + +using tt = std::true_type; +using ft = std::false_type; +EXPORT int __host__ shared_cuda11_func(int x) +{ + return x * x + std::integral_constant<int, 17>::value; +} diff --git a/Tests/CudaOnly/EnableStandard/static.cu b/Tests/CudaOnly/EnableStandard/static.cu new file mode 100644 index 0000000..73e43a8 --- /dev/null +++ b/Tests/CudaOnly/EnableStandard/static.cu @@ -0,0 +1,9 @@ + +#include <type_traits> + +using tt = std::true_type; +using ft = std::false_type; +int __host__ static_cuda11_func(int x) +{ + return x * x + std::integral_constant<int, 17>::value; +} diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt new file mode 100644 index 0000000..65d5243 --- /dev/null +++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt @@ -0,0 +1,81 @@ +cmake_minimum_required(VERSION 3.8) +project (CudaOnlyExportPTX CUDA) + +#Goal for this example: +# How to generate PTX files instead of OBJECT files +# How to reference PTX files for custom commands +# How to install PTX files + +add_library(CudaPTX OBJECT kernelA.cu kernelB.cu) +set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON) + +#Test ObjectFiles with file(GENERATE) +file(GENERATE + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/gen_$<LOWER_CASE:$<CONFIG>/>path_to_objs.h + CONTENT [[ + +#include <vector> +#include <string> + +#ifndef path_to_objs +#define path_to_objs + +static std::string ptx_paths = "$<TARGET_OBJECTS:CudaPTX>"; + +#endif + +]] +) +#We are going to need a wrapper around bin2c for multiple reasons +# 1. bin2c only converts a single file at a time +# 2. bin2c has only standard out support, so we have to manually +# redirect to a cmake buffer +# 3. We want to pack everything into a single output file, so we +# need to also pass the --name option +set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h) + +get_filename_component(cuda_compiler_bin "${CMAKE_CUDA_COMPILER}" DIRECTORY) +find_program(bin_to_c + NAMES bin2c + PATHS ${cuda_compiler_bin} + ) +if(NOT bin_to_c) + message(FATAL_ERROR + "bin2c not found:\n" + " CMAKE_CUDA_COMPILER='${CMAKE_CUDA_COMPILER}'\n" + " cuda_compiler_bin='${cuda_compiler_bin}'\n" + ) +endif() + +add_custom_command( + OUTPUT "${output_file}" + COMMAND ${CMAKE_COMMAND} + "-DBIN_TO_C_COMMAND=${bin_to_c}" + "-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>" + "-DOUTPUT=${output_file}" + -P ${CMAKE_CURRENT_SOURCE_DIR}/bin2c_wrapper.cmake + VERBATIM + DEPENDS $<TARGET_OBJECTS:CudaPTX> + COMMENT "Converting Object files to a C header" + ) + +add_executable(CudaOnlyExportPTX main.cu ${output_file}) +add_dependencies(CudaOnlyExportPTX CudaPTX) +target_include_directories(CudaOnlyExportPTX PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} ) +target_compile_definitions(CudaOnlyExportPTX PRIVATE + "CONFIG_TYPE=gen_$<LOWER_CASE:$<CONFIG>>") + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyExportPTX PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() + +#Verify that we can install object targets properly +install(TARGETS CudaPTX CudaOnlyExportPTX + EXPORT cudaPTX + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + OBJECTS DESTINATION objs + ) +install(EXPORT cudaPTX DESTINATION lib/cudaPTX) diff --git a/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake b/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake new file mode 100644 index 0000000..0baf934 --- /dev/null +++ b/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake @@ -0,0 +1,19 @@ + +set(file_contents) +foreach(obj ${OBJECTS}) + get_filename_component(obj_ext ${obj} EXT) + get_filename_component(obj_name ${obj} NAME_WE) + get_filename_component(obj_dir ${obj} DIRECTORY) + + if(obj_ext MATCHES ".ptx") + set(args --name ${obj_name} ${obj}) + execute_process(COMMAND "${BIN_TO_C_COMMAND}" ${args} + WORKING_DIRECTORY ${obj_dir} + RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error_var + ) + set(file_contents "${file_contents} \n${output}") + endif() +endforeach() +file(WRITE "${OUTPUT}" "${file_contents}") diff --git a/Tests/CudaOnly/ExportPTX/kernelA.cu b/Tests/CudaOnly/ExportPTX/kernelA.cu new file mode 100644 index 0000000..fbe0d26 --- /dev/null +++ b/Tests/CudaOnly/ExportPTX/kernelA.cu @@ -0,0 +1,7 @@ + +__global__ void kernelA(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/ExportPTX/kernelB.cu b/Tests/CudaOnly/ExportPTX/kernelB.cu new file mode 100644 index 0000000..11872e4 --- /dev/null +++ b/Tests/CudaOnly/ExportPTX/kernelB.cu @@ -0,0 +1,8 @@ + + +__global__ void kernelB(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/ExportPTX/main.cu b/Tests/CudaOnly/ExportPTX/main.cu new file mode 100644 index 0000000..132377c --- /dev/null +++ b/Tests/CudaOnly/ExportPTX/main.cu @@ -0,0 +1,28 @@ + +#include <iostream> + +/* + Define GENERATED_HEADER macro to allow c++ files to include headers + generated based on different configuration types. +*/ + +/* clang-format off */ +#define GENERATED_HEADER(x) GENERATED_HEADER0(CONFIG_TYPE/x) +/* clang-format on */ +#define GENERATED_HEADER0(x) GENERATED_HEADER1(x) +#define GENERATED_HEADER1(x) <x> + +#include GENERATED_HEADER(path_to_objs.h) + +#include "embedded_objs.h" + +int main(int argc, char** argv) +{ + (void)argc; + (void)argv; + + unsigned char* ka = kernelA; + unsigned char* kb = kernelB; + + return (ka != NULL && kb != NULL) ? 0 : 1; +} diff --git a/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt b/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt new file mode 100644 index 0000000..5b96906 --- /dev/null +++ b/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt @@ -0,0 +1,23 @@ + +cmake_minimum_required(VERSION 3.7) +project (CudaOnlGPUDebugFlag CUDA) + +#Goal for this example: +#verify that -G enables gpu debug flags +string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_30,code=compute_30") +string(APPEND CMAKE_CUDA_FLAGS " -G") +set(CMAKE_CUDA_STANDARD 11) + +add_executable(CudaOnlyGPUDebugFlag main.cu) + +if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9.0.0) + #CUDA's __CUDACC_DEBUG__ define was added in 9.0 + #so if we are below 9.0.0 we will manually add the define so that the test + #passes + target_compile_definitions(CudaOnlyGPUDebugFlag PRIVATE "__CUDACC_DEBUG__") +endif() + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyGPUDebugFlag PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/GPUDebugFlag/main.cu b/Tests/CudaOnly/GPUDebugFlag/main.cu new file mode 100644 index 0000000..1f3fc12 --- /dev/null +++ b/Tests/CudaOnly/GPUDebugFlag/main.cu @@ -0,0 +1,71 @@ +#include <cuda.h> +#include <cuda_runtime.h> +#include <iostream> + +static __global__ void debug_kernel(bool* has_debug) +{ +// Verify using the return code if we have GPU debug flag enabled +#if defined(__CUDACC__) && defined(__CUDACC_DEBUG__) + *has_debug = true; +#else + *has_debug = false; +#endif +} + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + if (prop.major >= 3) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + +int main(int argc, char** argv) +{ + bool* has_debug; + cudaError_t err = cudaMallocManaged(&has_debug, sizeof(bool)); + if (err != cudaSuccess) { + std::cerr << "cudaMallocManaged failed:\n" + << " " << cudaGetErrorString(err) << std::endl; + return 1; + } + + debug_kernel<<<1, 1>>>(has_debug); + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) { + std::cerr << "debug_kernel: kernel launch shouldn't have failed\n" + << "reason:\t" << cudaGetErrorString(err) << std::endl; + return 1; + } + if (*has_debug == false) { + std::cerr << "debug_kernel: kernel not compiled with device debug" + << std::endl; + return 1; + } + return 0; +} diff --git a/Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt b/Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt new file mode 100644 index 0000000..62be1e6 --- /dev/null +++ b/Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.8) +project(CudaOnlyLinkSystemDeviceLibraries CUDA) + +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_35,code=compute_35 -gencode arch=compute_35,code=sm_35") +set(CMAKE_CUDA_STANDARD 11) + +add_executable(CudaOnlyLinkSystemDeviceLibraries main.cu) +set_target_properties( CudaOnlyLinkSystemDeviceLibraries + PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +target_link_libraries( CudaOnlyLinkSystemDeviceLibraries PRIVATE cublas_device) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyLinkSystemDeviceLibraries PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu b/Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu new file mode 100644 index 0000000..7eecec1 --- /dev/null +++ b/Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu @@ -0,0 +1,77 @@ + +#include <cublas_v2.h> +#include <cuda_runtime.h> +#include <iostream> + +__global__ void deviceCublasSgemm(int n, float alpha, float beta, + const float* d_A, const float* d_B, + float* d_C) +{ + cublasHandle_t cnpHandle; + cublasStatus_t status = cublasCreate(&cnpHandle); + + if (status != CUBLAS_STATUS_SUCCESS) { + return; + } + + // Call function defined in the cublas_device system static library. + // This way we can verify that we properly pass system libraries to the + // device link line + status = cublasSgemm(cnpHandle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha, + d_A, n, d_B, n, &beta, d_C, n); + + cublasDestroy(cnpHandle); +} + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + + if (prop.major > 3 || (prop.major == 3 && prop.minor >= 5)) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.5" + << std::endl; + return 1; +} + +int main(int argc, char** argv) +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + // initial values that will make sure that the cublasSgemm won't actually + // do any work + int n = 0; + float alpha = 1; + float beta = 1; + float* d_A = nullptr; + float* d_B = nullptr; + float* d_C = nullptr; + deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C); + + return 0; +} diff --git a/Tests/CudaOnly/PDB/CMakeLists.txt b/Tests/CudaOnly/PDB/CMakeLists.txt new file mode 100644 index 0000000..34e1e5c --- /dev/null +++ b/Tests/CudaOnly/PDB/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.11) +project (CudaOnlyPDB CUDA) + +add_executable(CudaOnlyPDB main.cu) +set_target_properties(CudaOnlyPDB PROPERTIES + PDB_NAME LinkPDBName + PDB_OUTPUT_DIRECTORY LinkPDBDir + COMPILE_PDB_NAME CompPDBName + COMPILE_PDB_OUTPUT_DIRECTORY CompPDBDir + ) + +set(pdbs + ${CMAKE_CURRENT_BINARY_DIR}/CompPDBDir/${CMAKE_CFG_INTDIR}/CompPDBName.pdb + ${CMAKE_CURRENT_BINARY_DIR}/LinkPDBDir/${CMAKE_CFG_INTDIR}/LinkPDBName.pdb + ) +add_custom_command(TARGET CudaOnlyPDB POST_BUILD + COMMAND ${CMAKE_COMMAND} -Dconfig=$<CONFIG> "-Dpdbs=${pdbs}" + -P ${CMAKE_CURRENT_SOURCE_DIR}/check_pdbs.cmake + ) diff --git a/Tests/CudaOnly/PDB/check_pdbs.cmake b/Tests/CudaOnly/PDB/check_pdbs.cmake new file mode 100644 index 0000000..5e01ca7 --- /dev/null +++ b/Tests/CudaOnly/PDB/check_pdbs.cmake @@ -0,0 +1,10 @@ +if(NOT "${config}" MATCHES "[Dd][Ee][Bb]") + return() +endif() +foreach(pdb ${pdbs}) + if(EXISTS "${pdb}") + message(STATUS "PDB Exists: ${pdb}") + else() + message(SEND_ERROR "PDB MISSING:\n ${pdb}") + endif() +endforeach() diff --git a/Tests/CudaOnly/PDB/main.cu b/Tests/CudaOnly/PDB/main.cu new file mode 100644 index 0000000..f8b643a --- /dev/null +++ b/Tests/CudaOnly/PDB/main.cu @@ -0,0 +1,4 @@ +int main() +{ + return 0; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt new file mode 100644 index 0000000..0c453a9 --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt @@ -0,0 +1,55 @@ +cmake_minimum_required(VERSION 3.7) +project (CudaOnlyResolveDeviceSymbols CUDA) + +# Find nm and dumpbin +if(CMAKE_NM) + set(dump_command ${CMAKE_NM}) + set(dump_args -g) +else() + include(GetPrerequisites) + message(STATUS "calling list_prerequisites to find dumpbin") + list_prerequisites("${CMAKE_COMMAND}" 0 0 0) + if(gp_dumpbin) + set(dump_command ${gp_dumpbin}) + set(dump_args /ARCHIVEMEMBERS) + endif() +endif() + +#Goal for this example: +# Build a static library that defines multiple methods and kernels that +# use each other. +# Resolve the device symbols into that static library +# Verify that we can't use those device symbols from anything that links +# to the static library +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"") +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CUDA_STANDARD 11) + +add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu) +set_target_properties(CUDAResolveDeviceLib + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + CUDA_RESOLVE_DEVICE_SYMBOLS ON + POSITION_INDEPENDENT_CODE ON) + +if(dump_command) +add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD + COMMAND ${CMAKE_COMMAND} + -DDUMP_COMMAND=${dump_command} + -DDUMP_ARGS=${dump_args} + -DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDAResolveDeviceLib> + -P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake + ) +endif() + +add_executable(CudaOnlyResolveDeviceSymbols main.cu) +set_target_properties(CudaOnlyResolveDeviceSymbols + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON) + +target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyResolveDeviceSymbols PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu new file mode 100644 index 0000000..1ce63bf --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu @@ -0,0 +1,10 @@ + +#include "file1.h" + +result_type __device__ file1_func(int x) +{ + result_type r; + r.input = x; + r.sum = x * x; + return r; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h new file mode 100644 index 0000000..ff1945c --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h @@ -0,0 +1,7 @@ + +#pragma once +struct result_type +{ + int input; + int sum; +}; diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu new file mode 100644 index 0000000..278fd6c --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu @@ -0,0 +1,25 @@ + +#include "file2.h" + +result_type __device__ file1_func(int x); + +result_type_dynamic __device__ file2_func(int x) +{ + const result_type r = file1_func(x); + const result_type_dynamic rd{ r.input, r.sum, true }; + return rd; +} + +static __global__ void file2_kernel(result_type_dynamic& r, int x) +{ + // call static_func which is a method that is defined in the + // static library that is always out of date + r = file2_func(x); +} + +int file2_launch_kernel(int x) +{ + result_type_dynamic r; + file2_kernel<<<1, 1>>>(r, x); + return r.sum; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h new file mode 100644 index 0000000..d2dbaa4 --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h @@ -0,0 +1,10 @@ + +#pragma once +#include "file1.h" + +struct result_type_dynamic +{ + int input; + int sum; + bool from_static; +}; diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu new file mode 100644 index 0000000..d464f96 --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu @@ -0,0 +1,76 @@ + +#include <iostream> + +#include "file1.h" +#include "file2.h" + +int file2_launch_kernel(int x); + +result_type_dynamic __device__ file2_func(int x); +static __global__ void main_kernel(result_type_dynamic& r, int x) +{ + // call function that was not device linked to us, this will cause + // a runtime failure of "invalid device function" + r = file2_func(x); +} + +int main_launch_kernel(int x) +{ + result_type_dynamic r; + main_kernel<<<1, 1>>>(r, x); + return r.sum; +} + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + std::cout << "prop.major: " << prop.major << std::endl; + if (prop.major >= 3) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + +int main(int argc, char** argv) +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + main_launch_kernel(1); + cudaError_t err = cudaGetLastError(); + if (err == cudaSuccess) { + // This kernel launch should fail as the file2_func was device linked + // into the static library and is not usable by the executable + std::cerr << "main_launch_kernel: kernel launch should have failed" + << std::endl; + return 1; + } + + return 0; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake new file mode 100644 index 0000000..94d388b --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake @@ -0,0 +1,14 @@ +execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH} + RESULT_VARIABLE RESULT + OUTPUT_VARIABLE OUTPUT + ERROR_VARIABLE ERROR +) + +if(NOT "${RESULT}" STREQUAL "0") + message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]") +endif() + +if(NOT "${OUTPUT}" MATCHES "(cmake_device_link|device-link)") + message(FATAL_ERROR + "No cuda device objects found, device linking did not occur") +endif() diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt new file mode 100644 index 0000000..c934c51 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt @@ -0,0 +1,61 @@ + +cmake_minimum_required(VERSION 3.7) +project (CudaOnlySeparateCompilation CUDA) + +#Goal for this example: +#Build a static library that defines multiple methods and kernels that +#use each other. +#After that confirm that we can call those methods from dynamic libraries +#and executables. +#We complicate the matter by also testing that multiple static libraries +#all containing cuda separable compilation code links properly +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=\\\"compute_30,sm_30,sm_35\\\"") +string(APPEND CMAKE_CUDA_FLAGS " --generate-code=arch=compute_50,code=[compute_50,sm_50,sm_52]") +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CUDA_STANDARD 11) + +set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) +add_library(CUDASeparateLibA STATIC file1.cu file2.cu file3.cu) +get_property(sep_comp TARGET CUDASeparateLibA PROPERTY CUDA_SEPARABLE_COMPILATION) +if(NOT sep_comp) + message(FATAL_ERROR "CUDA_SEPARABLE_COMPILATION not initialized") +endif() +unset(CMAKE_CUDA_SEPARABLE_COMPILATION) + +if(CMAKE_CUDA_SIMULATE_ID STREQUAL "MSVC") + # Test adding a flag that is not in our CUDA flag table for VS. + if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 8) + string(APPEND CMAKE_CUDA_FLAGS " --ftemplate-depth 50") + endif() + # Test adding a flag that nvcc should pass to the host compiler. + target_compile_options(CUDASeparateLibA PRIVATE -Xcompiler=-bigobj) +endif() + +#Having file4/file5 in a shared library causes serious problems +#with the nvcc linker and it will generate bad entries that will +#cause a segv when trying to run the executable +# +add_library(CUDASeparateLibB STATIC file4.cu file5.cu) +target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA) + +add_executable(CudaOnlySeparateCompilation main.cu) +target_link_libraries(CudaOnlySeparateCompilation + PRIVATE CUDASeparateLibB) + +set_target_properties(CUDASeparateLibA + CUDASeparateLibB + PROPERTIES CUDA_SEPARABLE_COMPILATION ON + POSITION_INDEPENDENT_CODE ON) + +if (CMAKE_GENERATOR MATCHES "^Visual Studio") + #Visual Studio CUDA integration will not perform device linking + #on a target that itself does not have GenerateRelocatableDeviceCode + #enabled. + set_target_properties(CudaOnlySeparateCompilation + PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +endif() + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/SeparateCompilation/file1.cu b/Tests/CudaOnly/SeparateCompilation/file1.cu new file mode 100644 index 0000000..1ce63bf --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/file1.cu @@ -0,0 +1,10 @@ + +#include "file1.h" + +result_type __device__ file1_func(int x) +{ + result_type r; + r.input = x; + r.sum = x * x; + return r; +} diff --git a/Tests/CudaOnly/SeparateCompilation/file1.h b/Tests/CudaOnly/SeparateCompilation/file1.h new file mode 100644 index 0000000..ff1945c --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/file1.h @@ -0,0 +1,7 @@ + +#pragma once +struct result_type +{ + int input; + int sum; +}; diff --git a/Tests/CudaOnly/SeparateCompilation/file2.cu b/Tests/CudaOnly/SeparateCompilation/file2.cu new file mode 100644 index 0000000..74f3558 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/file2.cu @@ -0,0 +1,16 @@ + +#include "file2.h" + +result_type __device__ file1_func(int x); + +result_type_dynamic __device__ file2_func(int x) +{ + if (x != 42) { + const result_type r = file1_func(x); + const result_type_dynamic rd{ r.input, r.sum, true }; + return rd; + } else { + const result_type_dynamic rd{ x, x * x * x, false }; + return rd; + } +} diff --git a/Tests/CudaOnly/SeparateCompilation/file2.h b/Tests/CudaOnly/SeparateCompilation/file2.h new file mode 100644 index 0000000..d2dbaa4 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/file2.h @@ -0,0 +1,10 @@ + +#pragma once +#include "file1.h" + +struct result_type_dynamic +{ + int input; + int sum; + bool from_static; +}; diff --git a/Tests/CudaOnly/SeparateCompilation/file3.cu b/Tests/CudaOnly/SeparateCompilation/file3.cu new file mode 100644 index 0000000..155b513 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/file3.cu @@ -0,0 +1,22 @@ + + +#include "file1.h" +#include "file2.h" + +result_type __device__ file1_func(int x); +result_type_dynamic __device__ file2_func(int x); + +static __global__ void file3_kernel(result_type& r, int x) +{ + // call static_func which is a method that is defined in the + // static library that is always out of date + r = file1_func(x); + result_type_dynamic rd = file2_func(x); +} + +result_type file3_launch_kernel(int x) +{ + result_type r; + file3_kernel<<<1, 1>>>(r, x); + return r; +} diff --git a/Tests/CudaOnly/SeparateCompilation/file4.cu b/Tests/CudaOnly/SeparateCompilation/file4.cu new file mode 100644 index 0000000..2e3e01e --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/file4.cu @@ -0,0 +1,23 @@ + +#include <iostream> + +#include "file1.h" +#include "file2.h" + +result_type __device__ file1_func(int x); +result_type_dynamic __device__ file2_func(int x); + +static __global__ void file4_kernel(result_type& r, int x) +{ + // call static_func which is a method that is defined in the + // static library that is always out of date + r = file1_func(x); + result_type_dynamic rd = file2_func(x); +} + +int file4_launch_kernel(int x) +{ + result_type r; + file4_kernel<<<1, 1>>>(r, x); + return r.sum; +} diff --git a/Tests/CudaOnly/SeparateCompilation/file5.cu b/Tests/CudaOnly/SeparateCompilation/file5.cu new file mode 100644 index 0000000..fee8e9e --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/file5.cu @@ -0,0 +1,23 @@ + +#include <iostream> + +#include "file1.h" +#include "file2.h" + +result_type __device__ file1_func(int x); +result_type_dynamic __device__ file2_func(int x); + +static __global__ void file5_kernel(result_type& r, int x) +{ + // call static_func which is a method that is defined in the + // static library that is always out of date + r = file1_func(x); + result_type_dynamic rd = file2_func(x); +} + +int file5_launch_kernel(int x) +{ + result_type r; + file5_kernel<<<1, 1>>>(r, x); + return r.sum; +} diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main.cu new file mode 100644 index 0000000..40dbe5d --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/main.cu @@ -0,0 +1,68 @@ + +#include <iostream> + +#include "file1.h" +#include "file2.h" + +int file4_launch_kernel(int x); +int file5_launch_kernel(int x); + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + if (prop.major >= 3) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + +int main(int argc, char** argv) +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + cudaError_t err; + file4_launch_kernel(42); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file4_launch_kernel: kernel launch failed: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + + file5_launch_kernel(42); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file5_launch_kernel: kernel launch failed: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + + return 0; +} diff --git a/Tests/CudaOnly/WithDefs/CMakeLists.txt b/Tests/CudaOnly/WithDefs/CMakeLists.txt new file mode 100644 index 0000000..926d9ed --- /dev/null +++ b/Tests/CudaOnly/WithDefs/CMakeLists.txt @@ -0,0 +1,50 @@ + +cmake_minimum_required(VERSION 3.7) +project (CudaOnlyWithDefs CUDA) + +#verify that we can pass explicit cuda arch flags +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") +if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 9) + set(debug_compile_flags --generate-code arch=compute_32,code=sm_32) +else() + set(debug_compile_flags --generate-code arch=compute_20,code=sm_20) +endif() +if(CMAKE_CUDA_SIMULATE_ID STREQUAL "MSVC") + list(APPEND debug_compile_flags -Xcompiler=-WX) +else() + list(APPEND debug_compile_flags -Xcompiler=-Werror) +endif() +set(release_compile_defs DEFREL) + +#Goal for this example: +#build a executable that needs to be passed a complex define through add_defintions +#this verifies we can pass things such as '_','(' to nvcc +add_definitions("-DPACKED_DEFINE=__attribute__((packed))") + +add_executable(CudaOnlyWithDefs main.notcu) +set_source_files_properties(main.notcu PROPERTIES LANGUAGE CUDA) + +target_compile_options(CudaOnlyWithDefs + PRIVATE + -DFLAG_COMPILE_LANG_$<COMPILE_LANGUAGE> + -DFLAG_LANG_IS_CUDA=$<COMPILE_LANGUAGE:CUDA> + -Xcompiler=-DHOST_DEFINE + $<$<CONFIG:DEBUG>:$<BUILD_INTERFACE:${debug_compile_flags}>> + ) + +target_compile_definitions(CudaOnlyWithDefs + PRIVATE + $<$<CONFIG:RELEASE>:$<BUILD_INTERFACE:${release_compile_defs}>> + -DDEF_COMPILE_LANG_$<COMPILE_LANGUAGE> + -DDEF_LANG_IS_CUDA=$<COMPILE_LANGUAGE:CUDA> + ) + +target_include_directories(CudaOnlyWithDefs + PRIVATE + $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CURRENT_SOURCE_DIR}/inc_cuda> +) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyWithDefs PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/WithDefs/inc_cuda/inc_cuda.h b/Tests/CudaOnly/WithDefs/inc_cuda/inc_cuda.h new file mode 100644 index 0000000..e228b58 --- /dev/null +++ b/Tests/CudaOnly/WithDefs/inc_cuda/inc_cuda.h @@ -0,0 +1 @@ +#define INC_CUDA diff --git a/Tests/CudaOnly/WithDefs/main.notcu b/Tests/CudaOnly/WithDefs/main.notcu new file mode 100644 index 0000000..98f73ce --- /dev/null +++ b/Tests/CudaOnly/WithDefs/main.notcu @@ -0,0 +1,86 @@ +#include <cuda.h> +#include <cuda_runtime.h> +#include <iostream> + +#include <inc_cuda.h> +#ifndef INC_CUDA +# error "INC_CUDA not defined!" +#endif + +#ifndef HOST_DEFINE +# error "HOST_DEFINE not defined!" +#endif + +#ifndef PACKED_DEFINE +# error "PACKED_DEFINE not defined!" +#endif + +#ifndef FLAG_COMPILE_LANG_CUDA +# error "FLAG_COMPILE_LANG_CUDA not defined!" +#endif + +#ifndef FLAG_LANG_IS_CUDA +# error "FLAG_LANG_IS_CUDA not defined!" +#endif + +#if !FLAG_LANG_IS_CUDA +# error "Expected FLAG_LANG_IS_CUDA" +#endif + +#ifndef DEF_COMPILE_LANG_CUDA +# error "DEF_COMPILE_LANG_CUDA not defined!" +#endif + +#ifndef DEF_LANG_IS_CUDA +# error "DEF_LANG_IS_CUDA not defined!" +#endif + +#if !DEF_LANG_IS_CUDA +# error "Expected DEF_LANG_IS_CUDA" +#endif + +static __global__ void DetermineIfValidCudaDevice() +{ +} + +#ifdef _MSC_VER +# pragma pack(push, 1) +# undef PACKED_DEFINE +# define PACKED_DEFINE +#endif +struct PACKED_DEFINE result_type +{ + bool valid; + int value; +#if defined(NDEBUG) && !defined(DEFREL) +# error missing DEFREL flag +#endif +}; +#ifdef _MSC_VER +# pragma pack(pop) +#endif + +result_type can_launch_kernel() +{ + result_type r; + DetermineIfValidCudaDevice<<<1, 1>>>(); + r.valid = (cudaSuccess == cudaGetLastError()); + if (r.valid) { + r.value = 1; + } else { + r.value = -1; + } + return r; +} + +int main(int argc, char** argv) +{ + cudaError_t err; + int nDevices = 0; + err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << cudaGetErrorString(err) << std::endl; + return 1; + } + return 0; +} |