diff --git a/Help/release/dev/FindCUDA-cubin-fatbin.rst b/Help/release/dev/FindCUDA-cubin-fatbin.rst new file mode 100644 index 000000000..d44da8e87 --- /dev/null +++ b/Help/release/dev/FindCUDA-cubin-fatbin.rst @@ -0,0 +1,5 @@ +FindCUDA-cubin-fatbin +--------------------- + +* The :module:`FindCUDA` module learned to support ``fatbin`` and ``cubin`` + modules. diff --git a/Modules/FindCUDA.cmake b/Modules/FindCUDA.cmake index c02809d79..1f6962083 100644 --- a/Modules/FindCUDA.cmake +++ b/Modules/FindCUDA.cmake @@ -31,8 +31,8 @@ # The following variables affect the behavior of the macros in the # script (in alphebetical order). Note that any of these flags can be # changed multiple times in the same directory before calling -# CUDA_ADD_EXECUTABLE, CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX -# or CUDA_WRAP_SRCS:: +# CUDA_ADD_EXECUTABLE, CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX, +# CUDA_COMPILE_FATBIN, CUDA_COMPILE_CUBIN or CUDA_WRAP_SRCS:: # # CUDA_64_BIT_DEVICE_CODE (Default matches host bit size) # -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code. @@ -152,6 +152,12 @@ # CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] ) # -- Returns a list of PTX files generated from the input source files. # +# CUDA_COMPILE_FATBIN( generated_files file0 file1 ... [OPTIONS ...] ) +# -- Returns a list of FATBIN files generated from the input source files. +# +# CUDA_COMPILE_CUBIN( generated_files file0 file1 ... [OPTIONS ...] ) +# -- Returns a list of CUBIN files generated from the input source files. +# # CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var # cuda_target # object_files ) @@ -1016,7 +1022,7 @@ endfunction() # a .cpp or .ptx file. # INPUT: # cuda_target - Target name -# format - PTX or OBJ +# format - PTX, CUBIN, FATBIN or OBJ # FILE1 .. FILEN - The remaining arguments are the sources to be wrapped. # OPTIONS - Extra options to NVCC # OUTPUT: @@ -1202,16 +1208,22 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) set(_cuda_source_format ${format}) endif() - if( ${_cuda_source_format} MATCHES "PTX" ) - set( compile_to_ptx ON ) - elseif( ${_cuda_source_format} MATCHES "OBJ") - set( compile_to_ptx OFF ) + if( ${_cuda_source_format} MATCHES "OBJ") + set( cuda_compile_to_external_module OFF ) else() - message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS for file '${file}': '${_cuda_source_format}'. Use OBJ or PTX.") + set( cuda_compile_to_external_module ON ) + if( ${_cuda_source_format} MATCHES "PTX" ) + set( cuda_compile_to_external_module_type "ptx" ) + elseif( ${_cuda_source_format} MATCHES "CUBIN") + set( cuda_compile_to_external_module_type "cubin" ) + elseif( ${_cuda_source_format} MATCHES "FATBIN") + set( cuda_compile_to_external_module_type "fatbin" ) + else() + message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS for file '${file}': '${_cuda_source_format}'. Use OBJ, PTX, CUBIN or FATBIN.") + endif() endif() - - if(compile_to_ptx) + if(cuda_compile_to_external_module) # Don't use any of the host compilation flags for PTX targets. set(CUDA_HOST_FLAGS) set(CUDA_NVCC_FLAGS_CONFIG) @@ -1226,7 +1238,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) if(CUDA_GENERATED_OUTPUT_DIR) set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}") else() - if ( compile_to_ptx ) + if ( cuda_compile_to_external_module ) set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}") else() set(cuda_compile_output_dir "${cuda_compile_intermediate_directory}") @@ -1236,10 +1248,10 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) # Add a custom target to generate a c or ptx file. ###################### get_filename_component( basename ${file} NAME ) - if( compile_to_ptx ) + if( cuda_compile_to_external_module ) set(generated_file_path "${cuda_compile_output_dir}") - set(generated_file_basename "${cuda_target}_generated_${basename}.ptx") - set(format_flag "-ptx") + set(generated_file_basename "${cuda_target}_generated_${basename}.${cuda_compile_to_external_module_type}") + set(format_flag "-${cuda_compile_to_external_module_type}") file(MAKE_DIRECTORY "${cuda_compile_output_dir}") else() set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}") @@ -1262,7 +1274,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake") # Setup properties for obj files: - if( NOT compile_to_ptx ) + if( NOT cuda_compile_to_external_module ) set_source_files_properties("${generated_file}" PROPERTIES EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked. @@ -1277,7 +1289,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}") endif() - if( NOT compile_to_ptx AND CUDA_SEPARABLE_COMPILATION) + if( NOT cuda_compile_to_external_module AND CUDA_SEPARABLE_COMPILATION) list(APPEND ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS "${generated_file}") endif() @@ -1294,7 +1306,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) # Build the NVCC made dependency file ################################### set(build_cubin OFF) if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN ) - if ( NOT compile_to_ptx ) + if ( NOT cuda_compile_to_external_module ) set ( build_cubin ON ) endif() endif() @@ -1321,8 +1333,8 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) # Create up the comment string file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") - if(compile_to_ptx) - set(cuda_build_comment_string "Building NVCC ptx file ${generated_file_relative_path}") + if(cuda_compile_to_external_module) + set(cuda_build_comment_string "Building NVCC ${cuda_compile_to_external_module_type} file ${generated_file_relative_path}") else() set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}") endif() @@ -1552,21 +1564,29 @@ endmacro() ############################################################################### ############################################################################### -# CUDA COMPILE +# (Internal) helper for manually added cuda source files with specific targets ############################################################################### ############################################################################### -macro(CUDA_COMPILE generated_files) +macro(cuda_compile_base cuda_target format generated_files) # Separate the sources from the options CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( cuda_compile OBJ _generated_files ${_sources} ${_cmake_options} + CUDA_WRAP_SRCS( ${cuda_target} ${format} _generated_files ${_sources} ${_cmake_options} OPTIONS ${_options} ) set( ${generated_files} ${_generated_files}) endmacro() +############################################################################### +############################################################################### +# CUDA COMPILE +############################################################################### +############################################################################### +macro(CUDA_COMPILE generated_files) + cuda_compile_base(cuda_compile OBJ ${generated_files} ${ARGN}) +endmacro() ############################################################################### ############################################################################### @@ -1574,17 +1594,28 @@ endmacro() ############################################################################### ############################################################################### macro(CUDA_COMPILE_PTX generated_files) - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) - # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( cuda_compile_ptx PTX _generated_files ${_sources} ${_cmake_options} - OPTIONS ${_options} ) - - set( ${generated_files} ${_generated_files}) - + cuda_compile_base(cuda_compile_ptx PTX ${generated_files} ${ARGN}) endmacro() +############################################################################### +############################################################################### +# CUDA COMPILE FATBIN +############################################################################### +############################################################################### +macro(CUDA_COMPILE_FATBIN generated_files) + cuda_compile_base(cuda_compile_fatbin FATBIN ${generated_files} ${ARGN}) +endmacro() + +############################################################################### +############################################################################### +# CUDA COMPILE CUBIN +############################################################################### +############################################################################### +macro(CUDA_COMPILE_CUBIN generated_files) + cuda_compile_base(cuda_compile_cubin CUBIN ${generated_files} ${ARGN}) +endmacro() + + ############################################################################### ############################################################################### # CUDA ADD CUFFT TO TARGET