# Minimum CMake version; 3.21 is required for the PROJECT_IS_TOP_LEVEL variable.
cmake_minimum_required(VERSION 3.21)

# Write a compile_commands.json into the build tree.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# On Windows, export every symbol from the shared libraries built here.
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
# User-facing build options.
# Non-Apple platforms pick which GPU backends to build; macOS only exposes the
# Universal-binary switch.
if (NOT APPLE)
    option(LLMODEL_KOMPUTE "llmodel: use Kompute" ON)
    option(LLMODEL_VULKAN  "llmodel: use Vulkan"  OFF)
    option(LLMODEL_CUDA    "llmodel: use CUDA"    ON)
    option(LLMODEL_ROCM    "llmodel: use ROCm"    OFF)
else()
    option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
endif()
# Select the macOS target architectures (no effect on other platforms).
if (APPLE AND BUILD_UNIVERSAL)
    # Build a Universal binary on macOS.
    # This requires that the found Qt library is compiled as Universal binaries.
    set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
elseif (APPLE AND NOT CMAKE_OSX_ARCHITECTURES)
    # Build for the host architecture on macOS, but only when no architecture
    # list has been chosen already.
    set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
endif()
# Include the binary directory for the generated header file
include_directories("${CMAKE_CURRENT_BINARY_DIR}")

# Library version, kept as separate components and joined into MAJOR.MINOR.PATCH.
set(LLMODEL_VERSION_MAJOR 0)
set(LLMODEL_VERSION_MINOR 5)
set(LLMODEL_VERSION_PATCH 0)
string(JOIN "." LLMODEL_VERSION
    "${LLMODEL_VERSION_MAJOR}"
    "${LLMODEL_VERSION_MINOR}"
    "${LLMODEL_VERSION_PATCH}")

project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
# Build libraries as shared by default and emit them into the same directory
# as the runtime artifacts.
set(BUILD_SHARED_LIBS ON)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})

# C++20 is mandatory; do not let CMake fall back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 20)
# Probe the toolchain for interprocedural optimization (IPO) support.
# IPO_SUPPORTED receives the result, IPO_ERROR the diagnostic text on failure.
include(CheckIPOSupported)
check_ipo_supported(RESULT IPO_SUPPORTED OUTPUT IPO_ERROR)
if (IPO_SUPPORTED)
    message(STATUS "Interprocedural optimization support detected")
else()
    message(WARNING "Interprocedural optimization is not supported by your toolchain! This will lead to bigger file sizes and worse performance: ${IPO_ERROR}")
endif()
# Load the llama.cpp build helpers.
# NOTE(review): DIRECTORY is presumably read by llama.cpp.cmake to locate the
# llama.cpp sources - confirm against that file.
set(DIRECTORY llama.cpp-mainline)
include(llama.cpp.cmake)

# Assemble the list of backend variants to build. Each backend comes in a
# regular and an "-avxonly" flavour, except metal.
set(BUILD_VARIANTS)
if (APPLE)
    list(APPEND BUILD_VARIANTS metal)
endif()
if (NOT LLMODEL_KOMPUTE)
    # Without Kompute, the plain CPU variants are built and go first in the list.
    list(PREPEND BUILD_VARIANTS cpu cpu-avxonly)
else()
    list(APPEND BUILD_VARIANTS kompute kompute-avxonly)
endif()
if (LLMODEL_VULKAN)
    list(APPEND BUILD_VARIANTS vulkan vulkan-avxonly)
endif()
# CUDA backend: pick default GPU architectures, enable the CUDA language and
# register the cuda build variants.
if (LLMODEL_CUDA)
    # NOTE: cmake_minimum_required() is intentionally not repeated here.
    # CMAKE_CUDA_ARCHITECTURES needs CMake 3.18, which the 3.21 requirement at
    # the top of this file already guarantees. Re-invoking it with a lower
    # version would implicitly run cmake_policy(VERSION 3.18) and unset every
    # policy introduced after 3.18 for the remainder of the file.

    # Defaults must be set before enable_language(CUDA).
    # Keep this in sync with the arch list in ggml/src/CMakeLists.txt.
    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
        # 52 == lowest CUDA 12 standard
        # 60 == f16 CUDA intrinsics
        # 61 == integer CUDA intrinsics
        # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
        if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
            set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
        else()
            set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
            #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
        endif()
    endif()
    message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")

    # Give a friendly hint before enable_language(CUDA) is attempted when no
    # CUDA compiler was detected.
    include(CheckLanguage)
    check_language(CUDA)
    if (NOT CMAKE_CUDA_COMPILER)
        message(WARNING "CUDA Toolkit not found. To build without CUDA, use -DLLMODEL_CUDA=OFF.")
    endif()
    enable_language(CUDA)
    list(APPEND BUILD_VARIANTS cuda cuda-avxonly)
endif()
# ROCm backend: enable the HIP language and register the rocm build variants.
if (LLMODEL_ROCM)
    enable_language(HIP)
    list(APPEND BUILD_VARIANTS rocm rocm-avxonly)
endif()

# Always print full command lines in Makefile-based builds.
set(CMAKE_VERBOSE_MAKEFILE ON)
# prepare_target(<TARGET_NAME> <BASE_LIB>)
#
# Function for preparing individual implementations: finishes configuring one
# model implementation target for the build variant currently being processed.
#
#   TARGET_NAME - target name without the "-<variant>" suffix
#   BASE_LIB    - library to link against, without the "-<variant>" suffix
#
# BUILD_VARIANT is read from the caller's scope, so this must be called from
# inside the build-variant loop. It is defined once here instead of being
# redefined on every loop iteration.
function(prepare_target TARGET_NAME BASE_LIB)
    set(TARGET_NAME ${TARGET_NAME}-${BUILD_VARIANT})
    message(STATUS "Configuring model implementation target ${TARGET_NAME}")
    # Link to ggml/llama
    target_link_libraries(${TARGET_NAME}
        PRIVATE ${BASE_LIB}-${BUILD_VARIANT})
    # Let it know about its build variant
    target_compile_definitions(${TARGET_NAME}
        PRIVATE GGML_BUILD_VARIANT="${BUILD_VARIANT}")
    # Enable IPO if possible
    # FIXME: Doesn't work with msvc reliably. See https://github.com/nomic-ai/gpt4all/issues/841
    # set_property(TARGET ${TARGET_NAME}
    #              PROPERTY INTERPROCEDURAL_OPTIMIZATION ${IPO_SUPPORTED})
endfunction()

# Go through each build variant
foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
    # Determine flags: the "-avxonly" variants switch AVX2, F16C and FMA off.
    if (BUILD_VARIANT MATCHES avxonly)
        set(GPT4ALL_ALLOW_NON_AVX OFF)
    else()
        set(GPT4ALL_ALLOW_NON_AVX ON)
    endif()
    set(GGML_AVX2 ${GPT4ALL_ALLOW_NON_AVX})
    set(GGML_F16C ${GPT4ALL_ALLOW_NON_AVX})
    set(GGML_FMA  ${GPT4ALL_ALLOW_NON_AVX})

    # Start every iteration with all GPU backends off so that a flag enabled
    # for one variant cannot leak into the next one.
    set(GGML_METAL   OFF)
    set(GGML_KOMPUTE OFF)
    set(GGML_VULKAN  OFF)
    set(GGML_CUDA    OFF)
    set(GGML_ROCM    OFF)
    set(GGML_HIPBLAS OFF) # the flag the rocm branch below actually enables
    if (BUILD_VARIANT MATCHES metal)
        set(GGML_METAL   ON)
    elseif (BUILD_VARIANT MATCHES kompute)
        set(GGML_KOMPUTE ON)
    elseif (BUILD_VARIANT MATCHES vulkan)
        set(GGML_VULKAN  ON)
    elseif (BUILD_VARIANT MATCHES cuda)
        set(GGML_CUDA    ON)
    elseif (BUILD_VARIANT MATCHES rocm)
        set(GGML_HIPBLAS ON)
    endif()

    # Include GGML
    include_ggml(-mainline-${BUILD_VARIANT})

    # Add each individual implementations
    add_library(llamamodel-mainline-${BUILD_VARIANT} SHARED
        llamamodel.cpp llmodel_shared.cpp)
    target_compile_definitions(llamamodel-mainline-${BUILD_VARIANT} PRIVATE
        LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
    prepare_target(llamamodel-mainline llama-mainline)

    # Hand CUDAToolkit_BIN_DIR up to the including project. A top-level build
    # has no parent scope, hence the PROJECT_IS_TOP_LEVEL guard.
    # NOTE(review): CUDAToolkit_BIN_DIR is presumably set while including GGML
    # for the cuda variant - confirm in llama.cpp.cmake.
    if (NOT PROJECT_IS_TOP_LEVEL AND BUILD_VARIANT STREQUAL cuda)
        set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE)
    endif()
endforeach()
# The main llmodel library. No library type is given, so it follows
# BUILD_SHARED_LIBS.
add_library(llmodel
    llmodel.h llmodel.cpp llmodel_shared.cpp
    llmodel_c.h llmodel_c.cpp
    dlhandle.cpp
)
# LIB_FILE_EXT carries the platform's shared-library suffix as a string literal.
# Name and value must form a single argument (no whitespace after '='),
# otherwise the macro is defined empty and the suffix becomes a separate,
# unrelated definition.
target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")
# Stamp the library with the project version; the SOVERSION is the major
# version component only.
set_property(TARGET llmodel PROPERTY VERSION ${PROJECT_VERSION})
set_property(TARGET llmodel PROPERTY SOVERSION ${PROJECT_VERSION_MAJOR})

# Name of the main component, taken from the project name.
set(COMPONENT_NAME_MAIN "${PROJECT_NAME}")
# Install into an "install" directory inside the build tree.
set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install")