Move the backend code into its own subdirectory and make it a shared library. Begin fleshing out the C API wrapper that bindings can use.

Adam Treat 2023-04-25 19:16:45 -04:00
parent d16306a205
commit 3c9139b5d2
15 changed files with 188 additions and 20 deletions

.gitmodules

@@ -1,3 +1,3 @@
[submodule "llama.cpp"]
path = llama.cpp
path = llmodel/llama.cpp
url = https://github.com/manyoso/llama.cpp.git

CMakeLists.txt

@@ -32,6 +2,8 @@ set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTORCC ON)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
option(GPT4ALL_AVX_ONLY OFF "Build for avx only")
find_package(Qt6 6.2 COMPONENTS Core Quick QuickDialogs2 Svg REQUIRED)
# Get the Qt6Core target properties
@@ -48,28 +50,13 @@ get_filename_component(Qt6_ROOT_DIR "${Qt6_ROOT_DIR}/.." ABSOLUTE)
message(STATUS "qmake binary: ${QMAKE_EXECUTABLE}")
message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}")
set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
set(BUILD_SHARED_LIBS ON FORCE)
set(CMAKE_VERBOSE_MAKEFILE ON)
option(GPT4ALL_AVX_ONLY OFF "Build for avx only")
option(GPT4ALL_LOCALHOST OFF "Build for local install repo")
if (GPT4ALL_AVX_ONLY)
set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
endif()
add_subdirectory(llama.cpp)
add_subdirectory(llmodel)
qt_add_executable(chat
main.cpp
download.h download.cpp
network.h network.cpp
gptj.h gptj.cpp
llamamodel.h llamamodel.cpp
llama.cpp/examples/common.cpp
llm.h llm.cpp
llmodel.h
utils.h utils.cpp
)
qt_add_qml_module(chat
@@ -123,7 +110,7 @@ target_compile_definitions(chat
target_link_libraries(chat
PRIVATE Qt6::Quick Qt6::Svg)
target_link_libraries(chat
PRIVATE llama)
PRIVATE llmodel)
set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
@@ -134,6 +121,7 @@ if(NOT (CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_HOST_SYSTEM_PROCESSOR
endif()
install(TARGETS chat DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN})
install(TARGETS llmodel DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
install(TARGETS llama DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
set(CPACK_GENERATOR "IFW")


@@ -4,6 +4,8 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
execute_process(COMMAND ${MACDEPLOYQT} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app -qmldir=${CMAKE_CURRENT_SOURCE_DIR} -verbose=2)
file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama.dylib
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.dylib
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/favicon.icns"
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Resources)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-16.png"


@@ -4,6 +4,8 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
execute_process(COMMAND ${WINDEPLOYQT} --qmldir ${CMAKE_CURRENT_SOURCE_DIR} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama.dll
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.dll
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-16.png"
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-32.png"

llm.h

@@ -3,8 +3,8 @@
#include <QObject>
#include <QThread>
#include "gptj.h"
#include "llamamodel.h"
#include "llmodel/gptj.h"
#include "llmodel/llamamodel.h"
class LLMObject : public QObject
{

llmodel/CMakeLists.txt

@@ -0,0 +1,55 @@
cmake_minimum_required(VERSION 3.16)
if(APPLE)
option(BUILD_UNIVERSAL "Build a Universal binary on macOS" OFF)
if(BUILD_UNIVERSAL)
# Build a Universal binary on macOS
# This requires that the found Qt library is compiled as Universal binaries.
set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
else()
# Build for the host architecture on macOS
set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
endif()
endif()
set(APP_VERSION_MAJOR 2)
set(APP_VERSION_MINOR 2)
set(APP_VERSION_PATCH 2)
set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
# Generate a header file with the version number
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../cmake/config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/../config.h"
)
# Include the binary directory for the generated header file
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
project(llmodel VERSION ${APP_VERSION} LANGUAGES CXX C)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
set(BUILD_SHARED_LIBS ON FORCE)
set(CMAKE_VERBOSE_MAKEFILE ON)
if (GPT4ALL_AVX_ONLY)
set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
endif()
add_subdirectory(llama.cpp)
add_library(llmodel
gptj.h gptj.cpp
llamamodel.h llamamodel.cpp
llama.cpp/examples/common.cpp
llmodel.h llmodel_c.h
utils.h utils.cpp
)
target_link_libraries(llmodel
PRIVATE llama)
set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)

llmodel/llmodel_c.h

@@ -0,0 +1,121 @@
#ifndef LLMODEL_C_H
#define LLMODEL_C_H
#include <stdint.h>
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Opaque pointers to the underlying C++ classes.
*/
typedef void *LLMODEL_C;
typedef void *GPTJ_C;
typedef void *LLAMA_C;
/**
* PromptContext_C structure for holding the prompt context.
*/
typedef struct {
float *logits; // logits of current context
int32_t *tokens; // current tokens in the context window
int32_t n_past; // number of tokens in past conversation
int32_t n_ctx; // number of tokens possible in context window
int32_t n_predict; // number of tokens to predict
int32_t top_k; // top k logits to sample from
float top_p; // nucleus sampling probability threshold
float temp; // temperature to adjust model's output distribution
int32_t n_batch; // number of predictions to generate in parallel
float repeat_penalty; // penalty factor for repeated tokens
int32_t repeat_last_n; // last n tokens to penalize
float contextErase; // percent of context to erase if we exceed the context window
} PromptContext_C;
/**
* Callback types for response and recalculation.
*/
typedef bool (*ResponseCallback)(int32_t, const char *);
typedef bool (*RecalculateCallback)(bool);
/**
* Create a GPTJ instance.
* @return A pointer to the GPTJ instance.
*/
GPTJ_C GPTJ_create();
/**
* Destroy a GPTJ instance.
* @param gptj A pointer to the GPTJ instance.
*/
void GPTJ_destroy(GPTJ_C gptj);
/**
* Create a LLAMA instance.
* @return A pointer to the LLAMA instance.
*/
LLAMA_C LLAMA_create();
/**
* Destroy a LLAMA instance.
* @param llama A pointer to the LLAMA instance.
*/
void LLAMA_destroy(LLAMA_C llama);
/**
* Load a model from a file.
* @param model A pointer to the LLMODEL_C instance.
* @param modelPath A string representing the path to the model file.
* @return true if the model was loaded successfully, false otherwise.
*/
bool LLMODEL_loadModel(LLMODEL_C model, const char *modelPath);
/**
* Load a model from an input stream.
* @param model A pointer to the LLMODEL_C instance.
* @param modelPath A string representing the path to the model file.
* @param fin A pointer to the input stream.
* @return true if the model was loaded successfully, false otherwise.
*/
bool LLMODEL_loadModelStream(LLMODEL_C model, const char *modelPath, void *fin);
/**
* Check if a model is loaded.
* @param model A pointer to the LLMODEL_C instance.
* @return true if the model is loaded, false otherwise.
*/
bool LLMODEL_isModelLoaded(LLMODEL_C model);
/**
* Generate a response using the model.
* @param model A pointer to the LLMODEL_C instance.
* @param prompt A string representing the input prompt.
* @param response A callback function for handling the generated response.
* @param recalculate A callback function for handling recalculation requests.
* @param ctx A pointer to the PromptContext_C structure.
*/
void LLMODEL_prompt(LLMODEL_C model, const char *prompt,
ResponseCallback response,
RecalculateCallback recalculate,
PromptContext_C *ctx);
/**
* Set the number of threads to be used by the model.
* @param model A pointer to the LLMODEL_C instance.
* @param n_threads The number of threads to be used.
*/
void LLMODEL_setThreadCount(LLMODEL_C model, int32_t n_threads);
/**
* Get the number of threads currently being used by the model.
* @param model A pointer to the LLMODEL_C instance.
* @return The number of threads currently being used.
*/
int32_t LLMODEL_threadCount(LLMODEL_C model);
#ifdef __cplusplus
}
#endif
#endif // LLMODEL_C_H
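
For illustration only, here is a minimal sketch of how a language binding might drive this C API once the implementation behind the header lands. It is not part of this commit: the model filename, the sampling values in PromptContext_C, and the assumption that a GPTJ_C handle is accepted wherever an LLMODEL_C is expected (both are opaque void pointers) are placeholders and assumptions, not guarantees made by the header.

/* Hypothetical usage sketch for llmodel_c.h (not part of this commit). */
#include <stdio.h>
#include <stddef.h>
#include "llmodel_c.h"

/* Assumed semantics: called for each generated chunk; returning false stops generation. */
static bool on_response(int32_t token_id, const char *response)
{
    (void)token_id;
    printf("%s", response);
    return true;
}

/* Assumed semantics: reports whether the context is being recalculated; return false to abort. */
static bool on_recalculate(bool is_recalculating)
{
    return is_recalculating;
}

int main(void)
{
    /* Assumption: a GPTJ_C handle can be passed where an LLMODEL_C is expected. */
    GPTJ_C model = GPTJ_create();

    /* "ggml-gpt4all-j.bin" is a placeholder path. */
    if (!LLMODEL_loadModel(model, "ggml-gpt4all-j.bin") || !LLMODEL_isModelLoaded(model)) {
        fprintf(stderr, "failed to load model\n");
        GPTJ_destroy(model);
        return 1;
    }

    LLMODEL_setThreadCount(model, 4);
    printf("using %d threads\n", LLMODEL_threadCount(model));

    /* Field values below are illustrative defaults, not mandated by the header. */
    PromptContext_C ctx = {
        .logits = NULL,
        .tokens = NULL,
        .n_past = 0,
        .n_ctx = 2048,
        .n_predict = 128,
        .top_k = 40,
        .top_p = 0.95f,
        .temp = 0.7f,
        .n_batch = 8,
        .repeat_penalty = 1.1f,
        .repeat_last_n = 64,
        .contextErase = 0.5f,
    };

    LLMODEL_prompt(model, "Why is the sky blue?", on_response, on_recalculate, &ctx);

    GPTJ_destroy(model);
    return 0;
}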