Move the backend code into its own subdirectory and make it a shared library. Begin fleshing out the C API wrapper that bindings can use.

Adam Treat 2023-04-25 19:16:45 -04:00
parent d16306a205
commit 3c9139b5d2
15 changed files with 188 additions and 20 deletions

.gitmodules

@@ -1,3 +1,3 @@
[submodule "llama.cpp"]
path = llama.cpp
path = llmodel/llama.cpp
url = https://github.com/manyoso/llama.cpp.git

CMakeLists.txt

@@ -32,6 +2,8 @@ set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTORCC ON)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
option(GPT4ALL_AVX_ONLY OFF "Build for avx only")
find_package(Qt6 6.2 COMPONENTS Core Quick QuickDialogs2 Svg REQUIRED)
# Get the Qt6Core target properties
@@ -48,28 +50,13 @@ get_filename_component(Qt6_ROOT_DIR "${Qt6_ROOT_DIR}/.." ABSOLUTE)
message(STATUS "qmake binary: ${QMAKE_EXECUTABLE}")
message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}")
set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
set(BUILD_SHARED_LIBS ON FORCE)
set(CMAKE_VERBOSE_MAKEFILE ON)
option(GPT4ALL_AVX_ONLY OFF "Build for avx only")
option(GPT4ALL_LOCALHOST OFF "Build for local install repo")
if (GPT4ALL_AVX_ONLY)
set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
endif()
add_subdirectory(llama.cpp)
add_subdirectory(llmodel)
qt_add_executable(chat
main.cpp
download.h download.cpp
network.h network.cpp
gptj.h gptj.cpp
llamamodel.h llamamodel.cpp
llama.cpp/examples/common.cpp
llm.h llm.cpp
llmodel.h
utils.h utils.cpp
)
qt_add_qml_module(chat
@@ -123,7 +110,7 @@ target_compile_definitions(chat
target_link_libraries(chat
PRIVATE Qt6::Quick Qt6::Svg)
target_link_libraries(chat
PRIVATE llama)
PRIVATE llmodel)
set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
@@ -134,6 +121,7 @@ if(NOT (CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_HOST_SYSTEM_PROCESSOR
endif()
install(TARGETS chat DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN})
install(TARGETS llmodel DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
install(TARGETS llama DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
set(CPACK_GENERATOR "IFW")


@@ -4,6 +4,8 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
execute_process(COMMAND ${MACDEPLOYQT} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app -qmldir=${CMAKE_CURRENT_SOURCE_DIR} -verbose=2)
file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama.dylib
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.dylib
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/favicon.icns"
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Resources)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-16.png"


@@ -4,6 +4,8 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
execute_process(COMMAND ${WINDEPLOYQT} --qmldir ${CMAKE_CURRENT_SOURCE_DIR} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama.dll
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.dll
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-16.png"
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-32.png"

llm.h

@@ -3,8 +3,8 @@
#include <QObject>
#include <QThread>
#include "gptj.h"
#include "llamamodel.h"
#include "llmodel/gptj.h"
#include "llmodel/llamamodel.h"
class LLMObject : public QObject
{

llmodel/CMakeLists.txt

@@ -0,0 +1,55 @@
cmake_minimum_required(VERSION 3.16)
if(APPLE)
option(BUILD_UNIVERSAL "Build a Universal binary on macOS" OFF)
if(BUILD_UNIVERSAL)
# Build a Universal binary on macOS
# This requires that the found Qt library is compiled as Universal binaries.
set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
else()
# Build for the host architecture on macOS
set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
endif()
endif()
set(APP_VERSION_MAJOR 2)
set(APP_VERSION_MINOR 2)
set(APP_VERSION_PATCH 2)
set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
# Generate a header file with the version number
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../cmake/config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/../config.h"
)
# Include the binary directory for the generated header file
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
project(llmodel VERSION ${APP_VERSION} LANGUAGES CXX C)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
set(BUILD_SHARED_LIBS ON FORCE)
set(CMAKE_VERBOSE_MAKEFILE ON)
if (GPT4ALL_AVX_ONLY)
set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
endif()
add_subdirectory(llama.cpp)
add_library(llmodel
gptj.h gptj.cpp
llamamodel.h llamamodel.cpp
llama.cpp/examples/common.cpp
llmodel.h llmodel_c.h
utils.h utils.cpp
)
target_link_libraries(llmodel
PRIVATE llama)
set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)

llmodel/llmodel_c.h

@@ -0,0 +1,121 @@
#ifndef LLMODEL_C_H
#define LLMODEL_C_H
#include <stdint.h>
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Opaque pointers to the underlying C++ classes.
*/
typedef void *LLMODEL_C;
typedef void *GPTJ_C;
typedef void *LLAMA_C;
/**
* PromptContext_C structure for holding the prompt context.
*/
typedef struct {
float *logits; // logits of current context
int32_t *tokens; // current tokens in the context window
int32_t n_past; // number of tokens in past conversation
int32_t n_ctx; // number of tokens possible in context window
int32_t n_predict; // number of tokens to predict
int32_t top_k; // top k logits to sample from
float top_p; // nucleus sampling probability threshold
float temp; // temperature to adjust model's output distribution
int32_t n_batch; // number of predictions to generate in parallel
float repeat_penalty; // penalty factor for repeated tokens
int32_t repeat_last_n; // last n tokens to penalize
float contextErase; // percent of context to erase if we exceed the context window
} PromptContext_C;
/**
* Callback types for response and recalculation.
*/
typedef bool (*ResponseCallback)(int32_t, const char *);
typedef bool (*RecalculateCallback)(bool);
/**
* Create a GPTJ instance.
* @return A pointer to the GPTJ instance.
*/
GPTJ_C GPTJ_create();
/**
* Destroy a GPTJ instance.
* @param gptj A pointer to the GPTJ instance.
*/
void GPTJ_destroy(GPTJ_C gptj);
/**
* Create a LLAMA instance.
* @return A pointer to the LLAMA instance.
*/
LLAMA_C LLAMA_create();
/**
* Destroy a LLAMA instance.
* @param llama A pointer to the LLAMA instance.
*/
void LLAMA_destroy(LLAMA_C llama);
/**
* Load a model from a file.
* @param model A pointer to the LLMODEL_C instance.
* @param modelPath A string representing the path to the model file.
* @return true if the model was loaded successfully, false otherwise.
*/
bool LLMODEL_loadModel(LLMODEL_C model, const char *modelPath);
/**
* Load a model from an input stream.
* @param model A pointer to the LLMODEL_C instance.
* @param modelPath A string representing the path to the model file.
* @param fin A pointer to the input stream.
* @return true if the model was loaded successfully, false otherwise.
*/
bool LLMODEL_loadModelStream(LLMODEL_C model, const char *modelPath, void *fin);
/**
* Check if a model is loaded.
* @param model A pointer to the LLMODEL_C instance.
* @return true if the model is loaded, false otherwise.
*/
bool LLMODEL_isModelLoaded(LLMODEL_C model);
/**
* Generate a response using the model.
* @param model A pointer to the LLMODEL_C instance.
* @param prompt A string representing the input prompt.
* @param response A callback function for handling the generated response.
* @param recalculate A callback function for handling recalculation requests.
* @param ctx A pointer to the PromptContext_C structure.
*/
void LLMODEL_prompt(LLMODEL_C model, const char *prompt,
ResponseCallback response,
RecalculateCallback recalculate,
PromptContext_C *ctx);
/**
* Set the number of threads to be used by the model.
* @param model A pointer to the LLMODEL_C instance.
* @param n_threads The number of threads to be used.
*/
void LLMODEL_setThreadCount(LLMODEL_C model, int32_t n_threads);
/**
* Get the number of threads currently being used by the model.
* @param model A pointer to the LLMODEL_C instance.
* @return The number of threads currently being used.
*/
int32_t LLMODEL_threadCount(LLMODEL_C model);
#ifdef __cplusplus
}
#endif
#endif // LLMODEL_C_H
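
For illustration only, here is a minimal sketch of how a language binding might drive this C API once the implementation behind the header lands. It is not part of this commit: the model filename, the sampling values in PromptContext_C, and the assumption that a GPTJ_C handle is accepted wherever an LLMODEL_C is expected (both are opaque void pointers) are placeholders and assumptions, not guarantees made by the header.

/* Hypothetical usage sketch for llmodel_c.h (not part of this commit). */
#include <stdio.h>
#include <stddef.h>
#include "llmodel_c.h"

/* Assumed semantics: called for each generated chunk; returning false stops generation. */
static bool on_response(int32_t token_id, const char *response)
{
    (void)token_id;
    printf("%s", response);
    return true;
}

/* Assumed semantics: reports whether the context is being recalculated; return false to abort. */
static bool on_recalculate(bool is_recalculating)
{
    return is_recalculating;
}

int main(void)
{
    /* Assumption: a GPTJ_C handle can be passed where an LLMODEL_C is expected. */
    GPTJ_C model = GPTJ_create();

    /* "ggml-gpt4all-j.bin" is a placeholder path. */
    if (!LLMODEL_loadModel(model, "ggml-gpt4all-j.bin") || !LLMODEL_isModelLoaded(model)) {
        fprintf(stderr, "failed to load model\n");
        GPTJ_destroy(model);
        return 1;
    }

    LLMODEL_setThreadCount(model, 4);
    printf("using %d threads\n", LLMODEL_threadCount(model));

    /* Field values below are illustrative defaults, not mandated by the header. */
    PromptContext_C ctx = {
        .logits = NULL,
        .tokens = NULL,
        .n_past = 0,
        .n_ctx = 2048,
        .n_predict = 128,
        .top_k = 40,
        .top_p = 0.95f,
        .temp = 0.7f,
        .n_batch = 8,
        .repeat_penalty = 1.1f,
        .repeat_last_n = 64,
        .contextErase = 0.5f,
    };

    LLMODEL_prompt(model, "Why is the sky blue?", on_response, on_recalculate, &ctx);

    GPTJ_destroy(model);
    return 0;
}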