gpt4all/gpt4all-backend/llmodel_c.h

#ifndef LLMODEL_C_H
#define LLMODEL_C_H

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>

/*
 * DEPRECATED: declaration prefix used in this header to flag an API as deprecated.
 * Compilers that define __GNUC__ get the attribute spelling, MSVC gets the
 * __declspec spelling; on any other compiler the macro expands to nothing and
 * a build message points out the missing implementation.
 */
#if defined(__GNUC__)
#  define DEPRECATED __attribute__ ((deprecated))
#elif defined(_MSC_VER)
#  define DEPRECATED __declspec(deprecated)
#else
#  pragma message("WARNING: You need to implement DEPRECATED for this compiler")
#  define DEPRECATED
#endif

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Opaque pointer to the underlying model.
 * Values of this type are returned by llmodel_model_create / llmodel_model_create2
 * (NULL on error) and are passed as the first argument to the other llmodel_*
 * functions declared in this header.
 */
typedef void *llmodel_model;

/**
 * Structure containing any errors that may eventually occur.
 * Used as the `error` out-parameter of llmodel_model_create2, which only sets it on error.
 */
struct llmodel_error {
    const char *message;  // Human-readable error description; thread-local; guaranteed to survive until the next llmodel C API call
    int code;             // errno; 0 if none
};
// In C a struct tag is not a type name by itself, so add the typedef; C++ already allows plain `llmodel_error`.
#ifndef __cplusplus
typedef struct llmodel_error llmodel_error;
#endif

/**
 * llmodel_prompt_context structure for holding the prompt context.
 * A pointer to this structure is passed to llmodel_prompt as its `ctx` argument.
 * NOTE: The implementation takes care of all the memory handling of the raw logits pointer and the
 * raw tokens pointer. Attempting to resize them or modify them in any way can lead to undefined
 * behavior.
 */
struct llmodel_prompt_context {
    float *logits;          // logits of the current context (memory owned by the implementation)
    size_t logits_size;     // the size of the raw logits vector
    int32_t *tokens;        // current tokens in the context window (memory owned by the implementation)
    size_t tokens_size;     // the size of the raw tokens vector
    int32_t n_past;         // number of tokens in past conversation
    int32_t n_ctx;          // number of tokens possible in context window
    int32_t n_predict;      // number of tokens to predict
    int32_t top_k;          // top k logits to sample from
    float top_p;            // nucleus sampling probability threshold
    float temp;             // temperature to adjust model's output distribution
    int32_t n_batch;        // number of predictions to generate in parallel
    float repeat_penalty;   // penalty factor for repeated tokens
    int32_t repeat_last_n;  // last n tokens to penalize
    float context_erase;    // percent of context to erase if we exceed the context window
};
// In C a struct tag is not a type name by itself, so add the typedef; C++ already allows plain `llmodel_prompt_context`.
#ifndef __cplusplus
typedef struct llmodel_prompt_context llmodel_prompt_context;
#endif

/**
 * Callback type for prompt processing.
 * Supplied to llmodel_prompt as its `prompt_callback` argument.
 * @param token_id The token id of the prompt.
 * @return a bool indicating whether the model should keep processing.
 */
typedef bool (*llmodel_prompt_callback)(int32_t token_id);

/**
 * Callback type for response.
 * Supplied to llmodel_prompt as its `response_callback` argument.
 * @param token_id The token id of the response; a value of -1 indicates that
 *                 `response` is an error string rather than generated text.
 * @param response The response string.
 * @return a bool indicating whether the model should keep generating.
 */
typedef bool (*llmodel_response_callback)(int32_t token_id, const char *response);

/**
 * Callback type for recalculation of context.
 * Supplied to llmodel_prompt as its `recalculate_callback` argument.
 * @param is_recalculating whether the model is recalculating the context.
 * @return a bool indicating whether the model should keep generating.
 */
typedef bool (*llmodel_recalculate_callback)(bool is_recalculating);

/**
 * Create a llmodel instance.
 * Recognises correct model type from file at model_path
 * @deprecated This declaration is marked DEPRECATED. llmodel_model_create2 takes the
 *             same model_path plus a build variant and an error out-parameter.
 * @param model_path A string representing the path to the model file.
 * @return A pointer to the llmodel_model instance; NULL on error.
 */
DEPRECATED llmodel_model llmodel_model_create(const char *model_path);

/**
 * Create a llmodel instance.
 * Recognises correct model type from file at model_path
 * @param model_path A string representing the path to the model file; will only be used to detect model type.
 * @param build_variant A string representing the implementation to use (auto, default, avxonly, ...).
 * @param error A pointer to a llmodel_error; will only be set on error.
 *              NOTE(review): whether NULL is accepted here is not visible from this header - confirm.
 * @return A pointer to the llmodel_model instance; NULL on error.
 */
llmodel_model llmodel_model_create2(const char *model_path, const char *build_variant, llmodel_error *error);

/**
 * Destroy a llmodel instance.
 * Recognises correct model type using type info
 * @param model A pointer to a llmodel_model instance, as returned by
 *              llmodel_model_create or llmodel_model_create2.
 */
void llmodel_model_destroy(llmodel_model model);

/**
 * Load a model from a file.
 * The result can be queried afterwards with llmodel_isModelLoaded.
 * @param model A pointer to the llmodel_model instance.
 * @param model_path A string representing the path to the model file.
 * @return true if the model was loaded successfully, false otherwise.
 */
bool llmodel_loadModel(llmodel_model model, const char *model_path);

/**
 * Check if a model is loaded.
 * See llmodel_loadModel for the call that performs the load.
 * @param model A pointer to the llmodel_model instance.
 * @return true if the model is loaded, false otherwise.
 */
bool llmodel_isModelLoaded(llmodel_model model);

/**
 * Get the size of the internal state of the model.
 * NOTE: This state data is specific to the type of model you have created.
 * See llmodel_save_state_data / llmodel_restore_state_data for the calls that copy this state.
 * @param model A pointer to the llmodel_model instance.
 * @return the size in bytes of the internal state of the model
 */
uint64_t llmodel_get_state_size(llmodel_model model);

/**
 * Saves the internal state of the model to the specified destination address.
 * NOTE: This state data is specific to the type of model you have created.
 * NOTE(review): no buffer length is passed, so the caller alone is responsible for the
 * capacity of dest; presumably it must hold at least llmodel_get_state_size(model)
 * bytes - confirm against the implementation.
 * @param model A pointer to the llmodel_model instance.
 * @param dest A pointer to the destination.
 * @return the number of bytes copied
 */
uint64_t llmodel_save_state_data(llmodel_model model, uint8_t *dest);

/**
 * Restores the internal state of the model using data from the specified address.
 * NOTE: This state data is specific to the type of model you have created.
 * NOTE(review): no buffer length is passed; presumably src must point at data previously
 * written by llmodel_save_state_data for the same type of model - confirm against the
 * implementation.
 * @param model A pointer to the llmodel_model instance.
 * @param src A pointer to the source data; it is not modified through this pointer.
 * @return the number of bytes read
 */
uint64_t llmodel_restore_state_data(llmodel_model model, const uint8_t *src);

/**
 * Generate a response using the model.
 * Nothing is returned; progress and output are reported through the three callbacks,
 * each of which returns a bool that tells the model whether to continue.
 * @param model A pointer to the llmodel_model instance.
 * @param prompt A string representing the input prompt.
 * @param prompt_callback A callback function for handling the processing of prompt.
 * @param response_callback A callback function for handling the generated response;
 *                          a token_id of -1 marks the string as an error string.
 * @param recalculate_callback A callback function for handling recalculation requests.
 * @param ctx A pointer to the llmodel_prompt_context structure. It is taken as a non-const
 *            pointer; NOTE(review): presumably updated during the call - confirm.
 */
void llmodel_prompt(llmodel_model model, const char *prompt,
                    llmodel_prompt_callback prompt_callback,
                    llmodel_response_callback response_callback,
                    llmodel_recalculate_callback recalculate_callback,
                    llmodel_prompt_context *ctx);

/**
 * Set the number of threads to be used by the model.
 * The current value can be read back with llmodel_threadCount.
 * @param model A pointer to the llmodel_model instance.
 * @param n_threads The number of threads to be used.
 */
void llmodel_setThreadCount(llmodel_model model, int32_t n_threads);

/**
 * Get the number of threads currently being used by the model.
 * See llmodel_setThreadCount for the call that changes this value.
 * @param model A pointer to the llmodel_model instance.
 * @return The number of threads currently being used.
 */
int32_t llmodel_threadCount(llmodel_model model);

#ifdef __cplusplus
}
#endif

#endif // LLMODEL_C_H
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`#ifndef LLMODEL_C_H`
			`#define LLMODEL_C_H`

			`#include <stdint.h>`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 09:43:24 -04:00			`#include <stddef.h>`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`#include <stdbool.h>`

Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 17:04:01 -04:00			`#ifdef __GNUC__`
			`#define DEPRECATED __attribute__ ((deprecated))`
			`#elif defined(_MSC_VER)`
			`#define DEPRECATED __declspec(deprecated)`
			`#else`
			`#pragma message("WARNING: You need to implement DEPRECATED for this compiler")`
			`#define DEPRECATED`
			`#endif`

Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

			`/**`
Only need one opaque pointer. 2023-04-25 21:06:45 -04:00			`* Opaque pointer to the underlying model.`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`*/`
Fixup the api a bit. 2023-04-25 21:03:10 -04:00			`typedef void *llmodel_model;`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 17:04:01 -04:00			`/**`
			`* Structure containing any errors that may eventually occur`
			`*/`
			`struct llmodel_error {`
			`const char *message; // Human readable error description; Thread-local; guaranteed to survive until next llmodel C API call`
			`int code; // errno; 0 if none`
			`};`
			`#ifndef __cplusplus`
			`typedef struct llmodel_error llmodel_error;`
			`#endif`

Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`/**`
Clean up the docs a bit more. 2023-04-25 21:15:38 -04:00			`* llmodel_prompt_context structure for holding the prompt context.`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 09:43:24 -04:00			`* NOTE: The implementation takes care of all the memory handling of the raw logits pointer and the`
			`* raw tokens pointer. Attempting to resize them or modify them in any way can lead to undefined`
			`* behavior.`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`*/`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 17:04:01 -04:00			`struct llmodel_prompt_context {`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`float *logits; // logits of current context`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 09:43:24 -04:00			`size_t logits_size; // the size of the raw logits vector`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`int32_t *tokens; // current tokens in the context window`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 09:43:24 -04:00			`size_t tokens_size; // the size of the raw tokens vector`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`int32_t n_past; // number of tokens in past conversation`
			`int32_t n_ctx; // number of tokens possible in context window`
			`int32_t n_predict; // number of tokens to predict`
			`int32_t top_k; // top k logits to sample from`
			`float top_p; // nucleus sampling probability threshold`
			`float temp; // temperature to adjust model's output distribution`
			`int32_t n_batch; // number of predictions to generate in parallel`
			`float repeat_penalty; // penalty factor for repeated tokens`
			`int32_t repeat_last_n; // last n tokens to penalize`
Clean up the docs a bit more still. 2023-04-25 21:17:00 -04:00			`float context_erase; // percent of context to erase if we exceed the context window`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 17:04:01 -04:00			`};`
			`#ifndef __cplusplus`
			`typedef struct llmodel_prompt_context llmodel_prompt_context;`
			`#endif`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00
Move the promptCallback to own function. 2023-04-27 11:08:15 -04:00			`/**`
			`* Callback type for prompt processing.`
			`* @param token_id The token id of the prompt.`
			`* @return a bool indicating whether the model should keep processing.`
			`*/`
			`typedef bool (*llmodel_prompt_callback)(int32_t token_id);`

Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`/**`
Fixup the api a bit. 2023-04-25 21:03:10 -04:00			`* Callback type for response.`
			`* @param token_id The token id of the response.`
Move the promptCallback to own function. 2023-04-27 11:08:15 -04:00			`* @param response The response string. NOTE: a token_id of -1 indicates the string is an error string.`
Fixup the api a bit. 2023-04-25 21:03:10 -04:00			`* @return a bool indicating whether the model should keep generating.`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`*/`
Fixup the api a bit. 2023-04-25 21:03:10 -04:00			`typedef bool (*llmodel_response_callback)(int32_t token_id, const char *response);`

			`/**`
			`* Callback type for recalculation of context.`
			`* @param whether the model is recalculating the context.`
			`* @return a bool indicating whether the model should keep generating.`
			`*/`
			`typedef bool (*llmodel_recalculate_callback)(bool is_recalculating);`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00
			`/**`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 17:04:01 -04:00			`* Create a llmodel instance.`
			`* Recognises correct model type from file at model_path`
			`* @param model_path A string representing the path to the model file.`
			`* @return A pointer to the llmodel_model instance; NULL on error.`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`*/`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 17:04:01 -04:00			`DEPRECATED llmodel_model llmodel_model_create(const char *model_path);`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00
gpt4all-backend: Add llmodel create and destroy functions (#554) * Add llmodel create and destroy functions * Fix capitalization * Fix capitalization * Fix capitalization * Update CMakeLists.txt --------- Co-authored-by: kuvaus <kuvaus@users.noreply.github.com> 2023-05-16 11:36:46 -04:00			`/**`
			`* Create a llmodel instance.`
			`* Recognises correct model type from file at model_path`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 17:04:01 -04:00			`* @param model_path A string representing the path to the model file; will only be used to detect model type.`
			`* @param build_variant A string representing the implementation to use (auto, default, avxonly, ...),`
			`* @param error A pointer to a llmodel_error; will only be set on error.`
			`* @return A pointer to the llmodel_model instance; NULL on error.`
gpt4all-backend: Add llmodel create and destroy functions (#554) * Add llmodel create and destroy functions * Fix capitalization * Fix capitalization * Fix capitalization * Update CMakeLists.txt --------- Co-authored-by: kuvaus <kuvaus@users.noreply.github.com> 2023-05-16 11:36:46 -04:00			`*/`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 17:04:01 -04:00			`llmodel_model llmodel_model_create2(const char *model_path, const char *build_variant, llmodel_error *error);`
gpt4all-backend: Add llmodel create and destroy functions (#554) * Add llmodel create and destroy functions * Fix capitalization * Fix capitalization * Fix capitalization * Update CMakeLists.txt --------- Co-authored-by: kuvaus <kuvaus@users.noreply.github.com> 2023-05-16 11:36:46 -04:00
			`/**`
			`* Destroy a llmodel instance.`
			`* Recognises correct model type using type info`
			`* @param model a pointer to a llmodel_model instance.`
			`*/`
			`void llmodel_model_destroy(llmodel_model model);`

Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`/**`
			`* Load a model from a file.`
Clean up the docs a bit. 2023-04-25 21:14:18 -04:00			`* @param model A pointer to the llmodel_model instance.`
			`* @param model_path A string representing the path to the model file.`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`* @return true if the model was loaded successfully, false otherwise.`
			`*/`
Fixup the api a bit. 2023-04-25 21:03:10 -04:00			`bool llmodel_loadModel(llmodel_model model, const char *model_path);`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00
			`/**`
			`* Check if a model is loaded.`
Clean up the docs a bit. 2023-04-25 21:14:18 -04:00			`* @param model A pointer to the llmodel_model instance.`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`* @return true if the model is loaded, false otherwise.`
			`*/`
Fixup the api a bit. 2023-04-25 21:03:10 -04:00			`bool llmodel_isModelLoaded(llmodel_model model);`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 15:31:41 -04:00			`/**`
			`* Get the size of the internal state of the model.`
			`* NOTE: This state data is specific to the type of model you have created.`
			`* @param model A pointer to the llmodel_model instance.`
			`* @return the size in bytes of the internal state of the model`
			`*/`
			`uint64_t llmodel_get_state_size(llmodel_model model);`

			`/**`
			`* Saves the internal state of the model to the specified destination address.`
			`* NOTE: This state data is specific to the type of model you have created.`
			`* @param model A pointer to the llmodel_model instance.`
			`* @param dest A pointer to the destination.`
			`* @return the number of bytes copied`
			`*/`
			`uint64_t llmodel_save_state_data(llmodel_model model, uint8_t *dest);`

			`/**`
			`* Restores the internal state of the model using data from the specified address.`
			`* NOTE: This state data is specific to the type of model you have created.`
			`* @param model A pointer to the llmodel_model instance.`
			`* @param src A pointer to the src.`
			`* @return the number of bytes read`
			`*/`
			`uint64_t llmodel_restore_state_data(llmodel_model model, const uint8_t *src);`

Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`/**`
			`* Generate a response using the model.`
Clean up the docs a bit. 2023-04-25 21:14:18 -04:00			`* @param model A pointer to the llmodel_model instance.`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`* @param prompt A string representing the input prompt.`
Move the promptCallback to own function. 2023-04-27 11:08:15 -04:00			`* @param prompt_callback A callback function for handling the processing of prompt.`
			`* @param response_callback A callback function for handling the generated response.`
			`* @param recalculate_callback A callback function for handling recalculation requests.`
Clean up the docs a bit. 2023-04-25 21:14:18 -04:00			`* @param ctx A pointer to the llmodel_prompt_context structure.`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`*/`
Fixup the api a bit. 2023-04-25 21:03:10 -04:00			`void llmodel_prompt(llmodel_model model, const char *prompt,`
llmodel: fix wrong and/or missing prompt callback type Fix occurrences of the prompt callback being incorrectly specified, or the response callback's prototype being incorrectly used in its place. Signed-off-by: Juuso Alasuutari <juuso.alasuutari@gmail.com> 2023-05-21 15:43:45 -04:00			`llmodel_prompt_callback prompt_callback,`
Move the promptCallback to own function. 2023-04-27 11:08:15 -04:00			`llmodel_response_callback response_callback,`
			`llmodel_recalculate_callback recalculate_callback,`
Fixup the api a bit. 2023-04-25 21:03:10 -04:00			`llmodel_prompt_context *ctx);`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00
			`/**`
			`* Set the number of threads to be used by the model.`
Clean up the docs a bit. 2023-04-25 21:14:18 -04:00			`* @param model A pointer to the llmodel_model instance.`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`* @param n_threads The number of threads to be used.`
			`*/`
Fixup the api a bit. 2023-04-25 21:03:10 -04:00			`void llmodel_setThreadCount(llmodel_model model, int32_t n_threads);`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00
			`/**`
			`* Get the number of threads currently being used by the model.`
Clean up the docs a bit. 2023-04-25 21:14:18 -04:00			`* @param model A pointer to the llmodel_model instance.`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`* @return The number of threads currently being used.`
			`*/`
Fixup the api a bit. 2023-04-25 21:03:10 -04:00			`int32_t llmodel_threadCount(llmodel_model model);`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00
			`#ifdef __cplusplus`
			`}`
			`#endif`

			`#endif // LLMODEL_C_H`