2023-04-25 19:16:45 -04:00
|
|
|
#ifndef LLMODEL_C_H
|
|
|
|
#define LLMODEL_C_H
|
|
|
|
|
|
|
|
#include <stdint.h>
|
2023-04-27 09:43:24 -04:00
|
|
|
#include <stddef.h>
|
2023-04-25 19:16:45 -04:00
|
|
|
#include <stdbool.h>
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/**
 * Opaque handle to the underlying model implementation.
 * Created by llmodel_gptj_create()/llmodel_llama_create() and released
 * with the matching destroy function.
 */
typedef void *llmodel_model;
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
/**
 * llmodel_prompt_context structure for holding the prompt context.
 * NOTE: The implementation owns all memory behind the raw logits pointer and
 * the raw tokens pointer. Attempting to resize them or modify them in any way
 * from the caller side can lead to undefined behavior.
 */
typedef struct {
    float *logits;          // logits of current context
    size_t logits_size;     // the size of the raw logits vector
    int32_t *tokens;        // current tokens in the context window
    size_t tokens_size;     // the size of the raw tokens vector
    int32_t n_past;         // number of tokens in past conversation
    int32_t n_ctx;          // number of tokens possible in context window
    int32_t n_predict;      // number of tokens to predict
    int32_t top_k;          // top k logits to sample from
    float top_p;            // nucleus sampling probability threshold
    float temp;             // temperature to adjust model's output distribution
    int32_t n_batch;        // number of predictions to generate in parallel
    float repeat_penalty;   // penalty factor for repeated tokens
    int32_t repeat_last_n;  // last n tokens to penalize
    float context_erase;    // percent of context to erase if we exceed the context window
} llmodel_prompt_context;
|
2023-04-25 19:16:45 -04:00
|
|
|
|
2023-04-27 11:08:15 -04:00
|
|
|
/**
 * Callback type invoked as each prompt token is processed.
 * @param token_id The token id of the prompt.
 * @return a bool indicating whether the model should keep processing.
 */
typedef bool (*llmodel_prompt_callback)(int32_t token_id);
|
|
|
|
|
2023-04-25 19:16:45 -04:00
|
|
|
/**
 * Callback type invoked for each generated response fragment.
 * @param token_id The token id of the response.
 * @param response The response string. NOTE: a token_id of -1 indicates the string is an error string.
 * @return a bool indicating whether the model should keep generating.
 */
typedef bool (*llmodel_response_callback)(int32_t token_id, const char *response);
|
|
|
|
|
|
|
|
/**
 * Callback type for recalculation of context.
 * @param is_recalculating whether the model is recalculating the context.
 * @return a bool indicating whether the model should keep generating.
 */
typedef bool (*llmodel_recalculate_callback)(bool is_recalculating);
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Create a GPTJ instance.
|
|
|
|
* @return A pointer to the GPTJ instance.
|
|
|
|
*/
|
2023-04-25 21:06:45 -04:00
|
|
|
llmodel_model llmodel_gptj_create();
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Destroy a GPTJ instance.
|
|
|
|
* @param gptj A pointer to the GPTJ instance.
|
|
|
|
*/
|
2023-04-25 21:06:45 -04:00
|
|
|
void llmodel_gptj_destroy(llmodel_model gptj);
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Create a LLAMA instance.
|
|
|
|
* @return A pointer to the LLAMA instance.
|
|
|
|
*/
|
2023-04-25 21:06:45 -04:00
|
|
|
llmodel_model llmodel_llama_create();
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Destroy a LLAMA instance.
|
|
|
|
* @param llama A pointer to the LLAMA instance.
|
|
|
|
*/
|
2023-04-25 21:06:45 -04:00
|
|
|
void llmodel_llama_destroy(llmodel_model llama);
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Load a model from a file.
|
2023-04-25 21:14:18 -04:00
|
|
|
* @param model A pointer to the llmodel_model instance.
|
|
|
|
* @param model_path A string representing the path to the model file.
|
2023-04-25 19:16:45 -04:00
|
|
|
* @return true if the model was loaded successfully, false otherwise.
|
|
|
|
*/
|
2023-04-25 21:03:10 -04:00
|
|
|
bool llmodel_loadModel(llmodel_model model, const char *model_path);
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Check if a model is loaded.
|
2023-04-25 21:14:18 -04:00
|
|
|
* @param model A pointer to the llmodel_model instance.
|
2023-04-25 19:16:45 -04:00
|
|
|
* @return true if the model is loaded, false otherwise.
|
|
|
|
*/
|
2023-04-25 21:03:10 -04:00
|
|
|
bool llmodel_isModelLoaded(llmodel_model model);
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Generate a response using the model.
|
2023-04-25 21:14:18 -04:00
|
|
|
* @param model A pointer to the llmodel_model instance.
|
2023-04-25 19:16:45 -04:00
|
|
|
* @param prompt A string representing the input prompt.
|
2023-04-27 11:08:15 -04:00
|
|
|
* @param prompt_callback A callback function for handling the processing of prompt.
|
|
|
|
* @param response_callback A callback function for handling the generated response.
|
|
|
|
* @param recalculate_callback A callback function for handling recalculation requests.
|
2023-04-25 21:14:18 -04:00
|
|
|
* @param ctx A pointer to the llmodel_prompt_context structure.
|
2023-04-25 19:16:45 -04:00
|
|
|
*/
|
2023-04-25 21:03:10 -04:00
|
|
|
void llmodel_prompt(llmodel_model model, const char *prompt,
|
2023-04-27 11:08:15 -04:00
|
|
|
llmodel_response_callback prompt_callback,
|
|
|
|
llmodel_response_callback response_callback,
|
|
|
|
llmodel_recalculate_callback recalculate_callback,
|
2023-04-25 21:03:10 -04:00
|
|
|
llmodel_prompt_context *ctx);
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Set the number of threads to be used by the model.
|
2023-04-25 21:14:18 -04:00
|
|
|
* @param model A pointer to the llmodel_model instance.
|
2023-04-25 19:16:45 -04:00
|
|
|
* @param n_threads The number of threads to be used.
|
|
|
|
*/
|
2023-04-25 21:03:10 -04:00
|
|
|
void llmodel_setThreadCount(llmodel_model model, int32_t n_threads);
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Get the number of threads currently being used by the model.
|
2023-04-25 21:14:18 -04:00
|
|
|
* @param model A pointer to the llmodel_model instance.
|
2023-04-25 19:16:45 -04:00
|
|
|
* @return The number of threads currently being used.
|
|
|
|
*/
|
2023-04-25 21:03:10 -04:00
|
|
|
int32_t llmodel_threadCount(llmodel_model model);
|
2023-04-25 19:16:45 -04:00
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif // LLMODEL_C_H
|