#ifndef LLAMAMODEL_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#error This file is NOT meant to be included outside of llamamodel.cpp. Doing so is DANGEROUS. Be sure to know what you are doing before proceeding to #define LLAMAMODEL_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#endif
#ifndef LLAMAMODEL_H
#define LLAMAMODEL_H

#include <functional>
#include <memory>
#include <string>
#include <vector>

#include "llmodel.h"
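
// Forward declaration of the private implementation (PIMPL); defined in llamamodel.cpp.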
struct LLamaPrivate;
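
// Implementation of the LLModel interface backed by llama.cpp; only
// llamamodel.cpp is allowed to include this header (see the guard above).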
class LLamaModel : public LLModel {
public:
    LLamaModel();
    ~LLamaModel();

    bool supportsEmbedding() const override { return false; }
    bool supportsCompletion() const override { return true; }
    bool loadModel(const std::string &modelPath, int n_ctx, int ngl) override;
    bool isModelLoaded() const override;
    size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override;
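
    // Snapshot and restore the model's runtime state.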
    size_t stateSize() const override;
    size_t saveState(uint8_t *dest) const override;
    size_t restoreState(const uint8_t *src) override;

    void setThreadCount(int32_t n_threads) override;
    int32_t threadCount() const override;
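
    // GPU offload: enumerate available devices and select one to use.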
    std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) const override;
    bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const override;
    bool initializeGPUDevice(int device, std::string *unavail_reason) const override;
    bool hasGPUDevice() override;
    bool usingGPUDevice() override;

private:
    std::unique_ptr<LLamaPrivate> d_ptr;

protected:
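    // Hooks driven by the shared prompt/generation loop in the LLModel base class.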
    std::vector<Token> tokenize(PromptContext &, const std::string &) const override;
    std::string tokenToString(Token) const override;
    Token sampleToken(PromptContext &ctx) const override;
    bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override;
    int32_t contextLength() const override;
    const std::vector<Token> &endTokens() const override;
    int32_t maxContextLength(std::string const &modelPath) const override;
    int32_t layerCount(std::string const &modelPath) const override;
};

#endif // LLAMAMODEL_H