#ifndef LLAMAMODEL_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#error This file is NOT meant to be included outside of llamamodel.cpp. Doing so is DANGEROUS. Be sure to know what you are doing before proceeding to #define LLAMAMODEL_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#endif
#ifndef LLAMAMODEL_H
#define LLAMAMODEL_H

#include "llmodel.h"

#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct LLamaPrivate;
struct EmbModelSpec;
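
// Concrete implementation of the LLModel interface backed by llama.cpp
// (see llamamodel.cpp); all backend state lives behind the LLamaPrivate pimpl.
// Rough usage sketch (hypothetical path and values; real code presumably
// constructs the model through the LLModel loader rather than directly):
//
//     LLamaModel model;
//     if (model.loadModel("/path/to/model.gguf", /*n_ctx*/ 2048, /*ngl*/ 32))
//         /* generate or embed */;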
class LLamaModel : public LLModel {
public:
    LLamaModel();
    ~LLamaModel();

    bool supportsEmbedding() const override { return m_supportsEmbedding; }
    bool supportsCompletion() const override { return m_supportsCompletion; }
    bool loadModel(const std::string &modelPath, int n_ctx, int ngl) override;
    bool isModelBlacklisted(const std::string &modelPath) const override;
    bool isEmbeddingModel(const std::string &modelPath) const override;
    bool isModelLoaded() const override;
    size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override;
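
    // State (de)serialization; presumably dest/src must hold at least
    // stateSize() bytes.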
    size_t stateSize() const override;
    size_t saveState(uint8_t *dest) const override;
    size_t restoreState(const uint8_t *src) override;

    void setThreadCount(int32_t n_threads) override;
    int32_t threadCount() const override;
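
    // GPU device enumeration and selection: a device may be chosen by memory
    // requirement and name, or by index (with the failure reason returned via
    // unavail_reason).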
    std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired = 0) const override;
    bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const override;
    bool initializeGPUDevice(int device, std::string *unavail_reason = nullptr) const override;
    bool usingGPUDevice() const override;
    const char *backendName() const override;
    const char *gpuDeviceName() const override;
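
    // Embedding interface: the two embed() overloads differ in how the task
    // prefix is chosen (caller-supplied vs. derived from isRetrieval).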
    size_t embeddingSize() const override;
    // user-specified prefix
    void embed(const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix,
               int dimensionality = -1, size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false,
               EmbedCancelCallback *cancelCb = nullptr) override;
    // automatic prefix
    void embed(const std::vector<std::string> &texts, float *embeddings, bool isRetrieval, int dimensionality = -1,
               size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false) override;

private:
    std::unique_ptr<LLamaPrivate> d_ptr;
    bool m_supportsEmbedding = false;
    bool m_supportsCompletion = false;

protected:
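    // Low-level hooks through which the LLModel base class drives
    // tokenization, evaluation, and sampling.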
    std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) const override;
    std::string tokenToString(Token id) const override;
    Token sampleToken(PromptContext &ctx) const override;
    bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override;
    int32_t contextLength() const override;
    const std::vector<Token> &endTokens() const override;
    bool shouldAddBOS() const override;
    int32_t maxContextLength(std::string const &modelPath) const override;
    int32_t layerCount(std::string const &modelPath) const override;
    void embedInternal(const std::vector<std::string> &texts, float *embeddings, std::string prefix, int dimensionality,
                       size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb,
                       const EmbModelSpec *spec);
};

#endif // LLAMAMODEL_H