Merge branch 'main' into feature/model-lock

James Ravenscroft 2023-08-24 14:58:44 +01:00
commit dc81abbc52
2 changed files with 11 additions and 4 deletions


@@ -6,9 +6,9 @@
| Model Name | RAM Requirement | Direct Download | HF Project Link |
|---------------------|-----------------|-----------------|-----------------|
-| StarCoder | ~3GiB | [:arrow_down:](https://huggingface.co/TheBloke/stablecode-instruct-alpha-3b-GGML/blob/main/stablecode-instruct-alpha-3b.ggmlv1.q4_0.bin) | [:hugs:](https://huggingface.co/TheBloke/stablecode-instruct-alpha-3b-GGML/) |
+| StableCode | ~3GiB | [:arrow_down:](https://huggingface.co/TheBloke/stablecode-instruct-alpha-3b-GGML/blob/main/stablecode-instruct-alpha-3b.ggmlv1.q4_0.bin) | [:hugs:](https://huggingface.co/TheBloke/stablecode-instruct-alpha-3b-GGML/) |
To run in Turbopilot set model type `-m stablecode`
## "Coder" family models ## "Coder" family models
@@ -23,7 +23,7 @@ This model is primarily trained on Python, Java and Javascript.
| Model Name | RAM Requirement | Direct Download | HF Project Link |
|---------------------|-----------------|-----------------|-----------------|
-| StarCoder | ~2GiB | [:arrow_down:](https://huggingface.co/mike-ravkine/gpt_bigcode-santacoder-GGML/resolve/main/santacoder-q4_0.bin) | [:hugs:](https://huggingface.co/mike-ravkine/gpt_bigcode-santacoder-GGML/) |
+| SantaCoder | ~2GiB | [:arrow_down:](https://huggingface.co/mike-ravkine/gpt_bigcode-santacoder-GGML/resolve/main/santacoder-q4_0.bin) | [:hugs:](https://huggingface.co/mike-ravkine/gpt_bigcode-santacoder-GGML/) |
To run in Turbopilot set model type `-m starcoder`
@@ -39,7 +39,7 @@ Even when quantized, WizardCoder is a large model that takes up a significant am
|---------------------|-----------------|-----------------|-----------------|
| WizardCoder | ~12GiB | [:arrow_down:](https://huggingface.co/TheBloke/WizardCoder-15B-1.0-GGML/resolve/main/WizardCoder-15B-1.0.ggmlv3.q4_0.bin) | [:hugs:](https://huggingface.co/TheBloke/WizardCoder-15B-1.0-GGML/) |
-To run in Turbopilot set model type `-m starcoder`
+To run in Turbopilot set model type `-m wizardcoder`
### StarCoder (Released 4/5/2023)
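The corrected `-m` values documented above only matter because they have to match the model-type strings compared in the second changed file below (its hunks show the `"codegen"` branch of that dispatch). A minimal sketch of that kind of string-based dispatch, using hypothetical stand-in classes rather than TurboPilot's real model types:

```cpp
#include <memory>
#include <string>

// Hypothetical stand-ins; TurboPilot's actual model classes may differ.
struct ModelBase { virtual ~ModelBase() = default; };
struct GPTJModel : ModelBase {};       // e.g. selected for "codegen"
struct StarcoderModel : ModelBase {};  // e.g. selected for "starcoder" / "wizardcoder"

// Same pattern as the diff below: compare the -m string and pick a backend.
std::unique_ptr<ModelBase> create_model(const std::string &model_type) {
    if (model_type.compare("codegen") == 0) {
        return std::make_unique<GPTJModel>();
    } else if (model_type.compare("starcoder") == 0 ||
               model_type.compare("wizardcoder") == 0) {
        return std::make_unique<StarcoderModel>();
    }
    return nullptr;  // unknown model type
}
```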


@@ -60,6 +60,11 @@ int main(int argc, char **argv)
.default_value(0.1f)
.scan<'g', float>();
program.add_argument("-b", "--batch-size")
.help("set batch size for model completion")
.default_value(512)
.scan<'i',int>();
program.add_argument("prompt").remaining(); program.add_argument("prompt").remaining();
@@ -96,6 +101,7 @@ int main(int argc, char **argv)
config.n_threads = program.get<int>("--threads");
config.temp = program.get<float>("--temperature");
config.top_p = program.get<float>("--top-p");
+config.n_batch = program.get<int>("--batch-size");
if(model_type.compare("codegen") == 0) {
spdlog::info("Initializing GPT-J type model for '{}' model", model_type);
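The new `--batch-size` flag follows the same pattern as the existing options in this file, which appear to use the p-ranav/argparse API: declare the argument with a default value and an integer scanner, then read it back with `get<int>` after parsing. A minimal, self-contained sketch of that pattern (standalone demo program, not TurboPilot's actual entry point):

```cpp
#include <argparse/argparse.hpp>
#include <iostream>

int main(int argc, char **argv) {
    argparse::ArgumentParser program("demo");

    // Integer option with a default, parsed via the 'i' (integer) scanner
    program.add_argument("-b", "--batch-size")
        .help("set batch size for model completion")
        .default_value(512)
        .scan<'i', int>();

    try {
        program.parse_args(argc, argv);
    } catch (const std::exception &err) {
        std::cerr << err.what() << std::endl;
        return 1;
    }

    // Retrieval mirrors the diff: get<int> keyed by the long option name
    int n_batch = program.get<int>("--batch-size");
    std::cout << "batch size: " << n_batch << std::endl;
    return 0;
}
```

Because `.default_value(512)` stores an `int`, the later `get<int>` succeeds even when `-b` is not passed on the command line.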
@@ -136,6 +142,7 @@ int main(int argc, char **argv)
return "Hello world";
});
CROW_ROUTE(app, "/copilot_internal/v2/token")([](){
//return "Hello world";
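For context, the `/copilot_internal/v2/token` handler the last hunk touches uses Crow's routing macro, binding a path to a lambda. A bare-bones sketch of that pattern (hypothetical include path, response body, and port, not the actual TurboPilot handler):

```cpp
#include "crow_all.h"  // assumes the single-header Crow distribution is available

int main() {
    crow::SimpleApp app;

    // Path -> lambda, as in the diff context above
    CROW_ROUTE(app, "/copilot_internal/v2/token")([]() {
        // Hypothetical static payload standing in for the real token response
        return "{\"token\": \"dummy\", \"expires_at\": 0}";
    });

    app.port(18080).multithreaded().run();
    return 0;
}
```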