diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index e88ad9fe..35dd559d 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -920,11 +920,11 @@ void LLamaModel::embedInternal(
         int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), wantBOS, false);
         if (n_tokens) {
             (void)eos_token;
-            assert(useEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
-            tokens.resize(n_tokens - useEOS); // erase EOS/SEP
-        } else {
-            tokens.clear();
+            assert((useEOS && wantBOS) == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
+            if (useEOS && wantBOS)
+                n_tokens--; // erase EOS/SEP
         }
+        tokens.resize(n_tokens);
     };

     // tokenize the texts
diff --git a/gpt4all-chat/database.cpp b/gpt4all-chat/database.cpp
index 286de5e4..fbb87968 100644
--- a/gpt4all-chat/database.cpp
+++ b/gpt4all-chat/database.cpp
@@ -938,7 +938,7 @@ void Database::start()
     connect(m_embLLM, &EmbeddingLLM::errorGenerated, this, &Database::handleErrorGenerated);
     m_scanTimer->callOnTimeout(this, &Database::scanQueue);
     if (!QSqlDatabase::drivers().contains("QSQLITE")) {
-        qWarning() << "ERROR: missing sqllite driver";
+        qWarning() << "ERROR: missing sqlite driver";
     } else {
         QSqlError err = initDb();
         if (err.type() != QSqlError::NoError)
diff --git a/gpt4all-training/old-README.md b/gpt4all-training/old-README.md
index 078c6203..4a2f51dd 100644
--- a/gpt4all-training/old-README.md
+++ b/gpt4all-training/old-README.md
@@ -229,7 +229,7 @@ Raw Data:
   - Explorer: https://atlas.nomic.ai/map/gpt4all_data_clean
 - [GPT4All-J Dataset](https://huggingface.co/datasets/nomic-ai/gpt4all-j-prompt-generations)
   - Explorer Indexed on Prompts: https://atlas.nomic.ai/map/gpt4all-j-prompts-curated
-  - Exporer Indexed on Responses: https://atlas.nomic.ai/map/gpt4all-j-response-curated
+  - Explorer Indexed on Responses: https://atlas.nomic.ai/map/gpt4all-j-response-curated

 We are not distributing a LLaMa 7B checkpoint.