From 6518b3369799a6722e25eb5d119d379e25f13bb2 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Tue, 13 Aug 2024 17:04:50 -0400
Subject: [PATCH] llamamodel: use greedy sampling when temp=0 (#2854)

Signed-off-by: Jared Van Bortel
---
 gpt4all-backend/llamamodel.cpp       | 26 +++++++++++++++++---------
 gpt4all-bindings/python/CHANGELOG.md |  3 +++
 gpt4all-chat/CHANGELOG.md            |  8 +++++++-
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index f07a05e8..e2bbd0ac 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -137,7 +137,7 @@ struct gpt_params {
     bool use_mlock = false; // use mlock to keep model in memory
 };
 
-static int llama_sample_top_p_top_k(
+static llama_token llama_sample_top_p_top_k(
     llama_context *ctx,
     const llama_token *last_n_tokens_data,
     int last_n_tokens_size,
@@ -157,14 +157,22 @@
     llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};
     // Sample repeat penalty
     llama_sample_repetition_penalties(nullptr, &candidates_p, last_n_tokens_data, last_n_tokens_size, repeat_penalty, 0.0f, 0.0f);
-    // Temperature sampling
-    llama_sample_top_k(ctx, &candidates_p, top_k, 1);
-    llama_sample_tail_free(ctx, &candidates_p, 1.0f, 1);
-    llama_sample_typical(ctx, &candidates_p, 1.0f, 1);
-    llama_sample_top_p(ctx, &candidates_p, top_p, 1);
-    llama_sample_min_p(ctx, &candidates_p, min_p, 1);
-    llama_sample_temp(ctx, &candidates_p, temp);
-    return llama_sample_token(ctx, &candidates_p);
+
+    llama_token id;
+    if (temp == 0.0) {
+        // greedy sampling, no probs
+        id = llama_sample_token_greedy(ctx, &candidates_p);
+    } else {
+        // temperature sampling
+        llama_sample_top_k(ctx, &candidates_p, top_k, 1);
+        llama_sample_tail_free(ctx, &candidates_p, 1.0f, 1);
+        llama_sample_typical(ctx, &candidates_p, 1.0f, 1);
+        llama_sample_top_p(ctx, &candidates_p, top_p, 1);
+        llama_sample_min_p(ctx, &candidates_p, min_p, 1);
+        llama_sample_temp(ctx, &candidates_p, temp);
+        id = llama_sample_token(ctx, &candidates_p);
+    }
+    return id;
 }
 
 const char *get_arch_name(gguf_context *ctx_gguf)
diff --git a/gpt4all-bindings/python/CHANGELOG.md b/gpt4all-bindings/python/CHANGELOG.md
index 9dfd7f9f..7de7b980 100644
--- a/gpt4all-bindings/python/CHANGELOG.md
+++ b/gpt4all-bindings/python/CHANGELOG.md
@@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ## [Unreleased]
 
+### Added
+- Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
+
 ### Changed
 - Search for pip-installed CUDA 11 as well as CUDA 12 ([#2802](https://github.com/nomic-ai/gpt4all/pull/2802))
 - Stop shipping CUBINs to reduce wheel size ([#2802](https://github.com/nomic-ai/gpt4all/pull/2802))
diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md
index ae4b5d08..969669b1 100644
--- a/gpt4all-chat/CHANGELOG.md
+++ b/gpt4all-chat/CHANGELOG.md
@@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ## [Unreleased]
 
+### Added
+- Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
+
+## [3.2.1] - 2024-08-13
+
 ### Fixed
 - Do not initialize Vulkan driver when only using CPU ([#2843](https://github.com/nomic-ai/gpt4all/pull/2843))
 - Fix a potential crash on exit when using only CPU on Linux with NVIDIA (does not affect X11) ([#2843](https://github.com/nomic-ai/gpt4all/pull/2843))
@@ -90,7 +95,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
 - Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
 
-[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.2.0...HEAD
+[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.2.1...HEAD
+[3.2.1]: https://github.com/nomic-ai/gpt4all/compare/v3.2.0...v3.2.1
 [3.2.0]: https://github.com/nomic-ai/gpt4all/compare/v3.1.1...v3.2.0
 [3.1.1]: https://github.com/nomic-ai/gpt4all/compare/v3.1.0...v3.1.1
 [3.1.0]: https://github.com/nomic-ai/gpt4all/compare/v3.0.0...v3.1.0
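
For context on the change: when temp == 0 the sampler now reduces to a deterministic argmax over the candidate logits, so the top-k / tail-free / typical / top-p / min-p chain is skipped and no probabilities are ever computed (hence the "no probs" comment). Below is a minimal standalone sketch of that dispatch. The helper names are illustrative only, not the llama.cpp API, and the repetition penalty that llama_sample_top_p_top_k() applies before sampling is omitted for brevity.

// Standalone sketch of the temp==0 dispatch (illustrative names, not llama.cpp).
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <random>
#include <vector>

// Greedy path: deterministically pick the highest-logit token.
static int sample_greedy(const std::vector<float> &logits) {
    return static_cast<int>(
        std::max_element(logits.begin(), logits.end()) - logits.begin());
}

// Temperature path: softmax over logits / temp, then draw a token at random.
static int sample_with_temp(const std::vector<float> &logits, float temp,
                            std::mt19937 &rng) {
    const float max_logit = *std::max_element(logits.begin(), logits.end());
    std::vector<double> weights;
    weights.reserve(logits.size());
    for (float l : logits)
        weights.push_back(std::exp((l - max_logit) / temp)); // unnormalized
    // discrete_distribution normalizes the weights internally
    std::discrete_distribution<int> dist(weights.begin(), weights.end());
    return dist(rng);
}

// Mirrors the patched function: greedy when temp == 0, stochastic otherwise.
static int sample_token(const std::vector<float> &logits, float temp,
                        std::mt19937 &rng) {
    return temp == 0.0f ? sample_greedy(logits)
                        : sample_with_temp(logits, temp, rng);
}

int main() {
    const std::vector<float> logits = {1.0f, 3.5f, 0.2f, 2.9f};
    std::mt19937 rng(42);
    std::printf("temp=0.0 -> token %d (same every run)\n",
                sample_token(logits, 0.0f, rng));
    std::printf("temp=0.7 -> token %d (varies with the RNG)\n",
                sample_token(logits, 0.7f, rng));
    return 0;
}

Because the greedy path never draws from a distribution, repeated generations with temp=0 yield the same tokens for the same prompt and model state, which is the user-visible behavior the changelog entries describe.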