From 893a72a1c56c625c513f4637d421d16ed2ab774a Mon Sep 17 00:00:00 2001 From: BadisG <110173477+BadisG@users.noreply.github.com> Date: Mon, 18 Sep 2023 19:27:06 +0200 Subject: [PATCH] Stop generation immediately when using "Maximum tokens/second" (#3952) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> --- modules/text_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index 8c2fe0b2..37189f18 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -96,7 +96,7 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap last_update = cur_time yield reply - if stop_found: + if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything): break if not is_chat: