From 893a72a1c56c625c513f4637d421d16ed2ab774a Mon Sep 17 00:00:00 2001
From: BadisG <110173477+BadisG@users.noreply.github.com>
Date: Mon, 18 Sep 2023 19:27:06 +0200
Subject: [PATCH] Stop generation immediately when using "Maximum
 tokens/second" (#3952)

---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
---
 modules/text_generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/text_generation.py b/modules/text_generation.py
index 8c2fe0b2..37189f18 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -96,7 +96,7 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
                     last_update = cur_time
                     yield reply
 
-        if stop_found:
+        if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
             break
 
     if not is_chat: