Better handle CUDA out of memory errors in chat mode

This commit is contained in:
oobabooga 2023-04-02 17:48:00 -03:00
parent b0890a7925
commit 5f3f3faa96

View File

@@ -119,6 +119,7 @@ def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical
# Generate
cumulative_reply = ''
for i in range(chat_generation_attempts):
reply = None
for reply in generate_reply(f"{prompt}{' ' if len(cumulative_reply) > 0 else ''}{cumulative_reply}", max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, seed, eos_token=eos_token, stopping_strings=[f"\n{name1}:", f"\n{name2}:"]):
reply = cumulative_reply + reply
@@ -145,6 +146,7 @@ def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical
if next_character_found:
break
if reply is not None:
cumulative_reply = reply
yield shared.history['visible']
@@ -162,6 +164,7 @@ def impersonate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typ
cumulative_reply = ''
for i in range(chat_generation_attempts):
reply = None
for reply in generate_reply(f"{prompt}{' ' if len(cumulative_reply) > 0 else ''}{cumulative_reply}", max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, seed, eos_token=eos_token, stopping_strings=[f"\n{name1}:", f"\n{name2}:"]):
reply = cumulative_reply + reply
reply, next_character_found = extract_message_from_reply(reply, name1, name2, check)
@@ -169,6 +172,7 @@ def impersonate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typ
if next_character_found:
break
if reply is not None:
cumulative_reply = reply
yield reply