From 15940e762e9f9a257fb8ce4f711b5e1ca7740616 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 25 Apr 2023 22:47:23 -0300 Subject: [PATCH] Fix missing initial space for LlamaTokenizer --- modules/text_generation.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/text_generation.py b/modules/text_generation.py index ba915482..420cff62 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -69,6 +69,11 @@ def get_reply_from_output_ids(output_ids, input_ids, original_question, state): else: new_tokens = len(output_ids) - len(input_ids[0]) reply = decode(output_ids[-new_tokens:], state['skip_special_tokens']) + + if type(shared.tokenizer) is transformers.LlamaTokenizer: + if len(original_question) > 0 and original_question[-1] not in [' ', '\n']: + reply = ' ' + reply + if not shared.is_chat(): reply = original_question + apply_extensions('output', reply)