From e3968f7dd0d1d3252615b54f2df1de5d0eae1f30 Mon Sep 17 00:00:00 2001 From: practicaldreamer <78515588+practicaldreamer@users.noreply.github.com> Date: Tue, 2 May 2023 21:16:08 -0500 Subject: [PATCH] Fix Training Pad Token (#1678) Previously this padded with the character "0" rather than with token id 0 (in the case of llama) --- modules/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index 074e5b68..9789c1c2 100644 --- a/modules/training.py +++ b/modules/training.py @@ -243,7 +243,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch return gradient_accumulation_steps = batch_size // micro_batch_size - shared.tokenizer.pad_token = 0 + shared.tokenizer.pad_token_id = 0 shared.tokenizer.padding_side = "left" def tokenize(prompt):