From e3968f7dd0d1d3252615b54f2df1de5d0eae1f30 Mon Sep 17 00:00:00 2001 From: practicaldreamer <78515588+practicaldreamer@users.noreply.github.com> Date: Tue, 2 May 2023 21:16:08 -0500 Subject: [PATCH] Fix Training Pad Token (#1678) Previously this padded with the character "0" rather than with token id 0 (in the case of llama) --- modules/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index 074e5b68..9789c1c2 100644 --- a/modules/training.py +++ b/modules/training.py @@ -243,7 +243,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch return gradient_accumulation_steps = batch_size // micro_batch_size - shared.tokenizer.pad_token = 0 + shared.tokenizer.pad_token_id = 0 shared.tokenizer.padding_side = "left" def tokenize(prompt):