fix: log for multiple epochs

Zach Nussbaum 2023-03-28 18:47:58 +00:00
parent c4f96d9bec
commit 812b807003


@@ -127,7 +127,8 @@ def train(accelerator, config):
             # log LR in case something weird happens
             if step > 0 and step % (config["eval_every"] // 10) == 0:
                 if config["wandb"]:
-                    accelerator.log({"lr": scheduler.get_last_lr()[0]}, step=step)
+                    curr_step = step + epoch * len(train_dataloader)
+                    accelerator.log({"lr": scheduler.get_last_lr()[0]}, step=curr_step)

             if (step + 1) % gradient_accumulation_steps == 0 or step == len(train_dataloader) - 1:
                 optimizer.step()
@@ -151,7 +151,8 @@ def train(accelerator, config):
                 }
                 if config["wandb"]:
-                    accelerator.log({**log_train, **log_val}, step=step)
+                    curr_step = step + epoch * len(train_dataloader)
+                    accelerator.log({**log_train, **log_val}, step=curr_step)

                 accelerator.print(f"Current LR: {scheduler.get_last_lr()[0]}")
                 accelerator.print(format_metrics(log_train, "train", f" step {step} "))
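
Why the change: with a per-epoch step counter, every epoch would log wandb points at steps 0..len(train_dataloader)-1 again, overwriting the previous epoch's metrics. Offsetting by epoch * len(train_dataloader) gives a run-global, monotonically increasing step. A minimal sketch of the idea; the enclosing loop structure and the num_epochs name are assumptions for illustration, not part of this diff:

# Sketch only: loop structure and num_epochs are assumed for illustration.
for epoch in range(num_epochs):
    for step, batch in enumerate(train_dataloader):
        # run-global step keeps the wandb x-axis increasing across epochs
        curr_step = step + epoch * len(train_dataloader)
        accelerator.log({"lr": scheduler.get_last_lr()[0]}, step=curr_step)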