diff --git a/README.md b/README.md
index 01f86723..3d7452aa 100644
--- a/README.md
+++ b/README.md
@@ -212,7 +212,7 @@ Optionally, you can use the following command-line flags:
 
 | Flag                                        | Description |
 |---------------------------------------------|-------------|
-| `--cpu`                                     | Use the CPU to generate text. |
+| `--cpu`                                     | Use the CPU to generate text. Warning: Training on CPU is extremely slow.|
 | `--auto-devices`                            | Automatically split the model across the available GPU(s) and CPU. |
 | `--gpu-memory GPU_MEMORY [GPU_MEMORY ...]`  | Maxmimum GPU memory in GiB to be allocated per GPU. Example: `--gpu-memory 10` for a single GPU, `--gpu-memory 10 5` for two GPUs. You can also set values in MiB like `--gpu-memory 3500MiB`. |
 | `--cpu-memory CPU_MEMORY`                   | Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.|
diff --git a/modules/shared.py b/modules/shared.py
index 62cd20d3..9dc8d970 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -90,7 +90,7 @@ parser.add_argument('--extensions', type=str, nargs="+", help='The list of exten
 parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
 
 # Accelerate/transformers
-parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text.')
+parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text. Warning: Training on CPU is extremely slow.')
 parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.')
 parser.add_argument('--gpu-memory', type=str, nargs="+", help='Maxmimum GPU memory in GiB to be allocated per GPU. Example: --gpu-memory 10 for a single GPU, --gpu-memory 10 5 for two GPUs. You can also set values in MiB like --gpu-memory 3500MiB.')
 parser.add_argument('--cpu-memory', type=str, help='Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.')
diff --git a/modules/training.py b/modules/training.py
index 51072846..aaca44c5 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -238,7 +238,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int
             warmup_steps=100,
             num_train_epochs=epochs,
             learning_rate=actual_lr,
-            fp16=True,
+            fp16=False if shared.args.cpu else True,
             logging_steps=20,
             evaluation_strategy="steps" if eval_data is not None else "no",
             save_strategy="steps",
@@ -248,7 +248,8 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int
             save_total_limit=3,
             load_best_model_at_end=True if eval_data is not None else False,
             # TODO: Enable multi-device support
-            ddp_find_unused_parameters=None
+            ddp_find_unused_parameters=None,
+            no_cuda=shared.args.cpu
         ),
         data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
         callbacks=list([Callbacks()])