Fix training dataset loading #636

2024-10-01 01:26:03 -04:00 · 2023-03-29 11:48:17 -03:00 · 2023-03-29 11:48:17 -03:00 · a6d0373063
commit a6d0373063
parent 41b58bc47e
1 changed files with 7 additions and 6 deletions
--- a/modules/training.py
+++ b/modules/training.py
@ -119,7 +119,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int
        }

    # == Prep the dataset, format, etc ==
-    if raw_text_file is not None:
+    if raw_text_file not in ['None', '']:
        print("Loading raw text file dataset...")
        with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r') as file:
            raw_text = file.read()
@ -136,16 +136,17 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int
        del text_chunks

    else:
-        with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile:
-            format_data: dict[str, str] = json.load(formatFile)
-
-        if dataset is None:
+        if dataset in ['None', '']:
            yield "**Missing dataset choice input, cannot continue.**"
            return
-        if format is None:
+
+        if format in ['None', '']:
            yield "**Missing format choice input, cannot continue.**"
            return

+        with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile:
+            format_data: dict[str, str] = json.load(formatFile)
+
        def generate_prompt(data_point: dict[str, str]):
            for options, data in format_data.items():
                if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0):