From b801e0d50d4d314243e1e254e82e2c3f5541b9df Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 8 Jan 2023 14:37:43 -0300
Subject: [PATCH] Minor changes

---
 README.md | 4 ++--
 server.py | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index c5efcc04..dec01cc1 100644
--- a/README.md
+++ b/README.md
@@ -68,9 +68,9 @@ You also need to put GPT-J-6B's config.json file in the same folder: [config.jso
 
 The script `convert-to-torch.py` allows you to convert models to .pt format, which is about 10x faster to load:
 
-    python convert-to-torch.py models/model-name/
+    python convert-to-torch.py models/model-name
 
-The output model will be saved to `torch-dumps/model-name.pt`. When you load a new model, the webui first looks for this .pt file; if it is not found, it loads the model as usual from `models/model-name/`.
+The output model will be saved to `torch-dumps/model-name.pt`. When you load a new model, the webui first looks for this .pt file; if it is not found, it loads the model as usual from `models/model-name`.
 
 ## Starting the webui
 
diff --git a/server.py b/server.py
index 0d5dcab8..8c184985 100644
--- a/server.py
+++ b/server.py
@@ -75,7 +75,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
     if selected_model != model_name:
         model_name = selected_model
         model = None
-        tokenier = None
+        tokenizer = None
         torch.cuda.empty_cache()
         model, tokenizer = load_model(model_name)
     if inference_settings != loaded_preset:
@@ -84,8 +84,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
         loaded_preset = inference_settings
         torch.cuda.empty_cache()
 
-    input_text = question
-    input_ids = tokenizer.encode(str(input_text), return_tensors='pt').cuda()
+    input_ids = tokenizer.encode(str(question), return_tensors='pt').cuda()
     output = eval(f"model.generate(input_ids, {preset}).cuda()")
     reply = tokenizer.decode(output[0], skip_special_tokens=True)
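
For context, the loading order described in the README paragraph above (check `torch-dumps/model-name.pt` first, fall back to `models/model-name`) could look roughly like the sketch below. This is a hedged illustration, not the repository's actual `load_model`: the `transformers` Auto classes, the `torch.load` call, and the path handling are assumptions.

    # Sketch of the .pt-first loading order (assumption, not the repo's real load_model).
    from pathlib import Path

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    def load_model(model_name):
        pt_path = Path("torch-dumps") / f"{model_name}.pt"
        hf_path = Path("models") / model_name  # no trailing slash, matching the patched README

        if pt_path.exists():
            # The converted dump is a pickled model object, so torch.load restores it directly.
            model = torch.load(pt_path)
        else:
            # Fall back to a normal (slower) load from the Hugging Face model folder.
            model = AutoModelForCausalLM.from_pretrained(hf_path)
        model = model.cuda()

        # The tokenizer always comes from the original model folder.
        tokenizer = AutoTokenizer.from_pretrained(hf_path)
        return model, tokenizer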