From d47182d9d186af5a5c72cc6ce1554a6eb1c040e8 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 14 Feb 2024 00:28:51 -0300
Subject: [PATCH] llamacpp_HF: do not use oobabooga/llama-tokenizer (#5499)

---
 modules/models.py        | 23 +++++++----------------
 modules/ui_model_menu.py |  2 +-
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/modules/models.py b/modules/models.py
index 038669f3..d8f1a9f8 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -254,26 +254,17 @@ def llamacpp_loader(model_name):
 def llamacpp_HF_loader(model_name):
     from modules.llamacpp_hf import LlamacppHF
 
-    for fname in [model_name, "oobabooga_llama-tokenizer", "llama-tokenizer"]:
-        path = Path(f'{shared.args.model_dir}/{fname}')
-        if all((path / file).exists() for file in ['tokenizer_config.json', 'special_tokens_map.json', 'tokenizer.model']):
-            logger.info(f'Using tokenizer from: \"{path}\"')
-            break
+    path = Path(f'{shared.args.model_dir}/{model_name}')
+
+    # Check if a HF tokenizer is available for the model
+    if all((path / file).exists() for file in ['tokenizer.model', 'tokenizer_config.json']):
+        logger.info(f'Using tokenizer from: \"{path}\"')
     else:
-        logger.error("Could not load the model because a tokenizer in transformers format was not found. Please download oobabooga/llama-tokenizer.")
+        logger.error("Could not load the model because a tokenizer in Transformers format was not found.")
         return None, None
 
-    if shared.args.no_use_fast:
-        logger.info('Loading the tokenizer with use_fast=False.')
-
-    tokenizer = AutoTokenizer.from_pretrained(
-        path,
-        trust_remote_code=shared.args.trust_remote_code,
-        use_fast=not shared.args.no_use_fast
-    )
-
     model = LlamacppHF.from_pretrained(model_name)
-    return model, tokenizer
+    return model
 
 
 def ctransformers_loader(model_name):
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 23679097..09d4276c 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -143,7 +143,7 @@ def create_ui():
                             shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
                             shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
                             shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
-                            shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to download a tokenizer.\n\nOption 1 (recommended): place your .gguf in a subfolder of models/ along with these 4 files: special_tokens_map.json, tokenizer_config.json, tokenizer.json, tokenizer.model.\n\nOption 2: download `oobabooga/llama-tokenizer` under "Download model or LoRA". That\'s a default Llama tokenizer that will work for some (but not all) models.')
+                            shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.")
 
             with gr.Column():
                 with gr.Row():
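
Not part of the patch: a minimal standalone sketch of the tokenizer lookup that llamacpp_HF_loader performs after this change. The "models" directory and the "MyModel-GGUF" folder name are illustrative assumptions; the required file list mirrors the new check in modules/models.py.

```python
# Minimal sketch (not repository code) of the post-patch tokenizer check.
# Assumed folder layout, for illustration only:
#   models/MyModel-GGUF/mymodel.Q4_K_M.gguf
#   models/MyModel-GGUF/tokenizer.model
#   models/MyModel-GGUF/tokenizer_config.json
from pathlib import Path

model_dir = "models"           # default model directory used by the webui
model_name = "MyModel-GGUF"    # hypothetical folder containing the GGUF

path = Path(f"{model_dir}/{model_name}")

# The loader no longer falls back to oobabooga/llama-tokenizer; it only
# accepts a tokenizer stored alongside the GGUF in the model's own folder.
required_files = ["tokenizer.model", "tokenizer_config.json"]

if all((path / name).exists() for name in required_files):
    print(f'Using tokenizer from: "{path}"')
else:
    print("Could not load the model because a tokenizer in Transformers format was not found.")
```

Note that the check itself only requires tokenizer.model and tokenizer_config.json; the updated UI text also suggests copying tokenizer.json and special_tokens_map.json so the folder carries the model's complete tokenizer.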