Read more GGUF metadata (scale_linear and freq_base) (#3877)

2024-10-01 01:26:03 -04:00 · 2023-09-12 23:02:42 +03:00 · 2023-09-12 23:02:42 +03:00 · 460c40d8ab
commit 460c40d8ab
parent 90fca6a77d
3 changed files with 9 additions and 1 deletion
--- a/models/config.yaml
+++ b/models/config.yaml
@ -226,4 +226,4 @@ llama-65b-gptq-3bit:
 .*codellama:
  rope_freq_base: 1000000
 .*codellama.*instruct:
-  instruction_template: 'Llama-v2'
+  instruction_template: 'Llama-v2'
--- a/modules/metadata_gguf.py
+++ b/modules/metadata_gguf.py
@ -70,6 +70,9 @@ def load_metadata(fname):
        GGUF_VERSION = struct.unpack("<I", file.read(4))[0]
        ti_data_count = struct.unpack("<Q", file.read(8))[0]
        kv_data_count = struct.unpack("<Q", file.read(8))[0]
+        
+        if GGUF_VERSION == 1: 
+            raise Exception('You are using an outdated GGUF, please download a new one.')

        for i in range(kv_data_count):
            key_length = struct.unpack("<Q", file.read(8))[0]
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@ -17,6 +17,7 @@ def get_fallback_settings():
        'truncation_length': shared.settings['truncation_length'],
        'n_ctx': 2048,
        'rope_freq_base': 0,
+        'compress_pos_emb': 1,
    }


@ -48,6 +49,10 @@ def get_model_metadata(model):
        metadata = metadata_gguf.load_metadata(model_file)
        if 'llama.context_length' in metadata:
            model_settings['n_ctx'] = metadata['llama.context_length']
+        if 'llama.rope.scale_linear' in metadata:
+            model_settings['compress_pos_emb'] = metadata['llama.rope.scale_linear']
+        if 'llama.rope.freq_base' in metadata:
+            model_settings['rope_freq_base'] = metadata['llama.rope.freq_base']    

    return model_settings