fix: use right conversion script

2024-10-01 01:06:10 -04:00 · 2023-05-11 11:20:43 -04:00 · 2023-05-11 11:20:43 -04:00 · 1ed71fbbf8
commit 1ed71fbbf8
parent f8fdcccc5d
1 changed file with 46 additions and 76 deletions
--- a/gpt4all-backend/scripts/convert_mpt_hf_to_ggml.py
+++ b/gpt4all-backend/scripts/convert_mpt_hf_to_ggml.py
@@ -76,24 +76,32 @@ fout = open(fname_out, "wb")
 vocab = tokenizer.vocab
 hparams["multiple_of"] = 1
-fout.write(struct.pack("i", 0x67676d6d)) # magic: ggml in hex
+fout.write(struct.pack("I", 0x67676d6d)) # magic: ggml in hex
-fout.write(struct.pack("i", hparams["vocab_size"]))
+fout.write(struct.pack("I", model.config.vocab_size))
-fout.write(struct.pack("i", hparams["max_seq_len"]))
+fout.write(struct.pack("I", model.config.max_seq_len))
-fout.write(struct.pack("i", hparams["d_model"]))
+fout.write(struct.pack("I", model.config.n_layers))
-fout.write(struct.pack("i", hparams["n_heads"]))
+fout.write(struct.pack("I", model.config.n_heads))
-fout.write(struct.pack("i", hparams["n_layers"]))
+fout.write(struct.pack("I", model.config.d_model))
-# n_rot (unused)
+fout.write(struct.pack("f", model.config.attn_config['alibi_bias_max']))
-fout.write(struct.pack("i", 0))
+clip_qkv = model.config.attn_config['clip_qkv']
-fout.write(struct.pack("i", ftype))
+fout.write(struct.pack("f",  clip_qkv if clip_qkv is not None else 0))
 fout.write(struct.pack("I", ftype))
 # # Is this correct??
 # dot_token = tokenizer.encode(".")[0]
 # write tokens to ggml file 
-fout.write(struct.pack("i", hparams["vocab_size"]))
+dot_token = tokenizer.encode('.')[0]
 fout.write(struct.pack("I", model.config.vocab_size))
-for i in range(hparams["vocab_size"]):
+for i in range(model.config.vocab_size):
-    text = tokenizer.decode([i]).encode('utf-8')
+    text = tokenizer.decode([dot_token, i]).encode('utf-8')
-    fout.write(struct.pack("i", len(text)))
+    # remove the first byte (it's always '.')
    text = text[1:]
    enclen = len(text)
    if i in tokenizer.all_special_ids:
        print(f"special token: {text}")
        enclen = enclen | 1<<31
    fout.write(struct.pack("I", enclen))
    fout.write(text)
 list_vars = model.state_dict()
@@ -101,51 +109,13 @@ for name in list_vars.keys():
    data = list_vars[name].squeeze().numpy()
    print("Processing variable: " + name + " with shape: ", data.shape)
    # we don't need these
    if name.endswith("attn.masked_bias") or name.endswith(".attn.bias"):
        print("  Skipping variable: " + name)
        continue
    if "Wqkv.weight" in name:
        # chunk qkv
        query, key, value = np.split(data, 3, axis=0)
        new_name = name.split("Wqkv.weight")[0]
        for (data, name) in [(query, new_name + "q_proj.weight"), (key, new_name + "k_proj.weight"), (value, new_name + "v_proj.weight")]:
            print(f"Processing variable: {name} with shape: {data.shape}")
    n_dims = len(data.shape);
    # ftype == 0 -> float32, ftype == 1 -> float16
    ftype_cur = 0;
    if ftype != 0:
-                print("  Converting to float16")
+        # Keep token embeddings in fp32
-                data = data.astype(np.float16)
+        if name[-7:] == ".weight" and n_dims == 2 and ".wte" not in name:
                ftype_cur = 1
            else:
                if data.dtype != np.float32:
                    print("  Converting to float32")
                    data = data.astype(np.float32)
                    ftype_cur = 0
            # header
            str = name.encode('utf-8')
            fout.write(struct.pack("iii", n_dims, len(str), ftype_cur))
            for i in range(n_dims):
                fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
            fout.write(str);
            # data
            data.tofile(fout)
    else:
        n_dims = len(data.shape);
        # ftype == 0 -> float32, ftype == 1 -> float16
        ftype_cur = 0;
        if ftype != 0:
            if name[-7:] == ".weight" and n_dims == 2:
            print("  Converting to float16")
            data = data.astype(np.float16)
            ftype_cur = 1