From ce3eb80f5bd8351170a7e18286096ece2563e83b Mon Sep 17 00:00:00 2001
From: swanserquack <75944176+swanserquack@users.noreply.github.com>
Date: Wed, 12 Apr 2023 19:29:53 +0100
Subject: [PATCH] Set UTF-8 encoding on vocab.json

---
 convert-codegen-to-ggml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert-codegen-to-ggml.py b/convert-codegen-to-ggml.py
index 5d6c327..0f5e55c 100644
--- a/convert-codegen-to-ggml.py
+++ b/convert-codegen-to-ggml.py
@@ -59,7 +59,7 @@ if len(sys.argv) < 3:
 dir_model = sys.argv[1]
 fname_out = sys.argv[1] + "/ggml-model.bin"
 
-with open(dir_model + "/vocab.json", "r") as f:
+with open(dir_model + "/vocab.json", "r", encoding="utf8") as f:
     encoder = json.load(f)
 
 with open(dir_model + "/added_tokens.json", "r") as f: