mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-10-01 01:26:03 -04:00
Fix UnicodeDecodeError for BPE-based Models (especially GLM-4) (#6357)
This commit is contained in:
parent
41a8eb4eeb
commit
4c74c7a116
@@ -274,7 +274,12 @@ def get_reply_from_output_ids(output_ids, state=None, starting_from=0):
|
||||
if (hasattr(shared.tokenizer, 'convert_ids_to_tokens') and len(output_ids) > starting_from) and not reply.startswith(' '):
|
||||
first_token = shared.tokenizer.convert_ids_to_tokens(int(output_ids[starting_from]))
|
||||
if isinstance(first_token, (bytes,)):
|
||||
first_token = first_token.decode('utf8')
|
||||
# Try to decode the bytes into a string.
|
||||
try:
|
||||
first_token = first_token.decode('utf8')
|
||||
# If decoding fails, the bytes are not valid UTF-8 on their own (presumably a partial multi-byte character), so ignore this token.
|
||||
except UnicodeDecodeError:
|
||||
first_token = ''
|
||||
|
||||
if first_token.startswith('▁'):
|
||||
reply = ' ' + reply
|
||||
|
Loading…
Reference in New Issue
Block a user