commit 00aedf9209

Merge remote-tracking branch 'refs/remotes/origin/dev' into dev
@@ -165,10 +165,19 @@ class ExllamaModel:
                 if has_leading_space:
                     decoded_text = ' ' + decoded_text
 
-                yield decoded_text
+                # Check the partial unicode character
+                if chr(0xfffd) in decoded_text:
+                    is_last = i == max_new_tokens - 1
+                    is_stopping = token.item() == self.generator.tokenizer.eos_token_id or shared.stop_everything
+                    # If we are not at the end of the generation, we skip this token
+                    if not (is_last or is_stopping):
+                        continue
+
                 if token.item() == self.generator.tokenizer.eos_token_id or shared.stop_everything:
                     break
 
+                yield decoded_text
+
             # Case 2: CFG
             # Copied from https://github.com/turboderp/exllama/blob/master/example_cfg.py
             else:
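Note: the new check relies on how truncated UTF-8 decodes. When a multi-byte character is split across tokens, decoding the incomplete bytes yields U+FFFD, the replacement character. A minimal standalone sketch (not part of this commit) showing the effect:

    # Decoding a byte stream cut off mid-character produces U+FFFD ('\ufffd'),
    # which is exactly what the chr(0xfffd) check above detects.
    llama = "🦙".encode("utf-8")            # 4 bytes: b'\xf0\x9f\xa6\x99'

    partial = llama[:2].decode("utf-8", errors="replace")
    print(chr(0xfffd) in partial)           # True  -> hold this chunk back

    complete = llama.decode("utf-8", errors="replace")
    print(chr(0xfffd) in complete)          # False -> safe to yield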
@@ -205,6 +214,14 @@ class ExllamaModel:
                 if has_leading_space:
                     decoded_text = ' ' + decoded_text
 
+                # Check the partial unicode character
+                if chr(0xfffd) in decoded_text:
+                    is_last = i == max_new_tokens - 1
+                    is_stopping = token.item() == self.tokenizer.eos_token_id or shared.stop_everything
+                    # If we are not at the end of the generation, we skip this token
+                    if not (is_last or is_stopping):
+                        continue
+
                 yield decoded_text
                 if token.item() == self.tokenizer.eos_token_id or shared.stop_everything:
                     break
@@ -138,11 +138,19 @@ class Exllamav2Model:
             if has_leading_space:
                 decoded_text = ' ' + decoded_text
 
-            yield decoded_text
+            # Check the partial unicode character
+            if chr(0xfffd) in decoded_text:
+                is_last = i == max_new_tokens - 1
+                is_stopping = token.item() == self.tokenizer.eos_token_id or shared.stop_everything
+                # If we are not at the end of the generation, we skip this token
+                if not (is_last or is_stopping):
+                    continue
+
             if token.item() == self.tokenizer.eos_token_id or shared.stop_everything:
                 break
 
+            yield decoded_text
+
     def generate(self, prompt, state):
         output = ''
         for output in self.generate_with_streaming(prompt, state):
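All three streaming loops apply the same rule: a chunk containing U+FFFD is held back until the character completes, unless generation is ending anyway (is_last or is_stopping), in which case it is emitted so no text is silently dropped. A self-contained sketch of this hold-back pattern (assumption: a raw byte stream stands in for the incremental tokenizer decode):

    def stream_decoded(data: bytes, chunk_size: int = 2):
        # Re-decode the cumulative buffer each step, just as the loops
        # above re-decode the growing token sequence.
        buffered = b""
        for offset in range(0, len(data), chunk_size):
            buffered += data[offset:offset + chunk_size]
            decoded_text = buffered.decode("utf-8", errors="replace")
            is_last = offset + chunk_size >= len(data)
            # Skip chunks that end mid-character, unless this is the last one
            if chr(0xfffd) in decoded_text and not is_last:
                continue
            yield decoded_text

    for text in stream_decoded("a🦙b".encode("utf-8")):
        print(repr(text))                   # only complete text is emitted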
@@ -362,7 +362,12 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
                 if output[-1] in eos_token_ids:
                     break
 
-                cumulative_reply += get_reply_from_output_ids(output, state, starting_from=starting_from)
+                new_content = get_reply_from_output_ids(output, state, starting_from=starting_from)
+                # check the partial unicode character
+                if chr(0xfffd) in new_content:
+                    continue
+
+                cumulative_reply += new_content
                 starting_from = len(output)
                 yield cumulative_reply
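In generate_reply_HF a simpler rule suffices: cumulative_reply is rebuilt from the same starting_from offset on every iteration, so a chunk containing U+FFFD can just be skipped and re-decoded once more tokens arrive. A toy illustration of that accumulate-or-skip behavior (assumption: byte buffers stand in for decoded output ids):

    chunks = [b"Hello ", "🦙".encode("utf-8")[:2], "🦙".encode("utf-8")[2:], b"!"]

    cumulative_reply, pending = "", b""
    for chunk in chunks:
        pending += chunk
        new_content = pending.decode("utf-8", errors="replace")
        if chr(0xfffd) in new_content:
            continue                    # offset not advanced; retry next pass
        cumulative_reply += new_content
        pending = b""                   # analogous to starting_from = len(output)
        print(cumulative_reply)         # 'Hello ', 'Hello 🦙', 'Hello 🦙!'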