Handle unfinished lists during markdown streaming

This commit is contained in:
oobabooga 2023-08-03 17:10:57 -07:00
parent f4005164f4
commit 4b3384e353
2 changed files with 26 additions and 8 deletions

View File

@ -75,12 +75,12 @@ class ModelDownloader:
if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')):
is_lora = True
is_pytorch = re.match("(pytorch|adapter|gptq)_model.*\.bin", fname)
is_safetensors = re.match(".*\.safetensors", fname)
is_pt = re.match(".*\.pt", fname)
is_ggml = re.match(".*ggml.*\.bin", fname)
is_tokenizer = re.match("(tokenizer|ice|spiece).*\.model", fname)
is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer
is_pytorch = re.match(r"(pytorch|adapter|gptq)_model.*\.bin", fname)
is_safetensors = re.match(r".*\.safetensors", fname)
is_pt = re.match(r".*\.pt", fname)
is_ggml = re.match(r".*ggml.*\.bin", fname)
is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname)
is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer
if any((is_pytorch, is_safetensors, is_pt, is_ggml, is_tokenizer, is_text)):
if 'lfs' in dict[i]:
sha256.append([fname, dict[i]['lfs']['oid']])

View File

@ -61,8 +61,26 @@ def convert_to_markdown(string):
if is_code:
result = result + '```' # Unfinished code block
string = result.strip()
return markdown.markdown(string, extensions=['fenced_code', 'tables'])
result = result.strip()
# Unfinished list, like "\n1.". A |delete| string is added and then
# removed to force an <ol> to be generated instead of a <p>.
if re.search(r'(\d+\.?)$', result):
delete_str = '|delete|'
if not result.endswith('.'):
result += '.'
result = re.sub(r'(\d+\.)$', r'\g<1> ' + delete_str, result)
html = markdown.markdown(result, extensions=['fenced_code', 'tables'])
pos = html.rfind(delete_str)
if pos > -1:
html = html[:pos] + html[pos + len(delete_str):]
else:
html = markdown.markdown(result, extensions=['fenced_code', 'tables'])
return html
def generate_basic_html(string):