mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-10-01 01:26:03 -04:00
commit
dd97a83534
@ -22,7 +22,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# oobabooga/text-generation-webui\n",
|
"# oobabooga/text-generation-webui\n",
|
||||||
"\n",
|
"\n",
|
||||||
"After running both cells, a public gradio URL will appear at the bottom in a few minutes. You can optionally generate an API link.\n",
|
"After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"* Project page: https://github.com/oobabooga/text-generation-webui\n",
|
"* Project page: https://github.com/oobabooga/text-generation-webui\n",
|
||||||
"* Gradio server status: https://status.gradio.app/"
|
"* Gradio server status: https://status.gradio.app/"
|
||||||
@ -53,44 +53,28 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"#@markdown If unsure about the branch, write \"main\" or leave it blank.\n",
|
"#@markdown If unsure about the branch, write \"main\" or leave it blank.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"import torch\n",
|
"import os\n",
|
||||||
"from pathlib import Path\n",
|
"from pathlib import Path\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"os.environ.pop('PYTHONPATH', None)\n",
|
||||||
|
"\n",
|
||||||
"if Path.cwd().name != 'text-generation-webui':\n",
|
"if Path.cwd().name != 'text-generation-webui':\n",
|
||||||
" print(\"Installing the webui...\")\n",
|
" print(\"\\033[1;32;1m\\n --> Installing the web UI. This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
" !git clone https://github.com/oobabooga/text-generation-webui\n",
|
" !git clone https://github.com/oobabooga/text-generation-webui\n",
|
||||||
" %cd text-generation-webui\n",
|
" %cd text-generation-webui\n",
|
||||||
"\n",
|
"\n",
|
||||||
" torver = torch.__version__\n",
|
" # Install the project in an isolated environment\n",
|
||||||
" print(f\"TORCH: {torver}\")\n",
|
" !GPU_CHOICE=A \\\n",
|
||||||
" is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n",
|
" USE_CUDA118=FALSE \\\n",
|
||||||
"\n",
|
" LAUNCH_AFTER_INSTALL=FALSE \\\n",
|
||||||
" if is_cuda118:\n",
|
" INSTALL_EXTENSIONS=FALSE \\\n",
|
||||||
" !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118\n",
|
" ./start_linux.sh\n",
|
||||||
" else:\n",
|
|
||||||
" !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121\n",
|
|
||||||
"\n",
|
|
||||||
" textgen_requirements = open('requirements.txt').read().splitlines()\n",
|
|
||||||
" if is_cuda118:\n",
|
|
||||||
" textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n",
|
|
||||||
" with open('temp_requirements.txt', 'w') as file:\n",
|
|
||||||
" file.write('\\n'.join(textgen_requirements))\n",
|
|
||||||
"\n",
|
|
||||||
" !pip install -r temp_requirements.txt --upgrade\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"\\033[1;32;1m\\n --> If you see a warning about \\\"previously imported packages\\\", just ignore it.\\033[0;37;0m\")\n",
|
|
||||||
" print(\"\\033[1;32;1m\\n --> There is no need to restart the runtime.\\n\\033[0;37;0m\")\n",
|
|
||||||
"\n",
|
|
||||||
" try:\n",
|
|
||||||
" import flash_attn\n",
|
|
||||||
" except:\n",
|
|
||||||
" !pip uninstall -y flash_attn\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# Parameters\n",
|
"# Parameters\n",
|
||||||
"model_url = \"https://huggingface.co/TheBloke/MythoMax-L2-13B-GPTQ\" #@param {type:\"string\"}\n",
|
"model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n",
|
||||||
"branch = \"gptq-4bit-32g-actorder_True\" #@param {type:\"string\"}\n",
|
"branch = \"8.0bpw\" #@param {type:\"string\"}\n",
|
||||||
"command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n",
|
"command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant --no_flash_attn\" #@param {type:\"string\"}\n",
|
||||||
"api = False #@param {type:\"boolean\"}\n",
|
"api = False #@param {type:\"boolean\"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if api:\n",
|
"if api:\n",
|
||||||
@ -116,11 +100,10 @@
|
|||||||
" output_folder = \"\"\n",
|
" output_folder = \"\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Start the web UI\n",
|
"# Start the web UI\n",
|
||||||
"cmd = f\"python server.py --share\"\n",
|
"cmd = f\"./start_linux.sh {command_line_flags} --share\"\n",
|
||||||
"if output_folder != \"\":\n",
|
"if output_folder != \"\":\n",
|
||||||
" cmd += f\" --model {output_folder}\"\n",
|
" cmd += f\" --model {output_folder}\"\n",
|
||||||
"cmd += f\" {command_line_flags}\"\n",
|
"\n",
|
||||||
"print(cmd)\n",
|
|
||||||
"!$cmd"
|
"!$cmd"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#!/bin/bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
cd "$(dirname "${BASH_SOURCE[0]}")"
|
cd "$(dirname "${BASH_SOURCE[0]}")"
|
||||||
|
|
||||||
|
@ -39,14 +39,6 @@
|
|||||||
margin-bottom: 0 !important;
|
margin-bottom: 0 !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark .message-body p em {
|
|
||||||
color: rgb(198 202 214) !important;
|
|
||||||
}
|
|
||||||
|
|
||||||
.message-body p em {
|
|
||||||
color: rgb(110 110 110) !important;
|
|
||||||
}
|
|
||||||
|
|
||||||
.gradio-container .chat .assistant-message {
|
.gradio-container .chat .assistant-message {
|
||||||
padding: 20px;
|
padding: 20px;
|
||||||
background: #f4f4f4;
|
background: #f4f4f4;
|
||||||
|
@ -406,6 +406,14 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
|
|||||||
color: var(--body-text-color);
|
color: var(--body-text-color);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.dark .message q {
|
||||||
|
color: #f5b031;
|
||||||
|
}
|
||||||
|
|
||||||
|
.message q::before, .message q::after {
|
||||||
|
content: "";
|
||||||
|
}
|
||||||
|
|
||||||
.message-body li {
|
.message-body li {
|
||||||
list-style-position: outside;
|
list-style-position: outside;
|
||||||
}
|
}
|
||||||
|
@ -213,12 +213,10 @@ function doSyntaxHighlighting() {
|
|||||||
renderMathInElement(element, {
|
renderMathInElement(element, {
|
||||||
delimiters: [
|
delimiters: [
|
||||||
{ left: "$$", right: "$$", display: true },
|
{ left: "$$", right: "$$", display: true },
|
||||||
{ left: "$", right: "$", display: false },
|
|
||||||
{ left: "\\(", right: "\\)", display: false },
|
{ left: "\\(", right: "\\)", display: false },
|
||||||
{ left: "\\[", right: "\\]", display: true },
|
{ left: "\\[", right: "\\]", display: true },
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
observer.observe(targetElement, config);
|
observer.observe(targetElement, config);
|
||||||
|
@ -72,8 +72,6 @@ def add_lora_autogptq(lora_names):
|
|||||||
else:
|
else:
|
||||||
if len(lora_names) > 1:
|
if len(lora_names) > 1:
|
||||||
logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
|
logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
|
||||||
if not shared.args.no_inject_fused_attention:
|
|
||||||
logger.warning('Fused Attention + AutoGPTQ may break Lora loading. Disable it.')
|
|
||||||
|
|
||||||
peft_config = GPTQLoraConfig(
|
peft_config = GPTQLoraConfig(
|
||||||
inference_mode=True,
|
inference_mode=True,
|
||||||
|
@ -17,7 +17,11 @@ from PIL import Image
|
|||||||
import modules.shared as shared
|
import modules.shared as shared
|
||||||
from modules import utils
|
from modules import utils
|
||||||
from modules.extensions import apply_extensions
|
from modules.extensions import apply_extensions
|
||||||
from modules.html_generator import chat_html_wrapper, make_thumbnail
|
from modules.html_generator import (
|
||||||
|
chat_html_wrapper,
|
||||||
|
convert_to_markdown,
|
||||||
|
make_thumbnail
|
||||||
|
)
|
||||||
from modules.logging_colors import logger
|
from modules.logging_colors import logger
|
||||||
from modules.text_generation import (
|
from modules.text_generation import (
|
||||||
generate_reply,
|
generate_reply,
|
||||||
@ -368,7 +372,6 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
|
|||||||
|
|
||||||
|
|
||||||
def impersonate_wrapper(text, state):
|
def impersonate_wrapper(text, state):
|
||||||
|
|
||||||
static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||||
|
|
||||||
prompt = generate_chat_prompt('', state, impersonate=True)
|
prompt = generate_chat_prompt('', state, impersonate=True)
|
||||||
@ -488,7 +491,7 @@ def start_new_chat(state):
|
|||||||
greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])
|
greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])
|
||||||
if greeting != '':
|
if greeting != '':
|
||||||
history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]
|
history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]
|
||||||
history['visible'] += [['', apply_extensions('output', greeting, state, is_chat=True)]]
|
history['visible'] += [['', apply_extensions('output', html.escape(greeting), state, is_chat=True)]]
|
||||||
|
|
||||||
unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
|
unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
|
||||||
save_history(history, unique_id, state['character_menu'], state['mode'])
|
save_history(history, unique_id, state['character_menu'], state['mode'])
|
||||||
@ -1044,6 +1047,8 @@ def handle_unique_id_select(state):
|
|||||||
history = load_history(state['unique_id'], state['character_menu'], state['mode'])
|
history = load_history(state['unique_id'], state['character_menu'], state['mode'])
|
||||||
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||||
|
|
||||||
|
convert_to_markdown.cache_clear()
|
||||||
|
|
||||||
return [history, html]
|
return [history, html]
|
||||||
|
|
||||||
|
|
||||||
@ -1052,6 +1057,8 @@ def handle_start_new_chat_click(state):
|
|||||||
histories = find_all_histories_with_first_prompts(state)
|
histories = find_all_histories_with_first_prompts(state)
|
||||||
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||||
|
|
||||||
|
convert_to_markdown.cache_clear()
|
||||||
|
|
||||||
return [history, html, gr.update(choices=histories, value=histories[0][1])]
|
return [history, html, gr.update(choices=histories, value=histories[0][1])]
|
||||||
|
|
||||||
|
|
||||||
@ -1061,6 +1068,8 @@ def handle_delete_chat_confirm_click(state):
|
|||||||
history, unique_id = load_history_after_deletion(state, index)
|
history, unique_id = load_history_after_deletion(state, index)
|
||||||
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||||
|
|
||||||
|
convert_to_markdown.cache_clear()
|
||||||
|
|
||||||
return [
|
return [
|
||||||
history,
|
history,
|
||||||
html,
|
html,
|
||||||
@ -1099,6 +1108,8 @@ def handle_upload_chat_history(load_chat_history, state):
|
|||||||
|
|
||||||
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||||
|
|
||||||
|
convert_to_markdown.cache_clear()
|
||||||
|
|
||||||
return [
|
return [
|
||||||
history,
|
history,
|
||||||
html,
|
html,
|
||||||
@ -1119,6 +1130,8 @@ def handle_character_menu_change(state):
|
|||||||
histories = find_all_histories_with_first_prompts(state)
|
histories = find_all_histories_with_first_prompts(state)
|
||||||
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||||
|
|
||||||
|
convert_to_markdown.cache_clear()
|
||||||
|
|
||||||
return [
|
return [
|
||||||
history,
|
history,
|
||||||
html,
|
html,
|
||||||
@ -1136,6 +1149,8 @@ def handle_mode_change(state):
|
|||||||
histories = find_all_histories_with_first_prompts(state)
|
histories = find_all_histories_with_first_prompts(state)
|
||||||
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||||
|
|
||||||
|
convert_to_markdown.cache_clear()
|
||||||
|
|
||||||
return [
|
return [
|
||||||
history,
|
history,
|
||||||
html,
|
html,
|
||||||
|
@ -42,13 +42,39 @@ def fix_newlines(string):
|
|||||||
return string
|
return string
|
||||||
|
|
||||||
|
|
||||||
|
def replace_quotes(text):
|
||||||
|
|
||||||
|
# Define a list of quote pairs (opening and closing), using HTML entities
|
||||||
|
quote_pairs = [
|
||||||
|
('"', '"'), # Double quotes
|
||||||
|
('“', '”'), # Unicode left and right double quotation marks
|
||||||
|
('‘', '’'), # Unicode left and right single quotation marks
|
||||||
|
('«', '»'), # French quotes
|
||||||
|
('„', '“'), # German quotes
|
||||||
|
('‘', '’'), # Alternative single quotes
|
||||||
|
('“', '”'), # Unicode quotes (numeric entities)
|
||||||
|
('“', '”'), # Unicode quotes (hex entities)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Create a regex pattern that matches any of the quote pairs, including newlines
|
||||||
|
pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)
|
||||||
|
|
||||||
|
# Replace matched patterns with <q> tags, keeping original quotes
|
||||||
|
replaced_text = re.sub(pattern, lambda m: f'<q>{m.group(1)}{m.group(2)}{m.group(3)}</q>', text, flags=re.DOTALL)
|
||||||
|
|
||||||
|
return replaced_text
|
||||||
|
|
||||||
|
|
||||||
def replace_blockquote(m):
|
def replace_blockquote(m):
|
||||||
return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
|
return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
|
||||||
|
|
||||||
|
|
||||||
@functools.lru_cache(maxsize=4096)
|
@functools.lru_cache(maxsize=None)
|
||||||
def convert_to_markdown(string):
|
def convert_to_markdown(string):
|
||||||
|
|
||||||
|
# Quote to <q></q>
|
||||||
|
string = replace_quotes(string)
|
||||||
|
|
||||||
# Blockquote
|
# Blockquote
|
||||||
string = re.sub(r'(^|[\n])>', r'\1>', string)
|
string = re.sub(r'(^|[\n])>', r'\1>', string)
|
||||||
pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
|
pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
|
||||||
@ -124,6 +150,7 @@ def convert_to_markdown_wrapped(string, use_cache=True):
|
|||||||
|
|
||||||
|
|
||||||
def generate_basic_html(string):
|
def generate_basic_html(string):
|
||||||
|
convert_to_markdown.cache_clear()
|
||||||
string = convert_to_markdown(string)
|
string = convert_to_markdown(string)
|
||||||
string = f'<style>{readable_css}</style><div class="readable-container">{string}</div>'
|
string = f'<style>{readable_css}</style><div class="readable-container">{string}</div>'
|
||||||
return string
|
return string
|
||||||
|
@ -127,15 +127,6 @@ loaders_and_params = OrderedDict({
|
|||||||
'no_use_fast',
|
'no_use_fast',
|
||||||
'autogptq_info',
|
'autogptq_info',
|
||||||
],
|
],
|
||||||
'AutoAWQ': [
|
|
||||||
'cpu_memory',
|
|
||||||
'gpu_memory',
|
|
||||||
'auto_devices',
|
|
||||||
'max_seq_len',
|
|
||||||
'no_inject_fused_attention',
|
|
||||||
'trust_remote_code',
|
|
||||||
'no_use_fast',
|
|
||||||
],
|
|
||||||
'HQQ': [
|
'HQQ': [
|
||||||
'hqq_backend',
|
'hqq_backend',
|
||||||
'trust_remote_code',
|
'trust_remote_code',
|
||||||
@ -200,7 +191,6 @@ def transformers_samplers():
|
|||||||
loaders_samplers = {
|
loaders_samplers = {
|
||||||
'Transformers': transformers_samplers(),
|
'Transformers': transformers_samplers(),
|
||||||
'AutoGPTQ': transformers_samplers(),
|
'AutoGPTQ': transformers_samplers(),
|
||||||
'AutoAWQ': transformers_samplers(),
|
|
||||||
'HQQ': transformers_samplers(),
|
'HQQ': transformers_samplers(),
|
||||||
'ExLlamav2': {
|
'ExLlamav2': {
|
||||||
'temperature',
|
'temperature',
|
||||||
|
@ -75,7 +75,6 @@ def load_model(model_name, loader=None):
|
|||||||
'llamacpp_HF': llamacpp_HF_loader,
|
'llamacpp_HF': llamacpp_HF_loader,
|
||||||
'ExLlamav2': ExLlamav2_loader,
|
'ExLlamav2': ExLlamav2_loader,
|
||||||
'ExLlamav2_HF': ExLlamav2_HF_loader,
|
'ExLlamav2_HF': ExLlamav2_HF_loader,
|
||||||
'AutoAWQ': AutoAWQ_loader,
|
|
||||||
'HQQ': HQQ_loader,
|
'HQQ': HQQ_loader,
|
||||||
'TensorRT-LLM': TensorRT_LLM_loader,
|
'TensorRT-LLM': TensorRT_LLM_loader,
|
||||||
}
|
}
|
||||||
@ -292,24 +291,6 @@ def llamacpp_HF_loader(model_name):
|
|||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
def AutoAWQ_loader(model_name):
|
|
||||||
from awq import AutoAWQForCausalLM
|
|
||||||
|
|
||||||
model_dir = Path(f'{shared.args.model_dir}/{model_name}')
|
|
||||||
|
|
||||||
model = AutoAWQForCausalLM.from_quantized(
|
|
||||||
quant_path=model_dir,
|
|
||||||
max_new_tokens=shared.args.max_seq_len,
|
|
||||||
trust_remote_code=shared.args.trust_remote_code,
|
|
||||||
fuse_layers=not shared.args.no_inject_fused_attention,
|
|
||||||
max_memory=get_max_memory_dict(),
|
|
||||||
batch_size=1,
|
|
||||||
safetensors=any(model_dir.glob('*.safetensors')),
|
|
||||||
)
|
|
||||||
|
|
||||||
return model
|
|
||||||
|
|
||||||
|
|
||||||
def AutoGPTQ_loader(model_name):
|
def AutoGPTQ_loader(model_name):
|
||||||
import modules.AutoGPTQ_loader
|
import modules.AutoGPTQ_loader
|
||||||
|
|
||||||
|
@ -180,8 +180,6 @@ def infer_loader(model_name, model_settings):
|
|||||||
loader = None
|
loader = None
|
||||||
elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0):
|
elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0):
|
||||||
loader = 'ExLlamav2_HF'
|
loader = 'ExLlamav2_HF'
|
||||||
elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
|
|
||||||
loader = 'AutoAWQ'
|
|
||||||
elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists():
|
elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists():
|
||||||
loader = 'llamacpp_HF'
|
loader = 'llamacpp_HF'
|
||||||
elif len(list(path_to_model.glob('*.gguf'))) > 0:
|
elif len(list(path_to_model.glob('*.gguf'))) > 0:
|
||||||
|
@ -89,7 +89,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft
|
|||||||
|
|
||||||
# Model loader
|
# Model loader
|
||||||
group = parser.add_argument_group('Model loader')
|
group = parser.add_argument_group('Model loader')
|
||||||
group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ, AutoAWQ.')
|
group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ.')
|
||||||
|
|
||||||
# Transformers/Accelerate
|
# Transformers/Accelerate
|
||||||
group = parser.add_argument_group('Transformers/Accelerate')
|
group = parser.add_argument_group('Transformers/Accelerate')
|
||||||
@ -160,10 +160,6 @@ group.add_argument('--disable_exllamav2', action='store_true', help='Disable ExL
|
|||||||
group.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')
|
group.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')
|
||||||
group.add_argument('--groupsize', type=int, default=-1, help='Group size.')
|
group.add_argument('--groupsize', type=int, default=-1, help='Group size.')
|
||||||
|
|
||||||
# AutoAWQ
|
|
||||||
group = parser.add_argument_group('AutoAWQ')
|
|
||||||
group.add_argument('--no_inject_fused_attention', action='store_true', help='Disable the use of fused attention, which will use less VRAM at the cost of slower inference.')
|
|
||||||
|
|
||||||
# HQQ
|
# HQQ
|
||||||
group = parser.add_argument_group('HQQ')
|
group = parser.add_argument_group('HQQ')
|
||||||
group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.')
|
group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.')
|
||||||
@ -217,6 +213,7 @@ group.add_argument('--model_type', type=str, help='DEPRECATED')
|
|||||||
group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED')
|
group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED')
|
||||||
group.add_argument('--checkpoint', type=str, help='DEPRECATED')
|
group.add_argument('--checkpoint', type=str, help='DEPRECATED')
|
||||||
group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED')
|
group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED')
|
||||||
|
group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
args_defaults = parser.parse_args([])
|
args_defaults = parser.parse_args([])
|
||||||
@ -267,8 +264,6 @@ def fix_loader_name(name):
|
|||||||
return 'ExLlamav2'
|
return 'ExLlamav2'
|
||||||
elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']:
|
elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']:
|
||||||
return 'ExLlamav2_HF'
|
return 'ExLlamav2_HF'
|
||||||
elif name in ['autoawq', 'awq', 'auto-awq']:
|
|
||||||
return 'AutoAWQ'
|
|
||||||
elif name in ['hqq']:
|
elif name in ['hqq']:
|
||||||
return 'HQQ'
|
return 'HQQ'
|
||||||
elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']:
|
elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']:
|
||||||
|
@ -78,7 +78,6 @@ def list_model_elements():
|
|||||||
'groupsize',
|
'groupsize',
|
||||||
'triton',
|
'triton',
|
||||||
'desc_act',
|
'desc_act',
|
||||||
'no_inject_fused_attention',
|
|
||||||
'no_inject_fused_mlp',
|
'no_inject_fused_mlp',
|
||||||
'no_use_cuda_fp16',
|
'no_use_cuda_fp16',
|
||||||
'disable_exllama',
|
'disable_exllama',
|
||||||
|
@ -84,13 +84,13 @@ def create_ui():
|
|||||||
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
|
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
|
shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
|
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])
|
shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar'])
|
||||||
|
|
||||||
|
|
||||||
def create_chat_settings_ui():
|
def create_chat_settings_ui():
|
||||||
|
@ -127,7 +127,6 @@ def create_ui():
|
|||||||
shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
|
shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
|
||||||
shared.gradio['no_mul_mat_q'] = gr.Checkbox(label="no_mul_mat_q", value=shared.args.no_mul_mat_q, info='Disable the mulmat kernels.')
|
shared.gradio['no_mul_mat_q'] = gr.Checkbox(label="no_mul_mat_q", value=shared.args.no_mul_mat_q, info='Disable the mulmat kernels.')
|
||||||
shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton)
|
shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton)
|
||||||
shared.gradio['no_inject_fused_attention'] = gr.Checkbox(label="no_inject_fused_attention", value=shared.args.no_inject_fused_attention, info='Disable fused attention. Fused attention improves inference performance but uses more VRAM. Fuses layers for AutoAWQ. Disable if running low on VRAM.')
|
|
||||||
shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. Disable if running low on VRAM.')
|
shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. Disable if running low on VRAM.')
|
||||||
shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.')
|
shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.')
|
||||||
shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.')
|
shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.')
|
||||||
|
@ -388,7 +388,12 @@ def update_requirements(initial_installation=False, pull=True):
|
|||||||
# Prepare the requirements file
|
# Prepare the requirements file
|
||||||
textgen_requirements = open(requirements_file).read().splitlines()
|
textgen_requirements = open(requirements_file).read().splitlines()
|
||||||
if is_cuda118:
|
if is_cuda118:
|
||||||
textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req]
|
textgen_requirements = [
|
||||||
|
req.replace('+cu121', '+cu118').replace('+cu122', '+cu118')
|
||||||
|
for req in textgen_requirements
|
||||||
|
if "auto-gptq" not in req.lower() and "autoawq" not in req.lower()
|
||||||
|
]
|
||||||
|
|
||||||
if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11
|
if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11
|
||||||
textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req]
|
textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req]
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ safetensors==0.4.*
|
|||||||
scipy
|
scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
transformers==4.42.*
|
transformers==4.43.*
|
||||||
tqdm
|
tqdm
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
@ -53,12 +53,20 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te
|
|||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
|
||||||
# CUDA wheels
|
# CUDA wheels
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
|
||||||
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
@ -21,7 +21,7 @@ safetensors==0.4.*
|
|||||||
scipy
|
scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
transformers==4.42.*
|
transformers==4.43.*
|
||||||
tqdm
|
tqdm
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
@ -40,6 +40,10 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp
|
|||||||
# AMD wheels
|
# AMD wheels
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
@ -21,7 +21,7 @@ safetensors==0.4.*
|
|||||||
scipy
|
scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
transformers==4.42.*
|
transformers==4.43.*
|
||||||
tqdm
|
tqdm
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
@ -38,6 +38,10 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp
|
|||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
|
||||||
# AMD wheels
|
# AMD wheels
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
@ -21,7 +21,7 @@ safetensors==0.4.*
|
|||||||
scipy
|
scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
transformers==4.42.*
|
transformers==4.43.*
|
||||||
tqdm
|
tqdm
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
@ -36,4 +36,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me
|
|||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl
|
||||||
|
@ -21,7 +21,7 @@ safetensors==0.4.*
|
|||||||
scipy
|
scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
transformers==4.42.*
|
transformers==4.43.*
|
||||||
tqdm
|
tqdm
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
@ -38,4 +38,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me
|
|||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl
|
||||||
|
@ -21,7 +21,7 @@ safetensors==0.4.*
|
|||||||
scipy
|
scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
transformers==4.42.*
|
transformers==4.43.*
|
||||||
tqdm
|
tqdm
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ safetensors==0.4.*
|
|||||||
scipy
|
scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
transformers==4.42.*
|
transformers==4.43.*
|
||||||
tqdm
|
tqdm
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ safetensors==0.4.*
|
|||||||
scipy
|
scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
transformers==4.42.*
|
transformers==4.43.*
|
||||||
tqdm
|
tqdm
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
@ -53,12 +53,20 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te
|
|||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
|
||||||
# CUDA wheels
|
# CUDA wheels
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
|
||||||
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
|
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
@ -21,7 +21,7 @@ safetensors==0.4.*
|
|||||||
scipy
|
scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
transformers==4.42.*
|
transformers==4.43.*
|
||||||
tqdm
|
tqdm
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
|
@ -90,7 +90,7 @@ def create_interface():
|
|||||||
# Force some events to be triggered on page load
|
# Force some events to be triggered on page load
|
||||||
shared.persistent_interface_state.update({
|
shared.persistent_interface_state.update({
|
||||||
'loader': shared.args.loader or 'Transformers',
|
'loader': shared.args.loader or 'Transformers',
|
||||||
'mode': shared.settings['mode'],
|
'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(),
|
||||||
'character_menu': shared.args.character or shared.settings['character'],
|
'character_menu': shared.args.character or shared.settings['character'],
|
||||||
'instruction_template_str': shared.settings['instruction_template_str'],
|
'instruction_template_str': shared.settings['instruction_template_str'],
|
||||||
'prompt_menu-default': shared.settings['prompt-default'],
|
'prompt_menu-default': shared.settings['prompt-default'],
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#!/bin/bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
cd "$(dirname "${BASH_SOURCE[0]}")"
|
cd "$(dirname "${BASH_SOURCE[0]}")"
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#!/bin/bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
cd "$(dirname "${BASH_SOURCE[0]}")"
|
cd "$(dirname "${BASH_SOURCE[0]}")"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user