text-generation-webui/requirements.txt

accelerate==0.30.*
aqlm[gpu,cpu]==1.1.3; platform_system == "Linux"
bitsandbytes==0.43.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.5
jinja2==3.1.2
lm_eval==0.3.0
markdown
numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
peft==0.8.*
Pillow>=9.5.0
psutil
pyyaml
requests
rich
safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.40.*
tqdm
wandb

# API
SpeechRecognition==3.10.0
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken

# llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.75+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.75+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.75+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.75+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

# llama-cpp-python (CUDA, no tensor cores)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.75+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.75+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.75+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.75+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

# llama-cpp-python (CUDA, tensor cores)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.75+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.75+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.75+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.75+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

# CUDA wheels
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.0.20/exllamav2-0.0.20+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.0.20/exllamav2-0.0.20+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.0.20/exllamav2-0.0.20+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.0.20/exllamav2-0.0.20+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.0.20/exllamav2-0.0.20-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2.0cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
autoawq==0.2.3; platform_system == "Linux" or platform_system == "Windows"
Update accelerate requirement from ==0.27.* to ==0.30.* (#5989) 2024-05-19 19:03:18 -04:00			`accelerate==0.30.*`
Bump aqlm[cpu,gpu] from 1.1.2 to 1.1.3 (#5790) 2024-04-05 12:26:49 -04:00			`aqlm[gpu,cpu]==1.1.3; platform_system == "Linux"`
Bump bitsandbytes to 0.43, add official Windows wheel 2024-03-10 11:30:53 -04:00			`bitsandbytes==0.43.*`
Add 4-bit LoRA support (#1200) 2023-04-16 22:26:52 -04:00			`colorama`
New yaml character format (#337 from TheTerrasque/feature/yaml-characters) This doesn't break backward compatibility with JSON characters. 2023-04-02 19:34:25 -04:00			`datasets`
Falcon support (trust-remote-code and autogptq checkboxes) (#2367) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-05-29 09:20:18 -04:00			`einops`
Update gradio requirement from ==4.25.* to ==4.26.* (#5832) 2024-04-11 01:24:53 -04:00			`gradio==4.26.*`
Bump HQQ to 0.1.5 2024-03-05 05:33:51 -05:00			`hqq==0.1.5`
Add requirement jinja2==3.1.* to fix error as described in issue #5240 (#5249) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> Co-authored-by: Rim <anonymous@mail.com> 2024-01-13 19:47:13 -05:00			`jinja2==3.1.2`
Pin lm_eval package version 2023-12-24 12:22:31 -05:00			`lm_eval==0.3.0`
Sort the requirements 2023-03-15 11:40:03 -04:00			`markdown`
Add numba to requirements.txt 2024-03-10 19:13:29 -04:00			`numba==0.59.*`
Update numpy requirement from ==1.24.* to ==1.26.* (#5490) 2024-02-13 14:26:35 -05:00			`numpy==1.26.*`
Update optimum requirement from ==1.16.* to ==1.17.* (#5548) 2024-02-19 17:15:21 -05:00			`optimum==1.17.*`
Add an "Evaluate" tab to calculate the perplexities of models (#1322) 2023-04-20 23:20:33 -04:00			`pandas`
Revert "Update peft requirement from ==0.8.* to ==0.9.* (#5626)" This reverts commit 72a498ddd44a895205e53b5696742dc4ded9e12e. 2024-03-05 05:56:37 -05:00			`peft==0.8.*`
Add Pillow as a requirement 2023-04-08 17:48:46 -04:00			`Pillow>=9.5.0`
requirements: add psutil (#5819) 2024-04-06 22:02:20 -04:00			`psutil`
Add an "Evaluate" tab to calculate the perplexities of models (#1322) 2023-04-20 23:20:33 -04:00			`pyyaml`
Add requests to requirements.txt 2023-03-11 12:47:30 -05:00			`requests`
Add rich requirement 2023-12-20 00:58:36 -05:00			`rich`
Bump safetensors version 2024-02-04 21:40:25 -05:00			`safetensors==0.4.*`
Add 'scipy' to requirements.txt #2335 (#2343) Unlisted dependency of bitsandbytes 2023-05-25 22:26:25 -04:00			`scipy`
Add CUDA wheels for llama-cpp-python by jllllll 2023-07-19 22:31:19 -04:00			`sentencepiece`
Add Tensorboard/Weights and biases integration for training (#2624) 2023-07-12 10:53:31 -04:00			`tensorboard`
Bump transformers to 4.40 2024-04-18 18:55:34 -04:00			`transformers==4.40.*`
Add CUDA wheels for llama-cpp-python by jllllll 2023-07-19 22:31:19 -04:00			`tqdm`
			`wandb`
Pin aiofiles version to fix statvfs issue 2023-08-09 11:07:55 -04:00
Do not install extensions requirements by default (#5621) 2024-03-04 02:46:39 -05:00			`# API`
			`SpeechRecognition==3.10.0`
			`flask_cloudflared==0.0.14`
Revert sse-starlette version bump because it breaks API request cancellation (#5873) 2024-04-18 14:05:00 -04:00			`sse-starlette==1.6.5`
Do not install extensions requirements by default (#5621) 2024-03-04 02:46:39 -05:00			`tiktoken`

Add back my llama-cpp-python wheels, bump to 0.2.65 (#5964) 2024-04-30 08:11:31 -04:00			`# llama-cpp-python (CPU only, AVX2)`
Bump llama-cpp-python to 0.2.75 2024-05-18 08:26:26 -04:00			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.75+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"`
			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.75+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"`
			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.75+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"`
			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.75+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"`
Add back my llama-cpp-python wheels, bump to 0.2.65 (#5964) 2024-04-30 08:11:31 -04:00
			`# llama-cpp-python (CUDA, no tensor cores)`
Bump llama-cpp-python to 0.2.75 2024-05-18 08:26:26 -04:00			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.75+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"`
			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.75+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"`
			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.75+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"`
			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.75+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"`
Add back my llama-cpp-python wheels, bump to 0.2.65 (#5964) 2024-04-30 08:11:31 -04:00
			`# llama-cpp-python (CUDA, tensor cores)`
Bump llama-cpp-python to 0.2.75 2024-05-18 08:26:26 -04:00			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.75+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"`
			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.75+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"`
			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.75+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"`
			`https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.75+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"`
Add llama-cpp-python wheels with tensor cores support (#5003) 2023-12-19 15:30:53 -05:00
Create alternative requirements.txt with AMD and Metal wheels (#4052) 2023-09-24 08:58:29 -04:00			`# CUDA wheels`
Bump AutoGPTQ to 0.6.0 (adds Mixtral support) 2023-12-15 09:18:49 -05:00			`https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"`
			`https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"`
			`https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"`
			`https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"`
Bump ExLlamaV2 to 0.0.20 2024-05-01 19:20:50 -04:00			`https://github.com/oobabooga/exllamav2/releases/download/v0.0.20/exllamav2-0.0.20+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"`
			`https://github.com/oobabooga/exllamav2/releases/download/v0.0.20/exllamav2-0.0.20+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"`
			`https://github.com/oobabooga/exllamav2/releases/download/v0.0.20/exllamav2-0.0.20+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"`
			`https://github.com/oobabooga/exllamav2/releases/download/v0.0.20/exllamav2-0.0.20+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"`
			`https://github.com/oobabooga/exllamav2/releases/download/v0.0.20/exllamav2-0.0.20-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"`
Update PyTorch to 2.2 (also update flash-attn to 2.5.6) (#5618) 2024-03-03 17:40:32 -05:00			`https://github.com/oobabooga/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"`
			`https://github.com/oobabooga/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2.0cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"`
			`https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"`
			`https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"`
Add flash-attention 2 for windows (#4235) 2023-10-21 02:46:23 -04:00			`https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"`
			`https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"`
			`https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"`
			`https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"`
Add back AutoAWQ for Windows https://github.com/casper-hansen/AutoAWQ/issues/377#issuecomment-1986440695 2024-03-08 17:54:56 -05:00			`autoawq==0.2.3; platform_system == "Linux" or platform_system == "Windows"`