text-generation-webui/.env.example

# by default the Dockerfile specifies these CUDA compute capabilities: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
# however, to get it working I had to specify the exact version for my card (a 2060), which is 7.5
# you can find the version for your card at https://developer.nvidia.com/cuda-gpus
TORCH_CUDA_ARCH_LIST=7.5

# these arguments worked for me with roughly 4.5GB of vram
CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices

# the following examples have been tested with the files linked in docs/README_docker.md:
# example running 13b with 4bit/128 groupsize        : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
# example running 7b in 8bit                         : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices

# the port the webui binds to on the host
HOST_PORT=7860
# the port the webui binds to inside the container
CONTAINER_PORT=7860

# the port the api binds to on the host
HOST_API_PORT=5000
# the port the api binds to inside the container
CONTAINER_API_PORT=5000

# the version of text-generation-webui to install
WEBUI_VERSION=HEAD
creating a layer with Docker/docker-compose (#633) 2023-04-06 21:46:04 -04:00			`# by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX`
			`# however, to get it working I had to specify the exact version for my card (a 2060), which is 7.5`
			`# you can find the version for your card at https://developer.nvidia.com/cuda-gpus`
			`TORCH_CUDA_ARCH_LIST=7.5`

			`# these commands worked for me with roughly 4.5GB of vram`
			`CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices`

			`# the following examples have been tested with the files linked in docs/README_docker.md:`
			`# example running 13b with 4bit/128 groupsize : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25`
			`# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share`
			`# example running 7b in 8bit : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices`

			`# the port the webui binds to on the host`
			`HOST_PORT=7860`
			`# the port the webui binds to inside the container`
			`CONTAINER_PORT=7860`

			`# the port the api binds to on the host`
			`HOST_API_PORT=5000`
			`# the port the api binds to inside the container`
			`CONTAINER_API_PORT=5000`

			`# the version used to install text-generation-webui from`
			`WEBUI_VERSION=HEAD`