diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..cbc59ef
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,4 @@
+.venv
+.github
+.vscode
+docker-compose.yml
diff --git a/.gitignore b/.gitignore
index a20ef28..59fc6f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,5 @@ wandb
 evaluate.py
 test_data.json
 todo.txt
-.vscode/
\ No newline at end of file
+.venv
+.vscode
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..f6599a0
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,18 @@
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    software-properties-common \
+    && add-apt-repository ppa:deadsnakes/ppa \
+    && apt-get install -y python3.10 \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /workspace
+COPY requirements.txt requirements.txt
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 \
+    && python3.10 -m pip install -r requirements.txt \
+    && python3.10 -m pip install numpy --pre torch --force-reinstall --index-url https://download.pytorch.org/whl/nightly/cu118
+COPY . .
+ENTRYPOINT ["python3.10"]
diff --git a/README.md b/README.md
index d672f4c..a551aec 100644
--- a/README.md
+++ b/README.md
@@ -15,25 +15,13 @@ as well as Tim Dettmers' [bitsandbytes](https://github.com/TimDettmers/bitsandby
 Without hyperparameter tuning, the LoRA model produces outputs comparable to the Stanford Alpaca model. (Please see the outputs included below.)
 Further tuning might be able to achieve better performance;
 I invite interested users to give it a try and report their results.
 
-## Setup
+### Local Setup
 
 1. Install dependencies
 
-    ```bash
-    pip install -r requirements.txt
-    ```
-
-1. Set environment variables, or modify the files referencing `BASE_MODEL`:
-
-    ```bash
-    # Files referencing `BASE_MODEL`
-    # export_hf_checkpoint.py
-    # export_state_dict_checkpoint.py
-
-    export BASE_MODEL=decapoda-research/llama-7b-hf
-    ```
-
-    Both `finetune.py` and `generate.py` use `--base_model` flag as shown further below.
+   ```bash
+   pip install -r requirements.txt
+   ```
 
 1. If bitsandbytes doesn't work, [install it from source.](https://github.com/TimDettmers/bitsandbytes/blob/main/compile_from_source.md) Windows users can follow [these instructions](https://github.com/tloen/alpaca-lora/issues/17).
@@ -94,6 +82,49 @@ They should help users
 who want to run inference in projects like [llama.cpp](https://github.com/ggerganov/llama.cpp)
 or [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp).
 
+### Docker Setup & Inference
+
+1. Build the container image:
+
+```bash
+docker build -t alpaca-lora .
+```
+
+2. Run the container (you can also run `finetune.py` with all of its parameters, as shown above, for training):
+
+```bash
+docker run --gpus=all --shm-size 64g -p 7860:7860 -v ${HOME}/.cache:/root/.cache --rm alpaca-lora generate.py \
+    --load_8bit \
+    --base_model 'decapoda-research/llama-7b-hf' \
+    --lora_weights 'tloen/alpaca-lora-7b'
+```
+
+3. Open `http://localhost:7860` in your browser
+
+### Docker Compose Setup & Inference
+
+1. (optional) Change the desired base model and LoRA weights in the `command` in `docker-compose.yml`; the `$BASE_MODEL` placeholder is filled in from your shell environment
+
+2. Build and run the container:
+
+```bash
+docker-compose up -d --build
+```
+
+3. Open `http://localhost:7860` in your browser
+
+4. See logs:
+
+```bash
+docker-compose logs -f
+```
+
+5. Clean everything up:
+
+```bash
+docker-compose down --volumes --rmi all
+```
+
 ### Notes
 
 - We can likely improve our model performance significantly if we had a better dataset. Consider supporting the [LAION Open Assistant](https://open-assistant.io/) effort to produce a high-quality dataset for supervised fine-tuning (or bugging them to release their data).
@@ -110,9 +141,7 @@ or [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp).
 - 7B:
   -
   - 🤖
   - 🇧🇷
-  -
-  - 🇨🇳
   - 🇨🇳
   - 🇯🇵
   - 🇫🇷
@@ -131,9 +160,6 @@
 -
-  - 🇯🇵
-  - 🇰🇷
 -
 - 65B:
-  - 🇰🇷
 - [alpaca-native](https://huggingface.co/chavinlo/alpaca-native), a replication using the original Alpaca code
 
 ### Example outputs
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..d268039
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,28 @@
+version: '3'
+
+services:
+  alpaca-lora:
+    build:
+      context: ./
+      dockerfile: Dockerfile
+      args:
+        BUILDKIT_INLINE_CACHE: "0"
+    image: alpaca-lora
+    shm_size: '64gb'
+    command: generate.py --load_8bit --base_model $BASE_MODEL --lora_weights 'tloen/alpaca-lora-7b'
+    restart: unless-stopped
+    volumes:
+      - alpaca-lora:/root/.cache  # Location where downloaded weights are stored
+    ports:
+      - 7860:7860
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [ gpu ]
+
+volumes:
+  alpaca-lora:
+    name: alpaca-lora
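Note on the compose file above: `$BASE_MODEL` in `command` is substituted by `docker-compose` from the host shell (or an `.env` file), not from an `environment:` block inside the container. A minimal usage sketch, assuming the 7B weights referenced elsewhere in this diff:

```bash
# docker-compose interpolates $BASE_MODEL from the host environment
# into the service's command line before starting the container.
export BASE_MODEL=decapoda-research/llama-7b-hf
docker-compose up -d --build

# Follow the logs until Gradio reports it is serving on port 7860.
docker-compose logs -f
```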
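The `docker run` step in the README section above notes that `finetune.py` can be launched the same way. A hypothetical sketch (the flag values are illustrative only; see `finetune.py` for its actual parameters and defaults):

```bash
# Same image, different script: the ENTRYPOINT is python3.10, so the
# first argument names the script to run inside the container.
docker run --gpus=all --shm-size 64g -v ${HOME}/.cache:/root/.cache --rm alpaca-lora finetune.py \
    --base_model 'decapoda-research/llama-7b-hf' \
    --data_path 'alpaca_data_cleaned.json' \
    --output_dir './lora-alpaca'
# Note: --output_dir is a path inside the container; mount it as a
# volume if the trained weights should survive container removal.
```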
diff --git a/generate.py b/generate.py
index fad8650..f8f4db7 100644
--- a/generate.py
+++ b/generate.py
@@ -1,3 +1,4 @@
+import os
 import sys
 
 import fire
@@ -29,6 +30,7 @@ def main(
     server_name: str = "127.0.0.1",  # Allows to listen on all interfaces by providing '0.0.0.0'
     share_gradio: bool = False,
 ):
+    base_model = base_model or os.environ.get("BASE_MODEL", "")
     assert (
         base_model
     ), "Please specify a --base_model, e.g. --base_model='decapoda-research/llama-7b-hf'"
@@ -146,7 +148,7 @@ def main(
         ],
         title="🦙🌲 Alpaca-LoRA",
         description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",  # noqa: E501
-    ).launch(server_name=server_name, share=share_gradio)
+    ).launch(server_name="0.0.0.0", share=share_gradio)  # listen on all interfaces so the UI is reachable from outside the container
     # Old testing code follows.
 
     """
diff --git a/requirements.txt b/requirements.txt
index 74bab8f..4f5d31b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 accelerate
 appdirs
+loralib
 bitsandbytes
 black
 black[jupyter]
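With the `BASE_MODEL` fallback added to `generate.py` above, the base model can be supplied either as a flag or through the environment. A rough sketch of the two equivalent invocations (model name as used elsewhere in this diff; `python3.10` matches the interpreter the Dockerfile installs, so substitute your local Python as needed):

```bash
# Explicit flag, as before:
python3.10 generate.py --load_8bit --base_model 'decapoda-research/llama-7b-hf'

# Equivalent: generate.py now falls back to the BASE_MODEL environment
# variable when --base_model is omitted.
BASE_MODEL='decapoda-research/llama-7b-hf' python3.10 generate.py --load_8bit
```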