diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index d8f588d..ea2e7f4 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -17,7 +17,8 @@ jobs:
       matrix:
         config:
           - {tag: "", dockerfile: "./Dockerfile.default", platforms: "linux/amd64,linux/arm64"}
-          - {tag: "-cuda", dockerfile: "./Dockerfile.cuda", platforms: "linux/amd64"}
+          - {tag: "-cuda11", dockerfile: "./Dockerfile.cuda11", platforms: "linux/amd64"}
+          - {tag: "-cuda12", dockerfile: "./Dockerfile.cuda12", platforms: "linux/amd64"}
 
 
     steps:
@@ -54,7 +55,7 @@ jobs:
           platforms: ${{matrix.config.platforms}}
 
 
-      - name: Build and push release
+      - name: Build and push release (Main Latest Build)
         uses: docker/build-push-action@v4
         if: startsWith(github.ref, 'refs/tags/') && matrix.config.tag == ''
         with:
@@ -65,12 +66,14 @@ jobs:
           platforms: ${{matrix.config.platforms}}
 
 
-      - name: Build and push release (CUDA)
+      - name: Build and push release (Accelerated Builds)
         uses: docker/build-push-action@v4
         if: startsWith(github.ref, 'refs/tags/') && matrix.config.tag != ''
         with:
           file: ${{matrix.config.dockerfile}}
           push: true
-          tags: ghcr.io/ravenscroftj/turbopilot:${{ github.ref_name }}
+          # Suffix with the matrix tag so the cuda11 and cuda12 images do not overwrite each other.
+          tags: ghcr.io/ravenscroftj/turbopilot:${{ github.ref_name }}${{matrix.config.tag}}
           context: ${{github.workspace}}
-          platforms: linux/amd64,linux/arm64
+          # Build only the platforms declared for this matrix entry (the CUDA entries list linux/amd64).
+          platforms: ${{matrix.config.platforms}}
diff --git a/Dockerfile.cuda11 b/Dockerfile.cuda11
new file mode 100644
index 0000000..acbb7c2
--- /dev/null
+++ b/Dockerfile.cuda11
@@ -0,0 +1,42 @@
+# Build stage: compile turbopilot with cuBLAS enabled on the CUDA 11.7.1 devel image.
+FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 AS build
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# include kitware apt repo to allow us to grab latest cmake
+# (-y: docker build cannot answer apt's confirmation prompt, so the install would abort)
+RUN apt-get update && apt-get install -y ca-certificates gpg wget
+RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
+RUN echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null
+
+RUN apt-get update && apt-get install -y build-essential cmake libboost-dev
+
+
+ADD ./ /turbopilot
+
+RUN mkdir /turbopilot/build
+
+WORKDIR /turbopilot/build
+
+RUN cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc ..
+RUN make turbopilot
+
+# Runtime stage: only the compiled binary and run.sh are copied in.
+FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04 AS runtime
+
+
+WORKDIR /app
+
+COPY --from=build /turbopilot/build/bin/turbopilot /app/turbopilot
+
+ENV THREADS=4
+
+ENV MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin"
+
+ENV BATCHSIZE=64
+
+COPY ./run.sh /app/
+
+EXPOSE 18080
+
+CMD /app/run.sh
diff --git a/Dockerfile.cuda12 b/Dockerfile.cuda12
new file mode 100644
index 0000000..f72656b
--- /dev/null
+++ b/Dockerfile.cuda12
@@ -0,0 +1,40 @@
+# Build stage: compile turbopilot with cuBLAS enabled on the CUDA 12.2.0 devel image.
+FROM nvidia/cuda:12.2.0-devel-ubuntu20.04 AS build
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# include kitware apt repo to allow us to grab latest cmake
+# (-y: docker build cannot answer apt's confirmation prompt, so the install would abort)
+RUN apt-get update && apt-get install -y ca-certificates gpg wget
+RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
+RUN echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ focal main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null
+
+RUN apt-get update && apt-get install -y build-essential cmake libboost-dev
+
+ADD ./ /turbopilot
+
+RUN mkdir /turbopilot/build
+
+WORKDIR /turbopilot/build
+
+RUN cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc ..
+RUN make turbopilot
+
+# Runtime stage: only the compiled binary and run.sh are copied in.
+FROM nvidia/cuda:12.2.0-runtime-ubuntu20.04 AS runtime
+
+WORKDIR /app
+
+COPY --from=build /turbopilot/build/bin/turbopilot /app/turbopilot
+
+ENV THREADS=4
+
+ENV MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin"
+
+ENV BATCHSIZE=64
+
+COPY ./run.sh /app/
+
+EXPOSE 18080
+
+CMD /app/run.sh