update workflows with cuda build

This commit is contained in:
James Ravenscroft 2023-05-08 11:38:25 +01:00
parent 92c558d0b3
commit cd72682396
4 changed files with 58 additions and 3 deletions

View File

@ -75,6 +75,17 @@ jobs:
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
runs-on: ubuntu-latest
# Build matrix: one job per CPU instruction-set variant. Each entry pairs a
# build label with the extra defines for that variant (presumably passed to
# CMake — confirm against the steps that consume `matrix.defines`).
strategy:
  matrix:
    include:
      - {build: 'avx2', defines: ''}
      - {build: 'avx', defines: '-DLLAMA_AVX2=OFF'}
      - {build: 'avx512', defines: '-DLLAMA_AVX512=ON'}
steps:
- uses: actions/checkout@v3
with:

View File

@ -13,6 +13,12 @@ jobs:
runs-on: ubuntu-latest
# Image matrix: one job per Dockerfile. `tag` is the suffix that tells the
# image variants apart; the release steps select on `matrix.config.tag`, so
# every entry must spell the key exactly `tag` (a `tags` key would leave
# `matrix.config.tag` undefined for that entry).
strategy:
matrix:
config:
- {tag: "", dockerfile: "./Dockerfile.default"}
- {tag: "-cuda", dockerfile: "./Dockerfile.cuda"}
steps:
- name: Checkout
@ -49,10 +55,19 @@ jobs:
# Release step for the default image: pushes the image tagged with the git ref
# name and with `latest`, for linux/amd64 and linux/arm64.
# NOTE(review): `if:` and `platforms:` each appear twice below; this looks like
# the old/new line pairs of a rendered diff rather than real duplicate keys — confirm.
- name: Build and push release
uses: docker/build-push-action@v4
if: startsWith(github.ref, 'refs/tags/')
# NOTE(review): GitHub Actions expressions accept only single-quoted string
# literals; the "" in the condition below should likely be '' — confirm.
if: startsWith(github.ref, 'refs/tags/') && matrix.config.tag == ""
with:
# NOTE(review): no `file:` input is given, so `matrix.config.dockerfile` is not
# used by this step and the action falls back to its default Dockerfile path — confirm.
push: true
tags: ghcr.io/ravenscroftj/turbopilot:${{ github.ref_name }}, ghcr.io/ravenscroftj/turbopilot:latest
context: ${{github.workspace}}
platforms: linux/amd64,linux/arm64
platforms: linux/amd64,linux/arm64
# Release step for suffixed image variants (e.g. CUDA): runs only on tag refs
# and only for matrix entries whose `tag` suffix is non-empty.
- name: Build and push release (CUDA)
  uses: docker/build-push-action@v4
  # String literals inside expressions must be single-quoted.
  if: startsWith(github.ref, 'refs/tags/') && matrix.config.tag != ''
  with:
    push: true
    # Build the Dockerfile chosen by the matrix entry instead of the action's
    # default Dockerfile path.
    file: ${{ matrix.config.dockerfile }}
    # Append the matrix suffix so this image does not overwrite the default
    # image published under the plain ref-name tag.
    tags: ghcr.io/ravenscroftj/turbopilot:${{ github.ref_name }}${{ matrix.config.tag }}
    context: ${{github.workspace}}
    platforms: linux/amd64,linux/arm64

29
Dockerfile.cuda Normal file
View File

@ -0,0 +1,29 @@
# --- Build stage: compile codegen-serve with cuBLAS enabled, using the CUDA devel image ---
FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 AS build

# Install the toolchain in a single layer (update + install together so a stale
# package index is never cached) and remove the apt lists afterwards.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        libboost-dev \
    && rm -rf /var/lib/apt/lists/*

# COPY instead of ADD: this is a plain local directory, so none of ADD's
# archive-extraction or URL-fetching behaviour is wanted.
COPY ./ggml /build/ggml

# WORKDIR creates the out-of-tree build directory when it does not exist,
# so no separate `mkdir` layer is needed.
WORKDIR /build/ggml/build
RUN cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc ..
RUN make codegen-serve

# --- Runtime stage: only the compiled binary and the launcher script are shipped ---
FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04 AS runtime
WORKDIR /app
COPY --from=build /build/ggml/build/bin/codegen-serve /app/codegen-serve

# Defaults that can be overridden at `docker run` time
# (presumably read by run.sh — confirm against the script).
ENV THREADS=4 \
    MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin"

COPY ./run.sh /app/
EXPOSE 18080

# Exec-form CMD. The script is still launched through /bin/sh as before, but
# `exec` replaces the shell with it, so the script itself receives the
# signals sent by `docker stop`.
CMD ["/bin/sh", "-c", "exec /app/run.sh"]