mirror of
https://github.com/ravenscroftj/turbopilot.git
synced 2024-10-01 01:06:01 -04:00
39 lines
1.1 KiB
Docker
39 lines
1.1 KiB
Docker
# ---- Build stage: CUDA development image used to compile turbopilot ----
FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 AS build

# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG so it is visible to the RUN steps of this stage without
# being recorded as a persistent environment variable of the image.
ARG DEBIAN_FRONTEND=noninteractive

# Include the Kitware apt repo to allow us to grab the latest cmake.
# - `-y` is required: without it apt-get aborts in a non-interactive build as
#   soon as it needs confirmation (e.g. when extra dependencies are pulled in).
# - The key is downloaded to a file and de-armored in separate `&&` steps
#   instead of a pipeline, so a failed download stops the build rather than
#   being masked by the exit status of the last command in a pipe.
RUN apt-get update \
    && apt-get install -y ca-certificates gpg wget \
    && wget -q -O /tmp/kitware-archive-latest.asc \
        https://apt.kitware.com/keys/kitware-archive-latest.asc \
    && gpg --dearmor \
        -o /usr/share/keyrings/kitware-archive-keyring.gpg \
        /tmp/kitware-archive-latest.asc \
    && rm /tmp/kitware-archive-latest.asc \
    && echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' \
        > /etc/apt/sources.list.d/kitware.list

# Refresh the package index (now including the Kitware repo) and install the
# compiler toolchain, cmake and the Boost headers/libraries used by the build.
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        cmake \
        libboost-dev \
        libboost-thread-dev
|
|
|
|
|
|
# Copy the whole build context into the image. COPY is used because these are
# plain local files; none of ADD's extra behaviours (archive extraction,
# remote URLs) are needed here.
COPY ./ /turbopilot

# Out-of-tree build directory. WORKDIR creates the directory when it does not
# exist, so no separate `mkdir` step is needed (and the build no longer fails
# if the context already contains a build/ directory).
WORKDIR /turbopilot/build

# Configure with cuBLAS enabled, pointing CMake at the nvcc path given below.
RUN cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc ..

# Build only the `turbopilot` target; the runtime stage copies the resulting
# binary from /turbopilot/build/bin/turbopilot.
RUN make turbopilot
|
|
|
|
# ---- Runtime stage: CUDA runtime image carrying only the built binary ----
FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04 AS runtime

WORKDIR /app

# Only the compiled binary is taken from the build stage; compilers, headers
# and sources stay behind in the discarded stage.
COPY --from=build /turbopilot/build/bin/turbopilot /app/turbopilot

# Default runtime settings; override with `docker run -e NAME=value`.
# NOTE(review): presumably consumed by run.sh (not visible here) - confirm.
ENV THREADS=4 \
    MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin" \
    BATCHSIZE=64

# Launcher script from the build context.
COPY ./run.sh /app/

# Documentation only: EXPOSE does not publish the port by itself.
EXPOSE 18080

# Exec (JSON) form with an explicit `exec`: the script is still started via
# /bin/sh as before, but it replaces the wrapper shell and so becomes the
# container's main process and receives stop signals directly.
CMD ["/bin/sh", "-c", "exec /app/run.sh"]