# ---------------------------------------------------------------------------
# Stage 1 (build): compile the turbopilot binary with cuBLAS enabled, using
# the CUDA "devel" image so that nvcc is available.
# ---------------------------------------------------------------------------
FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 AS build

# Toolchain and header packages needed by the CMake build. update + install
# share one layer so a cached package index is never reused on its own, and
# the apt lists are removed in the same layer that created them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        libasio-dev \
        libboost-dev \
    && rm -rf /var/lib/apt/lists/*

# Plain local copy of the build context: COPY rather than ADD, since none of
# ADD's extra behaviours (archive extraction, URL fetch) is wanted here.
COPY ./ /turbopilot

# WORKDIR creates the directory when it is missing, so no separate
# `RUN mkdir` is needed (and the build no longer fails if the context
# already ships a build/ directory).
WORKDIR /turbopilot/build

# Configure with cuBLAS support, pointing CMake at the image's nvcc.
RUN cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc ..

# Build only the turbopilot target.
RUN make turbopilot

# ---------------------------------------------------------------------------
# Stage 2 (runtime): CUDA runtime image carrying only the compiled binary
# and the launcher script.
# ---------------------------------------------------------------------------
FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04 AS runtime

WORKDIR /app

COPY --from=build /turbopilot/build/bin/turbopilot /app/turbopilot

# Default runtime settings; each can be overridden with `docker run -e`.
# NOTE(review): presumably read by run.sh -- confirm against that script.
ENV THREADS=4 \
    MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin" \
    BATCHSIZE=64

COPY ./run.sh /app/

# Documentation only; the port still has to be published with `-p`.
# NOTE(review): assumes the server listens on 18080 -- confirm in run.sh.
EXPOSE 18080

# Exec-form CMD. `exec` makes the shell replace itself with run.sh so the
# script receives signals such as SIGTERM from `docker stop` directly,
# instead of running as a child of `/bin/sh -c`. The shell is kept as the
# launcher so run.sh starts the same way it did with the shell-form CMD.
CMD ["/bin/sh", "-c", "exec /app/run.sh"]