# ---------------------------------------------------------------------------
# Stage 1 (build): compile the codegen-serve binary from the ggml sources
# with cuBLAS enabled, using the CUDA "devel" image that ships nvcc.
# ---------------------------------------------------------------------------
FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 AS build

# Toolchain and headers needed by the CMake build.
# update + install share one layer so a cached, stale package index is never
# reused; the apt lists are removed in the same layer that created them.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        libboost-dev \
    && rm -rf /var/lib/apt/lists/*

# Plain local directory copy: COPY is sufficient, none of ADD's extra
# behaviors (archive extraction, URL fetch) are wanted here.
COPY ./ggml /build/ggml

# WORKDIR creates the out-of-tree build directory if it does not exist, so no
# separate `mkdir` step is needed.
WORKDIR /build/ggml/build

# Configure with cuBLAS support and point CMake at the image's nvcc.
RUN cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc ..

# Build only the server target.
RUN make codegen-serve

# ---------------------------------------------------------------------------
# Stage 2 (runtime): CUDA/cuDNN runtime image carrying only the compiled
# binary and its launcher script; compilers and sources stay in stage 1.
# ---------------------------------------------------------------------------
FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04 AS runtime

WORKDIR /app

COPY --from=build /build/ggml/build/bin/codegen-serve /app/codegen-serve

# Default runtime settings; override with `docker run -e NAME=value`.
# NOTE(review): presumably consumed by run.sh (not visible here) -- confirm.
# The default MODEL path is not populated by this Dockerfile, so the model
# file has to be provided at run time (e.g. via a volume at /models).
ENV THREADS=4 \
    MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin" \
    BATCHSIZE=64

COPY ./run.sh /app/

# Documentation only: EXPOSE does not publish the port by itself.
# NOTE(review): presumably the port codegen-serve listens on -- confirm.
EXPOSE 18080

# NOTE(review): no USER is set, so the container runs as root. Consider a
# dedicated non-root user once run.sh and the model mount are confirmed to
# work without root.

# Exec form: the script is started directly rather than via `/bin/sh -c`, so
# it receives signals such as SIGTERM from `docker stop`. This requires
# run.sh to be executable and to begin with a shebang line.
CMD ["/app/run.sh"]