2023-05-08 06:38:25 -04:00
|
|
|
# Build stage ("build"): based on the CUDA 11.7.1 "devel" image for Ubuntu 22.04.
# Later steps in this stage call nvcc from /usr/local/cuda/bin, so the base
# image must ship the CUDA compiler, not just the runtime libraries.
FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 AS build
|
2023-04-10 03:51:48 -04:00
|
|
|
|
2023-05-08 06:38:25 -04:00
|
|
|
# Install the C/C++ toolchain, CMake and the Boost headers needed to compile ggml.
# update + install stay in one layer so a cached, stale package index is never
# reused; --no-install-recommends avoids pulling in optional packages, and the
# apt lists are removed in the same layer so they do not persist in it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        libboost-dev \
    && rm -rf /var/lib/apt/lists/*
|
2023-04-10 03:51:48 -04:00
|
|
|
|
|
|
|
# Copy the ggml source tree from the build context into the image.
# COPY is used instead of ADD because this is a plain local directory copy;
# ADD's extra behaviors (archive extraction, URL fetch) are not wanted here.
COPY ./ggml /build/ggml
|
|
|
|
|
|
|
|
# Create the out-of-source build directory.
# -p keeps this step from failing when the copied source tree already
# contains a build/ directory (e.g. left over from a local build).
RUN mkdir -p /build/ggml/build
|
|
|
|
|
|
|
|
# Run the remaining build-stage commands from the build directory; the cmake
# step below refers to the source tree as "..".
WORKDIR /build/ggml/build
|
|
|
|
|
2023-05-08 06:38:25 -04:00
|
|
|
# Configure the project found in the parent directory:
#   GGML_CUBLAS=ON       - turn on the project's cuBLAS option
#   CMAKE_CUDA_COMPILER  - point CMake explicitly at the nvcc binary in the image
RUN cmake \
      -DGGML_CUBLAS=ON \
      -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
      ..
|
2023-04-10 03:51:48 -04:00
|
|
|
# Build only the codegen-serve target in the configured build directory.
# `cmake --build` invokes the underlying build tool that the configure step
# generated files for.
RUN cmake --build . --target codegen-serve
|
|
|
|
|
2023-05-08 06:38:25 -04:00
|
|
|
# Runtime stage ("runtime"): starts fresh from the CUDA 11.7.1 cuDNN 8 runtime
# image for Ubuntu 22.04. Nothing from the build stage is carried over except
# what is explicitly copied with COPY --from=build below.
FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04 AS runtime
|
2023-04-10 03:51:48 -04:00
|
|
|
|
|
|
|
|
|
|
|
# Application home: the server binary and its launcher script are placed in /app.
WORKDIR /app
|
|
|
|
|
|
|
|
# Take only the compiled codegen-serve binary from the build stage; sources,
# compilers and intermediate build files stay behind in that stage.
COPY --from=build /build/ggml/build/bin/codegen-serve /app/codegen-serve
|
|
|
|
|
|
|
|
# Default value for THREADS in the container environment.
# NOTE(review): presumably read by /app/run.sh as the worker thread count - confirm there.
# Can be overridden at run time with `docker run -e THREADS=<n>`.
ENV THREADS=4
|
|
|
|
|
2023-04-10 04:18:04 -04:00
|
|
|
# Default path of the model file inside the container.
# NOTE(review): no instruction visible in this file copies a model into the
# image, so /models is presumably expected to be mounted at run time - confirm.
ENV MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin"
|
2023-04-10 03:51:48 -04:00
|
|
|
|
2023-05-08 09:24:49 -04:00
|
|
|
# Default value for BATCHSIZE in the container environment.
# NOTE(review): presumably passed to the server by /app/run.sh - confirm there.
# Can be overridden at run time with `docker run -e BATCHSIZE=<n>`.
ENV BATCHSIZE=64
|
|
|
|
|
2023-04-10 03:51:48 -04:00
|
|
|
# Install the launcher script from the build context as /app/run.sh.
COPY run.sh /app/run.sh
|
|
|
|
|
|
|
|
# Declares 18080 as the container's service port. EXPOSE is documentation only:
# the port still has to be published with `docker run -p`.
# NOTE(review): the actual listen port is presumably set in run.sh - confirm it matches.
EXPOSE 18080
|
|
|
|
|
|
|
|
# Start the launcher script using exec (JSON-array) form so that it is run
# directly instead of being wrapped in `/bin/sh -c`; this lets it receive
# SIGTERM from `docker stop`.
# Exec form requires run.sh to be executable and to begin with a shebang line.
CMD ["/app/run.sh"]
|