diff --git a/Dockerfile.cuda11 b/Dockerfile.cuda11
deleted file mode 100644
index bbeeff3..0000000
--- a/Dockerfile.cuda11
+++ /dev/null
@@ -1,39 +0,0 @@
-FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 AS build
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-# inlude kitware apt repo to allow us to grab latest cmake
-RUN apt-get update && apt-get install ca-certificates gpg wget
-RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
-RUN echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null
-
-RUN apt-get update && apt-get install -y build-essential cmake libboost-dev libboost-thread-dev
-
-
-ADD ./ /turbopilot
-
-RUN mkdir /turbopilot/build
-
-WORKDIR /turbopilot/build
-
-RUN cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc ..
-RUN make turbopilot
-
-FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04 AS runtime
-
-
-WORKDIR /app
-
-COPY --from=build /turbopilot/build/bin/turbopilot /app/turbopilot
-
-ENV THREADS=4
-
-ENV MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin"
-
-ENV BATCHSIZE=64
-
-COPY ./run.sh /app/
-
-EXPOSE 18080
-
-CMD /app/run.sh
\ No newline at end of file
diff --git a/Dockerfile.cuda12 b/Dockerfile.cuda12
deleted file mode 100644
index ff27869..0000000
--- a/Dockerfile.cuda12
+++ /dev/null
@@ -1,37 +0,0 @@
-FROM nvidia/cuda:12.2.0-devel-ubuntu20.04 AS build
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-# inlude kitware apt repo to allow us to grab latest cmake
-RUN apt-get update && apt-get install ca-certificates gpg wget
-RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
-RUN echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ focal main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null
-
-RUN apt-get update && apt-get install -y build-essential cmake libboost-dev libboost-thread-dev
-
-ADD ./ /turbopilot
-
-RUN mkdir /turbopilot/build
-
-WORKDIR /turbopilot/build
-
-RUN cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc ..
-RUN make turbopilot
-
-FROM nvidia/cuda:12.2.0-runtime-ubuntu20.04 AS runtime
-
-WORKDIR /app
-
-COPY --from=build /turbopilot/build/bin/turbopilot /app/turbopilot
-
-ENV THREADS=4
-
-ENV MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin"
-
-ENV BATCHSIZE=64
-
-COPY ./run.sh /app/
-
-EXPOSE 18080
-
-CMD /app/run.sh