diff --git a/Dockerfile.cuda b/Dockerfile.cuda
index ac72a88..89ef32d 100644
--- a/Dockerfile.cuda
+++ b/Dockerfile.cuda
@@ -22,6 +22,8 @@ ENV THREADS=4
 
 ENV MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin"
 
+ENV BATCHSIZE=64
+
 COPY ./run.sh /app/
 
 EXPOSE 18080
diff --git a/Dockerfile.default b/Dockerfile.default
index bad770b..d6db39c 100644
--- a/Dockerfile.default
+++ b/Dockerfile.default
@@ -22,6 +22,8 @@ ENV THREADS=4
 
 ENV MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin"
 
+ENV BATCHSIZE=8
+
 COPY ./run.sh /app/
 
 EXPOSE 18080
diff --git a/run.sh b/run.sh
index 8db96c8..ef1339f 100755
--- a/run.sh
+++ b/run.sh
@@ -1,3 +1,5 @@
 #!/bin/sh
 
-/app/codegen-serve -t $THREADS -m $MODEL
\ No newline at end of file
+# THREADS, MODEL and BATCHSIZE default to the ENV values set in the Dockerfiles.
+# Quote each expansion so a value with spaces or glob characters stays one argument.
+/app/codegen-serve -t "$THREADS" -m "$MODEL" -b "$BATCHSIZE"
\ No newline at end of file