Merge pull request #30 from ravenscroftj/release/0.0.5

Release/0.0.5
This commit is contained in:
James Ravenscroft 2023-06-13 22:39:40 +01:00 committed by GitHub
commit 90c2310516
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 192 additions and 19 deletions

View File

@ -2,8 +2,8 @@ name: Build on Commit
on:
push:
branches: [ "main" ]
tags: ['*']
branches: [ '**' ]
tags: ['**']
pull_request:
branches: [ "main" ]
@ -74,17 +74,20 @@ jobs:
# You can convert this to a matrix build if you need cross-platform coverage.
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
runs-on: ubuntu-latest
strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
matrix:
include:
- build: 'avx2'
defines: '-DGGML_STATIC=On'
- build: 'avx'
defines: '-DGGML_AVX2=OFF -DGGML_STATIC=On'
- build: 'avx512'
defines: '-DGGML_AVX512=ON -DGGML_STATIC=On'
- build: 'avx2-openblas'
defines: '-DGGML_OPENBLAS=ON -DGGML_STATIC=On'
- build: 'avx2-cuda'
defines: '-DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DGGML_STATIC=Off'
steps:
- uses: actions/checkout@v3
@ -94,12 +97,23 @@ jobs:
- name: Install Dependencies
run: sudo apt-get update && sudo apt-get install -yq libboost-dev
- name: Install OpenBlas
if: ${{ matrix.build == 'avx2-openblas' }}
run: sudo apt-get install libopenblas-dev
- name: Install CUDA
if: ${{ matrix.build == 'avx2-cuda' }}
run: |
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb
sudo dpkg -i cuda-keyring_1.0-1_all.deb
sudo apt-get update && sudo apt-get install -yq libboost-dev cuda-nvcc-11-7 libcublas-dev-11-7
- name: Build
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
run: |
cd ${{github.workspace}}/ggml
cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D CMAKE_EXE_LINKER_FLAGS="-static"
cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} ${{ matrix.defines }}
cd ${{github.workspace}}/ggml/build
make codegen codegen-serve codegen-quantize
chmod +x ${{github.workspace}}/ggml/build/bin/codegen
@ -117,7 +131,7 @@ jobs:
uses: actions/upload-artifact@v3.1.2
with:
# Artifact name
name: turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ steps.short-sha.outputs.sha }} # optional, default is artifact
name: turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build }}-${{ steps.short-sha.outputs.sha }} # optional, default is artifact
# A file, directory or wildcard pattern that describes what to upload
path: ${{github.workspace}}/ggml/build/bin/codegen*
# The desired behavior if no files are found using the provided path.
@ -127,11 +141,143 @@ jobs:
run: |
cd ${{github.workspace}}/ggml/build/bin
zip turbopilot-${{ runner.os }}-${{ runner.arch }}.zip ./codegen*
zip turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build }}.zip ./codegen*
- name: Upload binaries to release
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
token: ${{ secrets.PUBLISH_TOKEN }}
files: ${{github.workspace}}/ggml/build/bin/turbopilot-${{ runner.os }}-${{ runner.arch }}.zip
files: ${{github.workspace}}/ggml/build/bin/turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build }}.zip
# Windows build job: configures and builds the codegen binaries with CMake for
# every CPU/BLAS variant in the matrix, uploads them as workflow artifacts and,
# on tag pushes, attaches a zip of the binaries to the GitHub release.
windows-latest-cmake:
  runs-on: windows-latest

  env:
    OPENBLAS_VERSION: 0.3.23
    OPENCL_VERSION: 2023.04.17
    CLBLAST_VERSION: 1.6.0

  strategy:
    matrix:
      # `build` names the variant (used in artifact/zip names and step
      # conditions); `defines` is passed verbatim to the CMake configure call.
      include:
        - build: 'avx2'
          defines: ''
        - build: 'avx'
          defines: '-DGGML_AVX2=OFF'
        - build: 'avx512'
          defines: '-DGGML_AVX512=ON'
        - build: 'openblas'
          defines: '-DGGML_OPENBLAS=ON -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'

  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v3
      with:
        submodules: true

    # Short commit SHA, used in the artifact name below. Declared once only:
    # step ids must be unique within a job.
    - uses: benjlevesque/short-sha@v2.2
      id: short-sha
      with:
        length: 6

    - name: Install boost
      uses: MarkusJx/install-boost@v2.4.1
      id: install-boost
      with:
        # REQUIRED: Specify the required boost version
        # A list of supported versions can be found here:
        # https://github.com/MarkusJx/prebuilt-boost/blob/main/versions-manifest.json
        boost_version: 1.78.0
        # OPTIONAL: Specify a custom install location
        boost_install_dir: C:\boost
        # OPTIONAL: Specify a platform version
        platform_version: 2022
        # OPTIONAL: Specify a toolset
        toolset: msvc
        # NOTE: If a boost version matching all requirements cannot be found,
        # this build step will fail

    # Downloads the prebuilt OpenBLAS release and generates an import library
    # (openblas.lib) from its .def file with MSVC's lib.exe so that the
    # BLAS_LIBRARIES path in the matrix `defines` resolves.
    - name: Download OpenBLAS
      id: get_openblas
      if: ${{ matrix.build == 'openblas' }}
      run: |
        curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
        curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
        mkdir $env:RUNNER_TEMP/openblas
        tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
        $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
        $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
        $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
        & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll

    - name: Build
      id: cmake_build
      env:
        BOOST_ROOT: ${{ steps.install-boost.outputs.BOOST_ROOT }}
      run: |
        cd ${{github.workspace}}/ggml
        mkdir build
        cd build
        cmake .. ${{ matrix.defines }}
        cmake --build . --config Release --target codegen codegen-serve codegen-quantize

    # - name: Add libopenblas.dll
    #   id: add_libopenblas_dll
    #   if: ${{ matrix.build == 'openblas' }}
    #   run: |
    #     cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
    #     cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt

    # Compiles and runs a tiny CPUID probe; exports HAS_AVX512F=1 to later
    # steps when the runner's CPU reports AVX512F. Failure is tolerated.
    - name: Check AVX512F support
      id: check_avx512f
      if: ${{ matrix.build == 'avx512' }}
      continue-on-error: true
      run: |
        # Every `run` step starts in the workspace root, so use the full path
        # of the build tree created by the Build step (ggml/build).
        cd ${{github.workspace}}/ggml/build
        $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
        $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
        $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
        echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
        & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
        .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"

    - name: Upload Build Artifacts
      uses: actions/upload-artifact@v3.1.2
      with:
        # Artifact name
        name: turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ steps.short-sha.outputs.sha }}-${{ matrix.build }} # optional, default is artifact
        # A file, directory or wildcard pattern that describes what to upload
        path: ${{github.workspace}}\\ggml\\build\\bin\\Release\\codegen*

    - name: package artifacts for release
      if: startsWith(github.ref, 'refs/tags/')
      run: |
        cd ${{github.workspace}}\ggml\build\bin\\Release\
        7z a ${{github.workspace}}\ggml\build\bin\Release\turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build }}.zip ./codegen*

    # The release action's `files` input below is given a forward-slash path,
    # so expose the workspace path with backslashes converted.
    - name: Convert backslashes
      id: convert_backslashes
      run: |
        $path = $env:GITHUB_WORKSPACE -replace '\\', '/'
        echo "Converted path: $path"
        # `::set-output` is deprecated; write step outputs to $GITHUB_OUTPUT.
        "converted_path=$path" >> $env:GITHUB_OUTPUT

    - name: Upload binaries to release
      uses: softprops/action-gh-release@v1
      if: startsWith(github.ref, 'refs/tags/')
      with:
        token: ${{ secrets.PUBLISH_TOKEN }}
        files: ${{ steps.convert_backslashes.outputs.converted_path }}/ggml/build/bin/Release/turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build }}.zip

View File

@ -2,7 +2,7 @@ name: Docker Image CI
on:
push:
branches: [ "main" ]
branches: [ '**' ]
tags: ['*']
pull_request:
branches: [ "main" ]

View File

@ -22,6 +22,8 @@ ENV THREADS=4
ENV MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin"
ENV BATCHSIZE=64
COPY ./run.sh /app/
EXPOSE 18080

View File

@ -22,6 +22,8 @@ ENV THREADS=4
ENV MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin"
ENV BATCHSIZE=8
COPY ./run.sh /app/
EXPOSE 18080

View File

@ -9,6 +9,9 @@ TurboPilot is a self-hosted [copilot](https://github.com/features/copilot) clone
![a screen recording of turbopilot running through fauxpilot plugin](assets/vscode-status.gif)
**NEW:** As of v0.0.5 turbopilot supports CUDA inference, which greatly accelerates suggestions when working with longer prompts (i.e. longer existing code files).
## 🤝 Contributing
PRs to this project and the corresponding [GGML fork](https://github.com/ravenscroftj/ggml) are very welcome.
@ -81,6 +84,26 @@ docker run --rm -it \
ghcr.io/ravenscroftj/turbopilot:latest
```
#### Docker and CUDA
As of release v0.0.5 turbopilot supports CUDA inference. To run the CUDA-enabled container you will need to have [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) enabled, use the CUDA-tagged image versions, and pass `--gpus=all` to docker so that the container has access to your GPU, like so:
```bash
docker run --gpus=all --rm -it \
-v ./models:/models \
-e THREADS=6 \
-e MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin" \
-p 18080:18080 \
ghcr.io/ravenscroftj/turbopilot:v0.0.5-cuda
```
You will need CUDA 11 or later to run this container. You should be able to see `/app/codegen-serve` listed when you run `nvidia-smi`.
#### Executable and CUDA
As of v0.0.5 a CUDA version of the Linux executable is available. It requires libcublas 11 to be installed on the machine. I might build Ubuntu debs at some point, but for now running in Docker may be more convenient if you want to use a CUDA GPU.
### 🌐 Using the API
#### Support for the official Copilot Plugin

2
ggml

@ -1 +1 @@
Subproject commit d3b152bf2deeebfe83c56701e9719502e2b331b2
Subproject commit 6c4fe0ef5e50b76dd2539130c109e12179da0bd2

2
run.sh
View File

@ -1,3 +1,3 @@
#!/bin/sh
/app/codegen-serve -t $THREADS -m $MODEL
/app/codegen-serve -t $THREADS -m $MODEL -b $BATCHSIZE