From 3b7770dbce165b9f93b651ed24dbcffbd571d126 Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Sun, 14 May 2023 09:18:40 +0100 Subject: [PATCH 01/10] add cuda to dependencies --- .github/workflows/build-cuda.yml | 84 ++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 .github/workflows/build-cuda.yml diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml new file mode 100644 index 0000000..f815bda --- /dev/null +++ b/.github/workflows/build-cuda.yml @@ -0,0 +1,84 @@ +name: Build CUDA binary on Ubuntu + +on: + push: + branches: [ '**' ] + tags: ['**'] + pull_request: + branches: [ "main" ] + +env: + # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) + BUILD_TYPE: Release + +jobs: + + build-ubuntu: + # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. + # You can convert this to a matrix build if you need cross-platform coverage. + # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix + runs-on: ubuntu-latest + + # strategy: + # matrix: + # include: + # - build: 'avx2' + # defines: '' + # - build: 'avx' + # defines: '-DLLAMA_AVX2=OFF' + # - build: 'avx512' + # defines: '-DLLAMA_AVX512=ON' + + + steps: + - uses: actions/checkout@v3 + with: + submodules: true + + - name: Install Dependencies + run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb + sudo dpkg -i cuda-keyring_1.0-1_all.deb + sudo apt-get update && sudo apt-get install -yq libboost-dev cuda + + - name: Build + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
+ # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: | + cd ${{github.workspace}}/ggml + cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D CMAKE_EXE_LINKER_FLAGS="-static" + cd ${{github.workspace}}/ggml/build + make codegen codegen-serve codegen-quantize + chmod +x ${{github.workspace}}/ggml/build/bin/codegen + chmod +x ${{github.workspace}}/ggml/build/bin/codegen-serve + chmod +x ${{github.workspace}}/ggml/build/bin/codegen-quantize + + + - uses: benjlevesque/short-sha@v2.2 + id: short-sha + with: + length: 6 + + + - name: Upload Build Artifacts + uses: actions/upload-artifact@v3.1.2 + with: + # Artifact name + name: turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ steps.short-sha.outputs.sha }} # optional, default is artifact + # A file, directory or wildcard pattern that describes what to upload + path: ${{github.workspace}}/ggml/build/bin/codegen* + # The desired behavior if no files are found using the provided path. 
+ + - name: package artifacts for release + if: startsWith(github.ref, 'refs/tags/') + + run: | + cd ${{github.workspace}}/ggml/build/bin + zip turbopilot-${{ runner.os }}-${{ runner.arch }}.zip ./codegen* + + - name: Upload binaries to release + uses: softprops/action-gh-release@v1 + if: startsWith(github.ref, 'refs/tags/') + with: + token: ${{ secrets.PUBLISH_TOKEN }} + files: ${{github.workspace}}/ggml/build/bin/turbopilot-${{ runner.os }}-${{ runner.arch }}.zip From cb3fa20f1387bbdf67b8704eef1ee370cae5c332 Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Sun, 14 May 2023 09:24:21 +0100 Subject: [PATCH 02/10] try to be more targetted about cuda deps --- .github/workflows/build-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml index f815bda..0572149 100644 --- a/.github/workflows/build-cuda.yml +++ b/.github/workflows/build-cuda.yml @@ -39,7 +39,7 @@ jobs: run: | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb sudo dpkg -i cuda-keyring_1.0-1_all.deb - sudo apt-get update && sudo apt-get install -yq libboost-dev cuda + sudo apt-get update && sudo apt-get install -yq libboost-dev cuda-nvcc-11-7 - name: Build # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
From ecbfed4fc5b86297a1a795403efd7c39cadeb017 Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Sun, 14 May 2023 09:29:57 +0100 Subject: [PATCH 03/10] add cuda to cmake args --- .github/workflows/build-cuda.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml index 0572149..8117ecc 100644 --- a/.github/workflows/build-cuda.yml +++ b/.github/workflows/build-cuda.yml @@ -46,7 +46,7 @@ jobs: # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type run: | cd ${{github.workspace}}/ggml - cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D CMAKE_EXE_LINKER_FLAGS="-static" + cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D CMAKE_EXE_LINKER_FLAGS="-static" -DGGML_CUBLAS=ON cd ${{github.workspace}}/ggml/build make codegen codegen-serve codegen-quantize chmod +x ${{github.workspace}}/ggml/build/bin/codegen @@ -64,7 +64,7 @@ jobs: uses: actions/upload-artifact@v3.1.2 with: # Artifact name - name: turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ steps.short-sha.outputs.sha }} # optional, default is artifact + name: turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ steps.short-sha.outputs.sha }}-cuda # optional, default is artifact # A file, directory or wildcard pattern that describes what to upload path: ${{github.workspace}}/ggml/build/bin/codegen* # The desired behavior if no files are found using the provided path. 
From 20ca21a5396a2cc0ba31c6c7b7f848a707f627fc Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Sun, 14 May 2023 09:32:31 +0100 Subject: [PATCH 04/10] add nvcc location to cmake args --- .github/workflows/build-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml index 8117ecc..658adbb 100644 --- a/.github/workflows/build-cuda.yml +++ b/.github/workflows/build-cuda.yml @@ -46,7 +46,7 @@ jobs: # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type run: | cd ${{github.workspace}}/ggml - cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D CMAKE_EXE_LINKER_FLAGS="-static" -DGGML_CUBLAS=ON + cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D CMAKE_EXE_LINKER_FLAGS="-static" -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc cd ${{github.workspace}}/ggml/build make codegen codegen-serve codegen-quantize chmod +x ${{github.workspace}}/ggml/build/bin/codegen From c355763419f72c0e86c9ea613aafb7067708edf7 Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Sun, 14 May 2023 09:34:26 +0100 Subject: [PATCH 05/10] add libcublas to deps --- .github/workflows/build-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml index 658adbb..730a900 100644 --- a/.github/workflows/build-cuda.yml +++ b/.github/workflows/build-cuda.yml @@ -39,7 +39,7 @@ jobs: run: | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb sudo dpkg -i cuda-keyring_1.0-1_all.deb - sudo apt-get update && sudo apt-get install -yq libboost-dev cuda-nvcc-11-7 + sudo apt-get update && sudo apt-get install -yq libboost-dev cuda-nvcc-11-7 libcublas-dev-11-7 - name: Build # Configure CMake in a 'build' subdirectory. 
`CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. From 0c737550559119e0891d343613ae63a8f373fe03 Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Sun, 14 May 2023 10:04:22 +0100 Subject: [PATCH 06/10] turn on static properly --- .github/workflows/build-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml index 730a900..f46229a 100644 --- a/.github/workflows/build-cuda.yml +++ b/.github/workflows/build-cuda.yml @@ -46,7 +46,7 @@ jobs: # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type run: | cd ${{github.workspace}}/ggml - cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D CMAKE_EXE_LINKER_FLAGS="-static" -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc + cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D GGML_STATIC=ON -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc cd ${{github.workspace}}/ggml/build make codegen codegen-serve codegen-quantize chmod +x ${{github.workspace}}/ggml/build/bin/codegen From 222641c959eff8fb8a7dbc0fb548d9dc0f73589d Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Sun, 14 May 2023 10:10:33 +0100 Subject: [PATCH 07/10] turn on static properly and retain static flag --- .github/workflows/build-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml index f46229a..0ed7cc4 100644 --- a/.github/workflows/build-cuda.yml +++ b/.github/workflows/build-cuda.yml @@ -46,7 +46,7 @@ jobs: # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type run: | cd ${{github.workspace}}/ggml - cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D GGML_STATIC=ON -DGGML_CUBLAS=ON 
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc + cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D CMAKE_EXE_LINKER_FLAGS="-static" -DGGML_STATIC=ON -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc cd ${{github.workspace}}/ggml/build make codegen codegen-serve codegen-quantize chmod +x ${{github.workspace}}/ggml/build/bin/codegen From e424931e2e6f306e4f6bc2e71c5346e458ee635f Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Tue, 16 May 2023 20:16:47 +0100 Subject: [PATCH 08/10] turn off static linking in cuda builds --- .github/workflows/build-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml index 0ed7cc4..29784c3 100644 --- a/.github/workflows/build-cuda.yml +++ b/.github/workflows/build-cuda.yml @@ -46,7 +46,7 @@ jobs: # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type run: | cd ${{github.workspace}}/ggml - cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -D CMAKE_EXE_LINKER_FLAGS="-static" -DGGML_STATIC=ON -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc + cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc cd ${{github.workspace}}/ggml/build make codegen codegen-serve codegen-quantize chmod +x ${{github.workspace}}/ggml/build/bin/codegen From ec1d897a00240b2f6a54b2750875848c4b9032b8 Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Tue, 16 May 2023 20:23:57 +0100 Subject: [PATCH 09/10] fix name of build artifact for release --- .github/workflows/build-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml index 29784c3..ccb301c 100644 --- a/.github/workflows/build-cuda.yml +++ b/.github/workflows/build-cuda.yml @@ -81,4 +81,4 @@ jobs: if: 
startsWith(github.ref, 'refs/tags/') with: token: ${{ secrets.PUBLISH_TOKEN }} - files: ${{github.workspace}}/ggml/build/bin/turbopilot-${{ runner.os }}-${{ runner.arch }}.zip + files: ${{github.workspace}}/ggml/build/bin/turbopilot-${{ runner.os }}-${{ runner.arch }}-cuda.zip From d3f92176ca80f1e396ade4272141eab668326a7c Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Tue, 16 May 2023 20:27:16 +0100 Subject: [PATCH 10/10] add cuda stuff to readme --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0edb8d2..263d58d 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,12 @@ docker run --gpus=all --rm -it \ ghcr.io/ravenscroftj/turbopilot:v0.0.5-cuda ``` -You should be able to see `/app/codegen-serve` listed when you run `nvidia-smi`. +You will need CUDA 11 or later to run this container. You should be able to see `/app/codegen-serve` listed when you run `nvidia-smi`. + + +#### Executable and CUDA + +As of v0.0.5, a CUDA build of the Linux executable is available. It requires libcublas 11 to be installed on the machine. I might build Ubuntu debs at some point, but for now running in Docker may be more convenient if you want to use a CUDA GPU. ### 🌐 Using the API