Merge pull request #27 from ravenscroftj/feature/cuda-builds

Feature/cuda builds
This commit is contained in:
James Ravenscroft 2023-05-27 21:37:47 +01:00 committed by GitHub
commit e30b7bf984
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 90 additions and 1 deletion

84
.github/workflows/build-cuda.yml vendored Normal file
View File

@ -0,0 +1,84 @@
# Builds the CUDA-enabled Linux binaries of turbopilot (codegen, codegen-serve,
# codegen-quantize) on Ubuntu, uploads them as a CI artifact on every push, and
# attaches a zip to the GitHub release when the ref is a tag.
name: Build CUDA binary on Ubuntu

on:
  push:
    branches: ['**']
    tags: ['**']
  pull_request:
    branches: ["main"]

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Release

jobs:
  build-ubuntu:
    # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac.
    # You can convert this to a matrix build if you need cross-platform coverage.
    # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
    runs-on: ubuntu-latest
    # strategy:
    #   matrix:
    #     include:
    #       - build: 'avx2'
    #         defines: ''
    #       - build: 'avx'
    #         defines: '-DLLAMA_AVX2=OFF'
    #       - build: 'avx512'
    #         defines: '-DLLAMA_AVX512=ON'
    steps:
      - uses: actions/checkout@v3
        with:
          # ggml lives in a git submodule; the build below cd's into it.
          submodules: true

      - name: Install Dependencies
        # Register NVIDIA's Ubuntu 22.04 apt repository (via cuda-keyring) so
        # that nvcc 11.7 and the cuBLAS dev headers can be installed.
        run: |
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb
          sudo dpkg -i cuda-keyring_1.0-1_all.deb
          sudo apt-get update && sudo apt-get install -yq libboost-dev cuda-nvcc-11-7 libcublas-dev-11-7

      - name: Build
        # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
        # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
        run: |
          cd ${{github.workspace}}/ggml
          cmake -B ${{github.workspace}}/ggml/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc
          cd ${{github.workspace}}/ggml/build
          make codegen codegen-serve codegen-quantize
          chmod +x ${{github.workspace}}/ggml/build/bin/codegen
          chmod +x ${{github.workspace}}/ggml/build/bin/codegen-serve
          chmod +x ${{github.workspace}}/ggml/build/bin/codegen-quantize

      # Expose a short (6-char) commit SHA as steps.short-sha.outputs.sha for
      # use in the artifact name below.
      - uses: benjlevesque/short-sha@v2.2
        id: short-sha
        with:
          length: 6

      - name: Upload Build Artifacts
        uses: actions/upload-artifact@v3.1.2
        with:
          # Artifact name
          name: turbopilot-${{ runner.os }}-${{ runner.arch }}-${{ steps.short-sha.outputs.sha }}-cuda # optional, default is artifact
          # A file, directory or wildcard pattern that describes what to upload
          path: ${{github.workspace}}/ggml/build/bin/codegen*
          # The desired behavior if no files are found using the provided path.

      - name: package artifacts for release
        if: startsWith(github.ref, 'refs/tags/')
        # FIX: the zip must carry the `-cuda` suffix — the release step below
        # uploads `turbopilot-<os>-<arch>-cuda.zip`, so without the suffix the
        # release upload could never find the file it was pointed at.
        run: |
          cd ${{github.workspace}}/ggml/build/bin
          zip turbopilot-${{ runner.os }}-${{ runner.arch }}-cuda.zip ./codegen*

      - name: Upload binaries to release
        uses: softprops/action-gh-release@v1
        if: startsWith(github.ref, 'refs/tags/')
        with:
          token: ${{ secrets.PUBLISH_TOKEN }}
          files: ${{github.workspace}}/ggml/build/bin/turbopilot-${{ runner.os }}-${{ runner.arch }}-cuda.zip

View File

@ -97,7 +97,12 @@ docker run --gpus=all --rm -it \
ghcr.io/ravenscroftj/turbopilot:v0.0.5-cuda
```
You should be able to see `/app/codegen-serve` listed when you run `nvidia-smi`.
You will need CUDA 11 or later to run this container. You should be able to see `/app/codegen-serve` listed when you run `nvidia-smi`.
#### Executable and CUDA
As of v0.0.5 a CUDA version of the linux executable is available - it requires that libcublas 11 be installed on the machine - I might build ubuntu debs at some point but for now running in docker may be more convenient if you want to use a CUDA GPU.
### 🌐 Using the API